Commit a084fc5a authored by Jason Rigby's avatar Jason Rigby
Browse files

Merge branch 'substudy-membership' into 'master'

Substudy membership

See merge request aspree/aspree-r-package!28
parents aea00da8 ee74b602
......@@ -19,5 +19,6 @@ Imports:
dbplyr,
digest,
openssl,
reshape2
reshape2,
tidyr
RoxygenNote: 6.1.1
......@@ -11,6 +11,7 @@ export(ExtractDeathDate)
export(ExtractEventsASPREE)
export(ExtractEventsBridge)
export(ExtractEventsXT)
export(FilterByCountry)
export(FilterRandomisedParticipants)
export(GenerateLinkageKey)
export(GenerateOneTimeLinkageKey)
......@@ -23,7 +24,9 @@ export(GetRetinalPhotographyConsent)
export(GetSNOREConsent)
export(GetSubstudyCogs)
export(GetSubstudyConsent)
export(GetSubstudyMembership)
export(GetXTConsent)
export(HasVisit)
export(MergeCountryBySubjectId)
export(SDrivePaths)
export(extract_ASP_End)
......
......@@ -122,6 +122,7 @@ FilterRandomisedParticipants <- function(con, data) {
#' @examples
#' consent_retinal <- con %>% GetSubstudyConsent(c(0, 80), "retinal")
#' @return a data frame with a boolean consent column
#' @import dplyr
#' @export
GetSubstudyConsent <- function(con, visitTimes, consentColumn) {
if (!is.vector(visitTimes) & is.numeric(visitTimes)) {
......
......@@ -14,3 +14,19 @@ MergeCountryBySubjectId <- function(data, sectionPaths = SDrivePaths()) {
country <- country[, c("SubjectID", "Country")]
return(merge(data, country, by = "SubjectID", all = TRUE))
}
#' Filter by country
#'
#' Filters a dataset to include only those participants from the specified country
#'
#' @import dplyr
#' @export
FilterByCountry <- function(data, country = c("AUS", "USA"), sectionPaths = SDrivePaths()) {
country <- toupper(country)
if (!all(country %in% c("USA", "AUS"))) {
stop("`country` must be one of \"USA\" or \"AUS\"")
}
country <- as.numeric(country == "USA") + 1
country_merged <- MergeCountryBySubjectId(data, sectionPaths)
country_merged[which(country_merged$Country %in% country),] %>% select(-Country)
}
#' Find participants with visit
#'
#' Returns a dataset of all participants who have has a given visit conducted
#'
#' @param con a database connection using the DBI package or \code{ASPREEDb()}
#' @param visitTime the visit in question, can be either the numeric code or text label for the visit
#' @param visitTimeColumn the name of the column in the visit table that contains the numeric codes
#' @param tableName the name of the table from which to retrieve the visits
#' @param out_col the name of the column that will contain the TRUE/FALSE values indicating whether the visit was conducted
#' @param complexFilter is an optional function that accepts the current database connection and dataset as parameters to be filtered with additional logic
#' @import dplyr
#' @export
HasVisit <- function(con, visitTime, tableName = "tblVisits", out_col = "visitConducted", keep_cols = c(), complexFilter = NA) {
visits <- con %>% tbl(tableName)
if ("visitTime" %in% colnames(visits)) {
visitTimeColumn <- "visitTime"
} else {
visitTimeColumn <- "VisitTime"
}
visits <- visits %>% left_join(con %>% tbl("tlkpVisitTime"), by = setNames("Code", visitTimeColumn))
if (is.numeric(visitTime)) {
visits <- visits %>% filter(UQ(as.name(visitTimeColumn)) == !!visitTime)
} else {
visits <- visits %>% filter(Description == !!visitTime)
}
if (is.function(complexFilter)) {
visits <- con %>% complexFilter(con, visits %>% collect())
} else if (!is.na(complexFilter)) {
stop(paste0(c("`complexFilter` must be a function or NA, not ", class(complexFilter))))
}
visits <- con %>% FilterRandomisedParticipants(visits %>% data.frame())
visits[,out_col] <- !is.na(visits[,visitTimeColumn])
return(do.call("select", list(.data=visits, c("SubjectID", out_col, keep_cols))))
}
#' Derive sub-study membership
#'
#' @param con a database connection using the DBI package or \code{ASPREEDb()}
#' @param country filter based on country, either "AUS" or "USA" or NA for all
#' @param returnFrequencyTable if TRUE, return a summary of study participation as a frequency table
#' @param sectionPaths a list mapping section names, e.g. SectionA1, to a file path
#' @import dplyr
#' @export
GetSubstudyMembership <- function(con, country = NA, returnFrequencyTable = TRUE, sectionPaths = SDrivePaths()) {
# Biobank membership defined as participants who have consented AND have either a baseline and/or follow-up biobank visit
biobank_consent <- con %>% HasVisit("Biobank - Baseline", tableName = "tblConsent", keep_cols = "Consent")
biobank_consent$Consent[is.na(biobank_consent$Consent)] <- 0
biobank_consent$Consent <- biobank_consent$Consent == 1
biobank_membership <- merge(
con %>% HasVisit("Biobank - Baseline"),
con %>% HasVisit("Biobank - Follow-up"),
by = "SubjectID")
biobank_membership <- merge(biobank_membership, biobank_consent[,c("SubjectID", "Consent")], by = "SubjectID")
biobank_membership$visitConducted <- (biobank_membership$visitConducted.x | biobank_membership$visitConducted.y) & biobank_membership$Consent
biobank_membership <- biobank_membership[,c("SubjectID", "visitConducted")] %>% rename(Biobank = "visitConducted")
# ENVISion membership defined as participants with consent and an Envision baseline visit
envision_consent <- con %>% HasVisit("Envision - Baseline", tableName = "tblConsent", keep_cols = "Consent")
envision_consent$Consent[is.na(envision_consent$Consent)] <- 0
envision_consent$Consent <- envision_consent$Consent == 1
envision_membership <- con %>% HasVisit("Envision - Baseline") %>%
merge(envision_consent[,c("SubjectID", "Consent")], by = "SubjectID") %>%
rename(ENVISion = "visitConducted")
envision_membership$ENVISion <- envision_membership$ENVISion & envision_membership$Consent
envision_membership <- envision_membership %>% select(SubjectID, ENVISion)
# SNORE membership defined as participants with consent and a SNORE baseline visit
snore_consent <- con %>% HasVisit("SNORE - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>%
select(SubjectID, Consent)
snore_consent$Consent[is.na(snore_consent$Consent)] <- 0
snore_consent$Consent <- snore_consent$Consent == 1
snore_membership <- con %>% HasVisit("SNORE - Baseline") %>%
merge(snore_consent, by = "SubjectID") %>%
rename(SNORE = "visitConducted")
snore_membership$SNORE <- snore_membership$SNORE & snore_membership$Consent
snore_membership <- snore_membership %>% select(SubjectID, SNORE)
# SNORE imaging membership defined as participants with "Consent", "imaging" and "imgTakePart" = 1
snore_img_membership <- con %>% HasVisit("SNORE - Baseline", tableName = "tblConsent", keep_cols = c("Consent", "imaging", "imgTakePart"))
snore_img_membership$Consent[is.na(snore_img_membership$Consent)] <- 0
snore_img_membership$imaging[is.na(snore_img_membership$imaging)] <- 0
snore_img_membership$imgTakePart[is.na(snore_img_membership$imgTakePart)] <- 0
snore_img_membership$Consent <- snore_img_membership$Consent == 1
snore_img_membership$imaging <- snore_img_membership$imaging == 1
snore_img_membership$imgTakePart <- snore_img_membership$imgTakePart == 1
snore_img_membership$visitConducted <- snore_img_membership$Consent & snore_img_membership$imaging & snore_img_membership$imgTakePart
snore_img_membership <- snore_img_membership[,c("SubjectID", "visitConducted")] %>% rename(SNORE_img = "visitConducted")
# AMD membership is defined as anyone with retinal photography consent and analysed retcam images
amd_consent <- con %>% GetRetinalPhotographyConsent()
amd_consent$consent[is.na(amd_consent$consent)] <- FALSE
amd_analysis <- con %>% tbl("tblAnalysisRetCam") %>% filter(visitTime %in% c(81, 82, 84, 701)) %>%
select(StudyID, R_F1_Qual_Focus, R_F1_Qual_Field, R_F2_Qual_Focus, R_F2_Qual_Field, L_F1_Qual_Focus, L_F1_Qual_Field, L_F2_Qual_Focus, L_F2_Qual_Field) %>%
rename(SubjectID = "StudyID") %>% data.frame()
amd_analysis <- con %>% FilterRandomisedParticipants(amd_analysis)
amd_analysis$AMD <- apply(amd_analysis[, -c(1)], 1, function(x) {!any(is.na(x))})
amd_membership <- merge(amd_consent, amd_analysis[,c("SubjectID", "AMD")], by="SubjectID")
amd_membership$AMD <- amd_membership$AMD & amd_membership$consent
amd_membership <- amd_membership %>% select(SubjectID, AMD)
# Knee membership defined as anyone with consent who has at least one datapoint in PASE, Pedometer, WOMAC, or knee MRI
knee_consent <- con %>% HasVisit("Knee - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>% select(SubjectID, Consent)
knee_consent$Consent[is.na(knee_consent$Consent)] <- 0
knee_consent$Consent <- knee_consent$Consent == 1
knee_membership <- knee_consent %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneePASE", out_col = "pase"), by = "SubjectID") %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneePedometer", out_col = "pedometer") %>% distinct(), by = "SubjectID") %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneeWOMAC", out_col = "womac"), by = "SubjectID") %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneeAnalysisMRI", out_col = "mri"), by = "SubjectID")
knee_membership$Knee <- apply(knee_membership %>% select(-c(SubjectID, Consent)), 1, any)
knee_membership$Knee <- knee_membership$Knee & knee_membership$Consent
knee_membership <- knee_membership %>% select(SubjectID, Knee)
# NEURO membership defined as any participant with consent and a NEURO visit
neuro_consent <- con %>% HasVisit("Neuro - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>% select(SubjectID, Consent)
neuro_consent$Consent[is.na(neuro_consent$Consent)] <- 0
neuro_consent$Consent <- neuro_consent$Consent == 1
neuro_membership <- merge(
neuro_consent, con %>% HasVisit("Neuro - Baseline", out_col = "NEURO"),
by = "SubjectID") %>%
merge(
con %>% HasVisit("Neuro - Baseline", tableName = "tblAnalysisMRI", out_col = "NEURO_MRI"),
by = "SubjectID"
)
neuro_membership$NEURO <- neuro_membership$NEURO & neuro_membership$Consent & neuro_membership$NEURO_MRI
neuro_membership <- neuro_membership %>% select(SubjectID, NEURO)
# Hearing membership is defined as any participant with consent and a hearing visit
hearing_consent <- con %>% HasVisit("Hearing - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>% select(SubjectID, Consent)
hearing_consent$Consent[is.na(hearing_consent$Consent)] <- 0
hearing_consent$Consent <- hearing_consent$Consent == 1
hearing_membership <- merge(
hearing_consent,
con %>% HasVisit("Hearing - Baseline", out_col = "Hearing"),
by = "SubjectID")
hearing_membership$Hearing <- hearing_membership$Hearing & hearing_membership$Consent
hearing_membership <- hearing_membership %>% select(SubjectID, Hearing)
membership <- data.frame(SubjectID = hearing_membership$SubjectID, ASPREE = TRUE) %>%
merge(biobank_membership, by = "SubjectID") %>%
merge(envision_membership, by = "SubjectID") %>%
merge(snore_membership, by = "SubjectID") %>%
merge(snore_img_membership, by = "SubjectID") %>%
merge(amd_membership, by = "SubjectID") %>%
merge(knee_membership, by = "SubjectID") %>%
merge(neuro_membership, by = "SubjectID") %>%
merge(hearing_membership, by = "SubjectID")
if (!is.na(country)) {
membership <- membership %>% FilterByCountry(country, sectionPaths)
}
if (returnFrequencyTable) {
membership <- reshape2::melt(membership, id.vars = "SubjectID", variable.name = "Study")
membership <- membership[membership$value == TRUE,c("SubjectID", "Study")]
membership <- membership %>% left_join(membership, by = "SubjectID") %>%
select(Columns = Study.x, Rows = Study.y) %>%
group_by(Columns, Rows) %>%
summarise(Count = n()) %>%
tidyr::spread(Columns, Count, fill = 0)
}
return(membership %>% as.data.frame())
}
#' Get cognitive data
#'
#' Creates a query that retrieves all substudy cognitive data
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/demographics.R
\name{FilterByCountry}
\alias{FilterByCountry}
\title{Filter by country}
\usage{
FilterByCountry(data, country = c("AUS", "USA"),
sectionPaths = SDrivePaths())
}
\description{
Filters a dataset to include only those participants from the specified country
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/substudy_data.R
\name{GetSubstudyMembership}
\alias{GetSubstudyMembership}
\title{Derive sub-study membership}
\usage{
GetSubstudyMembership(con, country = NA, returnFrequencyTable = TRUE,
sectionPaths = SDrivePaths())
}
\arguments{
\item{con}{a database connection using the DBI package or \code{ASPREEDb()}}
\item{country}{filter based on country, either "AUS" or "USA" or NA for all}
\item{returnFrequencyTable}{if TRUE, return a summary of study participation as a frequency table}
\item{sectionPaths}{a list mapping section names, e.g. SectionA1, to a file path}
}
\description{
Derive sub-study membership
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/substudy_data.R
\name{HasVisit}
\alias{HasVisit}
\title{Find participants with visit}
\usage{
HasVisit(con, visitTime, tableName = "tblVisits",
out_col = "visitConducted", keep_cols = c(), complexFilter = NA)
}
\arguments{
\item{con}{a database connection using the DBI package or \code{ASPREEDb()}}
\item{visitTime}{the visit in question, can be either the numeric code or text label for the visit}
\item{tableName}{the name of the table from which to retrieve the visits}
\item{out_col}{the name of the column that will contain the TRUE/FALSE values indicating whether the visit was conducted}
\item{complexFilter}{is an optional function that accepts the current database connection and dataset as parameters to be filtered with additional logic}
\item{visitTimeColumn}{the name of the column in the visit table that contains the numeric codes}
}
\description{
Returns a dataset of all participants who have has a given visit conducted
}
......@@ -4,8 +4,7 @@
\alias{SDrivePaths}
\title{Get S-drive paths}
\usage{
SDrivePaths(prefix = "S:/MNHS-SPHPM-EPM/ASPREE-Data/ASPREE Longitudinal Data Set/")
SDrivePaths(ver = 2, prefix = NA)
}
\arguments{
\item{prefix}{the base path of the dataset location}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment