Commit 8010b485 authored by Ashley Stewart's avatar Ashley Stewart 💃
Browse files

Merge remote-tracking branch 'Upstream/master'

parents ea66b59f 9fda02b0
...@@ -19,5 +19,6 @@ Imports: ...@@ -19,5 +19,6 @@ Imports:
dbplyr, dbplyr,
digest, digest,
openssl, openssl,
reshape2 reshape2,
tidyr
RoxygenNote: 6.1.1 RoxygenNote: 6.1.1
...@@ -11,10 +11,12 @@ export(ExtractDeathDate) ...@@ -11,10 +11,12 @@ export(ExtractDeathDate)
export(ExtractEventsASPREE) export(ExtractEventsASPREE)
export(ExtractEventsBridge) export(ExtractEventsBridge)
export(ExtractEventsXT) export(ExtractEventsXT)
export(FilterByCountry)
export(FilterRandomisedParticipants) export(FilterRandomisedParticipants)
export(GenerateLinkageKey) export(GenerateLinkageKey)
export(GenerateOneTimeLinkageKey) export(GenerateOneTimeLinkageKey)
export(GetAllTables) export(GetAllTables)
export(GetDataBySubjectID)
export(GetENVISionConsent) export(GetENVISionConsent)
export(GetEndpoints) export(GetEndpoints)
export(GetRETCAMConsent) export(GetRETCAMConsent)
...@@ -23,7 +25,9 @@ export(GetRetinalPhotographyConsent) ...@@ -23,7 +25,9 @@ export(GetRetinalPhotographyConsent)
export(GetSNOREConsent) export(GetSNOREConsent)
export(GetSubstudyCogs) export(GetSubstudyCogs)
export(GetSubstudyConsent) export(GetSubstudyConsent)
export(GetSubstudyMembership)
export(GetXTConsent) export(GetXTConsent)
export(HasVisit)
export(MergeCountryBySubjectId) export(MergeCountryBySubjectId)
export(SDrivePaths) export(SDrivePaths)
export(extract_ASP_End) export(extract_ASP_End)
......
...@@ -122,6 +122,7 @@ FilterRandomisedParticipants <- function(con, data) { ...@@ -122,6 +122,7 @@ FilterRandomisedParticipants <- function(con, data) {
#' @examples #' @examples
#' consent_retinal <- con %>% GetSubstudyConsent(c(0, 80), "retinal") #' consent_retinal <- con %>% GetSubstudyConsent(c(0, 80), "retinal")
#' @return a data frame with a boolean consent column #' @return a data frame with a boolean consent column
#' @import dplyr
#' @export #' @export
GetSubstudyConsent <- function(con, visitTimes, consentColumn) { GetSubstudyConsent <- function(con, visitTimes, consentColumn) {
if (!is.vector(visitTimes) & is.numeric(visitTimes)) { if (!is.vector(visitTimes) & is.numeric(visitTimes)) {
......
...@@ -122,3 +122,91 @@ GetAllTables <- function(con) { ...@@ -122,3 +122,91 @@ GetAllTables <- function(con) {
rownames(tables) <- NULL rownames(tables) <- NULL
return(tables) return(tables)
} }
#' Get one or more variables across multiple tables
#'
#' Merges data across tables by Subject ID and optionally by visitTime
#'
#' @param con a database connection using the DBI package or \code{ASPREEDb()}
#' @param tables_and_fields a list of tables and columns to retrieve, e.g. \code{list("tblAnnualVisits" = c("Wt", "Ht"))}
#' omitting the columns will retrieve all columns
#' @param include_visit_time TRUE if the tables to be merged include visit time
#' @param normalise_visit_time TRUE if the visit times need to be reduced to the overall annual visit number, i.e. integer-division by 10
#' @param randomised_pt_only set to TRUE to ensure only randomise participants are returned
#' @return the merged data
#' @import dplyr
#' @export
GetDataBySubjectID <- function(con, tables_and_fields = list(), include_visit_time = TRUE, normalise_visit_time = TRUE, randomised_pt_only = TRUE) {
data <- NULL
# Iterate over each table
for (i in seq_along(names(tables_and_fields))) {
# Get the current table
table <- names(tables_and_fields)[i]
if (nchar(table) == 0) {
table <- tables_and_fields[[i]]
}
d <- con %>% tbl(table)
# Set up the join depending on whether visitTime is to be included
if (include_visit_time) {
if (normalise_visit_time) {
join_by <- c("SubjectID", "AV")
} else {
join_by <- c("SubjectID", "visitTime")
}
if ("visitTime" %in% (d %>% colnames())) {
vt <- "visitTime"
} else {
vt <- "VisitTime"
}
cols <- c("SubjectID", vt, tables_and_fields[[table]])
} else {
join_by <- "SubjectID"
cols <- c("SubjectID", tables_and_fields[[table]])
}
# Get subset of columns, or all columns if none provided
if (!is.null(tables_and_fields[[table]])) {
d <- d %>% select(.dots = cols)
d <- as.data.frame(d)
names(d) <- cols
} else {
d <- as.data.frame(d)
}
# Normalise visit times if requested
if (include_visit_time) {
if (normalise_visit_time) {
d <- d %>% mutate(visitTime = visitTime %/% 10) %>% rename(AV = vt)
} else {
d <- d %>% dplyr::rename(visitTime = vt)
}
}
# Merge in table to overall result
if (is.null(data)) {
data <- d
} else {
data <- data %>% dplyr::full_join(d, by = join_by)
}
}
# Merge in the visit descriptions if visit times are not normalised
if (include_visit_time & !normalise_visit_time) {
data <- data %>% left_join(
con %>% tbl("tlkpVisitTime") %>%
select(Code, Description) %>%
collect(), by = c("visitTime" = "Code")
) %>%
rename(visitName = "Description")
}
# Filter for randomised participants only if requested
if (randomised_pt_only) {
data <- con %>% FilterRandomisedParticipants(data)
}
return(data)
}
...@@ -14,3 +14,19 @@ MergeCountryBySubjectId <- function(data, sectionPaths = SDrivePaths()) { ...@@ -14,3 +14,19 @@ MergeCountryBySubjectId <- function(data, sectionPaths = SDrivePaths()) {
country <- country[, c("SubjectID", "Country")] country <- country[, c("SubjectID", "Country")]
return(merge(data, country, by = "SubjectID", all = TRUE)) return(merge(data, country, by = "SubjectID", all = TRUE))
} }
#' Filter by country
#'
#' Filters a dataset to include only those participants from the specified country
#'
#' @import dplyr
#' @export
FilterByCountry <- function(data, country = c("AUS", "USA"), sectionPaths = SDrivePaths()) {
country <- toupper(country)
if (!all(country %in% c("USA", "AUS"))) {
stop("`country` must be one of \"USA\" or \"AUS\"")
}
country <- as.numeric(country == "USA") + 1
country_merged <- MergeCountryBySubjectId(data, sectionPaths)
country_merged[which(country_merged$Country %in% country),] %>% select(-Country)
}
#' Find participants with visit
#'
#' Returns a dataset of all participants who have has a given visit conducted
#'
#' @param con a database connection using the DBI package or \code{ASPREEDb()}
#' @param visitTime the visit in question, can be either the numeric code or text label for the visit
#' @param visitTimeColumn the name of the column in the visit table that contains the numeric codes
#' @param tableName the name of the table from which to retrieve the visits
#' @param out_col the name of the column that will contain the TRUE/FALSE values indicating whether the visit was conducted
#' @param complexFilter is an optional function that accepts the current database connection and dataset as parameters to be filtered with additional logic
#' @import dplyr
#' @export
HasVisit <- function(con, visitTime, tableName = "tblVisits", out_col = "visitConducted", keep_cols = c(), complexFilter = NA) {
visits <- con %>% tbl(tableName)
if ("visitTime" %in% colnames(visits)) {
visitTimeColumn <- "visitTime"
} else {
visitTimeColumn <- "VisitTime"
}
visits <- visits %>% left_join(con %>% tbl("tlkpVisitTime"), by = setNames("Code", visitTimeColumn))
if (is.numeric(visitTime)) {
visits <- visits %>% filter(UQ(as.name(visitTimeColumn)) == !!visitTime)
} else {
visits <- visits %>% filter(Description == !!visitTime)
}
if (is.function(complexFilter)) {
visits <- con %>% complexFilter(con, visits %>% collect())
} else if (!is.na(complexFilter)) {
stop(paste0(c("`complexFilter` must be a function or NA, not ", class(complexFilter))))
}
visits <- con %>% FilterRandomisedParticipants(visits %>% data.frame())
visits[,out_col] <- !is.na(visits[,visitTimeColumn])
return(do.call("select", list(.data=visits, c("SubjectID", out_col, keep_cols))))
}
#' Derive sub-study membership
#'
#' @param con a database connection using the DBI package or \code{ASPREEDb()}
#' @param country filter based on country, either "AUS" or "USA" or NA for all
#' @param returnFrequencyTable if TRUE, return a summary of study participation as a frequency table
#' @param sectionPaths a list mapping section names, e.g. SectionA1, to a file path
#' @import dplyr
#' @export
GetSubstudyMembership <- function(con, country = NA, returnFrequencyTable = TRUE, sectionPaths = SDrivePaths()) {
# Biobank membership defined as participants who have consented AND have either a baseline and/or follow-up biobank visit
biobank_consent <- con %>% HasVisit("Biobank - Baseline", tableName = "tblConsent", keep_cols = "Consent")
biobank_consent$Consent[is.na(biobank_consent$Consent)] <- 0
biobank_consent$Consent <- biobank_consent$Consent == 1
biobank_membership <- merge(
con %>% HasVisit("Biobank - Baseline"),
con %>% HasVisit("Biobank - Follow-up"),
by = "SubjectID")
biobank_membership <- merge(biobank_membership, biobank_consent[,c("SubjectID", "Consent")], by = "SubjectID")
biobank_membership$visitConducted <- (biobank_membership$visitConducted.x | biobank_membership$visitConducted.y) & biobank_membership$Consent
biobank_membership <- biobank_membership[,c("SubjectID", "visitConducted")] %>% rename(Biobank = "visitConducted")
# ENVISion membership defined as participants with consent and an Envision baseline visit
envision_consent <- con %>% HasVisit("Envision - Baseline", tableName = "tblConsent", keep_cols = "Consent")
envision_consent$Consent[is.na(envision_consent$Consent)] <- 0
envision_consent$Consent <- envision_consent$Consent == 1
envision_membership <- con %>% HasVisit("Envision - Baseline") %>%
merge(envision_consent[,c("SubjectID", "Consent")], by = "SubjectID") %>%
rename(ENVISion = "visitConducted")
envision_membership$ENVISion <- envision_membership$ENVISion & envision_membership$Consent
envision_membership <- envision_membership %>% select(SubjectID, ENVISion)
# SNORE membership defined as participants with consent and a SNORE baseline visit
snore_consent <- con %>% HasVisit("SNORE - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>%
select(SubjectID, Consent)
snore_consent$Consent[is.na(snore_consent$Consent)] <- 0
snore_consent$Consent <- snore_consent$Consent == 1
snore_membership <- con %>% HasVisit("SNORE - Baseline") %>%
merge(snore_consent, by = "SubjectID") %>%
rename(SNORE = "visitConducted")
snore_membership$SNORE <- snore_membership$SNORE & snore_membership$Consent
snore_membership <- snore_membership %>% select(SubjectID, SNORE)
# SNORE imaging membership defined as participants with "Consent", "imaging" and "imgTakePart" = 1
snore_img_membership <- con %>% HasVisit("SNORE - Baseline", tableName = "tblConsent", keep_cols = c("Consent", "imaging", "imgTakePart"))
snore_img_membership$Consent[is.na(snore_img_membership$Consent)] <- 0
snore_img_membership$imaging[is.na(snore_img_membership$imaging)] <- 0
snore_img_membership$imgTakePart[is.na(snore_img_membership$imgTakePart)] <- 0
snore_img_membership$Consent <- snore_img_membership$Consent == 1
snore_img_membership$imaging <- snore_img_membership$imaging == 1
snore_img_membership$imgTakePart <- snore_img_membership$imgTakePart == 1
snore_img_membership$visitConducted <- snore_img_membership$Consent & snore_img_membership$imaging & snore_img_membership$imgTakePart
snore_img_membership <- snore_img_membership[,c("SubjectID", "visitConducted")] %>% rename(SNORE_img = "visitConducted")
# AMD membership is defined as anyone with retinal photography consent and analysed retcam images
amd_consent <- con %>% GetRetinalPhotographyConsent()
amd_consent$consent[is.na(amd_consent$consent)] <- FALSE
amd_analysis <- con %>% tbl("tblAnalysisRetCam") %>% filter(visitTime %in% c(81, 82, 84, 701)) %>%
select(StudyID, R_F1_Qual_Focus, R_F1_Qual_Field, R_F2_Qual_Focus, R_F2_Qual_Field, L_F1_Qual_Focus, L_F1_Qual_Field, L_F2_Qual_Focus, L_F2_Qual_Field) %>%
rename(SubjectID = "StudyID") %>% data.frame()
amd_analysis <- con %>% FilterRandomisedParticipants(amd_analysis)
amd_analysis$AMD <- apply(amd_analysis[, -c(1)], 1, function(x) {!any(is.na(x))})
amd_membership <- merge(amd_consent, amd_analysis[,c("SubjectID", "AMD")], by="SubjectID")
amd_membership$AMD <- amd_membership$AMD & amd_membership$consent
amd_membership <- amd_membership %>% select(SubjectID, AMD)
# Knee membership defined as anyone with consent who has at least one datapoint in PASE, Pedometer, WOMAC, or knee MRI
knee_consent <- con %>% HasVisit("Knee - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>% select(SubjectID, Consent)
knee_consent$Consent[is.na(knee_consent$Consent)] <- 0
knee_consent$Consent <- knee_consent$Consent == 1
knee_membership <- knee_consent %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneePASE", out_col = "pase"), by = "SubjectID") %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneePedometer", out_col = "pedometer") %>% distinct(), by = "SubjectID") %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneeWOMAC", out_col = "womac"), by = "SubjectID") %>%
merge(con %>% HasVisit("Knee - Baseline", tableName = "tblKneeAnalysisMRI", out_col = "mri"), by = "SubjectID")
knee_membership$Knee <- apply(knee_membership %>% select(-c(SubjectID, Consent)), 1, any)
knee_membership$Knee <- knee_membership$Knee & knee_membership$Consent
knee_membership <- knee_membership %>% select(SubjectID, Knee)
# NEURO membership defined as any participant with consent and a NEURO visit
neuro_consent <- con %>% HasVisit("Neuro - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>% select(SubjectID, Consent)
neuro_consent$Consent[is.na(neuro_consent$Consent)] <- 0
neuro_consent$Consent <- neuro_consent$Consent == 1
neuro_membership <- merge(
neuro_consent, con %>% HasVisit("Neuro - Baseline", out_col = "NEURO"),
by = "SubjectID") %>%
merge(
con %>% HasVisit("Neuro - Baseline", tableName = "tblAnalysisMRI", out_col = "NEURO_MRI"),
by = "SubjectID"
)
neuro_membership$NEURO <- neuro_membership$NEURO & neuro_membership$Consent & neuro_membership$NEURO_MRI
neuro_membership <- neuro_membership %>% select(SubjectID, NEURO)
# Hearing membership is defined as any participant with consent and a hearing visit
hearing_consent <- con %>% HasVisit("Hearing - Baseline", tableName = "tblConsent", keep_cols = "Consent") %>% select(SubjectID, Consent)
hearing_consent$Consent[is.na(hearing_consent$Consent)] <- 0
hearing_consent$Consent <- hearing_consent$Consent == 1
hearing_membership <- merge(
hearing_consent,
con %>% HasVisit("Hearing - Baseline", out_col = "Hearing"),
by = "SubjectID")
hearing_membership$Hearing <- hearing_membership$Hearing & hearing_membership$Consent
hearing_membership <- hearing_membership %>% select(SubjectID, Hearing)
membership <- data.frame(SubjectID = hearing_membership$SubjectID, ASPREE = TRUE) %>%
merge(biobank_membership, by = "SubjectID") %>%
merge(envision_membership, by = "SubjectID") %>%
merge(snore_membership, by = "SubjectID") %>%
merge(snore_img_membership, by = "SubjectID") %>%
merge(amd_membership, by = "SubjectID") %>%
merge(knee_membership, by = "SubjectID") %>%
merge(neuro_membership, by = "SubjectID") %>%
merge(hearing_membership, by = "SubjectID")
if (!is.na(country)) {
membership <- membership %>% FilterByCountry(country, sectionPaths)
}
if (returnFrequencyTable) {
membership <- reshape2::melt(membership, id.vars = "SubjectID", variable.name = "Study")
membership <- membership[membership$value == TRUE,c("SubjectID", "Study")]
membership <- membership %>% left_join(membership, by = "SubjectID") %>%
select(Columns = Study.x, Rows = Study.y) %>%
group_by(Columns, Rows) %>%
summarise(Count = n()) %>%
tidyr::spread(Columns, Count, fill = 0)
}
return(membership %>% as.data.frame())
}
#' Get cognitive data #' Get cognitive data
#' #'
#' Creates a query that retrieves all substudy cognitive data #' Creates a query that retrieves all substudy cognitive data
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/demographics.R
\name{FilterByCountry}
\alias{FilterByCountry}
\title{Filter by country}
\usage{
FilterByCountry(data, country = c("AUS", "USA"),
sectionPaths = SDrivePaths())
}
\description{
Filters a dataset to include only those participants from the specified country
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_access.R
\name{GetDataBySubjectID}
\alias{GetDataBySubjectID}
\title{Get one or more variables across multiple tables}
\usage{
GetDataBySubjectID(con, tables_and_fields = list(),
include_visit_time = TRUE, normalise_visit_time = TRUE,
randomised_pt_only = TRUE)
}
\arguments{
\item{con}{a database connection using the DBI package or \code{ASPREEDb()}}
\item{tables_and_fields}{a list of tables and columns to retrieve, e.g. \code{list("tblAnnualVisits" = c("Wt", "Ht"))}
omitting the columns will retrieve all columns}
\item{include_visit_time}{TRUE if the tables to be merged include visit time}
\item{normalise_visit_time}{TRUE if the visit times need to be reduced to the overall annual visit number, i.e. integer-division by 10}
\item{randomised_pt_only}{set to TRUE to ensure only randomise participants are returned}
}
\value{
the merged data
}
\description{
Merges data across tables by Subject ID and optionally by visitTime
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/substudy_data.R
\name{GetSubstudyMembership}
\alias{GetSubstudyMembership}
\title{Derive sub-study membership}
\usage{
GetSubstudyMembership(con, country = NA, returnFrequencyTable = TRUE,
sectionPaths = SDrivePaths())
}
\arguments{
\item{con}{a database connection using the DBI package or \code{ASPREEDb()}}
\item{country}{filter based on country, either "AUS" or "USA" or NA for all}
\item{returnFrequencyTable}{if TRUE, return a summary of study participation as a frequency table}
\item{sectionPaths}{a list mapping section names, e.g. SectionA1, to a file path}
}
\description{
Derive sub-study membership
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/substudy_data.R
\name{HasVisit}
\alias{HasVisit}
\title{Find participants with visit}
\usage{
HasVisit(con, visitTime, tableName = "tblVisits",
out_col = "visitConducted", keep_cols = c(), complexFilter = NA)
}
\arguments{
\item{con}{a database connection using the DBI package or \code{ASPREEDb()}}
\item{visitTime}{the visit in question, can be either the numeric code or text label for the visit}
\item{tableName}{the name of the table from which to retrieve the visits}
\item{out_col}{the name of the column that will contain the TRUE/FALSE values indicating whether the visit was conducted}
\item{complexFilter}{is an optional function that accepts the current database connection and dataset as parameters to be filtered with additional logic}
\item{visitTimeColumn}{the name of the column in the visit table that contains the numeric codes}
}
\description{
Returns a dataset of all participants who have has a given visit conducted
}
...@@ -4,8 +4,7 @@ ...@@ -4,8 +4,7 @@
\alias{SDrivePaths} \alias{SDrivePaths}
\title{Get S-drive paths} \title{Get S-drive paths}
\usage{ \usage{
SDrivePaths(ver = 2, prefix = NA)
SDrivePaths(prefix = "S:/MNHS-SPHPM-EPM/ASPREE-Data/ASPREE Longitudinal Data Set/")
} }
\arguments{ \arguments{
\item{prefix}{the base path of the dataset location} \item{prefix}{the base path of the dataset location}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment