Commit 9fda02b0 authored by Jason Rigby's avatar Jason Rigby
Browse files

Merge branch 'fetch-merged-data' into 'master'

Fetch merged data

See merge request aspree/aspree-r-package!29
parents a084fc5a e1bed4cc
...@@ -16,6 +16,7 @@ export(FilterRandomisedParticipants) ...@@ -16,6 +16,7 @@ export(FilterRandomisedParticipants)
export(GenerateLinkageKey) export(GenerateLinkageKey)
export(GenerateOneTimeLinkageKey) export(GenerateOneTimeLinkageKey)
export(GetAllTables) export(GetAllTables)
export(GetDataBySubjectID)
export(GetENVISionConsent) export(GetENVISionConsent)
export(GetEndpoints) export(GetEndpoints)
export(GetRETCAMConsent) export(GetRETCAMConsent)
......
...@@ -122,3 +122,91 @@ GetAllTables <- function(con) { ...@@ -122,3 +122,91 @@ GetAllTables <- function(con) {
rownames(tables) <- NULL rownames(tables) <- NULL
return(tables) return(tables)
} }
#' Get one or more variables across multiple tables
#'
#' Merges data across tables by Subject ID and optionally by visit time.
#' Each requested table is read from the database, reduced to the requested
#' columns (plus the join keys), and full-joined into the running result.
#'
#' @param con a database connection using the DBI package or \code{ASPREEDb()}
#' @param tables_and_fields a list of tables and columns to retrieve, e.g.
#'   \code{list("tblAnnualVisits" = c("Wt", "Ht"))}. Omitting the columns
#'   (an unnamed table name, or a named \code{NULL} entry) will retrieve all
#'   columns of that table
#' @param include_visit_time TRUE if the tables to be merged include visit time
#' @param normalise_visit_time TRUE if the visit times need to be reduced to
#'   the overall annual visit number, i.e. integer-division by 10; the
#'   resulting column is named \code{AV}
#' @param randomised_pt_only set to TRUE to ensure only randomised
#'   participants are returned
#' @return the merged data, or \code{NULL} if no tables were requested
#' @import dplyr
#' @export
GetDataBySubjectID <- function(con,
                               tables_and_fields = list(),
                               include_visit_time = TRUE,
                               normalise_visit_time = TRUE,
                               randomised_pt_only = TRUE) {
  data <- NULL

  # The join keys depend only on the arguments, so work them out once.
  if (!include_visit_time) {
    join_cols <- "SubjectID"
  } else if (normalise_visit_time) {
    join_cols <- c("SubjectID", "AV")
  } else {
    join_cols <- c("SubjectID", "visitTime")
  }

  # names() is NULL for a fully unnamed list; treat every entry as unnamed
  # so that those tables are still processed.
  table_names <- names(tables_and_fields)
  if (is.null(table_names)) {
    table_names <- rep("", length(tables_and_fields))
  }

  # Iterate over each table
  for (i in seq_along(tables_and_fields)) {
    table <- table_names[i]
    if (is.na(table) || nchar(table) == 0) {
      # Unnamed entry: the value itself is the table name, all columns wanted
      table <- tables_and_fields[[i]]
      fields <- NULL
    } else {
      fields <- tables_and_fields[[i]]
    }
    d <- con %>% tbl(table)

    # Work out the key columns of this table; the visit time column is
    # looked up as "visitTime" first and otherwise taken to be "VisitTime".
    key_cols <- "SubjectID"
    if (include_visit_time) {
      if ("visitTime" %in% colnames(d)) {
        vt <- "visitTime"
      } else {
        vt <- "VisitTime"
      }
      key_cols <- c(key_cols, vt)
    }

    # Get subset of columns, or all columns if none provided. unique() guards
    # against a key column also being listed among the requested fields.
    if (!is.null(fields)) {
      d <- d %>% select(all_of(unique(c(key_cols, fields))))
    }
    d <- as.data.frame(d)

    # Bring the visit time column to its common name, normalising if requested.
    # Done on the plain data frame so it works for either spelling of the
    # source column.
    if (include_visit_time) {
      if (normalise_visit_time) {
        d[[vt]] <- d[[vt]] %/% 10
        names(d)[names(d) == vt] <- "AV"
      } else {
        names(d)[names(d) == vt] <- "visitTime"
      }
    }

    # Merge in table to overall result
    if (is.null(data)) {
      data <- d
    } else {
      data <- data %>% dplyr::full_join(d, by = join_cols)
    }
  }

  # Nothing was requested, so there is nothing to annotate or filter.
  if (is.null(data)) {
    return(NULL)
  }

  # Merge in the visit descriptions if visit times are not normalised
  if (include_visit_time && !normalise_visit_time) {
    visit_names <- con %>%
      tbl("tlkpVisitTime") %>%
      select(Code, Description) %>%
      collect()
    data <- data %>%
      left_join(visit_names, by = c("visitTime" = "Code")) %>%
      rename(visitName = "Description")
  }

  # Filter for randomised participants only if requested
  if (randomised_pt_only) {
    data <- con %>% FilterRandomisedParticipants(data)
  }
  return(data)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_access.R
\name{GetDataBySubjectID}
\alias{GetDataBySubjectID}
\title{Get one or more variables across multiple tables}
\usage{
GetDataBySubjectID(con, tables_and_fields = list(),
include_visit_time = TRUE, normalise_visit_time = TRUE,
randomised_pt_only = TRUE)
}
\arguments{
\item{con}{a database connection using the DBI package or \code{ASPREEDb()}}
\item{tables_and_fields}{a list of tables and columns to retrieve, e.g. \code{list("tblAnnualVisits" = c("Wt", "Ht"))}
omitting the columns will retrieve all columns}
\item{include_visit_time}{TRUE if the tables to be merged include visit time}
\item{normalise_visit_time}{TRUE if the visit times need to be reduced to the overall annual visit number, i.e. integer-division by 10}
\item{randomised_pt_only}{set to TRUE to ensure only randomised participants are returned}
}
\value{
the merged data
}
\description{
Merges data across tables by Subject ID and optionally by visitTime
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment