#General functions to read out, process and observe time series data
#' FBCanalysis: A package for developing and evaluating biomedical time series
#' data clustering model based on Fluctuation Based Clustering (FBC)
#'
#' This R package aims to offer researchers with fast tools for clustering
#' patient time series data and confirming the distinction using additional
#' metrics such as population parameter enrichment analysis, stability after
#' random data removal, and conventional cluster stability measures.
#'
#' @section FBCanaylsis functions: \link{add_clust2enrich}
#'
#' \link{addclust_2ts}
#'
#' \link{add_enrich}
#'
#' \link{clust_matrix}
#'
#' \link{clValid_flow}
#'
#' \link{emd_heatmap}
#'
#' \link{emd_matrix}
#'
#' \link{enr_obs_clust}
#'
#' \link{addclust_2ts}
#'
#' \link{init_clValid}
#'
#' \link{jaccard_run_cognate}
#'
#' \link{jaccard_run_emd}
#'
#' \link{max_fluc}
#'
#' \link{patient_boxplot}
#'
#' \link{patient_hist}
#'
#' \link{patient_list}
#'
#' \link{patient_ts_plot}
#'
#' \link{rnd_dat_rm}
#'
#' \link{addclust_2ts}
#'
#' \link{sim_jaccard_cognate}
#'
#' \link{sim_jaccard_emd}
#'
#' \link{sim_sample_enr}
#'
#' \link{znorm}
#'
#' @seealso \href{https://github.com/MrMaximumMax/FBCanalysis}{GitHub Repository}
#'
#' @docType package
#' @name FBCanalysis
NULL
#> NULL
#' Process patient time series data by interpolation options and store data in an object of type list
#'
#' @param path Path where csv file(s) are stored (only folder, not specific file(s))
#'
#' @return Object of type list storing patient time series data
#'
#' @import glmnet
#' @import lubridate
#' @import imputeTS
#' @import tibble
#' @import dplyr
#' @import readr
#' @import utils
#'
#' @examples
#' list <- patient_list('.../ts_demofiles1') #files can be pulled from GitHub demo files
#' #(https://github.com/MrMaximumMax/FBCanalysis/tree/master/demo_and_testfiles/ts_demofiles1)
#' #Sampling frequency is supposed to be daily
#'
#' @export
patient_list <- function (path) {
#Prepare the raw data and get all csv. files from the indicated file path
raw.files <- tibble(filename = list.files(path))
#List all the files
raw.file.paths <- raw.files %>%
mutate(filepath = paste0(path,"/", filename))
raw.data <- raw.file.paths %>%
#Combine them all together into one raw data frame
rowwise() %>%
do(., read_csv(file=.$filepath, show_col_types = FALSE))
#Change object type of raw.data to data.frame
raw.data <- as.data.frame(raw.data)
#First terminal output to present the user the available columns
cat("\n\n")
#Show the data frame with the first 5 lines as well as column names
print(head(raw.data[1:5,]))
cat("\n\n")
#Make a small df where each column name has a corresponding number assigned
#of which the user can decide which column number denotes the Patient_ID and
#the time
feedback1 <- colnames(raw.data)
feedback2 <- 1:length(feedback1)
feedback <- matrix(nrow = 2, ncol = length(feedback1))
feedback[1,] <- feedback1
feedback[2,] <- feedback2
feedback <- as.data.frame(feedback)
print(feedback, row.names=F, col.names=F, quote=F)
cat("\n")
#Ask which of the assigned column names/numbers denotes the Patient_ID
question1 <- as.numeric(readline("Which column represents the Patient_ID (please indicate number)?: "))
#Change the assigned column name for the indicated number to "Patient_ID"
#for data standardization for later processing
names(raw.data)[names(raw.data) == feedback[1,question1]] <- 'Patient_ID'
#Ask which of the assigned column names/numbers denotes the Time
question2 <- as.numeric(readline("Which column represents the time (please indicate number)?: "))
#Change the assigned column name for the indicated number to "Time"
#for data standardization for later processing
names(raw.data)[names(raw.data) == feedback[1,question2]] <- 'Time'
#Present the user the possible lubridate format option to standardize time formats
cat("\n\n", "Day:Month:Year", "\t", "(1)", "\n", "Month:Day:Year", "\t", "(2)", "\n",
"Year:Month:Day", "\t", "(3)", "\n", "Year:Day:Month", "\t", "(4)", "\n",
"Day:Month:Year:Hour:Min", "(5)", "\n", "Month:Day:Year:Hour:Min", "(6)", "\n",
"Year:Month:Day:Hour:Min", "(7)", "\n", "Year:Day:Month:Hour:Min", "(8)", "\n\n")
#Store indicated format with a number and later apply lubridate function to
#format and sort the time of each data distribution by parameter
question3 <- readline("What's the time format (please indicate number)?: ")
#Present the user the possible sample frequency option on time series data
cat("\n\n", "Twice daily", "\t", "(1)", "\n", "Daily", "\t\t", "(2)", "\n",
"Twice a week", "\t", "(3)", "\n", "Weekly", "\t", "(4)", "\n",
"Twice a month", "\t", "(5)", "\n", "Monthly", "\t", "(6)", "\n",
"Twice a quarter", "(7)", "\n", "Quarterly", "\t", "(8)", "\n\n")
#Store indicated format with a number and later apply a check to recognize leaky
#time series and eventually fill missing times up with NA
question4 <- readline("What's the sampling frequency (please indicate number)?: ")
#Present the user the possible options fill up missing data and let him decide
cat("\n\n", "Sample missing values from top quantile", "\t", "(1)", "\n",
"Sample missing values with bottom quantile", "\t", "(2)", "\n\n",
"Interpolate missing values and apply L1 Regularization / Lasso Regression (3)", "\n",
"Interpolate missing values and apply L2 Regulariztation / Ridge Regression (4)","\n",
"Interpolate missing values and apply Elastic Net on Regression and Regularization (5)", "\n\n",
"Interpolate missing values by Linear Spline (6)", "\n",
"Interpolate missing values by Cubic C2 Spline (7)", "\n\n")
#Store the option that the user has chosen
usecase <- readline("Select measure for filling up NA values: ")
#Extract the individual Patient_IDs from raw.data frame to the filter the
#raw.data by Patient_ID and process the time series distributions according
#to the previously specified criteria
patnames <- names(table(raw.data[,"Patient_ID"]))
#Preparation to for-loop to go and filter through each Patient_ID
n <- length(patnames)
#Create an empty list where each Patient_ID specific processed time series
#data will be stored
datalist <- list()
#In case the user has chosen Interpolation/Regularizatio by Elastic Net
if (usecase == 5) {
cat("\n\n")
#Let user decide which Alpha value to use for Elastic Net
Alpha_for_elastic_net <<- as.numeric(readline("Please indicate your alpha value for elastic net (between 0 and 1): "))
#Remark: The Alpha_for_elastic_net will be added to the environment by now
#because after a few Iterations, there have been issues with glmnet which could
#not find the assigned alpha value anymore when alpha is not added to the
#environment; debugging could not resolve this so far
}
#Initialize a progress bar
pb <- txtProgressBar(min = 0, max = n, style = 3)
#Go through each Patient_ID from the raw.data df
for (i in 1:n) {
#Make the filter from the Patient_ID's vector for current Patient_ID
filter <- patnames[i]
#Filter for current Patient_ID
patdat <- filter(raw.data, Patient_ID %in% filter)
#In case user has chosen dmy time format
if (question3 == 1) {
patdat$Time <- dmy(patdat$Time)
}
#In case user has chosen mdy time format
if (question3 == 2) {
patdat$Time <- mdy(patdat$Time)
}
#In case user has chosen ymd time format
if (question3 == 3) {
patdat$Time <- ymd(patdat$Time)
}
#In case user has chosen ydm time format
if (question3 == 4) {
patdat$Time <- ydm(patdat$Time)
}
#In case user has chosen dmy_hm time format
if (question3 == 5) {
patdat$Time <- dmy_hm(patdat$Time)
}
#In case user has chosen mdy_hm time format
if (question3 == 6) {
patdat$Time <- mdy_hm(patdat$Time)
}
#In case user has chosen ymd_hm time format
if (question3 == 7) {
patdat$Time <- ymd_hm(patdat$Time)
}
#In case user has chosen ydm_hm time format
if (question3 == 8) {
patdat$Time <- ydm_hm(patdat$Time)
}
#Now order by time after the time column has been formated
patdat <- arrange(patdat, Time)
#Find the first date of the current time series
firstdate <- patdat[1,"Time"]
#Find the last date of the current time series
lastdate <- patdat[max(nrow(patdat)),"Time"]
#Match first and last date of time series with twice daily sampling frequency
if (question4 == 1) {
#Create a first time sequence
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "day"))
#Add hald a day to first and last date (so twice daily is resulting)
firstdate_2 <- firstdate + 0.5
lastdate_2 <- lastdate + 0.5
#Create a second time sequence
newdates_2 <- data.frame(Time = seq(firstdate_2, lastdate_2, by = "day"))
#Combine both sequences
newdates <- full_join(newdates,newdates_2)
}
if (question4 == 2) {
#Make a daily sequence out of first and last date
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "day"))
}
if (question4 == 3) {
#Make a first weekly sequence out of first and last date
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "week"))
#Now add 3 days (approx. 1/2 week) to first and last date for biweekly sequence
firstdate_2 <- firstdate + 3
lastdate_2 <- lastdate + 3
#Make a second weekly sequence out of the new dates
newdates_2 <- data.frame(Time = seq(firstdate_2, lastdate_2, by = "week"))
#Combine both sequences to a complete biweekly sequence
newdates <- full_join(newdates,newdates_2)
}
if (question4 == 4) {
#Make a weekly sequence out of first and last date
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "week"))
}
if (question4 == 5) {
#Make a first monthly sequence out of first and last date
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "month"))
#Add 15 days (1/2 month) to first and last date
firstdate_2 <- firstdate + 15
lastdate_2 <- lastdate + 15
#Make a second monthly sequence out of these new dates
newdates_2 <- data.frame(Time = seq(firstdate_2, lastdate_2, by = "month"))
#Combine both sequences to a twice monthly sequence
newdates <- full_join(newdates,newdates_2)
}
if (question4 == 6) {
#Make a monthly sequence out of first and last date
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "month"))
}
if (question4 == 7) {
#Make a quarterly sequence out of first and last date
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "quarter"))
#Add 45 days (1/2 quarter) to first and last date for second sequence
firstdate_2 <- firstdate + 45
lastdate_2 <- lastdate + 45
#Make a second sequence out of these new date
newdates_2 <- data.frame(Time = seq(firstdate_2, lastdate_2, by = "quarter"))
#Combine both sequences to a twice a quarter sequence
newdates <- full_join(newdates,newdates_2)
}
if (question4 == 8) {
#Make a quarterly seaquence out of first and last date
newdates <- data.frame(Time = seq(firstdate, lastdate, by = "quarter"))
}
#Join the current patient data by newdates and Time to check if time series is
#incomplete; in case the time series is incomplete, a new line with this
#time will be added and all the other cells are filled up with NAs that can
#be filled up/interpolated later
patdat <- full_join(patdat,newdates, by = "Time")
#Filter current patient data by complete cases (lines with no NAs) and make a new column
#to note with FALSE that no interpolation took place
patdat1 <- patdat[complete.cases(patdat),]
patdat1[,"Interpolated"] <- FALSE
#Filter current patient data by incomplete cases (lines with NAs) and make a new column
#to note with TRUE that an interpolation took place
patdat2 <- patdat[!complete.cases(patdat),]
patdat2[,"Interpolated"] <- TRUE
#Merge both again and order again by time
patdat <- rbind(patdat1,patdat2)[order(patdat$Time),]
#Extract the time series data parameters from the current patient data but
#exclude Patient_ID-, Time, Interpolation-column as they represent no time
#series dependent data
parameters <- (names(patdat))
parameters <- gsub("Patient_ID",NA,parameters)
parameters <- gsub("Time",NA,parameters)
parameters <- gsub("Interpolated",NA,parameters)
parameters <- na.omit(parameters)
#In case the user opted to sample by highest quantile of data distribution
if (usecase == 1) {
#Go through each parameter
for (j in 1:length(parameters)) {
#Filter data for current parameter in complete data
lookup <- patdat1[,parameters[j]]
#Extract the top quantile of the data
top_quantile <- quantile(lookup)[3]
lookup <- subset(lookup, lookup > top_quantile)
#Go through each NA entry
for (k in 1:nrow(patdat2)) {
#Randomly samople from top quantile
patdat2[k,parameters[j]] <- sample(lookup, 1)
}
}
#Combine both complete and incomplete data again
patdat <- rbind(patdat2,patdat1)
#Sort patient data by time and add to the list as an entry wit list item
#name is Patient_ID
datalist[[filter]] <- patdat[order(patdat$Time),]
#Update the progress bar once a patient distribution is processed
}
#Analogous approach to usecase == 1 but with bottom quantile
if (usecase == 2) {
for (j in 1:length(parameters)) {
lookup <- patdat1[,parameters[j]]
#Take here bottom quantile instead
bottom_quantile <- quantile(lookup)[2]
#Here smaller than...
lookup <- subset(lookup, lookup < bottom_quantile)
for (k in 1:nrow(patdat2)) {
patdat2[k,parameters[j]] <- sample(lookup, 1)
}
}
patdat <- rbind(patdat2,patdat1)
datalist[[filter]] <- patdat[order(patdat$Time),]
}
#In case the user indicated Polynomial regression/L1 regularization
#Remark here (as in 309); Usecase 3,4,5 were created in an isolated manner
#even though that is not the most elegant solution; Here likewise, there has
#been the issue that after a few for-loop-runs where glmnet could not recognize
#the defined alpha value that has been defined in an object of type numeric
#before; These issues did not occur when alpha was indicated as a fixed number
#within the glmnet(...) function
if (usecase == 3) {
#Order the current patient data by time
patdat <- patdat[order(patdat$Time),]
#Add a new line which indicates that the time is now an equally spaced
#sequence to train the models via glmnet
patdat[,"Seq"] <- 1:nrow(patdat)
#Filter for existing data = Training data
patdat1 <- filter(patdat, Interpolated == FALSE)
#Polynomial degress to try out are number of data points - 1
pol_degrees <- length(complete.cases(patdat))-1
#Loop through each parameters individually and find out best polynomial
#degree and lambda for regularized model
for (j in 1:length(parameters)) {
#Define the current parameter
current_par <- parameters[j]
#Make a matrix where the line number denotes the polynomial degree and
#the first column stores the model's lambda.min and the second column
#the correspoinding Mean Squared Erro
regularization_mat <- matrix(0, nrow = pol_degrees, ncol = 2)
#Make a glmnet regularized model for each possible polynomial degree
for (k in 1:pol_degrees) {
#Define current polynomial degree
PolynomialDegree <- k
#Quiet warnings here: There will always be NA entries added automatically
#in data matrix which would lead to many warning messages when looping
#for each possible polynomial degree
options(warn=-1)
X <- matrix(patdat1[,"Seq"],ncol = 1);
for (l in (c(2:PolynomialDegree))){
X <- cbind(X,patdat1[,"Seq"]*X[,(l-1)])
}
#Disable warnings
options(warn=0)
#Define max of iterations for cross validation
MaxIt<-1e+07;
#Make glmnet for regularized model and current polynomial degree
Y <- glmnet(X, patdat1[,current_par],family = "gaussian",alpha = 1, standardize = TRUE,intercept = TRUE, maxit = MaxIt)
#Apply corss validation to determine best fitting lamda for min. MSE
cvfit <- cv.glmnet(X,patdat1[,current_par],family = "gaussian",alpha = 1, standardize = TRUE, type.measure = "mse", nfolds = 10);#nfolds = n corresponds to leave-one-out cross validation. A popular value is 10.
#Add both lamda.min and correspionding MSE to matrix
regularization_mat[k,1] <- cvfit$lambda.min
regularization_mat[k,2] <- min(cvfit$cvm)
}
#Search lowest MSE from matrix
best_pol_deg <- which.min(regularization_mat[,2])
#As in line 513 quiet the warning messages
options(warn=-1)
X <- matrix(patdat1[,"Seq"],ncol = 1);
for (m in (c(2:best_pol_deg))){
X <- cbind(X,patdat1[,"Seq"]*X[,(m-1)])
}
#Do not quiet the warning messages anymore
options(warn=0)
#Apply glmnet to the best fitting polynomial degree
Y <- glmnet(X,patdat1[,current_par],family = "gaussian",alpha = 1, standardize = TRUE,intercept = TRUE, maxit = MaxIt)
#Get the coefficients from this model with lamda.min
C <- coef(Y, s = cvfit$lambda.min,exact = TRUE,x=X, y=patdat1[,current_par], maxit = MaxIt)
CC <- matrix(data = c(C[2:(best_pol_deg+1),1]),ncol = 1)
YY <- (X * as.vector(CC)) + C[1,1]
#find the indexes within the data distribution where NA values are found
na_in_dat <- sum(is.na(patdat[,current_par]))
na_index <- which(is.na(patdat[,current_par]))
#Similar as in line 513 and 535
options(warn=-1)
for (o in 1:na_in_dat) {
#Go to current NA values and let predict the corresponding value at
#this position
seq_miss = na_index[o];
tNA <- matrix(seq_miss, ncol = 1)
for (p in (c(2:best_pol_deg))){
tNA<-cbind(tNA,seq_miss*tNA[,(p-1)])
}
patdat[,current_par][seq_miss] <- predict(Y, newx = tNA, exact = TRUE, s = cvfit$lambda.min, x=X, y=patdat1[,current_par], maxit = MaxIt)
}
#Quiet warning again
options(warn=-1)
#Remove the sequence column (not relevant for later analyses anymore)
patdat[,"Seq"] <- NULL
#Add the currently processed patient data to the list
datalist[[filter]] <- patdat
#Update the progress bar
}
}
#In case the user indicated Polynomial regression/L1 regularization
#Similar remarks as in line 309 and 484; The only difference here is that
#now glmnet uses a value of 1; Besides this, the approach is similar
if (usecase == 4) {
patdat <- patdat[order(patdat$Time),]
patdat[,"Seq"] <- 1:nrow(patdat)
patdat1 <- filter(patdat, Interpolated == FALSE)
pol_degrees <- length(complete.cases(patdat))-1
for (j in 1:length(parameters)) {
current_par <- parameters[j]
regularization_mat <- matrix(0, nrow = pol_degrees, ncol = 2)
for (k in 1:pol_degrees) {
PolynomialDegree <- k
options(warn=-1)
X <- matrix(patdat1[,"Seq"],ncol = 1);
for (l in (c(2:PolynomialDegree))){
X <- cbind(X,patdat1[,"Seq"]*X[,(l-1)])
}
options(warn=0)
MaxIt<-1e+07;
Y <- glmnet(X, patdat1[,current_par],family = "gaussian",alpha = 0, standardize = TRUE,intercept = TRUE, maxit = MaxIt)
cvfit <- cv.glmnet(X,patdat1[,current_par],family = "gaussian",alpha = 0, standardize = TRUE, type.measure = "mse", nfolds = 10);#nfolds = n corresponds to leave-one-out cross validation. A popular value is 10.
regularization_mat[k,1] <- cvfit$lambda.min
regularization_mat[k,2] <- min(cvfit$cvm)
}
best_pol_deg <- which.min(regularization_mat[,2])
options(warn=-1)
X <- matrix(patdat1[,"Seq"],ncol = 1);
for (m in (c(2:best_pol_deg))){
X <- cbind(X,patdat1[,"Seq"]*X[,(m-1)])
}
options(warn=0)
Y <- glmnet(X,patdat1[,current_par],family = "gaussian",alpha = 0, standardize = TRUE,intercept = TRUE, maxit = MaxIt)
C <- coef(Y, s = cvfit$lambda.min,exact = TRUE,x=X, y=patdat1[,current_par], maxit = MaxIt)
CC <- matrix(data = c(C[2:(best_pol_deg+1),1]),ncol = 1)
YY <- (X * as.vector(CC)) + C[1,1]
na_in_dat <- sum(is.na(patdat[,current_par]))
na_index <- which(is.na(patdat[,current_par]))
options(warn=-1)
for (o in 1:na_in_dat) {
seq_miss = na_index[o];
tNA <- matrix(seq_miss, ncol = 1)
for (p in (c(2:best_pol_deg))){
tNA<-cbind(tNA,seq_miss*tNA[,(p-1)])
}
patdat[,current_par][seq_miss] <- predict(Y, newx = tNA, exact = TRUE, s = cvfit$lambda.min, x=X, y=patdat1[,current_par], maxit = MaxIt)
}
options(warn=-1)
patdat[,"Seq"] <- NULL
datalist[[filter]] <- patdat
}
}
#In case the user indicated Polynomial regression/L1 regularization
#Similar remarks as in line 309 and 484; The only difference here is that
#now glmnet uses the indicated alpha for elastic net that has been added to
#the environment (see line 309); Besides this, the approach is similar
if (usecase == 5) {
patdat <- patdat[order(patdat$Time),]
patdat[,"Seq"] <- 1:nrow(patdat)
patdat1 <- filter(patdat, Interpolated == FALSE)
pol_degrees <- length(complete.cases(patdat))-1
for (j in 1:length(parameters)) {
current_par <- parameters[j]
regularization_mat <- matrix(0, nrow = pol_degrees, ncol = 2)
for (k in 1:pol_degrees) {
PolynomialDegree <- k
options(warn=-1)
X <- matrix(patdat1[,"Seq"],ncol = 1);
for (l in (c(2:PolynomialDegree))){
X <- cbind(X,patdat1[,"Seq"]*X[,(l-1)])
}
options(warn=0)
MaxIt<-1e+07;
Y <- glmnet(X, patdat1[,current_par],family = "gaussian",alpha = Alpha_for_elastic_net, standardize = TRUE,intercept = TRUE, maxit = MaxIt)
cvfit <- cv.glmnet(X,patdat1[,current_par],family = "gaussian",alpha = Alpha_for_elastic_net, standardize = TRUE, type.measure = "mse", nfolds = 10);#nfolds = n corresponds to leave-one-out cross validation. A "popular" value is 10.
regularization_mat[k,1] <- cvfit$lambda.min
regularization_mat[k,2] <- min(cvfit$cvm)
}
best_pol_deg <- which.min(regularization_mat[,2])
options(warn=-1)
X <- matrix(patdat1[,"Seq"],ncol = 1);
for (m in (c(2:best_pol_deg))){
X <- cbind(X,patdat1[,"Seq"]*X[,(m-1)])
}
options(warn=0)
Y <- glmnet(X,patdat1[,current_par],family = "gaussian",alpha = Alpha_for_elastic_net, standardize = TRUE,intercept = TRUE, maxit = MaxIt)
C <- coef(Y, s = cvfit$lambda.min,exact = TRUE,x=X, y=patdat1[,current_par], maxit = MaxIt)
CC <- matrix(data = c(C[2:(best_pol_deg+1),1]),ncol = 1)
YY <- (X * as.vector(CC)) + C[1,1]
na_in_dat <- sum(is.na(patdat[,current_par]))
na_index <- which(is.na(patdat[,current_par]))
options(warn=-1)
for (o in 1:na_in_dat) {
seq_miss = na_index[o];
tNA <- matrix(seq_miss, ncol = 1)
for (p in (c(2:best_pol_deg))){
tNA<-cbind(tNA,seq_miss*tNA[,(p-1)])
}
patdat[,current_par][seq_miss] <- predict(Y, newx = tNA, exact = TRUE, s = cvfit$lambda.min, x=X, y=patdat1[,current_par], maxit = MaxIt)
}
options(warn=-1)
patdat[,"Seq"] <- NULL
datalist[[filter]] <- patdat
setTxtProgressBar(pb, i)
}
}
#In case the user opted linear interpolation
if (usecase == 6) {
#Order current patient data by time
patdat <- patdat[order(patdat$Time),]
#Perform linear interpolation on NA values
patdat <- na_interpolation(patdat, option = "linear")
#Add to list
datalist[[filter]] <- patdat
}
#In case the user opted cubic spline interpolation
if (usecase == 7) {
#Order current patient data by time
patdat <- patdat[order(patdat$Time),]
#Perform cubic c spline interpolation on NA values
patdat <- na_interpolation(patdat, option = "spline")
#Add to list
datalist[[filter]] <- patdat
}
setTxtProgressBar(pb, i)
}
datalist
}
#' Visualize patient time series data in a time series plot for indicated parameter
#'
#' @param plist List storing patient time series data (also see function: \link{patient_list})
#' @param Patient_ID Patient_ID referring to a list element (also see function: \link{patient_list})
#' @param parameter Parameter of interest in list element
#' @param normalized TRUE/FALSE if z-normalized (TRUE by default)
#'
#' @return Visualized patient time series data in a time series plot for indicated parameter
#'
#' @import graphics
#'
#' @examples
#' list <- patient_list('.../ts_demofiles1') #Just folder; files can be pulled from GitHub demo files
#' #(https://github.com/MrMaximumMax/FBCanalysis/tree/master/demo_and_testfiles/ts_demofiles1)
#' #Sampling frequency is supposed to be daily
#' patient_ts_plot(list,"testpat_1","PEF")
#'
#' @export
patient_ts_plot <- function(plist, Patient_ID, parameter, normalized) {
#Take specific data frame out of list according to specified Patient_ID
patient_df <- as.data.frame(plist[[Patient_ID]])
#In case "normalized" is not specified or "normalized = TRUE"
if (missing(normalized) || normalized == TRUE) {
#Plot time on x-axis and z-normalize specified parameter from data frame on y-axis
graphics::plot(patient_df$Time,znorm(patient_df[, which(names(patient_df) %in% parameter)]),
xlab = "Time", ylab = parameter, main = Patient_ID, col = "blue")
#In case "normalized = FALSE"
} else {
#Plot time on x-axis and non-normalized specified parameter from data frame on y-axis
plot(patient_df$Time,patient_df[, which(names(patient_df) %in% parameter)],
xlab = "Time", ylab = parameter, main = Patient_ID, col = "blue")
}
}
#' Visualize patient(s) time series data in a boxplot for indicated parameter
#'
#' @param plist List storing patient time series data (also see function: \link{patient_list})
#' @param patients Patient_ID(s) referring to (a) list element; can be single ID or multiple IDs (also see function: \link{patient_list(path)})
#' @param parameter Parameter of interest in list element(s)
#' @param normalized TRUE/FALSE if z-normalized (TRUE by default)
#'
#' @return Visualized patient(s) time series data in a boxplot for indicated parameter
#'
#' @import dplyr
#' @import graphics
#'
#' @examples
#' list <- patient_list('.../ts_demofiles1') #Just folder; files can be pulled from GitHub demo files
#' #(https://github.com/MrMaximumMax/FBCanalysis/tree/master/demo_and_testfiles/ts_demofiles1)
#' #Sampling frequency is supposed to be daily
#' patient_boxplot(list,c("ID_2","testpat_1","testpat_2","a301"), "FEV1")
#'
#' @export
patient_boxplot <- function(plist, patients, parameter, normalized) {
#In case boxplot for only one patient
if (length(patients) == 1) {
#Take list entry from specified patient and transform to data frame
patient_df <- plist[[patients]]
#Filter patient data for specified parameter
dat <- patient_df[, which(names(patient_df) %in% parameter)]
#In case "normalized" is not specified or "normalized = TRUE"
if (missing(normalized) || normalized == TRUE) {
#Use written function to z-normalize data
dat <- znorm(dat)
}
#Apply boxplot on z-normalized data
boxplot(dat, main = patients,ylab = parameter)
#In case boxplot for more than one patient
} else {
#Transform whole list to one data frame
patient_df <- do.call("rbind", plist)
#Filter data frame for specified Patient_ID's
dat <- patient_df %>% filter(Patient_ID %in% patients)
#In case "normalized" not specified or "normalized = TRUE"
if (missing(normalized) || normalized == TRUE) {
#Filter data frame for specified parameter and z-normalize
#Apply boxplot of z-normalized data against Patient_ID's
boxplot(znorm(dat[, which(names(dat) %in% parameter)]) ~ dat$Patient_ID,
main = "Combined boxplots", ylab = parameter, xlab = NULL)
#In case "normalized = FALSE"
} else {
#Apply boxplot of non-normalized data against Patient_ID's
boxplot(dat[, which(names(dat) %in% parameter)] ~ dat$Patient_ID,
main = "Combined boxplots", ylab = parameter, xlab = NULL)
}
}
}
#' Visualize patient time series data in a histogram for indicated parameter
#'
#' @param plist List storing patient time series data (also see function: \link{patient_list})
#' @param Patient_ID Patient_ID referring to a list element (also see function: \link{patient_list})
#' @param parameter Parameter of interest in list element
#' @param normalized TRUE/FALSE if z-normalized (TRUE by default)
#'
#' @return Visualized patient time series data in a histogram for indicated parameter
#'
#' @import graphics
#'
#' @examples
#' list <- patient_list('.../ts_demofiles1') #Just folder; files can be pulled from GitHub demo files
#' #(https://github.com/MrMaximumMax/FBCanalysis/tree/master/demo_and_testfiles/ts_demofiles1)
#' #Sampling frequency is supposed to be daily
#' patient_hist(list,"testpat_1","PEF")
#'
#' @export
patient_hist <- function(plist, Patient_ID, parameter, normalized) {
#Take data frame out of list for specified Patient_ID
df <- plist[[Patient_ID]]
#Filter data for specified parameter
dat <- df[,parameter]
#In case "normalized" not specified or "normalized = TRUE"
if (missing(normalized) || normalized == TRUE) {
#z-normalize the data for specified parameter
dat <- znorm(dat)
}
#Apply histogram on data
hist(dat, xlab = parameter, main = Patient_ID)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.