R/formats_gtexClinical.R

Defines function: gtexClinicalInfoFormat

#' Format description for the GTEx clinical data file
#'
#' Builds the settings list describing the GTEx subject phenotypes file
#' (\code{GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt}): display
#' metadata, the strings used to check the file format, the parsing options
#' and a \code{process} function applied to the parsed data.
#'
#' @return Named list with the elements \code{tablename}, \code{filename},
#'   \code{description}, \code{dataType}, \code{skip}, \code{transpose},
#'   \code{rowCheck}, \code{checkIndex}, \code{check}, \code{delim},
#'   \code{colNames}, \code{rowNames}, \code{ignoreCols}, \code{ignoreRows},
#'   \code{commentChar}, \code{unique}, \code{rows}, \code{columns},
#'   \code{show} and \code{process}.
gtexClinicalInfoFormat <- function() {
    list(
        tablename   = "Clinical data",
        filename    = "GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt",
        description = "Clinical data of GTEx subjects",
        dataType    = "Clinical data",
        
        skip        = 1,     # Rows to skip when parsing file (include header)
        # Transpose data before parsing? If so, a row in the transposed dataset
        # would be a column in the original
        transpose   = FALSE,
        
        # Format checker information
        rowCheck    = TRUE, # Check a row (TRUE) or a column (FALSE)
        checkIndex  = 1,    # Index of row/column to check the format
        
        # File string to check
        check = c("SUBJID", "GENDER", "AGE", "DTHHRDY"),
        
        # Parsing information
        delim       = "\t", # Delimiter used to separate fields
        colNames    = 1,    # Row to use for column names
        rowNames    = 1,    # Column to use for row names
        ignoreCols  = 1,    # Columns to ignore
        ignoreRows  = 1,    # Rows to ignore
        commentChar = NULL, # Ignore lines starting with this string
        
        # Remove duplicated rows
        unique = FALSE,
        
        # Identity of rows and columns
        rows    = "subjects",
        columns = "attributes",
        
        # Default columns to show (NULL to show all)
        show = NULL,
        
        # Post-process the parsed data: decode the GENDER and DTHHRDY codes
        # into labelled factors and give the known columns readable names.
        # Codes that are not in the lookup tables become NA. Expects `data`
        # to have columns named "GENDER" and "DTHHRDY".
        process = function(data) {
            # Replace gender values with their meaning
            gender <- c("1"="Male", "2"="Female")
            value <- as.character(data[ , "GENDER"])
            data[ , "GENDER"] <- as.factor(gender[value])
            
            # Replace death circumstance (4-point hardy scale) values with their
            # meaning
            dthhrdy <- c("0"="Ventilator Case", "1"="Violent and fast death",
                         "2"="Fast death of natural causes", 
                         "3"="Intermediate death", "4"="Slow death")
            value <- as.character(data[ , "DTHHRDY"])
            data[ , "DTHHRDY"] <- as.factor(dthhrdy[value])
            
            # Correctly name columns; columns without a known replacement keep
            # their original name (indexing the lookup with an unknown name
            # would otherwise rename them to NA)
            match <- c("SUBJID"="Subject ID", "GENDER"="Gender", "AGE"="Age",
                       "DTHHRDY"="Death Circumstances")
            cols  <- colnames(data)
            known <- cols %in% names(match)
            cols[known] <- unname(match[cols[known]])
            colnames(data) <- cols
            
            return(data)
        }
    )
}

# Tag the format function with a "loader" attribute whose value is "formats"
gtexClinicalInfoFormat <- structure(gtexClinicalInfoFormat, loader = "formats")
nuno-agostinho/psichomics documentation built on Jan. 2, 2025, 4:10 a.m.