curatedTBData: Curation of existing tuberculosis transcriptomic studies

if (!require("magrittr", character.only = TRUE)) {
  BiocManager::install("magrittr")
  require("magrittr", character.only = TRUE)
}
source("data-raw/UtilityFunctionForCuration.R")

#### Read in raw data ####
# Fetch the GEO series record and its supplementary count table, then relabel
# the count columns with the GSM accession of the sample they belong to.
geo <- "GSE152218"
sequencePlatform <- "GPL16791"
gse <- GEOquery::getGEO(geo, GSEMatrix = FALSE)
urls <- GEOquery::getGEOSuppFiles(geo, fetch_files = FALSE)
# The code below reads a single table, so fail early (with a readable message)
# if the series does not list exactly one supplementary file.
if (is.null(urls) || nrow(urls) != 1L) {
    stop("Expected exactly one supplementary file for ", geo, call. = FALSE)
}
temp <- tempfile()
# mode = "wb" keeps a (possibly compressed) download byte-exact on Windows.
utils::download.file(urls$url, temp, mode = "wb")
data_counts <- read.delim(temp)
colnames(data_counts)[1] <- "ID_REF"
# read.delim() prepends "X" to headers that are not syntactic R names (e.g.
# ones starting with a digit). Strip only that leading prefix; removing every
# "X" would also mangle sample names that legitimately contain the letter.
colnames(data_counts) <- sub("^X", "", colnames(data_counts))
data_counts1 <- data_counts |>
    tibble::column_to_rownames("ID_REF")
# Map each sample title to its GSM accession. "-" in a title is replaced by
# "." to mirror the name mangling read.delim() applied to the headers.
gsm_list <- GEOquery::GSMList(gse)
title_to_name <- lapply(unname(gsm_list), function(gsm) {
    gsm_info <- gsm@header
    data.frame(title = gsub("-", ".", gsm_info$title),
               gsm_name = gsm_info$geo_accession)
}) |>
    dplyr::bind_rows()
index_col <- match(title_to_name$title, colnames(data_counts1))
# An unmatched title would otherwise surface as an opaque
# "undefined columns selected" error; name the offending samples instead.
if (anyNA(index_col)) {
    stop("No count column found for sample title(s): ",
         paste(title_to_name$title[is.na(index_col)], collapse = ", "),
         call. = FALSE)
}
# drop = FALSE keeps a data frame even if only one sample is selected.
data_counts1 <- data_counts1[, index_col, drop = FALSE]
colnames(data_counts1) <- title_to_name$gsm_name

#### Create Column Data ####
# readRawColData2() and create_standard_coldata() are not defined in this
# file (presumably they come from the sourced utility script).
characteristic_data_frame <- readRawColData2(gse)

# Name the extracted characteristics.
# NOTE(review): assumes the helper returns exactly these four columns in this
# order -- confirm against the GEO record.
colnames(characteristic_data_frame) <- c("Age", "TBStatus", "Gender", "BMI")

# Study-wide constants and numeric conversions.
characteristic_data_frame[["Tissue"]] <- "Whole Blood"
characteristic_data_frame[["GeographicalRegion"]] <- "India"
characteristic_data_frame[["Age"]] <-
    as.numeric(characteristic_data_frame[["Age"]])
characteristic_data_frame[["HIVStatus"]] <- "Negative"
characteristic_data_frame[["BMI"]] <-
    as.numeric(characteristic_data_frame[["BMI"]])

# Collapse the disease label to two levels: anything other than "LTBI"
# is recorded as "PTB".
is_ltbi <- characteristic_data_frame[["TBStatus"]] == "LTBI"
characteristic_data_frame[["TBStatus"]] <- ifelse(is_ltbi, "LTBI", "PTB")

# Capitalise the sex label: anything other than "male" becomes "Female".
is_male <- characteristic_data_frame[["Gender"]] == "male"
characteristic_data_frame[["Gender"]] <- ifelse(is_male, "Male", "Female")

# Pass the table through the project helper, then wrap it as an S4Vectors
# DataFrame.
col_info <- create_standard_coldata(characteristic_data_frame)
new_col_info <- S4Vectors::DataFrame(col_info)

###### Create raw data: convert ensembl to gene symbol #####
# install.packages("devtools")
# devtools::install_github("stephenturner/annotables")
# Reference the annotation table through its namespace, like every other
# package call in this script, instead of attaching annotables mid-script.
ensembl_to_symbol <- annotables::grch38
# Attach a gene symbol to every Ensembl row, drop rows without a symbol, and
# keep only the sample columns plus the symbol.
# NOTE(review): if the annotation table holds more than one row per Ensembl
# ID, the inner join repeats that gene's counts and can weight the median
# below -- confirm whether the table should be de-duplicated first.
data_counts1_new <- data_counts1 |>
    tibble::rownames_to_column("ENSEMBL") |>
    dplyr::inner_join(ensembl_to_symbol, by = c("ENSEMBL" = "ensgene")) |>
    dplyr::filter(symbol != "") |>
    dplyr::select(dplyr::all_of(c(colnames(data_counts1), "symbol")))
# Merge duplicated gene names: one row per symbol, taking the per-sample
# median across all rows that share the symbol.
data_counts1_new_combine <- stats::aggregate(. ~ symbol,
                                             data = data_counts1_new,
                                             FUN = stats::median)
data_counts1_new_combine <- data_counts1_new_combine |>
    tibble::column_to_rownames("symbol")

#### Create Row Data ####
# The curated assay is indexed by gene symbol, so both row-annotation columns
# carry the same symbol vector.
gene_symbols <- row.names(data_counts1_new_combine)
new_row_data <- S4Vectors::DataFrame(
    ID_REF = gene_symbols,
    SYMBOL_NEW = gene_symbols
)

##### Create Metadata #####
# Study-level description stored with the curated object. The "MIAME" class
# is not defined in this file (presumably Biobase's; it must be loaded).
# The Platform string used to span two source lines, which embedded a stray
# trailing newline in the stored value; it is now a single-line literal.
experimentData <- new("MIAME",
                      name = "William Evan Johnson",
                      lab = "Boston University",
                      contact = "wej@bu.edu",
                      title = "Tuberculosis in Malnourished Individuals",
                      abstract = "Whole blood gene expression profiling from well and malnourished Indian individuals with TB and severely malnourished household contacts with latent TB infection (LTBI). Severe malnutrition was defined as body mass index (BMI) <16. kg/m2 in adults and based on weight-for-height Z scores in children <18 years. Gene expression was measured using RNA-sequencing.",
                      url = "10.3389/fimmu.2022.1011166",
                      pubMedIds = "36248906",
                      other = list(Platform = "Illumina HiSeq 2500 (Homo sapiens)"))

# Assemble the SummarizedExperiment from the curated counts, the sample
# annotation, the gene annotation and the study description.
count_matrix <- as.matrix(data_counts1_new_combine)
sobject <- SummarizedExperiment::SummarizedExperiment(
    assays = list(counts = count_matrix),
    colData = new_col_info,
    rowData = new_row_data,
    metadata = list(experimentData)
)
# Show the object when the script is run interactively.
sobject

# Hand the object to the project helper (not defined in this file), then
# store the curated count table on its own as an RDS file.
save_raw_files(sobject, path = "data-raw/", geo = geo)
curated_assay_path <- paste0("data-raw/", geo, "_assay_curated.RDS")
saveRDS(data_counts1_new_combine, curated_assay_path)

compbiomed/curatedTBData documentation built on March 14, 2024, 2:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

compbiomed/curatedTBData
Curation of existing tuberculosis transcriptomic studies

inst/scripts/make-data_GSE152218.R
In compbiomed/curatedTBData: Curation of existing tuberculosis transcriptomic studies

R Package Documentation

Browse R Packages

We want your feedback!

compbiomed/curatedTBData Curation of existing tuberculosis transcriptomic studies

inst/scripts/make-data_GSE152218.R In compbiomed/curatedTBData: Curation of existing tuberculosis transcriptomic studies

R Package Documentation

Browse R Packages

We want your feedback!

compbiomed/curatedTBData
Curation of existing tuberculosis transcriptomic studies

inst/scripts/make-data_GSE152218.R
In compbiomed/curatedTBData: Curation of existing tuberculosis transcriptomic studies