Nothing
#' Build an Expression Set (eSet) from profile data.
#'
#' For every checked study, fetches the selected genetic profile and the
#' clinical data of the selected case list, keeps the samples present in
#' both, and assembles a Biobase ExpressionSet from them. The intermediate
#' tables are displayed through \code{getInTable()}.
#'
#' @details
#' All inputs are read from \code{myGlobalEnv} (selected cases, selected
#' genetic profiles, gene list, cgds connection). The results are written
#' back to \code{myGlobalEnv$ClinicalData}, \code{myGlobalEnv$ProfData},
#' \code{myGlobalEnv$AssayData} and \code{myGlobalEnv$eSet}.
#'
#' @usage geteSet()
#' @export
#' @return Called for its side effects; returns \code{NULL} invisibly. The
#'   ExpressionSet is stored in \code{myGlobalEnv$eSet}.
#' @examples
#' f <- 9
#' \dontrun{
#' readRDS(paste(path.package("canceR"),"/extdata/rdata/prad_michPhenoTest1021.rds", sep=""))
#' geteSet()
#' }
#'
geteSet <- function() {
  # Replace blank / whitespace-only entries with NA.
  blank2na <- function(x) {
    stripped <- gsub("\\s+", "", x) # make sure it's "" and not " " etc
    x[which(stripped == "")] <- NA
    x
  }

  # Coerce a column to double without a needless text round trip for
  # columns that are already numeric.
  as_double <- function(v) {
    if (is.numeric(v)) as.numeric(v) else as.numeric(as.character(v))
  }

  # Characters whose presence marks a clinical column as non-numeric
  # (pattern kept exactly as in the original implementation).
  non_numeric_chars <- "[a-z'-'+A-Z'/'' ']"

  ## Test checked Cases and Genetic Profiles
  testCheckedCaseGenProf()

  n_studies <- myGlobalEnv$lchecked_Studies_forCases
  n_genprofs <- length(myGlobalEnv$curselectGenProfs)

  # Upper bound of the genetic-profile index range covered so far.
  upper_genprof <- 0

  for (s in seq_len(n_studies)) {
    study_index <- myGlobalEnv$checked_StudyIndex[s]
    progress_bar <- tkProgressBar(
      title = myGlobalEnv$Studies[study_index], min = 0,
      max = n_genprofs, width = 400
    )

    # Index window (lower, upper] of the genetic profiles of this study.
    lower_genprof <- upper_genprof
    upper_genprof <- upper_genprof + myGlobalEnv$LGenProfs[s] + 1

    for (k in seq_along(myGlobalEnv$curselectCases)) {
      Sys.sleep(0.1)
      setTkProgressBar(
        progress_bar, k,
        label = paste(round(k / n_genprofs * 100, 0), "% of Expression Set")
      )

      # Only handle selections whose genetic profile belongs to this study.
      genprof_index <- myGlobalEnv$curselectGenProfs[k]
      if (genprof_index > upper_genprof || genprof_index <= lower_genprof) {
        next
      }

      case_index <- myGlobalEnv$curselectCases[k]
      GenProf <- myGlobalEnv$GenProfsRefStudies[genprof_index]
      Case <- myGlobalEnv$CasesRefStudies[case_index]

      if (length(myGlobalEnv$GeneList) > 500) {
        ProfData <- getMegaProfData(myGlobalEnv$GeneList, k)
      } else {
        ProfData <- getProfileData(
          myGlobalEnv$cgds, myGlobalEnv$GeneList, GenProf, Case
        )
        print(ncol(ProfData))
      }

      print("getting Profile Data and removing all NAs rows...")
      ## Remove rows that are NA everywhere. drop = FALSE keeps a
      ## one-gene profile as a data frame instead of a bare vector.
      ProfData <- ProfData[rowSums(!is.na(ProfData)) > 0, , drop = FALSE]

      ## Display profile data with Tcl Table
      title <- paste(
        myGlobalEnv$StudyRefGenProf[k], ":",
        myGlobalEnv$CasesStudies[case_index + 1]
      )
      getInTable(ProfData, title)

      ## Clinical data for the same case list
      ClinicalData <- getClinicalData(myGlobalEnv$cgds, Case)
      clinical_table <- rbind.na(colnames(ClinicalData), ClinicalData)
      rnames <- rownames(ClinicalData)
      cnames <- colnames(ClinicalData)

      # data.frame() may alter row and column names, so restore them.
      ClinicalData <- data.frame(lapply(ClinicalData, blank2na))
      rownames(ClinicalData) <- rnames
      names(ClinicalData) <- cnames

      ## Normalise missing-value markers column by column.
      ## seq_len() keeps this loop from running on an empty table, so the
      ## "no clinical data" message below can actually be reached.
      for (j in seq_len(ncol(ClinicalData))) {
        values <- as.character(ClinicalData[[j]])
        ## substitute "[Not Available]" by NA
        values[grepl("\\[Not Available\\]", values, ignore.case = TRUE)] <- NA
        ## Only entries that ARE the text "NA" become missing; an
        ## unanchored match would also wipe values that merely contain
        ## the letters "na" (e.g. "Luminal A").
        values[grepl("^NA$", values, ignore.case = TRUE)] <- NA
        # NOTE(review): this pattern matches any value containing a ".",
        # not only decimal numbers; a text column with a period is coerced
        # to numeric (yielding NA). Kept as in the original -- confirm
        # whether a stricter test is wanted.
        if (length(grep("[0-9]*\\.[0-9]*", values)) != 0) {
          values <- as.numeric(values)
        }
        ClinicalData[[j]] <- values
      }

      if (ncol(ClinicalData) == 0) {
        msgNoClinData <- paste(
          "No Clinical Data are Available for\n",
          myGlobalEnv$CasesStudies[case_index + 1]
        )
        tkmessageBox(
          message = msgNoClinData,
          title = paste("Study: ", myGlobalEnv$StudyRefCase[k])
        )
        # The progress bar is closed right after this loop.
        break
      }

      title <- paste(
        myGlobalEnv$StudyRefCase[k], myGlobalEnv$GenProfChoice[k],
        sep = ": "
      )
      getInTable(clinical_table, title)

      ## Keep only the samples (row names) present in both tables.
      n_clinical <- ncol(ClinicalData)
      merged <- merge(ClinicalData, ProfData, by = "row.names")
      print("merge Clinical and Profile Data")

      # Layout of `merged`: Row.names | clinical columns | profile columns.
      # Split by position; comparing column contents could drop a gene
      # column that happens to look like a clinical one.
      sample_ids <- as.character(merged[[1L]])
      clinical_cols <- seq_len(n_clinical) + 1L
      ClinicalData <- merged[, clinical_cols, drop = FALSE]
      rownames(ClinicalData) <- sample_ids
      # As before, ProfData keeps the Row.names column first.
      ProfData <- merged[, -clinical_cols, drop = FALSE]

      ## Genes x samples numeric matrix. Built column by column rather than
      ## via t() of the mixed data frame, which would format the numbers as
      ## text first (precision loss) and collapse when there is a single
      ## gene or a single sample.
      gene_cols <- lapply(ProfData[-1L], as_double)
      AssayData <- matrix(
        as.numeric(unlist(gene_cols, use.names = FALSE)),
        nrow = length(gene_cols), ncol = length(sample_ids), byrow = TRUE,
        dimnames = list(names(gene_cols), sample_ids)
      )

      ## Convert clinical columns holding only digit-like text to numeric.
      ## The pattern test is done once per column.
      for (j in seq_len(ncol(ClinicalData))) {
        values <- ClinicalData[[j]]
        if (is.numeric(values)) {
          next
        }
        as_text <- as.character(values)
        if (!any(grepl(non_numeric_chars, as_text))) {
          ClinicalData[[j]] <- as.numeric(as_text)
        }
      }

      myGlobalEnv$ClinicalData <- ClinicalData
      myGlobalEnv$ProfData <- ProfData
      myGlobalEnv$AssayData <- AssayData

      # Test that phenoData and AssayData describe the same samples
      if (identical(rownames(ClinicalData), colnames(AssayData))) {
        ## labelDescription is used by Biobase; here it is simply the
        ## column names of the clinical data.
        metaData <- data.frame(
          labelDescription = colnames(ClinicalData),
          row.names = colnames(ClinicalData)
        )
        ## AnnotatedDataFrame bundles the phenotypic data with its metadata.
        phenoData <- new(
          "AnnotatedDataFrame", data = ClinicalData, varMetadata = metaData
        )
        ## Assembling an ExpressionSet
        myGlobalEnv$eSet <- Biobase::ExpressionSet(
          assayData = AssayData, phenoData = phenoData, annotation = "GO"
        )
        print(paste("End of building eSet..."))
      } else {
        tkmessageBox(
          message = "The expression Gene Set and the Clinical Data do not have the same samples",
          icon = "warning"
        )
      }
    }
    close(progress_bar)
  }
  invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.