R/c_output_parser.R
In rqubic: Qualitative biclustering algorithm for expression data analysis in R

Documented in parseQubicBlocks parseQubicChars parseQubicRules

##======================================================================##
## Parsers for QUBIC ANSI C program outputs
##
## Author: Jitao David Zhang <jitao_david.zhang@roche.com>, March 15, 2011
##======================================================================##

parseQubicRules <- function(filename) {
  rules <- scan(filename, what="character", sep="\n", quiet=TRUE)

  ## the pattern needs to be revised!
  rule.pattern <- "^row\\s([[:graph:]]*)\\s:low=((\\d|\\.|-)*),\\sup=((\\d|\\.|-)*);\\s(\\d*)\\sdown-regulated,(\\d*)\\sup-regulated"
  rules.notfollow.pattern <- !grepl(rule.pattern, rules)
  if(any(rules.notfollow.pattern)) {
    warning.lines <-  paste("[", which(rules.notfollow.pattern), "]:", rules[rules.notfollow.pattern], collapse="\n", sep="")
    stop(sprintf("The following %d lines do not seem to be QUBIC rules:\n%s",
                 sum(rules.notfollow.pattern),
                 warning.lines))
  }
    
  rules.parsed <- gsub(rule.pattern, "\\1\t\\2\t\\4\t\\6\t\\7", rules)
  rules.split <- strsplit(rules.parsed, "\t")

  rules.mat <- do.call(rbind, rules.split)
  rules.df <- data.frame(probe=rules.mat[,1],
                         low=as.numeric(rules.mat[,2]),
                         up=as.numeric(rules.mat[,3]),
                         down.count=as.integer(rules.mat[,4]),
                         up.count=as.integer(rules.mat[,5]))
  return(rules.df)
}


parseQubicChars <- function(file, check.names=FALSE,...) {
  Call <- match.call(expand.dots=TRUE)
  for(argname in c("sep", "head", "row.names"))
    if(!is.null(Call[[argname]]))
      warning(gettextf("attempt to set '%s' ignored", argname), domain=NA)
  Call$sep <- "\t"
  Call$head <- TRUE
  Call$check.names <- check.names
  Call[[1L]] <- as.name("read.csv")
  res <- eval(Call)
  ## note that the first column cannot be set to row names since they might duplicate
  colnames(res)[1] <- "probe"
  return(res)
}

parseQubicCountedVector <- function(vector) {
  bc.items <- strsplit(vector, " ")[[1]]
  bc.item.count <- as.integer(gsub("\\[(\\d*)\\]:", "\\1", bc.items[[3]]))
  bc.items <- bc.items[4:length(bc.items)]
  if(is.na(bc.item.count) || bc.item.count != length(bc.items))
    stop("The count in the vector does not equal the count reported. File may be corrupted")

  return(bc.items)
}


## TODO: the QubicBiclusterSet object should use index to mark biclusters, but not eSets: too heavy
parseQubicBicluster <- function(vector) {
  bc.head.pattern <- "^BC\\d*\\sS=(\\d*)$"
  bc.Svalue <- as.integer(gsub(bc.head.pattern, "\\1", vector[1]))

  ## bicluster features
  bc.features <- parseQubicCountedVector(vector[[2]])

  ## bicluster samples
  bc.conditions <- parseQubicCountedVector(vector[[3]])

  ## table
  vector.split <- strsplit(vector[-(1:3)], "\t")
  ## check features/cond correspond
  table.features.raw <- sapply(vector.split, "[[", 1)
  table.features <- gsub("\\s*([[:graph:]]*):", "\\1", table.features.raw)
  if(!identical(table.features, bc.features))
    stop("Features in the bicluster table differ from those annotated above. Files may corrupt")
  table.conditions.length <- unique(sapply(vector.split, length))-1L
  if(!identical(table.conditions.length, length(bc.conditions)))
    stop("Conditions in the bicluster table differ from those annotated above. Files may corrupt")

  #### expression table
  ## bic.exprs <- do.call(rbind, lapply(vector.split, function(x) as.integer(x[-1])))
  ## if(!identical(dim(bic.exprs), c(length(bc.features), length(bc.conditions))))
  ##  stop("The bicluster table is of wrong dimension. File may corrupt")

  bic.info <- list(Svalue=bc.Svalue,
                   features=bc.features,
                   conditions=bc.conditions)
  ##                ,exprs=bic.exprs)
  return(bic.info)
}

listCharToFactor <- function(list) {
  list.unique <- levels(factor(unlist(list)))
  list.index <- sapply(list, function(x) list.unique %in% x)
  return(list(factor=list.unique,
              indices=list.index))

}
isClassSanity <- function(input.list, class) {
  input.list.unique.class <- unique(sapply(input.list, class))
  identical(input.list.unique.class, class)
}


QUBICBicluster <- function(features, conditions, exprs, Svalue) {
  fdata <- data.frame(features=features)
  colnames(fdata)[1] <- RQUBIC_INPUT_FEATURENAME_SYMBOl
  fd <- new("AnnotatedDataFrame", fdata)
  pdata <- data.frame(conditions=conditions)
  colnames(pdata)[1] <- RQUBIC_INPUT_SAMPLENAME_SYMBOl 
  pd <- new("AnnotatedDataFrame", pdata)
  esObj <- new("QUBICBicluster",
               phenoData=pd, featureData=fd, exprs=exprs, Svalue=Svalue)
}

QUBICBiclusterSet <- function(qubicVersion, inputDataFile,
                              parameters, BCs, features, conditions) {
  parameterNames <- names(parameters)
  if(!setequal(parameterNames,g_QUBIC_PARAMETERS))
    stop("Missing or non-existing parameter(s) detected:\n",
         "Allowed parameter:", paste(g_QUBIC_PARAMETERS, collapse=","),"\n",
         "Input parameter:", paste(parameterNames, collapse=","))
  if(!identical(unique(sapply(BCs, class)), "QUBICBicluster"))
    stop("BCs must be a list of QUBICBicluster")
  res <- new("QUBICBiclusterSet",
             qubicVersion=qubicVersion,
             inputDataFile=inputDataFile,
             parameters=parameters,
             BCs=BCs,
             features=features,
             conditions=conditions)
  return(res)
}
  
parseQubicBlocks <- function(filename) {
  blocks <- scan(filename, what="character", sep="\n", quiet=TRUE)

  ## comment lines
  comment.lines <- grep("^#", blocks, value=TRUE)
  qubic.version <- gsub(".*version\\s*(([[:digit:]]|\\.)*)\\s*output", "\\1", comment.lines[1])
  qubic.datafile <- gsub(".*Datafile\\s*([[:print:]]*):.*", "\\1", comment.lines[2])
  qubic.datafile.type <- gsub(".*:\\s*(\\w*)\\stype$", "\\1", comment.lines[2])
  qubic.parameters <- gsub("#\\s*Parameters:\\s-k\\s(\\d*)\\s-f\\s([[:print:]]*)\\s-c\\s([[:print:]]*)\\s-o\\s(\\d*)\\s-q\\s([[:print:]]*)\\s-r\\s(\\d*)",
                           "\\1;\\2;\\3;\\4;\\5;\\6", comment.lines[3])
  qubic.parameters.split <- strsplit(qubic.parameters, ";")[[1]]
  qubic.para.k <- as.integer(qubic.parameters.split[1])
  qubic.para.f <- as.numeric(qubic.parameters.split[2])
  qubic.para.c <- as.numeric(qubic.parameters.split[3])
  qubic.para.o <- as.integer(qubic.parameters.split[4])
  qubic.para.q <- as.numeric(qubic.parameters.split[5])
  qubic.para.r <- as.integer(qubic.parameters.split[6])

  ## bc
  bc.heads.index <- grep("^BC", blocks, value=FALSE)
  bc.head.pattern <- "^BC\\d*\\sS=(\\d*)$"
  bc.Svalues <- as.integer(gsub(bc.head.pattern, "\\1", blocks[bc.heads.index]))
  bc.index <- seq(along=bc.heads.index)-1 ## note that the index of BCs starts with 0

  bc.nextheads.index <- c(bc.heads.index[-1]-1, length(blocks))
  bc.lines <- lapply(seq(along=bc.heads.index), function(i) seq(from=bc.heads.index[i],
                           to=bc.nextheads.index[i]))

  qubic.bics <- lapply(bc.lines, function(i) parseQubicBicluster(blocks[i]))
  qubic.length <- length(qubic.bics)
  qubics.bics.Svalues <- sapply(qubic.bics, function(x) x$Svalue)
  qubics.bics.features <- listCharToFactor(sapply(qubic.bics, function(x) x$features))
  qubics.bics.conditions <- listCharToFactor(sapply(qubic.bics, function(x) x$conditions))
  #### expression is not parsed, since can be access by subsetting
  ## qubics.bics.exprs <- lapply(qubic.bics, function(x) x$exprs)

  ## qubic.bicObjs <- lapply(seq(along=qubic.bics),
  ##                      function(x) QUBICBicluster(qubics.bics.features[[x]],
  ##                                                 qubics.bics.conditions[[x]],
  ##                                                 qubics.bics.exprs[[x]],
  ##                                                 qubics.bics.Svalues[[x]]))

  obj <- new("QUBICBiclusterSet",
             Parameters=list(
               Call=match.call(),
               Method="QUBIC",
               qubicVersion=qubic.version,
               inputDataFile=qubic.datafile,
               k=qubic.para.k,
               f=qubic.para.f,
               c=qubic.para.c,
               o=qubic.para.o,
               q=qubic.para.q,
               r=qubic.para.r),
             RowxNumber=qubics.bics.features[[2]],
             NumberxCol=t(qubics.bics.conditions[[2]]),
             Number=qubic.length,
             info=list(features=qubics.bics.features[[1]],
               conditions=qubics.bics.conditions[[1]],
               Svalues=qubics.bics.Svalues)
             )
  
  return(obj)
}

fileFound <- function(x) {
  if(!file.exists(x)) {
    warning(sprintf("File %s not found!\n", x))
    return(FALSE)
  }
  return(TRUE)
}

Any scripts or data that you put into this service are public.

rqubic documentation built on Nov. 8, 2020, 8:20 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rqubic
Qualitative biclustering algorithm for expression data analysis in R

R/c_output_parser.R
In rqubic: Qualitative biclustering algorithm for expression data analysis in R

Defines functions fileFound parseQubicBlocks QUBICBiclusterSet QUBICBicluster isClassSanity listCharToFactor parseQubicBicluster parseQubicCountedVector parseQubicChars parseQubicRules

Documented in parseQubicBlocks parseQubicChars parseQubicRules

Try the rqubic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rqubic Qualitative biclustering algorithm for expression data analysis in R

R/c_output_parser.R In rqubic: Qualitative biclustering algorithm for expression data analysis in R

Defines functions fileFound parseQubicBlocks QUBICBiclusterSet QUBICBicluster isClassSanity listCharToFactor parseQubicBicluster parseQubicCountedVector parseQubicChars parseQubicRules

Documented in parseQubicBlocks parseQubicChars parseQubicRules

Try the rqubic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rqubic
Qualitative biclustering algorithm for expression data analysis in R

R/c_output_parser.R
In rqubic: Qualitative biclustering algorithm for expression data analysis in R