R/projectR.R
In projectR: Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering

#' @importFrom stats hclust kmeans prcomp
setOldClass("kmeans")
setOldClass("hclust")
setOldClass("prcomp")
#' @importFrom CoGAPS CoGAPS
#' @importFrom limma lmFit


#######################################################################################################################################
#' @import limma
#' @importFrom stats model.matrix
#' @param NP vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
#' @param full logical indicating whether to return the full model solution. By default only the new pattern object is returned.
#' @param model Optional arguements to choose method for projection
#' @param family VGAM family function for model fitting (default: "gaussianff")
#' @param bootstrapPval logical to indicate whether to generate p-values using bootstrap, not available for prcomp and rotatoR objects
#' @param bootIter number of bootstrap iterations, default = 1000
#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR",signature(data="matrix",loadings="matrix"),function(
  data, # a dataset to be projected onto
  loadings, # a matrix of continous values to be projected with unique rownames
  dataNames = NULL, # a vector with names of data rows
  loadingsNames = NULL, # a vector with names of loadings rows
  NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
  full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
  family="gaussianff",  # VGAM family function (default: "gaussianff")
  bootstrapPval=FALSE, # logical to indicate whether to generate p-values using bootstrap
  bootIter=1e3 # No of bootstrap iterations
  ){

  ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)
  #if(!is.na(NP)){loadings<-loadings[,NP]} was giving warning with subset of patterns
  #match genes in data sets
  if(is.null(dataNames)){
    dataNames <- rownames(data)
  }
  if(is.null(loadingsNames)){
    loadingsNames <- rownames(loadings)
  }
  dataM<-geneMatchR(data1=data, data2=loadings, data1Names=dataNames, data2Names=loadingsNames, merge=FALSE)
  print(paste(as.character(dim(dataM[[2]])[1]),'row names matched between data and loadings'))
  print(paste('Updated dimension of data:',as.character(paste(dim(dataM[[2]]), collapse = ' '))))
  # do projection
  Design <- model.matrix(~0 + dataM[[1]])
  colnames(Design) <- colnames(dataM[[1]])
  projection <- lmFit(as.matrix(t(dataM[[2]])),Design)
  projectionPatterns <- t(projection$coefficients)
  projection.ts<-t(projection$coefficients/projection$stdev.unscaled/projection$sigma)

  #projection<-vglm(dataM$data2 ~ 0 + dataM$data1,family=family)
  #projectionPatterns<-coefvlm(projection,matrix.out=TRUE)

  #For VGAM
  #pval.matrix<-matrix(2*pnorm(-abs(summary(projection)@coef3[,3])),nrow=5,byrow=TRUE)

  #For limma
  pval.matrix<-2*pnorm(-abs(projection.ts))
  #colnames(pval.matrix)<-colnames(projectionPatterns)
  #rownames(pval.matrix)<-rownames(projectionPatterns)

  if(bootstrapPval){
  boots <- lapply(1:bootIter,function(x){
  rows <- sample(nrow(Design),nrow(Design),replace = T)
  projection <- lmFit(as.matrix(t(dataM[[2]][rows,])),Design[rows,])
  return(projection$coefficients)
    })
  bootPval <- compareBoots(projection$coefficients,boots)
  }

  if(full & bootstrapPval){
      #projectionFit <- list('projection'=projectionPatterns, 'fit'=projection,'pval'=pval.matrix)
      projectionFit <- list('projection'=projectionPatterns, 'pval'=pval.matrix, 'bootstrapPval' = bootPval)
      return(projectionFit)
  } else if(full){
      projectionFit <- list('projection'=projectionPatterns, 'pval'=pval.matrix)
      return(projectionFit)
  }else{return(projectionPatterns)}
})


#######################################################################################################################################
#' @import limma
#' @importFrom NMF fcnnls
#' @examples
#' library("CoGAPS")
#' CR.RNAseq6l3c3t <- CoGAPS(p.RNAseq6l3c3t, params = new("CogapsParams",
#' nPatterns=5))
#' projectR(data=p.ESepiGen4c1l$mRNA.Seq,loadings=CR.RNAseq6l3c3t,
#' dataNames = map.ESepiGen4c1l[["GeneSymbols"]])
#'
#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR",signature(data="matrix",loadings="LinearEmbeddingMatrix"),function(
  data, # a dataset to be projected onto
  loadings, # a matrix of continous values to be projected with unique rownames
  dataNames = NULL, # a vector with names of data rows
  loadingsNames = NULL, # a vector with names of loadings rows
  NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
  full=FALSE, # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
  model=NA, # optional arguements to choose method for projection
  family="gaussianff", # VGAM family function (default: "gaussianff")
  bootstrapPval=FALSE, # logical to indicate whether to generate p-values using bootstrap
  bootIter=1e3 # No of bootstrap iterations
  ){

  loadings<-loadings@featureLoadings
  ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)
  return(projectR(data,loadings = loadings,dataNames = dataNames, loadingsNames = loadingsNames,NP,full,bootstrapPval=bootstrapPval,bootIter=bootIter))

})

#######################################################################################################################################

#' @import limma
#' @importFrom stats var
#' @examples
#' pca.RNAseq6l3c3t<-prcomp(t(p.RNAseq6l3c3t))
#' pca.ESepiGen4c1l<-projectR(data=p.ESepiGen4c1l$mRNA.Seq, 
#' loadings=pca.RNAseq6l3c3t, dataNames = map.ESepiGen4c1l[["GeneSymbols"]])
#'
#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR",signature(data="matrix",loadings="prcomp"),function(
  data, # a dataset to be projected onto
  loadings, # a matrix of continous values to be projected with unique rownames
  dataNames = NULL, # a vector with names of data rows
  loadingsNames = NULL, # a vector with names of loadings rows
  NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
  full=FALSE # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
  ){

  loadings<-loadings$rotation
  ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)

  #match genes in data sets
  if(is.null(dataNames)){
    dataNames <- rownames(data)
  }
  if(is.null(loadingsNames)){
    loadingsNames <- rownames(loadings)
  }
  dataM<-geneMatchR(data1=data, data2=loadings, data1Names=dataNames, data2Names=loadingsNames, merge=FALSE)
  print(paste(as.character(dim(dataM[[2]])[1]),'row names matched between data and loadings'))
  print(paste('Updated dimension of data:',as.character(paste(dim(dataM[[2]]), collapse = ' '))))
  # do projection
  dat2P<-apply(dataM[[2]],1,function(x) x-mean(x))
  projectionPatterns<- dat2P %*% dataM[[1]] #head(X %*% PCA$rotation)

  if(full==TRUE){
  #calculate percent varience accoutned for by each PC in newdata
  #Eigenvalues<-eigen(cov(projectionPatterns))$values
  #PercentVariance<-round(Eigenvalues/sum(Eigenvalues) * 100, digits = 2)

  PercentVariance<-apply(projectionPatterns,2, function(x) 100*var(x)/sum(apply(projectionPatterns,2,var)))

    projectionFit <- list(t(projectionPatterns), PercentVariance) #also need to change this to transpose
    return(projectionFit)
  }
  else{return(t(projectionPatterns))}

})
#######################################################################################################################################

#' @examples
#' pca.RNAseq6l3c3t<-prcomp(t(p.RNAseq6l3c3t))
#' r.RNAseq6l3c3t<-rotatoR(1,1,-1,-1,pca.RNAseq6l3c3t$rotation[,1:2])
#' pca.ESepiGen4c1l<-projectR(data=p.ESepiGen4c1l$mRNA.Seq, 
#' loadings=r.RNAseq6l3c3t, dataNames = map.ESepiGen4c1l[["GeneSymbols"]])
#'
#' @rdname projectR-methods
#' @aliases projectR

setMethod("projectR",signature(data="matrix",loadings="rotatoR"),function(
  data, # a dataset to be projected onto
  loadings, # a matrix of continous values to be projected with unique rownames
  dataNames = NULL, # a vector with names of data rows
  loadingsNames = NULL, # a vector with names of loadings rows
  NP=NA, # vector of integers indicating which columns of loadings object to use. The default of NP=NA will use entire matrix.
  full=FALSE # logical indicating whether to return the full model solution. By default only the new pattern object is returned.
  ){

  loadings <- loadings@rotatedM
  ifelse(!is.na(NP),loadings<-loadings[,NP],loadings<-loadings)

  #match genes in data sets
  if(is.null(dataNames)){
    dataNames <- rownames(data)
  }
  if(is.null(loadingsNames)){
    loadingsNames <- rownames(loadings)
  }
  dataM<-geneMatchR(data1=data, data2=loadings, data1Names=dataNames, data2Names=loadingsNames, merge=FALSE)
  print(paste(as.character(dim(dataM[[2]])[1]),'row names matched between data and loadings'))
  print(paste('Updated dimension of data:',as.character(paste(dim(dataM[[2]]), collapse = ' '))))
  # do projection
  dat2P<-apply(dataM[[2]],1,function(x) x-mean(x))
  projectionPatterns<- dat2P %*% dataM[[1]] #head(X %*% PCA$rotation)

  if(full==TRUE){
  #calculate percent varience accounted for by each PC in newdata
  Eigenvalues<-eigen(cov(projectionPatterns))$values
  PercentVariance<-round(Eigenvalues/sum(Eigenvalues) * 100, digits = 2)

  #PercentVariance<-apply(projectionPatterns,2, function(x) 100*var(x)/sum(apply(p2P,2,var)))

    projectionFit <- list(t(projectionPatterns), PercentVariance)
    return(projectionFit)
  }
  else{return(t(projectionPatterns))}

})

#######################################################################################################################################

#' @import limma
#' @examples
#' c.RNAseq6l3c3t<-correlateR(genes="T", dat=p.RNAseq6l3c3t, threshtype="N", 
#' threshold=10, absR=TRUE)
#' cor.ESepiGen4c1l<-projectR(data=p.ESepiGen4c1l$mRNA.Seq, loadings=c.RNAseq6l3c3t, 
#' NP="PositiveCOR", dataNames = map.ESepiGen4c1l[["GeneSymbols"]])
#'
#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR",signature(data="matrix",loadings="correlateR"),function(
  data, # a dataset to be projected onto
  loadings, # a matrix of continous values to be projected with unique rownames
  dataNames = NULL, # a vector with names of data rows
  loadingsNames = NULL, # a vector with names of loadings rows
  NP=NA, #can be used to select for "NegativeCOR" or "PositiveCOR" list from correlateR class obj containing both. By default is NA
  full=FALSE, # logical indicating whether to return the percent variance accounted for by each projected PC. By default only the new pattern object is returned.
  bootstrapPval=FALSE, # logical to indicate whether to generate p-values using bootstrap
  bootIter=1e3 # No of bootstrap iterations
  ){

  patterns <- loadings@corM
  if(!is.na(NP)){
    patterns<-as.matrix(patterns[[NP]])
    colnames(patterns) <- NP
  }
  else {
  patterns<-loadings
}
  #check length of patterns "PositiveCOR" and "NegativeCOR" or just positive
  if(length(patterns)==2){
    patterns <- do.call(rbind,patterns)
  }
  else{
    patterns <- as.matrix(patterns)
}
  return(projectR(data = data, loadings = patterns,dataNames = dataNames, loadingsNames = loadingsNames,  full = full,
    bootstrapPval = bootstrapPval, bootIter = bootIter))
 
})

#######################################################################################################################################

#' @param targetNumPatterns desired number of patterns with hclust  
#' @param sourceData data used to create cluster object
#' @import limma
#' @import cluster
#' @importFrom stats cutree
#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR", signature(data="matrix", loadings="hclust"),
function(data, loadings, dataNames=NULL, loadingsNames=NULL, full=FALSE,
targetNumPatterns, sourceData,bootstrapPval=FALSE,bootIter=1000)
{
  cut <- cutree(loadings, k=targetNumPatterns)
  patterns <- matrix(0, nrow=nrow(sourceData), ncol=targetNumPatterns)
  rownames(patterns) <- rownames(sourceData)
    for(x in 1:targetNumPatterns)
    {
      patterns[cut==x,x] <- apply(sourceData[cut==x,], 1, cor, y=colMeans(sourceData[cut==x,]))
    }
  return(projectR(data, loadings=patterns, dataNames, loadingsNames, full = full))
})

#' @rdname projectR-methods
#' @aliases projectR
setMethod("projectR", signature(data="matrix", loadings="kmeans"),
function(data, loadings, dataNames=NULL, loadingsNames=NULL, full=FALSE, sourceData,bootstrapPval=FALSE,bootIter=1000)
{
  patterns <- matrix(0, nrow=nrow(sourceData), ncol=length(loadings$size))
  rownames(patterns) <- rownames(sourceData)
  for(x in 1:length(loadings$size))
  {
    patterns[loadings$cluster==x,x] <- apply(sourceData[loadings$cluster==x,], 1, cor, y=colMeans(sourceData[loadings$cluster==x,]))
  }
  return(projectR(data, loadings=patterns, dataNames= dataNames, full = full,bootstrapPval=bootstrapPval,bootIter=bootIter))
})

#########################################################################


compareBoots <- function(projection,boots){
mat <- sapply(1:nrow(projection),function(i){
  sapply(1:ncol(projection),function(j){
    val <- sapply(1:length(boots),function(x){
      return(boots[[x]][i,j])
    })
    valD <- ecdf(val)
    qt0 <- valD(0)
    if(qt0 < 0.5){
        return(2*qt0)
      } else {
        return(2*(1-qt0))
      }
  })
})
return(mat)
}
Any scripts or data that you put into this service are public.
projectR documentation built on Nov. 8, 2020, 7:50 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
projectR
Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering

R/projectR.R
In projectR: Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering

Defines functions compareBoots

Try the projectR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

projectR Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering

R/projectR.R In projectR: Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering

Defines functions compareBoots

Try the projectR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

projectR
Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering

R/projectR.R
In projectR: Functions for the projection of weights from PCA, CoGAPS, NMF, correlation, and clustering