R/class_and_methods.R

Defines functions newLowMACA

Documented in newLowMACA

# class definition
setClass('LowMACA', 
	slots=c(
		arguments='list'
		, alignment='list'
		, mutations='list'
		, entropy='list'
		)
	, prototype=list(
		arguments=list(
			genes=NULL
			, pfam=NULL
			, pfamAllGenes=''
			, input=''
			, mode=''
			, params=list(
				mutation_type=c('missense', 'all', 'truncating', 'silent')[1]
				, tumor_type='all_tumors'
				, min_mutation_number=1L
				, density_bw=0
				, clustal_cmd='clustalo'
				, use_hmm=FALSE
				, datum=FALSE
				)
			, parallelize=list(
				getMutations=FALSE
				, makeAlignment=TRUE
				)
			)
		)
	,validity=function(object) {
	    object@arguments$params
	    if( class(object@arguments$params) != "list" )
	        return("Parameter value should be a list")
	    if( length(object@arguments$params) != 7 )
	        return("Parameter value should be a list of length 7")
	    if( !identical(
	        names(object@arguments$params),
	        c("mutation_type", "tumor_type", "min_mutation_number", 
	            "density_bw", "clustal_cmd", "use_hmm","datum")
	        ))
	        return("Parameter value should be a list whose names are:
	            mutation_type, tumor_type, min_mutation_number, density_bw and clustal_cmd")
	    if( ! object@arguments$params$mutation_type %in% 
	        c('missense', 'all', 'truncating', 'silent'))
	        return("mutation_type should be one of the following:
	            missense, all, truncating, silent")
	    if( class(object@arguments$params$tumor_type) != 'character' )
	        return('tumor_type must be a character')
	    if( !is.numeric(object@arguments$params$min_mutation_number) )
	        return('min_mutation_number must be an integer')
	    if( !is.numeric(object@arguments$params$density_bw) 
	    	& object@arguments$params$density_bw != 'auto' )
	        return('density_bw must be a number or "auto"')
	    if( class(object@arguments$params$clustal_cmd) != 'character' )
	        return('clustal_cmd must be a character')
    	TRUE
		}
	)

# getters
setGeneric('lmParams', function(object) standardGeneric('lmParams'))
setMethod('lmParams', 'LowMACA', function(object) {
	return(object@arguments$params)
	})

setGeneric('lmAlignment', function(object) standardGeneric('lmAlignment'))
setMethod('lmAlignment', 'LowMACA', function(object) {
	return(object@alignment)
	})

setGeneric('lmMutations', function(object) standardGeneric('lmMutations'))
setMethod('lmMutations', 'LowMACA', function(object) {
	return(object@mutations)
	})

setGeneric('lmEntropy', function(object) standardGeneric('lmEntropy'))
setMethod('lmEntropy', 'LowMACA', function(object) {
	return(object@entropy)
	})


setGeneric('parallelize', function(object) standardGeneric('parallelize'))
setMethod('parallelize', 'LowMACA', function(object) {
	return(object@arguments$parallelize)
	})

# setters
setGeneric('lmParams<-', function(object, value) standardGeneric('lmParams<-'))
setReplaceMethod('lmParams', 'LowMACA', function(object, value) {
	object@arguments$params <- value
	validObject(object)
	    #Check for correct clustalo command
    ClustalCommand <- Sys.which(object@arguments$params$clustal_cmd)
    if(ClustalCommand=="") {
        warning("The path to clustal omega is not correct. Change it ore use the web service. See ?setup for details")
    } else {
    	ClustalVersion <- system(paste(ClustalCommand ,  "--version") , intern=TRUE)
    	if(!grepl("^1.2" , ClustalVersion))
        	warning("Clustal Omega version could be not compatible. LowMACA was tested on 1.2.x")
	}
	object
})

setGeneric('parallelize<-', function(object, value) standardGeneric('parallelize<-'))
setReplaceMethod('parallelize', 'LowMACA', function(object, value) {
	object@arguments$parallelize <- value
	object
	})

# constructor
newLowMACA <- function(genes=NULL, pfam=NULL)
{
	# check arguments
	if( is.null(genes) & is.null(pfam) )
		stop('Either a gene or a pfam should be specified.')
	if( length(pfam)>1 )
		stop('Only one Pfam ID can be evaluated')
	# initialize the LowMaca object
	object <- new('LowMACA')
	if( !is.null(pfam) ) {
		object@arguments$mode <- 'pfam'
		object@arguments$pfam <- pfam
	} else {
		object@arguments$mode <- 'genes'
		object@arguments$genes <- genes
	}
	mode <- object@arguments$mode
	if( mode == 'genes' )
	{
	    # load annotation files
	    myAlias <- getMyAlias()
	    myUni <- getMyUni()
	    #
	    selectedColumns <- c('Gene_Symbol', 'Entrez', 'UNIPROT', 'AMINO_SEQ')
		geneID <- .checkGene_to_geneID(genes, myUni, myAlias)$Gene_Symbol
		selectedRows <- myUni$Gene_Symbol %in% geneID
		genesData <- myUni[selectedRows, selectedColumns]
		genesData$Pfam_ID <- '-'
		genesData$Envelope_Start <- 1
		genesData$Envelope_End <- nchar(genesData$AMINO_SEQ)
        seq_names <- paste(genesData[, 'Gene_Symbol']
            , genesData[, 'Pfam_ID']
            , genesData[, 'Entrez'] 
            , paste(genesData[, 'Envelope_Start'], 
                genesData[, 'Envelope_End'], sep=";") 
            , sep="|")
		rownames(genesData) <- seq_names
	} else {
	## mode == 'pfam'
		# load annotation files
	    myPfam <- getMyPfam()
	    # select rows and colums to match the input
	    selectedColumns <- c('Gene_Symbol', 'Pfam_ID', 'Entrez'
	    	, 'Envelope_Start', 'Envelope_End', 'UNIPROT', 'Pfam_Fasta')
	    selectedRows <- myPfam$Pfam_ID %in% pfam
	    if( !any(selectedRows) ) stop('Pfam name is not correct or is not mapped by LowMACA')
	    genesData <- myPfam[selectedRows, selectedColumns]
        seq_names <- paste(genesData[, 'Gene_Symbol']
            , genesData[, 'Pfam_ID']
            , genesData[, 'Entrez'] 
            , paste(genesData[, 'Envelope_Start'], 
                genesData[, 'Envelope_End'], sep=";") 
            , sep="|")
        colnames(genesData)[colnames(genesData) == 'Pfam_Fasta'] <- 'AMINO_SEQ'
        rownames(genesData) <- seq_names
	    if( !is.null(genes) ) {
	    	object@arguments$genes <- genes
	    	object@arguments$pfamAllGenes <- genesData
	    	# load annotation files
		    myAlias <- getMyAlias()
		    myUni <- getMyUni()
		    geneID <- .checkGene_to_geneID(genes, myUni, myAlias)$Gene_Symbol
		    selectedRows <- genesData$Gene_Symbol %in% geneID
			genesData <- genesData[selectedRows, ]
	    }
	}
	genesData[, 'Gene_Symbol'] <- factor(genesData[, 'Gene_Symbol'])
	genesData[, 'Pfam_ID'] <- factor(genesData[, 'Pfam_ID'])
	genesData[, 'Entrez'] <- factor(genesData[, 'Entrez'])
	genesData[, 'UNIPROT'] <- factor(genesData[, 'UNIPROT'])
	## convert non-canonical amnio acids into their
	## most similar and canonical one
	##		U (selenocysteine) -> A (alanine)
	genesData$AMINO_SEQ <- gsub('U','A', genesData$AMINO_SEQ)
	## return the updated object
	object@arguments$input <- genesData
	return(object)

}

# methods
setGeneric('alignSequences', function(object, clustalo_filename=NULL 
	, mail=NULL , perlCommand="perl", use_hmm=FALSE, datum=FALSE) standardGeneric('alignSequences'))
setMethod('alignSequences', 'LowMACA', function(object, clustalo_filename=NULL 
	, mail=NULL , perlCommand="perl", use_hmm=FALSE, datum=FALSE) {

	if( object@arguments$mode =='genes' & use_hmm==TRUE )
		stop('hmm mode can run only in pfam mode')
	message("Aligning sequences...")
	clustal_cmd <- object@arguments$params$clustal_cmd
	genesData <- object@arguments$input
	object@arguments$params$datum <- datum
	if(!is.null(mail))
		.PerlModuleChecks(stop=TRUE , perl=perlCommand)
	if( object@arguments$mode == 'genes' ) {
		object@alignment <- .clustalOAlign(genesData, clustal_cmd, clustalo_filename , 
			mail , perlCommand, use_hmm, datum)
	} else {
		if( is.null(object@arguments$genes) | !(datum) ) { #!(datum) gives retrocompatibility with pre-datum versions
			object@alignment <- .clustalOAlign(genesData, clustal_cmd, clustalo_filename , 
				mail , perlCommand, use_hmm, datum)
		} else {
			pfamAllGenes <- object@arguments$pfamAllGenes
			alignment <- .clustalOAlign(pfamAllGenes, clustal_cmd, clustalo_filename , 
				mail , perlCommand, use_hmm, datum)
			object@alignment <- .filterMAlign(alignment, object@arguments$genes, datum)			
		}
	}
	if( object@arguments$mode == 'pfam' ) {
		m <- object@alignment$CLUSTAL
		cm <- consensusMatrix(m)[-1,] # -1 removes gaps
		consensus <- apply(cm,2, function(x) rownames(cm)[which.max(x)])
		# put a gap when only gap exist (possible only if "datum" is TRUE)
		consensus[colSums(cm)==0] <- '-' 
		object@alignment$df <- data.frame(
			consensus=consensus
			, conservation=.Trident_Score(object@alignment$CLUSTAL)
			)		
	} else {
		if( nrow(genesData)>1 ) {
			m <- object@alignment$CLUSTAL
			cm <- consensusMatrix(m)[-1,] # -1 removes gaps
			object@alignment$df <- data.frame(
				consensus=apply(cm,2, function(x) rownames(cm)[which.max(x)])
				, conservation=.Trident_Score(object@alignment$CLUSTAL)
				)
		} else {
			object@alignment$df <- data.frame(
				consensus=strsplit(genesData$AMINO_SEQ,'')[[1]]
				, conservation=rep(1, length(genesData$AMINO_SEQ))
				)				
		}
	}
	return(object)
	})

setGeneric('getMutations', function(object, repos=NULL) standardGeneric('getMutations'))
setMethod('getMutations', 'LowMACA', function(object, repos=NULL) {
	myGenes <- object@arguments$input
	mutation_type <- object@arguments$params$mutation_type
	tumor_type <- object@arguments$params$tumor_type
	parallelGetMut <- object@arguments$parallelize$getMutations
	# outputFolder <- object@arguments$paths$output_folder
	if( is.null(repos) ) {
		message("Getting mutations from cancers studies...")
		gmOut <- .getGeneMutations(myGenes=myGenes, 
			mutation_type=mutation_type, tumor_type=tumor_type, 
			parallelize=parallelGetMut)
	} else {
		message("Filtering mutations from local repository...")
		gmOut <- .getLocalGeneMutations(myGenes=myGenes, 
			mutation_type=mutation_type, tumor_type=tumor_type, 
			localData=repos)
	}
	object@mutations$data <- gmOut$Mutations
	object@mutations$freq <- gmOut$AbsFreq
	return(object)
	})

setGeneric('mapMutations', function(object) standardGeneric('mapMutations'))
setMethod('mapMutations', 'LowMACA', function(object) {
	mut <- object@mutations$data
	if( is.null(mut) ){
	  stop("mutations slot is empty. Launch the method getMutations first!")
	}
	if( nrow(mut)==0 ){
	  stop("mutations slot is empty. Launch the method getMutations first!")
	}
	alignment <- object@alignment$ALIGNMENT
	alignmentLength <- max(alignment$Align)
	# # elaborate the mutations and map them with the alignment
	mut_aligned <- merge( mut , alignment[!is.na(alignment$Ref) , ]
					, by.x=c("Entrez" , "Gene_Symbol" , "Amino_Acid_Position") 
					, by.y=c("Entrez" , "Gene_Symbol" , "Ref") 
					, all.x=TRUE)
	mut_aligned <- mut_aligned[!is.na(mut_aligned$Align), ]
	# parameters for the analysis [can't be set by the user, so far]
	splitCriterion <- c('Gene_Symbol', 'Tumor_Type', 'domainID')
	mode <- object@arguments$mode
	if( mode == 'genes' ) {
		splitCriterion <- splitCriterion[1]
	} else splitCriterion <- splitCriterion[3]
	
	# split data
	mut_aligned.split <- split(mut_aligned, mut_aligned[[splitCriterion]])
	mut_aligned.extended <- matrix(0, nrow=length(mut_aligned.split)
		, ncol=alignmentLength)
	for(i in 1:length(mut_aligned.split)) {
		tmp <- sapply(
			split(mut_aligned.split[[i]]$Align, mut_aligned.split[[i]]$Align)
			, length
			)
		if( length(tmp)>0 ) 
			mut_aligned.extended[i,as.numeric(names(tmp))] <- tmp
	}
	rownames(mut_aligned.extended) <- names(mut_aligned.split)
	# filter data based on number of mutations
	minNMut <- object@arguments$params$min_mutation_number
	tokeep <- rowSums(mut_aligned.extended) >= minNMut
	if(any(!tokeep))
	{
		warning(
			paste("We excluded these genes (or domains) because they have less than"
				, minNMut, "mutations")
			, immediate.=TRUE)
		excludedGenes <- rownames(mut_aligned.extended[!tokeep, ])
		print(excludedGenes)
		object@mutations$excluded <- excludedGenes
	}
	mut_aligned.extended <- mut_aligned.extended[tokeep, , drop=FALSE]
	object@mutations$aligned <- mut_aligned.extended
	return(object)

	})

setGeneric('setup', function(object, repos=NULL, clustalo_filename=NULL 
  , mail=NULL , perlCommand="perl", use_hmm=FALSE, datum=FALSE) standardGeneric('setup'))
setMethod('setup', 'LowMACA', function(object, repos=NULL, clustalo_filename=NULL 
  , mail=NULL , perlCommand="perl", use_hmm=FALSE, datum=FALSE) {
  object@arguments$params$datum <- datum
  object <- alignSequences(object, clustalo_filename , mail , perlCommand, use_hmm, datum)
  object <- getMutations(object, repos=repos)
  object <- mapMutations(object)
  return(object)
})

setGeneric('entropy', function(object, bw=NULL , conservation=0.1) standardGeneric('entropy'))
setMethod('entropy', 'LowMACA', function(object, bw=NULL , conservation=.1) {
	if(conservation > 1 || conservation < 0)
		stop("Conservation threshold must be a number between 0 and 1")
	myMut <- object@mutations
	if(identical(myMut , list()))
		stop("mutations slot is empty. Launch the method getMutations first!")
	myAln <- object@alignment
	if(identical(myAln , list()))
		stop("alignment slot is empty. Launch the method alignSequences first!")
	if( nrow(object@mutations$data)==0 ) {
		object@entropy$absval <- NA
		object@entropy$log10pval <- NA
		object@entropy$pvalue <- NA
		return(object)
	}
	mut_extended <- object@mutations$aligned
	alignment    <- object@alignment
	# Uniform variable
	message("Making uniform model...")
	if( is.null(bw) ) bw <- object@arguments$params$density_bw else 
		object@arguments$params$density_bw <- bw
	if( bw=='auto' ) 
	{
		# calculate the bw from the global profile
		bw <- .profileDensity(colSums(mut_extended))$bw
	}
	message(paste('Assigned bandwidth:', round(bw,2)))
	object@entropy$bw <- bw
	weights <- .alnWeights(alignment)
	# if( plotUniform ) pdf(file.path(outputFolder , "Uniform_Model.pdf"))
	object@entropy$uniform <- .makeUniformModel(mut_extended, bw=bw, nboot=1000, 
		weights=weights, plotOUT=FALSE)
	# if( plotUniform ) dev.off()

	# Calculate the entropy values
	globalProfile <- colSums(mut_extended)
	uniform <- object@entropy$uniform
	absval <- .profileEntropy(globalProfile, norm=FALSE, bw=bw)
	log10pval <- .profileEntropy(globalProfile, norm=TRUE, bw=bw, model=uniform)
	pvalue <- 10^log10pval

	object@entropy$absval <- absval
	object@entropy$log10pval <- log10pval
	object@entropy$pvalue <- pvalue
	object@entropy$conservation_thr <- conservation

	# null profile
	# Null profile calculated on the global profile
	nullOut <- .makeNullProfile(mut_extended, bw=bw, 
		nboot=1000, weights=.alnWeights(alignment))

	pvals <- nullOut$pvalue
	filteredPvals <- pvals
	filteredPvals[object@alignment$df$conservation < conservation] <- NA
	qvals <- p.adjust(filteredPvals, method='BH')

	object@alignment$df <- cbind(
		object@alignment$df[, c('consensus', 'conservation')]
		, nullOut, qvalue=qvals
		#, misalnFreq=object@alignment$df[, 'misalnFreq']
		)
	return(object)

	})

setMethod('show', 'LowMACA', function(object) {

	nItems <- nrow(object@arguments$input)
	message(paste('\nLowMACA object with', nItems, 'items:'))

	items <- object@arguments$input$Gene_Symbol
	itemsText <- paste(head(items), collapse=', ')
	message(paste(itemsText, ifelse(nItems>6, ', ...\n', '\n'), sep=''))

	if( length(object@entropy)>0 ) {
		objectPvalue <- object@entropy$pvalue
		objectPvalue <- signif(objectPvalue, 5)
		message(paste('The p-value of the global aligned profile is:', objectPvalue, '\n'))
	}
	})

###################
###### SUMMARY METHODS
############################

setGeneric('lfm', function(object, metric='qvalue', threshold=.05, conservation=NULL)
	standardGeneric('lfm'))
setMethod('lfm', 'LowMACA', function(object, metric='qvalue', threshold=.05, conservation=NULL) {
	if(is.null(conservation))
		conservation <- object@entropy$conservation_thr
	#Checks on parameters
	if(! (metric %in% c("qvalue" , "pvalue") ))
		stop("metric parameter can be only 'pvalue' or 'qvalue'")
	if(!(threshold <= 1 && threshold>=0))
		stop("pvalue/qvalue threshold must be a number between 0 and 1")
	if(!(conservation <= 1 && conservation>=0))
		stop("conservation score threshold must be a number between 0 and 1")
	entrop <- object@entropy
	if(identical(entrop , list()))
		stop("Entropy slot is empty. Launch the method entropy first!")
	if( nrow(object@mutations$data)==0 ) {
		message('No mutations available for this object.')
	} else {
		if( metric == 'qvalue' ) {
			if( conservation != object@entropy$conservation_thr ) {
				## in case a new conservation threshold is given,
				## recalculate the qvalue
				filteredPvals <- object@alignment$df$pvalue
				filteredPvals[object@alignment$df$conservation < conservation] <- NA
				qvalue <- p.adjust(filteredPvals, method='BH')
			} else {
				## use qvalue already stored
				qvalue <- object@alignment$df$qvalue
			}
			signifPos <- which(qvalue < threshold)
		} else {
			## use pvalue
			pvalue <- object@alignment$df$pvalue
			signifPos <- which(pvalue < threshold)
		}
		out <- lapply(signifPos, function(pos) {
			selectedRows <- object@alignment$ALIGNMENT$Align == pos
			alnData <- object@alignment$ALIGNMENT[selectedRows, ]
			selectedColumns <- c('Gene_Symbol', 'Ref', 'Envelope_Start'
				, 'Envelope_End')
			alnData <- alnData[!is.na(alnData$Ref), selectedColumns]
			colnames(alnData) <- c('Gene_Symbol', 'Amino_Acid_Position'
				, 'Envelope_Start', 'Envelope_End')
			selectedColumns <- c('Gene_Symbol', 'Amino_Acid_Position'
				, 'Amino_Acid_Change', 'Sample', 'Tumor_Type')
			mutData <- object@mutations$data[, selectedColumns]
			lfm <- merge(mutData, alnData)
			if( nrow(lfm)>0 ){
				lfm$Multiple_Aln_pos <- pos
				lfm$metric <- switch(metric, 'pvalue'=pvalue[pos], 'qvalue'=qvalue[pos])
			} else {
				lfm$Multiple_Aln_pos <- character(0)
				lfm$metric <- numeric(0)
			}
			return(lfm)
			})
		out <- do.call('rbind', out)
		# load data
	    myUni <- getMyUni()
	    selectedColumns <- c('Gene_Symbol', 'Entrez', 'Entry', 'UNIPROT'
	    	, 'Chromosome', 'Protein.name')
		myUniSmall <- myUni[, selectedColumns]
		out <- merge(out, myUniSmall)
		return(out)
	}
	})

setGeneric('lfmSingleSequence', 
	function(object, metric='qvalue', threshold=.05, conservation=0.1 
		# , parallel=FALSE
		, BPPARAM=bpparam("SerialParam")
		, mail=NULL , perlCommand="perl",verbose=FALSE) standardGeneric('lfmSingleSequence'))
setMethod('lfmSingleSequence', 'LowMACA', 
	function(object, metric='qvalue', threshold=.05, conservation=0.1 
		# , parallel=FALSE
		, BPPARAM=bpparam("SerialParam")
		, mail=NULL, perlCommand="perl",verbose=FALSE) {
	#Checks on parameters
	if(! (metric %in% c("qvalue" , "pvalue") ))
		stop("metric parameter can be only 'pvalue' or 'qvalue'")
	if(!(threshold <= 1 && threshold>=0))
		stop("pvalue/qvalue threshold must be a number between 0 and 1")
	if(!(conservation <= 1 && threshold>=0))
		stop("conservation score threshold must be a number between 0 and 1")
	#Parallel options
	# if(is.logical(parallel) && parallel==TRUE)
	# 	cores <- parallel::detectCores()
	# if(is.logical(parallel) && parallel==FALSE)
	# 	cores <- 1L
	# if(is.numeric(parallel))
	# 	cores <- min(parallel , parallel::detectCores())
	# if( cores > 1 ) {
	# 	applyfun <- bplapply
	# 	if( Sys.info()[['sysname']] == 'Windows' ){
	# 		#applyfun <- lapply
	# 		options(MulticoreParam=SnowParam(workers=cores))
	# 	} else {
	# 		options(MulticoreParam=MulticoreParam(workers=cores))
	# 	}
	# } else {
	# 	applyfun <- lapply
	# }
	mode <- object@arguments$mode
	data_split <- split(object@mutations$data, object@mutations$data$Gene_Symbol)
	if(mode=="pfam") {
		#LOL stands for Lots Of LowMACAs
		LOL <- bplapply(data_split , function(x) {
						if(Sys.info()[['sysname']] == 'Windows') library(LowMACA)
						if(nrow(x)==0)
							return(NULL)
						if(verbose)
							message(paste("Working on:" , unique(x$Gene_Symbol)))
						out <- suppressMessages(newLowMACA(genes=unique(x$Gene_Symbol) , pfam=object@arguments$pfam))
						lmParams(out)$clustal_cmd <- object@arguments$params$clustal_cmd
						out <- suppressMessages(setup(out , repos=x , mail=mail , perlCommand=perlCommand))
						out <- suppressMessages(entropy(out))
						lfm(out , metric=metric , threshold=threshold , conservation=conservation)
						} , BPPARAM=BPPARAM)
	} else {
		LOL <- bplapply(data_split , function(x) {
						if(Sys.info()[['sysname']] == 'Windows') library(LowMACA)
						if(nrow(x)==0)
							return(NULL)
						if(verbose)
							message(paste("Working on:" , unique(x$Gene_Symbol)))
						out <- suppressMessages(newLowMACA(genes=unique(x$Gene_Symbol)))
						out <- suppressMessages(setup(out , repos=x , mail=mail , perlCommand=perlCommand))
						out <- suppressMessages(entropy(out))
						lfm(out , metric=metric , threshold=threshold , conservation=conservation)
						} , BPPARAM=BPPARAM)
	}
	LOL_out <- do.call("rbind" , LOL)
	return(LOL_out)
})


############
###### PLOTTING METHODS
###############################

setGeneric('nullProfile', function(object, conservation=NULL , windowlimits=NULL) standardGeneric('nullProfile'))
setMethod('nullProfile', 'LowMACA', function(object, conservation=NULL , windowlimits=NULL) {

	if( length(object@alignment)==0 )
		stop('Perform alignment on the object before plotting.')

	if( length(object@mutations)==0 )
		stop('Retrieve mutations for the object before plotting.')

	if( length(object@entropy)==0 )
		stop('Perform entropy calculation on the object before plotting.')

	if( nrow(object@mutations$data)==0 ) {
		message('No mutations available for this object.')
	} else {

		if( is.null(windowlimits) )
			windowlimits <- 1:length(object@alignment$df)
		if(is.null(conservation))
			conservation <- object@entropy$conservation_thr
		mean <- object@alignment$df$mean[windowlimits]
		lowerThreshold <- object@alignment$df$lTsh[windowlimits]
		upperThreshold <- object@alignment$df$uTsh[windowlimits]
		profile <- object@alignment$df$profile[windowlimits]
		pvalue <- object@alignment$df$pvalue[windowlimits]
		if( conservation != object@entropy$conservation_thr ) {
			## in case a new conservation threshold is given,
			## recalculate the qvalue
			filteredPvals <- object@alignment$df$pvalue
			filteredPvals[object@alignment$df$conservation < conservation] <- NA
			qvalue <- p.adjust(filteredPvals, method='BH')
		} else {
			## use qvalue already stored
			qvalue <- object@alignment$df$qvalue
		}
		qvalue <- qvalue[windowlimits]
#		misalnFreq <- object@alignment$df$misalnFreq

		#
		qvalSignif_x <- which(qvalue < 5e-2)
	    qvalSignif_y <- profile[qvalSignif_x] + max(profile)/20
	    # over <- profile > upperThreshold
	    # ylim <- range(c(profile, upperThreshold, lowerThreshold, qvalSignif_y*1.05))
	    max_y <- max(c(
	    	object@alignment$df$lTsh,
	    	object@alignment$df$uTsh,
	    	object@alignment$df$profile*1.1
	    	), na.rm=TRUE)

	    # red bars of the resudues over the threshold

	    # plot(profile, main='', type='h'
	    #     , ylim=ylim, bty='n',xaxt='n',xlab='', col='orange', lwd=10)
	    # blackProfile <- sapply(1:length(profile), 
	    # 	function(i) min(profile[i], upperThreshold[i]))
	    # lines(blackProfile, col='black', lwd=10, type='h')
	    # lines(upperThreshold, lty=2, lwd=2, col='blue')

	    # black bars of the other resudues and of residues over, but only
	    # the part below the threshold

	    blackProfile <- sapply(1:length(profile), 
	    	function(i) min(profile[i], upperThreshold[i]))
	    orangeProfile <- profile - blackProfile
	    mp <- barplot(rbind(blackProfile, orangeProfile), 
	    	col=c('black', 'orange'), ylim=c(0, max_y), ylab='Mutation density'
	    	)
	    axis(1, at=mp, labels=windowlimits, las=2)
	    lines(mp, upperThreshold, lty=2, lwd=2, col='blue')
	    qvalSignif_x <- mp[qvalue < 5e-2]

	    ### plot an asterisk above the residues which are significant in 
	    # terms of the pvalue
	    if( length(qvalSignif_x) > 0 ) {
	    	text(qvalSignif_x, qvalSignif_y, '*', cex=2, col='red')
	    	## if more than 70% of mutations can be mapped on
	    	## another protein of the domain, plot the BLUE ASTERISK
	    	## instead of RED
#	    	misaligned <- misalnFreq[qvalSignif_x] > .7
	    	# if(any(misaligned)) {
	    	# 	text(qvalSignif_x[misaligned], qvalSignif_y[misaligned]
	    	# 		, '*', cex=2, col='blue')
	    	# 	legend('topright', pch='*', col='blue', legend='possibly misaligned', cex=2)
	    	# }
	    }
	}

	})

setGeneric('lmPlot', function(object, conservation=NULL , splitLen=NULL) standardGeneric('lmPlot'))
setMethod('lmPlot', 'LowMACA', function(object, conservation=NULL , splitLen=NULL) {
	opar <- par("mfrow" , "mar")
	on.exit(par(opar))
	if( length(object@alignment)==0 )
		stop('Perform alignment on the object before plotting.')

	if( length(object@mutations)==0 )
		stop('Retrieve mutations for the object before plotting.')

	if( length(object@entropy)==0 )
		stop('Perform entropy calculation on the object before plotting.')

	if( nrow(object@mutations$data)==0 ) {
		message('No mutations available for this object.')
	} else {
		if(is.null(conservation))
			conservation <- object@entropy$conservation_thr
		if( is.null(splitLen) )
			splitLen <- ncol(object@mutations$aligned)

		windowlimits <- 1:ncol(object@mutations$aligned)
		windowlimitsSplit <- split(windowlimits, ceiling(windowlimits/splitLen))

		mode <- object@arguments$mode
		nObj <- nrow(object@arguments$input)

		par(mar=c(2,4,2,2))
		if( nObj > 1 ) {
			layoutMatrix <- as.matrix(c(1,1,2,2,3,4,4))
			multipleMat <- as.list(rep(NA), length(windowlimitsSplit))
			for( i in 1:length(windowlimitsSplit) ) 
				multipleMat[[i]] <- layoutMatrix+(max(layoutMatrix)*(i-1))
			layoutMatrix <- do.call('rbind', multipleMat)
			plotType <- 4
		} else if( mode == 'genes' ) {
			layoutMatrix <- as.matrix(c(1,1,1,1,3,2,2,2,2))
			multipleMat <- as.list(rep(NA), length(windowlimitsSplit))
			for( i in 1:length(windowlimitsSplit) ) 
				multipleMat[[i]] <- layoutMatrix+(max(layoutMatrix)*(i-1))
			layoutMatrix <- do.call('rbind', multipleMat)
			plotType <- 3
		} else {
			layoutMatrix <- as.matrix(c(1,1,2,2))
			multipleMat <- as.list(rep(NA), length(windowlimitsSplit))
			for( i in 1:length(windowlimitsSplit) ) 
				multipleMat[[i]] <- layoutMatrix+(max(layoutMatrix)*(i-1))
			layoutMatrix <- do.call('rbind', multipleMat)
			plotType <- 2
		}
		layout(layoutMatrix)

		for( i in 1:length(windowlimitsSplit) ) {

			windowlimits <- windowlimitsSplit[[i]]

			origMAlign <- object@alignment$CLUSTAL
			m <- consensusMatrix(origMAlign)
			motif <- pcm2pfm(m[, windowlimits])
			motif <- new('pfm', mat=motif, name='', color=colorset(alphabet='AA'))
			motif@color <- c("-"="#FFFFFF" , motif@color)
			#m <- consensusMatrix(origMAlign)
			#motif <- pcm2pfm(m[, windowlimits])
			#motif <- new('pfm', mat=motif, name='', color=colorset(alphabet='AA'))
			log10pval <- object@entropy$log10pval
			pvalue <- object@entropy$pvalue

			## plot 1
			if( plotType == 3) par(mar=c(0,4,4,2))
			par(mar=c(2,4,4,2))
			myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab")
			lenAln <- ncol(object@mutations$aligned[,windowlimits,drop=FALSE])
			mut_aligned <- object@mutations$aligned[,windowlimits,drop=FALSE]
			# if( plotType == 3 ) colnames(mut_aligned) <- 1:length(mut_aligned)
			#print(nObj)
			mp <- barplot(
				mut_aligned
				, col=myPalette(nrow(object@mutations$aligned[,windowlimits,drop=FALSE]))
				, border=if(lenAln<50) 'black' else NA
				, main=paste( "Mutations from position", min(windowlimits), "to", max(windowlimits),
					if(nObj>1) "of the multiple alignment" else {if(mode == 'genes') "of the protein" else "of the domain"},
					"\nLog10 P-Value:" , round(log10pval , 2) , 
					"P-Value:" , signif( pvalue , 2 ) , "Bw:" , signif(object@entropy$bw,3) 
					) 
				, ylab='Mutation #'
				, ylim=c(0, max(colSums(object@mutations$aligned)))
				)
			axis(1, at=mp, labels=windowlimits, las=2)
			par(mar=c(2,4,2,2))
			## plot 2
			
			# if( plotType == 3) par(mar=c(2,4,0,2))
			nullProfile(object, conservation=conservation , windowlimits)
			# if( plotType == 3) {
			# 	pSeq <- round(seq(1, nchar(object@arguments$input$AMINO_SEQ), length.out=20))
			# 	axis(1, at=pSeq, labels=pSeq)
			# }
			
			## if there is more than one object also plot 
			## conservation plots	
			if( nObj > 1 ) {
				## plot 3
				mp <- barplot(object@alignment$df$conservation[windowlimits]
					, col='darkgoldenrod1', ylab='Conservation', ylim=c(0,1))
				axis(1, at=mp, labels=windowlimits, las=2)
				## plot 4
				plot.new()
				vps <- baseViewports()
				pushViewport(vps$inner, vps$figure, vps$plot)
				plotMotifLogo(motif, p=motif@background
					, colset=motif@color[rownames(motif@mat)]
					, ylab='Logo', xaxis=FALSE, newpage=FALSE
					)
				axis(1,at=1/length(windowlimits)*(seq_along(windowlimits)-.5)
					, labels=windowlimits,las=2)
				popViewport(3)
				#plot(motif, ylab='Logo')
				# plot.new()
			}

			#############
			## in case the analysis is on a single gene 
			## plot its domains
			############### 
			if( plotType == 3 ) {

				myPfam <- getMyPfam()
				gene <- as.character(object@arguments$input$Gene_Symbol)
				domains <- myPfam[myPfam$Gene_Symbol==gene
					, c("Envelope_Start" , "Envelope_End" , "Pfam_Name")]
				## create empty plot
				plot.new()
				plot.window(
					xlim=range(windowlimits)
					, ylim=c(0,0.05)
					)
				par(mar=c(0,0,0,0))
				## plot domains
				if(nrow(domains)>0) {
					for( i in 1:nrow(domains) ) {
						int <- intersect(domains$Envelope_Start[i]:domains$Envelope_End[i], windowlimits)
						if( length(int)>0 ) {
							domains$Envelope_Start[i] <- min(int)
							domains$Envelope_End[i] <- max(int)
						} else {
							domains <- domains[-i,]
						}
					}
					for (i in 1:nrow(domains)) {
						xleft=as.numeric(domains[i , "Envelope_Start"])
						xright=as.numeric(domains[i , "Envelope_End"])
						ytop=0.05
						ybottom=0
						col=topo.colors(nrow(domains) , alpha=0.5)[i]
						characters <- nchar(domains[i , "Pfam_Name"])
						rect(xleft=xleft , xright=xright 
							, ytop=ytop , ybottom=ybottom 
							, col=col )
						if(characters<=3){
							text(x=(xright+xleft)/2 , y=0.025 
								, domains[i , "Pfam_Name"] 
								, font=2)
						} else {
							if((xright-xleft)<=100){
								text(x=(xright+xleft)/2 , y=0.025 
									, domains[i , "Pfam_Name"] 
									, font=2 , cex=0.8)
							} else {
								text(x=(xright+xleft)/2 , y=0.025 
									, domains[i , "Pfam_Name"] 
									, font=2)
							}
						}
					}
				} else {
					text(x=length(windowlimits)/2, y=0.025
						, 'no pfam domains within gene sequence', cex=1.5)
				}
				par(mar=c(2,4,4,2))
			}
		}

	}

	})


# setGeneric('nullProfile', function(object , conservation=NULL) standardGeneric('nullProfile'))
# setMethod('nullProfile', 'LowMACA', function(object , conservation=NULL) {

# 	if( nrow(object@mutations$data)==0 ) {
# 		message('No mutations available for this object.')
# 	} else {
# 		if(is.null(conservation))
# 			conservation <- object@entropy$conservation_thr
# 		mean <- object@alignment$df$mean
# 		lowerThreshold <- object@alignment$df$lTsh
# 		upperThreshold <- object@alignment$df$uTsh
# 		profile <- object@alignment$df$profile
# 		pvalue <- object@alignment$df$pvalue
# 		if( conservation != object@entropy$conservation_thr ) {
# 			## in case a new conservation threshold is given,
# 			## recalculate the qvalue
# 			filteredPvals <- object@alignment$df$pvalue
# 			filteredPvals[object@alignment$df$conservation < conservation] <- NA
# 			qvalue <- p.adjust(filteredPvals, method='BH')
# 		} else {
# 			## use qvalue already stored
# 			qvalue <- object@alignment$df$qvalue
# 		}
# 		#misalnFreq <- object@alignment$df$misalnFreq

# 	    qvalSignif_x <- which(qvalue < 5e-2)
# 	    qvalSignif_y <- profile[qvalSignif_x] + max(profile)/20
# 	    over <- profile > upperThreshold
# 	    ylim <- range(c(profile, upperThreshold, lowerThreshold, qvalSignif_y), na.rm=TRUE)

# 	    # red bars of the resudues over the threshold
# 	    plot(profile, main='', type='h'
# 	        , ylim=ylim, bty='n',xaxt='n',xlab='', col='orange', lwd=2)
# 	    # black bars of the other resudues and of residues over, but only
# 	    # the part below the threshold
# 	    blackProfile <- sapply(1:length(profile), 
# 	    	function(i) min(profile[i], upperThreshold[i]))
# 	    lines(blackProfile, col='black', lwd=2, type='h')
# 	    lines(upperThreshold, lty=2, lwd=2, col='blue')
# 	    ### plot an asterisk above the residues which are significant in 
# 	    # terms of the pvalue
# 	    if( length(qvalSignif_x) > 0 ) {
# 	    	text(qvalSignif_x, qvalSignif_y, '*', cex=2, col='red')
# 	    	## if more than 70% of mutations can be mapped on
# 	    	## another protein of the domain, plot the BLUE ASTERISK
# 	    	## instead of RED
# 	    	#misaligned <- misalnFreq[qvalSignif_x] > .7
# 	    	# if(any(misaligned)) {
# 	    	# 	text(qvalSignif_x[misaligned], qvalSignif_y[misaligned]
# 	    	# 		, '*', cex=2, col='blue')
# 	    	# 	legend('topright', pch='*', col='blue', legend='possibly misaligned', cex=2)
# 	    	# }
# 	    }
# 	}
# 	})

# setGeneric('lmPlot', function(object , conservation=NULL) standardGeneric('lmPlot'))
# setMethod('lmPlot', 'LowMACA', function(object , conservation=NULL) {
# 	if( nrow(object@mutations$data)==0 ) {
# 		message('No mutations available for this object.')
# 	} else {
# 		if(is.null(conservation))
# 			conservation <-- object@entropy$conservation_thr
# 		mode <- object@arguments$mode
# 		nObj <- nrow(object@arguments$input)

# 		origMAlign <- object@alignment$CLUSTAL
# 		m <- consensusMatrix(origMAlign)
# 		motif <- pcm2pfm(m)
# 		motif <- new('pfm', mat=motif, name='', color=colorset(alphabet='AA'))
# 		motif@color <- c("-"="#FFFFFF" , motif@color)
# 		log10pval <- object@entropy$log10pval
# 		pvalue <- object@entropy$pvalue

# 		par(mar=c(2,4,2,2))
# 		if( nObj > 1 ) {
# 			layoutMatrix <- as.matrix(c(1,1,2,2,3,4,4))
# 			plotType <- 4
# 		} else if( mode == 'genes' ) {
# 			layoutMatrix <- as.matrix(c(1,1,1,1,3,2,2,2,2))
# 			plotType <- 3
# 		} else {
# 			layoutMatrix <- as.matrix(c(1,1,2,2))
# 			plotType <- 2
# 		}
# 		layout(layoutMatrix)

# 		## plot 1
# 		if( plotType == 3) par(mar=c(0,4,2,2))
# 		myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab")
# 		lenAln <- ncol(object@mutations$aligned)
# 		mut_aligned <- object@mutations$aligned
# 		# if( plotType == 3 ) colnames(mut_aligned) <- 1:length(mut_aligned)
# 		barplot(
# 			mut_aligned
# 			, col=myPalette(nrow(object@mutations$aligned))
# 			, border=if(lenAln<300) 'black' else NA
# 			, main=paste( "Mutations by position\nLog10 P-Value:" , round(log10pval , 2) , 
# 					"P-Value:" , signif( pvalue , 2 ) , "Bw:" , signif(object@entropy$bw,3) ) 
# 			, ylab='Mutations'
# 			)
# 		## plot 2
# 		if( plotType == 3) par(mar=c(2,4,0,2))
# 			nullProfile(object , conservation=conservation)
# 		if( plotType == 3) {
# 			pSeq <- round(seq(1, nchar(object@arguments$input$AMINO_SEQ), length.out=20))
# 			axis(1, at=pSeq, labels=pSeq)
# 		}
# 		## if there is more than one object also plot 
# 		## conservation plots	
# 		if( nObj > 1 ) {
# 			## plot 3
# 			barplot(object@alignment$df$conservation
# 				, col='darkgoldenrod1', ylab='Conservation')
# 			## plot 4
# 			plotMotifLogo(motif, ylab='Logo' , colset=motif@color[rownames(motif@mat)] , p=motif@background)
# 		}

# 		#############
# 		## in case the analysis is on a single gene 
# 		## plot its domains
# 		############### 
# 		if( plotType == 3 ) {

# 			myPfam <- getMyPfam()
# 			gene <- as.character(object@arguments$input$Gene_Symbol)
# 			domains <- myPfam[myPfam$Gene_Symbol==gene
# 				, c("Envelope_Start" , "Envelope_End" , "Pfam_Name")]
# 			## create empty plot
# 			plot.new()
# 			plot.window(
# 				xlim=c(1,nchar(object@arguments$input$AMINO_SEQ))
# 				, ylim=c(0,0.05)
# 				)
# 			par(mar=c(0,0,0,0))
# 			## plot domains
# 			if(nrow(domains)>0) {
# 				for (i in 1:nrow(domains)) {
# 					xleft=as.numeric(domains[i , "Envelope_Start"])
# 					xright=as.numeric(domains[i , "Envelope_End"])
# 					ytop=0.05
# 					ybottom=0
# 					col=topo.colors(nrow(domains) , alpha=0.5)[i]
# 					characters <- nchar(domains[i , "Pfam_Name"])
# 					rect(xleft=xleft , xright=xright 
# 						, ytop=ytop , ybottom=ybottom 
# 						, col=col )
# 					if(characters<=3){
# 						text(x=(xright+xleft)/2 , y=0.025 
# 							, domains[i , "Pfam_Name"] 
# 							, font=2)
# 					} else {
# 						if((xright-xleft)<=100){
# 							text(x=(xright+xleft)/2 , y=0.025 
# 								, domains[i , "Pfam_Name"] 
# 								, font=2 , cex=0.8)
# 						} else {
# 							text(x=(xright+xleft)/2 , y=0.025 
# 								, domains[i , "Pfam_Name"] 
# 								, font=2)
# 						}
# 					}
# 				}
# 			} else {
# 				text(x=nchar(object@arguments$input$AMINO_SEQ)/2, y=0.025
# 					, 'no pfam domains within gene sequence', cex=1.5)
# 			}
# 		}
# 	}
# 	})

#Plot a Single specified sequence from a multiple alignment LowMACA object
setGeneric('lmPlotSingleSequence', function(object , gene , mail=NULL , perlCommand="perl") standardGeneric('lmPlotSingleSequence'))
setMethod('lmPlotSingleSequence', 'LowMACA', function(object , gene , mail=NULL , perlCommand="perl") {
	mode <- object@arguments$mode
	myData <- object@mutations$data[ object@mutations$data$Gene_Symbol==gene , ]
	if(nrow(myData)==0)
		stop("No mutation for the specified Gene in this LowMACA object")
	newLM <- newLowMACA(genes=gene , pfam=object@arguments$pfam)
	newLM <- setup(newLM , repos=myData , mail=mail , perlCommand=perlCommand)
	newLM <- entropy(newLM)
	lmPlot(newLM)
})



setGeneric('bpAll', function(object) standardGeneric('bpAll'))
setMethod('bpAll', 'LowMACA', function(object) {
	if( nrow(object@mutations$data)==0 ) {
		message('No mutations available for this object.')
	} else {

		myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")), space="Lab")

		mut_aligned_extended <- object@mutations$aligned
		gene_subset <- object@mutations$aligned
		colnames(gene_subset) <- as.character(1:ncol(gene_subset))
		log10pval <- ifelse( is.numeric(object@entropy$log10pval) , object@entropy$log10pval , 0)
		pvalue <- ifelse( is.numeric(object@entropy$pvalue) , object@entropy$pvalue , 1)
		barplot(gene_subset 
			, col=myPalette(nrow(gene_subset))
			, border=NA
			, las=2
			, cex.names=0.3 
			, main=paste( "Mutations by position\nEntropy Z-Score:" , round(log10pval , 2) , 
					"P-Value:" , signif( pvalue , 2 ) ) 
			)
		legend("topright" 
			, legend=rownames(gene_subset) 
			, fill=myPalette(nrow(gene_subset)) 
			, col=myPalette(nrow(gene_subset)) 
			, border=myPalette(nrow(gene_subset)) 
			, cex=0.5)
	}
	})


setGeneric('protter', function(object, filename='protter.png', threshold=5e-2 , conservation=NULL) standardGeneric('protter'))
setMethod('protter', 'LowMACA', function(object, filename='protter.png', threshold=5e-2 , conservation=NULL) {
	if(is.null(conservation))
		conservation <- object@entropy$conservation_thr
	if(is.null(conservation))
	  stop("No conservation threshold found. Call entropy method or provide one")
	pvalue <- object@alignment$df$pvalue
	#qvalue <- object@alignment$df$qvalue
	if( conservation != object@entropy$conservation_thr ) {
			## in case a new conservation threshold is given,
			## recalculate the qvalue
			filteredPvals <- object@alignment$df$pvalue
			filteredPvals[object@alignment$df$conservation < conservation] <- NA
			qvalue <- p.adjust(filteredPvals, method='BH')
	} else {
			## use qvalue already stored
			qvalue <- object@alignment$df$qvalue
	}
	message(paste('Writing', filename, '...'))
	# remove gaps (possible in datum mode)
	consensus <- object@alignment$df$consensus
	Sequence <- paste(consensus[consensus!='-'], collapse='')
	Mutation_pvalues <- paste(which(pvalue < threshold), collapse=',')
	Mutation_qvalues <- paste(which(qvalue < threshold), collapse=',')
	WebQuery <- paste("http://wlab.ethz.ch/protter/create?seq=", Sequence, 
		"&tm=auto&mc=lightsalmon&lc=blue&tml=numcount&numbers&legend&n:signal%20peptide,cc:white,fc:blue,bc:blue=Phobius.SP&n:N-glyco%20motif,s:box,fc:forestgreen,bc:forestgreen=(N).[ST]&n:MUT_pvalue,bc:orange="
		, Mutation_pvalues, "&n:MUT_qvalue,bc:red=", Mutation_qvalues, "&format=png",sep="")
	if(Sys.info()['sysname']=="Windows"){
		tryCatch( download.file(WebQuery, destfile=filename , mode="wb") 
			, error=function(e) {
				png(filename)
				par(mar = c(0,0,0,0))
        		plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
        		text(x = 0.5, y = 0.5, paste("The plot is not available\n",
                              				"Protter service is out of order\n",
                              				"Sorry, try another time"), 
                              cex = 1.6, col = "black")
        		dev.off()
		})
	} else {
		tryCatch( download.file(WebQuery, destfile=filename) 
			, error=function(e) {
				png(filename)
				par(mar = c(0,0,0,0))
        		plot(c(0, 1), c(0, 1), ann = F, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
        		text(x = 0.5, y = 0.5, paste("The plot is not available\n",
                              				"Protter service is out of order\n",
                              				"Sorry, try another time"), 
                              cex = 1.6, col = "black")
        		dev.off()
		})
	message(paste("Protter plot saved as:" , normalizePath(filename)))
	}
})

Try the LowMACA package in your browser

Any scripts or data that you put into this service are public.

LowMACA documentation built on Nov. 8, 2020, 8:14 p.m.