SnapATAC: Single Nucleus Analysis Package for ATAC-Seq

Documented in addBmatToSnap addGmatToSnap addPmatToSnap barcodeInSnapFile calBmatCor createSnap createSnapFromBmat createSnapFromGmat createSnapFromPmat exportMetaData extractReads filterBins filterCells isSnapFile readMetaData rmBmatFromSnap rmGmatFromSnap rmPmatFromSnap showBinSizes snapRbind

#' Create an empty snap object
#'
#' This function creates an empty snap object
#'
#' @examples
#' x.sp = newSnap();
#' 
#' @return An empty snap object
#'
#' @importFrom methods new
#' @importFrom GenomicRanges GRanges
#' @export
#' 
newSnap <- function () {
	metaData=data.frame();
	des = character()
	file = as.character(c());
	sample = as.character(c());
	barcode = as.character(c());
	feature = GRanges();
	peak = GRanges();		
	metaData = data.frame();
	bmat=Matrix(nrow=0, ncol=0, sparse=TRUE);		
	pmat=Matrix(nrow=0, ncol=0, sparse=TRUE);
	gmat=Matrix(nrow=0, ncol=0, sparse=TRUE);
	mmat=matrix(0,0,0);
	jmat=newJaccard();
	smat=newDimReduct();	
	graph=newKgraph();
	regModel=c();
	tsne=matrix(nrow=0, ncol=0);	
	umap=matrix(nrow=0, ncol=0);	
	cluster=factor();
	res = new("snap", 
			  des=des,
			  file=file,
			  sample=sample,
			  barcode=barcode, 
			  feature=feature, 
			  peak=peak, 
			  metaData=metaData, 
			  bmat=bmat, 
			  pmat=pmat, 
			  gmat=gmat,
			  mmat=mmat, 
			  jmat=jmat, 
			  smat=smat, 
			  graph=graph, 
			  tsne=tsne, 
			  umap=umap, 
			  cluster=cluster
			  );	
}

#' Show bin sizes in a snap file
#'
#' This function takes a snap-format file name as input and check the bin 
#' sizes or resolutions have been generated for count matrix
#'
#' @param file character. input snap-format file name
#'
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' showBinSizes(file.name);
#' 
#' @return integer vector. A vector of integers indicating the bin sizes
#'
#' @export
#'
showBinSizes <- function(file) {
  UseMethod("showBinSizes", file);
}

#' @export
showBinSizes.default <- function(file){
	# this is to check what are the binSizes have been generated
	if(!file.exists(file)){stop("file does not exist!")}
	if(!isSnapFile(file)){stop("input file is not a snap file\n")}		
	binSizeList = tryCatch(binSizeList <- h5read(file, '/AM/binSizeList'), error = function(e) {print(paste("Warning @check.bin.size: '/AM/binSizeList' not found in ", file)); return(numeric())})
	return(binSizeList);
}

#' summarySnap for snap object.
#'
#' This function takes a snap object and returns the summary statistics
#' @name summarySnap
#' @param obj snap; a snap object
#' @rdname summarySnap-methods
#' @importFrom stats median
#' @exportMethod summarySnap
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' x.sp = createSnap(file.name, sample="demo");
#' summarySnap(x.sp);
setGeneric("summarySnap", function(obj) standardGeneric("summarySnap"))

#' @rdname summarySnap-methods
#' @aliases summarySnap,snap-method
setMethod("summarySnap", "snap", function(obj){
	if(nrow(obj@metaData) == 0){stop("metaData is empty")}
	barcode = obj@metaData;
	message("Total  number of barcodes: ", length(obj@barcode));	
	message("Median number of sequencing fragments: ", median(barcode$TN));
	message("Median number of uniquely mapped fragments: ", median(barcode$UQ));
	message("Median number of mappability ratio: ", round(median((barcode$UM+1)/(barcode$TN+1)),2));
	message("Median number of properly paired ratio: ", round(median((barcode$PP+1)/(barcode$UM+1)),2));
	message("Median number of duplicate ratio: ", round(median(1 - (barcode$UQ+1)/(barcode$PP+1)),2));
	message("Median number of chrM ratio: ", round(median((barcode$CM+1) / (barcode$UQ+1)),2));
	message("Median number of unique molecules (UMI): ", median(barcode$UQ));
});


#' nrow for snap object.
#'
#' This function takes a snap object and returns number of cells
#' @name nrow
#' @param x snap; a snap object
#' @examples
#' data(demo.sp);
#' nrow(demo.sp);
#' @rdname nrow-methods
#' @aliases nrow,snap-method
#' @exportMethod nrow
setMethod("nrow", "snap", function(x) length(x@barcode));

#' colSums for snap objects
#'
#' This function takes a snap object and returns the column sums of its count matrix.
#' @name colSums
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use c("bmat", "pmat", "gmat")
#' @param na.rm A logical variable indicates wether to remove NA in the matrix
#' @examples
#' data(demo.sp);
#' a = colSums(demo.sp, mat="bmat");
#' b = colSums(demo.sp, mat="pmat");
#' d = colSums(demo.sp, mat="gmat");
#' @rdname colSums-methods
#' @aliases colSums,snap-method
#' @exportMethod colSums
setMethod("colSums", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::colSums(mat.use, na.rm);
	return(res);
});

#' rowSums for snap objects
#'
#' This function takes a snap object and returns the row sums of its count matrix.
#' @name rowSums
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use
#' @param na.rm A logical variable indicates wether to remove NA in the matrix
#' 
#' @examples
#' data(demo.sp);
#' a = rowSums(demo.sp, mat="bmat");
#' b = rowSums(demo.sp, mat="pmat");
#' d = rowSums(demo.sp, mat="gmat");
#' 
#' @rdname rowSums-methods
#' @aliases rowSums,snap-method
#' @exportMethod rowSums
setMethod("rowSums", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::rowSums(mat.use, na.rm);
	return(res);
});


#' rowMeans for snap objects
#'
#' This function takes a snap object and returns the row means of its count matrix.
#' 
#' @name rowMeans
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use for calculation
#' @param na.rm A logical variable indicates wether to remove NA in the matrix
#'
#' @examples
#' data(demo.sp);
#' a = rowMeans(demo.sp, mat="bmat");
#' b = rowMeans(demo.sp, mat="pmat");
#' d = rowMeans(demo.sp, mat="gmat");
#' 
#' @rdname rowMeans-methods
#' @importFrom methods slot
#' @aliases rowMeans,snap-method
#' @exportMethod rowMeans
setMethod("rowMeans", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::rowMeans(mat.use, na.rm);
	return(res);
});


#' colMeans for snap objects
#'
#' This function takes a snap object and returns the column means of its count matrix.
#' 
#' @name colMeans
#' @param x A snap object
#' @param mat A charater object indicates what matrix slot to use c("bmat", "pmat", "gmat").
#' @param na.rm A logical variable indicates wether to remove NA in the matrix.
#' 
#' @examples
#' data(demo.sp);
#' a = colMeans(demo.sp, mat="bmat");
#' b = colMeans(demo.sp, mat="pmat");
#' d = colMeans(demo.sp, mat="gmat");
#' 
#' @rdname colMeans-methods
#' @importFrom methods slot
#' @aliases colMeans,snap-method
#' @exportMethod colMeans
setMethod("colMeans", "snap", function(x, mat=c("bmat", "pmat", "gmat"), na.rm=TRUE){
	mat = match.arg(mat);
	mat.use = methods::slot(x, mat);
	if((x=nrow(mat.use))==0L){
		stop("mat is empty")
	}
	res = Matrix::colMeans(mat.use, na.rm);
	return(res);
});

#' Check snap object
#'
#' This function takes any object as input and check if it is a snap object
#' @param obj A snap object
#' @examples
#' data(demo.sp);
#' is.snap(demo.sp);
#' @rdname is.snap-methods
#' @exportMethod is.snap
setGeneric("is.snap", function(obj) standardGeneric("is.snap"))

#' @rdname is.snap-methods
#' @aliases is.snap,snap-method
setMethod("is.snap", "snap", function(obj) return(is(obj, "snap")));

#' subsetting for snap objects
#'
#' This function takes a snap object and returns the subset of snap object.
#' @param x snap; a snap object
#' @param i integer; selected barcode index
#' @param j integer; selected feature index
#' @param mat character; indicates the slot to subsetting
#' @param drop character; 
#' @examples
#' data(demo.sp);
#' demo.sp[1:10,];
#' demo.sp[,1:10,mat="bmat"];
#' demo.sp[,1:10,mat="pmat"];
#' @export
setMethod("[", "snap",
	function(x,i,j,mat=c("bmat", "pmat", "gmat"), drop="missing"){
		.barcode = x@barcode;
		.file = x@file;
		.sample = x@sample;
		.feature = x@feature;
		.peak = x@peak;
		.bmat = x@bmat;
		.pmat = x@pmat;
		.gmat = x@gmat;
		.mmat = x@mmat;
		.jmat = x@jmat;
		.smat = x@smat;
		.graph = x@graph;
		.cluster = x@cluster;
		.tsne = x@tsne;
		.umap = x@umap;
		.metaData = x@metaData;
		# a single row or column
       if(!missing(i)){
		   if(max(i) > nrow(x)){
			   stop("idx exceeds number of cells");
		   }
		   if(nrow(.bmat) > 0){.bmat <- .bmat[i,,drop=FALSE]}
		   if(nrow(.pmat) > 0){.pmat <- .pmat[i,,drop=FALSE]}
		   if(nrow(.gmat) > 0){.gmat <- .gmat[i,,drop=FALSE]}	   
		   if(nrow(.mmat) > 0){.mmat <- .mmat[i,,drop=FALSE]}	   
		   if(nrow(.jmat@jmat) > 0){.jmat <- .jmat[i,,drop=FALSE]}
		   if(nrow(.smat@dmat) > 0){.smat <- .smat[i,,drop=FALSE]}
		   if(nrow(.tsne) > 0){.tsne <- .tsne[i,,drop=FALSE]}
		   if(nrow(.umap) > 0){.umap <- .umap[i,,drop=FALSE]}
		   if(nrow(.graph@mat) > 0){.graph <- .graph[i,,drop=FALSE]}
		   if(nrow(.metaData) > 0){.metaData <- .metaData[i,,drop=FALSE]}
		   if(length(.cluster) > 0){.cluster <- .cluster[i,drop=FALSE]}
		   if(length(.barcode) > 0){.barcode <- .barcode[i,drop=FALSE]}
		   if(length(.file) > 0){.file <- .file[i,drop=FALSE]}
		   if(length(.sample) > 0){.sample <- .sample[i,drop=FALSE]}
	   }
	   if(!missing(j)){
   			mat = match.arg(mat);
	   		if(mat == "bmat"){
	 		   if(ncol(.bmat) > 0){.bmat <- .bmat[,j,drop=FALSE]}
			   if(length(.feature) > 0){.feature <- .feature[j];}	   
	   		}else if(mat == "pmat"){
 	 		   if(ncol(.pmat) > 0){.pmat <- .pmat[,j,drop=FALSE]}
			   if(length(.peak) > 0){.peak <- .peak[j];}	   
	   		}else if(mat == "gmat"){
 	 		   if(ncol(.gmat) > 0){.gmat <- .gmat[,j,drop=FALSE]}
	   		}
	   }
	   x@bmat = .bmat;
	   x@pmat = .pmat;
	   x@gmat = .gmat;
	   x@mmat = .mmat;
	   x@barcode = .barcode;
	   x@file = .file;
	   x@sample = .sample;
	   x@peak = .peak;
	   x@feature = .feature;
	   x@metaData = .metaData;
	   x@umap = .umap;
	   x@feature = .feature;
	   x@jmat = .jmat;
	   x@smat = .smat;
	   x@graph = .graph;
	   x@cluster = .cluster;
	   x@tsne = .tsne;
	   return(x);
})

#' Check barcode existance in snap file
#'
#' This function takes an array of barcodes and a snap-format file as input 
#' and check whether selected barcodes exist in the snap file.
#' 
#' @param barcode An array of selected barcodes.
#' @param file A snap format file.
#' 
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' barcodes = c("ACATTGGCAACCAGGTTGCTGGTATTGGAAGT", "ACATTGGCAAGAGGCAACAAGGATATCTGAGT");
#' barcodeInSnapFile(barcodes, file.name);
#' 
#' @return Return an array of logical variable indicates whether the 
#' barcode exists in snap file.
#' @importFrom rhdf5 h5read
#' @importFrom methods is
#' @export
barcodeInSnapFile <- function(barcode, file){
	UseMethod("barcodeInSnapFile", barcode);
}

#' @export
barcodeInSnapFile.default <- function(barcode, file){
	
	if(missing(file)){
		stop("file is missing");
	}else{
		if(!file.exists(file)){
			stop("file does not exist");
		}
		if(!isSnapFile(file)){
			stop("file is not a snap file");
		}
	}
	
	if(missing(barcode)){
		stop("barcode is missing");
	}else{
		if(!is(barcode, "character")){
			stop("barcode is not a character object");
		}
	}
	
	barcode.ref = as.character(tryCatch(barcode.ref <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @barcodeInSnapFile: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
	        options(scipen=999);
	return(barcode %in% barcode.ref);

}

#' Create a snap object from a snap file
#'
#' This function takes a snap-format file as input and create
#' a snap object.
#'
#' @param file Name of a snap-format file.
#' @param sample A short sample name (i.g. "MOS.rep1").
#' @param description Description of the experiment [NULL].
#' @param do.par A logical variable indicates if run this using multiple processors [FALSE].
#' @param num.cores Number of processers to use [1].
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' 
#' @return A snap object
#' @importFrom rhdf5 h5read
#' @importFrom parallel mclapply
#' @importFrom methods is
#' @export
createSnap <- function(file, sample, description, do.par, num.cores) {
  UseMethod("createSnap", file);
}

#' @export
createSnap.default <- function(file, sample, description=NULL, do.par=FALSE, num.cores=1){
	
	if(missing(file)){
		stop("file is missing");
	}else{
		if(!is(file, "character")){
			stop("file is not character")
		}
		if(any(duplicated(file))){
			stop("file has duplicate name");
		}		
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);

	fileList = as.list(file);	
	if(missing(sample)){
		stop("sample is missing");
	}else{
		if(!is(sample, "character")){
			stop("sample is not character")
		}
		if(any(duplicated(sample))){
			stop("sample has duplicate name");
		}
	}
	
	if(length(sample) != length(file)){
		stop("sample has different length with file");
	}
	
	sampleList = as.list(sample);
	
	if(!(is.null(description))){
		if(!is(description, "character")){
			stop("description must be character object")
		}
	}else{
		description=character()
	}
	
	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	message("Epoch: reading the barcode session ...");
	if(do.par){
		obj.ls = mclapply(as.list(seq(fileList)), function(i){
			createSnapSingle(file=fileList[[i]], sample=sampleList[[i]]);
		}, mc.cores=num.cores);
	}else{
		obj.ls = lapply(as.list(seq(fileList)), function(i){
			createSnapSingle(file=fileList[[i]], sample=sampleList[[i]]);
		});		
	}
	
	obj = Reduce(snapRbind, obj.ls);
	rm(obj.ls);
	gc();
	obj@des = description;
	return(obj);
}

#' Add cell-by-bin matrix
#' 
#' This function takes a snap object as input and add the cell-by-bin 
#' matrix to the existing snap object.
#' 
#' @param obj A snap object to add cell-by-bin matrix.
#' @param bin.size Cell-by-bin matrix with bin size of bin.size 
#' will be added to snap object.
#' @param do.par A logical variable indicates whether use multiple processors [FALSE].
#' @param num.cores Number of processors to use [1].
#' 
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' showBinSizes(file.name);
#' demo.sp = addBmatToSnap(demo.sp, bin.size=100000, do.par=FALSE);
#' 
#' @return Return a snap object
#' @importFrom rhdf5 h5read
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom parallel mclapply
#' @importFrom methods is
#' @export
addBmatToSnap <- function(obj, bin.size, do.par, num.cores){
  UseMethod("addBmatToSnap", obj);
}

#' @export
addBmatToSnap.default <- function(obj, bin.size=5000, do.par=FALSE, num.cores=1){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);
	
	fileList = as.list(unique(obj@file));

	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if BM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "AM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "AM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following nsap files do not contain AM session")
		print(fileList[idx])
		stop()
	}
	
	if(any(do.call(c, lapply(fileList, function(x){(bin.size %in% showBinSizes(x))})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){(bin.size %in% showBinSizes(x))})) == FALSE)
		print("error: chosen bin size does not exist in the following snap files")
		print(fileList[idx])
		stop()
	}

	# check if bins match
	bin.list = lapply(fileList, function(x){
		readBins(x, bin.size=bin.size)
	})
	
	if(!all(sapply(bin.list, FUN = identical, bin.list[[1]]))){
		stop("bins does not match between snap files, please regenerate the cell-by-bin matrix by snaptools")
	}
	
	# read the snap object
	message("Epoch: reading cell-bin count matrix session ...");
	if(do.par){
		obj.ls = mclapply(fileList, function(file){
			idx = which(obj@file == file)
			addBmatToSnapSingle(obj[idx,], file, bin.size=bin.size);
		}, mc.cores=num.cores);		
	}else{
		obj.ls = lapply(fileList, function(file){
			idx = which(obj@file == file)
			addBmatToSnapSingle(obj[idx,], file, bin.size=bin.size);
		});		
	}
	
	# combine
	if((x=length(obj.ls)) == 1L){
		res = obj.ls[[1]]
	}else{
		res = Reduce(snapRbind, obj.ls);		
	}
	obj@feature = res@feature;
	obj@bmat = res@bmat;
	rm(res, obj.ls);
	gc()
	return(obj);
}

#' Add cell-by-peak matrix
#' 
#' This function takes a snap object as input and add the 
#' cell-by-peak matrix to the existing snap object.
#' 
#' @param obj A snap object.
#' @param do.par A logical varaible indicates whether to use multiple processors [FALSE].
#' @param num.cores Number of processors to use [1].
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' showBinSizes(file.name);
#' demo.sp = addPmatToSnap(demo.sp, do.par=FALSE);
#' 
#' @importFrom rhdf5 h5read
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
addPmatToSnap <- function(obj, do.par, num.cores){
  UseMethod("addPmatToSnap", obj);
}

#' @export
addPmatToSnap.default <- function(obj, do.par=FALSE, num.cores=1){
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}

	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);
	
	
	fileList = as.list(unique(obj@file));

	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if PM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "PM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "PM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following snap files do not contain PM session")
		print(fileList[idx])
		stop()
	}
	
	# check if bins match
	peak.list = lapply(fileList, function(x){
		readPeaks(x)
	})
	
	if(!all(sapply(peak.list, FUN = identical, peak.list[[1]]))){
		stop("peaks does not match between snap files, please regenerate the cell-by-peak matrix by snaptools using the same peak file")
	}
	
	# read the snap object
	message("Epoch: reading cell-peak count matrix session ...");
	if(do.par){
		obj.ls = mclapply(fileList, function(file){
			idx = which(obj@file == file)
			addPmatToSnapSingle(obj[idx,], file);
		}, mc.cores=num.cores);		
	}else{
		obj.ls = lapply(fileList, function(file){
			idx = which(obj@file == file)
			addPmatToSnapSingle(obj[idx,], file);
		});				
	}
	
	# combine
	if((x=length(obj.ls)) == 1L){
		res = obj.ls[[1]]
	}else{
		res = Reduce(snapRbind, obj.ls);		
	}
	
	# re-order the matrix
	o1 = paste(obj@file, obj@barcode, sep=".");
	o2 = paste(res@file, res@barcode, sep=".");
	obj@peak = res@peak;
	obj@pmat = res@pmat[match(o1, o2),];
	rm(obj.ls, res, o1, o2);
	gc();
	return(obj);
}

#' Add cell-by-gene matrix
#' 
#' This function takes a snap object as input and add the cell-by-gene 
#' matrix to the existing snap object.
#' 
#' @param obj A snap object to add cell-by-bin matrix.
#' @param do.par A logical variable indicates whether to use multiple processors [FALSE].
#' @param num.cores Number of processors to use.
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' demo.sp = createSnap(file.name, sample="demo", do.par=FALSE);
#' demo.sp = addGmatToSnap(demo.sp, do.par=FALSE);
#' 
#' @return Return a snap object
#' @importFrom rhdf5 h5read 
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom parallel mclapply
#' @importFrom methods is
#' @export
addGmatToSnap <- function(obj, do.par, num.cores) {
  UseMethod("addGmatToSnap", obj);
}

#' @export
addGmatToSnap.default <- function(obj, do.par=FALSE, num.cores=1){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}
	
	if(!is.numeric(num.cores)){
		stop("num.cores is not an integer")
	}		
	num.cores = round(num.cores);
	
	fileList = as.list(unique(obj@file));

	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if GM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "GM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "GM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following nsap files do not contain GM session")
		print(fileList[idx])
		stop()
	}
	
	# read the snap object
	message("Epoch: reading cell-gene count matrix session ...");
	if(do.par){
		obj.ls = mclapply(fileList, function(file){
			idx = which(obj@file == file);
			addGmatToSnapSingle(obj[idx,], file);
		}, mc.cores=num.cores);		
	}else{
		obj.ls = lapply(fileList, function(file){
			idx = which(obj@file == file);
			addGmatToSnapSingle(obj[idx,], file);
		});				
	}

	# combine
	if((x=length(obj.ls)) == 1L){
		res = obj.ls[[1]]
	}else{
		res = Reduce(snapRbind, obj.ls);		
	}
	o1 = paste(obj@file, obj@barcode, sep=".");
	o2 = paste(res@file, res@barcode, sep=".");
	obj@gmat = res@gmat[match(o1, o2),];
	rm(obj.ls, res, o1, o2);
	gc();
	return(obj);
}

#' Remove cell-by-bin matrix
#' 
#' This function takes a snap object as input and removes the cell-by-bin 
#' matrix in the existing snap object. Report error when cell-by-bin matrix is empty.
#' 
#' @param obj A snap object to remove cell-by-bin matrix.
#' @examples
#' data(demo.sp)
#' rmBmatFromSnap(demo.sp)
#' 
#' @return Return a snap object without cell-by-bin matrix 
#' @importFrom methods slot
#' @importFrom GenomicRanges GRanges
#' @import Matrix
#' @export
rmBmatFromSnap <- function(obj){
  UseMethod("rmBmatFromSnap", obj);
}

#' @export
rmBmatFromSnap.default <- function(obj){	
	# close the previously opened H5 file
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
		data.use = methods::slot(obj, "bmat");
		if((x=nrow(data.use)) == 0L){
			stop("cell-by-bin matrix does not exist in obj");
		}
	}
	
	obj@bmat = Matrix(0,0,0, sparse=TRUE);
	obj@feature = GRanges();
	return(obj);
}


#' Remove cell-by-peak matrix
#' 
#' This function takes a snap object as input and removes the cell-by-peak 
#' matrix in the existing snap object. Report error when cell-by-peak matrix is empty.
#' 
#' @param obj A snap object to remove cell-by-peak matrix.
#' @examples
#' data(demo.sp)
#' rmPmatFromSnap(demo.sp)
#' 
#' @return Return a snap object without cell-by-peak matrix
#' @importFrom methods slot
#' @importFrom GenomicRanges GRanges
#' @import Matrix
#' @export
rmPmatFromSnap <- function(obj){
  UseMethod("rmPmatFromSnap", obj);
}

#' @export
rmPmatFromSnap.default <- function(obj){	
	# close the previously opened H5 file
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
		data.use = methods::slot(obj, "pmat");
		if((x=nrow(data.use)) == 0L){
			stop("cell-by-peak matrix does not exist in obj");
		}
	}
	
	obj@pmat = Matrix(0,0,0, sparse=TRUE);
	obj@peak = GRanges();
	return(obj);
}

#' Remove cell-by-gene matrix
#' 
#' This function takes a snap object as input and removes the cell-by-gene 
#' matrix in the existing snap object. Report error when cell-by-gene matrix 
#' is empty.
#' 
#' @param obj A snap object to remove cell-by-gene matrix.
#' @examples
#' data(demo.sp)
#' rmPmatFromSnap(demo.sp)
#' 
#' @return Return a snap object without cell-by-peak matrix
#' @importFrom methods slot
#' @importFrom GenomicRanges GRanges
#' @import Matrix
#' @export
rmGmatFromSnap <- function(obj){
  UseMethod("rmGmatFromSnap", obj);
}

#' @export
rmGmatFromSnap.default <- function(obj){	
	# close the previously opened H5 file
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
		data.use = methods::slot(obj, "gmat");
		if((x=nrow(data.use)) == 0L){
			stop("cell-by-gene matrix does not exist in obj");
		}
	}
	
	obj@gmat = Matrix(0,0,0, sparse=TRUE);
	return(obj);
}

#' Create a snap object from cell-by-bin matrix
#' 
#' This function takes a cell-by-bin count matrix as input and returns a snap object.
#' 
#' @param mat A sparse matrix
#' @param barcodes Corresponding barcodes
#' @param bins A GenomicRanges object for the genomic coordinates of the bins
#' @examples
#' library("GenomicRanges");
#' mat = Matrix(sample(0:10, 100, replace=TRUE),sparse=TRUE, ncol=5);
#' barcodes = paste("barcode", seq(nrow(mat)), sep=".");
#' chroms = c("chr1", "chr1", "chr1", "chr1", "chr1");
#' pos = c(1, 5001, 10001, 15001, 20001);
#' bins = GRanges(chroms, IRanges(pos, pos+5000));
#' x.sp = createSnapFromBmat(
#'	mat, 
#'	barcodes=barcodes,
#'	bins=bins
#'	);
#' @return Return a snap object
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
createSnapFromBmat <- function(mat, barcodes, bins) {
  UseMethod("createSnapFromBmat", mat);
}

#' @export
createSnapFromBmat.default <- function(mat, barcodes, bins){
	if(missing(mat) || missing(barcodes) || missing(bins)){
		stop("mat or barcodes or bins is missing");
	}

	if(!(is(mat, "dsCMatrix") || is(mat, "dgCMatrix"))){
		stop("'mat' is not a sparse matrix");
	}

	if(length(barcodes) != nrow(mat)){
		stop("'mat' has different number of rows with number of barcodes");
	}
	
	if(!is(bins, "GRanges")){
		stop("'bins' is not a GRanges object")
	}
	
	if(length(bins) != ncol(mat)){
		stop("'mat' has different number of columns with number of bins");
	}
	
	obj = newSnap();
	obj@bmat = mat;
	obj@barcode = barcodes;
	obj@feature = bins;
	return(obj);
}

#' Create a snap object from cell-by-peak matrix
#' 
#' This function takes a cell-by-peak count matrix as input and returns a snap object.
#' 
#' @param mat A sparse matrix
#' @param barcodes Corresponding barcodes
#' @param peaks A GRanges object for the genomic coordinates of peaks
#' @examples
#' library("GenomicRanges");
#' mat = Matrix(sample(0:10, 100, replace=TRUE),sparse=TRUE, ncol=5);
#' barcodes = paste("barcode", seq(nrow(mat)), sep=".");
#' chroms = c("chr1", "chr1", "chr1", "chr1", "chr1");
#' pos = c(1, 5001, 10001, 15001, 20001);
#' peaks = GRanges(chroms, IRanges(pos, pos+100));
#' x.sp = createSnapFromPmat(
#'	mat, 
#'	barcodes=barcodes,
#'	peaks=peaks
#'	);
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
createSnapFromPmat <- function(mat, barcodes, peaks) {
  UseMethod("createSnapFromPmat", mat);
}

#' @export
createSnapFromPmat.default <- function(mat, barcodes, peaks){
	if(missing(mat) || missing(barcodes) || missing(peaks)){
		stop("mat or barcodes or peaks is missing");
	}

	if(!(is(mat, "dsCMatrix") || is(mat, "dgCMatrix"))){
		stop("'mat' is not a sparse matrix");
	}

	if(length(barcodes) != nrow(mat)){
		stop("'mat' has different number of rows with number of barcodes");
	}
	
	if(!is(peaks, "GRanges")){
		stop("'peaks' is not a GRanges object")
	}
	if(length(peaks) != ncol(mat)){
		stop("'mat' has different number of columns with number of peaks");
	}
	
	obj = newSnap();
	obj@pmat = mat;
	obj@barcode = barcodes;
	obj@peak = peaks;
	return(obj);
}

#' Create a snap object from cell-by-gene matrix
#' 
#' This function takes a cell-by-gene count matrix as input and returns a snap object.
#' 
#' @param mat A sparse matrix
#' @param barcodes An array of characters for barcodes
#' @param gene.names An array of characters for gene names
#' @examples
#' mat = Matrix(sample(0:10, 100, replace=TRUE),sparse=TRUE, ncol=5);
#' barcodes = paste("barcode", seq(nrow(mat)), sep=".");
#' gene.names = paste("genes", seq(ncol(mat)), sep=".");
#' x.sp = createSnapFromGmat(
#'	mat, 
#'	barcodes=barcodes,
#'	gene.names=gene.names
#'	);
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom methods is
#' @export
createSnapFromGmat <- function(mat, barcodes, gene.names) {
  UseMethod("createSnapFromGmat");
}

#' @export
createSnapFromGmat.default <- function(mat, barcodes, gene.names){
	if(missing(mat) || missing(barcodes) || missing(gene.names)){
		stop("mat or barcodes or gene.names is missing");
	}

	if(!(is(mat, "dsCMatrix") || is(mat, "dgCMatrix"))){
		stop("'mat' is not a sparse matrix");
	}

	if(length(barcodes) != nrow(mat)){
		stop("'mat' has different number of rows with number of barcodes");
	}
	
	if(!is(gene.names, "character")){
		stop("'gene.names' is not a character object")
	}
	if(length(gene.names) != ncol(mat)){
		stop("'mat' has different number of columns with number of gene.names");
	}
	
	obj = newSnap();
	obj@gmat = mat;
	obj@barcode = barcodes;
	colnames(obj@gmat) = gene.names;
	return(obj);
}

#' Read meta data from a snap file
#'
#' Take a snap file as input and read the barcode session only.
#'
#' @param file character for the snap-format file name which the data are to be read from.
#'
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' md = readMetaData(file.name);
#' 
#' @return A data frame contains barcodes and its attributes
#'
#' @importFrom rhdf5 h5read
#' @export
readMetaData <- function(file) {
  UseMethod("readMetaData", file);
}

#' @export
readMetaData.default <- function(file){
	if(!file.exists(file)){stop(paste("Error @readMetaData: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @readMetaData: ", file, " is not a snap-format file!", sep=""))};
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @readSnap: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
	TN = as.numeric(tryCatch(TN <- h5read(file, '/BD/TN'), error = function(e) {print(paste("Warning @readMetaData: 'BD/TN' not found in ",file)); return(c())}));
	UM = as.numeric(tryCatch(UM <- h5read(file, '/BD/UM'), error = function(e) {print(paste("Warning @readMetaData: 'BD/UM' not found in ",file)); return(c())}));
	PP = as.numeric(tryCatch(PP <- h5read(file, '/BD/PP'), error = function(e) {print(paste("Warning @readMetaData: 'BD/PP' not found in ",file)); return(c())}));
	UQ = as.numeric(tryCatch(UQ <- h5read(file, '/BD/UQ'), error = function(e) {print(paste("Warning @readMetaData: 'BD/UQ' not found in ",file)); return(c())}));
	CM = as.numeric(tryCatch(CM <- h5read(file, '/BD/CM'), error = function(e) {print(paste("Warning @readMetaData: 'BD/CM' not found in ",file)); return(c())}));
	data.frame(barcode, TN, UM, PP, UQ, CM)
}


#' Export barcode meta data
#' 
#' This function takes a snap object as input and export its barcode and corresponding attributes
#' 
#' @param obj A snap object
#' @param file Output file name
#' @param slot.names Name of slots to be exported c('barcode', 'tsne', 'umap', 'cluster', 'metaData')
#' @importFrom methods slot
#' @importFrom utils write.table
#' @importFrom methods is
#' @examples
#' data(demo.sp);
#' exportMetaData(demo.sp, file="demo.metadata.txt", slot.names=c("barcode", "tsne"));
#' @export
exportMetaData <- function(obj, file, slot.names){
    UseMethod("exportMetaData", obj);	
}

#' @export
exportMetaData.default <- function(obj, file, slot.names=c('barcode', 'cluster', 'tsne', 'umap', 'metaData')){
	subset.names.ref = c('barcode', 'cluster', 'tsne', 'umap', 'metaData');
	
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("'obj' is not a snap object")
		}
		if((x=nrow(obj))==0L){
			stop("obj is empty");
		}
	}
	
	if(file.exists(file)){
		stop("file already exists, remove it first")		
	}
	
	if(missing(slot.names)){
		stop("slot.names is missing")
	}

	if(!all(slot.names %in% subset.names.ref)){
		stop("'slot.names' must be subset of c('barcode', 'cluster', 'tsne', 'umap', 'metaData')");		
	}
	
	
	metaData.ls = lapply(as.list(slot.names), function(x){
		if(x == "barcode"){
			y = data.frame(slot(obj, x));			
			colnames(y) = "barcode"
		}else if(x == "tsne"){
			y = data.frame(slot(obj, x));			
			colnames(y) = c("tsne1", "tsne2");
		}else if(x == "umap"){
			y = data.frame(slot(obj, x));			
			colnames(y) = c("umap1", "umap2");
		}else if(x == "cluster"){
			y = data.frame(slot(obj, x));			
			colnames(y) = "cluster"
		}else{
			y = data.frame(slot(obj, x));			
		}
		y
	})
	
	if(!all(sapply(lapply(metaData.ls, nrow), FUN = identical, nrow(metaData.ls[[1]])))){
		stop("slot in subset.names have different length")
	}
	
	metaData.df = do.call(cbind, metaData.ls);

    write.table(metaData.df, file = file, append = FALSE, quote = FALSE, sep = "\t",
                eol = "\n", na = "NA", dec = ".", row.names = FALSE,
                col.names = TRUE, qmethod = c("escape", "double"),
                fileEncoding = "")
}


#' Check correlation of cell-by-bin matrix
#'
#' This function takes one or two snap object as input and calculate 
#' the correlation between cell-by-bin matrix between replicates. If
#' obj2 is NULL, obj1 will be randomly split into two pseudo replicates  
#' and the correlaion between these two pseudo-replicates will be 
#' calcualted and returned. For obj1, the cell-by-bin matrix
#' cannot be empty. This function helps check whether the current 
#' cell-by-bin matrix is sufficient for downstream analysis. If the 
#' pearson correlation is less than 0.95 recommend to use a bigger bin.size. 
#' 
#' @param obj1 A snap object for replicate 1
#' @param obj2 A snap object for replicate 2 [NULL].
#' @examples
#' data(demo.sp);
#' calBmatCor(demo.sp)
#' 
#' @return Return pearson correlation between replicates.
#' @importFrom stats cor
#' @importFrom methods is
#' @export
calBmatCor <- function(obj1, obj2) {
  UseMethod("calBmatCor", obj1);
}

#' @export
calBmatCor.default <- function(obj1, obj2=NULL){
	if(missing(obj1)){
		stop("obj1 is missing.")
	}else{
		if(!is(obj1, "snap")){
			stop("obj1 is not a snap object")
		}
		
		if((x=nrow(obj1@bmat)) == 0){
			stop("cell-by-bin matrix is empty")		
		}
		
		if((x=max(obj1@bmat)) > 1){
			obj1 = makeBinary(obj1, mat="bmat");
		}
	}
	
	if(!is.null(obj2)){
		# check if obj2 is a snap object
		if(!is(obj2, "snap")){
			stop("obj2 is not a snap object")
		}
		
		# check if obj2 has the same features
		if(any(obj1@feature$name != obj2@feature$name)){
			stop("'obj1' and 'obj2' have different features")			
		}
		if(max(obj2@bmat) > 1){
			obj2 = makeBinary(obj2, mat="bmat");									
		}
		cov1 = log(Matrix::colSums(obj1@bmat) + 1, 10);
		cov2 = log(Matrix::colSums(obj2@bmat) + 1, 10);			
	}else{
		ncell = nrow(obj1);
		idx1 = sort(sample(seq(ncell), ncell/2));
		idx2 = setdiff(seq(ncell), idx1);
		cov1 = log(Matrix::colSums(obj1@bmat[idx1,]) + 1, 10);
		cov2 = log(Matrix::colSums(obj1@bmat[idx2,]) + 1, 10);	
	}		
	return(cor(cov1, cov2, method="pearson"));
}

#' Combine snap objects
#'
#' Takes two snap objects and combines them.
#'
#' @param obj1 a snap object
#' @param obj2 a snap object
#' @return a combined snap object
#' @export
snapRbind <- function(obj1, obj2){
	if(!is.snap(obj1)){stop(paste("Error @snapRbind: obj1 is not a snap object!", sep=""))};
	if(!is.snap(obj2)){stop(paste("Error @snapRbind: obj2 is not a snap object!", sep=""))};

	# barcode from obj1 and obj2
	barcode1 = paste(obj1@file, obj1@barcode, sep=".");
	barcode2 = paste(obj2@file, obj2@barcode, sep=".");	
	
	# check barcode name, if there exists duplicate barcode raise error and exist
	if(length(unique(c(barcode1, barcode2))) < length(barcode1) + length(barcode2)){
		stop("Error: @snapRbind: identifcal barcodes found in obj1 and obj2!")
	}
	rm(barcode1, barcode2)
	gc()
	
	# check meta data
	if(nrow(obj1@metaData) > 0 && nrow(obj2@metaData) > 0){
		metaData = rbind(obj1@metaData, obj2@metaData);		
	}else{
		metaData = data.frame();
	}
	
	# check feature
	feature1 = obj1@feature;
	feature2 = obj2@feature;
	if((length(feature1) == 0) != (length(feature2) == 0)){
		stop("different feature found in obj1 and obj2!")
	}else{
		if(length(feature1) > 0){
			if(FALSE %in% (feature1$name == feature2$name)){
				stop("Error: @snapRbind: different feature found in obj1 and obj2!")
			}
			feature = feature1;					
		}else{
			feature = feature1;								
		}
	}
	gc()
	
	# check peak
	peak1 = obj1@peak;
	peak2 = obj2@peak;
	if((length(peak1) == 0) != (length(peak2) == 0)){
		stop("different peak found in obj1 and obj2!")
	}else{
		if(length(peak1) > 0){
			if(FALSE %in% (peak1$name == peak2$name)){
				stop("Error: @snapRbind: different feature found in obj1 and obj2!")
			}
			peak = peak1;					
		}else{
			peak = peak1;								
		}
	}
	rm(peak1, peak2)
	gc()
	
	# check bmat	
	bmat1 = obj1@bmat;
	bmat2 = obj2@bmat;
	if((length(bmat1) == 0) != (length(bmat2) == 0)){
		stop("bmat has different dimentions in obj1 and obj2!")
	}else{
		bmat = Matrix::rBind(bmat1, bmat2);
	}
	rm(bmat1, bmat2)
	gc()
	
	# check gmat	
	gmat1 = obj1@gmat;
	gmat2 = obj2@gmat;
	if((length(gmat1) == 0) != (length(gmat2) == 0)){
		stop("gmat has different dimentions in obj1 and obj2!")
	}else{
		gmat = Matrix::rBind(gmat1, gmat2);
	}
	rm(gmat1, gmat2)
	gc()

	# check pmat	
	pmat1 = obj1@pmat;
	pmat2 = obj2@pmat;
	if((length(pmat1) == 0) != (length(pmat2) == 0)){
		stop("pmat has different dimentions in obj1 and obj2!")
	}else{
		pmat = Matrix::rBind(pmat1, pmat2);
	}
	rm(pmat1, pmat2)
	gc()


	# check gmat	
	dmat1 = obj1@smat@dmat;
	dmat2 = obj2@smat@dmat;
	
	if((length(dmat1) == 0) != (length(dmat2) == 0)){
		stop("dmat has different dimentions in obj1 and obj2!")
	}else{
		dmat = Matrix::rBind(dmat1, dmat2);
	}
	rm(dmat1, dmat2)
	gc()

	res = newSnap();
	res@feature = feature;
	res@barcode = c(obj1@barcode, obj2@barcode);
	res@file = c(obj1@file, obj2@file);
	res@sample = c(obj1@sample, obj2@sample);
	res@metaData = metaData;
	res@bmat = bmat;
	res@pmat = pmat;
	res@peak = peak;
	res@gmat = gmat;
	res@smat@dmat = dmat;
	res@smat@sdev = obj1@smat@sdev;
	return(res)
}

#' Cell filtration
#'
#' This function takes a snap object as input and filter cells based on given cutoffs. 
#' We next identify the high-quality barcode based on the following metrices: 
#' 
#' 1) fragment.num - total number of fragments per barcode;
#' 2) UMI - unique molecular identifier;
#' 3) mito.ratio - mitochondrial ratio;
#' 4) dup.ratio - PCR duplicate ratio;
#' 5) pair.ratio - properly paired ratio;
#' 6) umap.ratio - uniquely mapped ratio;
#' 
#' Note we no longer use reads in peak ratio as a metric for cell selection mainly for two reasons. 
#' Reads-in-peak ration is highly cell type specific. For instance, according to published single 
#' cell ATAC-seq, human fibroblast (BJ) cells have significantly higher reads in peak ratio (40-60%) 
#' versus 20-40% for GM12878 cells. Similarly, in mammalian brain, glia cells overall have very 
#' different reads in peak ratio distribution compared to neuronal cells. We suspect this may reflect 
#' the nucleus size or global chromatin accessibility. Second, pre-defined set of accessibility peaks 
#' are incomplete and biased to the dominant populations. 
#' 
#' @param obj A snap object.
#' @param subset.names Attributes used to filter cells c('fragment.num', 'UMI', 'mito.ratio', 'umap.ratio', 'dup.ratio', 'pair.ratio'). 
#' @param low.thresholds Low cutoffs for the parameters (default is -Inf)
#' @param high.thresholds High cutoffs for the parameters (default is Inf)
#' 
#' @examples
#' data(demo.sp);
#' filterCells(
#'	obj=demo.sp, 
#'	subset.names=c("UMI"), 
#'	low.thresholds=c(10),
#'	high.thresholds=c(Inf)
#'	);
#'
#' @return Returns a snap object containing only the relevant subset of cells
#' 
#' @importFrom methods is
#' @export
filterCells <- function(obj, subset.names, low.thresholds, high.thresholds) {
  UseMethod("filterCells", obj);
}

#' @export
filterCells.default <- function(
	obj, 
	subset.names, 
	low.thresholds, 
	high.thresholds
){
	if(missing(obj)){
		stop("obj is missing");
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object");
		}
		metaData = obj@metaData;		
		if((x=nrow(metaData))==0L){
			stop("metaData is empty")
		}
	}
	
    if (missing(x = low.thresholds)){
      low.thresholds <- replicate(n = length(x = subset.names), expr = -Inf);
    }

    if (missing(x = high.thresholds)) {
      high.thresholds <- replicate(n = length(x = subset.names), expr = Inf);
    }

	if(!all(subset.names %in% c('fragment.num', 'mito.ratio', 'umap.ratio', 'dup.ratio', 'pair.ratio', 'UMI'))){
		stop("'subset.names' must be subset of c('UMI', 'fragment.num', 'mito.ratio', 'umap.ratio', 'dup.ratio', 'pair.ratio')");		
	}

    length.check <- sapply(
      X = list(subset.names, low.thresholds, high.thresholds),
      FUN = length
    )
	
    if (length(x = unique(x = length.check)) != 1L) {
      stop("'subset.names', 'low.thresholds', and 'high.thresholds' must all have the same length");
    }

	data.subsets <- data.frame(name=subset.names, lw=low.thresholds, hg=high.thresholds);
	
	metaData$UMI = metaData$UQ;
	metaData$fragment.num = metaData$TN;
	metaData$mito.ratio = (metaData$CM)/(metaData$UQ);
	metaData$umap.ratio = (metaData$UM)/(metaData$TN);
	metaData$dup.ratio = 1 - (metaData$UQ)/(metaData$PP);
	metaData$pair.ratio = (metaData$PP)/(metaData$UM);
	for(i in seq(nrow(data.subsets))){		
		f = as.character(data.subsets$name[i]);
		names.use <- which(colnames(metaData) %in% f);
		idx = which((metaData[,names.use] >= data.subsets$lw[i]) & (metaData[,names.use] <= data.subsets$hg[i]));
		obj = obj[idx,];
		metaData = metaData[idx,]
	}
	return(obj);
}

extractReadsFromOneCell <- function(
	barcode, 
	file
){
	if(missing(file)){
		stop("file is missing");
	}else{
		if(!isSnapFile(file)){
			stop(paste0(file, " is not a snap file"));			
		}
	}

	# read the reference barcode list
	barcode.list = as.character(tryCatch(barcode.list <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @extractReadsFromOneCell: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
	if(missing(barcode)){
		stop("barcode is missing");
	}else{
		if(class(barcode) != "character"){
			stop(paste0("barocde ", barcode, " is not character object"));
		}else{
			if(!(barcode %in% barcode.list)){
				stop(paste0("barocde ", barcode, " does not exist in snape file", file));
			}
		}		
	}
	
	pos.list = as.numeric(tryCatch(pos.list <- h5read(file, "FM/barcodePos"), error = function(e) {print(paste("Warning @readMetaData: 'FM/barcodePos' not found in ",file)); return(c())}));
	len.list = as.numeric(tryCatch(len.list <- h5read(file, "FM/barcodeLen"), error = function(e) {print(paste("Warning @readMetaData: 'FM/barcodeLen' not found in ",file)); return(c())}));
	
	if((x=length(pos.list)) != (y=length(len.list))){
		stop("FM/barcodeLen has different length with FM/barcodePos")
	}
	
	pos = pos.list[match(barcode, barcode.list)];
	len = len.list[match(barcode, barcode.list)];
	idx.arr = seq(pos, pos + len - 1);

	chroms = h5read(file, "FM/fragChrom", index=list(idx.arr))
	starts = h5read(file, "FM/fragStart", index=list(idx.arr))
	lens = h5read(file, "FM/fragLen", index=list(idx.arr))
	frags.gr = GRanges(chroms, 
		IRanges(starts, starts + lens - 1), 
		barcode=rep(barcode, length(chroms)), 
		file=rep(file, length(chroms))
		);
	return(frags.gr)
}

#' Extract Reads By Barcodes
#'
#' This function takes a barcode list and snap file as input and quickly extract reads belonging to the given barcodes. 
#' 
#' @param barcodes A vector contains the selected barcodes.
#' @param files A vector contains the snap file that barcodes belong to.
#' @param do.par A logic variable indicates weather to run this in parallel with multiple processors.
#' @param num.cores Number of processors to use.
#' 
#' @return Returns A GenomicRanges object that contains the reads
#' 
#' @importFrom methods is
#' @importFrom doParallel registerDoParallel
#' @importFrom parallel makeCluster stopCluster detectCores
#' @importFrom foreach foreach %dopar%
#' @importFrom stats lm
#' @importFrom methods slot
#' @export
extractReads <- function(
	barcodes,
	files,
	do.par=TRUE,
	num.cores=1
){
	if(missing(barcodes)){
		stop("barcodes is missing")
	}else{
		if(class(barcodes) != "character"){
			stop("barcodes must be character");
		}
	}

	if(missing(files)){
		stop("files is missing")
	}else{
		if(class(files) != "character"){
			stop("files must be character");
		}
	}
	ncell = length(barcodes);
	nfile = length(files);
	fileList = as.list(unique(files));
	if(ncell != nfile){
		stop("barcodes have different length with barcodes");
	}
	
	# check if snap files exist
	if(any(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){file.exists(x)})) == FALSE)
		print("error: these files does not exist")
		print(fileList[idx])
		stop()
	}
	
	# check if files are all snap files
	if(any(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){isSnapFile(x)})) == FALSE)
		print("error: these files are not snap file")
		print(fileList[idx])
		stop()
	}
	
	# check if FM session exist
	if(any(do.call(c, lapply(fileList, function(x){ "FM" %in% h5ls(x, recursive=1)$name  })) == FALSE)){
		idx = which(do.call(c, lapply(fileList, function(x){ "FM" %in% h5ls(x, recursive=1)$name  })) == FALSE)
		print("error: the following nsap files do not contain FM session")
		print(fileList[idx])
		stop()
	}

	if(do.par){
	    # input checking for parallel options
		if(num.cores > 1){
	        if (num.cores == 1) {
	          num.cores = 1
	        } else if (num.cores > detectCores()) {
	          num.cores <- detectCores() - 1
	          warning(paste0("num.cores set greater than number of available cores(", parallel::detectCores(), "). Setting num.cores to ", num.cores, "."))
	        }
	      } else if (num.cores != 1) {
	        num.cores <- 1
		}
	}
	read.ls = mclapply(as.list(seq(barcodes)), function(i){
		extractReadsFromOneCell(
			barcode = barcodes[i], 
			file = files[i]
		)		
	}, mc.cores=num.cores);
	read.gr = Reduce(c, read.ls);
	return(read.gr);
}

#' Feature filtration
#'
#' This function takes a snap obj as input and perform feature selection in the following manner:
#' 1) calculate coverage of each genomic bin/feature;
#' 2) log scale the coverage by log10(count + 1);
#' 3) the log-nromal distribution is then converted into zscore; 
#' 4) bins with zscore beyond [low.threshold, high.threshold] are filtered;
#' 
#' @param obj A snap obj
#' @param low.threshold Low cutoffs for the parameters (default is -2)
#' @param high.threshold High cutoffs for the parameters (default is 2)
#' @param mat Matrix to filter c("bmat", "pmat")
#'
#' @examples
#' data(demo.sp);
#' filterBins(
#'	obj=demo.sp, 
#'	low.threshold=-2,
#'	high.threshold=2
#'	);
#' 
#' @return Returns a snap obj
#' @importFrom stats sd
#' @importFrom methods slot is
#' @export
filterBins <- function(obj, low.threshold, high.threshold, mat) {
  UseMethod("filterBins", obj);
}

#' @export
filterBins.default <- function(obj, low.threshold=-2, high.threshold=2, mat=c("bmat", "pmat")){
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("'obj' is not a snap obj")
		};		
	}
	
	mat = match.arg(mat);
	data.use = methods::slot(obj, mat);

	if((x=nrow(data.use)) == 0L){		
		stop("count matrix is empty")
	}

	idy = which(Matrix::colSums(data.use) > 0);
	cov = log(Matrix::colSums(data.use)[idy] + 1, 10);
	zcov = (cov - mean(cov)) / stats::sd(cov);	
	idy2 = which(zcov >= low.threshold & zcov <= high.threshold);
	idy = idy[idy2];
	methods::slot(obj, mat) = data.use[,idy,drop=FALSE];
	if(mat=="bmat"){
		obj@feature = obj@feature[idy];
	}else if(mat=="pmat"){
		obj@peak = obj@peak[idy];		
	}
	return(obj)
}

#' Check a snap-format file
#' 
#' This function takes a file name as input and check if the file is a snap-formated file 
#' @param file A file name.
#' @examples
#' file.name = system.file("extdata", "demo.snap", package = "SnapATAC");
#' isSnapFile(file.name);
#' 
#' @return Return a logical variable indicates whether file is a snap file.
#' @importFrom rhdf5 h5read h5ls
#' 
#' @export
#' 
isSnapFile <- function(file) {
  UseMethod("isSnapFile", file);
}

#' @export
isSnapFile.default <- function(file){
	if(!file.exists(file)){ 
		stop("file does not exist!")
	}

	monals = tryCatch(
		monals <- h5ls(file), 
		error = function(e) {
			return(character())
	})
	
	if(length(monals) != 0){
		magicString = as.character(tryCatch(magicString <- h5read(file, '/HD/MG'), error = function(e) {print(paste("Warning @isSnapFile: 'HD/MG' not found in ", file)); return(character())}))
		if(length(magicString)==0){
			return(FALSE);
		}
		if(magicString == "SNAP" || magicString == "MONA"){
			return(TRUE);
		}
	}
	return(FALSE);
}

readBins <- function(file, bin.size=5000){	
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	if(!file.exists(file)){stop(paste("Error @addBmat: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @addBmat: ", file, " is not a snap-format file!", sep=""))};
	if(!(bin.size %in% showBinSizes(file))){stop(paste("Error @addBmat: bin.size ", bin.size, " does not exist in ", file, "\n", sep=""))};
	options(scipen=999);
	binChrom = tryCatch(binChrom <- h5read(file, paste("AM", bin.size, "binChrom", sep="/")), error = function(e) {stop(paste("Warning @readaddBmatSnap: 'AM/bin.size/binChrom' not found in ",file))})
	binStart = tryCatch(binStart <- h5read(file, paste("AM", bin.size, "binStart", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/binStart' not found in ",file))})
	if(bin.size == 0){
		binEnd = tryCatch(binEnd <- h5read(file, paste("AM", bin.size, "binEnd", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/binStart' not found in ",file))})		
	}else{
		binEnd = binStart + as.numeric(bin.size) -1;
	}
	if((length(binChrom) == 0) || (length(binStart) == 0)){stop("Error @addBmat: bin is empty! Does not support empty snap file")}
	if(length(binChrom) != length(binStart)){
		stop(paste("Error @addBmat: ", "binChrom and binStart has different length!", sep=""))
	}else{
		nBin = length(binChrom);
	}
	bins = GRanges(binChrom, IRanges(as.numeric(binStart),binEnd), name=paste(paste(binChrom, binStart, sep=":"), binEnd, sep="-"));				
	rm(binChrom, binStart);
	return(bins)
}

readPeaks <- function(file){	
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	
	if(!file.exists(file)){stop(paste("Error @addBmat: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @addBmat: ", file, " is not a snap-format file!", sep=""))};
	options(scipen=999);
    binChrom = tryCatch(binChrom <- h5read(file, "PM/peakChrom"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakChrom' not found in ",file))})
    binStart = tryCatch(binStart <- h5read(file, "PM/peakStart"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakStart' not found in ",file))})
    binEnd = tryCatch(binEnd <- h5read(file, "PM/peakEnd"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakEnd' not found in ",file))})

    if((length(binChrom) == 0) || (length(binStart) == 0)){stop("Error @readSnap: bin is empty! Does not support empty snap file")}
    if(length(binChrom) != length(binStart)){
            stop(paste("Error @addPmat: ", "binChrom and binStart has different length!", sep=""))
    }else{
            nBin = length(binChrom);
    }

    bins = GRanges(binChrom, IRanges(as.numeric(binStart),binEnd), name=paste(paste(binChrom, binStart, sep=":"), binEnd, sep="-"));
    rm(binChrom, binStart);
	return(bins)
}

#' @importFrom methods is
#' @import Matrix
addBmatToSnapSingle <- function(obj, file, bin.size=5000){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	
	if(missing(obj)){
		stop("obj is missing")
	}else{
		if(!is(obj, "snap")){
			stop("obj is not a snap object")
		}
	}

	if(!file.exists(file)){stop(paste("Error @addBmat: ", file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste("Error @addBmat: ", file, " is not a snap-format file!", sep=""))};
	if(!(bin.size %in% showBinSizes(file))){stop(paste("Error @addBmat: bin.size ", bin.size, " does not exist in ", file, "\n", sep=""))};
	obj@bmat = Matrix(0,0,0);
	############################################################################
	barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @addBmat: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));	

	bin.sizeList = showBinSizes(file);
	if(length(bin.sizeList) == 0){stop("Error @addBmat: bin.sizeList is empty! Does not support reading empty snap file")}
	if(!(bin.size %in% bin.sizeList)){stop(paste("Error @addBmat: ", bin.size, " does not exist in bin.sizeList, valid bin.size includes ", toString(bin.sizeList), "\n", sep=""))}
	
	bins = readBins(file, bin.size);
	obj@feature = bins;
	idx = as.numeric(tryCatch(idx <- h5read(file, paste("AM", bin.size, "idx", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/idx' not found in ",file))}));
	idy = as.numeric(tryCatch(idy <- h5read(file, paste("AM", bin.size, "idy", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/idy' not found in ",file))}));
	count = as.numeric(tryCatch(count <- h5read(file, paste("AM", bin.size, "count", sep="/")), error = function(e) {stop(paste("Warning @addBmat: 'AM/bin.size/count' not found in ",file))}));	

	if(!all(sapply(list(length(idx),length(idy),length(count)), function(x) x == length(count)))){stop("Error: idx, idy and count has different length in the snap file")}	
	nBarcode = length(barcode);
	nBin = length(obj@feature);
	M = sparseMatrix(i=idx, j =idy, x=count, dims=c(nBarcode, nBin));
	rownames(M) = barcode;
	obj@bmat = M[match(obj@barcode, rownames(M)),]
	rm(idx, idy, count, M);
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	gc();
	return(obj);
}

#' @importFrom methods is
addGmatToSnapSingle <- function(obj, file){
        # close the previously opened H5 file
		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
        # check the input
        if(!is(obj, "snap")){stop(paste("Error @addGmat: ", file, " does not exist!", sep=""))}
        if(!file.exists(file)){stop(paste("Error @addGmat: ", file, " does not exist!", sep=""))};
        if(!isSnapFile(file)){stop(paste("Error @addGmat: ", file, " is not a snap-format file!", sep=""))};

        ############################################################################
		barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @addBmat: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
        options(scipen=999);
        geneName = tryCatch(geneName <- h5read(file, "GM/name"), error = function(e) {stop(paste("Warning @addGmat: 'GM/name' not found in ",file))})
        if(length(geneName) == 0){
			stop("Error @addGmat: GM is empty")
		}

        idx = as.numeric(tryCatch(idx <- h5read(file, "GM/idx"), error = function(e) {stop(paste("Warning @addGmat: 'GM/idx' not found in ",file))}))
        idy = as.numeric(tryCatch(idy <- h5read(file, "GM/idy"), error = function(e) {stop(paste("Warning @addGmat: 'GM/idy' not found in ",file))}))
        count = as.numeric(tryCatch(count <- h5read(file, "GM/count"), error = function(e) {stop(paste("Warning @addGmat: 'GM/count' not found in ",file))}))

        if(!all(sapply(list(length(idx),length(idy),length(count)), function(x) x == length(count)))){stop("Error: idx, idy and count has different length in the snap GM session")}		
		nBarcode = length(barcode);
		nGene = length(geneName);
		M = sparseMatrix(i=idx, j =idy, x=count, dims=c(nBarcode, nGene));
		rownames(M) = barcode;
		obj@gmat = 	M[match(obj@barcode, rownames(M)),]
		colnames(obj@gmat) = geneName;
		rm(idx, idy, count, M);

		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
		gc();
		return(obj);
}

#' @importFrom methods is
addPmatToSnapSingle <- function(obj, file){
        # close the previously opened H5 file
		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
        # check the input
        if(!is(obj, "snap")){stop(paste("Error @addPmat: ", file, " does not exist!", sep=""))}
        if(!file.exists(file)){stop(paste("Error @addPmat: ", file, " does not exist!", sep=""))};
        if(!isSnapFile(file)){stop(paste("Error @addPmat: ", file, " is not a snap-format file!", sep=""))};

        ############################################################################
		barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @addBmat: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));
        options(scipen=999);
        binChrom = tryCatch(binChrom <- h5read(file, "PM/peakChrom"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakChrom' not found in ",file))})
        binStart = tryCatch(binStart <- h5read(file, "PM/peakStart"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakStart' not found in ",file))})
        binEnd = tryCatch(binEnd <- h5read(file, "PM/peakEnd"), error = function(e) {stop(paste("Warning @addPmat: 'PM/peakEnd' not found in ",file))})

        if((length(binChrom) == 0) || (length(binStart) == 0)){stop("Error @readSnap: bin is empty! Does not support empty snap file")}
        if(length(binChrom) != length(binStart)){
                stop(paste("Error @addPmat: ", "binChrom and binStart has different length!", sep=""))
        }else{
                nBin = length(binChrom);
        }

        bins = GRanges(binChrom, IRanges(as.numeric(binStart),binEnd), name=paste(paste(binChrom, binStart, sep=":"), binEnd, sep="-"));
        rm(binChrom, binStart);
        obj@peak = bins;

        idx = as.numeric(tryCatch(idx <- h5read(file, "PM/idx"), error = function(e) {stop(paste("Warning @readSnap: 'PM/idx' not found in ",file))}))
        idy = as.numeric(tryCatch(idy <- h5read(file, "PM/idy"), error = function(e) {stop(paste("Warning @readSnap: 'PM/idy' not found in ",file))}))
        count = as.numeric(tryCatch(count <- h5read(file, "PM/count"), error = function(e) {stop(paste("Warning @readSnap: 'PM/count' not found in ",file))}))

        if(!all(sapply(list(length(idx),length(idy),length(count)), function(x) x == length(count)))){stop("Error: idx, idy and count has different length in the snap file")}
		nBarcode = length(barcode);
		nPeak = length(obj@peak);
		M = sparseMatrix(i=idx, j =idy, x=count, dims=c(nBarcode, nPeak));
		rownames(M) = barcode;
		obj@pmat = 	M[match(obj@barcode, rownames(M)),]
		rm(idx, idy, count, M);
		if(exists('h5closeAll', where='package:rhdf5', mode='function')){
			rhdf5::h5closeAll();		
		}else{
			rhdf5::H5close();
		}
		
		gc();
		return(obj);
}

#' @importFrom methods is
createSnapSingle <- function(file, sample, metaData=TRUE, description=NULL){	
	# close the previously opened H5 file
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	# check the input
	if(!file.exists(file)){stop(paste(file, " does not exist!", sep=""))};
	if(!isSnapFile(file)){stop(paste(file, " is not a snap-format file!", sep=""))};
	if(!(is.logical(metaData))){stop(paste("metaData is not a logical variable!", sep=""))};
	
	if(!(is.null(description))){
		if(!is(description, "character")){
			stop("description must be character object")
		}
	}else{
		description=character()
	}
	
	# create an empty snap object
	res = newSnap();
	############################################################################
	barcode = as.character(tryCatch(barcode <- h5read(file, '/BD/name'), error = function(e) {print(paste("Warning @readSnap: 'BD/name' not found in ",file)); return(vector(mode="character", length=0))}));	
	if(metaData){
		metaData = readMetaData(file);
		if(any((metaData$barcode == barcode) == FALSE)){stop(paste("Error @readSnap: meta data does not match with barcode name!", sep=""))};
		res@metaData = metaData;
	}else{
		metaData = data.frame(barcode=barcode, TN=0, UM=0, PP=0, UQ=0, CM=0);
		res@metaData = metaData;
	}
	nBarcode = length(barcode);
	if((x=nBarcode) == 0L){
		stop("barcode is empty! Does not support reading an empty snap file")
	}
	res@barcode = barcode;
	res@des = description;
	res@sample = rep(sample, length(barcode));
	res@file = rep(normalizePath(file), length(res@barcode));
	if(exists('h5closeAll', where='package:rhdf5', mode='function')){
		rhdf5::h5closeAll();		
	}else{
		rhdf5::H5close();
	}
	gc();
	return(res);	
}
r3fang/SnapATAC documentation built on March 29, 2022, 4:33 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
r3fang/SnapATAC
Single Nucleus Analysis Package for ATAC-Seq

R/snap-utilities.R
In r3fang/SnapATAC: Single Nucleus Analysis Package for ATAC-Seq

Documented in addBmatToSnap addGmatToSnap addPmatToSnap barcodeInSnapFile calBmatCor createSnap createSnapFromBmat createSnapFromGmat createSnapFromPmat exportMetaData extractReads filterBins filterCells isSnapFile readMetaData rmBmatFromSnap rmGmatFromSnap rmPmatFromSnap showBinSizes snapRbind

R Package Documentation

Browse R Packages

We want your feedback!

r3fang/SnapATAC Single Nucleus Analysis Package for ATAC-Seq

R/snap-utilities.R In r3fang/SnapATAC: Single Nucleus Analysis Package for ATAC-Seq

Documented in addBmatToSnap addGmatToSnap addPmatToSnap barcodeInSnapFile calBmatCor createSnap createSnapFromBmat createSnapFromGmat createSnapFromPmat exportMetaData extractReads filterBins filterCells isSnapFile readMetaData rmBmatFromSnap rmGmatFromSnap rmPmatFromSnap showBinSizes snapRbind

R Package Documentation

Browse R Packages

We want your feedback!

r3fang/SnapATAC
Single Nucleus Analysis Package for ATAC-Seq

R/snap-utilities.R
In r3fang/SnapATAC: Single Nucleus Analysis Package for ATAC-Seq