Nothing
#' Get the biotype of the non-coding genes. It is suitable for the
#' GENCODE gtf files
#'
#' @param gtfFile Path of the input gtf file which contains biotype
#' information. The gtf file must be provided from the Ensembl
#' or Gencode site. For space efficiency, gft files should be
#' in a zip format.
#'
#' @return Tabular form of the gtf file with the required features
#' such as gene id and biotypes
#'
#' @import readr
#'
#' @examples
#' fileImport<-system.file("extdata", "temp.gtf", package = "NoRCE")
#' gtf <- extractBiotype(gtfFile = fileImport)
#'
#' @export
#'
extractBiotype <- function(gtfFile) {
mydata <- read_table(gtfFile, comment = '#', col_names = FALSE)
temp <- strsplit(as.character(mydata$X1), "[;\t]+")
#r<-lapply(temp,function(x) grepl("type|transcript_id|gene_id",x))
r <- lapply(temp, function(x)
grepl("gene_type|gene_id", x))
r <- lapply(r, as.logical)
# a <- list()
# for (i in seq_along(r)) {
# tr <- which(r[[i]] == 'TRUE')
# a[[i]] <- temp[[i]][tr]
# }
#
# mat <- t(lapply(a,
# function(x, m)
# c(x, rep(NA, m - length(
# x
# ))),
# max(rapply(a, length))))
# mat<-matrix(unlist(lapply(mat, `[[`, 1)))
a <- data.frame()
for (i in seq_along(r)) {
tr <- which(r[[i]] == 'TRUE')
a[i, seq_len(2)] <- temp[[i]][tr]
}
#gtf <- gsub("^.* ", "", mat, perl = TRUE)
gtf <- apply(a, 2, function(x)
(gsub("^.* ", "", x)))
gtf <- as.data.frame(gsub("\"", "", gtf))
return(gtf)
}
#' Extract the genes that have user provided biotypes. This method is useful
#' when input gene list is mixed or when research of the interest is only
#' focused on specific group of genes.
#'
#' @param gtfFile Input gtf file for the genes provided by the extractBiotype
#' function
#' @param biotypes Selected biotypes for the genes
#'
#' @return Table format of genes with a given biotypes
#'
#' @examples
#' biotypes <- c('unprocessed_pseudogene','transcribed_unprocessed_pseudogene')
#' fileImport<-system.file("extdata", "temp.gtf", package = "NoRCE")
#' extrResult <- filterBiotype(fileImport, biotypes)
#'
#' @export
filterBiotype <- function(gtfFile, biotypes) {
gtf <- extractBiotype(gtfFile = gtfFile)
all <- data.frame(gene = character(), stringsAsFactors = FALSE)
for (i in seq_along(biotypes)) {
index <- which(gtf == biotypes[i], arr.ind = TRUE)
all <- rbind(all, as.data.frame(gtf[index[, 1], 1]))
}
all <-
as.data.frame(
unlist(apply(unique(all), 2, strsplit, '[.]'))[c(TRUE, FALSE)])
colnames(all) <- 'gene'
return(all)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.