# Nothing
## -----------------------------------------------------------------------------
# Install cmapR from Bioconductor, but only when it is not already available,
# so that re-running this script does not require network access each time.
# BiocManager itself is bootstrapped from CRAN first if it is missing.
if (!requireNamespace("cmapR", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("cmapR")
}
## -----------------------------------------------------------------------------
library(cmapR)
## -----------------------------------------------------------------------------
# display the ds object (presumably the example dataset provided by cmapR)
ds
## -----------------------------------------------------------------------------
# pull out the data matrix
m <- mat(ds)
# row side: metadata, then ids
rdesc <- meta(ds, dimension = "row")
rid <- ids(ds, dimension = "row")
# column side: metadata, then ids
cdesc <- meta(ds, dimension = "column")
cid <- ids(ds, dimension = "column")
## -----------------------------------------------------------------------------
# zero out the top-left 10 x 10 corner of the matrix and store it back
# note that the replacement matrix must have the same dimensions as
# the matrix currently held by the object
m[seq_len(10), seq_len(10)] <- 0
mat(ds) <- m
# swap in new row and column metadata (one randomly drawn letter per entry)
meta(ds, dimension = "row") <- data.frame(
  x = sample(letters, size = nrow(m), replace = TRUE)
)
meta(ds, dimension = "column") <- data.frame(
  x = sample(letters, size = ncol(m), replace = TRUE)
)
# swap in new row and column ids: "1", "2", ...
ids(ds, dimension = "row") <- as.character(seq_len(nrow(m)))
ids(ds, dimension = "column") <- as.character(seq_len(ncol(m)))
# display the modified object
ds
## -----------------------------------------------------------------------------
# path to the GCTX file to read; this one ships inside the cmapR package,
# but in practice any valid path to a GCT or GCTX file could be used
ds_path <- system.file("extdata", "modzs_n25x50.gctx", package = "cmapR")
my_ds <- parse_gctx(ds_path)
## -----------------------------------------------------------------------------
my_ds
## -----------------------------------------------------------------------------
# read only the first 10 columns, selected by numeric index
my_ds_10_columns <- parse_gctx(ds_path, cid = seq_len(10))
my_ds_10_columns
## -----------------------------------------------------------------------------
# load only the column metadata from the file
col_meta <- read_gctx_meta(ds_path, dim = "col")
# locate the signatures for vemurafenib by searching the 'pert_iname' column
idx <- which(col_meta$pert_iname == "vemurafenib")
# pass those positions as 'cid' so that only the matching columns are read
vemurafenib_ds <- parse_gctx(ds_path, cid = idx)
## -----------------------------------------------------------------------------
# same selection, this time by character id taken from the 'id' column
col_ids <- col_meta$id[idx]
vemurafenib_ds2 <- parse_gctx(ds_path, cid = col_ids)
# compare the results of the two selection methods
identical(vemurafenib_ds, vemurafenib_ds2)
## -----------------------------------------------------------------------------
# build a 10-column matrix from 100 random normal draws
# note that row and column names must either be set on the matrix itself
# (as done here, via 'dimnames') or be passed as the 'rid' and 'cid'
# arguments to GCT
m <- matrix(
  stats::rnorm(100),
  ncol = 10,
  dimnames = list(letters[1:10], LETTERS[1:10])
)
# wrap the matrix in a new GCT object and display it
my_new_ds <- new("GCT", mat = m)
my_new_ds
## -----------------------------------------------------------------------------
# row/column annotations can also be supplied, as data.frames
# note these are just arbitrary annotations used to illustrate the call
rdesc <- data.frame(id = letters[1:10], type = rep(c(1, 2), each = 5))
cdesc <- data.frame(id = LETTERS[1:10], type = rep(c(3, 4), each = 5))
my_new_ds <- new("GCT", mat = m, rdesc = rdesc, cdesc = cdesc)
my_new_ds
## -----------------------------------------------------------------------------
# the matrix_only argument extracts just the matrix
my_ds_no_meta <- parse_gctx(ds_path, matrix_only = TRUE)
my_ds_no_meta
## -----------------------------------------------------------------------------
# 'dim' specifies which dimension to annotate, and 'keyfield' names the
# column in the annotations that corresponds to the column ids in the matrix
my_ds_no_meta <- annotate_gct(
  my_ds_no_meta,
  col_meta,
  dim = "col",
  keyfield = "id"
)
my_ds_no_meta
## -----------------------------------------------------------------------------
# slice the in-memory object using the cid parameter
vemurafenib_ds3 <- subset_gct(
  my_ds,
  cid = which(col_meta$pert_iname == "vemurafenib")
)
identical(vemurafenib_ds, vemurafenib_ds3)
## -----------------------------------------------------------------------------
# reshape to long form
vemurafenib_ds3_melted <- melt_gct(vemurafenib_ds3)
## -----------------------------------------------------------------------------
# boxplot of the matrix values, one box per gene symbol
library(ggplot2)
ggplot(
  data = vemurafenib_ds3_melted,
  mapping = aes(x = pr_gene_symbol, y = value)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
## -----------------------------------------------------------------------------
# pull the data matrix out of the my_ds object
m <- mat(my_ds)
## -----------------------------------------------------------------------------
# means along each row and each column
row_means <- rowMeans(m)
col_means <- colMeans(m)
message("means:")
head(row_means)
head(col_means)
# maxima along each row (MARGIN = 1) and each column (MARGIN = 2) via 'apply'
row_max <- apply(m, MARGIN = 1, FUN = max)
col_max <- apply(m, MARGIN = 2, FUN = max)
message("maxes:")
head(row_max)
head(col_max)
## -----------------------------------------------------------------------------
# transpose a GCT object - this also swaps row and column annotations
my_ds_transpose <- transpose_gct(my_ds)
my_ds_transpose
# convert a GCT object's matrix to ranks
# 'dim' controls the direction along which the ranks are calculated
my_ds_rank_by_column <- rank_gct(my_ds, dim = "col")
# plot z-score vs rank for the first 25 genes (rows)
ranked_m <- mat(my_ds_rank_by_column)
plot(
  x = ranked_m[seq_len(25), ],
  y = m[seq_len(25), ],
  xlab = "rank",
  ylab = "differential expression score",
  main = "score vs. rank"
)
## -----------------------------------------------------------------------------
# write 'my_ds' in both GCT and GCTX format
# the output name "my_ds" is a relative path, so the files land in the
# current working directory
write_gct(my_ds, "my_ds")
write_gctx(my_ds, "my_ds")
# write_gctx can also compress the dataset upon write,
# which can be controlled using the 'compression_level' option.
# the higher the value, the greater the compression, but the
# longer the read/write time
write_gctx(my_ds, "my_ds_compressed", compression_level = 9)
## -----------------------------------------------------------------------------
# coerce ds (an object of class GCT) to a SummarizedExperiment and display it
se <- as(ds, "SummarizedExperiment")
se
## -----------------------------------------------------------------------------
# report the session details
sessionInfo()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.