knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    eval = FALSE
)
library(brainstorm)
library(VariantAnnotation)
library(here)
library(dplyr)
library(ggplot2)
library(purrr)
library(pheatmap)

Prep Data

Prep DNA data

snpsGeno <- make_snpsGeno(snpsGeno_VCF)
snpsGeno[1:5, 1:5]

Prep RNA data

Filter "snps snpsCalled_VCF" data

# Make sure colnames are all IDs in pd table
all(colnames(snpsCalled_VCF)  %in% pd_example$SAMPLE_ID)
colnames(snpsCalled_VCF) <- ss(colnames(snpsCalled_VCF),"_accepted")
all(colnames(snpsCalled_VCF)  %in% pd_example$SAMPLE_ID)

dim(snpsCalled_VCF)
snpsCalled_filter <- filter_called(snpsCalled_VCF)
dim(snpsCalled_filter)

Create snpsGeno2 and snpssnpsCalled_VCF

snpsRNA <- make_snpsRNA(snpsGeno_VCF, snpsCalled_filter)
print("snpsGeno2: Matching Genotype snps")
snpsRNA$snpsGeno2[1:5, 1:5]
print("snpsCalled: snpsCalled_VCF RNA snps")
snpsRNA$snpsCalled[1:5, 1:5]

Build Correlation Tables

DNA vs. DNA

No high correlation between samples.

basic_cor <- cor(snpsGeno, use = "pairwise.comp")

pheatmap(
    basic_cor,
    cluster_rows = FALSE,
    show_rownames = FALSE,
    cluster_cols = FALSE,
    show_colnames = FALSE
)
all(colnames(snpsGeno) %in% brain_sentrix$ID)

corLong_dna_dna <- make_corLong(snpsGeno, BrainTable1 = brain_sentrix)
head(corLong_dna_dna)

But samples from the same brain have low correlation.

corLong_dna_dna %>%
    filter(row_BrNum == col_BrNum) 

RNA vs. RNA

basic_cor <- cor(snpsRNA$snpsCalled, use = "pairwise.comp")

pheatmap(
    basic_cor,
    cluster_rows = FALSE,
    show_rownames = FALSE,
    cluster_cols = FALSE,
    show_colnames = FALSE
)
pd_simple <- pd_example[,c("SAMPLE_ID", "RNum", "BrNum", "BrainRegion")]

corLong_rna_rna <- make_corLong(
    snps1 = snpsRNA$snpsCalled,
    BrainTable1 = pd_simple,
    ID_col1 = "SAMPLE_ID"
)
head(corLong_rna_rna)
corLong_rna_rna %>%
    # filter(row_BrNum == col_BrNum) %>%
    ggplot(aes(x = cor)) +
    geom_density() +
    geom_vline(xintercept = 0.59, color = "red", linetype = "dashed")

DNA vs. RNA

corLong_dna_rna <- make_corLong(
    snps1 = snpsRNA$snpsGeno2,
    snps2 = snpsRNA$snpsCalled,
    BrainTable1 = brain_sentrix,
    BrainTable2 = pd_simple,
    ID_col1 = "ID",
    ID_col2 = "SAMPLE_ID"
)

head(corLong_dna_rna)
corLong_dna_rna %>%
    filter(row_BrNum == col_BrNum) %>%
    ggplot(aes(x = cor)) +
    geom_density() +
    geom_vline(xintercept = 0.59, color = "red", linetype = "dashed")

Run Grouper

dna_dna_groups <- grouper(corLong_dna_dna)
length(dna_dna_groups)
table(unlist(purrr::map_int(dna_dna_groups, "nBrNum")))

Find problem groups & samples

dna_multi_br <- keep(dna_dna_groups, ~ .x$nBrNum > 1)
length(dna_multi_br)
rna_rna_groups <- grouper(corLong_rna_rna)
length(rna_rna_groups)

message("How many samples in each group?")
table(purrr::map_int(rna_rna_groups, "n"))

message("How many Brains in each group?")
table(purrr::map_int(rna_rna_groups, "nBrNum"))
dna_rna_groups <- grouper(corLong_dna_rna)
message("How many groups?")
length(dna_rna_groups)

message("How many samples in each group?")
table(purrr::map_int(dna_rna_groups, "n"))

message("How many Brains in each group?")
table(purrr::map_int(dna_rna_groups, "nBrNum"))
multi_samples <- keep(rna_rna_groups, ~ .x$n > 1)

Session Info

sessioninfo::session_info()


LieberInstitute/brainstorm_package documentation built on Dec. 14, 2024, 10:29 p.m.