Nothing
chromosomeVis <- function(
sample,
sv_sample,
dbSNP_file,
Exac_file,
chromosomes,
pngWidth = 1600,
pngHeight = 1200,
caller = "speedseq",
MA_Window = 1000,
coding_regions_file = system.file("extdata", "nexterarapidcapture_exome_targetedregions_v1.2_chr19_9-10.bed",
package = "RareVariantVis"),
annotation_file = system.file("extdata", "UCSC_hg19_chr9_9-10_refSeq_160702.txt",
package = "RareVariantVis"),
uniprot_file = system.file("extdata", "uniprot-all_chr19_9-10.txt",
package = "RareVariantVis")
) {
sampleName = unlist(strsplit(basename(sample), '[.]'))[1]
if (missing(dbSNP_file)) {
dbSNP_file = system.file("extdata", "All_20160601_chr19_9-10.vcf.recode.vcf.gz", package =
"RareVariantVis")
}
if (missing(Exac_file)) {
Exac_file = system.file("extdata", "ExAC.r0.3.1.sites.vep_chr19_9-10.vcf.recode.vcf.gz", package =
"RareVariantVis")
}
###############################################
# Part 0 - prepare adnnotations
###############################################
# Centromeres for hg19
url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz"
cytobands = read.delim(
text=readLines(gzcon(url(url))),
header=FALSE, sep="\t", stringsAsFactors = FALSE
)
cytobands_acen = cytobands[cytobands$V5 == "acen",]
cytobands_chr = cytobands_acen[1:dim(cytobands_acen)[1] %% 2 == 1,]
cytobands_start = cytobands_acen[1:dim(cytobands_acen)[1] %% 2 == 1,]$V2
cytobands_end = cytobands_acen[1:dim(cytobands_acen)[1] %% 2 == 0,]$V3
cytobands_summary = cbind.data.frame(as.character(cytobands_chr$V1),
cytobands_start, cytobands_end, stringsAsFactors = FALSE)
colnames(cytobands_summary) = c("Chrom", "Start", "Stop")
url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz"
chromosome_lengths = read.delim(
text=readLines(gzcon(url(url))),
header=FALSE, sep="\t", stringsAsFactors = FALSE
)
chr_lengths = cbind(chromosome_lengths[1:24,1:2],
ceiling(chromosome_lengths[1:24,2] / 10000000) * 10000000)
colnames(chr_lengths) = c("Chrom", "ChromLen", "WinSize")
temp_centromeres = merge(cytobands_summary, chr_lengths, by = "Chrom")
order_rows_centromeres = match(c(1:22, "X", "Y"), substr(temp_centromeres$Chrom, 4,
nchar(as.character(temp_centromeres$Chrom))))
centromeres = temp_centromeres[order_rows_centromeres,]
# End of centromeres preparation
# Annotation files
# centromeres_file = system.file("extdata", "CentromeresHg19.txt", package =
# "RareVariantVis")
# coding_regions_file = system.file("extdata",
# "nexterarapidcapture_exome_targetedregions_v1.2_chr19_9-10.bed",
# package = "RareVariantVis")
# annotation_file = system.file("extdata", "UCSC_hg19_chr9_9-10_refSeq_160702.txt", package =
# "RareVariantVis")
# uniprot_file = system.file("extdata", "uniprot-all_chr19_9-10.txt", package =
# "RareVariantVis")
# load annotations
# centromeres = read.delim(centromeres_file,
# header = TRUE,
# stringsAsFactors = FALSE)
anno_whole = read.delim(annotation_file,
header = TRUE,
stringsAsFactors = FALSE)
uniprot = read.delim(uniprot_file,
header = TRUE,
stringsAsFactors = FALSE)
exome = read.delim(coding_regions_file,
header = FALSE,
stringsAsFactors = FALSE)
# create table with empty row (to make binding work properly)
final_table = data.frame(
chromosome = "",
final_positions = NA,
variant_type = "",
ref_allele = "",
alt_allele = "",
final_variations = NA,
conservation = NA,
gene_name = "",
in_exon = NA,
stringsAsFactors = FALSE
)
final_table = cbind(final_table, uniprot[1, ])
sv_table = data.frame(
chromosome = "",
start_position = NA,
ID = "",
REF = "",
ALT = "",
QUAL = NA,
SVTYPE = "",
GT = "",
end_position = NA,
gene_name = "",
in_exon = NA,
stringsAsFactors = FALSE
)
sv_table = cbind(sv_table, uniprot[1, ])
complex_table = data.frame(
chromosome = "",
start_position = NA,
gene_name = "",
in_exon = NA,
stringsAsFactors = FALSE
)
complex_table = cbind(complex_table, uniprot[1, ])
# begin chromosome loop
for (chromosome in chromosomes) {
# determine chromosome index
if (chromosome == "X") {
chromosomeIndex = 23
} else if (chromosome == "Y") {
chromosomeIndex = 24
} else {
chromosomeIndex = as.integer(chromosome)
}
###############################################
# Part 1 - read 1 chromosome from a genome
###############################################
param = ScanVcfParam(which = GRanges(chromosome, IRanges(1, 250000000)))
vcf = readVcf(sample, genome = "hg19", param = param)
if (caller == "speedseq"){
firstADs = sapply(geno(vcf)[["AO"]], function(m) m[1])
secondADs = sapply(geno(vcf)[["RO"]], function(m) m[1])
positions = start(rowRanges(vcf))
variation = firstADs / (firstADs + secondADs)
} else {
firstADs = sapply( geno(vcf)[["AD"]], function(m) m[1] )
secondADs = sapply( geno(vcf)[["AD"]], function(n) n[2] )
positions = start(rowRanges(vcf))
variation = secondADs / (firstADs + secondADs)
}
###############################################
# Part 2 - Select coding variants
###############################################
# check for proper chromosome
chr_select = which(exome[, 1] == paste0("chr", chromosome))
# binary interval search
lo = exome$V2[chr_select]
hi = exome$V3[chr_select]
# find overlapping ranges
overlapping = vector(mode = "numeric", length = 0)
if (length(lo) >= 2) {
for (i in 2:length(lo)) {
if (hi[i - 1] >= lo[i]) {
overlapping = c(overlapping, i)
}
}
}
# remove boundaries corresponding to overlapping ranges
if (length(overlapping) > 0) {
lo = lo[-overlapping] - 0.1 # 0.1 is to handle properly boundary values
hi = hi[-(overlapping - 1)] + 0.1
}
coding_ranges = c(rbind(lo, hi))
# interleaved vector of low and high exon borders
coding_sel = findInterval(positions, coding_ranges)
# odd indices correspond to positions inside intervals
coding_sel = c(0, which(coding_sel %% 2 == 1))
coding_pos = positions[coding_sel]
coding_var = variation[coding_sel]
###############################################
# Part 3 - Select rare dbSNP variants
###############################################
# beware of strange CAF format!
param2 = ScanVcfParam(which = GRanges(
chromosome,
IRanges(start = coding_pos, end = coding_pos)
))
vcf2 = readVcf(dbSNP_file, genome = "hg19", param = param2)
# binary interval search
lo_vcf2 = start(rowRanges(vcf2))
hi_vcf2 = end(rowRanges(vcf2))
# find overlapping ranges to identify complex variants
overlapping_vcf2 = vector(mode = "numeric", length = 0)
if (length(lo_vcf2) >= 2) {
for (i in 2:length(lo_vcf2)) {
if (hi_vcf2[i - 1] >= lo_vcf2[i]) {
overlapping_vcf2 = c(overlapping_vcf2, i)
}
}
}
# indexes of starting records for new coding positions
first_codpos_ind = setdiff(1:length(lo_vcf2), overlapping_vcf2)
# simple and complex variants in dbSNP
prev_present_ind = first_codpos_ind - 1
simple_variants_dbsnp_ind = intersect(first_codpos_ind, prev_present_ind)
# positions of simple and complex variants
simple_variant_positions = lo_vcf2[simple_variants_dbsnp_ind]
complex_variant_positions = coding_pos[which(is.na(match(
coding_pos, simple_variant_positions
)))]
# handle allelic frequencies
CAF_array = matrix(NA,
nrow = length(simple_variant_positions),
ncol = 30)
rareDBsnp = rep(TRUE, length(simple_variant_positions))
# check if we found anything
if (length(simple_variant_positions) > 0) {
for (i in 1:length(simple_variant_positions)) {
currentCAF = unlist(info(vcf2)[["CAF"]][simple_variants_dbsnp_ind][i])
if (length(currentCAF) > 0) {
CAF_array[i, 1:length(currentCAF)] = currentCAF
}
if (any(currentCAF == ".")) {
rareDBsnp[i] = TRUE
} else {
rareDBsnp[i] = !all(as.numeric(currentCAF) > 0.01)
}
}
rareDBsnp_with_complex_pos = sort(c(
simple_variant_positions[rareDBsnp],
complex_variant_positions
))
rareDBsnp_with_complex_var = variation[match(rareDBsnp_with_complex_pos, positions)]
###############################################
# Part 4 - Select rare Exac variants
###############################################
# beware of strange AF format!
param3 = ScanVcfParam(which = GRanges(
chromosome,
IRanges(start = rareDBsnp_with_complex_pos, end = rareDBsnp_with_complex_pos)
))
vcf3 = readVcf(Exac_file, genome = "hg19", param = param3)
# binary interval search
lo_vcf3 = start(rowRanges(vcf3))
hi_vcf3 = end(rowRanges(vcf3))
# find overlapping ranges to identify complex variants
overlapping_vcf3 = vector(mode = "numeric", length = 0)
if (length(lo_vcf3) >= 2) {
for (i in 2:length(lo_vcf3)) {
if (hi_vcf3[i - 1] >= lo_vcf3[i]) {
overlapping_vcf3 = c(overlapping_vcf3, i)
}
}
}
# indexes of starting records for new coding positions
first_codpos_ind_exac = setdiff(1:length(lo_vcf3), overlapping_vcf3)
# simple and complex variants in dbSNP
prev_present_ind_exac = first_codpos_ind_exac - 1
simple_variants_dbsnp_ind_exac = intersect(first_codpos_ind_exac, prev_present_ind_exac)
# positions of simple and complex variants
simple_variant_positions_exac = lo_vcf3[simple_variants_dbsnp_ind_exac]
complex_variant_positions_exac = rareDBsnp_with_complex_pos[which(is.na(
match(
rareDBsnp_with_complex_pos,
simple_variant_positions_exac
)
))]
# handle allelic frequencies
Exac_AF_array = matrix(NA,
nrow = length(simple_variant_positions_exac),
ncol = 30)
rareExac = rep(TRUE, length(simple_variant_positions_exac))
multipleAltVariant = rep(FALSE, length(simple_variant_positions_exac))
for (i in 1:length(simple_variant_positions_exac)) {
currentAF_exac = unlist(info(vcf3)[["AF"]][simple_variants_dbsnp_ind_exac][i])
if (length(currentAF_exac) > 1) {
rareExac[i] = TRUE
multipleAltVariant[i] = TRUE
}
else if (length(currentAF_exac) < 1) {
rareExac[i] = TRUE
multipleAltVariant[i] = FALSE
}
else{
rareExac[i] = currentAF_exac < 0.01
multipleAltVariant[i] = FALSE
}
}
# Exac filtering Summary
# one variant in position and one alternative allele
simple_nexac_positions = simple_variant_positions_exac[rareExac &
!multipleAltVariant]
simple_nexac_variation = variation[match(simple_nexac_positions, positions)]
# one variant in position and many alternative alleles
complex1_exac_positions = simple_variant_positions_exac[rareExac &
multipleAltVariant]
# many variants in one position
complex2_exac_positions = complex_variant_positions_exac
complex_positions = sort(c(complex1_exac_positions, complex2_exac_positions))
###############################################
# Part 5 - Predict non-synonymous
###############################################
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
if(length(simple_nexac_positions) > 0){
param4 = ScanVcfParam(which = GRanges(
chromosome,
IRanges(start = simple_nexac_positions, end = simple_nexac_positions)
))
vcf4 = readVcf(sample, genome = "hg19", param = param4)
if(caller == "GATK"){
seqlevels(vcf4) = chromosome
}
vcf4 <- renameSeqlevels(vcf4, paste0("chr", chromosome))
synonymous <- predictCoding(vcf4, txdb, Hsapiens)
UniqueNonsyn = unique(synonymous[mcols(synonymous)$CONSEQUENCE == "nonsynonymous"])
UniqueFrames = unique(synonymous[mcols(synonymous)$CONSEQUENCE == "frameshift"])
UniqueNonsen = unique(synonymous[mcols(synonymous)$CONSEQUENCE == "nonsense"])
RareNonsynCoding = c(UniqueNonsyn, UniqueFrames, UniqueNonsen)
final_positions = sort(start(ranges(RareNonsynCoding)))
final_variations = variation[match(final_positions, positions)]
} else {
final_positions = NULL
final_variations = NULL
}
###############################################
# Part 6 - Annotate selected and report
###############################################
# filter annotations by chromosome,
anno = anno_whole[anno_whole$chrom == paste0("chr", chromosome),]
# find genes containing analysed positions
ordering_start = order(anno$txStart)
ordering_end = order(anno$txEnd)
intervals_start = findInterval(final_positions, anno$txStart[ordering_start])
intervals_end = findInterval(final_positions, anno$txEnd[ordering_end])
if (length(final_positions) > 0) {
# get conservation scores
conservations = score(phastCons100way.UCSC.hg19,
GRanges(
seqnames = paste0("chr", chromosome) ,
IRanges(start = final_positions, width = 1)
))
# analyse all positions
for (i in 1:length(final_positions)) {
row_left = list(
chromosome,
final_positions[i],
as.character(mcols(RareNonsynCoding)$CONSEQUENCE[match(final_positions[i],
start(RareNonsynCoding))]),
as.character(mcols(RareNonsynCoding)$REF[match(final_positions[i],
start(RareNonsynCoding))]),
as.character(unlist(
mcols(RareNonsynCoding)$ALT
))[match(final_positions[i],
start(RareNonsynCoding))],
final_variations[i],
conservations[i]
)
starting_before = ordering_start[1:intervals_start[i]]
ending_after = ordering_end[(intervals_end[i] + 1):length(ordering_end)]
common = intersect(starting_before, ending_after)
# proceed only when position intersects with some gene
if (length(common) > 0) {
uniqueMask = !duplicated(anno[common,]$name2)
uniqueCommon = common[uniqueMask]
# for all genes containing the position
for (ig in 1:length(uniqueCommon)) {
# extract gene name
gene_id = uniqueCommon[ig]
gene = anno$name2[gene_id]
exon_starts = as.numeric(unlist(
strsplit(anno$exonStarts[gene_id], split = ",")
))
exon_ends = as.numeric(unlist(
strsplit(anno$exonEnds[gene_id], split = ",")
))
gene_ranges = c(rbind(exon_starts, exon_ends)) # interleaved vector of low and high exon borders
interval = findInterval(final_positions[i], gene_ranges)
exon = (interval %% 2 == 1)
uniprot_row = uniprot[which(uniprot$Gene.names...primary.. == anno[uniqueCommon[ig], ]$name2),]
if (dim(uniprot_row)[1] == 0) {
uniprot_row = rep(NA, 11)
}
row = c(row_left, gene, exon, uniprot_row)
final_table = rbind(final_table, unlist(row))
}
} else {
row = c(row_left, rep(NA, 13))
final_table = rbind(final_table, row)
}
} # end position loop
###############################################
# Part 6b - complex variants
###############################################
# find genes containing analysed positions
intervals_start = findInterval(complex_positions, anno$txStart[ordering_start])
intervals_end = findInterval(complex_positions, anno$txEnd[ordering_end])
if (length(complex_positions) > 0) {
# analyse all positions
for (i in 1:length(complex_positions)) {
row_left = list(
chromosome,
complex_positions[i]
)
starting_before = ordering_start[1:intervals_start[i]]
ending_after = ordering_end[(intervals_end[i] + 1):length(ordering_end)]
common = intersect(starting_before, ending_after)
# proceed only when position intersects with some gene
if (length(common) > 0) {
uniqueMask = !duplicated(anno[common,]$name2)
uniqueCommon = common[uniqueMask]
# for all genes containing the position
for (ig in 1:length(uniqueCommon)) {
# extract gene name
gene_id = uniqueCommon[ig]
gene = anno$name2[gene_id]
exon_starts = as.numeric(unlist(
strsplit(anno$exonStarts[gene_id], split = ",")
))
exon_ends = as.numeric(unlist(
strsplit(anno$exonEnds[gene_id], split = ",")
))
gene_ranges = c(rbind(exon_starts, exon_ends)) # interleaved vector of low and high exon borders
interval = findInterval(complex_positions[i], gene_ranges)
exon = (interval %% 2 == 1)
uniprot_row = uniprot[which(uniprot$Gene.names...primary.. == anno[uniqueCommon[ig], ]$name2),]
if (dim(uniprot_row)[1] == 0) {
uniprot_row = rep(NA, 11)
}
row = c(row_left, gene, exon, uniprot_row)
complex_table = rbind(complex_table, unlist(row))
}
} else {
row = c(row_left, rep(NA, 13))
complex_table = rbind(complex_table, row)
}
} # end position loop
}
}
}
###############################################
# Part 7 - Load and filter structural variants
###############################################
if (!missing(sv_sample)) {
sv_local_table = read.table(
text = "",
col.names = colnames(sv_table),
colClasses = sapply(sv_table, class)
)
param = ScanVcfParam(which = GRanges(chromosome, IRanges(1, 250000000)))
vcf_sv = readVcf(sv_sample, genome = "hg19", param = param)
dupdel_variant_mask = (info(vcf_sv)[["SVTYPE"]] == "DUP") |
(info(vcf_sv)[["SVTYPE"]] == "DEL")
point_variant_mask = info(vcf_sv)[["SVTYPE"]] == "BND"
dupdel_variant_positions_all = start(rowRanges(vcf_sv[dupdel_variant_mask]))
dupdel_variant_ends_all = info(vcf_sv[dupdel_variant_mask])[["END"]]
point_variant_positions_all = start(rowRanges(vcf_sv[point_variant_mask]))
fitv_begins = findInterval(dupdel_variant_positions_all, coding_ranges)
fitv_ends = findInterval(dupdel_variant_ends_all, coding_ranges)
dupdel_variant_sel = which((fitv_begins %% 2 == 1) |
# variant begins inside coding region
(fitv_ends %% 2 == 1) | # variant ends inside coding region
(fitv_ends - fitv_begins >= 1)) # variant spans over multiple intervals
dupdel_variant_positions = dupdel_variant_positions_all[dupdel_variant_sel]
dupdel_variant_ends = dupdel_variant_ends_all[dupdel_variant_sel]
point_variant_sel = which(findInterval(point_variant_positions_all, coding_ranges) %% 2 == 1)
point_variant_positions = point_variant_positions_all[point_variant_sel]
matched_dupdel = match(dupdel_variant_positions, start(rowRanges(vcf_sv)))
matched_point = match(point_variant_positions, start(rowRanges(vcf_sv)))
###############################################
# Part 8 - Annotate duplications/deletions
###############################################
# find genes containing analysed positions
ordering_start = order(anno$txStart)
ordering_end = order(anno$txEnd)
if (length(dupdel_variant_positions)) {
variantStart.geneStart = findInterval(dupdel_variant_positions, anno$txStart[ordering_start])
variantStart.geneEnd = findInterval(dupdel_variant_positions, anno$txEnd[ordering_end])
variantEnd.geneStart = findInterval(dupdel_variant_ends, anno$txStart[ordering_start])
variantEnd.geneEnd = findInterval(dupdel_variant_ends, anno$txEnd[ordering_end])
# analyse begining positions of duplications/deletions
for (i in 1:length(dupdel_variant_positions)) {
row_left = list(
chromosome,
start(vcf_sv[matched_dupdel])[i],
rownames(vcf_sv[matched_dupdel])[i],
"N",
unlist(rowRanges(vcf_sv)$ALT)[matched_dupdel][i],
unlist(rowRanges(vcf_sv)$QUAL)[matched_dupdel][i],
info(vcf_sv)$SVTYPE[matched_dupdel][i],
geno(vcf_sv)$GT[matched_dupdel][i],
info(vcf_sv)$END[matched_dupdel][i]
)
# genes overlapping with variant:
# - start before variant end
# - end after variant begin
startingBeforeVariantEnd = ordering_start[1:variantStart.geneStart[i]]
endingAfterVariantStart = ordering_end[(variantStart.geneEnd[i] +
1):length(ordering_end)]
overlappingGenes = intersect(startingBeforeVariantEnd,
endingAfterVariantStart)
# proceed only when position intersects with some gene
if (length(overlappingGenes) > 0) {
uniqueMask = !duplicated(anno[overlappingGenes,]$name2)
uniqueCommon = overlappingGenes[uniqueMask]
# for all genes containing the position
for (ig in 1:length(uniqueCommon)) {
# extract gene name
gene_id = uniqueCommon[ig]
gene = anno$name2[gene_id]
exon_starts = as.numeric(unlist(
strsplit(anno$exonStarts[gene_id], split = ",")
))
exon_ends = as.numeric(unlist(strsplit(
anno$exonEnds[gene_id], split = ","
)))
# interleaved vector of low and high exon borders
gene_ranges = c(rbind(exon_starts, exon_ends))
variantBegin.interval = findInterval(dupdel_variant_positions[i],
gene_ranges)
variantEnd.interval = findInterval(dupdel_variant_ends[i], gene_ranges)
in_exon = ((variantBegin.interval %% 2 == 1) |
(variantEnd.interval %% 2 == 1) |
(
variantEnd.interval - variantBegin.interval
) >= 1
)
uniprot_row = uniprot[which(uniprot$Gene.names...primary.. == anno[uniqueCommon[ig], ]$name2),]
if (dim(uniprot_row)[1] == 0) {
uniprot_row = rep(NA, 11)
}
row = c(row_left, gene, in_exon, uniprot_row)
}
} else {
row = c(row_left, rep(NA, 13))
}
#instead of rbind which messes row names for empty frames
sv_local_table[nrow(sv_local_table) + 1, ] = row
} # end position loop
}
###############################################
# Part 9 - Annotate point structural variants
###############################################
if (length(point_variant_positions) > 0) {
# find genes containing analysed positions
intervals_start = findInterval(point_variant_positions, anno$txStart[ordering_start])
intervals_end = findInterval(point_variant_positions, anno$txEnd[ordering_end])
# analyse begining/ending positions of duplications/deletions
for (i in 1:length(point_variant_positions)) {
row_left = list(
chromosome,
start(vcf_sv[matched_point])[i],
rownames(vcf_sv[matched_point])[i],
"N",
unlist(rowRanges(vcf_sv)$ALT)[matched_point][i],
unlist(rowRanges(vcf_sv)$QUAL)[matched_point][i],
info(vcf_sv)$SVTYPE[matched_point][i],
geno(vcf_sv)$GT[matched_point][i],
info(vcf_sv)$END[matched_point][i]
)
# consider variant begin positions
starting_before = ordering_start[1:intervals_start[i]]
ending_after = ordering_end[(intervals_end[i] + 1):length(ordering_end)]
common = intersect(starting_before, ending_after)
# proceed only when position intersects with some gene
if (length(common) > 0) {
uniqueMask = !duplicated(anno[common,]$name2)
uniqueCommon = common[uniqueMask]
# for all genes containing the position
for (ig in 1:length(uniqueCommon)) {
# extract gene name
gene_id = uniqueCommon[ig]
gene = anno$name2[gene_id]
exon_starts = as.numeric(unlist(
strsplit(anno$exonStarts[gene_id], split = ",")
))
exon_ends = as.numeric(unlist(strsplit(
anno$exonEnds[gene_id], split = ","
)))
# interleaved vector of low and high exon borders
gene_ranges = c(rbind(exon_starts, exon_ends))
interval = findInterval(point_variant_positions[i],
gene_ranges)
exon = (interval %% 2 == 1)
uniprot_row = uniprot[which(uniprot$Gene.names...primary.. == anno[uniqueCommon[ig], ]$name2),]
if (dim(uniprot_row)[1] == 0) {
uniprot_row = rep(NA, 11)
}
row = c(row_left, gene, exon, uniprot_row)
}
} else {
row = c(row_left, rep(NA, 13))
}
#instead of rbind which messes row names for empty frames
sv_local_table[nrow(sv_local_table) + 1, ] = row
} # end position loop
}
sv_local_table = sv_local_table[order(as.numeric(sv_local_table[, 2])), ]
sv_local_table = sv_local_table[sv_local_table$QUAL > 0, ]
variation_sv = rep(0, length(sv_local_table$start_position))
variation_sv[sv_local_table$GT == "0/1"] = 0.5
variation_sv[sv_local_table$GT == "1/1"] = 1
sv_table = rbind(sv_table, sv_local_table)
}
###############################################
# Part 10 - Plots
###############################################
# prepare png
png(
filename = paste(sampleName, "_chr", chromosome,
".png", sep = ""),
bg = "white",
width = pngWidth,
height = pngHeight,
units = "px",
pointsize = 24
)
# plot all variants
plot(
positions,
variation,
cex = 0.2,
pch = 16,
col = "blue",
cex.lab = 1,
xlab = paste0("Chromosome_", chromosome),
ylab = "Number of alternative reads / depth",
ylim = c(0, 1),
xlim = c(0, max(positions))
)
if (chromosome != "Y") {
MA <- movingAverage(variation, MA_Window)
lines(positions, MA, col = "red", lwd = 2)
}
abline(h = 0.75,
lwd = 4,
col = "red")
abline(v = centromeres$Start[chromosomeIndex],
lwd = 4,
col = "orange")
abline(v = centromeres$Stop[chromosomeIndex],
lwd = 4,
col = "orange")
# plot nonsynonymous/frameshift/nonesene coding variants, simple, rare in dbsnp and exac
points(
final_positions,
final_variations,
cex = 3,
pch = 16,
col = "green"
)
if (!missing(sv_sample)) {
# plot structural variants that passed the filter
points(
sv_local_table$start_position,
variation_sv,
cex = 3,
pch = 16,
col = "orange"
)
}
dev.off()
} # end chromosome loop
final_table[final_table == ""] = NA
final_table = final_table[-1,] # remove dummy first row
final_table$Gene.names...primary.. <-
NULL # remove duplicated column with gene name
# write to output
write.table(
final_table,
paste0("RareVariants_", sampleName, ".txt"),
sep = "\t",
row.names = FALSE,
quote = FALSE
)
complex_table[complex_table == ""] = NA
complex_table = complex_table[-1,] # remove dummy first row
# write to output
write.table(
complex_table,
paste0("ComplexVariants_", sampleName, ".txt"),
sep = "\t",
row.names = FALSE,
quote = FALSE
)
if (!missing(sv_sample)) {
sv_table[sv_table == ""] = NA
sv_table = sv_table[-1,] # remove dummy first row
sv_table$Gene.names...primary.. <-
NULL # remove duplicated column with gene name
write.table(
sv_table,
paste0("StructuralVariants_", sampleName, ".txt"),
sep = "\t",
row.names = FALSE,
quote = FALSE
)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.