## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ set.seed(1) options(width = 120) library(knitr) opts_chunk$set(comment = " ", fig.path = "", fig.align = "center", out.width = "90%", indent = 10, cache = FALSE, cache.path = "../cache") ## out.width = "1\\columnwidth" ## uncomment options for HTML output, one of them breaks the image export ## background = "#FFFFFF", dev = 'pdf' ## ----setup, results='hide', echo=FALSE, message=FALSE, warning=FALSE----- library(knitr) # to base64 encode images opts_knit$set(upload.fun = image_uri) knit_hooks$set(fig.cap = function(before, options, envir) { if(!before) { paste('

',options$fig.cap,"

",sep="") } }) ## ----ci_cases_plot, echo=FALSE, message=FALSE, fig.width=14, fig.height=7, fig.cap='Illustrative cases of confidence intervals for somatic variant frequency estimates'---- library(ggplot2) df = data.frame( x = factor(rep(c(""), times = 6)), case = factor(1:6), d = c(0.5, 0.3, -0.525, 0, 0.2, 0), cil = c(0.45, 0.2, -0.60, -0.05, -0.3, -1), ciu = c(0.55, 0.4, -0.45, 0.05, 0.7, 1) ) p = ggplot(df) + geom_hline(aes(yintercept = 0), color = "darkgray") + geom_pointrange(aes(x = x, y = d, ymin = cil, ymax = ciu), size = 1, color = "black") + facet_grid( ~ case) + ylim(-1, 1) + theme_bw() + theme(legend.position = "none") + xlab("") + ylab("pT - pC") print(p) ## ----results='hide', message=FALSE, warning=FALSE------------------------ library(Rariant) library(h5vcData) library(GenomicRanges) library(ggbio) library(ggplot2) ## ------------------------------------------------------------------------ control_bam = system.file("extdata", "NRAS.Control.bam", package = "h5vcData", mustWork = TRUE) test_bam = system.file("extdata", "NRAS.AML.bam", package = "h5vcData", mustWork = TRUE) ## ------------------------------------------------------------------------ roi = GRanges("1", IRanges(start = 115258439, end = 115259089)) ## ------------------------------------------------------------------------ vars = rariant(test_bam, control_bam, roi) ## ------------------------------------------------------------------------ vars ## ------------------------------------------------------------------------ vars_all = rariant(test_bam, control_bam, roi, select = FALSE) head(vars_all, 3) ## ------------------------------------------------------------------------ idx_out = ciOutside(vars_all, 0) ind_out = which(idx_out) vars_all$outside = idx_out table(idx_out) ## ----fig.width=14, fig.height=7, fig.cap='NRAS: Variant frequency confidence intervals and shifts. Red and blue points show the non-consensus rates in the test and control sample. The difference between the two represents the effect size.'---- win = 20 ind_var = (ind_out[1]-win):(ind_out[1]+win) p_ci = plotConfidenceIntervals(vars_all[ind_var]) p_shift = plotAbundanceShift(vars_all[ind_var]) t = tracks(p_ci, p_shift) print(t) ## ----fig.width=14, fig.height=7, fig.cap='NRAS: Non-variant site with sequencing depth'---- ind_low = (100-40):(100+40) p_low = plotConfidenceIntervals(vars_all[ind_low]) p_depth = autoplot(vars_all[ind_low], aes(y = testDepth), geom = "step", col = "darkred") + geom_step(aes(y = controlDepth), col = "steelblue3") + theme_bw() t2 = tracks(p_low, p_depth) print(t2) ## ----eval=FALSE---------------------------------------------------------- ## rariantInspect(vars_all) ## ----results='hide', message=FALSE, warning=FALSE------------------------ library(Rariant) library(GenomicRanges) library(ggbio) ## ------------------------------------------------------------------------ tp53_region = GRanges("chr17", IRanges(7571720, 7590863)) ## ------------------------------------------------------------------------ control_bam = system.file("extdata", "control.bam", package = "Rariant", mustWork = TRUE) test1_bam = system.file("extdata", "test.bam", package = "Rariant", mustWork = TRUE) test2_bam = system.file("extdata", "test2.bam", package = "Rariant", mustWork = TRUE) mix_bam = system.file("extdata", "mix.bam", package = "Rariant", mustWork = TRUE) ## ------------------------------------------------------------------------ v_test1 = rariant(test1_bam, control_bam, tp53_region, select = FALSE) v_test2 = rariant(test2_bam, control_bam, tp53_region, select = FALSE) v_mix = rariant(mix_bam, control_bam, tp53_region, select = FALSE) ## ------------------------------------------------------------------------ poi = unique(c(v_test1[ciOutside(v_test1)], v_test2[ciOutside(v_test2)], v_mix[ciOutside(v_mix)])) length(poi) head(poi) ## ------------------------------------------------------------------------ v_poi1 = v_test1[v_test1 %in% poi] v_poi2 = v_test2[v_test2 %in% poi] v_poi3 = v_mix[v_mix %in% poi] ## ----fig.width=14, fig.height=7, fig.cap='Confidence intervals for simulition study'---- py1 = plotConfidenceIntervals(v_poi1, color = "eventType") py2 = plotConfidenceIntervals(v_poi2, color = "eventType") py3 = plotConfidenceIntervals(v_poi3, color = "eventType") t = tracks(py1, py2, py3) print(t) ## ----fig.width=14, fig.height=7, fig.cap='Abundance shifts for simulition study'---- pa1 = plotAbundanceShift(v_poi1) pa2 = plotAbundanceShift(v_poi2) pa3 = plotAbundanceShift(v_poi3) t2 = tracks(pa1, pa2, pa3) print(t2) ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ library(Rariant) library(h5vc) library(h5vcData) library(GenomicRanges) library(ggbio) library(ggplot2) library(biovizBase) ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ roi = GRanges("chr22", IRanges(39357400, 39357400)) data(genesymbol, package = "biovizBase") apo = reduce(genesymbol[names(genesymbol) %in% "APOBEC3A"]) ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ tallyFile = system.file("extdata", "example.tally.hfs5", package = "h5vcData", mustWork = TRUE) sampleData = getSampleData(tallyFile, "/ExampleStudy/22") stopifnot(file.exists(tallyFile)) data = h5readBlock( filename = tallyFile, group = "/ExampleStudy/22", names = c("Counts", "Coverages", "Deletions"), range = c(start(apo), end(apo)) ) counts = data$Counts counts = counts[5:8, , , ] ## focus on HQ counts mm_test = aperm(counts[ ,2, , ], c(3,1,2)) mm_control = aperm(counts[ ,1, , ], c(3,1,2)) cov = data$Coverages cov_test = aperm(cov[2, , ], c(2,1)) cov_control = aperm(cov[1, , ], c(2,1)) ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ conf_level = 0.99 ## container GRanges gr = GRanges("22", IRanges(start(apo):end(apo), width = 1)) ## both strands k1b = rowSums(colSums(aperm(mm_test, c(3,1,2)))) k2b = rowSums(colSums(aperm(mm_control, c(3,1,2)))) n1b = colSums(aperm(cov_test, c(2,1))) n2b = colSums(aperm(cov_control, c(2,1))) cis = acCi(k1b, n1b, k2b, n2b, conf_level) grb = gr mcols(grb) = cis ## plus strand k1p = rowSums(mm_test[ , ,1]) k2p = rowSums(mm_control[ , ,1]) n1p = cov_test[ ,1] n2p = cov_control[ ,1] cip = acCi(k1p, n1p, k2p, n2p, conf_level) grp = gr mcols(grp) = cip ## minus strand k1m = rowSums(mm_test[ , ,2]) k2m = rowSums(mm_control[ , ,2]) n1m = cov_test[ ,2] n2m = cov_control[ ,2] cim = acCi(k1m, n1m, k2m, n2m, conf_level) grm = gr mcols(grm) = cim ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ idx_out = ciOutside(cis) ind_out = which(idx_out) table(idx_out) ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ idx_over = ciOverlap(cip, cim) table(idx_over) ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ idx_unequal = (n1p > 30 & n1m < 2) | (n1p < 3 & n1m > 30) ind_unequal = which(idx_unequal) sum(idx_unequal) ## ----results='hide', echo=FALSE, message=FALSE, warning=FALSE------------ win = 35 plotCis <- function(idx_show) { pb1 = plotConfidenceIntervals(grb[idx_show]) pp1 = plotConfidenceIntervals(grp[idx_show]) pm1 = plotConfidenceIntervals(grm[idx_show]) t1 = tracks(both = pb1, plus = pp1, minus = pm1) return(t1) } plotMm <- function(pos) { ## mmplot data0 = h5readBlock(filename = tallyFile, group = "/ExampleStudy/22", names = c("Coverages", "Counts", "Deletions"), range = c(pos-win, pos+win)) data0$Counts[c(1:4, 9:12), , , ] = 0 p1 = mismatchPlot(data = data0, sampledata = sampleData, samples = sampleData$Sample[c(2,5)], windowsize = win, position = pos) + theme_bw() + theme(legend.background = element_rect(color = "black", size = 0.1), strip.background = element_rect(fill = NA), legend.position = "none") return(p1) } ## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------- i = 1 idx_show = (ind_out[i]-win):(ind_out[i]+win) pos = start(gr)[ind_out[i]] ## ----warning=FALSE, fig.width=14, fig.height=7, fig.cap='Mismatch plot for case 1: Somatic variant', echo=FALSE---- p1 = plotMm(pos) print(p1) ## ----warning=FALSE, fig.width=14, fig.height=7, fig.cap='Confidence interval plot for case 1: Somatic variant', echo=FALSE---- t1 = plotCis(idx_show) print(t1) ## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------- idx = 1000 idx_show = (idx-win):(idx+win) pos = start(gr)[idx] ## ----fig.width=14, fig.height=7, fig.cap='Mismatch plot for case 2: Absence of a variant', echo=FALSE---- p2 = plotMm(pos) print(p2) ## ----fig.width=14, fig.height=7, fig.cap='Confidence interval plot for case 2: Absence of a variant', echo=FALSE---- t2 = plotCis(idx_show) print(t2) ## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------- i = 2 idx_show = (ind_out[i]-win):(ind_out[i]+win) pos = start(gr)[ind_out[i]] ## ----fig.width=14, fig.height=7, fig.cap='Mismatch plot for case 3: Strand-specific mismatches', echo=FALSE---- p3 = plotMm(pos) print(p3) ## ----fig.width=14, fig.height=7, fig.cap='Confidence interval plot for case 3: Strand-specific mismatches', echo=FALSE---- t3 = plotCis(idx_show) print(t3) ## ----echo=FALSE, message=FALSE, warning=FALSE---------------------------- i = ind_unequal[10] idx_show = (i-win):(i+win) pos = start(gr)[i] ## ----fig.width=14, fig.height=7, fig.cap='Mismatch plot for case 4: Strand-specific differences in sequencing depth', echo=FALSE---- p4 = plotMm(pos) print(p4) ## ----fig.width=14, fig.height=7, fig.cap='Confidence inteval plot for case 4: Strand-specific differences in sequencing depth', echo=FALSE---- t4 = plotCis(idx_show) print(t4) ## ----echo=FALSE, message=FALSE, fig.width=14, fig.height=7, fig.cap='Illustrative cases of confidence intervals for somatic variant frequency estimates for two strands'---- library(ggplot2) df = data.frame( x = factor(rep(c("A", "B"), times = 7)), case = factor(rep(c(5, 6, 7, 4, 1, 2, 3), each = 2)), dx = c(0.65, -0.65, 0.65, 0.20, 0.65, 0, 0.65, 0.55, 0.65, 0, 0.05, -0.05, -0.05, 0.05), cil = c(0.5, -0.8, 0.5, 0.1, 0.5, -0.2, 0.5, 0.4, 0.5, -0.7, -0.1, -0.2, -0.9, -0.8), ciu = c(0.8, -0.5, 0.8, 0.3, 0.8, 0.2, 0.8, 0.7, 0.8, 0.7, 0.2, 0.1, 0.8, 0.9), group = factor(c(rep("n", 2*3), rep("o", 2*4))) ) p = ggplot(df) + geom_hline(aes(yintercept = 0), color = "darkgray") + geom_hline(aes(yintercept = 0.6), color = "darkred", linetype = "dashed") + geom_pointrange(aes(x = x, y = dx, ymin = cil, ymax = ciu), size = 1, color = "black") + facet_wrap(~ case, nrow = 2) + ylim(-1, 1) + theme_bw() + theme(legend.position = "none") + xlab("Strand") + ylab("Shift in non-consensus rate") print(p) ## ----warning=FALSE, fig.height=7, out.width='50%', fig.cap='Confidence interval width - sequencing depth relationship. The identified variant is marked in blue.'---- df = as.data.frame(vars_all) df$ci_width = ciWidth(df) p = ggplot(df) + geom_point(aes_string(x = "(controlDepth + testDepth) / 2", y = "ci_width", col = "outside")) + xlab("Average sequencing depth") + ylab("Confidence interval width") + theme_bw() print(p) ## ------------------------------------------------------------------------ ## WGS n1 = 30 n2 = 30 k1 = 0:(n1-1) k2 = 0:(n2-1) cl = 0.95 n_sample = 1e4 pars = expand.grid(k1 = k1, k2 = k2, n1 = n1, n2 = n2, conf_level = cl) cp_ac = coverageProbability(pars, fun = acCi, n_sample = n_sample) ## ----fig.width=7, fig.height=7, out.width='60%', fig.cap='Coverage probabilities for whole-genome setting'---- p_ac = ggplot(cp_ac) + geom_tile(aes(x = k1, y = k2, fill = cp)) + scale_fill_gradient2(midpoint = 0.95, limits = c(0.9, 1)) + theme_bw() + xlab("kT") + ylab("kC") print(p_ac) ## ----eval=FALSE---------------------------------------------------------- ## source("http://bioconductor.org/biocLite.R") ## biocLite("Rariant") ## ----echo=FALSE, results='markup'---------------------------------------- sessionInfo()