## ---- optChunkDefault, include=FALSE-------------------------------------
library(knitr)
opts_chunk$set( # Set these first, to make them default
    message = FALSE,
    warning=FALSE,
    error=FALSE
)
optChunkDefault <- opts_chunk$get()

## ----biocLite, eval=FALSE------------------------------------------------
#  # Currently:
#  devtools::install_github("kevinrue/TVTB")
#  # When hosted on Bioconductor:
#  source("http://bioconductor.org/biocLite.R")
#  biocLite("TVTB")

## ----library-------------------------------------------------------------
library(TVTB)

## ----TVTBparamCreate-----------------------------------------------------
tparam <- TVTBparam(Genotypes(
    ref = "0|0",
    het = c("0|1", "1|0", "0|2", "2|0", "1|2", "2|1"),
    alt = c("1|1", "2|2")),
    ranges = GenomicRanges::GRangesList(
        SLC24A5 = GenomicRanges::GRanges(
            seqnames = "15",
            IRanges::IRanges(
                start = 48413170, end = 48434757)
            )
        )
    )

## ----TVTBparamView-------------------------------------------------------
tparam

## ---- reduceGRangesList--------------------------------------------------
svp <- as(tparam, "ScanVcfParam")
svp

## ----importPhenotypes----------------------------------------------------
phenoFile <- system.file(
    "extdata", "integrated_samples.txt", package = "TVTB")
phenotypes <- S4Vectors::DataFrame(
    read.table(file = phenoFile, header = TRUE, row.names = 1))

## ----vcfFile-------------------------------------------------------------
vcfFile <- system.file(
    "extdata", "chr15.phase3_integrated.vcf.gz", package = "TVTB")
tabixVcf <- Rsamtools::TabixFile(file = vcfFile)

## ----ScanVcfParam--------------------------------------------------------
VariantAnnotation::vcfInfo(svp(tparam)) <- vep(tparam)
VariantAnnotation::vcfGeno(svp(tparam)) <- "GT"
svp(tparam)

## ----preprocessVariants, message=FALSE-----------------------------------
# Import variants as a CollapsedVCF object
vcf <- VariantAnnotation::readVcf(
    tabixVcf, param = tparam, colData = phenotypes)
# Expand into a ExpandedVCF object (bi-allelic records)
vcf <- VariantAnnotation::expand(x = vcf, row.names = TRUE)

## ----vcf, echo=FALSE-----------------------------------------------------
vcf

## ----addOverallFrequencies-----------------------------------------------
initialInfo <- colnames(info(vcf))
vcf <- addOverallFrequencies(vcf = vcf)
setdiff(colnames(info(vcf)), initialInfo)

## ----addFrequenciesOverall-----------------------------------------------
vcf <- addFrequencies(vcf = vcf, force = TRUE)

## ----addPhenoLevelFrequencies--------------------------------------------
initialInfo <- colnames(info(vcf))
vcf <- addPhenoLevelFrequencies(
    vcf = vcf, pheno = "super_pop", level = "AFR")
setdiff(colnames(info(vcf)), initialInfo)

## ----addFrequenciesPhenoLevel--------------------------------------------
initialInfo <- colnames(info(vcf))
vcf <- addFrequencies(
    vcf = vcf, phenos = list(
        super_pop = c("EUR", "AFR"),
        pop = c("GBR", "FIN", "MSL")),
    force = TRUE)
setdiff(colnames(info(vcf)), initialInfo)

## ----FilterRules---------------------------------------------------------
fr <- S4Vectors::FilterRules(list(
    mixed = function(x){
        VariantAnnotation::fixed(x)[,"FILTER"] == "PASS" &
            VariantAnnotation::info(x)[,"MAF"] >= 0.05
    }
))
fr

## ----VcfFixedRules-------------------------------------------------------
fixedR <- VcfFixedRules(list(
    pass = expression(FILTER == "PASS"),
    qual = expression(QUAL > 20)
    ))
fixedR

## ----VcfInfoRules--------------------------------------------------------
infoR <- VcfInfoRules(exprs = list(
    common = expression(MAF > 0.1), # minor allele frequency
    present = expression(ALT + HET > 0) # count of non-REF homozygotes
    ),
    active = c(TRUE, FALSE))
infoR

## ----VcfVepRules---------------------------------------------------------
vepR <- VcfVepRules(exprs = list(
    missense = expression(Consequence %in% c("missense_variant")),
    CADD = expression(CADD_PHRED > 15)
    ))
vepR

## ----VcfFilterRules------------------------------------------------------
vcfRules <- VcfFilterRules(fixedR, infoR, vepR)
vcfRules

## ----deactivateCADD------------------------------------------------------
active(vcfRules)["CADD"] <- FALSE
active(vcfRules)

## ------------------------------------------------------------------------
summary(eval(expr = infoR, envir = vcf))
summary(eval(expr = vcfRules, envir = vcf))
summary(evalSeparately(expr = vcfRules, envir = vcf))

## ----sessionInfo, echo=FALSE---------------------------------------------
sessionInfo()