## ----setup, echo = FALSE------------------------------------------------------
knitr::opts_chunk$set(message = FALSE, crop = NULL)

## ----load-packages------------------------------------------------------------
library(VariantAnnotation)
library(rtracklayer)
library(extraChIPs)
library(transmogR)
library(BSgenome.Hsapiens.UCSC.hg38)

## ----chr1---------------------------------------------------------------------
chr1 <- getSeq(BSgenome.Hsapiens.UCSC.hg38, "chr1")
chr1 <- as(chr1, "DNAStringSet")
names(chr1) <- "chr1"
chr1

## ----var----------------------------------------------------------------------
sq <- seqinfo(chr1)
genome(sq) <- "GRCh38"
vcf <- system.file("extdata/1000GP_subset.vcf.gz", package = "transmogR")
vcf_param <- ScanVcfParam(fixed = "ALT", info = NA, which = GRanges(sq))
var <- rowRanges(readVcf(vcf, param = vcf_param))
seqinfo(var) <- sq
var

## ----gtf----------------------------------------------------------------------
f <- system.file("extdata/gencode.v44.subset.gtf.gz", package = "transmogR")
gtf <- import.gff(f, which = GRanges(sq))
seqinfo(gtf) <- sq
gtf

## ----gtf-split----------------------------------------------------------------
gtf <- splitAsList(gtf, gtf$type)

## ----upset-var, fig.cap = "Included variants which overlap exonic sequences, summarised by unique gene ids"----
upsetVarByCol(gtf$exon, var, mcol = "gene_id")

## ----overlaps-by-var----------------------------------------------------------
regions <- defineRegions(gtf$gene, gtf$transcript, gtf$exon, proximal = 0)
regions$exon <- granges(gtf$exon)
overlapsByVar(regions, var)

## ----chr1-mod-----------------------------------------------------------------
chr1_mod <- genomogrify(chr1, var, tag = "mod")
chr1_mod

## ----trans-mod----------------------------------------------------------------
trans_mod <- transmogrify(
    chr1, var, gtf$exon, tag = "mod", var_tags = TRUE
)
trans_mod
names(trans_mod)[grepl("_si", names(trans_mod))]

## ----par-y--------------------------------------------------------------------
sq_hg38 <- seqinfo(BSgenome.Hsapiens.UCSC.hg38)
parY(sq_hg38)

## ----sj-----------------------------------------------------------------------
ec <- c("transcript_id", "transcript_name", "gene_id", "gene_name")
sj <- sjFromExons(gtf$exon, extra_cols = ec)
sj

## ----chop-sj------------------------------------------------------------------
chopMC(sj)

## ----sj-as-gi-----------------------------------------------------------------
sj <- sjFromExons(gtf$exon, extra_cols = ec, as = "GInteractions")
subset(sj, transcript_name == "DDX11L17-201")

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()