## ----setup, echo = FALSE------------------------------------------------------ knitr::opts_chunk$set(message = FALSE, crop = NULL) ## ----load-packages------------------------------------------------------------ library(VariantAnnotation) library(rtracklayer) library(extraChIPs) library(transmogR) library(GenomicFeatures) library(BSgenome.Hsapiens.UCSC.hg38) ## ----chr1--------------------------------------------------------------------- chr1 <- getSeq(BSgenome.Hsapiens.UCSC.hg38, "chr1") chr1 <- as(chr1, "DNAStringSet") names(chr1) <- "chr1" chr1 ## ----sq----------------------------------------------------------------------- sq <- seqinfo(chr1) genome(sq) <- "GRCh38" sq ## ----vcf---------------------------------------------------------------------- vcf <- system.file("extdata/1000GP_subset.vcf.gz", package = "transmogR") vcf <- VcfFile(vcf) ## ----var---------------------------------------------------------------------- var <- cleanVariants(vcf) seqinfo(var) <- sq var ## ----gtf---------------------------------------------------------------------- f <- system.file("extdata/gencode.v44.subset.gtf.gz", package = "transmogR") gtf <- import.gff(f, which = GRanges(sq)) seqinfo(gtf) <- sq gtf ## ----gtf-split---------------------------------------------------------------- gtf <- splitAsList(gtf, gtf$type) ## ----upset-var, fig.cap = "Included variants which overlap exonic sequences, summarised by unique gene ids"---- upsetVarByCol(gtf$exon, var, mcol = "gene_id") ## ----overlaps-by-var---------------------------------------------------------- regions <- defineRegions(gtf$gene, gtf$transcript, gtf$exon, proximal = 0) regions$exon <- granges(gtf$exon) overlapsByVar(regions, var) ## ----trans-mod---------------------------------------------------------------- trans_mod <- transmogrify(chr1, var, gtf$exon, tag = "1000GP", var_tags = TRUE) trans_mod names(trans_mod)[grepl("_si", names(trans_mod))] ## ----chr1-mod----------------------------------------------------------------- chr1_mod <- genomogrify(chr1, var) chr1_mod ## ----gentrome, eval=FALSE----------------------------------------------------- # c(trans_mod, chr1_mod) %>% writeXStringSet("gentrome.fa.gz", compress = TRUE) # writeLines(names(chr1_mod), "decoys.txt") ## ----ref-trans---------------------------------------------------------------- ref_exons_by_trans <- gtf$exon %>% splitAsList(.$transcript_id) ref_trans <- extractTranscriptSeqs(chr1, ref_exons_by_trans) ref_trans ## ----new-exons---------------------------------------------------------------- new_exons <- shiftByVar(gtf$exon, var) ## ----compare-seqinfo---------------------------------------------------------- seqinfo(new_exons) seqinfo(chr1_mod) ## ----new-exon-by-trans-------------------------------------------------------- new_exon_by_trans <- splitAsList(new_exons, new_exons$transcript_id) ## ----tags--------------------------------------------------------------------- tags <- varTags(ref_exons_by_trans, var, tag = "1000GP") names(new_exon_by_trans) <- paste0(names(new_exon_by_trans), tags) ## ----new-trans-mod------------------------------------------------------------ new_trans_mod <- extractTranscriptSeqs(chr1_mod, new_exon_by_trans) ## ----check-modified----------------------------------------------------------- modified <- grepl("_", names(new_trans_mod)) all(new_trans_mod[!modified] == ref_trans[!modified]) all(new_trans_mod[modified] == trans_mod[modified]) ## ----par-y-------------------------------------------------------------------- sq_hg38 <- seqinfo(BSgenome.Hsapiens.UCSC.hg38) parY(sq_hg38) ## ----sj----------------------------------------------------------------------- ec <- c("transcript_id", "transcript_name", "gene_id", "gene_name") sj <- sjFromExons(gtf$exon, extra_cols = ec) sj ## ----chop-sj------------------------------------------------------------------ chopMC(sj) ## ----sj-as-gi----------------------------------------------------------------- sj <- sjFromExons(gtf$exon, extra_cols = ec, as = "GInteractions") subset(sj, transcript_name == "DDX11L17-201") ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()