## ----knitr, echo=FALSE, results="hide"----------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide", fig.width=4,fig.height=4.5, message=FALSE) ## ----<style-knitr, eval=TRUE, echo=FALSE, results="asis"----------------- BiocStyle::latex() ## ----options,echo=FALSE-------------------------------------------------- options(digits=3, width=80, prompt=" ", continue=" ") ## ----Import_library_noEcho,echo=TRUE,eval=FALSE-------------------------- ## library(RiboProfiling) ## listInputBam <- c( ## Rsamtools::BamFile("http://genomique.info/data/public/RiboProfiling/ctrl.bam"), ## Rsamtools::BamFile("http://genomique.info/data/public/RiboProfiling/nutlin2h.bam") ## ) ## covData <- riboSeqFromBAM(listInputBam, genomeName="hg19") ## ----Import_library,echo=FALSE------------------------------------------- suppressMessages(library(RiboProfiling)) ## ----riboSeqfromBam,echo=FALSE,eval=TRUE,pdf=FALSE,jpeg=TRUE,fig.width=7---- listeInputBam <- c( Rsamtools::BamFile("http://genomique.info/data/public/RiboProfiling/ctrl.bam"), Rsamtools::BamFile("http://genomique.info/data/public/RiboProfiling/nutlin2h.bam") ) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene covData <- suppressMessages( suppressWarnings( riboSeqFromBAM( listeInputBam, txdb=txdb, listShiftValue=c(-14, -14) ) ) ) ## ----GAlignments_from_BAM,echo=TRUE,eval=FALSE--------------------------- ## aln <- readGAlignments( ## BamFile("http://genomique.info/data/public/RiboProfiling/ctrl.bam") ## ) ## ----Hist_quick,echo=TRUE,eval=TRUE,pdf=FALSE,jpeg=TRUE------------------ data(ctrlGAlignments) aln <- ctrlGAlignments matchLenDistr <- histMatchLength(aln, 0) matchLenDistr[[2]] ## ----CovAroundTSS_quick,eval=FALSE,echo=TRUE,split=TRUE,fig=FALSE-------- ## #transform the GAlignments object into a GRanges object ## #(faster processing of the object) ## alnGRanges <- readsToReadStart(aln) ## #txdb object with annotations ## txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene ## oneBinRanges <- aroundPromoter(txdb, alnGRanges, percBestExpressed=0.001) ## #the coverage in the TSS flanking region for the reads with match sizes 29:31 ## listPromoterCov <- ## readStartCov( ## alnGRanges, ## oneBinRanges, ## matchSize=c(29:31), ## fixedInterval=c(-20, 20), ## renameChr="aroundTSS", ## charPerc="perc" ## ) ## plotSummarizedCov(listPromoterCov) ## ----TSS_Cov,echo=FALSE,eval=TRUE,dev="pdf",fig.height=10,pdf=FALSE,jpeg=TRUE---- #transform the GAlignments object into a GRanges object #(faster processing of the object) alnGRanges <- readsToReadStart(aln) #make a txdb object containing the annotations for the specified species. #In this case hg19. txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene #Please make sure that seqnames of txdb correspond to the seqnames #of the alignment files ("chr" particle) #if not rename the txdb seqlevels #renameSeqlevels(txdb, sub("chr", "", seqlevels(txdb))) #get the flanking region around the promoter of the 0.1% best expressed CDSs oneBinRanges <- aroundPromoter(txdb, alnGRanges, percBestExpressed=0.001) #the coverage in the TSS flanking region for the summarized read match sizes #the coverage in the TSS flanking region for the reads with match sizes 29:31 listPromoterCov <- suppressWarnings( readStartCov( alnGRanges, oneBinRanges, matchSize=c(29:32), fixedInterval=c(-20, 20), renameChr="aroundTSS", charPerc="perc" ) ) suppressMessages(plotSummarizedCov(listPromoterCov)) ## ----CountReads_echo,echo=TRUE,eval=FALSE-------------------------------- ## #keep only the match read sizes 30-33 ## alnGRanges <- alnGRanges[which(!is.na(match(alnGRanges$score,30:33)))] ## #get all CDSs by transcript ## cds <- cdsBy(txdb, by="tx", use.names=TRUE) ## #get all exons by transcript ## exonGRanges <- exonsBy(txdb, by="tx", use.names=TRUE) ## #get the per transcript relative position of start and end codons ## cdsPosTransc <- orfRelativePos(cds, exonGRanges) ## #compute the counts on the different features ## #after applying the specified shift value on the read start along the transcript ## countsDataCtrl1 <- ## countShiftReads( ## exonGRanges=exonGRanges[names(cdsPosTransc)], ## cdsPosTransc=cdsPosTransc, ## alnGRanges=alnGRanges, ## shiftValue=-14 ## ) ## head(countsDataCtrl1[[1]]) ## listCountsPlots <- countsPlot( ## list(countsDataCtrl1[[1]]), ## grep("_counts$", colnames(countsDataCtrl1[[1]])), ## 1 ## ) ## listCountsPlots ## ----CountReads,echo=FALSE,eval=TRUE,fig.width=7,pdf=FALSE,jpeg=TRUE----- #get all CDSs by transcript cds <- GenomicFeatures::cdsBy(txdb, by="tx", use.names=TRUE) #get all exons by transcript exonGRanges <- GenomicFeatures::exonsBy(txdb, by="tx", use.names=TRUE) #get the per transcript relative position of start and end codons cdsPosTransc <- orfRelativePos(cds, exonGRanges) #compute the counts on the different features after applying #the specified shift value on the read start along the transcript countsDataCtrl1 <- countShiftReads( exonGRanges[names(cdsPosTransc)], cdsPosTransc, alnGRanges, -14 ) listCountsPlots <- countsPlot( list(countsDataCtrl1[[1]]), grep("_counts$", colnames(countsDataCtrl1[[1]])), 1 ) invisible(capture.output( suppressWarnings( suppressMessages( print(listCountsPlots)) ) ) ) head(countsDataCtrl1[[1]], n=3) ## ----codon_cov_position,echo=TRUE,eval=TRUE------------------------------ data(codonIndexCovCtrl) head(codonIndexCovCtrl[[1]], n=3) ## ----CodonAnalysis_echo,fig=FALSE,echo=TRUE,eval=FALSE------------------- ## listReadsCodon <- countsDataCtrl1[[2]] ## #get the names of the expressed ORFs grouped by transcript ## cds <- cdsBy(txdb, use.names=TRUE) ## orfCoord <- cds[names(cds) %in% names(listReadsCodon)] ## #chromosome names should correspond between the BAM, ## #the annotations, and the genome ## genomeSeq <- BSgenome.Hsapiens.UCSC.hg19 ## #codon frequency, coverage, and annotation ## codonData <- codonInfo(listReadsCodon, genomeSeq, orfCoord) ## ----CodonAnalysis,fig=FALSE,echo=FALSE,eval=TRUE------------------------ listReadsCodon <- countsDataCtrl1[[2]] #get the names of the ORFs for which we have coverage #grouped by transcript cds <- GenomicFeatures::cdsBy(txdb, use.names=TRUE) orfCoord <- cds[names(cds) %in% names(listReadsCodon)] genomeSeq <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19 #codon frequency, coverage, and annotation codonData <- codonInfo(listReadsCodon, genomeSeq, orfCoord) ## ----CountReads_res,fig=FALSE,results=TRUE,echo=TRUE,split=FALSE--------- data("codonDataCtrl") head(codonDataCtrl[[1]], n=3) ## ----CodonPCA,echo=TRUE,eval=TRUE,pdf=FALSE,jpeg=TRUE-------------------- codonUsage <- codonData[[1]] codonCovMatrix <- codonData[[2]] #keep only genes with a minimum number of reads nbrReadsGene <- apply(codonCovMatrix, 1, sum) ixExpGenes <- which(nbrReadsGene >= 50) codonCovMatrix <- codonCovMatrix[ixExpGenes, ] #get the PCA on the codon coverage codonCovMatrixTransp <- t(codonCovMatrix) rownames(codonCovMatrixTransp) <- colnames(codonCovMatrix) colnames(codonCovMatrixTransp) <- rownames(codonCovMatrix) listPCACodonCoverage <- codonPCA(codonCovMatrixTransp, "codonCoverage") listPCACodonCoverage[[2]] ## ----sessInfo,echo=TRUE,eval=TRUE---------------------------------------- sessionInfo()