## ----style, eval=TRUE, echo=FALSE, results='asis'------------------------
BiocStyle::latex2()

## ----knitr, echo=FALSE, results='hide'-----------------------------------
library("knitr")
opts_chunk$set(tidy=FALSE,dev="pdf",fig.show="hide",
               fig.width=4,fig.height=4.5,
               dpi=300,# increase dpi to avoid pixelised pngs
               message=FALSE)

## ----options,results='hide',echo=FALSE-----------------------------------
options(digits=2, prompt=" ", continue=" ")

## ----systemFile----------------------------------------------------------
pythonScriptsDir = system.file( "python_scripts", package="DEXSeq" )
list.files(pythonScriptsDir)

## ----systemFileCheck,echo=FALSE,results='hide'---------------------------
system.file( "python_scripts", package="DEXSeq", mustWork=TRUE )

## ----loadDEXSeq----------------------------------------------------------
inDir = system.file("extdata", package="pasilla")
countFiles = list.files(inDir, pattern="fb.txt$", full.names=TRUE)
basename(countFiles)
flattenedFile = list.files(inDir, pattern="gff$", full.names=TRUE)
basename(flattenedFile)

## ----sampleTable---------------------------------------------------------
sampleTable = data.frame(
   row.names = c( "treated1", "treated2", "treated3", 
      "untreated1", "untreated2", "untreated3", "untreated4" ),
   condition = c("knockdown", "knockdown", "knockdown",  
      "control", "control", "control", "control" ),
   libType = c( "single-end", "paired-end", "paired-end", 
      "single-end", "single-end", "paired-end", "paired-end" ) )

## ----displaySampleTable--------------------------------------------------
sampleTable

## ----makeecs, eval=TRUE--------------------------------------------------
suppressPackageStartupMessages( library( "DEXSeq" ) )

dxd = DEXSeqDataSetFromHTSeq(
   countFiles,
   sampleData=sampleTable,
   design= ~ sample + exon + condition:exon,
   flattenedfile=flattenedFile )

## ----start---------------------------------------------------------------
genesForSubset = read.table( 
  file.path(inDir, "geneIDsinsubset.txt"), 
  stringsAsFactors=FALSE)[[1]]

dxd = dxd[geneIDs( dxd ) %in% genesForSubset,]

## ----seeColData----------------------------------------------------------
colData(dxd)

## ----seeCounts-----------------------------------------------------------
head( counts(dxd), 5 )

## ----seeSplitted---------------------------------------------------------
split( seq_len(ncol(dxd)), colData(dxd)$exon )

## ----seeCounts2----------------------------------------------------------
head( featureCounts(dxd), 5 )

## ----fData---------------------------------------------------------------
head( rowRanges(dxd), 3 )

## ----pData---------------------------------------------------------------
sampleAnnotation( dxd )

## ----sizeFactors1--------------------------------------------------------
dxd = estimateSizeFactors( dxd )

## ----estDisp1------------------------------------------------------------
dxd = estimateDispersions( dxd )

## ----fitdiagnostics, dev='png', fig.show='asis', fig.small=TRUE, fig.cap='Fit Diagnostics. The initial per-exon dispersion estimates (shown by black points), the fitted mean-dispersion values function (red line), and the shrinked values in blue.\\label{figure/fitdiagnostics-1}'----
plotDispEsts( dxd )

## ----testForDEU1,cache=TRUE----------------------------------------------
dxd = testForDEU( dxd )

## ----estFC,cache=TRUE----------------------------------------------------
dxd = estimateExonFoldChanges( dxd, fitExpToVar="condition")

## ----results1,cache=TRUE-------------------------------------------------
dxr1 = DEXSeqResults( dxd )
dxr1

## ----results2,cache=TRUE-------------------------------------------------
mcols(dxr1)$description

## ----tallyExons----------------------------------------------------------
table ( dxr1$padj < 0.1 )

## ----tallyGenes----------------------------------------------------------
table ( tapply( dxr1$padj < 0.1, dxr1$groupID, any ) )

## ----MvsA, dev='png', fig.show='asis', fig.small=TRUE, fig.cap='MA plot. Mean expression versus $\\log_2$ fold change plot. Significant hits (at \\Robject{padj}<0.1) are coloured in red.\\label{figure/MvsA-1}'----
plotMA( dxr1, cex=0.8 )

## ----design--------------------------------------------------------------
sampleAnnotation(dxd)

## ----formulas2-----------------------------------------------------------
formulaFullModel    =  ~ sample + exon + libType:exon + condition:exon
formulaReducedModel =  ~ sample + exon + libType:exon 

## ----estDisps_again, cache=TRUE, results='hide'--------------------------
dxd = estimateDispersions( dxd, formula = formulaFullModel )

## ----test_again, cache=TRUE----------------------------------------------
dxd = testForDEU( dxd, 
	reducedModel = formulaReducedModel, 
        fullModel = formulaFullModel )

## ----res_again-----------------------------------------------------------
dxr2 = DEXSeqResults( dxd )

## ----table2--------------------------------------------------------------
table( dxr2$padj < 0.1 )

## ----table3--------------------------------------------------------------
table( before = dxr1$padj < 0.1, now = dxr2$padj < 0.1 )

## ----plot1, fig.height=8, fig.width=12-----------------------------------
plotDEXSeq( dxr2, "FBgn0010909", legend=TRUE, cex.axis=1.2, cex=1.3,
   lwd=2 )

## ----checkClaim,echo=FALSE-----------------------------------------------
wh = (dxr2$groupID=="FBgn0010909")
stopifnot(sum(dxr2$padj[wh] < formals(plotDEXSeq)$FDR)==1)

## ----plot2, fig.height=8, fig.width=12-----------------------------------
plotDEXSeq( dxr2, "FBgn0010909", displayTranscripts=TRUE, legend=TRUE,
   cex.axis=1.2, cex=1.3, lwd=2 )

## ----plot3, fig.height=8, fig.width=12-----------------------------------
plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, norCounts=TRUE,
   legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----plot4, fig.height=8, fig.width=12-----------------------------------
plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, splicing=TRUE,
   legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----DEXSeqHTML,cache=TRUE, eval=FALSE-----------------------------------
## DEXSeqHTML( dxr2, FDR=0.1, color=c("#FF000080", "#0000FF80") )

## ----para1,cache=TRUE,results='hide', eval=FALSE-------------------------
## BPPARAM = MultiCoreParam(workers=4)
## dxd = estimateSizeFactors( dxd )
## dxd = estimateDispersions( dxd, BPPARAM=BPPARAM)
## dxd = testForDEU( dxd, BPPARAM=BPPARAM)
## dxd = estimateExonFoldChanges(dxd, BPPARAM=BPPARAM)

## ----alldeu, cache=TRUE--------------------------------------------------
dxr = DEXSeq(dxd)
class(dxr)

## ----pergeneqval---------------------------------------------------------
numbOfGenes <- sum( perGeneQValue(dxr) < 0.1)
numbOfGenes

## ----buildExonCountSetLoadPacks,cache=TRUE, eval=FALSE-------------------
## library(GenomicRanges)
## library(GenomicFeatures)
## library(GenomicAlignments)

## ----buildExonCountSetDownloadAnno,cache=TRUE, eval=FALSE----------------
## hse = makeTxDbFromBiomart( biomart="ensembl",
##    dataset="hsapiens_gene_ensembl" )

## ----buildExonCountSetDisjoin,cache=TRUE, eval=FALSE---------------------
## exonicParts = disjointExons( hse, aggregateGenes=FALSE )

## ----buildExonCountSet2FindBAMs,cache=TRUE, eval=FALSE-------------------
## bamDir = system.file( "extdata", package="parathyroidSE",
##    mustWork=TRUE )
## fls = list.files( bamDir, pattern="bam$", full=TRUE )

## ----buildExonCountSet2FindBAMs2,cache=TRUE, eval=FALSE------------------
## bamlst = BamFileList( fls, index=character(), yieldSize=100000,
##    obeyQname=TRUE )
## SE = summarizeOverlaps( exonicParts, bamlst, mode="Union",
##    singleEnd=FALSE, ignore.strand=TRUE, inter.feature=FALSE,
##    fragments=TRUE )

## ----buildExonCountSet3,cache=TRUE, eval=FALSE---------------------------
## colData(SE)$condition = c("A", "A", "B")
## DEXSeqDataSetFromSE( SE, design= ~ sample + exon + condition:exon )

## ----acc-----------------------------------------------------------------
head( geneIDs(dxd) )
head( exonIDs(dxd) )

## ----grmethods-----------------------------------------------------------
interestingRegion = GRanges( "chr2L", 
   IRanges(start=3872658, end=3875302) )
subsetByOverlaps( query=dxr, subject=interestingRegion )
findOverlaps( query=dxr, subject=interestingRegion )

## ----sessionInfo---------------------------------------------------------
sessionInfo()