## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------
BiocStyle::latex()

## ----include=FALSE-------------------------------------------------------
library(knitr)
library(regioneR)
opts_chunk$set(concordance=FALSE)
set.seed(21666641)

## ------------------------------------------------------------------------
  A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000)
  B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000))
  numOverlaps(A, B, count.once=TRUE)
  numOverlaps(randomizeRegions(A), B, count.once=TRUE)

## ------------------------------------------------------------------------
  pt <- overlapPermTest(A=A, B=B, ntimes=50)
  pt

## ----fig.height=6,fig.width=8--------------------------------------------
  plot(pt)

## ----fig.height=6,fig.width=8--------------------------------------------
  lz <- localZScore(pt=pt, A=A, B=B)
  plot(lz)

## ------------------------------------------------------------------------
# special <- toGRanges("http://gattaca.imppc.org/regioner/data/my.special.genes.bed")
special <- toGRanges(system.file("extdata", "my.special.genes.bed", package="regioneR"))
# all.genes <- toGRanges("http://gattaca.imppc.org/regioner/data/all.genes.bed")
all.genes <- toGRanges(system.file("extdata", "all.genes.bed", package="regioneR"))
#altered <- toGRanges("http://gattaca.imppc.org/regioner/data/my.altered.regions.bed")
altered <- toGRanges(system.file("extdata", "my.altered.regions.bed", package="regioneR"))
length(special)
length(all.genes)
length(altered)

## ------------------------------------------------------------------------
numOverlaps(special, altered)

## ------------------------------------------------------------------------
random.RS <- resampleRegions(special, universe=all.genes)
random.RS

## ------------------------------------------------------------------------
random.RS <- resampleRegions(special, universe=all.genes)
numOverlaps(random.RS, altered)
random.RS <- resampleRegions(special, universe=all.genes)
numOverlaps(random.RS, altered)
random.RS <- resampleRegions(special, universe=all.genes)
numOverlaps(random.RS, altered)
random.RS <- resampleRegions(special, universe=all.genes)
numOverlaps(random.RS, altered)

## ----eval=FALSE----------------------------------------------------------
## # NOT RUN
## pt <- permTest(A=my.regions, B=repeats, randomize.function=randomizeRegions,
## evaluate.function=overlapRegions)

## ----eval=FALSE----------------------------------------------------------
## # NOT RUN
## pt <- permTest(A=my.genes, randomize.function=resampleRegions, universe=all.genes,
## evaluate.function=meanInRegions, x=methylation.levels.450K)

## ------------------------------------------------------------------------
pt <- permTest(A=special, ntimes=50, randomize.function=resampleRegions, universe=all.genes,
 evaluate.function=numOverlaps, B=altered, verbose=FALSE)

## ------------------------------------------------------------------------
pt
summary(pt)

## ----fig.height=6,fig.width=8--------------------------------------------
plot(pt)

## ----fig.height=6,fig.width=8--------------------------------------------
regular <- toGRanges("http://gattaca.imppc.org/regioner/data/my.regular.genes.bed")
length(regular)
numOverlaps(regular, altered)
pt.reg <- permTest(A=regular, ntimes=50, randomize.function=resampleRegions, universe=all.genes,
 evaluate.function=numOverlaps, B=altered, verbose=FALSE)
pt.reg
plot(pt.reg)

## ----eval=FALSE----------------------------------------------------------
## #NOT RUN - See Figure 1
## pt.5000 <- permTest(A=special, ntimes=5000, randomize.function=resampleRegions,
## universe=all.genes, evaluate.function=numOverlaps, B=altered, verbose=TRUE)
## plot(pt.5000)

## ----eval=FALSE----------------------------------------------------------
## #NOT RUN - See Figure 2
## pt.5000.reg <- permTest(A=regular, ntimes=5000, randomize.function=resampleRegions,
## universe=all.genes, evaluate.function=numOverlaps, B=altered, verbose=TRUE)
## plot(pt.5000.reg)

## ------------------------------------------------------------------------
A <- toGRanges(data.frame(chr=c("chr1", "chr1", "chr1"), start=c(20000, 50000, 100000),
end=c(22000, 70000, 400000)))

## ------------------------------------------------------------------------
randomizeRegions(A, genome="hg19")

## ------------------------------------------------------------------------
randomizeRegions(A, genome="hg19", per.chromosome=TRUE)

## ------------------------------------------------------------------------
circularRandomizeRegions(A, genome="hg19", mask=NA)

## ------------------------------------------------------------------------
gcContent <- function(A, bsgenome, ...) {
  A <- toGRanges(A)
  reg.seqs <- getSeq(bsgenome, A)
  base.frequency <- alphabetFrequency(reg.seqs)
  gc.pct <- (sum(base.frequency[,"C"]) + sum(base.frequency[,"G"]))/sum(width(A))
  return(gc.pct)
}

## ------------------------------------------------------------------------
permuteRegionsMetadata <- function(A, ...) {
  A <- toGRanges(A)
  mcols(A)[,1] <- mcols(A)[sample(length(A)),1]
  return(A)
}

## ------------------------------------------------------------------------
  A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000)
  B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000))

  #Without mc.set.seed=FALSE
  set.seed(123)
  overlapPermTest(A, B, ntimes=10, force.parallel=TRUE)
  set.seed(123)
  overlapPermTest(A, B, ntimes=10, force.parallel=TRUE)

  #With mc.set.seed=FALSE
  set.seed(123)
  overlapPermTest(A, B, ntimes=10, mc.set.seed=FALSE, force.parallel=TRUE)
  set.seed(123)
  overlapPermTest(A, B, ntimes=10, mc.set.seed=FALSE, force.parallel=TRUE)


## ----fig.height=6,fig.width=8--------------------------------------------
pt <- permTest(A=special, ntimes=50, randomize.function=resampleRegions, universe=all.genes,
               evaluate.function=numOverlaps, B=altered, verbose=FALSE)
lz <- localZScore(A=special, pt=pt, window=10*mean(width(special)), 
                  step=mean(width(special))/2, B=altered)
plot(lz)

## ----fig.height=6,fig.width=8--------------------------------------------

genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1")
B <- createRandomRegions(nregions=100, length.mean=10000, length.sd=20000, genome=genome,
                         non.overlapping=FALSE)
A <- B[sample(20)]

pt <- overlapPermTest(A=A, B=B, ntimes=100, genome=genome, non.overlapping=FALSE)
pt

lz <- localZScore(A=A, B=B, pt=pt, window=10*mean(width(A)), step=mean(width(A))/2 )
plot(lz)

## ------------------------------------------------------------------------
A <- data.frame(chr=1, start=c(1,15,24), end=c(10,20,30),  x=c(1,2,3), y=c("a","b","c"))
B <- toGRanges(A)
B

## ------------------------------------------------------------------------
toDataframe(B) 

## ------------------------------------------------------------------------
human.genome <- getGenomeAndMask("hg19", mask=NA)$genome
human.canonical <- filterChromosomes(human.genome, organism="hg")
listChrTypes()
human.autosomal <- filterChromosomes(human.genome, organism="hg", chr.type="autosomal")
human.123 <- filterChromosomes(human.genome, keep.chr=c("chr1", "chr2", "chr3"))

## ------------------------------------------------------------------------
overlaps.df <- overlapRegions(A, B)
overlaps.df

## ------------------------------------------------------------------------
overlaps.df <- overlapRegions(A, B, type="BinA", get.pctA=TRUE)
overlaps.df

## ------------------------------------------------------------------------
overlaps.df <- overlapRegions(A, B, min.bases=5)
overlaps.df

## ------------------------------------------------------------------------
overlaps.bool <- overlapRegions(A, B, min.bases=5, only.boolean=TRUE)
overlaps.bool

## ------------------------------------------------------------------------
overlaps.int <- overlapRegions(A, B, min.bases=5, only.count=TRUE)
overlaps.int

## ----eval=FALSE----------------------------------------------------------
##   #NOT RUN
##   set.seed(12345)
##   cpgHMM <- toGRanges("http://rafalab.jhsph.edu/CGI/model-based-cpg-islands-hg19.txt")
##   promoters <- toGRanges("http://gattaca.imppc.org/regioner/data/UCSC.promoters.hg19.bed")

## ----eval=FALSE----------------------------------------------------------
##   #NOT RUN
##   cpgHMM <- filterChromosomes(cpgHMM, organism="hg", chr.type="canonical")
##   promoters <- filterChromosomes(promoters, organism="hg", chr.type="canonical")

## ----eval=FALSE----------------------------------------------------------
##  #NOT RUN
##  cpgHMM_2K <- sample(cpgHMM, 2000)
## 
##  pt <- overlapPermTest(cpgHMM_2K, promoters, ntimes=1000, genome="hg19", count.once=TRUE)
##  pt
##    P-value: 0.000999000999000999
##    Z-score: 60.5237
##    Number of iterations: 1000
##    Alternative: greater
##    Evaluation of the original region set: 614
##    Evaluation function: numOverlaps
##    Randomization function: randomizeRegions
##  mean(pt$permuted)
##  79.087

## ----eval=FALSE----------------------------------------------------------
##     #NOT RUN
##     plot(pt)

## ----eval=FALSE----------------------------------------------------------
##         #NOT RUN
##         set.seed(12345)
##         download.file("http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeAwgTfbsUniform/wgEncodeAwgTfbsSydhHepg2Rad21IggrabUniPk.narrowPeak.gz", "Rad21.gz")
## 
##         download.file("http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeAwgTfbsUniform/wgEncodeAwgTfbsUwHepg2CtcfUniPk.narrowPeak.gz", "Ctcf.gz")
## 
##         HepG2_Rad21 <- toGRanges(gzfile("Rad21.gz"), header=FALSE)
##         HepG2_Ctcf <- toGRanges(gzfile("Ctcf.gz"), header=FALSE)
## 
##         promoters <- toGRanges("http://gattaca.imppc.org/regioner/data/UCSC.promoters.hg19.bed")

## ----eval=FALSE----------------------------------------------------------
##     #NOT RUN
##     promoters <- filterChromosomes(promoters, organism="hg19")
##     HepG2_Rad21_5K <- sample(HepG2_Rad21, 5000)

## ----eval=FALSE----------------------------------------------------------
##     #NOT RUN
##     pt_Rad21_5k_vs_Ctcf <- permTest(A=HepG2_Rad21_5K, B=HepG2_Ctcf, ntimes=1000,
##                                     randomize.function=circularRandomizeRegions,
##                                     evaluate.function=numOverlaps, count.once=TRUE,
##                                     genome="hg19", mc.set.seed=FALSE, mc.cores=4)
## 
##     pt_Rad21_5k_vs_Prom <- permTest(A=HepG2_Rad21_5K, B=promoters, ntimes=1000,
##                                     randomize.function=circularRandomizeRegions,
##                                     evaluate.function=numOverlaps, count.once=TRUE,
##                                     genome="hg19", mc.set.seed=FALSE, mc.cores=4)
## 
##     pt_Rad21_5k_vs_Ctcf
## 
##     pt_Rad21_5k_vs_Prom
## 
##     plot(pt_Rad21_5k_vs_Ctcf, main="Rad21_5K vs CTCF")
##     plot(pt_Rad21_5k_vs_Prom, main="Rad21_5K vs Promoters")

## ----eval=FALSE----------------------------------------------------------
##     #NOT RUN
##     lz_Rad21_vs_Ctcf_1 <- localZScore(A=HepG2_Rad21_5K, B=HepG2_Ctcf,
##                                       pt=pt_Rad21_5k_vs_Ctcf,
##                                       window=1000, step=50, count.once=TRUE)
## 
##     lz_Rad21_vs_Prom_1 <- localZScore(A=HepG2_Rad21_5K, B=promoters,
##                                       pt=pt_Rad21_5k_vs_Prom,
##                                       window=1000, step=50, count.once=TRUE)
## 
##     plot(lz_Rad21_vs_Ctcf_1, main="Rad21_5k vs CTCF (1Kbp)")
##     plot(lz_Rad21_vs_Prom_1, main="Rad21_5k vs promoters  (1Kbp)")
## 

## ----eval=FALSE----------------------------------------------------------
##     #NOT RUN
##     lz_Rad21_vs_Ctcf_2 <- localZScore(A=HepG2_Rad21_5K, B=HepG2_Ctcf,
##                                       pt=pt_Rad21_5k_vs_Ctcf,
##                                       window=10000, step=500, count.once=TRUE)
## 
##     lz_Rad21_vs_Prom_2 <- localZScore(A=HepG2_Rad21_5K, B=promoters,
##                                       pt=pt_Rad21_5k_vs_Prom,
##                                       window=10000, step=500, count.once=TRUE)
## 
##     plot(lz_Rad21_vs_Ctcf_2, main="Rad21_5k vs CTCF (10Kbp)")
##     plot(lz_Rad21_vs_Prom_2, main="Rad21_5k vs promoters  (10Kbp)")

## ----sessionInfo---------------------------------------------------------
sessionInfo()