## ---- echo = FALSE, message=FALSE---------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  # comment = "#>", 
  tidy = FALSE, 
  cache = FALSE, 
  results = 'markup'
)

## ---- eval=FALSE--------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("SingleMoleculeFootprinting")

## ---- eval=FALSE--------------------------------------------------------------
#  library(parallel)
#  library(QuasR)
#  library(BSgenome.Mmusculus.UCSC.mm10)
#  
#  cl <- makeCluster(40)
#  prj <- QuasR::qAlign(sampleFile = Qinput,
#                       genome = "BSgenome.Mmusculus.UCSC.mm10",
#                       aligner = "Rbowtie",
#                       projectName = "prj",
#                       paired = "fr",
#                       bisulfite = "undir",
#                       alignmentParameter = "-e 70 -X 1000 -k 2 --best -strata",
#                       alignmentsDir = "./",
#                       cacheDir = tempdir(),
#                       clObj = cl)

## ----setup, message=FALSE, eval=TRUE------------------------------------------
library(BSgenome.Mmusculus.UCSC.mm10)
library(SingleMoleculeFootprinting)
library(SingleMoleculeFootprintingData)
library(parallel)

## ---- include=FALSE-----------------------------------------------------------
# Under the hood:
#       download and cache SingleMoleculeFootprintingData data
#       locate bam file and write QuasR sampleSheet

# Download and cache
CachedBamPath = SingleMoleculeFootprintingData::NRF1pair.bam()[[1]]
CachedBaiPath = SingleMoleculeFootprintingData::NRF1pair.bam.bai()[[1]]
SingleMoleculeFootprintingData::AllCs.rds()
SingleMoleculeFootprintingData::EnrichmentRegions_mm10.rds()
SingleMoleculeFootprintingData::ReferenceMethylation.rds()

# Create copy of bam and bai files with relevant names
CacheDir <- ExperimentHub::getExperimentHubOption(arg = "CACHE")
file.copy(from = CachedBamPath, to = paste0(CacheDir, "/", "NRF1pair.bam"))
file.copy(from = CachedBaiPath, to = paste0(CacheDir, "/", "NRF1pair.bam.bai"))

# Write QuasR input
data.frame(
  FileName = paste0(CacheDir, "/", "NRF1pair.bam"),
  SampleName = "NRF1pair_DE_"
  ) -> df
readr::write_delim(df, paste0(CacheDir, "/NRF1Pair_Qinput.txt"), delim = "\t")

## ---- eval=TRUE---------------------------------------------------------------
Qinput = paste0(CacheDir, "/NRF1Pair_Qinput.txt")
suppressMessages(readr::read_delim(Qinput, delim = "\t"))

## ---- eval=TRUE---------------------------------------------------------------
BaitRegions <- SingleMoleculeFootprintingData::EnrichmentRegions_mm10.rds()
clObj = makeCluster(5)
BaitCaptureEfficiency <- BaitCapture(sampleSheet = Qinput, 
                                     genome = BSgenome.Mmusculus.UCSC.mm10, 
                                     baits = BaitRegions, clObj = clObj)
stopCluster(clObj)
BaitCaptureEfficiency

## ----conversion-rate-precision, eval=FALSE, echo = TRUE-----------------------
#  ConversionRateValue <- ConversionRate(sampleSheet = Qinput,
#                                            genome = BSgenome.Mmusculus.UCSC.mm10,
#                                            chr = 6)

## ----load-conversion-rate-precision, eval=TRUE, echo = FALSE------------------
## the code above takes > 10 minutes to run, so we load a pre-computed value here
ConversionRateValue <- readRDS(system.file("extdata",
                                           "vignette_ConversionRatePrecision.rds",
                                           package = "SingleMoleculeFootprinting"))

## ---- echo = FALSE------------------------------------------------------------
knitr::kable(data.frame(ExperimentType = c("Single enzyme SMF", "Double enzyme SMF", "No enzyme (endogenous mCpG only)"), 
                        substring = c("\\_NO_", "\\_DE_", "\\_SS_"), 
                        RelevanContext = c("DGCHN & NWCGW", "GCH + HCG", "CG"), 
                        Notes = c("Methylation info is reported separately for each context", "Methylation information is aggregated across the contexts", "-")))

## ---- eval=TRUE---------------------------------------------------------------
MySample <- suppressMessages(readr::read_delim(Qinput, delim = "\t")[[2]])
Region_of_interest <- GRanges(seqnames = "chr6", 
                              ranges = IRanges(start = 88106000, 
                                               end = 88106500), 
                              strand = "*")

Methylation <- CallContextMethylation(sampleSheet = Qinput, 
                                      sample = MySample, 
                                      genome = BSgenome.Mmusculus.UCSC.mm10, 
                                      range = Region_of_interest, 
                                      coverage = 20, 
                                      ConvRate.thr = 0.2)
Methylation[[1]]
Methylation[[2]][1:10, 1:10]

## -----------------------------------------------------------------------------
n = 1000
readsSubset <- sample(seq(nrow(Methylation[[2]])), n)
Methylation[[2]] <- Methylation[[2]][readsSubset,]

## ---- eval=TRUE---------------------------------------------------------------
TFBSs <- GenomicRanges::GRanges("chr6", 
                               IRanges(c(88106216, 88106253),
                                       c(88106226, 88106263)), 
                               strand = "-")
elementMetadata(TFBSs)$name <- c("NRF1", "NRF1")
names(TFBSs) <- c(paste0("TFBS_", c(4305215, 4305216)))
TFBSs

PlotAvgSMF(MethGR = Methylation[[1]], 
           range = Region_of_interest, 
           TFBSs = TFBSs)

## -----------------------------------------------------------------------------
PlotSM(MethSM = Methylation[[2]], 
       range = Region_of_interest)

## ---- eval=TRUE---------------------------------------------------------------
PlotSM(MethSM = Methylation[[2]], 
       range = Region_of_interest, 
       SortedReads = "HC")

## ---- eval=TRUE---------------------------------------------------------------
SortedReads_TFCluster <- SortReadsByTFCluster(MethSM = Methylation[[2]], 
                                             TFBSs = TFBSs)

## ---- echo=FALSE--------------------------------------------------------------
message(paste0("Number of retrieved states: ", 
               as.character(length(SortedReads_TFCluster))))
message("States population:")
unlist(lapply(SortedReads_TFCluster, length))

## -----------------------------------------------------------------------------
PlotSM(MethSM = Methylation[[2]], 
       range = Region_of_interest, 
       SortedReads = SortedReads_TFCluster)

## -----------------------------------------------------------------------------
StateQuantificationPlot(SortedReads = SortedReads_TFCluster)

## ---- eval=TRUE---------------------------------------------------------------
PlotSingleSiteSMF(ContextMethylation = Methylation, 
                  sample = MySample, 
                  range = Region_of_interest, 
                  SortedReads = SortedReads_TFCluster, 
                  TFBSs = TFBSs, 
                  saveAs = NULL)

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()