## ----ky1, load-depenencies, results="hide", message=FALSE, warning=FALSE------
library(sesame)
sesameDataCache()

## ----ky2, message=FALSE-------------------------------------------------------
query <- KYCG_getDBs("MM285.designGroup")[["PGCMeth"]]
head(query)

## ----ky3, fig.width=8, fig.height=5, message=FALSE----------------------------
results_pgc <- testEnrichment(query, platform="MM285")
head(results_pgc)

## ----ky4----------------------------------------------------------------------
KYCG_plotEnrichAll(results_pgc)

## ----ky9, echo = FALSE, results="asis"----------------------------------------
library(knitr)
df = data.frame(
    "Continuous DB"=c("Correlation-based","Set Enrichment Analysis"),
    "Discrete DB"=c("Set Enrichment Analysis","Fisher's Exact Test"))
rownames(df) = c("Continuous Query", "Discrete Query")
kable(df, caption="Four KnowYourCG Testing Scenarios")

## ----ky10, run-test-single, echo=TRUE, eval=TRUE, message=FALSE---------------
library(SummarizedExperiment)

## prepare a query
df <- rowData(sesameDataGet('MM285.tissueSignature'))
query <- df$Probe_ID[df$branch == "fetal_brain" & df$type == "Hypo"]

results <- testEnrichment(query, "TFBS", platform="MM285")
results %>% dplyr::filter(overlap>10) %>% head

## prepare another query
query <- df$Probe_ID[df$branch == "fetal_liver" & df$type == "Hypo"]
results <- testEnrichment(query, "TFBS", platform="MM285")
results %>% dplyr::filter(overlap>10) %>%
    dplyr::select(dbname, estimate, test, FDR) %>% head

## ----ky5, list-data, eval=TRUE, echo=TRUE-------------------------------------
KYCG_listDBGroups("MM285")

## ----ky6, cache-data, eval=TRUE, warning=FALSE--------------------------------
dbs <- KYCG_getDBs("MM285.design")

## ----ky7, view-data1, eval=TRUE, warning=FALSE--------------------------------
str(dbs[["PGCMeth"]])

## ----ky8, message=FALSE-------------------------------------------------------
df <- rowData(sesameDataGet('MM285.tissueSignature'))
query <- df$Probe_ID[df$branch == "B_cell"]
head(query)

## ----ky16, fig.width=7, fig.height=6, echo=TRUE, warning=FALSE, message=FALSE----
query <- names(sesameData_getProbesByGene("Dnmt3a", "MM285"))
results <- testEnrichment(query, 
    KYCG_buildGeneDBs(query, max_distance=100000, platform="MM285"),
    platform="MM285")
results[,c("dbname","estimate","gene_name","FDR", "nQ", "nD", "overlap")]

## ----ky17, fig.width=5, fig.height=4, echo=TRUE-------------------------------
KYCG_plotLollipop(results, label="gene_name")

## ----eval=FALSE---------------------------------------------------------------
#  KYCG_listDBGroups(path = "~/Downloads/KYCG_knowledgebase_EPICv2")
#  ##  [1] "ABCompartment.20220911.gz" "Blacklist.20220304.gz"
#  ##  [3] "CGI.20220904.gz"           "ChromHMM.20220303.gz"
#  ##  [5] "CTCFbind.20220911.gz"      "HM.20221013.gz"
#  ##  [7] "ImprintingDMR.20220818.gz" "MetagenePC.20220911.gz"
#  ##  [9] "nFlankCG.20220321.gz"      "PMD.20220911.gz"
#  ## [11] "ProbeType.gz"              "REMCChromHMM.20220911.gz"
#  ## [13] "rmsk1.20220307.gz"         "rmsk2.20220321.gz"
#  ## [15] "Tetranuc2.20220321.gz"     "TFBS.20220921.gz"
#  ## [17] "TFBSrm.20221005.gz"
#  ## load all database files in the folder
#  dbs <- KYCG_loadDBs("~/Downloads/KYCG_knowledgebase_EPICv2/")
#  ## or one database file
#  dbs <- KYCG_loadDBs("~/Downloads/KYCG_knowledgebase_EPICv2/hg38/CGI.20220904.gz")

## ----ky18, message=FALSE------------------------------------------------------
df <- rowData(sesameDataGet('MM285.tissueSignature'))
query <- df$Probe_ID[df$branch == "fetal_liver" & df$type == "Hypo"]
regs <- sesameData_getTxnGRanges("mm10", merge2gene = TRUE)
genes <- sesameData_annoProbes(query, regs, platform="MM285", return_ov_features=TRUE)
genes

## ----ky19, eval = FALSE-------------------------------------------------------
#  library(gprofiler2)
#  
#  ## use gene name
#  gostres <- gost(genes$gene_name, organism = "mmusculus")
#  gostres$result[order(gostres$result$p_value),]
#  gostplot(gostres)
#  
#  ## use Ensembl gene ID, note we need to remove the version suffix
#  gene_ids <- sapply(strsplit(names(genes),"\\."), function(x) x[1])
#  gostres <- gost(gene_ids, organism = "mmusculus")
#  gostres$result[order(gostres$result$p_value),]
#  gostplot(gostres)

## ----ky21, run-test-data, echo=TRUE, eval=TRUE, message=FALSE-----------------
query <- KYCG_getDBs("KYCG.MM285.designGroup")[["TSS"]]

## ----ky22, echo=TRUE, eval=TRUE, message=FALSE--------------------------------
res <- testEnrichmentSEA(query, "MM285.seqContextN")
res[, c("dbname", "test", "estimate", "FDR", "nQ", "nD", "overlap")]

## ----ky24, GSEA, fig.width=6, fig.height=6, message=FALSE---------------------
query <- KYCG_getDBs("KYCG.MM285.designGroup")[["TSS"]]
db <- KYCG_getDBs("MM285.seqContextN", "distToTSS")
res <- testEnrichmentSEA(query, db, prepPlot = TRUE)
KYCG_plotSetEnrichment(res[[1]])

## ----ky23, warning=FALSE, eval=FALSE------------------------------------------
#  beta_values <- getBetas(sesameDataGet("MM285.1.SigDF"))
#  res <- testEnrichmentSEA(beta_values, "MM285.chromHMM")
#  res[, c("dbname", "test", "estimate", "FDR", "nQ", "nD", "overlap")]

## ----ky25, echo=FALSE---------------------------------------------------------

se <- sesameDataGet("MM285.10.SE.tissue")

library(tibble)
df_plot <- as.data.frame(t(assay(se)[
    c("cg30910045_BC21","cg34861418_TC21"),])) %>% rownames_to_column("IDAT")

library(ggplot2)
ggplot(df_plot) +
    geom_line(mapping=aes(IDAT, y=cg30910045_BC21, group=1), color="blue") +
    geom_line(mapping=aes(IDAT, y=cg34861418_TC21, group=1), color="red") +
    labs(y="Methylation fraction", x="Sample") + theme_bw() +
    theme(axis.text.x=element_blank(),
          axis.ticks.x=element_blank())


## -----------------------------------------------------------------------------
sessionInfo()