## ----echo=FALSE---------------------------------------------------------------
library(knitr)
opts_chunk$set(echo = TRUE, TOC = TRUE)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("scMitoMut")

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("remotes", quietly = TRUE))
#     install.packages("remotes")
# remotes::install_github("wenjie1991/scMitoMut")

## -----------------------------------------------------------------------------
library(scMitoMut)
library(data.table)
library(ggplot2)
library(rhdf5)

## ----eval=TRUE----------------------------------------------------------------
## Load the allele count table
f <- system.file("extdata", "mini_dataset.tsv.gz", package = "scMitoMut")
f_h5_tmp <- tempfile(fileext = ".h5")
f_h5 <- parse_table(f, h5_file = f_h5_tmp)

## ----eval=TRUE----------------------------------------------------------------
f_h5

## -----------------------------------------------------------------------------
## Open the h5 file as a scMitoMut object
x <- open_h5_file(f_h5)
str(x)

## Show what's in the h5 file
h5ls(x$h5f, recursive = FALSE)

## -----------------------------------------------------------------------------
f <- system.file("extdata", "mini_dataset_cell_ann.csv", package = "scMitoMut")
cell_ann <- read.csv(f, row.names = 1)

## Subset the cells, the cell id can be found by colnames() for the Seurat object
x <- subset_cell(x, rownames(cell_ann))

## -----------------------------------------------------------------------------
head(x$cell_selected)

## ----eval=TRUE----------------------------------------------------------------
## Run the model fitting
run_model_fit(x, mc.cores = 1)

## The p-value is kept in the pval group of H5 file
h5ls(x$h5f, recursive = FALSE)

## -----------------------------------------------------------------------------
## Filter mutation
x <- filter_loc(
  mtmutObj = x,
  min_cell = 2,
  model = "bb",
  p_threshold = 0.01,
  p_adj_method = "fdr"
)
x$loc_pass

## ----fig.width = 12-----------------------------------------------------------
## Prepare the color for cell annotation
colors <- c(
  "Cancer Epi" = "#f28482",
  Blood = "#f6bd60"
)
ann_colors <- list("SeuratCellTypes" = colors)

## ----fig.width = 12-----------------------------------------------------------
## binary heatmap
plot_heatmap(x,
  cell_ann = cell_ann, ann_colors = ann_colors, type = "binary",
  percent_interp = 0.2, n_interp = 3
)

## ----fig.width = 12-----------------------------------------------------------
## binary heatmap
plot_heatmap(x,
  cell_ann = cell_ann, ann_colors = ann_colors, type = "binary",
  percent_interp = 1, n_interp = 3
)

## ----fig.width = 12-----------------------------------------------------------
## p value heatmap
plot_heatmap(x,
  cell_ann = cell_ann, ann_colors = ann_colors, type = "p",
  percent_interp = 0.2, n_interp = 3
)

## ----fig.width = 12-----------------------------------------------------------
## allele frequency heatmap
plot_heatmap(x,
  cell_ann = cell_ann, ann_colors = ann_colors, type = "af",
  percent_interp = 0.2, n_interp = 3
)

## -----------------------------------------------------------------------------
## Export the mutation data as data.frame
m_df <- export_df(x)
m_df[1:10, ]
## Export allele frequency data as data.matrix
export_af(x)[1:5, 1:5]
## Export p-value data as data.matrix
export_pval(x)[1:5, 1:5]
## Export binary mutation status data as data.matrix
export_binary(x)[1:5, 1:5]

## ----fig.width = 5------------------------------------------------------------
## The `m_df` is exported using the `export_df` function above.
m_dt <- data.table(m_df)
m_dt[, cell_type := cell_ann[as.character(m_dt$cell_barcode), "SeuratCellTypes"]]
m_dt_sub <- m_dt[loc == "chrM.1227"]
m_dt_sub[, sum((pval) < 0.01, na.rm = TRUE), by = cell_type]
m_dt_sub[, sum((1 - af) > 0.05, na.rm = TRUE), by = cell_type]

## The p value versus cell types
ggplot(m_dt_sub) +
  aes(x = cell_type, y = -log10(pval), color = cell_type) +
  geom_jitter() +
  scale_color_manual(values = colors) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
  ylab("-log10(FDR)")

## The allele frequency versus cell types
ggplot(m_dt_sub) +
  aes(x = cell_type, y = 1 - af, color = factor(cell_type)) +
  geom_jitter() +
  scale_color_manual(values = colors) +
  theme_bw() +
  geom_hline(yintercept = 0.05, linetype = "dashed") +
  ylab("1 - Dominant Allele Frequency")

## The p value versus allele frequency
ggplot(m_dt_sub) +
    aes(x = -log10(pval), y = 1 - af, color = factor(cell_type)) +
    geom_point() +
    scale_color_manual(values = colors) +
    theme_bw() +
    geom_hline(yintercept = 0.05, linetype = "dashed") +
    geom_vline(xintercept = -log10(0.01), linetype = "dashed") +
    xlab("-log10(FDR)") +
    ylab("1 - Dominant Allele Frequency")

## -----------------------------------------------------------------------------
sessionInfo()