## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(comment = "", warning = FALSE, message = FALSE)

## -----------------------------------------------------------------------------
library(knitr)
library(ClustIRR)
library(igraph)
library(ggplot2)
library(ggrepel)
library(patchwork)

## ----eval=FALSE---------------------------------------------------------------
#  if(!require("BiocManager", quietly = TRUE)) {
#    install.packages("BiocManager")
#  }
#  BiocManager::install("ClustIRR")

## ----graphic, echo = FALSE, fig.align="left", out.width='100%'----------------
knitr::include_graphics("../inst/extdata/logo.png")

## -----------------------------------------------------------------------------
data("CDR3ab", package = "ClustIRR")

## -----------------------------------------------------------------------------
set.seed(127)
n <- 300

# get 300 CDR3a and CDR3b pairs from the data -> IRR a
a <- data.frame(CDR3a = CDR3ab$CDR3a[1:n], 
                CDR3b = CDR3ab$CDR3b[1:n], 
                clone_size=rpois(n = n, lambda = 3)+1, 
                sample = "a")

# get 200 CDR3a and CDR3b pairs from the data -> IRR b
b <- data.frame(CDR3a = CDR3ab$CDR3a[101:(n+100)], 
                CDR3b = CDR3ab$CDR3b[101:(n+100)], 
                clone_size=rpois(n = n, lambda = 3)+1, 
                sample = "b")

## -----------------------------------------------------------------------------
str(a)

## -----------------------------------------------------------------------------
str(b)

## -----------------------------------------------------------------------------
# perform clust_irr analysis of repertoire a
c_a <- cluster_irr(s = a, control = list(gmi = 0.7))
# ... and b
c_b <- cluster_irr(s = b, control = list(gmi = 0.7))

## -----------------------------------------------------------------------------
kable(head(c_a@clust$CDR3a), digits = 2)

## -----------------------------------------------------------------------------
kable(head(c_a@clust$CDR3a), digits = 2)

## -----------------------------------------------------------------------------
# control = list(gmi = 0.7, trim_flank_aa = 3, db_dist = 0, db_custom = NULL)
kable(head(get_clustirr_inputs(c_a)$s), digits = 2)

## -----------------------------------------------------------------------------
g <- get_graph(clust_irr = c_a)

## -----------------------------------------------------------------------------
plot_graph(g, as_visnet = TRUE)

## ----fig.width=6, fig.height=4.5----------------------------------------------
# data.frame of edges and their attributes
e <- igraph::as_data_frame(x = g$graph, what = "edges")

## -----------------------------------------------------------------------------
kable(head(e), digits = 2)

## ----fig.width=5, fig.height=3.5----------------------------------------------
ggplot(data = e)+
  geom_density(aes(ncweight, col = chain))+
  geom_density(aes(nweight, col = chain), linetype = "dashed")+
  theme_bw()+
  xlab(label = "edge weight (solid = ncweight, dashed = nweight)")+
  theme(legend.position = "top")

## ----fig.width=6, fig.height=6------------------------------------------------
g <- get_joint_graph(clust_irrs = c(c_a, c_b))

## ----fig.width=6, fig.height=6------------------------------------------------
plot_graph(g = g, as_visnet = TRUE, node_opacity = 0.8)

## -----------------------------------------------------------------------------
gcd <- detect_communities(graph = g$graph, 
                          algorithm = "leiden",
                          resolution = 1,
                          weight = "ncweight",
                          metric = "average",
                          chains = c("CDR3a", "CDR3b"))

## -----------------------------------------------------------------------------
names(gcd)

## -----------------------------------------------------------------------------
dim(gcd$community_occupancy_matrix)

## -----------------------------------------------------------------------------
head(gcd$community_occupancy_matrix)

## ----fig.width=5, fig.height=5------------------------------------------------
plot(x = gcd$community_occupancy_matrix[, "a"],
     y = gcd$community_occupancy_matrix[, "b"],
     xlab = "community occupancy [cells in IRR a]",
     ylab = "community occupancy [cells in IRR b]")

## -----------------------------------------------------------------------------
kable(head(gcd$community_summary), digits = 2)

## ----fig.width=6, fig.height=4------------------------------------------------
ggplot(data = gcd$community_summary)+
  geom_point(aes(x = w_CDR3a, y = w_CDR3b, size = cells_n), shape=21)+
  xlab(label = "CDR3a ncweight")+
  ylab(label = "CDR3b ncweight")+
  scale_size_continuous(range = c(0.5, 5))+
  theme_bw(base_size = 10)

## -----------------------------------------------------------------------------
kable(head(gcd$node_summary), digits = 2)

## -----------------------------------------------------------------------------
d <- dco(community_occupancy_matrix = gcd$community_occupancy_matrix,
         mcmc_control = list(mcmc_warmup = 500,
                             mcmc_iter = 1500,
                             mcmc_chains = 3,
                             mcmc_cores = 1,
                             mcmc_algorithm = "NUTS",
                             adapt_delta = 0.9,
                             max_treedepth = 10))

## ----fig.width=6, fig.height=2.5----------------------------------------------
ggplot(data = d$posterior_summary$y_hat)+
  facet_wrap(facets = ~sample, nrow = 1, scales = "free")+
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "gray")+
  geom_errorbar(aes(x = y_obs, y = mean, ymin = L95, ymax = H95),
                col = "darkgray", width=0)+
  geom_point(aes(x = y_obs, y = mean), size = 0.8)+
  xlab(label = "observed y")+
  ylab(label = "predicted y (and 95% HDI)")+
  theme_bw(base_size = 10)

## ----fig.width=6, fig.height=4------------------------------------------------
ggplot(data = d$posterior_summary$delta)+
  facet_wrap(facets = ~contrast, ncol = 1)+
  geom_errorbar(aes(x = community, y = mean, ymin = L95, ymax = H95), 
                col = "darkgray", width =0)+
  geom_point(aes(x = community, y = mean), size = 0.5)+
  theme_bw(base_size = 10)+
  theme(legend.position = "none")+
  ylab(label = expression(delta))+
  scale_x_continuous(expand = c(0,0))

## ----fig.width=4, fig.height=3------------------------------------------------
ggplot(data = d$posterior_summary$delta)+
  geom_histogram(aes(mean), bins = 100)+
  xlab(label = expression(delta))+
  theme_bw(base_size = 10)

## ----echo=FALSE, include=FALSE------------------------------------------------
rm(a, b, c_a, c_b, d, e, g, gcd, n)

## -----------------------------------------------------------------------------
# repertoire size
n <- 200

# a
a <- data.frame(CDR3a = CDR3ab$CDR3a[1:n], 
                CDR3b = CDR3ab$CDR3b[1:n],
                sample = "a")
# b
b <- data.frame(CDR3a = CDR3ab$CDR3a[1:n], 
                CDR3b = CDR3ab$CDR3b[1:n],
                sample = "b")

# c
c <- data.frame(CDR3a = CDR3ab$CDR3a[1:n], 
                CDR3b = CDR3ab$CDR3b[1:n],
                sample = "c")

## -----------------------------------------------------------------------------
get_clonal_expansion <- function(n, p_expanded) {
  s <- sample(x = c(0, 1), size = n, prob = c(1-p_expanded, 
                                              p_expanded), replace = T)
  y <- vapply(X = s, FUN.VALUE = numeric(1), FUN = function(x) {
    if(x == 0) {
      return(rpois(n = 1, lambda = 0.5))
    }
    return(rpois(n = 1, lambda = 50))
  })
  return(y)
}

## -----------------------------------------------------------------------------
# simulate expansion of specific communities
set.seed(1243)
clone_size <- rpois(n = n, lambda = 3)+1
expansion_factor <- get_clonal_expansion(n = n, p_expanded = 0.02)

a$clone_size <- clone_size
b$clone_size <- clone_size+expansion_factor*1
c$clone_size <- clone_size+expansion_factor*2

## -----------------------------------------------------------------------------
# run cluster_irr on each IRR and join the results
clust_irrs <- c(cluster_irr(s = a), cluster_irr(s = b), cluster_irr(s = c))

## -----------------------------------------------------------------------------
g <- get_joint_graph(clust_irrs = clust_irrs, cores = 1)

## ----fig.width=6, fig.height=6------------------------------------------------
plot_graph(g = g, as_visnet = TRUE, node_opacity = 0.8)

## -----------------------------------------------------------------------------
gcd <- detect_communities(graph = g$graph,
                          weight = "ncweight",
                          algorithm = "leiden",
                          resolution = 1,
                          chains = c("CDR3a", "CDR3b"))

## ----fig.width=7, fig.height=2.5----------------------------------------------
(ggplot(data = gcd$community_summary)+
   geom_point(aes(x = cells_a, y = cells_b), shape = 21)+
   theme_bw(base_size = 10))|
  (ggplot(data = gcd$community_summary)+
     geom_point(aes(x = cells_a, y = cells_c), shape = 21)+
     theme_bw(base_size = 10))|
  (ggplot(data = gcd$community_summary)+
     geom_point(aes(x = cells_b, y = cells_c), shape = 21)+
     theme_bw(base_size = 10))+
  plot_annotation(tag_level = 'A')

## -----------------------------------------------------------------------------
community_occupancy_matrix <- gcd$community_occupancy_matrix
head(community_occupancy_matrix)

## -----------------------------------------------------------------------------
d <- dco(community_occupancy_matrix = community_occupancy_matrix,
         mcmc_control = list(mcmc_warmup = 300,
                             mcmc_iter = 900,
                             mcmc_chains = 3,
                             mcmc_cores = 1,
                             mcmc_algorithm = "NUTS",
                             adapt_delta = 0.95,
                             max_treedepth = 11))

## ----fig.width=6, fig.height=2.5----------------------------------------------
ggplot(data = d$posterior_summary$y_hat)+
  facet_wrap(facets = ~sample, nrow = 1, scales = "free")+
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "gray")+
  geom_errorbar(aes(x = y_obs, y = mean, ymin = L95, ymax = H95),
                col = "darkgray", width=0)+
  geom_point(aes(x = y_obs, y = mean), size = 0.8)+
  xlab(label = "observed y")+
  ylab(label = "predicted y (and 95% HDI)")+
  theme_bw(base_size = 10)

## ----fig.width=6, fig.height=7------------------------------------------------
beta <- d$posterior_summary$beta
ggplot(data = beta)+
  facet_wrap(facets = ~sample, ncol = 1)+
  geom_errorbar(aes(x = community, y = mean, ymin = L95, ymax = H95,
                    col = sample), width = 0)+
  geom_point(aes(x = community, y = mean, col = sample), size = 0.5)+
  theme_bw(base_size = 10)+
  theme(legend.position = "top")+
  ylab(label = expression(beta))+
  scale_x_continuous(expand = c(0,0))

## ----fig.width=6, fig.height=7------------------------------------------------
delta <- d$posterior_summary$delta
delta <- delta[delta$contrast %in% c("a-b", "a-c", "b-c"), ]

ggplot(data = delta)+
  facet_wrap(facets = ~contrast, ncol = 1)+
  geom_errorbar(aes(x = community, y = mean, ymin = L95, ymax = H95), width=0)+
  geom_point(aes(x = community, y = mean), size = 0.5)+
  theme_bw(base_size = 10)+
  theme(legend.position = "top")+
  ylab(label = expression(delta))+
  scale_x_continuous(expand = c(0,0))

## ----session_info-------------------------------------------------------------
utils::sessionInfo()