## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    fig.height = 5,
    fig.width = 5,
    fig.align = "center"
)

## -----------------------------------------------------------------------------
library(ClustIRR)
library(knitr)
library(visNetwork)

## ----eval=FALSE---------------------------------------------------------------
#  if(!require("BiocManager", quietly = TRUE)) {
#    install.packages("BiocManager")
#  }
#  BiocManager::install("ClustIRR")

## ----graphic, echo = FALSE, fig.align="left", out.width='90%'-----------------
knitr::include_graphics("../inst/extdata/logo.png")

## -----------------------------------------------------------------------------
data("CDR3ab")

## -----------------------------------------------------------------------------
# take the first 500 CDR3b sequences from the data -> s
s <- data.frame(CDR3b = c(CDR3ab$CDR3b[1:500], "CASSSSPGDQEQFF"),
                clone_size = 1)

# take 5000 CDR3b sequences 501:5500 from the data -> r
r <- data.frame(CDR3b = CDR3ab$CDR3b[501:5500],
                clone_size = 1)

## -----------------------------------------------------------------------------
str(s)

## -----------------------------------------------------------------------------
str(r)

## -----------------------------------------------------------------------------
cdr3 <- "CASSTTTGTGELFF"
ks <- 4
colnames(stringdist::qgrams(cdr3, q = ks))

## -----------------------------------------------------------------------------
t <- 3
cdr3_trimmed <- substr(x = cdr3, start = t + 1, stop = nchar(cdr3) - t)
colnames(stringdist::qgrams(cdr3_trimmed, q = ks))

## -----------------------------------------------------------------------------
# insert motif LEAR
substr(x = s$CDR3b[1:20], start = 6, stop = 9) <- "LEAR"

## -----------------------------------------------------------------------------
o <- cluster_irr(s = s, r = r)

## -----------------------------------------------------------------------------
# extract local motifs
local_motifs <- get_clustirr_clust(o)$CDR3b$local$m

# display only passed motifs
knitr::kable(local_motifs[local_motifs$pass == TRUE, ], row.names = FALSE)

## -----------------------------------------------------------------------------
# display globally similar pairs
knitr::kable(get_clustirr_clust(o)$CDR3b$global, row.names = FALSE)

## ----fig.width=5, fig.height=4, fig.align='center'----------------------------
par(mai = c(0,0,0,0))
plot_graph(clust_irr = o)

## ----fig.width=5, fig.height=4, fig.align='center'----------------------------
plot_graph(clust_irr = o, as_visnet = TRUE)

## ----fig.width=5, fig.height=4, fig.align='center'----------------------------
plot_graph(clust_irr = o, as_visnet = TRUE, show_singletons = FALSE)

## -----------------------------------------------------------------------------
# create a clone of 10 T-cells
clone <- data.frame(CDR3b = "CATSRPDGLAQYF", clone_size = 10)

# append the clone to the original repertoire 's'
s <- rbind(s, clone)

## -----------------------------------------------------------------------------
o <- cluster_irr(s = s,
                 r = r,
                 ks = 4,
                 cores = 1,
                 control = list(global_smart = FALSE,
                                trim_flank_aa = 3))

## ----fig.align='center'-------------------------------------------------------
# extract local motifs
local_motifs <- get_clustirr_clust(o)$CDR3b$local$m

# display only passed motifs
knitr::kable(local_motifs[local_motifs$pass == TRUE, ], row.names = FALSE)

## -----------------------------------------------------------------------------
plot_graph(clust_irr = o, as_visnet = TRUE)

## -----------------------------------------------------------------------------
data("CDR3ab")

S0 <- data.frame(CDR3a = CDR3ab$CDR3a[3001:3100],
                 CDR3b = CDR3ab$CDR3b[3001:3100],
                 clone_size = c(50, 10, rep(1, times = 98)))

S1 <- data.frame(CDR3a = CDR3ab$CDR3a[5001:5200],
                 CDR3b = CDR3ab$CDR3b[5001:5200],
                 clone_size = c(20, rep(1, times = 199)))

# create a clone of 15 T-cells
clone <- data.frame(CDR3a = "CASSQPGTDHGYTF",
                    CDR3b = "CASSPQGREATGELFF", 
                    clone_size = 15)

# append the clone to the original repertoire 'S0'
S0 <- rbind(S0, clone)

# insert motif LEAR into S0
substr(x = S0$CDR3b[1:20], start = 6, stop = 9) <- "LEAR"

# insert motif WWWW  into S1
substr(x = S1$CDR3b[1], start = 5, stop = 8) <- "WWWW"

## -----------------------------------------------------------------------------
clust_irrs <- vector(mode = "list", length = 2)
names(clust_irrs) <- c("S1", "S0")

clust_irrs[[1]] <- cluster_irr(s = S1, r = S0,
                               control = list(global_smart = FALSE,
                                              trim_flank_aa = 3))

## -----------------------------------------------------------------------------
clust_irrs[[2]] <- cluster_irr(s = S0, r = S1,
                               control = list(global_smart = FALSE,
                                              trim_flank_aa = 3))

## -----------------------------------------------------------------------------
# beta & alpha chain
plot_joint_graph(clust_irrs = clust_irrs, cores = 1, as_visnet = TRUE)

## ----eval=F-------------------------------------------------------------------
#  data("CDR3ab")
#  
#  # gamma/delta chain TCR data -> notice CDR3g and CDR3d columns of 's' and 'r'
#  s <- data.frame(CDR3g = CDR3ab$CDR3a[4501:5000],
#                        CDR3d = CDR3ab$CDR3b[4501:5000])
#  
#  r <- data.frame(CDR3g = CDR3ab$CDR3a[5001:10000],
#                        CDR3d = CDR3ab$CDR3b[5001:10000])
#  

## ----eval=F-------------------------------------------------------------------
#  data("CDR3ab")
#  
#  # heavy/light chain BCR data -> notice CDR3h and CDR3l columns of 's' and 'r'
#  s <- data.frame(CDR3h = CDR3ab$CDR3a[4501:5000],
#                        CDR3l = CDR3ab$CDR3b[4501:5000])
#  
#  r <- data.frame(CDR3h = CDR3ab$CDR3a[5001:10000],
#                        CDR3l = CDR3ab$CDR3b[5001:10000])
#  

## -----------------------------------------------------------------------------
data("CDR3ab")

P1_A <- data.frame(CDR3a = CDR3ab$CDR3a[3001:3100],
                   CDR3b = CDR3ab$CDR3b[3001:3100],
                   clone_size = rpois(n = 100, lambda = 1)+1)

P1_B <- data.frame(CDR3a = CDR3ab$CDR3a[c(3100, 3101:3200)],
                   CDR3b = CDR3ab$CDR3b[c(3100, 3101:3200)],
                   clone_size = rpois(n = 101, lambda = 1)+1)

# clone_size column -> poisson distributed

## -----------------------------------------------------------------------------
P2_A <- data.frame(CDR3a = CDR3ab$CDR3a[c(3100, 4001:4100)],
                   CDR3b = CDR3ab$CDR3b[c(3100, 4001:4100)])

P2_B <- data.frame(CDR3a = CDR3ab$CDR3a[c(3100, 4001:4100)],
                   CDR3b = CDR3ab$CDR3b[c(3100, 4001:4100)])

# no clone_size column -> default clone_size = 1

## -----------------------------------------------------------------------------
clust_irrs <- vector(mode = "list", length = 4)
names(clust_irrs) <- c("P1_A", "P1_B", "P2_A", "P2_B")

clust_irrs[[1]] <- cluster_irr(s = P1_A, r = NULL)
clust_irrs[[2]] <- cluster_irr(s = P1_B, r = NULL)
clust_irrs[[3]] <- cluster_irr(s = P2_A, r = NULL)
clust_irrs[[4]] <- cluster_irr(s = P2_B, r = NULL)

## ----fig.width=6, fig.height=6------------------------------------------------
# beta & alpha chain
plot_joint_graph(clust_irrs = clust_irrs, 
                 as_visnet = TRUE, 
                 show_singletons = T)

## ----session_info-------------------------------------------------------------
utils::sessionInfo()