## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(collapse = FALSE, warning = FALSE)

## ----load_packages, message = FALSE, warning = FALSE--------------------------
# attach the packages used throughout this script
library(clustSIGNAL)
library(scater)
library(ggplot2)
library(dplyr)
library(patchwork)
library(aricode)

## ----embryo_data_prep---------------------------------------------------------
# data(mEmbryo2) brings two objects into the environment:
#   me_expr - gene expression logcounts
#   me_data - cell metadata, including the x-y coordinates
data(mEmbryo2)

# A SpatialExperiment object is assembled from gene expression, cell metadata
# and cell locations. spatialCoordsNames lists the me_data columns that hold
# the xy-coordinates of the cells.
spe <- SpatialExperiment::SpatialExperiment(
  assays = list(logcounts = me_expr),
  colData = me_data,
  spatialCoordsNames = c("X", "Y")
)
spe

## ----embryo_data_columns------------------------------------------------------
colnames(colData(spe)) # column names in the metadata

## ----ClustSIGNAL_singleRun----------------------------------------------------
set.seed(100)
samples <- "sample_id" # column name containing sample names
# ClustSIGNAL needs a SpatialExperiment object, the name of the colData column
# holding the sample labels, and the type of output to generate (clusters,
# neighbours, and/or final spe object).
res_emb <- clustSIGNAL(spe, samples, outputs = "a")

## ----embryo_result_list-------------------------------------------------------
names(res_emb) # names of the outputs generated

## ----embryo_clusters_head-----------------------------------------------------
head(res_emb$clusters) # cluster data frame has cell IDs and cluster labels

## ----embryo_final_spe---------------------------------------------------------
# for convenience with downstream analyses, we will replace the original spe
# object with the one generated by ClustSIGNAL.
# Replacing the input object does not lead to any loss of information, as
# ClustSIGNAL only adds information to the input spe object.
spe <- res_emb$spe_final
spe
colnames(colData(spe))

## ----colors-------------------------------------------------------------------
# palette indexed by cluster label in the plots below
colors <- c(
  "#635547", "#8EC792", "#9e6762", "#FACB12", "#3F84AA",
  "#0F4A9C", "#ff891c", "#EF5A9D", "#C594BF", "#DFCDE4",
  "#139992", "#65A83E", "#8DB5CE", "#005579", "#C9EBFB",
  "#B51D8D", "#532C8A", "#8870ad", "#cc7818", "#FBBE92",
  "#EF4E22", "#f9decf", "#c9a997", "#C72228", "#f79083",
  "#F397C0", "#DABE99", "#c19f70", "#354E23", "#C3C388",
  "#647a4f", "#CDE088", "#f7f79e", "#F6BFCB", "#7F6874",
  "#989898", "#1A1A1A", "#FFFFFF", "#e6e6e6", "#77441B",
  "#F90026", "#A10037", "#DA5921", "#E1C239", "#9DD84A"
)

## ----embryo_spatialPlots1-----------------------------------------------------
# scater plots from the reduced dimension slot, so the spatial coordinates are
# copied there under the name "spatial"
reducedDim(spe, "spatial") <- spatialCoords(spe)

## ----embryo_spatialPlots2-----------------------------------------------------
# spatial plot of cells coloured by their ClustSIGNAL cluster
spt_clust <- scater::plotReducedDim(
  spe,
  dimred = "spatial",
  colour_by = "ClustSIGNAL",
  point_size = 4,
  point_alpha = 1,
  scattermore = TRUE
) +
  ggtitle("A. Spatial plot of clusters") +
  scale_color_manual(values = colors) +
  guides(
    colour = guide_legend(
      title = "Clusters",
      override.aes = list(size = 5)
    )
  ) +
  theme(text = element_text(size = 12))

## ----embryo_spatialPlots3-----------------------------------------------------
# entropy distribution plotted at cluster-level can indicate which clusters
# have cells from homogeneous/heterogeneous space.
df_met <- spe |>
  colData() |>
  as.data.frame()

# median entropy of each cluster, sorted from lowest to highest
ct_ent <- df_met |>
  mutate(ClustSIGNAL = as.character(ClustSIGNAL)) |>
  group_by(ClustSIGNAL) |>
  summarise(mdEntropy = median(entropy)) |>
  arrange(mdEntropy)

# reorder the cluster factor so boxes are drawn by increasing median entropy
df_met$ClustSIGNAL <- factor(df_met$ClustSIGNAL, levels = ct_ent$ClustSIGNAL)

# colours are looked up by cluster label, in the reordered level order;
# ct_ent$ClustSIGNAL is already character after the mutate() above
# (assumes cluster labels are integer-like strings - TODO confirm)
col_ent <- colors[as.numeric(ct_ent$ClustSIGNAL)]

box_clust <- df_met |>
  ggplot(aes(x = ClustSIGNAL, y = entropy, fill = ClustSIGNAL)) +
  geom_boxplot() +
  scale_fill_manual(values = col_ent) +
  ggtitle("B. Entropy distribution of clusters") +
  # only x and y are labelled: the fill legend is hidden below
  labs(x = "ClustSIGNAL clusters", y = "Entropy") +
  theme_classic() +
  theme(
    legend.position = "none",
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    plot.title = element_text(face = "bold")
  )

## ----embryo_spatialPlots4-----------------------------------------------------
# spatial plot and entropy box plot side by side
spt_clust + box_clust +
  patchwork::plot_layout(guides = "collect", widths = c(2, 3))

## ----embryo_clusterMetrics----------------------------------------------------
# to assess the accuracy of clustering, the cluster labels are often compared to
# prior annotations. Here, we compare ClustSIGNAL cluster labels to annotations
# available with this public data.
spe |>
  colData() |>
  as.data.frame() |>
  summarise(
    ARI = aricode::ARI(celltype_mapped_refined, ClustSIGNAL), # calculate ARI
    NMI = aricode::NMI(celltype_mapped_refined, ClustSIGNAL) # calculate NMI
  )

## ----embryo_entropyMetrics----------------------------------------------------
# we can assess the overall entropy distribution of the dataset
spe |>
  colData() |>
  as.data.frame() |>
  summarise(
    min_Entropy = min(entropy),
    # counted from the piped data frame, like the other summaries here
    min_Entropy_count = sum(entropy == 0),
    max_Entropy = max(entropy),
    mean_Entropy = mean(entropy)
  )

## ----entropyPlots1------------------------------------------------------------
# we can also visualize the distribution and spread of the entropy values
hst_ent <- spe |>
  colData() |>
  as.data.frame() |>
  ggplot(aes(entropy)) +
  geom_histogram(binwidth = 0.05) +
  ggtitle("A. Entropy spread") +
  labs(x = "Entropy", y = "Number of neighbourhoods") +
  theme_classic() +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(face = "bold")
  )

## ----entropyPlots2------------------------------------------------------------
# NOTE(review): scale_colour_gradient2() is a diverging scale; if entropy is
# never negative the `low` colour is presumably never reached - confirm
# whether scale_colour_gradient() was intended.
# NOTE(review): no size aesthetic is mapped in this call, so the size scale
# looks unused - confirm before removing.
spt_ent <- scater::plotReducedDim(
  spe,
  colour_by = "entropy",
  dimred = "spatial", # specify spatial low dimension
  point_alpha = 1,
  point_size = 4,
  scattermore = TRUE
) +
  ggtitle("B. Entropy spatial distribution") +
  scale_colour_gradient2("Entropy", low = "grey", high = "blue") +
  scale_size_continuous(range = c(0, max(spe$entropy))) +
  theme(text = element_text(size = 12))

## ----entropyPlots3------------------------------------------------------------
hst_ent + spt_ent

## ----hypothal_data_prep-------------------------------------------------------
# load mh_expr containing gene expression logcounts
# load mh_data containing cell metadata and cell x-y coordinates
data(mHypothal)

# create spe object using gene expression, cell metadata, and cell locations.
# spatialCoordsNames requires column names in mh_data that contain
# xy-coordinates of cells
spe2 <- SpatialExperiment::SpatialExperiment(
  assays = list(logcounts = mh_expr),
  colData = mh_data,
  spatialCoordsNames = c("X", "Y")
)
spe2

## ----hypothal_data_columns----------------------------------------------------
spe2 |>
  colData() |>
  str() # metadata summary

## ----ClustSIGNAL_multiRun-----------------------------------------------------
set.seed(110)
# ClustSIGNAL can be run on a dataset with multiple samples. As before, we need
# the SpatialExperiment object and column name of sample labels in the object.
# The method can be run in parallel through the threads option. Here we use
# threads = 4 to use 4 cores.
# Since no batch effects were observed in this data subset, we have not used
# the batch and batch_by options.
samples <- "samples" # column name containing sample names
res_hyp <- clustSIGNAL(spe2, samples, threads = 4, outputs = "a")

## ----hypothal_final_spe-------------------------------------------------------
# for convenience with downstream analyses, we replace the original spe object
# with the one generated by ClustSIGNAL.
spe2 <- res_hyp$spe_final
spe2

## ----hypothal_samples---------------------------------------------------------
samplesList <- spe2[[samples]] |> levels() # get sample names
samplesList

## ----hypothal_clusterMetrics--------------------------------------------------
spe2 |>
  colData() |>
  as.data.frame() |>
  # group by the column named in `samples`, not by a column literally called
  # "samples", so the chunk follows the variable set above
  group_by(across(all_of(samples))) |>
  summarise(
    # Comparing ClustSIGNAL cluster labels to annotations available with the
    # public data to assess its accuracy.
    ARI = aricode::ARI(Cell_class, ClustSIGNAL),
    NMI = aricode::NMI(Cell_class, ClustSIGNAL),
    # Assessing the overall entropy distribution of the samples in the dataset.
    min_Entropy = min(entropy),
    min_Entropy_count = sum(entropy == 0),
    max_Entropy = max(entropy),
    mean_Entropy = mean(entropy)
  )

## ----hypothal_spatialPlots1---------------------------------------------------
# for plotting with scater R package, we need to add the spatial coordinates
# to the reduced dimension section
reducedDim(spe2, "spatial") <- spatialCoords(spe2)

## ----hypothal_spatialPlots2---------------------------------------------------
# spatial plot - ClustSIGNAL clusters, one facet per sample
spt_clust2 <- scater::plotReducedDim(
  spe2,
  colour_by = "ClustSIGNAL",
  dimred = "spatial", # specify spatial low dimension
  point_alpha = 1,
  point_size = 4,
  scattermore = TRUE
) +
  scale_color_manual(values = colors) +
  facet_wrap(vars(spe2[[samples]]), scales = "free", nrow = 1) +
  guides(
    colour = guide_legend(
      title = "Clusters",
      override.aes = list(size = 3)
    )
  ) +
  theme(text = element_text(size = 12))

## ----hypothal_spatialPlots3---------------------------------------------------
# For visualising cluster-level entropy distribution, we reorder the clusters
# by their median entropy value in each sample
df_met2 <- spe2 |>
  colData() |>
  as.data.frame()

# one box plot per sample, returned as a list named by sample
box_clust2 <- lapply(
  stats::setNames(samplesList, samplesList),
  function(s) {
    df_met_sub <- df_met2[df_met2[[samples]] == s, ]

    # calculating median entropy of each cluster in a sample and
    # reordering clusters by their median entropy
    ct_ent2 <- df_met_sub |>
      mutate(ClustSIGNAL = as.character(ClustSIGNAL)) |>
      group_by(ClustSIGNAL) |>
      summarise(mdEntropy = median(entropy)) |>
      arrange(mdEntropy)
    df_met_sub$ClustSIGNAL <- factor(
      df_met_sub$ClustSIGNAL,
      levels = ct_ent2$ClustSIGNAL
    )

    # colours are looked up by cluster label, in the reordered level order
    # (assumes cluster labels are integer-like strings - TODO confirm)
    col_ent2 <- colors[as.numeric(ct_ent2$ClustSIGNAL)]

    # box plot of cluster entropy
    df_met_sub |>
      ggplot(aes(x = ClustSIGNAL, y = entropy, fill = ClustSIGNAL)) +
      geom_boxplot() +
      scale_fill_manual(values = col_ent2) +
      facet_wrap(vars(.data[[samples]]), nrow = 1) +
      labs(x = "ClustSIGNAL clusters", y = "Entropy") +
      ylim(0, NA) +
      theme_classic() +
      theme(
        strip.text = element_blank(),
        legend.position = "none",
        text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5)
      )
  }
)

## ----hypothal_spatialPlots4---------------------------------------------------
# every sample's box plot is placed under the faceted spatial plot
spt_clust2 / (
  patchwork::wrap_plots(box_clust2, nrow = 1) +
    plot_layout(axes = "collect")
) +
  plot_layout(guides = "collect", heights = c(5, 3)) +
  plot_annotation(
    title = "Spatial (top) and entropy (bottom) distributions of clusters",
    theme = theme(plot.title = element_text(hjust = 0.5, face = "bold"))
  )

## ----hypothal_entropyPlots1---------------------------------------------------
hst_ent2 <- spe2 |>
  colData() |>
  as.data.frame() |>
  ggplot(aes(entropy)) +
  geom_histogram(binwidth = 0.05) +
  facet_wrap(vars(.data[[samples]]), nrow = 1) +
  labs(x = "Entropy", y = "Number of neighbourhoods") +
  theme_classic() +
  theme(text = element_text(size = 12))

## ----hypothal_entropyPlots2---------------------------------------------------
# NOTE(review): scale_colour_gradient2() is a diverging scale; if entropy is
# never negative the `low` colour is presumably never reached - confirm
# whether scale_colour_gradient() was intended.
# NOTE(review): no size aesthetic is mapped in this call, so the size scale
# looks unused - confirm before removing.
spt_ent2 <- scater::plotReducedDim(
  spe2,
  colour_by = "entropy",
  dimred = "spatial", # specify spatial low dimension
  point_alpha = 1,
  point_size = 4,
  scattermore = TRUE
) +
  scale_colour_gradient2("Entropy", low = "grey", high = "blue") +
  scale_size_continuous(range = c(0, max(spe2$entropy))) +
  facet_wrap(vars(spe2[[samples]]), scales = "free", nrow = 1) +
  theme(strip.text = element_blank(), text = element_text(size = 12))
## ----hypothal_entropyPlots3---------------------------------------------------
# histogram on top, spatial entropy map below
hst_ent2 / spt_ent2 +
  plot_layout(heights = c(4, 5)) +
  plot_annotation(
    title = "Entropy spread (top) and spatial distribution (bottom)",
    theme = theme(plot.title = element_text(hjust = 0.5, face = "bold"))
  )

## ----ClustSIGNALseq_data------------------------------------------------------
# load logcounts and metadata to the environment
data(mEmbryo2)

# as before, we read the data into a SpatialExperiment object
spe <- SpatialExperiment::SpatialExperiment(
  assays = list(logcounts = me_expr),
  colData = me_data,
  spatialCoordsNames = c("X", "Y")
)

## ----ClustSIGNALseq_prep------------------------------------------------------
set.seed(100)
# low dimension data is generated first, for use in the initial clustering
spe <- scater::runPCA(spe)

## ----ClustSIGNALseq_step1-----------------------------------------------------
spe <- clustSIGNAL::p1_clustering(spe, dimRed = "PCA")

## ----ClustSIGNALseq_step1_out1------------------------------------------------
head(spe$initCluster) # clustering output

## ----ClustSIGNALseq_step1_out2------------------------------------------------
head(spe$initSubcluster) # subclustering output

## ----ClustSIGNALseq_step2-----------------------------------------------------
# This step generates a list of neighbourhood information.
# neighbourhood detection; the returned list is indexed below by its
# nnCells and regXclust elements
outReg <- clustSIGNAL::neighbourDetect(spe, samples = "sample_id")

## ----ClustSIGNALseq_step2_out1------------------------------------------------
outReg$nnCells[1:3, 1:3]

## ----ClustSIGNALseq_step2_out2------------------------------------------------
outReg$regXclust[[1]]

## ----ClustSIGNALseq_step3-----------------------------------------------------
# entropy is measured from the regXclust element of the neighbourhood list
spe <- clustSIGNAL::entropyMeasure(spe, outReg$regXclust)

## ----ClustSIGNALseq_step3_out-------------------------------------------------
head(spe$entropy) # entropy values

## ----ClustSIGNALseq_step4-----------------------------------------------------
# smoothing uses the nnCells element of the neighbourhood list
spe <- clustSIGNAL::adaptiveSmoothing(spe, outReg$nnCells)

## ----ClustSIGNALseq_step4_out-------------------------------------------------
assay(spe, "smoothed")[1:5, 1:3]

## ----ClustSIGNALseq_step5-----------------------------------------------------
spe <- clustSIGNAL::p2_clustering(spe)

## ----ClustSIGNALseq_step5_out-------------------------------------------------
head(spe$ClustSIGNAL) # ClustSIGNAL cluster labels

## -----------------------------------------------------------------------------
sessionInfo()