## ----setup, include = FALSE------------------------------------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html ) ## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE-------------------------------------------------------- ## Bib setup library("RefManageR") ## Write bibliography information bib <- c( R = citation(), BiocStyle = citation("BiocStyle")[1], knitr = citation("knitr")[1], RefManageR = citation("RefManageR")[1], rmarkdown = citation("rmarkdown")[1], sessioninfo = citation("sessioninfo")[1], testthat = citation("testthat")[1], DeconvoBuddies = citation("DeconvoBuddies")[1] ) ## ----"install", eval = FALSE------------------------------------------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("DeconvoBuddies") # # ## Check that you have a valid Bioconductor installation # BiocManager::valid() ## ----"load_packages", message=FALSE, warning=FALSE-------------------------------------------------------------------- ## Packages for different types of RNA-seq data structures in R library("SingleCellExperiment") ## For downloading data library("spatialLIBD") ## Other helper packages for this vignette library("dplyr") library("ggplot2") ## Our main package library("DeconvoBuddies") ## ----"load snRNA-seq"------------------------------------------------------------------------------------------------- ## Use spatialLIBD to fetch the snRNA-seq dataset sce_path_zip <- fetch_deconvo_data("sce") ## unzip and load the data sce_path <- unzip(sce_path_zip, exdir = tempdir()) sce <- HDF5Array::loadHDF5SummarizedExperiment( file.path(tempdir(), "sce_DLPFC_annotated") ) # lobstr::obj_size(sce) # 172.28 MB ## exclude Ambiguous cell type sce <- sce[, sce$cellType_broad_hc != "Ambiguous"] sce$cellType_broad_hc <- droplevels(sce$cellType_broad_hc) ## Check the broad cell type distribution table(sce$cellType_broad_hc) ## We're going to subset to the first 5k genes to save memory ## In a real application you'll want to use the full dataset sce <- sce[seq_len(5000), ] ## check the final dimensions of the dataset dim(sce) ## ----"Run get_mean_ratio"--------------------------------------------------------------------------------------------- # calculate the Mean Ratio of genes for each cell type in sce marker_stats_MeanRatio <- get_mean_ratio( sce = sce, # sce is the SingleCellExperiment with our data assay_name = "logcounts", ## assay to use, we recommend logcounts [default] cellType_col = "cellType_broad_hc", # column in colData with cell type info gene_ensembl = "gene_id", # column in rowData with ensembl gene ids gene_name = "gene_name" # column in rowData with gene names/symbols ) ## ----"Explore get_mean_ratio output"---------------------------------------------------------------------------------- ## Explore the tibble output marker_stats_MeanRatio ## genes with the highest MeanRatio are the best marker genes for each cell type marker_stats_MeanRatio |> filter(MeanRatio.rank == 1) ## ----`Run findMarkers_1vALL`------------------------------------------------------------------------------------------ ## Run 1vALL DE to find markers for each cell type - takes 5min+ # marker_stats_1vAll <- findMarkers_1vAll( # sce = sce, # sce is the SingleCellExperiment with our data # assay_name = "counts", # cellType_col = "cellType_broad_hc", # column in colData with cell type info # mod = "~BrNum" # Control for donor stored in "BrNum" with mod # ) ## load 1vAll data to save time, data is equivalent to the above code data("marker_stats_1vAll") ## ----"Explore findMarkers_1vALL output"------------------------------------------------------------------------------- ## Explore the tibble output marker_stats_1vAll ## genes with the highest MeanRatio are the best marker genes for each cell type marker_stats_1vAll |> filter(std.logFC.rank == 1) ## ----"volcano plots"-------------------------------------------------------------------------------------------------- # Create volcano plots of DE stats from 1vALL marker_stats_1vAll |> ggplot(aes(logFC, -log.p.value)) + geom_point() + facet_wrap(~cellType.target) + geom_vline(xintercept = c(1, -1), linetype = "dashed", color = "red") ## ----"join marker_stats"---------------------------------------------------------------------------------------------- ## join the two marker_stats tables marker_stats <- marker_stats_MeanRatio |> left_join(marker_stats_1vAll, by = join_by(gene, cellType.target)) ## Check stats for our top genes marker_stats |> filter(MeanRatio.rank == 1) |> select(gene, cellType.target, MeanRatio, MeanRatio.rank, std.logFC, std.logFC.rank) ## ----"hockey stick plots"--------------------------------------------------------------------------------------------- # create hockey stick plots to compare MeanRatio and standard logFC values. marker_stats |> ggplot(aes(MeanRatio, std.logFC)) + geom_point() + facet_wrap(~cellType.target) ## ----"plot_gene_express"---------------------------------------------------------------------------------------------- ## plot expression of two genes from a list plot_gene_express( sce = sce, category = "cellType_broad_hc", genes = c("SLC2A1", "CREG2") ) ## ----"plot_marker_express MeanRatio"---------------------------------------------------------------------------------- # plot the top 10 MeanRatio genes for Excit plot_marker_express( sce = sce, stats = marker_stats, cell_type = "Excit", n_genes = 10, cellType_col = "cellType_broad_hc" ) ## ----"plot_marker_express logFC"-------------------------------------------------------------------------------------- # plot the top 10 1vAll genes for Excit plot_marker_express( sce = sce, stats = marker_stats, cell_type = "Excit", n_genes = 10, rank_col = "std.logFC.rank", ## use logFC cols from 1vALL anno_col = "std.logFC.anno", cellType_col = "cellType_broad_hc" ) ## ----plot_marker_express_ALL, eval = FALSE---------------------------------------------------------------------------- # # plot the top 10 1vAll genes for all cell types # print(plot_marker_express_ALL( # sce = sce, # stats = marker_stats, # n_genes = 10, # cellType_col = "cellType_broad_hc" # )) ## ----reproduce3, echo=FALSE------------------------------------------------------------------------------------------- ## Session info library("sessioninfo") options(width = 120) session_info() ## ----vignetteBiblio, results = "asis", echo = FALSE, warning = FALSE, message = FALSE--------------------------------- ## Print bibliography PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))