## ---- eval = FALSE------------------------------------------------------------
#  pkgs <- c("cellxgenedp", "zellkonverter", "SingleCellExperiment", "HDF5Array")
#  required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
#  BiocManager::install(required_pkgs)

## ---- eval = FALSE------------------------------------------------------------
#  pkgs <- c(
#      "mtmorgan/cellxgenedp", "zellkonverter", "SingleCellExperiment", "HDF5Array"
#  )

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(zellkonverter)
    library(SingleCellExperiment) # load early to avoid masking dplyr::count()
    library(dplyr)
    library(cellxgenedp)
})

## ---- eval = FALSE------------------------------------------------------------
#  cxg()

## -----------------------------------------------------------------------------
# Call db() once and keep the result; the accessors below are applied to it.
# NOTE: the result is stored under the same name as the function that made it.
db <- db()

## -----------------------------------------------------------------------------
db

## -----------------------------------------------------------------------------
# Show the collections, datasets, and files tables derived from `db`.
db |> collections()

db |> datasets()

db |> files()

## -----------------------------------------------------------------------------
# Tally datasets per collection_id, sort the tally in decreasing order, and
# keep only the first row.
collection_with_most_datasets <- slice(
    count(datasets(db), collection_id, sort = TRUE),
    1
)

## -----------------------------------------------------------------------------
# Look up that collection_id in the collections table and glimpse() the match.
collection_with_most_datasets |>
    select(collection_id) |>
    left_join(collections(db), by = "collection_id") |>
    glimpse()

## -----------------------------------------------------------------------------
# Look up that collection_id in the datasets table.
collection_with_most_datasets |>
    select(collection_id) |>
    left_join(datasets(db), by = "collection_id")

## -----------------------------------------------------------------------------
# Keep only the list-valued columns of the datasets table.
select(datasets(db), where(is.list))

## -----------------------------------------------------------------------------
# Call facets() for the "assay", "ethnicity", and "sex" facets in turn.
db |> facets("assay")
db |> facets("ethnicity")
db |> facets("sex")

## -----------------------------------------------------------------------------
# Datasets satisfying all three facet conditions: assay ontology term
# "EFO:0009922", ethnicity label "African American", and sex label "female".
african_american_female <- filter(
    datasets(db),
    facets_filter(assay, "ontology_term_id", "EFO:0009922"),
    facets_filter(ethnicity, "label", "African American"),
    facets_filter(sex, "label", "female")
)

## -----------------------------------------------------------------------------
# Sum cell_count over the selected datasets.
summarise(african_american_female, total_cell_count = sum(cell_count))

## -----------------------------------------------------------------------------
## collections
# One row per distinct collection_id, joined to the collections table.
african_american_female |>
    distinct(collection_id) |>
    left_join(collections(db), by = "collection_id")

## -----------------------------------------------------------------------------
# Rows of the files table matching the selected dataset_id values.
selected_files <-
    african_american_female |>
    select(dataset_id) |>
    left_join(files(db), by = "dataset_id")
selected_files

## ---- eval = FALSE------------------------------------------------------------
#  selected_files |>
#      filter(filetype == "CXG") |>
#      slice(1) |> # visualize a single dataset
#      datasets_visualize()

## -----------------------------------------------------------------------------
# Restrict selected_files to the H5AD entry of one dataset_id and pass it to
# files_download() with dry.run = FALSE; the result is used as a file path.
local_file <- files_download(
    filter(
        selected_files,
        dataset_id == "3de0ad6d-4378-4f62-b37b-ec0b75a50d94",
        filetype == "H5AD"
    ),
    dry.run = FALSE
)
basename(local_file)

## -----------------------------------------------------------------------------
# Read the file with readH5AD(), using the "R" reader and use_hdf5 = TRUE.
h5ad <- readH5AD(local_file, reader = "R", use_hdf5 = TRUE)
h5ad

## -----------------------------------------------------------------------------
# Tabulate the rows of colData(h5ad) by sex and donor_id.
# `h5ad` is supplied exactly once, through the pipe: writing
# `h5ad |> colData(h5ad)` would expand to `colData(h5ad, h5ad)`, passing the
# object a second time as a stray extra argument.
h5ad |>
    colData() |>
    as_tibble() |>
    count(sex, donor_id)

## ----sessionInfo, echo=FALSE--------------------------------------------------
# Print the R version and attached/loaded package versions for reproducibility.
sessionInfo()