## ----setup, echo = FALSE, warning = FALSE-------------------------------------
suppressMessages(library(knitr))
suppressMessages(library(GenomicRanges))
suppressMessages(library(limma))
suppressMessages(library(minfi))
suppressMessages(library(ExperimentHub))
suppressMessages(library(recountmethylation))
knitr::opts_chunk$set(echo = TRUE, eval = TRUE, warning = FALSE, 
  message = FALSE)

## -----------------------------------------------------------------------------
dft <- data.frame(release = c("first", "second", "third", "total"),
                  version.label = c("0.0.1", "0.0.2", "0.0.3", "all"),
                  date = c("11/20/2020", "01/06/2021", "12/21/2022", "12/21/2022"),
                  hm450k.samples = c(35360, 50400, 7546, 
                                     sum(c(35360, 50400, 7546))),
                  epic.samples = c(0, 12650, 25472, 
                                   sum(c(0, 12650, 25472))))
dft$combined.samples <- dft$hm450k.samples + dft$epic.samples
knitr::kable(dft, align = "c")

## ----echo = TRUE, message = TRUE----------------------------------------------
sm <- as.data.frame(smfilt(get_servermatrix()))
if(is(sm, "data.frame")){knitr::kable(sm, align = "c")}

## ----eval = F-----------------------------------------------------------------
#  cache.path <- tools::R_user_dir("recountmethylation")
#  setExperimentHubOption("CACHE", cache.path)
#  hub <- ExperimentHub::ExperimentHub()                    # connect to the hubs
#  rmdat <- AnnotationHub::query(hub, "recountmethylation") # query the hubs

## ----eval = F-----------------------------------------------------------------
#  fpath <- rmdat[["EH3778"]] # download with default caching
#  rhdf5::h5ls(fpath)         # load the h5 file

## -----------------------------------------------------------------------------
dn <- "remethdb-h5se_gr-test_0-0-1_1590090412"
path <- system.file("extdata", dn, package = "recountmethylation")
h5se.test <- HDF5Array::loadHDF5SummarizedExperiment(path)

## -----------------------------------------------------------------------------
class(h5se.test) # inspect object class

## -----------------------------------------------------------------------------
dim(h5se.test) # get object dimensions

## -----------------------------------------------------------------------------
summary(h5se.test) # summarize dataset components

## -----------------------------------------------------------------------------
h5se.md <- minfi::pData(h5se.test) # get sample metadata
dim(h5se.md)                       # get metadata dimensions

## -----------------------------------------------------------------------------
colnames(h5se.md) # get metadata column names

## -----------------------------------------------------------------------------
h5se.bm <- minfi::getBeta(h5se.test) # get dnam fractions
dim(h5se.bm)                         # get dnam fraction dimensions

## -----------------------------------------------------------------------------
colnames(h5se.bm) <- h5se.test$gsm       # assign sample ids to dnam fractions
knitr::kable(head(h5se.bm), align = "c") # show table of dnam fractions 

## -----------------------------------------------------------------------------
an <- minfi::getAnnotation(h5se.test) # get platform annotation
dim(an)                               # get annotation dimensions

## -----------------------------------------------------------------------------
colnames(an) # get annotation column names

## -----------------------------------------------------------------------------
ant <- as.matrix(t(an[c(1:4), c(1:3, 5:6, 9, 19, 24, 26)])) # subset annotation
knitr::kable(ant, align = "c")                              # show annotation table

## -----------------------------------------------------------------------------
dn <- "remethdb-h5_rg-test_0-0-1_1590090412.h5"     # get the h5se directory name
h5.test <- system.file("extdata", "h5test", dn, 
                    package = "recountmethylation") # get the h5se dir path

## -----------------------------------------------------------------------------
h5.rg <- getrg(dbn = h5.test, all.gsm = TRUE) # get red/grn signals from an h5 db

## -----------------------------------------------------------------------------
h5.red <- minfi::getRed(h5.rg)     # get red signal matrix
h5.green <- minfi::getGreen(h5.rg) # get grn signal matrix
dim(h5.red)                        # get dimensions of red signal matrix

## -----------------------------------------------------------------------------
knitr::kable(head(h5.red), align = "c") # show first rows of red signal matrix

## -----------------------------------------------------------------------------
knitr::kable(head(h5.green), align = "c") # show first rows of grn signal matrix

## -----------------------------------------------------------------------------
identical(rownames(h5.red), rownames(h5.green)) # check cpg probe names identical

## ----eval = FALSE-------------------------------------------------------------
#  # download from GEO
#  dlpath <- tempdir()                                     # get a temp dir path
#  gsmv <- c("GSM1038308", "GSM1038309")                   # set sample ids to identify
#  geo.rg <- gds_idat2rg(gsmv, dfp = dlpath)               # load sample idats into rgset
#  colnames(geo.rg) <- gsub("\\_.*", "", colnames(geo.rg)) # assign sample ids to columns

## ----eval = FALSE-------------------------------------------------------------
#  geo.red <- minfi::getRed(geo.rg)      # get red signal matrix
#  geo.green <- minfi::getGreen(geo.rg)  # get grn signal matrix

## ----eval = FALSE-------------------------------------------------------------
#  int.addr <- intersect(rownames(geo.red), rownames(h5.red)) # get probe address ids
#  geo.red <- geo.red[int.addr,]                              # subset geo rgset red signal
#  geo.green <- geo.green[int.addr,]                          # subset gro rgset grn signal
#  geo.red <- geo.red[order(match(rownames(geo.red), rownames(h5.red))),]
#  geo.green <- geo.green[order(match(rownames(geo.green), rownames(h5.green))),]
#  identical(rownames(geo.red), rownames(h5.red))             # check identical addresses, red
#  identical(rownames(geo.green), rownames(h5.green))         # check identical addresses, grn
#  class(h5.red) <- "integer"; class(h5.green) <- "integer"   # set matrix data classes to integer

## ----eval = FALSE-------------------------------------------------------------
#  identical(geo.red, h5.red) # compare matrix signals, red

## ----eval = FALSE-------------------------------------------------------------
#  identical(geo.green, h5.green) # compare matrix signals, grn

## ----eval = FALSE-------------------------------------------------------------
#  geo.gr <- minfi::preprocessNoob(geo.rg) # get normalized se data

## ----eval = FALSE-------------------------------------------------------------
#  geo.bm <- as.matrix(minfi::getBeta(geo.gr)) # get normalized dnam fractions matrix

## ----eval = FALSE-------------------------------------------------------------
#  h5se.bm <- as.matrix(h5se.bm) # set dnam fractions to matrix
#  int.cg <- intersect(rownames(geo.bm), rownames(h5se.bm))
#  geo.bm <- geo.bm[int.cg,]     # subset fractions on shared probe ids
#  geo.bm <- geo.bm[order(match(rownames(geo.bm), rownames(h5se.bm))),]

## ----eval = FALSE-------------------------------------------------------------
#  identical(summary(geo.bm), summary(h5se.bm)) # check identical summary values

## ----eval = FALSE-------------------------------------------------------------
#  identical(rownames(geo.bm), rownames(h5se.bm)) # check identical probe ids

## ----eval = FALSE-------------------------------------------------------------
#  detectionP(rg[,1:50]) # get detection pvalues from rgset
#  "Error in .local(Red, Green, locusNames, controlIdx, TypeI.Red, TypeI.Green, dim(Red_grid) == dim(detP_sink_grid) are not all TRUE"

## ----eval = FALSE-------------------------------------------------------------
#  preprocessFunnorm(rg) # get noob-normalized data
#  "Error: 'preprocessFunnorm()' only supports matrix-backed minfi objects.""

## ----eval = FALSE-------------------------------------------------------------
#  rg.h5se <- loadHDF5SummarizedExperiment(rg.path)        # full h5se RGChannelSet
#  rg.sub <- rg.h5se[,c(1:20)]                             # subset samples of interest
#  rg.new <- RGChannelSet(Red = getRed(rg.sub),
#                         Green = getGreen(rg.sub),
#                         annotation = annotation(rg.sub)) # re-make as non-DA object
#  gr <- preprocessFunnorm(rg.new)                         # repeat preprocessing

## ----get_sessioninfo----------------------------------------------------------
sessionInfo()