## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----install-package, eval=FALSE----------------------------------------------
#  if (!require("BiocManager", quietly = TRUE)) {
#      install.packages("BiocManager")
#  }
#  
#  BiocManager::install("PIUMA")

## ----chunk-1,eval=FALSE-------------------------------------------------------
#  #############################################
#  ############# NOT TO EXECUTE ################
#  ########## please skip this chunk ###########
#  #############################################
#  
#  
#  dataset_seu <- readRDS("./GSE193346_CD1_seurat_object.rds")
#  
#  # subset vascular endothelial cells
#  vascularEC_seuobj <- subset(x = dataset_seu,
#                              subset = markFinal == "vascular_ec")
#  df_data_counts <- vascularEC_seuobj@assays$RNA@counts
#  df_cl <- as.data.frame(df_data_counts)
#  meta_cl <- vascularEC_seuobj@meta.data[, c(10,13,14,15)]
#  meta_cl[sapply(meta_cl, is.character)] <- lapply(meta_cl[sapply(meta_cl,
#                                                                  is.character)],
#                                                   as.factor)
#  
#  ## Filtering and normalization
#  colnames(meta_cl)[4] <- "class"
#  SE <- DaMiR.makeSE(df_cl, meta_cl)
#  data_norm <- DaMiR.normalization(SE,
#                                   type = "vst",
#                                   minCounts = 3,
#                                   fSample = 0.4,
#                                   hyper = "no")
#  vascEC_norm <- round(t(assay(data_norm)), 2)
#  vascEC_meta <- meta_cl[, c(3,4), drop=FALSE]
#  df_TDA <- cbind(vascEC_meta, vascEC_norm)
#  

## ----chunk-2,warning=FALSE----------------------------------------------------
library(PIUMA)
library(ggplot2)
data(vascEC_norm)
data(vascEC_meta)

df_TDA <- cbind(vascEC_meta, vascEC_norm)

dim(df_TDA)
head(df_TDA[1:5, 1:7])

## ----chunk-3,warning=FALSE----------------------------------------------------
TDA_obj <- makeTDAobj(df_TDA, c("stage","zone"))


## ----chunk-3_1,warning=FALSE, eval=FALSE--------------------------------------
#  data("vascEC_meta")
#  data("vascEC_norm")
#  
#  dataSE <- SummarizedExperiment(assays=as.matrix(t(vascEC_norm)),
#                                 colData=as.data.frame(vascEC_meta))
#  TDA_obj <- makeTDAobjFromSE(dataSE, c("stage","zone"))
#  

## ----chunk-4, fig.width=10, fig.height=10,warning=FALSE, fig.cap = "Scatterplot from UMAP. Four scatter plots are drawn, using the first 2 components identified by UMAP. Each panel represents cells belonging to a specific heart chamber, while colors refer to the development stage."----
set.seed(1)

# calculate the distance matrix
TDA_obj <- dfToDistance(TDA_obj, distMethod = "euclidean")

# calculate the projections (lenses)
TDA_obj <- dfToProjection(TDA_obj,
                      "UMAP",
                      nComp = 2,
                      umapNNeigh = 25,
                      umapMinDist = 0.3,
                      showPlot = FALSE)

# plot point-cloud based on stage and zone
df_plot <- as.data.frame(cbind(getOutcomeFact(TDA_obj),
                                getComp(TDA_obj)), 
                         stringAsFactor = TRUE)

ggplot(data= df_plot, aes(x=comp1, y=comp2, color=stage))+
  geom_point(size=3)+
  facet_wrap(~zone)


## ----chunk-5, fig.width=10, fig.height=10,warning=FALSE-----------------------
TDA_obj <- mapperCore(TDA_obj,
                       nBins = 15,
                       overlap = 0.3,
                       clustMeth = "kmeans")

# number of clusters (nodes)
dim(getDfMapper(TDA_obj))

# content of two overlapping clusters
getDfMapper(TDA_obj)["node_102_cl_1", 1]
getDfMapper(TDA_obj)["node_117_cl_1", 1]


## ----chunk-6, fig.width=10, fig.height=10,warning=FALSE-----------------------
# Jaccard Matrix
TDA_obj <- jaccardMatrix(TDA_obj)
head(round(getJacc(TDA_obj)[1:5,1:5],3))
round(getJacc(TDA_obj)["node_102_cl_1","node_117_cl_1"],3)

## ----chunk-7, fig.width=10, fig.height=10,warning=FALSE-----------------------
TDA_obj <- tdaDfEnrichment(TDA_obj,
                           cbind(getScaledData(TDA_obj),
                                 getOutcome(TDA_obj)))
head(getNodeDataMat(TDA_obj)[1:5, tail(names(getNodeDataMat(TDA_obj)), 5)])

## ----chunk-8, fig.width=10, fig.height=10,warning=FALSE, eval=TRUE, fig.cap = "Power-law degree distribution. The correlation between P(k) (y-axis) and k (x-axis) is represented in linear scale (on the left) and in log-log scale (on the right). The regression line (orange line) is also provided."----
# Anchoring (supervised)
entropy <-  checkNetEntropy(getNodeDataMat(TDA_obj)[, "zone"])
entropy

# Scale free network (unsupervised)
netModel <- checkScaleFreeModel(TDA_obj, showPlot = TRUE)
netModel

## ----fig.width=10, fig.height=10, warning=FALSE, eval=FALSE-------------------
#  write.table(x = round(getJacc(TDA_obj),3),
#              file = "./jaccard.matrix.txt",
#              sep = "\t",
#              quote = FALSE,
#              na = "",
#              col.names = NA)
#  
#  write.table(x = getNodeDataMat(TDA_obj),
#              file = "./nodeEnrichment.txt",
#              sep = "\t",
#              quote = FALSE,
#              col.names = NA)
#  
#  

## ----session_info-------------------------------------------------------------
sessionInfo()