## ----runConclus_first, eval=FALSE---------------------------------------------
#  library(conclus)
#  
#  outputDirectory <- tempdir()
#  experimentName <- "Test"
#  species <- "mouse"
#  
#  countmatrixPath <- system.file("extdata/countMatrix.tsv", package="conclus")
#  countMatrix <- loadDataOrMatrix(file=countmatrixPath, type="countMatrix",
#                                  ignoreCellNumber=TRUE)
#  
#  coldataPath <- system.file("extdata/colData.tsv", package="conclus")
#  columnsMetaData <- loadDataOrMatrix(file=coldataPath, type="coldata",
#                          columnID="cell_ID")
#  
#  sceObjectCONCLUS <- runCONCLUS(outputDirectory, experimentName, countMatrix,
#                              species, columnsMetaData=columnsMetaData,
#                              perplexities=c(2,3), tSNENb=1,
#                              PCs=c(4,5,6,7,8,9,10), epsilon=c(380, 390, 400),
#                              minPoints=c(2,3), clusterNumber=4)
#  

## ----format_data--------------------------------------------------------------

library(conclus)

outputDirectory <- tempdir()
dir.create(outputDirectory, showWarnings=FALSE)
species <- "mouse"

countMatrixPath <- file.path(outputDirectory, "countmatrix.txt")
metaDataPath <- file.path(outputDirectory, "metaData.txt")
matrixURL <- paste0("https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSM492",
"3493&format=file&file=GSM4923493%5FE11%5F5%5Frawcounts%5Fmatrix%2Etsv%2Egz")
metaDataURL <- paste0("https://www.ncbi.nlm.nih.gov/geo/download/?acc=",
"GSM4923493&format=file&file=GSM4923493%5FMetadata%5FE11%5F5%2Etxt%2Egz")

result <- retrieveFromGEO(matrixURL, countMatrixPath, species,
metaDataPath=metaDataPath, colMetaDataURL=metaDataURL)
countMatrix <- result[[1]]
columnsMetaData <- result[[2]]
## Correct the columns names to fit conclus input requirement
## The columns should be: cellName, state, and cellBarcode
columnsMetaData <- columnsMetaData[,c(1,3,2)]
colnames(columnsMetaData) <- c("cellName", "state", "cellBarcode")

## Removing embryonic hemoglobins with names starting with "Hba" or "Hbb"
idxHba <- grep("Hba", rownames(countMatrix))
idxHbb <- grep("Hbb", rownames(countMatrix))
countMatrix <- countMatrix[-c(idxHba, idxHbb),]

## Removing control human cells
idxHumanPos <- which(columnsMetaData$state == "Pos Ctrl")
idxHumanNeg <- which(columnsMetaData$state == "Neg Ctrl")
columnsMetaData <- columnsMetaData[-c(idxHumanPos, idxHumanNeg),]
countMatrix <- countMatrix[,-c(idxHumanPos, idxHumanNeg)]

## Removing genes not having an official symbol
idxENS <- grep("ENSMUSG", rownames(countMatrix))
countMatrix <- countMatrix[-idxENS,]

## ----local_data, eval=FALSE---------------------------------------------------
#  
#  ## countMatrixPath <- ""
#  ## metaDataPath <- ""
#  
#  countMatrix <- loadDataOrMatrix(countMatrixPath, type="countMatrix")
#  columnsMetaData <- loadDataOrMatrix(file=metaDataPath, type="coldata",
#                                      columnID="")
#  
#  ## Filtering steps to add here as performed above

## ----normalization_1----------------------------------------------------------

## Creation of the single-cell RNA-Seq object
scr <- singlecellRNAseq(experimentName = "E11_5",
        countMatrix     = countMatrix,
        species         = "mouse",
        outputDirectory = outputDirectory)

## Normalization of the count matrix
scr <- normaliseCountMatrix(scr, coldata=columnsMetaData)

## ----testClustering, results="hide", eval=FALSE-------------------------------
#  p <- testClustering(scr, writeOutput=TRUE, silent=TRUE)

## ----testClustering_result1, results="hide", fig.keep = 'none', eval=FALSE----
#  # saved as "outputDirectory/test_clustering/test_tSNE.pdf"
#  p[[1]]

## ----testClustering_result2, results="hide", fig.keep = 'none', eval=FALSE----
#  # saved as "outputDirectory/test_clustering/test_clustering.pdf"
#  p[[2]]

## ----normalization_2, eval=FALSE----------------------------------------------
#  scr <- normaliseCountMatrix(scr, coldata=columnsMetaData)

## ----normalization_results----------------------------------------------------
## Accessing slots
originalMat <- getCountMatrix(scr)
SCEobject <- getSceNorm(scr)
normMat <- SingleCellExperiment::logcounts(SCEobject)

# checking what changed after the normalisation
dim(originalMat)
dim(normMat)

# show first columns and rows of the count matrix
originalMat[1:5,1:5]

# show first columns and rows of the normalized count matrix
normMat[1:5,1:5]

# visualize first rows of metadata (coldata)
coldataSCE <- as.data.frame(SummarizedExperiment::colData(SCEobject))
head(coldataSCE)

# visualize beginning of the rowdata containing gene information
rowdataSCE <- as.data.frame(SummarizedExperiment:::rowData(SCEobject))
head(rowdataSCE)

## ----tsne_generation----------------------------------------------------------
scr <- generateTSNECoordinates(scr, cores=2)

## ----tsne_showResults---------------------------------------------------------
tsneList <- getTSNEList(scr)
head(getCoordinates(tsneList[[1]]))

## ----runDBSScan---------------------------------------------------------------
scr <- runDBSCAN(scr, cores=2)

## ----dbscan_showResults-------------------------------------------------------
dbscanList <- getDbscanList(scr)
clusteringList <- lapply(dbscanList, getClustering)
clusteringList[[1]][, 1:10]

## ----clusterCellsInternal-----------------------------------------------------
scr <- clusterCellsInternal(scr, clusterNumber=10)

## ----clusterCellsInternal_showResults-----------------------------------------
cci <- getCellsSimilarityMatrix(scr)
cci[1:10, 1:10]

## ----clustersSimilarityMatrix-------------------------------------------------
scr <- calculateClustersSimilarity(scr)
csm <- getClustersSimilarityMatrix(scr)
csm[1:10, 1:10]

## ----plotClustered, eval=FALSE------------------------------------------------
#  tSNEclusters <- plotClusteredTSNE(scr, columnName="clusters",
#                  returnPlot=TRUE, silentPlot=TRUE)
#  
#  tSNEnoColor <- plotClusteredTSNE(scr, columnName="noColor",
#                  returnPlot=TRUE, silentPlot=TRUE)
#  
#  tSNEstate <- plotClusteredTSNE(scr, columnName="state",
#                  returnPlot=TRUE, silentPlot=TRUE)

## ----plotClustered_visualization1, results="hide", fig.keep = 'none', eval=FALSE----
#  tSNEclusters[[5]]

## ----plotClustered_visualization2, results="hide", fig.keep = 'none', eval=FALSE----
#  tSNEnoColor[[5]]

## ----plotClustered_visualization3, results="hide", fig.keep = 'none', eval=FALSE----
#  tSNEstate[[5]]

## ----plotCellSimilarity, results="hide", fig.keep = 'none', eval=FALSE--------
#  plotCellSimilarity(scr)

## ----plotClustersSimilarity, results="hide", fig.keep = 'none', eval=FALSE----
#  plotClustersSimilarity(scr)

## ----rankGenes----------------------------------------------------------------
scr <- rankGenes(scr)

## ----rankGenes_result---------------------------------------------------------
markers <- getMarkerGenesList(scr, cluster=1)

## ----topMarkers_result--------------------------------------------------------
scr <- retrieveTopClustersMarkers(scr, removeDuplicates=TRUE)
topMarkers <- getTopMarkers(scr)
topMarkers

## ----plotCellHeatmap_marker1, results="hide", fig.keep = 'none', eval=FALSE----
#  plotCellHeatmap(scr, orderClusters=TRUE, orderGenes=TRUE)

## ----plotCellHeatmap_marker2, results="hide", fig.keep = 'none', eval=FALSE----
#  plotCellHeatmap(scr, orderClusters=TRUE, orderGenes=TRUE, meanCentered=FALSE)

## ----plotGeneExpression1, eval=FALSE------------------------------------------
#  plotGeneExpression(scr, "Maob", tSNEpicture=5)

## ----plotGeneExpression2, eval=FALSE------------------------------------------
#  plotGeneExpression(scr, "Pcbd1", tSNEpicture=5)

## ----plotGeneExpression3, eval=FALSE------------------------------------------
#  plotGeneExpression(scr, "Serpinf2", tSNEpicture=5)

## ----plotGeneExpression4, eval=FALSE------------------------------------------
#  plotGeneExpression(scr, "Cldn1", tSNEpicture=5)

## ----getGenesInfo-------------------------------------------------------------
scr <- retrieveGenesInfo(scr)
result <- getGenesInfos(scr)
head(result)

## ----merging_clusters, eval=FALSE---------------------------------------------
#  ## Retrieving the table indicating to which cluster each cell belongs
#  clustCellsDf <- retrieveTableClustersCells(scr)
#  
#  ## Replace "2/10" by "1/9" to merge 1/2 and 9/10
#  clustCellsDf$clusters[which(clustCellsDf$clusters == 2)] <- 1
#  clustCellsDf$clusters[which(clustCellsDf$clusters == 10)] <- 9
#  
#  ## Modifying the object to take into account the new classification
#  scrUpdated <- addClustering(scr, clusToAdd=clustCellsDf)

## ----addClusteringManually_visualization2, results="hide", fig.keep = 'none', eval=FALSE----
#  plotCellSimilarity(scrUpdated)

## ----addClusteringManually_visualization3, results="hide", fig.keep = 'none', eval=FALSE----
#  plotCellHeatmap(scrUpdated, orderClusters=TRUE, orderGenes=TRUE)

## ----addClusteringManually_visualization4, results="hide", fig.keep = 'none', eval=FALSE----
#  tSNEclusters <- plotClusteredTSNE(scrUpdated, columnName="clusters",
#                                      returnPlot=TRUE, silentPlot=TRUE)
#  tSNEclusters[[5]]

## ----replot1, eval=FALSE------------------------------------------------------
#  plotGeneExpression(scrUpdated, "Col1a1", tSNEpicture=5)

## ----replot2, eval=FALSE------------------------------------------------------
#  plotGeneExpression(scrUpdated, "Cdk8", tSNEpicture=5)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()