## ----<knitr, echo=FALSE, message=FALSE, results="hide", class.output="scroll-300"----
library("knitr")
opts_chunk$set(
  tidy = TRUE,
  cache = FALSE,
  message = FALSE,
  fig.align="center",
  dpi=200,
  fig.height=8,
  results='hold')

options(bitmapType = "cairo")


## ----loadLibaries2, echo=TRUE, message=FALSE, results="hide", warning=FALSE, class.output="scroll-300"----
library(readr)
library(GRaNIE)

## ----importData, echo=TRUE, include=TRUE, class.output="scroll-300"-----------

# We load the example data directly from the web:
file_peaks = "https://www.embl.de/download/zaugg/GRaNIE/countsATAC.filtered.tsv.gz"
file_RNA   = "https://www.embl.de/download/zaugg/GRaNIE/countsRNA.filtered.tsv.gz"
file_sampleMetadata = "https://www.embl.de/download/zaugg/GRaNIE/sampleMetadata.tsv.gz"

countsRNA.df      = read_tsv(file_RNA, col_types = cols())
countsPeaks.df    = read_tsv(file_peaks, col_types = cols()) 
sampleMetadata.df = read_tsv(file_sampleMetadata, col_types = cols())


# Let's check how the data looks like
countsRNA.df
countsPeaks.df
sampleMetadata.df

# Save the name of the respective ID columns
idColumn_peaks = "peakID"
idColumn_RNA = "ENSEMBL"


## ----initializeObject, echo=TRUE, include=TRUE, class.output="scroll-300"-----
genomeAssembly = "hg38" #Either hg19, hg38 or mm10. Both enhancers and RNA data must have the same genome assembly

# Optional and arbitrary list with information and metadata that is stored within the GRaNIE object
objectMetadata.l = list(name                = paste0("Macrophages_infected_primed"),
                        file_peaks          = file_peaks,
                        file_rna            = file_RNA,
                        file_sampleMetadata = file_sampleMetadata,
                        genomeAssembly      = genomeAssembly)

dir_output = "."

GRN = initializeGRN(objectMetadata = objectMetadata.l,
                    outputFolder = dir_output,
                    genomeAssembly = genomeAssembly)

GRN

## ----addData, echo=TRUE, include=TRUE, eval = FALSE, class.output="scroll-300"----
#  GRN = addData(GRN,
#                counts_peaks = countsPeaks.df, normalization_peaks = "DESeq2_sizeFactors", idColumn_peaks = idColumn_peaks,
#                counts_rna = countsRNA.df, normalization_rna = "limma_quantile", idColumn_RNA = idColumn_RNA,
#                sampleMetadata = sampleMetadata.df,
#                forceRerun = TRUE)
#  
#  GRN

## ----loadObject, echo=FALSE, results = FALSE, eval = TRUE---------------------
GRN = loadExampleObject()

## ----runPCA, echo=TRUE, include=TRUE, eval = FALSE, class.output="scroll-300", collapse=FALSE, results="hold"----
#  GRN = plotPCA_all(GRN, data = c("rna"), topn = 500, type = "normalized", plotAsPDF = FALSE, pages = c(2,3,14), forceRerun = TRUE)

## ----addTFBS, echo=TRUE, include=TRUE, eval = FALSE, class.output="scroll-300", results='hold'----
#  
#  folder_TFBS_6TFs = "https://www.embl.de/download/zaugg/GRaNIE/TFBS_selected.zip"
#  # Download the zip of all TFBS files. Takes too long here, not executed therefore
#  
#  download.file(folder_TFBS_6TFs, file.path("TFBS_selected.zip"), quiet = FALSE)
#  
#  unzip(file.path("TFBS_selected.zip"), overwrite = TRUE)
#  
#  motifFolder = tools::file_path_as_absolute("TFBS_selected")
#  
#  GRN = addTFBS(GRN, motifFolder = motifFolder, TFs = "all", filesTFBSPattern = "_TFBS", fileEnding = ".bed.gz", forceRerun = TRUE)
#  
#  GRN = overlapPeaksAndTFBS(GRN, nCores = 1, forceRerun = TRUE)

## ----filterData, echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300", results='hold'----
# Chromosomes to keep for peaks. This should be a vector of chromosome names
chrToKeep_peaks = c(paste0("chr", 1:22), "chrX")
GRN = filterData(GRN, minNormalizedMean_peaks = 5, minNormalizedMeanRNA = 1, chrToKeep_peaks = chrToKeep_peaks, maxSize_peaks = 10000, forceRerun = TRUE)

## ----addTFPeakConnections, echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300", results='hold'----
GRN = addConnections_TF_peak(GRN, plotDiagnosticPlots = FALSE,
                                     connectionTypes = c("expression"),
                                     corMethod = "pearson", forceRerun = TRUE)

## ----echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300", results='hold', fig.cap="<i>TF-enhancer diagnostic plots connection overview.A</i>", results='hold', fig.height = 10----
GRN = plotDiagnosticPlots_TFPeaks(GRN, dataType = c("real"), plotAsPDF = FALSE, pages = c(1))

## ----echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300", results='hold', fig.cap="<i>TF-enhancer diagnostic plots for EGR1.0.A (real)</i>", results='hold', fig.height = 10----
GRN = plotDiagnosticPlots_TFPeaks(GRN, dataType = c("real"), plotAsPDF = FALSE, pages = c(42))

## ----echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300", results='hold', fig.cap="<i>TF-enhancer diagnostic plots for EGR1.0.A (background)</i>", results='hold', fig.height = 10----
GRN = plotDiagnosticPlots_TFPeaks(GRN, dataType = c("background"), plotAsPDF = FALSE, pages = c(42))

## ----runARClassification, echo=TRUE, include=TRUE, eval = FALSE, class.output="scroll-300"----
#  GRN = AR_classification_wrapper(GRN, significanceThreshold_Wilcoxon = 0.05,
#                                  outputFolder = "plots",
#                                  plot_minNoTFBS_heatmap = 100, plotDiagnosticPlots = TRUE,
#                                  forceRerun = TRUE)

## ----saveObject2, echo=TRUE, include=TRUE, eval=FALSE, class.output="scroll-300"----
#  GRN_file_outputRDS = paste0(dir_output, "/GRN.rds")
#  saveRDS(GRN, GRN_file_outputRDS)

## ----addPeakGeneConnections, echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300"----
GRN = addConnections_peak_gene(GRN,
                               corMethod = "pearson", promoterRange = 10000, TADs = NULL,
                               nCores = 1, plotDiagnosticPlots = FALSE, plotGeneTypes = list(c("all")), forceRerun = TRUE)

## ----echo=TRUE, fig.cap="<i>Enhancer-gene diagnostic plots</i>", fig.height = 7, class.output="scroll-300"----
GRN = plotDiagnosticPlots_peakGene(GRN, gene.types = list(c("protein_coding", "lincRNA")), plotAsPDF = FALSE, pages = 1)

## ----combineAndFilter, echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300"----
GRN = filterGRNAndConnectGenes(GRN, TF_peak.fdr.threshold = 0.2,
                               peak_gene.fdr.threshold = 0.2, peak_gene.fdr.method = "BH", 
                               gene.types = c("protein_coding", "lincRNA"),
                               allowMissingTFs = FALSE, allowMissingGenes = FALSE,
                               forceRerun = TRUE
                               )

## ----include=TRUE, eval = TRUE, class.output="scroll-300"---------------------
GRN = add_TF_gene_correlation(GRN, corMethod = "pearson", nCores = 1, forceRerun = TRUE)

## ----saveObject, echo=TRUE, include=TRUE , eval=FALSE, class.output="scroll-300"----
#  GRN = deleteIntermediateData(GRN)
#  saveRDS(GRN, GRN_file_outputRDS)

## ----include=TRUE, eval = TRUE, class.output="scroll-300"---------------------
GRN_connections.all = getGRNConnections(GRN, type = "all.filtered", include_TF_gene_correlations = TRUE, include_geneMetadata = TRUE)

GRN_connections.all

## ----connectionSummary, echo=TRUE, include=TRUE, eval = TRUE, out.width="50%", class.output="scroll-300"----
GRN = generateStatsSummary(GRN,
                           TF_peak.fdr = c(0.05, 0.1, 0.2), 
                           TF_peak.connectionTypes = "all",
                           peak_gene.fdr = c(0.1, 0.2),
                           peak_gene.r_range = c(0,1),
                           allowMissingGenes = c(FALSE, TRUE),
                           allowMissingTFs = c(FALSE),
                           gene.types = c("protein_coding", "lincRNA"),
                           forceRerun = TRUE)


GRN = plot_stats_connectionSummary(GRN, type = "heatmap", plotAsPDF = FALSE, pages = 3)
GRN = plot_stats_connectionSummary(GRN, type = "boxplot", plotAsPDF = FALSE, pages = 1)

## ----buildGraph, echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-300"----

GRN = build_eGRN_graph(GRN, forceRerun = TRUE) 


## ----visualizeGRN, echo=TRUE, include=TRUE, eval = TRUE, fig.cap="<i>eGRN example visualization</i>", fig.height = 6, class.output="scroll-300"----
GRN = visualizeGRN(GRN, plotAsPDF = FALSE, maxEdgesToPlot = 1000)


## ----filterAndVisualizeGRN, echo=TRUE, include=TRUE, eval = TRUE, fig.cap="<i>eGRN example visualization with extra filtering</i>", fig.height = 6, class.output="scroll-300"----
GRN = filterConnectionsForPlotting(GRN, plotAll = FALSE, TF.ID == "E2F7.0.B" | stringr::str_starts(TF.ID, "ETV"))
GRN = visualizeGRN(GRN, plotAsPDF = FALSE)


## ----allNetworkAnalyses, echo=TRUE, include=TRUE, eval = FALSE, class.output="scroll-300"----
#  GRN = performAllNetworkAnalyses(GRN, ontology = c("GO_BP"), outputFolder = ".", forceRerun = TRUE)

## ----plotGraphStats, echo=FALSE, fig.cap="<i>General network statistics for the filtered network</i>", fig.width = 12, class.output="scroll-300"----

GRN = plotGeneralGraphStats(GRN, plotAsPDF = FALSE, pages = c(1,6))


## ----generalEnrichment, echo=FALSE, eval = FALSE, class.output="scroll-300"----
#  
#  GRN = calculateGeneralEnrichment(GRN, ontology = "GO_BP")
#  

## ----plotGeneralEnrichment, echo=TRUE, fig.cap="<i>General network enrichment for the filtered network</i>", fig.height=5, class.output="scroll-300"----

GRN = plotGeneralEnrichment(GRN, plotAsPDF = FALSE, pages = 1)


## ----communityEnrichment, echo=FALSE, class.output="scroll-300"---------------

GRN = calculateCommunitiesStats(GRN)
GRN = calculateCommunitiesEnrichment(GRN, ontology = "GO_BP")


## ----plotCommunityStats, echo=FALSE, fig.height=9, fig.width = 10, fig.cap="<i>General statistics for the communities from the filtered network</i>", class.output="scroll-300"----

GRN = plotCommunitiesStats(GRN, plotAsPDF = FALSE, pages = c(1,3))


## ----plotCommunityEnrichment, echo=FALSE, fig.height=7, fig.width = 9, fig.cap="<i>Community enrichment for 3 different communities</i>", class.output="scroll-300"----

GRN = plotCommunitiesEnrichment(GRN, plotAsPDF = FALSE, pages = c(1,2,3))


## ----plotCommunityEnrichment2, echo=TRUE, fig.height=7, fig.width = 10,fig.cap="<i>Summary of the community enrichment</i>", class.output="scroll-300"----
GRN = plotCommunitiesEnrichment(GRN, plotAsPDF = FALSE, pages = c(5))



## ----TFEnrichmentCal, echo=FALSE, eval = FALSE, class.output="scroll-300"-----
#  GRN = calculateTFEnrichment(GRN, ontology = "GO_BP")

## ----TFEnrichment, echo=TRUE, fig.cap="<i>Enrichment summary for EGR1.0.A</i>", fig.height=7, fig.width = 12, class.output="scroll-300"----
GRN = plotTFEnrichment(GRN, plotAsPDF = FALSE, n = 3, pages = c(1))

## ----TFEnrichment2, echo=TRUE, fig.cap="<i>Enrichment summary for selected TFs and the whole eGRN network</i>", fig.height=7, fig.width = 15, class.output="scroll-300"----
GRN = plotTFEnrichment(GRN, plotAsPDF = FALSE, n = 3, pages = c(5))

## ----saveObject3, echo=TRUE, include=TRUE, eval=FALSE, class.output="scroll-300"----
#  GRN = deleteIntermediateData(GRN)
#  saveRDS(GRN, GRN_file_outputRDS)

## ----class.output="scroll-300"------------------------------------------------
 sessionInfo()