## ----imports, echo=FALSE,eval=TRUE, message=FALSE, warning=FALSE-----------
library(Onassis)
library(DT)
library(gplots)
library(org.Hs.eg.db)
library(kableExtra)

## ----echo=TRUE, eval=FALSE-------------------------------------------------
#  source("https://bioconductor.org/biocLite.R")
#  biocLite('org.Hs.eg.db')
#  biocLite("GenomicRanges")
#  install.packages('data.table')
#  install.packages('DT')
#  install.packages('gplots')

## ----connectTodb, echo=TRUE,eval=FALSE-------------------------------------
#  ## Running this function might take a long time if the database has to be downloaded.
#  geo_con <- connectToGEODB(download=TRUE)
#  
#  #Showing the experiment types available in GEO
#  experiments <- experiment_types(geo_con)
#  
#  #Showing the organism types available in GEO
#  species <- organism_types(geo_con)
#  
#  #Retrieving the metadata associated to experiment type "Methylation profiling by high throughput sequencing"
#  meth_metadata <- getGEOMetadata(geo_con, experiment_type='Methylation profiling by high throughput sequencing', organism = 'Homo sapiens')
#  
#  #Retrieving Human gene expression metadata, knowing the GEO platform identifier, e.g. the Affymetrix Human Genome U133 Plus 2.0 Array
#  expression <- getGEOMetadata(geo_con, experiment_type='Expression profiling by array', gpl='GPL570')

## ----experimentTypesshow, echo=FALSE, eval=TRUE----------------------------
# Read the experiment types stored in the package's vignette data and render
# the first ten entries as a small, scrollable, striped table.
experiments <- readRDS(
  system.file("extdata", "vignette_data", "experiment_types.rds",
              package = "Onassis")
)

scroll_box(
  kable_styling(
    knitr::kable(as.data.frame(experiments[seq_len(10)]),
                 col.names = "Experiment"),
    bootstrap_options = "striped",
    position = "center"
  ),
  width = "300px",
  height = "200px"
)

## ----speciesShow, echo=FALSE,eval=TRUE-------------------------------------
# Same presentation for the organisms stored in the package's vignette data.
species <- readRDS(
  system.file("extdata", "vignette_data", "organisms.rds",
              package = "Onassis")
)
scroll_box(
  kable_styling(
    knitr::kable(as.data.frame(species[seq_len(10)]),
                 col.names = "Species"),
    bootstrap_options = "striped",
    position = "center"
  ),
  width = "300px",
  height = "200px"
)

## ----loadgeoMetadata, echo=TRUE, eval=TRUE---------------------------------
# Load the methylation metadata stored in the package's vignette data.
meth_metadata <- readRDS(system.file('extdata', 'vignette_data', 'GEOmethylation.rds', package='Onassis'))

## ----printmeta, echo=FALSE,eval=TRUE---------------------------------------

# Work on a copy so the truncation below leaves meth_metadata untouched.
methylation_tmp <- meth_metadata
# substr() is vectorised, so the whole column is cut to 50 characters in a
# single call; wrapping it in sapply() would additionally attach the full,
# untruncated summaries as element names.
methylation_tmp$experiment_summary <- substr(methylation_tmp$experiment_summary, 1, 50)
# Show the first ten rows as a scrollable, striped table.
knitr::kable(methylation_tmp[1:10, ],
              caption = 'Methylation profiling by high througput sequencing metadata from GEOmetadb.') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "300px")


## ----connectSRA, echo=TRUE,eval=FALSE--------------------------------------
#  # Connection to the SRAmetadb and potential download of the sqlite file
#  sqliteFileName <- './data/SRAdb.sqlite'
#  sra_con <- dbConnect(SQLite(), sqliteFileName)
#  
#  # Query for the ChIP-Seq experiments contained in GEO for human samples
#  library_strategy <- 'ChIP-Seq' #ChIP-Seq data
#  library_source='GENOMIC'
#  taxon_id=9606 #Human samples
#  center_name='GEO' #Data from GEO
#  
#  # Query to the sample table
#  samples_query <- paste0("select sample_accession, description, sample_attribute, sample_url_link from sample where taxon_id='", taxon_id, "' and sample_accession IS NOT NULL", " and center_name='", center_name, "'")
#  
#  samples_df <- dbGetQuery(sra_con, samples_query)
#  samples <- unique(as.character(as.vector(samples_df[, 1])))
#  
#  # Query to the experiment table
#  experiment_query <- paste0("select experiment_accession, center_name, title, sample_accession, sample_name, experiment_alias, library_strategy, library_layout, experiment_url_link, experiment_attribute from experiment where library_strategy='",
#                             library_strategy, "'" , " and library_source ='", library_source,
#                             "' " )
#  
#  experiment_df <- dbGetQuery(sra_con, experiment_query)
#  
#  #Merging the columns from the sample and the experiment table
#  experiment_df <- merge(experiment_df, samples_df, by = "sample_accession")
#  
#  # Replacing the field separators with white spaces
#  # (fixed = TRUE so that "||" is matched literally and not as a regular expression)
#  experiment_df$experiment_attribute <- sapply(experiment_df$experiment_attribute,
#                                               function(value) {
#                                                 gsub("||", "  ", value, fixed = TRUE)
#                                               })
#  experiment_df$sample_attribute <- sapply(experiment_df$sample_attribute,
#                                           function(value) {
#                                             gsub("||", "  ", value, fixed = TRUE)
#                                           })
#  # Replacing the '_' character with white spaces
#  experiment_df$sample_name <- sapply(experiment_df$sample_name,
#                                      function(value) {
#                                        gsub("_", " ", value)
#                                      })
#  experiment_df$experiment_alias <- sapply(experiment_df$experiment_alias,
#                                           function(value) {
#                                             gsub("_", " ", value)
#                                           })
#  sra_chip_seq <- experiment_df

## ----readCHIP, echo=TRUE, eval=TRUE----------------------------------------
# Load the ChIP-seq metadata stored in the package's vignette data.
sra_chip_seq <- readRDS(system.file('extdata', 'vignette_data', 'GEO_human_chip.rds',  package='Onassis'))

## ----printchromatinIP, echo=FALSE,eval=TRUE--------------------------------

# Show the first ten records. The kable() argument that suppresses row names
# is `row.names`; `rownames` is not a formal argument and would be swallowed
# by `...` without any effect.
knitr::kable(head(sra_chip_seq, 10), row.names = FALSE,
  caption = 'ChIP-Seq metadata obtained from SRAdb') %>%
  kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "300px")


## ----createSampleAndTargetDict, echo=TRUE,eval=TRUE, message=FALSE---------
# If a Conceptmapper dictionary is already available, the dictType 'CMDICT' can
# be specified and the corresponding file loaded
sample_dict <- CMdictionary(inputFileOrDb=system.file('extdata', 'cmDict-sample.cs.xml', package = 'Onassis'), dictType = 'CMDICT')

# Creation of a dictionary from the file sample.cs.obo available in OnassisJavaLibs
obo <- system.file('extdata', 'sample.cs.obo', package='OnassisJavaLibs')

# This replaces the dictionary loaded above; the output directory is the
# current working directory.
sample_dict <- CMdictionary(inputFileOrDb=obo, outputDir=getwd(), synonymType='ALL')

# Creation of a dictionary for human genes/proteins.
# library() stops with an error when the annotation package is missing,
# whereas require() would only return FALSE and let the next call fail.
library(org.Hs.eg.db)
targets <- CMdictionary(dictType='TARGET', inputFileOrDb = 'org.Hs.eg.db')

## ----settingOptions, echo=TRUE,eval=TRUE-----------------------------------

# Create a CMoptions object with its default settings and display it
opts <- CMoptions()  
show(opts)

## ----listCombinations, echo=TRUE, eval=TRUE--------------------------------
# Keep the value returned by listCMOptions() (presumably the admissible
# option combinations -- confirm against the Onassis documentation)
combinations <- listCMOptions()

## ----setsynonymtype, echo=TRUE, eval=TRUE----------------------------------
# Create a second options object with SynonymType set to 'EXACT_ONLY' and
# print it
myopts <- CMoptions(SynonymType = 'EXACT_ONLY')
myopts

## ----changeparameter, echo=TRUE, eval=TRUE---------------------------------
# Change the SearchStrategy parameter of the existing object through its
# replacement function, then print the updated object
SearchStrategy(myopts) <- 'SKIP_ANY_MATCH_ALLOW_OVERLAP'
myopts

## ----EntityFinder, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
# Annotate the first 20 ChIP-seq records with the sample dictionary, using the
# customised options. Only the listed columns are handed to EntityFinder().
chipseq_dict_annot <- EntityFinder(sra_chip_seq[1:20,c('sample_accession', 'title', 'experiment_attribute', 'sample_attribute', 'description')], dictionary=sample_dict, options=myopts)


## ----showchipresults, echo=FALSE, eval=TRUE, message=FALSE-----------------

# The table holds the ChIP-seq annotations computed above, so the caption
# describes ChIP-seq data. Row names are suppressed with `row.names`, the
# actual kable() argument; `rownames` would be ignored.
knitr::kable(head(chipseq_dict_annot, 20), row.names = FALSE,
  caption = 'Annotations of the ChIP-seq metadata obtained from SRAdb with concepts from the sample dictionary') %>% kable_styling() %>%
  scroll_box(width = "80%", height = "400px")

## ----annotateGenes, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----

# Search the same 20 ChIP-seq records for TARGET entities, this time matching
# against the `targets` dictionary.
target_entities <- EntityFinder(
  input = sra_chip_seq[
    seq_len(20),
    c("sample_accession", "title", "experiment_attribute",
      "sample_attribute", "description")
  ],
  dictionary = targets,
  options = myopts
)

## ----printKable, echo=FALSE, eval=TRUE-------------------------------------
# Render every target annotation as a scrollable, striped table.
scroll_box(
  kable_styling(
    knitr::kable(
      target_entities,
      caption = "Annotations of ChIP-seq test metadata obtained from SRAdb and stored into files with the TARGETs (genes and histone variants)"
    ),
    bootstrap_options = "striped",
    position = "center"
  ),
  width = "80%",
  height = "400px"
)

## ----similarity, echo=TRUE, eval=TRUE, message=FALSE-----------------------
# Keep the value returned by listSimilarities() (presumably the available
# similarity measures -- confirm against the Onassis documentation)
similarities <- listSimilarities()

## ----computing measures, echo=TRUE, eval=TRUE, message=FALSE---------------

# Distinct term URLs present in the ChIP-seq annotations
found_terms <- unique(chipseq_dict_annot$term_url)
n <- length(found_terms)

ontologyfile <- obo

# Enumerate every unordered pair of terms exactly once. combn() yields the
# pairs in the order (1,2), (1,3), ..., (n-1,n). With fewer than two terms
# there are no pairs at all, whereas a `1:(n-1)` loop would count backwards
# and index past the end of `found_terms`.
pair_index <- if (n > 1) combn(n, 2) else matrix(integer(0), nrow = 2)
term1 <- as.character(found_terms[pair_index[1, ]])
term2 <- as.character(found_terms[pair_index[2, ]])

# Fill a preallocated numeric vector instead of growing a data frame with
# rbind() inside the loop; this also keeps the similarity numeric rather than
# coercing it to character through cbind().
# NOTE(review): assumes Similarity() returns a single number for two terms.
two_term_similarity <- numeric(length(term1))
for (p in seq_along(term1)) {
  two_term_similarity[p] <- Similarity(ontologyfile, term1[p], term2[p])
}
pairwise_results <- data.frame(term1, term2, two_term_similarity,
                               stringsAsFactors = FALSE)

# Attach the readable term names. The lookup is de-duplicated first so the
# merges do not multiply rows by the number of annotated samples per term.
term_names <- unique(chipseq_dict_annot[, c('term_url', 'term_name')])
pairwise_results <- merge(pairwise_results, term_names, by.x = 'term2', by.y = 'term_url', all.x = TRUE)
colnames(pairwise_results)[ncol(pairwise_results)] <- 'term2_name'
pairwise_results <- merge(pairwise_results, term_names, by.x = 'term1', by.y = 'term_url', all.x = TRUE)
colnames(pairwise_results)[ncol(pairwise_results)] <- 'term1_name'
pairwise_results <- unique(pairwise_results)

## ----showSim, echo=FALSE, eval=TRUE----------------------------------------
# Render the pairwise term similarities as a scrollable, striped table.
scroll_box(
  kable_styling(
    knitr::kable(
      pairwise_results,
      caption = "Pairwise similarities of cell line terms annotating the ChIP-seq metadata"
    ),
    bootstrap_options = "striped",
    position = "center"
  ),
  width = "80%",
  height = "400px"
)

## ----groupwise_measures, echo=TRUE, eval=TRUE, message=FALSE---------------

# Similarity between a group of terms (the first two found) and a single term
# (the third one).
Similarity(obo, found_terms[seq_len(2)], found_terms[3])


## ----samples_similarity, echo=TRUE, eval=TRUE, message=FALSE---------------

# Distinct sample identifiers present in the annotation table
annotated_samples <- as.character(as.vector(unique(chipseq_dict_annot$sample_id)))
n <- length(annotated_samples)

# Square matrix, one row and one column per sample, that receives the
# pairwise similarities.
samples_results <- matrix(0, nrow = n, ncol = n)
rownames(samples_results) <- colnames(samples_results) <- annotated_samples

# Visit each unordered pair once and mirror the value across the diagonal.
# seq_len() makes the outer loop a no-op when fewer than two samples exist,
# where `1:(n-1)` would count backwards.
for (i in seq_len(max(n - 1L, 0L))) {
  sample1 <- annotated_samples[i]
  for (k in seq.int(i + 1L, n)) {
    sample2 <- annotated_samples[k]
    two_samples_similarity <- Similarity(ontologyfile, sample1, sample2, chipseq_dict_annot)
    samples_results[i, k] <- samples_results[k, i] <- two_samples_similarity
  }
}
# Each sample's similarity with itself is set to 1
diag(samples_results) <- 1
heatmap.2(samples_results, density.info = "none", trace = "none", main = 'Semantic similarity of annotated samples', margins = c(5, 5))

## ----onassis_class_usage, echo=TRUE, eval=TRUE, results='hide', message=FALSE, warning=FALSE----
# Annotate the ChIP-seq metadata through annotate(), passing 'OBO' as the
# dictionary type and the ontology file stored in `obo`.
onassis_annotations <- annotate(sra_chip_seq, 'OBO', obo)

## ----show_onassis_annotations, echo=TRUE, eval=TRUE------------------------
# Extract the entities table from the annotation result
onassis_entities <- entities(onassis_annotations)

## ----showing_entities, echo=FALSE, eval=TRUE-------------------------------
# Show a random selection of at most ten entities. Capping the sample size at
# the number of rows avoids the "cannot take a sample larger than the
# population" error when fewer than ten entities were found.
n_entities <- nrow(onassis_entities)
shown_rows <- sample.int(n_entities, min(10L, n_entities))
knitr::kable(
  onassis_entities[shown_rows, ],
  caption = 'Entities in Onassis object') %>% kable_styling(bootstrap_options = c("striped"), position = "center") %>%
  scroll_box(width = "80%", height = "400px")

## ----term_filtering, echo=TRUE, eval=TRUE----------------------------------
# Apply filterconcepts() with the term 'cell' (presumably dropping that
# concept from the annotations -- confirm against the Onassis documentation).
filtered_onassis <- filterconcepts(onassis_annotations, "cell")

## ----showing_filt_entities, echo=FALSE, eval=TRUE--------------------------
# Render the entities of the filtered object as a scrollable, striped table.
scroll_box(
  kable_styling(
    knitr::kable(entities(filtered_onassis),
                 caption = "Entities in filtered Onassis object"),
    bootstrap_options = "striped",
    position = "center"
  ),
  width = "80%",
  height = "400px"
)

## ----similarity_of_samples, echo=TRUE, eval=TRUE---------------------------

# Replace the filtered object with the result of sim() applied to it.
filtered_onassis <- sim(filtered_onassis)


## ----collapsing_similarities, echo=TRUE, eval=TRUE, message=FALSE, results='hide', fig.width=6, fig.height=6----
# Collapse with a threshold of 0.8, calling the Onassis implementation
# explicitly by namespace.
collapsed_onassis <- Onassis::collapse(filtered_onassis, 0.8)
# First rows of the entities of the collapsed object
head(entities(collapsed_onassis), n = 6L)

# Heatmap of the matrix returned by simil() for the collapsed object
heatmap.2(
  simil(collapsed_onassis),
  cexRow = 1,
  cexCol = 1,
  margins = rep(15, 2)
)

## ----sessionInfo(), echo=FALSE, eval=TRUE----------------------------------
utils::sessionInfo()