## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----css, echo=FALSE, results='asis'------------------------------------------ cat(" ") ## ----js, echo=FALSE, results='asis'------------------------------------------- cat(" ") ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( comment = "" ) ## ----env, message = FALSE, warning = FALSE, echo = TRUE----------------------- library(goSorensen) ## ----------------------------------------------------------------------------- data("allOncoGeneLists") ## ----------------------------------------------------------------------------- # name and length of the gene lists: sapply(allOncoGeneLists, length) ## ----warning=FALSE, message=FALSE, eval=FALSE--------------------------------- # # Previously load the genomic annotation package for the studied specie: # library(org.Hs.eg.db) # humanEntrezIDs <- keys(org.Hs.eg.db, keytype = "ENTREZID") # # # Computing the irrelevance-threshold matrix of dissimilarities # dissMatrx_BP4 <- sorenThreshold(allOncoGeneLists, # geneUniverse = humanEntrezIDs, # orgPackg = "org.Hs.eg.db", # onto = "BP", # GOLevel = 4, # trace = FALSE) # dissMatrx_BP4 ## ----echo=FALSE--------------------------------------------------------------- options(digits = 4) data("dissMatrx_BP4") data("cont_all_BP4") dissMatrx_BP4 ## ----warning=FALSE, message=FALSE, comment=NA, eval=FALSE--------------------- # # Previously compute the enrichment contingency tables: # cont_all_BP4 <- buildEnrichTable(allOncoGeneLists, # geneUniverse = humanEntrezIDs, # orgPackg = "org.Hs.eg.db", # onto = "BP", # GOLevel = 4) ## ----------------------------------------------------------------------------- # Computing the irrelevance-threshold matrix of dissimilarities from the # enrichment contingency table "cont_all_BP4": dissMatrx_BP4 <- sorenThreshold(cont_all_BP4, trace = FALSE) dissMatrx_BP4 ## ----eval=FALSE--------------------------------------------------------------- # boot_dissMatrx_BP4 <- sorenThreshold(allOncoGeneLists, # geneUniverse = humanEntrezIDs, # orgPackg = "org.Hs.eg.db", # onto = "BP", # GOLevel = 4, # boot = TRUE, # use bootstrap distribution # trace = FALSE) # boot_dissMatrx_BP4 ## ----echo=FALSE--------------------------------------------------------------- sorenThreshold(cont_all_BP4, boot = TRUE, trace = FALSE) ## ----------------------------------------------------------------------------- boot_dissMatrx_BP4 <- sorenThreshold(cont_all_BP4, boot = TRUE, trace = FALSE) boot_dissMatrx_BP4 ## ----warning=FALSE, message=FALSE, comment=NA, eval=FALSE--------------------- # # For example, for GO levels 3 to 10 and for the ontologies BP, CC and MF: # allDissMatrx <- allSorenThreshold(allOncoGeneLists, # geneUniverse = humanEntrezIDs, # orgPackg = "org.Hs.eg.db", # ontos = c("BP", "CC", "MF"), # GOLevels = 3:10, # trace = FALSE) ## ----warning=FALSE, message=FALSE, comment=NA, eval=FALSE--------------------- # # Previously compute the enrichment contingency tables: # allContTabs <- allBuildEnrichTable(allOncoGeneLists, # geneUniverse = humanEntrezIDs, # orgPackg = "org.Hs.eg.db", # ontos = c("BP", "CC", "MF"), # GOLevels = 3:10) # # # Computing the irrelevance-threshold matrix of dissimilarities from the # # enrichment contingency tables "allContTabs": # allDissMatrx <- allSorenThreshold(allContTabs, # trace = FALSE) ## ----warning=FALSE, message=FALSE--------------------------------------------- clust.threshold <- hclustThreshold(dissMatrx_BP4) plot(clust.threshold) ## ----warning=FALSE, message=FALSE--------------------------------------------- # multidimensional scaling analysis: mds <- as.data.frame(cmdscale(dissMatrx_BP4, k = 2)) ## ----warning=FALSE, message=FALSE, fig.align='left', fig.height=3, fig.width=5---- library(ggplot2) library(ggrepel) graph <- ggplot() + geom_point(aes(mds[,1], mds[,2]), color = "blue", size = 3) + geom_text_repel(aes(mds[,1], mds[,2], label = attr(dissMatrx_BP4, "Labels")), color = "black", size = 3) + xlab("Dim 1") + ylab("Dim 2") + theme_light() graph ## ----------------------------------------------------------------------------- # Split the axis 20% to the left, 60% to the middle and 20% to the right: prop <- c(0.2, 0.6, 0.2) # Sort according dimension 1: sorted <- mds[order(mds[, 1]), ] # Determine the range for dimension 1. range <- sorted[, 1][c(1, nrow(mds))] # Find the cut-points to split the axis: cutpoints <- (cumsum(prop)[1:2] * diff(range)) + range[1] # Identify lists to the left: lleft <- rownames(sorted[sorted[, 1] < cutpoints[1], ]) lleft # Identify lists to the right lright <- rownames(sorted[sorted[, 1] > cutpoints[2], ]) lright ## ----warning=FALSE, message=FALSE, fig.align='left', fig.height=3, fig.width=5---- graph + geom_vline(xintercept = cutpoints, color = "red", linetype = "dashed", linewidth = 0.75) ## ----echo=FALSE--------------------------------------------------------------- options(max.print = 16) ## ----------------------------------------------------------------------------- # enrichment contingency tables: contTablesBP4 <- attr(dissMatrx_BP4, "all2x2Tables") # matrix of GO term enrichment for the lists located at the extreme left tableleft <- attr(contTablesBP4, "enriched")[, lleft, drop = FALSE] tableleft ## ----echo=FALSE--------------------------------------------------------------- options(max.print = 50) ## ----------------------------------------------------------------------------- # matrix of GO term enrichment for the lists located at the extreme right tableright <- attr(contTablesBP4, "enriched")[, lright, drop = FALSE] tableright ## ----warning=FALSE, message=FALSE--------------------------------------------- # function to compute mean and standard deviation: mean_sd <- function(x){ c("mean" = mean(x), "sd" = ifelse(length(x) == 1, 0, sd(x))) } # mean and sd of the lists to the extreme left: lmnsd <- apply(tableleft, 1, mean_sd) # mean and sd of the lists to the extreme right: rmnsd <- apply(tableright, 1, mean_sd) ## ----warning=FALSE, message=FALSE--------------------------------------------- nl <- ncol(tableleft) nr <- ncol(tableright) t_stat <- abs(lmnsd[1, ] - rmnsd[1, ]) / sqrt((((lmnsd[2, ] / nl) + (rmnsd[2, ] / nr))) + 0.00000001) ## ----warning=FALSE, message=FALSE--------------------------------------------- result <- t_stat[t_stat == max(t_stat)] result ## ----warning=FALSE, message=FALSE--------------------------------------------- library(GO.db) library(DT) # Previous function to get the description of the identified GO terms: get_go_description <- function(go_id) { go_term <- Term(GOTERM[[go_id]]) return(go_term) } # GO terms description: datatable(data.frame(GO_term = names(result), Description = sapply(names(result), get_go_description, USE.NAMES = TRUE)), filter = "top", rownames = FALSE)