%\VignetteIndexEntry{GeneNetworkBuilder Vignette}
%\VignetteDepends{GeneNetworkBuilder}
%\VignetteKeywords{regulation network}
%\VignettePackage{GeneNetworkBuilder}
\documentclass[12pt]{article}
\usepackage{hyperref}
\usepackage{url}
\usepackage{fullpage}
\usepackage[numbers]{natbib}
\usepackage{graphicx}
\usepackage{epstopdf}
\usepackage{tikz}
\bibliographystyle{plainnat}

\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rpackage}[1]{{\textit{#1}}}

\author{Jianhong Ou\footnote{jianhong.ou@umassmed.edu}, Lihua Julie Zhu\footnote{Julie.Zhu@umassmed.edu}}
\begin{document}
\title{GeneNetworkBuilder Guide}

\maketitle

\tableofcontents
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction}
Transcription factors (TFs), chromatin modifications and microRNAs (miRNAs) are  
important in regulating gene expression~\cite{Walhout2010}. 
Chromatin immunoprecipitation (ChIP) followed by
high-throughput sequencing (ChIP-seq)~\cite{Peter2009} or genome tiling
array analysis (ChIP-chip)~\cite{Tae2006} are widely used 
technologies for identifying genome-wide binding sites of TFs (TFBDs)~\cite{Kerstin2010}. 
The role of the TF on genome-wide gene expression can be
determined by expression microarray or RNA-seq experiments~\cite{Zhong2009}.  By combining
both technologies, researchers have the potential to decipher the regulatory
network of the TF. Genes bound by the TF and altered in expression are
considered direct targets of the TF, and genes altered in expression but not
bound by the TF are the indirect targets of the TF. The indirect targets can
potentially form a complex network themselves, especially when the TF is a
master regulator that regulates other TFs. To facilitate identification of
the complex regulatory network of TFs and how indirect targets are
inter-connected, we have developed GeneNetworkBuilder (GNB). Each generated
network consists of a directed acyclic graph with each edge representing
TF $\rightarrow$ gene, TF $\rightarrow$ miRNA, or miRNA $\rightarrow$ gene, where a $\rightarrow$ b represents ``a
regulates b''. 

\section{Examples of using GeneNetworkBuilder}
To use GNB, users need to input a list of genes bound by a given
TF and another list of genes/miRNAs with altered expression by knockdown/knockout of the
same TF. The bound gene list could be obtained from a ChIP-seq or ChIP-chip
experiment. The list of genes with altered expression is obtained from an RNA-seq or
expression microarray experiment.  ChIP experiments and expression
experiments should preferably be performed under similar experimental conditions,
such as tissue type, developmental stage, etc.  In addition, users need to
select a TF regulatory network from GNB or upload a customized TF regulatory
network.

GNB provides two embedded regulatory networks. One is designed for \textit{Caenorhabditis elegans},
combining the databases EDGEdb~\cite{Barrasa2007} and MicroCosm Targets~\cite{Sam2008}.
MicroCosm Targets contains computationally predicted targets for miRNAs
across many species. EDGEdb contains experimentally determined interactions
of $\sim$934 worm TFs by high-throughput yeast one-hybrid (Y1H) assay. The
other embedded regulatory network is designed for \textit{Homo sapiens}, combining the databases FANTOM~\cite{Ravasi2010},
miRGen~\cite{Panagiotis2010} and MicroCosm Targets~\cite{Sam2008}. FANTOM stores physical
interactions among the majority of human/mouse DNA-binding TFs. miRGen
is an integrated database of miRNA regulation by TFs and miRNA targets, mainly for human. 
Figure~\ref{fig:workflow} depicts the relationships among the various databases and
their roles in GNB.

\begin{figure}[htb]
\centering
\includegraphics{workflow}
\caption{Workflow of GeneNetworkBuilder}
\label{fig:workflow}
\end{figure}

\subsection{Quick start}
Here is an example of using GNB to generate a simple regulatory network for \textit{C. elegans}.
There are three steps:
\begin{enumerate}
	\item \Rfunction{buildNetwork}: build the network from the GNB-embedded or a user-defined regulatory network, 
starting from the bound gene list.
	\item \Rfunction{filterNetwork}: filter the network by differentially expressed genes/miRNAs. 
	\item \Rfunction{polishNetwork}: generate the graphNEL object with display style.
\end{enumerate}

\begin{scriptsize}
<<>>=
## Load the package.
library(GeneNetworkBuilder)
## Load the C. elegans miRNA ID map; its first column is used as miRNAlist below.
data("ce.miRNA.map")
## Load the GNB-embedded regulatory network (interaction map) of C. elegans.
data("ce.interactionmap")
## Load the example data (provides ce.bind and ce.exprData used below).
data("example.data")
## Step 1: build the network from the binding table and the interaction map.
sifNetwork<-buildNetwork(TFbindingTable=example.data$ce.bind, 
                        interactionmap=ce.interactionmap, level=2)
## Step 2: filter the network by expression data, starting from the root gene.
## uniqueExprsData() is applied to the expression table before filtering.
cifNetwork<-filterNetwork(rootgene="WBGene00000912", sifNetwork=sifNetwork, 
                    exprsData=uniqueExprsData(example.data$ce.exprData), 
                    mergeBy="symbols",
                    miRNAlist=as.character(ce.miRNA.map[ , 1]), 
                    remove_miRNA=FALSE, tolerance=1)
## Step 3: generate the graphNEL object for the network, using a 5-colour
## green-yellow-red palette for the nodes.
gR<-polishNetwork(cifNetwork=cifNetwork, nodecolor=colorRampPalette(c("green", "yellow", "red"))(5))
@
\end{scriptsize}

\subsection{Example using gene expression profile}
Here is an example of using GNB to generate a simple regulatory network for 
\textit{C. elegans}. It also shows how the \textit{graphNEL} object can be used 
for further analysis.

\begin{scriptsize}
<<>>=
library(GeneNetworkBuilder)
data("example.data")
## Initialize a binding matrix from the TF and the list of genes it binds (TFBDs).
## For example, the TF is daf-16, and the ChIP-chip result indicates that it can bind to
## the upstream regions of the genes "zip-2", "zip-4", "nhr-3" and "nhr-66".
bind<-cbind(from="daf-16", to=c("zip-2", "zip-4", "nhr-3", "nhr-66"))
## The same gene can have multiple aliases. To avoid missing any interactions,
## it is important to convert the gene symbols to unique gene ids.
data("ce.IDsMap")
bind<-convertID(toupper(bind), IDsMap=ce.IDsMap, ByName=c("from", "to"))
## Build the network from the binding list and the interaction map.
## NOTE(review): `bind` built above is not passed to buildNetwork(); the call uses
## example.data$ce.bind instead -- presumably the packaged equivalent; confirm.
data("ce.interactionmap")
sifNetwork<-buildNetwork(TFbindingTable=example.data$ce.bind, 
                        interactionmap=ce.interactionmap, level=2)
## Filter the network by expression data.
## Each gene id should have only a single record of expression change, so the
## expression table is condensed with uniqueExprsData() first.
unique.ce.microarrayData<-uniqueExprsData(example.data$ce.exprData,
                        method="Max", condenseName='logFC')
data("ce.miRNA.map")
cifNetwork<-filterNetwork(rootgene="WBGene00000912", sifNetwork=sifNetwork, 
                    exprsData=unique.ce.microarrayData, mergeBy="symbols",
                    miRNAlist=as.character(ce.miRNA.map[ , 1]), 
                    tolerance=1, cutoffPVal=0.01, cutoffLFC=1)
## Convert the unique gene ids back to gene symbols.
data("ce.mapIDs")
cifNetwork<-convertID(cifNetwork, ce.mapIDs, ByName=c("from","to"))
## Generate the graphNEL object for the network, using a 10-colour
## green-yellow-red palette for the nodes.
gR<-polishNetwork(cifNetwork, nodecolor=colorRampPalette(c("green", "yellow", "red"))(10))
## Plot the figure (Rgraphviz provides the layout and rendering functions).
library(Rgraphviz)
## Lay out and render a regulatory network with Rgraphviz.
##   gR         - the graphNEL object to draw (e.g. the result of polishNetwork()).
##   layouttype - layout name: "dot" (default), "neato", "twopi", "circo" or "fdp".
##   ...        - further arguments passed on to Rgraphviz::layoutGraph().
## Stops with an error if gR is not a graphNEL object or layouttype is not
## one of the supported names.
plotNetwork<-function(gR, layouttype="dot", ...){
    if(!is(gR,"graphNEL")) stop("gR must be a graphNEL object")
    ## Validate with base R rather than an unexported helper reached through
    ## ':::'; the length check also rejects NULL or vector input cleanly.
    layouts<-c("dot", "neato", "twopi", "circo", "fdp")
    if(!(length(layouttype)==1 && layouttype %in% layouts)){
        stop("layouttype must be dot, neato, twopi, circo or fdp")
    }
    g1<-Rgraphviz::layoutGraph(gR, layoutType=layouttype, ...)
    Rgraphviz::renderGraph(g1)
}
## Draw the network with the default "dot" layout.
plotNetwork(gR)
## Optional (commented out, not run): export the graph as GXL and print the
## first 8 lines of the XML.
#library("XML")
#xml<-saveXML(toGXL(gR)$value())
#z<-textConnection(xml)
#cat(readLines(z, 8), sep="\n")
## Optional (commented out, not run): calculate the shortest path between two
## nodes, e.g. "daf-16" and "lam-2", with RBGL.
#library(RBGL)
#sp.between(gR,"daf-16","lam-2")
@
\end{scriptsize}

\begin{figure}[htb]
\centering
\includegraphics{ce_example}
\caption{Regulatory network of a \textit{C. elegans} example}
\label{fig:example_results}
\end{figure}

\subsection{Example using both gene and miRNA expression profile}
Using several advanced genomic technologies, including microarray profiling and miRNA sequencing, 
not only the gene expression profile, but also the miRNA expression profile can be obtained.
A more robust network can be built if miRNA expression profiling is available. Here is an example of building a \textit{SOX2} response 
network for \textit{H. sapiens}. The data was downloaded from \href{http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3022822/?tool=pubmed}{BMC Genomics}~\cite{xuefeng2011}.

\begin{scriptsize}
\begin{Schunk}
\begin{Sinput}
> library(GeneNetworkBuilder)
> data("hs.interactionmap")
> data("hs.miRNA.map")
> data("hs.IDsMap")
> data("hs.mapIDs")
> data("example.data")
> rootgene<-"6657"
> sifNetwork<-buildNetwork(example.data$hs.bind, hs.interactionmap, level=5)
> ##example.data$hs.exprData is the combination of gene/miRNA expression profile
> ##note: miRNAtol should be set to TRUE here
> cifNetwork<-filterNetwork(rootgene=rootgene, sifNetwork=sifNetwork, 
+                    exprsData=example.data$hs.exprData, mergeBy="symbols", 
+                    miRNAlist=as.character(hs.miRNA.map[,1]),
+                    tolerance=0, miRNAtol=TRUE)
> cifNetwork<-convertID(cifNetwork, hs.mapIDs, ByName=c("from","to"))
> gR<-polishNetwork(cifNetwork)
> ##plot the figure
> library(RCytoscape)
> cw <- new.CytoscapeWindow ('sox2', graph=gR)
> showGraphicsDetails (cw, TRUE)
> displayGraph(cw)
> layoutNetwork (cw, layout.name='force-directed')
> redraw (cw)
\end{Sinput}
\end{Schunk}
\end{scriptsize}

\begin{figure}[htb]
\centering
\includegraphics{hs_sample_rcytoscape}
\caption{The SOX2 regulatory network.}
\label{fig:sox2_results}
\end{figure}

\section{References}
\begin{thebibliography}{99}
\bibitem[Walhout et al. (2010)]{Walhout2010} Gene-centered regulatory networks, H. Efsun Arda and Albertha J.M. Walhout, Briefings in Functional Genomics, 9(1): 4-12 (2010) 
\bibitem[Peter (2009)]{Peter2009} ChIP-seq: advantages and challenges of a maturing technology, Peter J. Park, Nature Reviews Genetics, 10: 669-680 (2009)
\bibitem[Tae et al. (2006)]{Tae2006} Genome-Wide Analysis of Protein-DNA Interactions, Tae Hoon Kim and Bing Ren, Annual Review of Genomics and Human Genetics, 7: 81-102 (2006)
\bibitem[Kerstin et al. (2010)]{Kerstin2010} Chromatin immunoprecipitation (ChIP) of plant transcription factors followed by sequencing (ChIP-SEQ) or hybridization to whole genome arrays (ChIP-CHIP), Kerstin Kaufmann, Jose Mui\~{n}o, Magne {\O}ster{\aa}s, Laurent Farinelli, Pawel Krajewski and Gerco C Angenent, Nature Protocols, 5: 457-472 (2010) 
\bibitem[Zhong et al. (2009)]{Zhong2009} RNA-Seq: a revolutionary tool for transcriptomics, Zhong Wang, Mark Gerstein and Michael Snyder, Nature Reviews Genetics, 10: 57-63 (2009)
\bibitem[Barrasa et al. (2007)]{Barrasa2007} EDGEdb: a transcription factor-DNA interaction database for the analysis of C. elegans differential gene expression, M Inmaculada Barrasa, Philippe Vaglio, Fabien Cavasino, Laurent Jacotot and Albertha JM Walhout, BMC Genomics, 8:21, (2007)
\bibitem[Sam et al. (2008)]{Sam2008} miRBase: tools for microRNA genomics, Sam Griffiths-Jones, Harpreet Kaur Saini, Stijn van Dongen and Anton J. Enright, Nucleic Acids Research, 36: D153-D158 (2008)
\bibitem[Ravasi et al. (2010)]{Ravasi2010} An atlas of combinatorial transcriptional regulation in mouse and man, the FANTOM consortium and RIKEN Omics Science Center, Cell, 140(5):744-752 (2010)
\bibitem[Panagiotis et al. (2010)]{Panagiotis2010} miRGen 2.0: a database of microRNA genomic information and regulation, Panagiotis Alexiou, Thanasis Vergoulis, Martin Gleditzsch, George Prekas, Theodore Dalamagas, Molly Megraw, Ivo Grosse, Timos Sellis and Artemis G. Hatzigeorgiou, Nucleic Acids Research, 38(suppl1):D137-D141, (2010) 
\bibitem[Xuefeng et al. (2011)]{xuefeng2011} The SOX2 response program in glioblastoma multiforme: an integrated ChIP-seq, expression microarray, and microRNA analysis, Xuefeng Fang, Jae-Geun Yoon, Lisha Li, Wei Yu, Jiaofang Shao, Dasong Hua, Shu Zheng, Leroy Hood, David R Goodlett, Gregory Foltz and Biaoyang Lin, BMC Genomics, 12:11, (2011)
\end{thebibliography}

\section{Session Info}
<<>>=
## Print the R session information for this vignette build.
sessionInfo()
@
\end{document}