## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------ BiocStyle::latex() ## ----installing, eval=FALSE,hide=TRUE, message=FALSE,include=FALSE------- ## install.packages(devtools) ## library(devtools); ## devtools::install_github("lijingya/ELMER"); ## ----install, eval=FALSE------------------------------------------------- ## source("http://bioconductor.org/biocLite.R") ## biocLite("ELMER") ## ----example.data.run, echo=FALSE, hide=TRUE, message=FALSE, include=FALSE---- if(!file_test("-d", "./ELMER.example")) { URL <- "https://dl.dropboxusercontent.com/u/61961845/ELMER.example.tar.gz" SYS <- Sys.info()[['sysname']] if(SYS == "Darwin"){ download.file(URL,destfile = "ELMER.example.tar.gz",method= "curl") }else{ download.file(URL,destfile = "ELMER.example.tar.gz",method= "wget", extra = c("--no-check-certificate -a download.log")) } untar("./ELMER.example.tar.gz") } library(ELMER, quietly = TRUE) ## ----example.data, eval=FALSE-------------------------------------------- ## #Example file download from URL: https://dl.dropboxusercontent.com/u/61961845/ELMER.example.tar.gz ## URL <- "https://dl.dropboxusercontent.com/u/61961845/ELMER.example.tar.gz" ## download.file(URL,destfile = "ELMER.example.tar.gz",method= "curl") ## untar("./ELMER.example.tar.gz") ## library(ELMER) ## ----tcga.pipe, cache=TRUE----------------------------------------------- TCGA.pipe("LUSC",wd="./ELMER.example",cores=detectCores()/2,permu.size=300,Pe=0.01, analysis = c("distal.probes","diffMeth","pair","motif","TF.search"), diff.dir="hypo",rm.chr=paste0("chr",c("X","Y"))) ## ----dna.methylation.data, cache=TRUE------------------------------------ load("./ELMER.example/Result/LUSC/LUSC_meth_refined.rda") Meth[1:10, 1:2] ## ----gene.expression.data, cache=TRUE------------------------------------ load("./ELMER.example/Result/LUSC/LUSC_RNA_refined.rda") GeneExp[1:10, 1:2] ## ----sample.information, cache=TRUE-------------------------------------- mee <- fetch.mee(meth=Meth, exp=GeneExp, TCGA=TRUE) head(getSample(mee)) ## ----probe.information, cache=TRUE--------------------------------------- probe <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19, what="Locations") probe <- GRanges(seqnames=probe$chr, ranges=IRanges(probe$pos, width=1, names=rownames(probe)), strand=probe$strand, name=rownames(probe)) mee <- fetch.mee(meth=Meth, exp=GeneExp, TCGA=TRUE, probeInfo=probe) getProbeInfo(mee) ## ----gene.information, cache=TRUE---------------------------------------- geneAnnot <- txs() ## In TCGA expression data, geneIDs were used as the rowname for each row. However, numbers ## can't be the rownames, "ID" was added to each gene id functioning as the rowname. ## If your geneID is consistent with the rownames of the gene expression matrix, adding "ID" ## to each geneID can be skipped. geneAnnot$GENEID <- paste0("ID",geneAnnot$GENEID) geneInfo <- promoters(geneAnnot,upstream = 0, downstream = 0) save(geneInfo,file="./ELMER.example/Result/LUSC/geneAnnot.rda") mee <- fetch.mee(meth=Meth, exp=GeneExp, TCGA=TRUE, geneInfo=geneInfo) getGeneInfo(mee) ## ----mee.data, cache=TRUE------------------------------------------------ mee <- fetch.mee(meth=Meth, exp=GeneExp, TCGA=TRUE, probeInfo=probe, geneInfo=geneInfo) mee ## ----selection.of.probes.within.biofeatures, cache=TRUE------------------ #get distal enhancer probes that are 2kb away from TSS and overlap with REMC and FANTOM5 #enhancers on chromosome 1 Probe <- get.feature.probe(rm.chr=paste0("chr",c(2:22,"X","Y"))) save(Probe,file="./ELMER.example/Result/LUSC/probeInfo_feature_distal.rda") ## ----identifying.differentially.methylated.probes, cache=TRUE------------ ## fetch.mee can take path as input. mee <- fetch.mee(meth="./ELMER.example/Result/LUSC/LUSC_meth_refined.rda", exp="./ELMER.example/Result/LUSC/LUSC_RNA_refined.rda", TCGA=TRUE, probeInfo="./ELMER.example/Result/LUSC/probeInfo_feature_distal.rda", geneInfo="./ELMER.example/Result/LUSC/geneAnnot.rda") sig.diff <- get.diff.meth(mee, cores=detectCores()/2, dir.out ="./ELMER.example/Result/LUSC", diff.dir="hypo", pvalue = 0.01) sig.diff[1:10,] ## significantly hypomethylated probes # get.diff.meth automatically save output files. # getMethdiff.hypo.probes.csv contains statistics for all the probes. # getMethdiff.hypo.probes.significant.csv contains only the significant probes which # is the same with sig.diff dir(path = "./ELMER.example/Result/LUSC", pattern = "getMethdiff") ## ----identifying.putative.probe.gene.pairs, cache=TRUE------------------- ### identify target gene for significantly hypomethylated probes. Sig.probes <- read.csv("./ELMER.example/Result/LUSC/getMethdiff.hypo.probes.significant.csv", stringsAsFactors=FALSE)[,1] head(Sig.probes) # significantly hypomethylated probes ## Collect nearby 20 genes for Sig.probes nearGenes <-GetNearGenes(TRange=getProbeInfo(mee,probe=Sig.probes), geneAnnot=getGeneInfo(mee),cores=detectCores()/2) ## Identify significant probe-gene pairs Hypo.pair <-get.pair(mee=mee,probes=Sig.probes,nearGenes=nearGenes, permu.dir="./ELMER.example/Result/LUSC/permu",permu.size=300,Pe = 0.01, dir.out="./ELMER.example/Result/LUSC",cores=detectCores()/2,label= "hypo") head(Hypo.pair) ## significant probe-gene pairs # get.pair automatically save output files. #getPair.hypo.all.pairs.statistic.csv contains statistics for all the probe-gene pairs. #getPair.hypo.pairs.significant.csv contains only the significant probes which is # same with Hypo.pair. dir(path = "./ELMER.example/Result/LUSC", pattern = "getPair") ## ----motif.enrichment.analysis.on.selected.probes, cache=TRUE------------ ### identify enriched motif for significantly hypomethylated probes which ##have putative target genes. Sig.probes.paired <- read.csv("./ELMER.example/Result/LUSC/getPair.hypo.pairs.significant.csv", stringsAsFactors=FALSE)[,1] head(Sig.probes.paired) # significantly hypomethylated probes with putative target genes enriched.motif <-get.enriched.motif(probes=Sig.probes.paired, dir.out="./ELMER.example/Result/LUSC", label="hypo", min.incidence = 10,lower.OR = 1.1) names(enriched.motif) # enriched motifs head(enriched.motif["TP53"]) ## probes in the given set that have TP53 motif. # get.enriched.motif automatically save output files. # getMotif.hypo.enriched.motifs.rda contains enriched motifs and the probes with the motif. # getMotif.hypo.motif.enrichment.csv contains summary of enriched motifs. dir(path = "./ELMER.example/Result/LUSC", pattern = "getMotif") # motif enrichment figure will be automatically generated. dir(path = "./ELMER.example/Result/LUSC", pattern = "motif.enrichment.pdf") ## ----identifying.regulatory.tf, cache=TRUE------------------------------- ### identify regulatory TF for the enriched motifs load("./ELMER.example/Result/LUSC/getMotif.hypo.enriched.motifs.rda") TF <- get.TFs(mee=mee, enriched.motif=enriched.motif,dir.out="./ELMER.example/Result/LUSC", cores=detectCores()/2, label= "hypo") # get.TFs automatically save output files. # getTF.hypo.TFs.with.motif.pvalue.rda contains statistics for all TF with average # DNA methylation at sites with the enriched motif. # getTF.hypo.significant.TFs.with.motif.summary.csv contains only the significant probes. dir(path = "./ELMER.example/Result/LUSC", pattern = "getTF") # TF ranking plot based on statistics will be automatically generated. dir(path = "./ELMER.example/Result/LUSC/TFrankPlot", pattern = "pdf") ## ----figure1,fig.width=11, fig.height=10,fig.scap="Scatter Plot of 20 nearby genes",cache=TRUE---- scatter.plot(mee,byProbe=list(probe=c("cg19403323"),geneNum=20), category="TN", dir.out ="./ELMER.example/Result/LUSC", save=FALSE) ## ----figure2, fig.height=4,fig.scap="Scatter Plot of One Pair", cache=TRUE---- scatter.plot(mee,byPair=list(probe=c("cg19403323"),gene=c("ID255928")), category="TN", save=FALSE,lm_line=TRUE) ## ----figure3, fig.height=4, fig.scap="TF expression vs. average DNA methylation", cache=TRUE---- load("ELMER.example/Result/LUSC/getMotif.hypo.enriched.motifs.rda") scatter.plot(mee,byTF=list(TF=c("TP53","TP63","TP73"), probe=enriched.motif[["TP53"]]), category="TN", save=FALSE,lm_line=TRUE) ## ----makepair, cache=TRUE------------------------------------------------ # Make a "Pair" object for schematic.plot pair <- fetch.pair(pair="./ELMER.example/Result/LUSC/getPair.hypo.pairs.significant.withmotif.csv", probeInfo = "./ELMER.example/Result/LUSC/probeInfo_feature_distal.rda", geneInfo = "./ELMER.example/Result/LUSC/geneAnnot.rda") ## ------------------------------------------------------------------------ schematic.plot(pair=pair, byProbe="cg19403323",save=TRUE) ## ------------------------------------------------------------------------ schematic.plot(pair=pair, byGene="ID255928",save=TRUE) ## ----figure6, fig.height=2,fig.scap="Motif Enrichment", cache=TRUE------- motif.enrichment.plot(motif.enrichment="./ELMER.example/Result/LUSC/getMotif.hypo.motif.enrichment.csv", significant=list(OR=1.3,lowerOR=1.3), dir.out ="ELMER.example/Result/LUSC", label="hypo", save=FALSE) ## different signficant cut off. ## ------------------------------------------------------------------------ load("./ELMER.example/Result/LUSC/getTF.hypo.TFs.with.motif.pvalue.rda") TF.rank.plot(motif.pvalue=TF.meth.cor, motif="TP53", TF.label=list(TP53=c("TP53","TP63","TP73")), save=TRUE) ## ----finalsession, cache=TRUE-------------------------------------------- sessionInfo()