## ----echo = FALSE, message = FALSE, warning = FALSE--------------------------- library(BiocStyle) ## ----load-libs, message = FALSE, warning = FALSE----------------------------- library(Site2Target) ## ----warning=FALSE, message=FALSE--------------------------------------------- # Read gene coordinates geneFile=system.file("extdata", "gene_expression.tsv", package="Site2Target") geneCoords <- Table2Granges(geneFile) # Read gene table geneTable <- read.table(geneFile, header=TRUE) # Read peak coordinates tfFile =system.file("extdata", "MEIS_binding.tsv", package="Site2Target") TFCoords <- Table2Granges(tfFile) # Predict targets pvals <- getTargetGenesPvals( geneCoordinates=geneCoords, sites=TFCoords, distance = 50000) topTargetNum <- 5 topTargetIndex <- order(pvals)[1:topTargetNum] # Make a data frame of peak targets pvalues and expression logFCs dfTopTarget <- data.frame(name=geneTable$name[topTargetIndex], pvalue=pvals[topTargetIndex], exprLogC=geneTable$logFC[topTargetIndex] ) dfTopTarget ## ----warning=FALSE, message=FALSE--------------------------------------------- # Read TAD regions TADsFile =system.file("extdata", "TADs.tsv", package="Site2Target") TADs <- Table2Granges(TADsFile) # Predict targets pvals <- getTargetGenesPvals(geneCoordinates=geneCoords, sites=TFCoords, givenRegions=TADs, distance = 1000000) topTargetNum <- 5 topTargetIndex <- order(pvals)[1:topTargetNum] # Make a data frame of peak targets pvalues and expression logFCs dfTopTarget <- data.frame(name=geneTable$name[topTargetIndex], pvalue=pvals[topTargetIndex], exprLogC=geneTable$logFC[topTargetIndex] ) dfTopTarget ## ----warning=FALSE, message=FALSE--------------------------------------------- # Read HiC interactions HiCFile =system.file("extdata", "HiC_intensities.tsv", package="Site2Target") HiCstr1 <- Table2Granges(HiCFile, chrColName="Strand1_chr", startColName="Strand1_start", endColName="Strand1_end") HiCstr2 <- Table2Granges(HiCFile, chrColName="Strand2_chr", startColName="Strand2_start", endColName="Strand2_end") # Predict targets pvals <- getTargetGenesPvalsWithDNAInteractions( geneCoordinates=geneCoords, sites=TFCoords, strand1=HiCstr1, strand2=HiCstr2) topTargetNum <- 5 topTargetIndex <- order(pvals)[1:topTargetNum] # Make a data frame of peak targets pvalues and expression logFCs dfTopTarget <- data.frame(name=geneTable$name[topTargetIndex], pvalue=pvals[topTargetIndex], exprLogC=geneTable$logFC[topTargetIndex] ) dfTopTarget ## ----warning=FALSE, message=FALSE--------------------------------------------- # Read MEIS binding intensities tfTable <- read.table(tfFile, header=TRUE) tfIntensities <- tfTable$intensities # Predict targets pvals <- getTargetGenesPvalsWithIntensities( geneCoordinates=geneCoords, sites=TFCoords, intensities=tfIntensities ) topTargetNum <- 5 topTargetIndex <- order(pvals)[1:topTargetNum] # Make a data frame of peak targets pvalues and expression logFCs dfTopTarget <- data.frame(name=geneTable$name[topTargetIndex], pvalue=pvals[topTargetIndex], exprLogC=geneTable$logFC[topTargetIndex] ) dfTopTarget ## ----warning=FALSE, message=FALSE--------------------------------------------- # Predict targets pvals <- getTargetGenesPvalsWithIntensitiesAndDNAInteractions( geneCoordinates=geneCoords, sites=TFCoords, intensities=tfIntensities, strand1=HiCstr1, strand2=HiCstr2 ) topTargetNum <- 5 topTargetIndex <- order(pvals)[1:topTargetNum] # Make a data frame of peak targets pvalues and expression logFCs dfTopTarget <- data.frame(name=geneTable$name[topTargetIndex], pvalue=pvals[topTargetIndex], exprLogC=geneTable$logFC[topTargetIndex] ) dfTopTarget ## ----message=FALSE, warning=FALSE--------------------------------------------- # Take genes with Log fold change larger than one geneDEIndices <- which((abs(geneTable$logFC)>1)==TRUE) indicesLen <- length(geneDEIndices) if(indicesLen >0) { geneTable <- geneTable[geneDEIndices,] geneCoords <- geneCoords[geneDEIndices] } geneDENames <- geneTable$name geneDElogFC <- geneTable$logFC geneCoordsDE <- geneCoords # Associate differential genes to TF binding by 50K distance statsDist <- genewiseAssociation(associationBy="distance", geneCoordinates=geneCoordsDE, geneNames=geneDENames, peakCoordinates=TFCoords, distance=50000, outFile="Gene_TF_50K" ) statsDist # Associate differential genes to TF binding by TADs (up to 1M bp) statsTADs <- genewiseAssociation(associationBy="regions", geneCoordinates=geneCoordsDE, geneNames=geneDENames, peakCoordinates=TFCoords, givenRegions=TADs, distance=1000000, outFile="Gene_TF_TADs" ) statsTADs # Associate differential genes to TF binding by HiC statsHiC <- genewiseAssociation(associationBy="DNAinteractions", geneCoordinates=geneCoordsDE, geneNames=geneDENames, peakCoordinates=TFCoords, distance=50000, strand1=HiCstr1, strand2=HiCstr2, outFile="Gene_TF_HiC" ) statsHiC ## ----message=FALSE, warning=FALSE--------------------------------------------- # Add log fold changes of gene expressoin addColumn2geneWiseAssociation(type="gene", name=geneDENames, columnName="Expr_logFC", column=geneDElogFC, inFile="Gene_TF_50K", outFile="Gene_TF_50K" ) # Add binding intensities of MEIS addColumn2geneWiseAssociation(type="peak", coordinates=TFCoords, columnName="Binding_Intensities", column=tfIntensities, inFile="Gene_TF_50K", outFile="Gene_TF_50K" ) ## ----message=FALSE, warning=FALSE--------------------------------------------- HiCTable <- read.table(HiCFile, header=TRUE) HiCintensities <- HiCTable$intensities addRelation2geneWiseAssociation(strand1=HiCstr1, strand2=HiCstr2, columnName="HiC_Intensities", column=HiCintensities, inFile="Gene_TF_50K", outFile="Gene_TF_50K" ) ## ----echo=FALSE, results='hide',message=FALSE--------------------------------- # Remove folders unlink("Gene_TF_50K", recursive = TRUE) unlink("Gene_TF_TADs", recursive = TRUE) unlink("Gene_TF_HiC", recursive = TRUE) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()