## ----setup, echo=FALSE, results='hide', warning=FALSE, error=FALSE, message=FALSE, cache=FALSE----
library(knitr)
opts_chunk$set(
  cache = FALSE,
  echo = TRUE,
  warning = FALSE,
  error = FALSE,
  message = FALSE
)

## ------------------------------------------------------------------------
library(airway)
library(DESeq2)
data(airway)

# import data to DESeq2 and variance stabilize
dset = DESeqDataSetFromMatrix(assay(airway),
    colData=as.data.frame(colData(airway)), design=~dex)
dset = estimateSizeFactors(dset)
dset = estimateDispersions(dset)
gene_expr = getVarianceStabilizedData(dset)

# annotate matrix with HGNC symbols
library(biomaRt)
mart = useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
genes = getBM(attributes = c("ensembl_gene_id","hgnc_symbol"),
              values=rownames(gene_expr), mart=mart)
matched = match(rownames(gene_expr), genes$ensembl_gene_id)
rownames(gene_expr) = genes$hgnc_symbol[matched]

## ------------------------------------------------------------------------
library(progeny)
pathways = progeny(gene_expr, scale=FALSE)
controls = airway$dex == "untrt"
ctl_mean = apply(pathways[controls,], 2, mean)
ctl_sd = apply(pathways[controls,], 2, sd)
pathways = t(apply(pathways, 1, function(x) x - ctl_mean))
pathways = apply(pathways, 1, function(x) x / ctl_sd)

## ------------------------------------------------------------------------
library(dplyr)
result = apply(pathways, 1, function(x) {
    broom::tidy(lm(x ~ !controls)) %>%
        filter(term == "!controlsTRUE") %>%
        select(-term)
})
mutate(bind_rows(result), pathway=names(result))

## ------------------------------------------------------------------------
# set up a file cache so we download only once
library(BiocFileCache)
bfc = BiocFileCache(".")
# gene expression and drug response
base = "http://www.cancerrxgene.org/gdsc1000/GDSC1000_WebResources/Data/"
paths = bfcrpath(bfc, paste0(base, c("suppData/TableS4A.xlsx",
            "preprocessed/Cell_line_RMA_proc_basalExp.txt.zip")))

## ------------------------------------------------------------------------
# load the downloaded files
drug_table = readxl::read_excel(paths[1], skip=5)
gene_table = readr::read_tsv(paths[2])

# we need drug response with COSMIC IDs
drug_response = data.matrix(drug_table[,3:ncol(drug_table)])
rownames(drug_response) = drug_table[[1]]

# we need genes in rows and samples in columns
gene_expr = data.matrix(gene_table[,3:ncol(gene_table)])
colnames(gene_expr) = sub("DATA.", "", colnames(gene_expr), fixed=TRUE)
rownames(gene_expr) = gene_table$GENE_SYMBOLS

## ------------------------------------------------------------------------
library(progeny)
pathways = progeny(gene_expr)

## ------------------------------------------------------------------------
head(pathways)

## ------------------------------------------------------------------------
cell_lines = intersect(rownames(pathways), rownames(drug_response))
trametinib = drug_response[cell_lines, "Trametinib"]
mapk = pathways[cell_lines, "MAPK"]

associations = lm(trametinib ~ mapk)
summary(associations)

## ----echo=FALSE----------------------------------------------------------
sessionInfo()