## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------
BiocStyle::latex()

## ----env, include=FALSE, echo=FALSE, cache=FALSE-------------------------
library("knitr")
opts_chunk$set(fig.align = 'center', 
               fig.show = 'hold', 
               par = TRUE,
               prompt = TRUE,
               eval = TRUE,
               stop_on_error = 1L,
               comment = NA)
options(replace.assign = TRUE, 
        width = 55)

suppressPackageStartupMessages(library("MSnbase"))
suppressWarnings(suppressPackageStartupMessages(library("pRoloc")))
suppressPackageStartupMessages(library("pRolocdata"))
suppressPackageStartupMessages(library("class"))
set.seed(1)

## ----libraries-----------------------------------------------------------
library("MSnbase")
library("pRoloc")
library("pRolocdata")

## ----setcols-------------------------------------------------------------
setStockcol(paste0(getStockcol(), 70))

## ----readCsvData0--------------------------------------------------------
## The original data for replicate 1, available
## from the pRolocdata package
f0 <- dir(system.file("extdata", package = "pRolocdata"), 
          full.names = TRUE, pattern = "pr800866n_si_004-rep1.csv")
csv <- read.csv(f0)

## ----showOrgCsv----------------------------------------------------------
head(csv, n=3)

## ----readCsvData1, tidy = FALSE------------------------------------------
## The quantitation data, from the original data
f1 <- dir(system.file("extdata", package = "pRolocdata"), 
          full.names = TRUE, pattern = "exprsFile.csv")
exprsCsv <- read.csv(f1)
## Feature meta-data, from the original data
f2 <- dir(system.file("extdata", package = "pRolocdata"), 
          full.names = TRUE, pattern = "fdataFile.csv")
fdataCsv <- read.csv(f2)
## Sample meta-data, a new file
f3 <- dir(system.file("extdata", package = "pRolocdata"), 
          full.names = TRUE, pattern = "pdataFile.csv")
pdataCsv <- read.csv(f3)

## ----showExprsFile-------------------------------------------------------
head(exprsCsv, n=3)

## ----showFdFile----------------------------------------------------------
head(fdataCsv, n=3)

## ----showPdFile----------------------------------------------------------
pdataCsv

## ----makeMSnSet----------------------------------------------------------
tan2009r1 <- readMSnSet(exprsFile = f1,
                        featureDataFile = f2,
                        phenoDataFile = f3,
                        sep = ",")
tan2009r1

## ----showSubset----------------------------------------------------------
smallTan <- tan2009r1[1:5, 1:2]
dim(smallTan)
exprs(smallTan)

## ----rmtan, echo=FALSE---------------------------------------------------
## remove manullay constructred data
rm(tan2009r1)
data(tan2009r1)

## ----loadTan1------------------------------------------------------------
data(tan2009r1)

## ----lookAtTan-----------------------------------------------------------
getMarkers(tan2009r1, fcol = "markers.orig")
getMarkers(tan2009r1, fcol = "PLSDA")

## ----markers-------------------------------------------------------------
pRolocmarkers()
head(pRolocmarkers("dmel"))
table(pRolocmarkers("dmel"))

## ----realtiveQuants------------------------------------------------------
summary(rowSums(exprs(tan2009r1)))

## ----norm, echo=TRUE, message=FALSE, cache=TRUE--------------------------
## create a small illustrative test data
data(itraqdata)
itraqdata <- quantify(itraqdata, method = "trap", 
                      reporters = iTRAQ4, 
                      verbose = FALSE, parallel = FALSE)
## the quantification data
head(exprs(itraqdata), n = 3)
summary(rowSums(exprs(itraqdata)))
## normalising to the sum of feature intensitites
itraqnorm <- normalise(itraqdata, method = "sum")
processingData(itraqnorm)
head(exprs(itraqnorm), n = 3)
summary(rowSums(exprs(itraqnorm)))

## ----featurenames--------------------------------------------------------
head(featureNames(tan2009r1))

## ----showplotdist, echo=TRUE, eval=FALSE, prompt=FALSE-------------------
## ## indices of the mito markers
## j <- which(fData(tan2009r1)$markers.orig == "mitochondrion")
## ## indices of all proteins assigned to the mito
## i <- which(fData(tan2009r1)$PLSDA == "mitochondrion")
## plotDist(tan2009r1[i, ],
##          markers = featureNames(tan2009r1)[j])

## ----plotdist1, dev='pdf', fig.width=9, fig.height=7, echo=FALSE---------
par(mfrow = c(2,2), mar = c(4, 4, 2, 0.1))
cls <- getStockcol()[1:4]
plotDist(tan2009r1[which(fData(tan2009r1)$PLSDA == "mitochondrion"), ],
         markers = featureNames(tan2009r1)
         [which(fData(tan2009r1)$markers.orig == "mitochondrion")],
         mcol = cls[1])
title(main = "mitochondrion")
plotDist(tan2009r1[which(fData(tan2009r1)$PLSDA == "ER/Golgi"), ],
         markers = featureNames(tan2009r1)
         [which(fData(tan2009r1)$markers.orig == "ER")],
         mcol = cls[2])
title(main = "ER")
plotDist(tan2009r1[which(fData(tan2009r1)$PLSDA == "ER/Golgi"), ],
         markers = featureNames(tan2009r1)
         [which(fData(tan2009r1)$markers.orig == "Golgi")],
         mcol = cls[3])
title(main = "Golgi")
plotDist(tan2009r1[which(fData(tan2009r1)$PLSDA == "PM"), ],
         markers = featureNames(tan2009r1)
         [which(fData(tan2009r1)$markers.orig == "PM")],
         mcol = cls[4])
title(main = "PM")

## ----plot2d, dev='pdf', fig.width=5, fig.height=5, echo=TRUE-------------
plot2D(tan2009r1, fcol = "PLSDA", fpch = "markers.orig")
addLegend(tan2009r1, fcol = "PLSDA", 
          where = "bottomright", bty = "n", cex = .7)

## ----foi0----------------------------------------------------------------
foi1 <- FeaturesOfInterest(description = "Feats of interest 1",
                           fnames = featureNames(tan2009r1[1:10]))
description(foi1)
foi(foi1)

## ------------------------------------------------------------------------
foi2 <- FeaturesOfInterest(description = "Feats of interest 2",
                           fnames = featureNames(tan2009r1[880:888]))
foic <- FoICollection(list(foi1, foi2))
foic

## ----plotfoi, dev='pdf', fig.width=5, fig.height=5, echo=TRUE------------
plot2D(tan2009r1, fcol = "PLSDA")
addLegend(tan2009r1, fcol = "PLSDA", 
          where = "bottomright", bty = "n", cex = .7)
highlightOnPlot(tan2009r1, foi1, col = "black", lwd = 2)
highlightOnPlot(tan2009r1, foi2, col = "purple", lwd = 2)
legend("topright", c("FoI 1", "FoI 2"), bty = "n",
       col = c("black", "purple"), pch = 1)

## ----guiinstall, eval = FALSE--------------------------------------------
## library("BiocInstaller")
## biocLite("pRolocGUI")
## library("pRolocGUI")
## pRolocVis(tan2009r1)

## ----plot2dnull, dev='pdf', fig.width=5, fig.height=5, echo=TRUE---------
plot2D(tan2009r1, fcol = NULL)

## ----plotKmeans, dev='pdf', fig.width=8, fig.height=6, echo=TRUE---------
kcl <- MLearn( ~ ., tan2009r1,  kmeansI, centers=5)
plot(kcl, exprs(tan2009r1))

## ----plotHclust, dev='pdf', fig.width=8, fig.height=6, echo=TRUE, tidy = FALSE----
hcl <- MLearn( ~ ., tan2009r1,  
              hclustI(distFun = dist, cutParm = list(k = 5)))
plot(hcl, labels = FALSE)

## ----plotPam, dev='pdf', fig.width=8, fig.height=6, echo=TRUE------------
pcl <- MLearn( ~ ., tan2009r1,  pamI(dist), k = 5)
plot(pcl, data = exprs(tan2009r1))

## ----svmParamOptim, eval = FALSE, message = FALSE, tidy=FALSE------------
## params <- svmOptimisation(tan2009r1, fcol = "markers.orig",
##                           times = 10, xval = 5, verbose = FALSE)

## ----loadParams, echo = FALSE--------------------------------------------
fn <- dir(system.file("extdata", package = "pRoloc"), 
          full.names = TRUE, pattern = "params.rda")
load(fn)
rm(fn)

## ----showParams----------------------------------------------------------
params

## ----params, dev='pdf', fig.width=4, fig.height=4, echo=TRUE, out.width='.49\\linewidth'----
plot(params)
levelPlot(params)

## ----f1count-------------------------------------------------------------
f1Count(params)
getParams(params)

## ----svmRes0, warning=FALSE, eval = FALSE, tidy = FALSE------------------
## ## manual setting of parameters
## svmres <- svmClassification(tan2009r1, fcol = "markers.orig",
##                             sigma = 1, cost = 1)

## ----svmRes, warning=FALSE, tidy = FALSE---------------------------------
## using default best parameters
svmres <- svmClassification(tan2009r1, fcol = "markers.orig",
                            assessRes = params)
processingData(svmres)
tail(fvarLabels(svmres), 4)

## ----getPredictions------------------------------------------------------
p1 <- getPredictions(svmres, fcol = "svm")
minprob <- median(fData(svmres)$svm.scores)
p2 <- getPredictions(svmres, fcol = "svm", t = minprob)
table(p1, p2)

## ----predscoresPlot, dev='pdf', fig.width=6, fig.height=6, echo=TRUE, out.width='.55\\linewidth'----
boxplot(svm.scores ~ svm, data = fData(svmres), ylab = "SVM scores")
abline(h = minprob, lwd = 2, lty = 2)

## ----medscores1, tidy = FALSE--------------------------------------------
## including marker scores
(ts1 <- tapply(fData(svmres)$svm.scores, fData(svmres)$svm, median))

## ----medscores2, tidy = FALSE--------------------------------------------
## ignoring markers scores (i.e. scores == 1)
(ts2 <- tapply(fData(svmres)$svm.scores, fData(svmres)$svm,
               function(x) {
                   ## assuming scores of 1 are markers
                   scr <- median(x[x != 1])
                   ## in case no proteins were assigned to an organelle, 
                   ## scr would be NA. Setting these cases to 1.
                   ifelse(is.na(scr), 1, scr)
               }))

## ----medscorepreds, tidy = FALSE-----------------------------------------
getPredictions(svmres, fcol = "svm", t = ts2)

## ----svmresfig, dev='pdf', fig.width=5, fig.height=5, echo=TRUE----------
ptsze <- exp(fData(svmres)$svm.scores) - 1
plot2D(svmres, fcol = "svm", fpch = "markers.orig", cex = ptsze)
addLegend(svmres, fcol = "svm", 
          where = "bottomright", bty = "n", cex = .5)

## ----runPhenoDisco, eval=FALSE, warning=FALSE----------------------------
## pdres <- phenoDisco(tan2009r1, GS = 10, times = 100, fcol = "PLSDA")

## ----loadpdres, echo=FALSE-----------------------------------------------
fn <- dir(system.file("extdata", package = "pRoloc"), 
          full.names = TRUE, pattern = "pdres.rda")
load(fn)
rm(fn)

## ----phenoDiscoFvar------------------------------------------------------
processingData(pdres)
tail(fvarLabels(pdres), 3)

## ----pdresfig, dev='pdf', fig.width=5, fig.height=5, echo=TRUE-----------
plot2D(pdres, fcol = "pd")
addLegend(pdres, fcol = "pd", ncol = 2, 
          where = "bottomright", bty = "n", cex = .5)

## ----pdmarkers-----------------------------------------------------------
getMarkers(tan2009r1, fcol = "pd.markers")

## ----weights, eval = TRUE, echo = TRUE-----------------------------------
w <- table(fData(tan2009r1)[, "pd.markers"])
(w <- 1/w[names(w) != "unknown"])

## ----pdsvmParams, eval = FALSE, tidy = FALSE-----------------------------
## params2 <- svmOptimisation(tan2009r1, fcol = "pd.markers",
##                            times = 10, xval = 5,
##                            class.weights = w,
##                            verbose = FALSE)

## ----loadParams2, echo = FALSE-------------------------------------------
fn <- dir(system.file("extdata", package = "pRoloc"), 
          full.names = TRUE, pattern = "params2.rda")
load(fn)
rm(fn)

## ----pdsvm, cache = TRUE, message = FALSE, warning = FALSE, tidy = FALSE----
tan2009r1 <- svmClassification(tan2009r1, params2, 
                               class.weights = w,
                               fcol = "pd.markers")

## ----pdres2fig, dev='pdf', fig.width=6, fig.height=6, echo=TRUE, tidy=FALSE----
ptsze <- exp(fData(tan2009r1)$svm.scores) - 1
plot2D(tan2009r1, fcol = "svm", cex = ptsze)
addLegend(tan2009r1, fcol = "svm", where = "bottomright",
          ncol = 2, bty = "n", cex = .5)

## ----sessioninfo, results='asis', echo=FALSE-----------------------------
toLatex(sessionInfo())