## ----LibraryPreload, message=FALSE---------------------------------------
library(Risa)
library(xcms)
library(CAMERA)
library(pcaMethods)

## ----settings------------------------------------------------------------
# prefilter <- c(3,200)  ## standard
prefilter=c(6,750)      ## quick-run for debugging
nSlaves=4

## ----rISA, cache=TRUE----------------------------------------------------

ISAmtbls2 <- readISAtab(find.package("mtbls2"))
a.filename <- ISAmtbls2["assay.filenames"][[1]]

msfiles <- getAssayRawDataFilenames(ISAmtbls2@assay.tabs[[1]], full.path = TRUE)[,1]
adf <- getAnnotatedDataFrameAssay(ISAmtbls2, assay.filename = a.filename)


## ----PeakPicking, cache=TRUE, warning=FALSE------------------------------

cwp <- CentWaveParam(ppm = 25, peakwidth = c(20, 50), snthresh = 10,
  prefilter = c(3, 100), mzCenterFun = "wMean", integrate = 1L,
  mzdiff = -0.001, fitgauss = FALSE, noise = 0, verboseColumns = FALSE,
  roiList = list(), firstBaselineCheck = TRUE, roiScales = numeric())

raw_data <- readMSData(msfiles, mode = "onDisk")

## Perform the peak detection using the settings defined above.
mtbls2 <- findChromPeaks(raw_data, param = cwp, BPPARAM = bpparam())

pData(mtbls2) <- pData(adf)
          

head(chromPeaks(mtbls2))


## ----xcmsSet-------------------------------------------------------------
show(mtbls2)

## ----retcor--------------------------------------------------------------

## Perform the peak grouping with default settings:
pdp <- PeakDensityParam(sampleGroups = as.integer(interaction(pData(mtbls2), drop=TRUE)))

mtbls2grouped <- groupChromPeaks(mtbls2, pdp)

pgp <- PeakGroupsParam(minFraction = 1, extraPeaks = 1, smooth = "loess",
  span = 0.2, family = "gaussian")

mtbls2groupedretcor <- adjustRtime(mtbls2grouped, param = pgp)

## Visualize the impact of the alignment. We show both versions of the plot,
## with the raw retention times on the x-axis (top) and with the adjusted
## retention times (bottom).
par(mfrow = c(2, 1))
plotAdjustedRtime(mtbls2groupedretcor, adjusted = FALSE)
grid()
plotAdjustedRtime(mtbls2groupedretcor)
grid()


## ----QCintensity---------------------------------------------------------
boxplot(featureValues(mtbls2grouped, value="into") +1, 
        #col=as.numeric(sampclass(mtbls2Set))+1, 
        log="y", las=2)

## ----fillPeaks-----------------------------------------------------------

mtbls2groupedFilled <- fillChromPeaks(mtbls2grouped)


## ----plotQC--------------------------------------------------------------
#plotQC(mtbls2Set)

## ----QCPCA, fig.show='hold'----------------------------------------------
sdThresh <- 4.0 ## Filter low-standard deviation rows for plot
data <- log(featureValues(mtbls2groupedFilled))+1

pca.result <- pca(data, nPcs=3)
plotPcs(pca.result, type="loadings", 
        #col=as.numeric(sampclass(mtbls2Set))+1
        )


## ----CAMERA, warning=FALSE, results='hide'-------------------------------

## Since CAMERA has not yet been ported to XCMSnExp,
## we need to convert to xcmsSet. Note that 
## the conversion only makes sense for somple XCMSnSets, 
## without e.g. MS level filtering (where CAMERA would then 
## extract the wrong )
mtbls2Set <- as(mtbls2groupedFilled, "xcmsSet")
mtbls2Set <- fillPeaks(mtbls2Set)
##
## Now do the normal CAMERA workflow:
##
an <- xsAnnotate(mtbls2Set,
                 sample=seq(1,length(sampnames(mtbls2Set))),
                 nSlaves=nSlaves)

an <- groupFWHM(an)
an <- findIsotopes(an)  # optional but recommended.
an <- groupCorr(an,
                graphMethod="lpc",
                calcIso = TRUE,
                calcCiS = TRUE,
                calcCaS = TRUE,
                cor_eic_th=0.5)

## Setup ruleSet
rs <- new("ruleSet")

rs@ionlistfile <- file.path(find.package("mtbls2"), "lists","ions.csv")
rs@neutraladditionfile <- file.path(find.package("mtbls2"), "lists","neutraladdition.csv")
rs@neutrallossfile <- file.path(find.package("mtbls2"), "lists","neutralloss.csv")

rs <- readLists(rs)
rs <- setDefaultParams(rs)
rs <- generateRules(rs)

an <- findAdducts(an,
                  rules=rs@rules,
                  polarity="positive")
  

## ----diffreport----------------------------------------------------------

dr <- diffreport(mtbls2Set, sortpval=FALSE, filebase="mtbls2diffreport", eicmax=20 )
cspl <- getPeaklist(an)

annotatedDiffreport <- cbind(dr, cspl)


## ----diffreportPspec-----------------------------------------------------
interestingPspec <- tapply(seq(1, nrow(annotatedDiffreport)),
                               INDEX=annotatedDiffreport[,"pcgroup"],
                               FUN=function(x, a) {m <- median(annotatedDiffreport[x, "pvalue"]);
                                                   p <- max(annotatedDiffreport[x, "pcgroup"]);
                                                   as.numeric(c(pvalue=m,pcgroup=p))},
                               annotatedDiffreport)

interestingPspec <- do.call(rbind, interestingPspec)
colnames(interestingPspec) <- c("pvalue", "pcgroup") 

o <- order(interestingPspec[,"pvalue"])

pdf("interestingPspec.pdf")
dummy <- lapply(interestingPspec[o[1:40], "pcgroup"],
                function(x) {suppressWarnings(plotPsSpectrum(an, pspec=x, maxlabel=5))})
dev.off()


## ----assembleMAF---------------------------------------------------------

pl <- annotatedDiffreport 

charge <- sapply(an@isotopes, function(x) {
  ifelse( length(x) > 0, x$charge, NA) 
})
abundance <- groupval(an@xcmsSet, value="into")


##
## load ISA assay files
## 

a.samples <- ISAmtbls2["samples.per.assay.filename"][[ a.filename ]]

##
## These columns are defined by mzTab
##

maf.std.colnames <- c("identifier", "chemical_formula", "description",
"mass_to_charge", "fragmentation", "charge", "retention_time",
"taxid", "species", "database", "database_version", "reliability",
"uri", "search_engine", "search_engine_score", "modifications",
"smallmolecule_abundance_sub", "smallmolecule_abundance_stdev_sub",
"smallmolecule_abundance_std_error_sub")

##
## Plus the columns for the sample intensities
##
all.colnames <- c(maf.std.colnames, a.samples)

##
## Now assemble new maf
##

l <- nrow(pl)

maf <- data.frame(identifier = character(l), 
                  chemical_formula = character(l), 
                  description = character(l), 
                  mass_to_charge = pl$mz, 
                  fragmentation = character(l), 
                  charge = charge, 
                  retention_time = pl$rt, 
                  taxid = character(l), 
                  species = character(l), 
                  database = character(l), 
                  database_version = character(l), 
                  reliability = character(l), 
                  uri = character(l), 
                  search_engine = character(l), 
                  search_engine_score = character(l),
                  modifications = character(l), 
                  smallmolecule_abundance_sub = character(l),
                  smallmolecule_abundance_stdev_sub = character(l), 
                  smallmolecule_abundance_std_error_sub = character(l),
                  abundance, stringsAsFactors=FALSE)

## ----exportMAF-----------------------------------------------------------

##
## Make sure maf table is quoted properly, 
## and add to the ISAmtbls2 assay file.
## 
maf_character <- apply(maf, 2, as.character)

write.table(maf_character, 
            file=paste(tempdir(), "/a_mtbl2_metabolite profiling_mass spectrometry_maf.csv", sep=""),
            row.names=FALSE, col.names=all.colnames, 
            quote=TRUE, sep="\t", na="\"\"")

ISAmtbls2 <- updateAssayMetadata(ISAmtbls2, a.filename,
             "Metabolite Assignment File",
	     paste(tempdir(), "/a_mtbl2_metabolite profiling_mass spectrometry_maf.csv", sep=""))

write.assay.file(ISAmtbls2, a.filename)


## ----sessionInfo---------------------------------------------------------
sessionInfo()