### R code from vignette source 'flowQBVignettes.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: LoadPackage
###################################################
library("flowQB")
library("flowQBData")


###################################################
### code chunk number 2: FitLED
###################################################
## Example is based on LED data from the flowQBData package
fcs_directory <- system.file("extdata", "SSFF_LSRII", "LED_Series", 
    package="flowQBData")
fcs_file_path_list <- list.files(fcs_directory, "*.fcs", full.names= TRUE)
## We are working with these FCS files:
basename(fcs_file_path_list)

## Various house keeping information
## - Which channels should be ignored, typically the non-fluorescence
##    channels, such as the time and the scatter channels
ignore_channels <- c("Time", 
    "FSC-A", "FSC-W", "FSC-H", 
    "SSC-A", "SSC-W", "SSC-H")
## - Which dyes would you typically use with the detectors
dyes <- c("APC", "APC-Cy7", "APC-H7", "FITC", "PE", "PE-Cy7", "PerCP", 
    "PerCP-Cy55", "V450", "V500-C")
## - What are the corresponding detectors, provide a vector of short channel 
## names, i.e., values of the $PnN FCS keywords.
detectors <- c("APC-A", "APC-Cy7-A", "APC-Cy7-A", "FITC-A", "PE-A", "PE-Cy7-A",
    "PerCP-Cy5-5-A", "PerCP-Cy5-5-A", "Pacific Blue-A", "Aqua Amine-A")
## - The signal type that you are looking at (Area or Height)
signal_type <- "Area"
## - The instrument make/model
instrument_name <- 'LSRII'
## - Set the minimum and maximum values, peaks with mean outsize of this range
## will be ignored
bounds <- list(minimum = -100, maximum = 100000)
## - The minimum number of usable peaks (represented by different FCS files
## in case of an LED pulser) required in order for a fluorescence channel 
## to be included in the fitting. Peaks with mean expression outside of the 
## bounds specified above are omitted and therefore not considered useful.
## Fitting the three quadratic parameters requires three valid points to obtain
## a fit at all, and 4 or more points are needed to obtain error estimates. 
minimum_fcs_files <- 3
## - What is the maximum number of iterations for iterative fitting with
## weight adjustments
max_iterations <- 10 # The default 10 seems to be enough in typical cases

## Now, let's calculate the fitting
led_results <- fit_led(fcs_file_path_list, ignore_channels, dyes,
    detectors, signal_type, instrument_name, bounds = bounds,
    minimum_useful_peaks = minimum_fcs_files, max_iterations = max_iterations)


###################################################
### code chunk number 3: ExploreLEDPeakStats
###################################################
## We have stats for these channels
names(led_results$peak_stats)

## Explore the peak stats for a randomly chosen channel (PE-Cy7-A)
## Showing only the head in order to limit the output for the vignette
head(led_results$peak_stats$`PE-Cy7-A`)


###################################################
### code chunk number 4: ExploreBGStats
###################################################
## Explore bg_stats
led_results$bg_stats

## Explore dye_bg_stats
led_results$dye_bg_stats


###################################################
### code chunk number 5: ExploreFits
###################################################
## Explore dye_fits
## fits are the same rows but columns corresponding to all channels
led_results$dye_fits

## Explore iterated_dye_fits
## iterated_fits are the same rows but columns corresponding to all channels
led_results$iterated_dye_fits


###################################################
### code chunk number 6: ExploreIterationNumbers
###################################################
## Explore iteration numbers
## Showing only the head in order to limit the output for the vignette
head(led_results$iteration_numbers)


###################################################
### code chunk number 7: FitSpherotechExample
###################################################
## Example of fitting bead data based on Sph8 particle sets from Spherotech
fcs_file_path <- system.file("extdata", "SSFF_LSRII", "Other_Tests", 
    "933745.fcs", package="flowQBData")
scatter_channels <- c("FSC-A", "SSC-A")
## Depending on your hardware and input, this may take a few minutes, mainly
## due to the required clustering stage of the algorithm.
spherotech_results <- fit_spherotech(fcs_file_path, scatter_channels, 
    ignore_channels, dyes, detectors, bounds, 
    signal_type, instrument_name, minimum_useful_peaks = 3,
    max_iterations = max_iterations, logicle_width = 1.0)
## This is the same as if we were running
## fit_beads(fcs_file_path, scatter_channels, 
##     ignore_channels, 8, dyes, detectors, bounds, 
##     signal_type, instrument_name, minimum_useful_peaks = 3, 
##     max_iterations = max_iterations, logicle_width = 1.0)


###################################################
### code chunk number 8: VisualizeKmeans
###################################################
library("flowCore")
plot(
    exprs(spherotech_results$transformed_data[,"FITC-A"]),
    exprs(spherotech_results$transformed_data[,"Pacific Blue-A"]),
    col=spherotech_results$peak_clusters$cluster, pch='.')


###################################################
### code chunk number 9: ExploreBeadPeakStats
###################################################
## We have stats for these channels
names(spherotech_results$peak_stats)

## Explore the peak stats for a randomly chosen channel (PE-Cy7-A)
spherotech_results$peak_stats$`PE-Cy7-A`


###################################################
### code chunk number 10: ExploreFits2
###################################################
## Explore fits
## Selecting just a few columns to limit the output for the vignette
spherotech_results$fits[,c(1,3,5,7)]

## Explore iterated_fits
spherotech_results$iterated_fits[,c(1,3,5,7)]


###################################################
### code chunk number 11: ExploreIterationNumbers2
###################################################
## Explore iteration numbers
## Showing only the head in order to limit the output for the vignette
head(spherotech_results$iteration_numbers)


###################################################
### code chunk number 12: QBFromFits
###################################################
## 1 QB from both quadratic and linear fits of LED data
qb_from_fits(led_results$iterated_dye_fits)
## 2 QB from quadratic fitting of bead data
qb_from_fits(spherotech_results$iterated_dye_fits)


###################################################
### code chunk number 13: XSLTExample
###################################################
## Example of fitting based on house-keeping information in a spreadsheet
library(xlsx)

## LED Fitting first
inst_xlsx_path <- system.file("extdata", 
    "140126_InstEval_Stanford_LSRIIA2.xlsx", package="flowQBData")
xlsx <- read.xlsx(inst_xlsx_path, 1, headers=FALSE, stringsAsFactors=FALSE)

ignore_channels_row <- 9
ignore_channels <- vector()
i <- 1
while(!is.na(xlsx[[i+4]][[ignore_channels_row]])) {
    ignore_channels[[i]] <- xlsx[[i+4]][[ignore_channels_row]]
    i <- i + 1
}
    
instrument_folder_row <- 9
instrument_folder_col <- 2
instrument_folder <- xlsx[[instrument_folder_col]][[instrument_folder_row]]
folder_column <- 18
folder_row <- 14
folder <- xlsx[[folder_column]][[folder_row]]

fcs_directory <- system.file("extdata", instrument_folder, 
    folder, package="flowQBData")
fcs_file_path_list <- list.files(fcs_directory, "*.fcs", full.names= TRUE)

bounds_min_col <- 6
bounds_min_row <- 7
bounds_max_col <- 7
bounds_max_row <- 7
bounds <- list()

if (is.na(xlsx[[bounds_min_col]][[bounds_min_row]])) {
    bounds$minimum <- -100
} else {
    bounds$minimum <- as.numeric(xlsx[[bounds_min_col]][[bounds_min_row]])
}
if (is.na(xlsx[[bounds_max_col]][[bounds_max_row]])) {
    bounds$maximum <- 100000
} else {
    bounds$maximum <- as.numeric(xlsx[[bounds_max_col]][[bounds_max_row]])
}

signal_type_col <- 3
signal_type_row <- 19
signal_type <- xlsx[[signal_type_col]][[signal_type_row]]
    
instrument_name_col <- 2
instrument_name_row <- 5
instrument_name <- xlsx[[instrument_name_col]][[instrument_name_row]]

channel_cols <- 3:12
dye_row <- 11
detector_row <- 13
dyes <- as.character(xlsx[dye_row,channel_cols])
detectors <- as.character(xlsx[detector_row,channel_cols])

## Now we do the LED fitting
led_results <- fit_led(fcs_file_path_list, ignore_channels, dyes,
    detectors, signal_type, instrument_name, bounds = bounds,
    minimum_useful_peaks = 3, max_iterations = 10)

led_results$iterated_dye_fits
qb_from_fits(led_results$iterated_dye_fits)

## Next we do the bead fitting; this example is with Thermo-fisher beads
folder_column <- 17
folder <- xlsx[[folder_column]][[folder_row]]
filename <- xlsx[[folder_column]][[folder_row+1]]

fcs_file_path <- system.file("extdata", instrument_folder, folder, 
    filename, package="flowQBData")

thermo_fisher_results <- fit_thermo_fisher(fcs_file_path, scatter_channels,
    ignore_channels, dyes, detectors, bounds, 
    signal_type, instrument_name, minimum_useful_peaks = 3, 
    max_iterations = 10, logicle_width = 1.0)
## The above is the same as this:
## N_peaks <- 6
## thermo_fisher_results <- fit_beads(fcs_file_path, scatter_channels,
##     ignore_channels, N_peaks, dyes, detectors, bounds, 
##     signal_type, instrument_name, minimum_useful_peaks = 3, 
##     max_iterations = 10, logicle_width = 1.0)

thermo_fisher_results$iterated_dye_fits
qb_from_fits(thermo_fisher_results$iterated_dye_fits)


###################################################
### code chunk number 14: FlowRepositoryRExample (eval = FALSE)
###################################################
## library("FlowRepositoryR")
## ## 1) Specify your credentials to work with FlowRepository, this will not
## ##    be required once the data is publicly available; see the 
## ##    FlowRepositoryR vignette for further details.
## setFlowRepositoryCredentials(email="your@email.com", password="password")
## ##
## ## 2) Get the dataset from FlowRepository
## ##    Note that this does not download the data. You could download all
## ##    data by running 
## ##    qbDataset <- download(flowRep.get("FR-FCM-ZZTF"))
## ##    but note that this will download about 3 GB of data
## qbDataset <- flowRep.get("FR-FCM-ZZTF")
## ##
## summary(qbDataset)
## ## A flowRepData object (FlowRepository dataset) Asilomar Instrument 
## ## Standardization
## ## 911 FCS files, 36 attachments, NOT downloaded
## ##
## ## 3) See which of the files are MS Excell spredsheets
## spreadsheet_indexes <- which(unlist(lapply(qbDataset@attachments, 
##     function(a) { endsWith(a@name, ".xlsx") })))
## ##
## ## 4) Download a random spreadsheet, say the 5th spreadsheet
## ## This is a bit ugly at this point since the data is not public
## ## plus we don't want to download everythink, just one of the files
## library(RCurl)
## h <- getCurlHandle(cookiefile="")
## FlowRepositoryR:::flowRep.login(h)
## ## Once the data is public, only this line and without the curlHandle
## ## argument will be needed:
## qbDataset@attachments[[spreadsheet_indexes[5]]] <- xslx5 <- download(
##     qbDataset@attachments[[spreadsheet_indexes[5]]], curlHandle=h)
## ## File 140126_InstEval_NIH_Aria_B2.xlsx downloaded.
## ##
## ## 5) Read the spreadsheet
## library(xlsx)
## xlsx <- read.xlsx(xslx5@localpath, 1, headers=FALSE, stringsAsFactors=FALSE)
## ##
## ## 6) Which FCS file contains the Spherotech data?
## ##    This is based on how we create the Excel spreadsheets and 
## ##    the FCS file names.
## instrument_row <- 9
## instrument_col <- 2
## instrument_name <- xlsx[[instrument_folder_col]][[instrument_folder_row]]
## fol_col <- 16
## fol_row <- 14
## fol_name <- xlsx[[fol_col]][[fol_row]]
## fcsFilename <- paste(instrument_name, fol_name, 
##     xlsx[[fol_col]][[fol_row+1]], sep="_")
## fcsFilename
## ## [1] "NIH Aria_B 140127_Other Tests_BEADS_Spherotech Rainbow1X_012.fcs"
## ##
## ## 7) Let's locate the file in our dataset and let's download it
## ##    Again, the curlHandle is only needed since the file is not public yet
## fcsFileIndex = which(unlist(lapply(qbDataset@fcs.files, function(f) { 
##     f@name == fcsFilename })))
## qbDataset@fcs.files[[fcsFileIndex]] <- fcsFile <- download(
##     qbDataset@fcs.files[[fcsFileIndex]], curlHandle=h)
## # File NIH Aria_B 140127_Other Tests_BEADS_Spherotech Rainbow1X_012.fcs 
## # downloaded.
## ##
## ## 8) Read in some more metadata from the spreadsheet
## scatter_channels <- c(
##     xlsx[[fol_col]][[fol_row+2]], 
##     xlsx[[fol_col]][[fol_row+3]])
## ignore_channels_row <- 9
## ignore_channels <- vector()
## i <- 1
## while(!is.na(xlsx[[i+4]][[ignore_channels_row]])) {
##     ignore_channels[[i]] <- xlsx[[i+4]][[ignore_channels_row]]
##     i <- i + 1
## }
## bounds_min_col <- 6
## bounds_min_row <- 7
## bounds_max_col <- 7
## bounds_max_row <- 7
## bounds <- list()
## if (is.na(xlsx[[bounds_min_col]][[bounds_min_row]])) {
##     bounds$minimum <- -100
## } else {
##     bounds$minimum <- as.numeric(xlsx[[bounds_min_col]][[bounds_min_row]])
## }
## if (is.na(xlsx[[bounds_max_col]][[bounds_max_row]])) {
##     bounds$maximum <- 100000
## } else {
##     bounds$maximum <- as.numeric(xlsx[[bounds_max_col]][[bounds_max_row]])
## }
## signal_type_col <- 3
## signal_type_row <- 19
## signal_type <- xlsx[[signal_type_col]][[signal_type_row]]
## channel_cols <- 3:12
## dye_row <- 11
## detector_row <- 13
## dyes <- as.character(xlsx[dye_row,channel_cols])
## detectors <- as.character(xlsx[detector_row,channel_cols])
## ##
## ## 9) Let's calculate the fits
## multipeak_results <- fit_spherotech(fcsFile@localpath, scatter_channels, 
##     ignore_channels, dyes, detectors, bounds, 
##     signal_type, instrument_name)
## ##
## ## 10) And same as before, we can extract Q, B and CV0 from the fits
## qb_from_fits(multipeak_results$iterated_dye_fits)
## #         APC          APC-Cy7      APC-H7       FITC        PE         
## # q_QI    0.2406655    0.1291517    0.1291517    0.2034718   0.9014326  
## # q_BSpe  34.33642     21.47796     21.47796     21.57055    812.5968   
## # q_CV0sq 0.0006431427 0.0004931863 0.0004931863 0.001599552 0.001065658
## #         PE-Cy7      PerCP       PerCP-Cy55  V450        V500-C     
## # q_QI    0.2754679   0.08987149  0.08987149  0.07051216  0.192678   
## # q_BSpe  11.15762    8.167175    8.167175    24.81647    63.4188    
## # q_CV0sq 0.001286111 0.001622208 0.001622208 0.005127177 0.004315592


###################################################
### code chunk number 15: LEDFCSFileIndexesExample (eval = FALSE)
###################################################
## LEDfcsFileIndexes <- which(unlist(lapply(qbDataset@fcs.files, function(f) 
## { 
##     f@name %in% paste(instrument_name, xlsx[14, 3:12], xlsx[15, 3:12], sep="_")
## })))
## # [1] 173 174 175 176 177 178 179 180 181 182


###################################################
### code chunk number 16: LEDExampleRepositoryContinued (eval = FALSE)
###################################################
## dir <- tempdir()
## cwd <- getwd()
## setwd(dir)
## lapply(LEDfcsFileIndexes, function(i) {
##     qbDataset@fcs.files[[i]] <- download(qbDataset@fcs.files[[i]], 
##         curlHandle=h)
## })
## setwd(cwd)
## fcs_file_path_list <- list.files(dir, "*.fcs", full.names= TRUE)
## 
## led_results <- fit_led(fcs_file_path_list, ignore_channels, dyes,
##     detectors, signal_type, instrument_name, bounds = bounds)


###################################################
### code chunk number 17: SessionInfo
###################################################
## This vignette has been created with the following configuration
sessionInfo()