## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- eval=FALSE--------------------------------------------------------------
#  if(!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("randRotation")

## ----message=FALSE------------------------------------------------------------
library(randRotation)
set.seed(0)
# Dataframe of phenotype data (sample information)
pdata <- data.frame(batch = as.factor(rep(1:3, c(10,10,10))),
                    phenotype = rep(c("Control", "Cancer"), c(5,5)))
features <- 1000

# Matrix with random gene expression data
edata <- matrix(rnorm(features * nrow(pdata)), features)
rownames(edata) <- paste("feature", 1:nrow(edata))

xtabs(data = pdata)

## -----------------------------------------------------------------------------
mod1 <- model.matrix(~1+phenotype, pdata)
head(mod1)

## -----------------------------------------------------------------------------
rr <- initBatchRandrot(Y = edata, X = mod1, coef.h = 2, batch = pdata$batch)

## -----------------------------------------------------------------------------
statistic <- function(Y, batch, mod){
  # (I) Batch effect correction with "Combat" from the "sva" package
  Y <- sva::ComBat(dat = Y, batch = batch, mod = mod)
  
  # (II) Linear model fit
  fit1 <- lm.fit(x = mod, y = t(Y))
  abs(t(coef(fit1))[,2])
}

## ----message=FALSE, results='hide'--------------------------------------------
rs1 <- rotateStat(initialised.obj = rr, R = 10, statistic = statistic,
                   batch = pdata$batch, mod = mod1)

## -----------------------------------------------------------------------------
rs1

## -----------------------------------------------------------------------------
p.vals <- pFdr(rs1)
hist(p.vals, col = "lightgreen");abline(h = 100, col = "blue", lty = 2)
qqunif(p.vals)

## -----------------------------------------------------------------------------
edata.combat <- sva::ComBat(dat = edata, batch = pdata$batch, mod = mod1)
fit1 <- lm.fit(x = mod1, y = t(edata.combat))

# t-statistics
var.beta <- diag(solve(t(mod1)%*%mod1))
s2 <- colSums(resid(fit1)^2) / df.residual(fit1)
t1 <- t(coef(fit1) / sqrt(var.beta)) / sqrt(s2)

# P-values from t-statistics
p.vals.nonrot <- 2*pt(abs(t1), df.residual(fit1), lower.tail = FALSE)
p.vals.nonrot <- p.vals.nonrot[,2]

hist(p.vals.nonrot, col = "lightgreen");abline(h = 100, col = "blue", lty = 2)
qqunif(p.vals.nonrot)
plot(p.vals, p.vals.nonrot, log = "xy", pch = 20)
abline(0,1, col = 4, lwd = 2)

## -----------------------------------------------------------------------------
# Specification of the full model
mod1 <- model.matrix(~1+phenotype, pdata)

# We select "phenotype" as the coefficient associated with H0
# All other coefficients are considered as "determined" coefficients
rr <- initRandrot(Y = edata, X = mod1, coef.h = 2)

coefs <- function(Y, mod){
  t(coef(lm.fit(x = mod, y = t(Y))))
}

# Specification of the H0 model
mod0 <- model.matrix(~1, pdata)

coef01 <- coefs(edata, mod0)
coef02 <- coefs(randrot(rr), mod0)

head(cbind(coef01, coef02))

all.equal(coef01, coef02)

## -----------------------------------------------------------------------------
coef11 <- coefs(edata, mod1)
coef12 <- coefs(randrot(rr), mod1)

head(cbind(coef11, coef12))

## ----message=FALSE, results='hold'--------------------------------------------
library(limma)

mod1 = model.matrix(~phenotype, pdata)

## -----------------------------------------------------------------------------
# Linear model fit
fit0 <- lmFit(edata, mod1)
fit0 <- eBayes(fit0)

# P values for phenotype coefficient
p0 <- topTable(fit0, coef = 2, number = Inf, adjust.method = "none",
               sort.by = "none")$P.Value
hist(p0, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1))
abline(1,0, col = "blue", lty = 2)
qqunif(p0)

## -----------------------------------------------------------------------------
library(sva)
edata.combat = ComBat(edata, batch = pdata$batch, mod = mod1)

## -----------------------------------------------------------------------------
# Linear model fit
fit1 <- lmFit(edata.combat, mod1)
fit1 <- eBayes(fit1)

# P value for phenotype coefficient
p.combat <- topTable(fit1, coef = 2, number = Inf, adjust.method = "none",
                     sort.by = "none")$P.Value
hist(p.combat, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1))
abline(1,0, col = "blue", lty = 2)
qqunif(p.combat)

## -----------------------------------------------------------------------------
init1 <- initBatchRandrot(edata, mod1, coef.h = 2, batch = pdata$batch)

## -----------------------------------------------------------------------------
statistic <- function(Y, batch, mod, coef){
  Y.tmp <- sva::ComBat(dat = Y, batch = batch, mod = mod)

  fit1 <- limma::lmFit(Y.tmp, mod)
  fit1 <- limma::eBayes(fit1)
  # The "abs" is needed for "pFdr" to calculate 2-tailed statistics
  abs(fit1$t[,coef])
}

## ----message=FALSE, results='hide'--------------------------------------------
res1 <- rotateStat(initialised.obj = init1, R = 10, statistic = statistic,
                    batch = pdata$batch, mod = mod1, coef = 2)

## -----------------------------------------------------------------------------
p.rot <- pFdr(res1)
head(p.rot)

hist(p.rot, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1))
abline(1,0, col = "blue", lty = 2)
qqunif(p.rot)

## -----------------------------------------------------------------------------
plot(density(log(p.rot/p0)), col = "salmon", "Log p ratios",
     panel.first = abline(v=0, col = "grey"),
     xlim = range(log(c(p.rot/p0, p.combat/p0))))
lines(density(log(p.combat/p0)), col = "blue")
legend("topleft", legend = c("log(p.combat/p0)", "log(p.rot/p0)"),
       lty = 1, col = c("blue", "salmon"))

## -----------------------------------------------------------------------------
plot(p0, p.combat, log = "xy", pch = 20, col = "lightblue", ylab = "")
points(p0, p.rot, pch = 20, col = "salmon")
abline(0,1, lwd = 1.5, col = "black")
legend("topleft", legend = c("p.combat", "p.rot"), pch = 20,
       col = c("lightblue", "salmon"))

## -----------------------------------------------------------------------------
plot(density(log(p.combat/p.rot)), col = "blue",
     main = "log(p.combat / p.rot )", panel.first = abline(v=0, col = "grey"))

## -----------------------------------------------------------------------------
fdr.q  <- pFdr(res1, "fdr.q")
fdr.qu <- pFdr(res1, "fdr.q")
fdr.BH <- pFdr(res1, "BH")

FDRs <- cbind(fdr.q, fdr.qu, fdr.BH)
ord1 <- order(res1$s0, decreasing = TRUE)

FDRs.sorted <- FDRs[ord1,]

matplot(FDRs.sorted, type = "l", lwd = 2)
legend("bottomright", legend = c("fdr.q", "fdr.qu", "BH"), lty = 1:5, lwd = 2,
       col = 1:6)

head(FDRs.sorted)


## -----------------------------------------------------------------------------
mapping <- function(Y) abs(Y)
df_estimate(edata, mapping = mapping)
ncol(edata)

## -----------------------------------------------------------------------------
mapping <- function(Y) floor(Y)
df_estimate(edata, mapping = mapping)

## -----------------------------------------------------------------------------
mapping <- function(Y, batch, mod) {
  limma::removeBatchEffect(Y, batch = batch, design = mod)
}

df_estimate(edata, mapping = mapping, batch = pdata$batch, mod = mod1)

## -----------------------------------------------------------------------------
sessionInfo()