with all surrogates
maintaining the logFC between treated and control
Let’s say you’re not convinced yet by the package, so you want to be conservative and remove correlations to all SUCs but not the logFC between conditions. In this case, set center_SUCs = "per_sample".
Now we can compare the new Pearson correlation coefficients calculated from the assay `counts_simple_persample` (after RUCova, upper triangle) to the original coefficients from the assay `counts` (lower triangle).
# Correlation heatmap comparing the original assay with the per-sample
# RUCova assay.
heatmap_compare_corr(
  sce_Cal33,
  name_assay_before = "counts",
  name_assay_after = "counts_simple_persample"
)

Log fold-changes between irradiated and control condition are kept (positive means higher in irradiated).
# Per-marker difference of mean asinh-transformed values (10Gy minus 0Gy),
# computed on the original assay and on the per-sample RUCova assay, then
# plotted side by side.
# NOTE(review): `x` and `m` are defined outside this chunk; they are assumed to
# be character vectors of SUC and marker column names -- confirm.
FC_before <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |>
  cbind(colData(sce_Cal33)) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  mutate(data = "before RUCova")

FC_after <- t(assay(sce_Cal33, "counts_simple_persample")) |>
  as_tibble() |>
  cbind(colData(sce_Cal33)) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  mutate(data = "simple all, per sample")

# Dodged bars: one bar per marker and data set.
rbind(FC_before, FC_after) |>
  ggplot(aes(x = logFC, y = marker, fill = data)) +
  geom_col(position = "dodge")

changing logFC between samples accordingly
As radiation changes the cell volume, we think differences in protein intensities between treated and control are confounded. Hence, we want to remove any difference that correlates with the SUCs (center_SUCs = "across_sample").
#> [1] "Fitting pH3"
#> [1] "Fitting IdU"
#> [1] "Fitting Cyclin_D1"
#> [1] "Fitting Cyclin_B1"
#> [1] "Fitting Ki.67"
#> [1] "Fitting pRb"
#> [1] "Fitting pH2A.X"
#> [1] "Fitting p.p53"
#> [1] "Fitting p.p38"
#> [1] "Fitting pChk2"
#> [1] "Fitting pCDC25c"
#> [1] "Fitting cCasp3"
#> [1] "Fitting cPARP"
#> [1] "Fitting pAkt"
#> [1] "Fitting pAkt_T308"
#> [1] "Fitting pMEK1.2"
#> [1] "Fitting pERK1.2"
#> [1] "Fitting pS6"
#> [1] "Fitting p4e.BP1"
#> [1] "Fitting pSmad1.8"
#> [1] "Fitting pSmad2.3"
#> [1] "Fitting pNFkB"
#> [1] "Fitting IkBa"
#> [1] "Fitting CXCL1"
#> [1] "Fitting Lamin_B1"
#> [1] "Fitting pStat1"
#> [1] "Fitting pStat3"
#> [1] "Fitting YAP"
#> [1] "Fitting NICD"
# Correlation heatmap comparing the original assay with the across-samples
# RUCova assay.
heatmap_compare_corr(
  sce_Cal33,
  name_assay_before = "counts",
  name_assay_after = "counts_simple_acrosssamples"
)

Log fold-changes between irradiated and control condition are modified accordingly (positive means higher in irradiated).
# Per-marker difference of mean asinh-transformed values (10Gy minus 0Gy),
# computed on the original assay and on the across-samples RUCova assay, then
# plotted side by side.
# NOTE(review): `x` and `m` are defined outside this chunk; they are assumed to
# be character vectors of SUC and marker column names -- confirm.
FC_before <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |>
  cbind(colData(sce_Cal33)) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  mutate(data = "before RUCova")

FC_after <- t(assay(sce_Cal33, "counts_simple_acrosssamples")) |>
  as_tibble() |>
  cbind(colData(sce_Cal33)) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  # The legend label names the assay actually plotted (across samples).
  mutate(data = "simple all, across samples")

# Dodged bars: one bar per marker and data set.
rbind(FC_before, FC_after) |>
  ggplot(aes(x = logFC, y = marker, fill = data)) +
  geom_col(position = "dodge")

with PC1 only
Let’s imagine you want to be conservative and only remove correlations between markers and PC1 (of SUCs).
# PCA on the asinh-transformed, scaled columns selected by `x`.
# NOTE(review): `x` is defined outside this chunk; it is assumed to be a
# character vector of SUC column names -- confirm.
pca_cal33 <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |>
  cbind(colData(sce_Cal33)) |>
  select(all_of(x)) |>
  mutate(across(everything(), asinh)) |>
  mutate(across(everything(), scale)) |>
  prcomp()
Calculate and plot the variance explained by each PC:
# Percentage of total variance explained by each principal component,
# shown as labelled bars.
tibble(
  perc = as.numeric(pca_cal33$sdev^2 / sum(pca_cal33$sdev^2)) * 100,
  # seq_along() stays correct even for a zero-length sdev vector.
  PC = seq_along(pca_cal33$sdev)
) |>
  ggplot(aes(x = PC, y = perc, label = round(perc, 1))) +
  geom_col() +
  geom_label()

Check the loadings of each PC:
# Loadings of every input variable on each PC, one facet per PC.
# The row names of the rotation matrix are moved into a column called "x",
# which is then used as the y axis.
pca_cal33$rotation |>
  as.data.frame() |>
  rownames_to_column(var = "x") |>
  pivot_longer(cols = -x, names_to = "PC", values_to = "loadings") |>
  ggplot(mapping = aes(x = loadings, y = x)) +
  geom_col() +
  facet_wrap(~PC, nrow = 1)

In this example, PC1 has positive loadings, meaning PC1 will positively correlate with the markers, which is intuitive if we think of it as the cell size. If PC1 has negative loadings in your data set, you can simply flip its direction for a more intuitive analysis:
# Flip the sign of PC1. The result is only printed, not stored, as it is not
# necessary here.
mutate(as.data.frame(pca_cal33$x), PC1 = -PC1)
Add the PCA to the sce object under the name “PCA”:
# Store the PCA scores in the sce object under the reduced-dimension name "PCA".
name_reduced_dim <- "PCA"
reducedDim(sce_Cal33, name_reduced_dim) <- pca_cal33$x
Then, set SUCs = "PC1" and apply_asinh_SUCs = FALSE, as asinh transformation is not necessary on PCs (it was applied on SUCs before PCA). This applies to all models.
#> [1] "Fitting pH3"
#> [1] "Fitting IdU"
#> [1] "Fitting Cyclin_D1"
#> [1] "Fitting Cyclin_B1"
#> [1] "Fitting Ki.67"
#> [1] "Fitting pRb"
#> [1] "Fitting pH2A.X"
#> [1] "Fitting p.p53"
#> [1] "Fitting p.p38"
#> [1] "Fitting pChk2"
#> [1] "Fitting pCDC25c"
#> [1] "Fitting cCasp3"
#> [1] "Fitting cPARP"
#> [1] "Fitting pAkt"
#> [1] "Fitting pAkt_T308"
#> [1] "Fitting pMEK1.2"
#> [1] "Fitting pERK1.2"
#> [1] "Fitting pS6"
#> [1] "Fitting p4e.BP1"
#> [1] "Fitting pSmad1.8"
#> [1] "Fitting pSmad2.3"
#> [1] "Fitting pNFkB"
#> [1] "Fitting IkBa"
#> [1] "Fitting CXCL1"
#> [1] "Fitting Lamin_B1"
#> [1] "Fitting pStat1"
#> [1] "Fitting pStat3"
#> [1] "Fitting YAP"
#> [1] "Fitting NICD"
If we regress out any PCs and want to check the correlation coefficients, it is important to pass the name of the reduced dimension to the heatmap function so that the PCs are included: `name_reduced_dim = "PCA"`.
# Correlation heatmap comparing the original assay with the PC1 RUCova assay;
# the reduced-dimension name is passed so the PCs are included.
heatmap_compare_corr(
  sce_Cal33,
  name_assay_before = "counts",
  name_assay_after = "counts_simple_PC1",
  name_reduced_dim = "PCA"
)

Log fold-changes between irradiated and control condition are modified accordingly (positive means higher in irradiated).
# Per-marker difference of mean asinh-transformed values (10Gy minus 0Gy),
# computed on the original assay and on the PC1 RUCova assay, then plotted
# side by side.
# NOTE(review): `x` and `m` are defined outside this chunk; they are assumed to
# be character vectors of SUC and marker column names -- confirm.
FC_before <- t(assay(sce_Cal33, "counts")) |>
  as_tibble() |>
  cbind(colData(sce_Cal33)) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  mutate(data = "before RUCova")

FC_after <- t(assay(sce_Cal33, "counts_simple_PC1")) |>
  as_tibble() |>
  cbind(colData(sce_Cal33)) |>
  mutate(across(all_of(c(x, m)), asinh)) |>
  pivot_longer(
    cols = all_of(c(x, m)),
    names_to = "marker",
    values_to = "value"
  ) |>
  group_by(marker) |>
  summarise(
    logFC = mean(value[dose == "10Gy"]) - mean(value[dose == "0Gy"])
  ) |>
  ungroup() |>
  # The legend label names the assay actually plotted (PC1 only).
  mutate(data = "simple PC1")

# Dodged bars: one bar per marker and data set.
rbind(FC_before, FC_after) |>
  ggplot(aes(x = logFC, y = marker, fill = data)) +
  geom_col(position = "dodge")
