## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", out.width = "100%" ) ## ----eval=FALSE--------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("CCAFE") ## ----eval=FALSE--------------------------------------------------------------- # if(!require(devtools, quietly = TRUE)) { # install.packages("devtools") # } # # devtools::install_github("https://github.com/wolffha/CCAFE") ## ----echo=TRUE,message=FALSE-------------------------------------------------- library(CCAFE) library(tidyverse) ## ----CaseControl_AF example--------------------------------------------------- # load the data data("sampleDat") sampleDat <- as.data.frame(sampleDat) results_af <- CaseControl_AF(data = sampleDat, N_case = 16550, N_control = 403923, OR_colname = "OR", AF_total_colname = "true_maf_pop") head(results_af) ## ----eval=TRUE---------------------------------------------------------------- # plot the results # first need minor allele frequency to compare to true_maf_case results_af$MAF_case <- sapply(results_af$AF_case, function(x) ifelse(x > 0.5, 1-x, x)) results_af <- as.data.frame(results_af) ggplot(results_af, aes(x = true_maf_case, y = MAF_case)) + geom_point() + geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") + theme_bw() + ggtitle("Example CaseControl_AF()") ## ----CaseControl_SE example - no correction----------------------------------- # load the data data("sampleDat") # always ensure data is a dataframe before inputting to CCAFE methods sampleDat <- as.data.frame(sampleDat) # First run without correction results_se_noCorr <- CaseControl_SE(data = sampleDat, N_case = 16550, N_control = 403923, OR_colname = "OR", SE_colname = "SE", chromosome_colname = "CHR", position_colname = "POS", do_correction = FALSE, sex_chromosomes = FALSE) head(results_se_noCorr) ## ----eval=TRUE---------------------------------------------------------------- # plot the results ggplot(results_se_noCorr, aes(x = true_maf_case, y = MAF_case)) + geom_point() + geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") + theme_bw() + coord_cartesian(ylim = c(0,.5)) + ggtitle("Example CaseControl_SE() No Correction") ## ----CaseControl_SE example - correction-------------------------------------- # load the data data("sampleDat") # always ensure data is a dataframe before inputting to CCAFE methods sampleDat <- as.data.frame(sampleDat) corr_data <- data.frame(CHR = sampleDat$CHR, POS = sampleDat$POS, proxy_MAF = sampleDat$gnomad_maf) # now run with correction results_se_corr <- CaseControl_SE(data = sampleDat, N_case = 16550, N_control = 403923, OR_colname = "OR", SE_colname = "SE", chromosome_colname = "CHR", position_colname = "POS", do_correction = TRUE, correction_data = corr_data, sex_chromosomes = FALSE) head(results_se_corr) ## ----eval=TRUE---------------------------------------------------------------- # plot the results ggplot(results_se_corr, aes(x = true_maf_case, y = MAF_case_adj)) + geom_point() + geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") + theme_bw() + ggtitle("Example CaseControl_SE() With Correction") ## ----eval=FALSE--------------------------------------------------------------- # BiocManager::install("VariantAnnotation") ## ----------------------------------------------------------------------------- suppressWarnings(suppressPackageStartupMessages({ library(VariantAnnotation) library(tidyverse) })) ## ----------------------------------------------------------------------------- data("vcf_sample") ## ----------------------------------------------------------------------------- samples(header(vcf_sample)) ## ----------------------------------------------------------------------------- vcf_sample ## ----------------------------------------------------------------------------- # first we will get the info from GRanges object (position, RSID) meta <- as.data.frame(ranges(vcf_sample)) meta <- meta[,c(1, 4)] colnames(meta) <- c("Position", "RSID") # get the chromosome (as a) meta$Chromosome <- as.vector(seqnames(rowRanges(vcf_sample))) # now we can also get the meta data (REF, ALT) from the GRanges object meta <- cbind(meta, mcols(vcf_sample)[,c(2,3)]) rownames(meta) <- seq(1, nrow(meta)) # now we will get the info from the geno object geno_dat <- data.frame( beta = unlist(geno(vcf_sample)$ES), SE = unlist(geno(vcf_sample)$SE), AF = unlist(geno(vcf_sample)$AF) ) df_data <- cbind(meta, geno_dat) head(df_data) ## ----------------------------------------------------------------------------- df_data$OR <- exp(df_data$beta) ## ----------------------------------------------------------------------------- df_data <- CaseControl_AF(data = df_data, N_case = 48286, N_control = 250671, OR_colname = "OR", AF_total_colname = "AF") head(df_data) ## ----------------------------------------------------------------------------- plotdata_AF <- df_data %>% dplyr::select(AF, AF_case, AF_control) plotdata_AF$AF_total <- (plotdata_AF$AF_case*48286 + plotdata_AF$AF_control*250671)/(298857) colnames(plotdata_AF)[1] <- "AF(known)" plotdata_AF_long <- pivot_longer(plotdata_AF, cols = colnames(plotdata_AF)) ## ----------------------------------------------------------------------------- ggplot(plotdata_AF_long, aes(x = value, fill = name)) + geom_boxplot(alpha = 0.5, outliers = FALSE) + facet_wrap(~name, nrow = 4) + theme_bw() ## ----CCAFE_convertVCF Demo---------------------------------------------------- df_data_2 <- CCAFE_convertVCF(vcf_sample) ## ----------------------------------------------------------------------------- df_data_2 <- CaseControl_AF(data = df_data_2, N_case = 48286, N_control = 250671, OR_colname = "OR", AF_total_colname = "AF") head(df_data_2) ## ----------------------------------------------------------------------------- sessionInfo()