## ----setup, include=FALSE------------------------------------------------
# Document-wide knitr settings: turn off the knitting progress display and
# echo the source code of every chunk unless a chunk overrides it.
knitr::opts_knit$set(progress = FALSE)
knitr::opts_chunk$set(echo = TRUE)

## ----message=FALSE, warning=FALSE, include=FALSE-------------------------
library(TCGAbiolinks)
library(SummarizedExperiment)
library(dplyr)
library(DT)

## ----results='hide', echo=TRUE, message=FALSE, warning=FALSE-------------
# Request the "clinical" table of the TCGA-LUAD project.
clinical <- GDCquery_clinic(type = "clinical", project = "TCGA-LUAD")

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# Render it as an interactive table: per-column filters on top, five rows per
# page, horizontal scrolling, and no row-name column.
clinical %>%
  datatable(
    rownames = FALSE,
    filter = "top",
    options = list(scrollX = TRUE, keys = TRUE, pageLength = 5)
  )

## ----results = 'hide',echo=TRUE, message=FALSE, warning=FALSE------------
# Query the clinical XML files of two TCGA-COAD patients, download them, and
# parse the "patient" section. `query` is reused by the following chunks.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Clinical",
  file.type = "xml",
  barcode = c("TCGA-RU-A8FL", "TCGA-AA-3972")
)
GDCdownload(query)
clinical <- GDCprepare_clinic(query = query, clinical.info = "patient")

## ----echo = TRUE, message = FALSE, warning = FALSE-----------------------
# Display the parsed patient information.
clinical %>%
  datatable(rownames = FALSE, options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Parse the "drug" section from the files of the current `query`.
clinical.drug <- GDCprepare_clinic(query = query, clinical.info = "drug")

## ----echo = TRUE, message = FALSE, warning = FALSE-----------------------
clinical.drug %>%
  datatable(rownames = FALSE, options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Parse the "radiation" section from the files of the current `query`.
clinical.radiation <- GDCprepare_clinic(
  query = query,
  clinical.info = "radiation"
)

## ----echo = TRUE, message = FALSE, warning = FALSE-----------------------
clinical.radiation %>%
  datatable(rownames = FALSE, options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Parse the "admin" section from the files of the current `query`.
clinical.admin <- GDCprepare_clinic(query = query, clinical.info = "admin")

## ----echo = TRUE, message = FALSE, warning = FALSE-----------------------
clinical.admin %>%
  datatable(rownames = FALSE, options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE,eval = F----
#  query <- GDCquery(project = "TCGA-COAD",
#                    data.category = "Other",
#                    legacy = TRUE,
#                    access = "open",
#                    data.type = "Auxiliary test",
#                    barcode = c("TCGA-AD-A5EJ","TCGA-DM-A0X9"))
#  GDCdownload(query)
#  msi_results <- GDCprepare_clinic(query, "msi")

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# Show the MSI results as an interactive table.
# NOTE(review): `msi_results` is not assigned anywhere in the visible script
# (the chunk that would build it is commented out), so it presumably comes
# from elsewhere, e.g. a dataset shipped with a loaded package -- confirm.
msi_results %>%
  datatable(options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Tissue slide image files
# Query (with legacy = TRUE) the slide images of two TCGA-COAD patients.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Clinical",
  data.type = "Tissue slide image",
  legacy = TRUE,
  barcode = c("TCGA-RU-A8FL", "TCGA-AA-3972")
)

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# Tabulate the results stored in the query object.
datatable(getResults(query), options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Pathology report
# Query (with legacy = TRUE) the pathology reports of two TCGA-COAD patients.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Clinical",
  data.type = "Pathology report",
  legacy = TRUE,
  barcode = c("TCGA-RU-A8FL", "TCGA-AA-3972")
)

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# Tabulate the results stored in the query object.
datatable(getResults(query), options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE, eval=FALSE----
#  # Tissue slide image
#  query <- GDCquery(project = "TCGA-COAD",
#                    data.category = "Clinical",
#                    data.type = "Tissue slide image",
#                    legacy = TRUE,
#                    barcode = c("TCGA-RU-A8FL","TCGA-AA-3972"))

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# Tabulate the results of the current `query`.
# NOTE(review): the chunk directly above is commented out, so `query` here is
# whatever evaluated code assigned most recently, not the query shown in that
# chunk.
datatable(getResults(query), options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Clinical Supplement
# Query (with legacy = TRUE) the clinical supplement files of two TCGA-COAD
# patients.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Clinical",
  data.type = "Clinical Supplement",
  legacy = TRUE,
  barcode = c("TCGA-RU-A8FL", "TCGA-AA-3972")
)

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# Tabulate the results stored in the query object.
datatable(getResults(query), options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Clinical data
# Query (with legacy = TRUE) the "txt" clinical data files of TCGA-COAD; no
# barcode filter is given here.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Clinical",
  data.type = "Clinical data",
  legacy = TRUE,
  file.type = "txt"
)

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# Tabulate the query results, leaving out every column whose name matches
# "cases".
query %>%
  getResults() %>%
  select(-matches("cases")) %>%
  datatable(options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE, eval = FALSE----
#  GDCdownload(query)
#  clinical.biotab <- GDCprepare(query)

## ----echo=TRUE, message=FALSE, warning=FALSE-----------------------------
# List the element names of `clinical.biotab`, then display its
# `clinical_radiation_coad` element as an interactive table.
# NOTE(review): `clinical.biotab` is not assigned anywhere in the visible
# script (the chunk that would build it is commented out), so it presumably
# comes from elsewhere, e.g. a dataset shipped with a loaded package -- confirm.
names(clinical.biotab)
clinical.biotab$clinical_radiation_coad %>%
  datatable(options = list(scrollX = TRUE, keys = TRUE))

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Get XML files and parse them
# One TCGA-READ patient: both the "patient" and the "follow_up" sections are
# parsed from the same downloaded files.
clin.query <- GDCquery(
  project = "TCGA-READ",
  data.category = "Clinical",
  file.type = "xml",
  barcode = "TCGA-F5-6702"
)
GDCdownload(clin.query)
clinical.patient <- GDCprepare_clinic(
  query = clin.query,
  clinical.info = "patient"
)
clinical.patient.followup <- GDCprepare_clinic(
  query = clin.query,
  clinical.info = "follow_up"
)

# Get indexed data
clinical.index <- GDCquery_clinic(project = "TCGA-READ")

## ----echo = TRUE, message = FALSE, warning = FALSE-----------------------
# Show the same three survival-related fields from each source so they can be
# compared: XML patient section, XML follow-up section, then indexed data.
clinical.patient %>%
  dplyr::select(vital_status, days_to_death, days_to_last_followup) %>%
  datatable()
clinical.patient.followup %>%
  dplyr::select(vital_status, days_to_death, days_to_last_followup) %>%
  datatable()
# Vital status should be the same in the follow up table
# (the indexed table spells the last column `days_to_last_follow_up`).
clinical.index %>%
  dplyr::filter(submitter_id == "TCGA-F5-6702") %>%
  dplyr::select(vital_status, days_to_death, days_to_last_follow_up) %>%
  datatable()

## ----results = 'hide', echo=TRUE, message=FALSE, warning=FALSE-----------
# Find the TCGA-LIHC gene-expression samples whose sample type is
# "Recurrent Solid Tumor" and keep only their `cases` column. The query
# results are read through getResults(), the accessor used by the rest of
# this script, rather than by indexing the query object directly, and
# select() is namespace-qualified so another attached package cannot mask it.
recurrent.samples <- GDCquery(
  project = "TCGA-LIHC",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  sample.type = "Recurrent Solid Tumor"
) %>%
  getResults() %>%
  dplyr::select(cases)

# Keep the first 12 characters of each barcode (the length of the patient
# barcodes used elsewhere in this script) and drop repeated patients.
recurrent.patients <- unique(substr(recurrent.samples$cases, 1, 12))

# Get the clinical XML files of those patients and parse the patient section
clin.query <- GDCquery(
  project = "TCGA-LIHC",
  data.category = "Clinical",
  file.type = "xml",
  barcode = recurrent.patients
)
GDCdownload(clin.query)
clinical.patient <- GDCprepare_clinic(clin.query, clinical.info = "patient")

## ----echo = TRUE, message = FALSE, warning = FALSE-----------------------
# Get indexed data
# Restrict the TCGA-LIHC indexed clinical table to the recurrent patients and
# show its recurrence and grade columns.
GDCquery_clinic(project = "TCGA-LIHC") %>%
  dplyr::filter(submitter_id %in% recurrent.patients) %>%
  dplyr::select(progression_or_recurrence, days_to_recurrence, tumor_grade) %>%
  datatable()

# XML data
# Barcode and histologic grade of the same patients, from the parsed XML.
clinical.patient %>%
  dplyr::select(bcr_patient_barcode, neoplasm_histologic_grade) %>%
  datatable()


## ---- eval = TRUE--------------------------------------------------------
# Example barcodes used to demonstrate the sample-type helpers below.
# The list is kept exactly as given, including its two repeated entries.
bar <- c(
  "TCGA-G9-6378-02A-11R-1789-07",
  "TCGA-CH-5767-04A-11R-1789-07",
  "TCGA-G9-6332-60A-11R-1789-07",
  "TCGA-G9-6336-01A-11R-1789-07",
  "TCGA-G9-6336-11A-11R-1789-07",
  "TCGA-G9-7336-11A-11R-1789-07",
  "TCGA-G9-7336-04A-11R-1789-07",
  "TCGA-G9-7336-14A-11R-1789-07",
  "TCGA-G9-7036-04A-11R-1789-07",
  "TCGA-G9-7036-02A-11R-1789-07",
  "TCGA-G9-7036-11A-11R-1789-07",
  "TCGA-G9-7036-03A-11R-1789-07",
  "TCGA-G9-7036-10A-11R-1789-07",
  "TCGA-BH-A1ES-10A-11R-1789-07",
  "TCGA-BH-A1F0-10A-11R-1789-07",
  "TCGA-BH-A0BZ-02A-11R-1789-07",
  "TCGA-B6-A0WY-04A-11R-1789-07",
  "TCGA-BH-A1FG-04A-11R-1789-08",
  "TCGA-D8-A1JS-04A-11R-2089-08",
  "TCGA-AN-A0FN-11A-11R-8789-08",
  "TCGA-AR-A2LQ-12A-11R-8799-08",
  "TCGA-AR-A2LH-03A-11R-1789-07",
  "TCGA-BH-A1F8-04A-11R-5789-07",
  "TCGA-AR-A24T-04A-55R-1789-07",
  "TCGA-AO-A0J5-05A-11R-1789-07",
  "TCGA-BH-A0B4-11A-12R-1789-07",
  "TCGA-B6-A1KN-60A-13R-1789-07",
  "TCGA-AO-A0J5-01A-11R-1789-07",
  "TCGA-AO-A0J5-01A-11R-1789-07",
  "TCGA-G9-6336-11A-11R-1789-07",
  "TCGA-G9-6380-11A-11R-1789-07",
  "TCGA-G9-6380-01A-11R-1789-07",
  "TCGA-G9-6340-01A-11R-1789-07",
  "TCGA-G9-6340-11A-11R-1789-07"
)

# Select barcodes by a single sample-type code.
S <- TCGAquery_SampleTypes(barcode = bar, typesample = "TP")
S2 <- TCGAquery_SampleTypes(barcode = bar, typesample = "NB")

# Retrieve multiple tissue types  NOT FROM THE SAME PATIENTS
SS <- TCGAquery_SampleTypes(barcode = bar, typesample = c("TP", "NB"))

# Retrieve multiple tissue types  FROM THE SAME PATIENTS
SSS <- TCGAquery_MatchedCoupledSampleTypes(
  barcode = bar,
  typesample = c("NT", "TP")
)

## ---- eval = FALSE-------------------------------------------------------
#  # This code will get all clinical indexed data from TCGA
#  library(data.table)
#  library(dplyr)
#  library(regexPipes)
#  clinical <- TCGAbiolinks:::getGDCprojects()$project_id %>%
#      regexPipes::grep("TCGA",value=T) %>%
#      sort %>%
#      plyr::alply(1,GDCquery_clinic, .progress = "text") %>%
#      rbindlist
#  readr::write_csv(clinical,path = paste0("all_clin_indexed.csv"))
#  
#  # This code will get all clinical XML data from TCGA
#  getclinical <- function(proj){
#      message(proj)
#      while(1){
#          result = tryCatch({
#              query <- GDCquery(project = proj, data.category = "Clinical",file.type = "xml")
#              GDCdownload(query)
#              clinical <- GDCprepare_clinic(query, clinical.info = "patient")
#              for(i in c("admin","radiation","follow_up","drug","new_tumor_event")){
#                  message(i)
#                  aux <- GDCprepare_clinic(query, clinical.info = i)
#                  if(is.null(aux) || nrow(aux) == 0) next
#                  # add suffix manually if it already exists
#                  replicated <- which(grep("bcr_patient_barcode",colnames(aux), value = T,invert = T) %in% colnames(clinical))
#                  colnames(aux)[replicated] <- paste0(colnames(aux)[replicated],".",i)
#                  if(!is.null(aux)) clinical <- merge(clinical,aux,by = "bcr_patient_barcode", all = TRUE)
#              }
#              readr::write_csv(clinical,path = paste0(proj,"_clinical_from_XML.csv")) # Save the clinical data into a csv file
#              return(clinical)
#          }, error = function(e) {
#              message(paste0("Error clinical: ", proj))
#          })
#      }
#  }
#  clinical <- TCGAbiolinks:::getGDCprojects()$project_id %>%
#      regexPipes::grep("TCGA",value=T) %>% sort %>%
#      plyr::alply(1,getclinical, .progress = "text") %>%
#      rbindlist(fill = TRUE) %>% setDF %>% subset(!duplicated(clinical))
#  
#  readr::write_csv(clinical,path = "all_clin_XML.csv")
#  # result: https://drive.google.com/open?id=0B0-8N2fjttG-WWxSVE5MSGpva1U
#  # Note: this table has multiple rows for each patient, because a patient may have several follow-ups,
#  # drug treatments, new tumor events, etc.