### R code from vignette source 'DesignSignatures.Rnw'

###################################################
### code chunk number 1: DesignSignatures.Rnw:51-53
###################################################
options(continue=" ")
options(width=80)


###################################################
### code chunk number 2: startup
###################################################
library(DECIPHER)


###################################################
### code chunk number 3: expr1
###################################################
# specify the path to your sequence file:
fas <- "<<path to FASTA file>>"
# OR find the example sequence file used in this tutorial:
fas <- system.file("extdata", "IDH2.fas", package="DECIPHER")


###################################################
### code chunk number 4: expr2
###################################################
# specify a path for where to write the sequence database
dbConn <- "<<path to write sequence database>>"
# OR create the sequence database in memory
dbConn <- dbConnect(SQLite(), ":memory:")
N <- Seqs2DB(fas, "FASTA", dbConn, "")
N # number of sequences in the database


###################################################
### code chunk number 5: expr3
###################################################
# if each sequence belongs to its own group,
# then identify the sequences with a number:
desc <- as.character(seq_len(N)) # N is the number of sequences
# OR get the FASTA record description:
desc <- dbGetQuery(dbConn, "select description from DNA")$description
# show the unique descriptors:
unique(desc)


###################################################
### code chunk number 6: expr4
###################################################
Add2DB(data.frame(id=desc), dbConn)


###################################################
### code chunk number 7: expr5a
###################################################
# Designing primers for sequencing experiments:
TYPE <- "sequence"
MIN_SIZE <- 300 # base pairs
MAX_SIZE <- 700
RESOLUTION <- 5 # k-mer signature
LEVELS <- 5 # max number of each k-mer


###################################################
### code chunk number 8: expr5b
###################################################
# Designing primers for community fingerprinting (FLP):
TYPE <- "length"
# it is important to have a width range of lengths
MIN_SIZE <- 200 # base pairs
MAX_SIZE <- 1400
# define bin boundaries for distinguishing length,
# the values below require high-resolution, but
# the bin boundaries can be redefined for lower
# resolution experiments such as gel runs
RESOLUTION <- c(seq(200, 700, 3),
                seq(705, 1000, 5),
                seq(1010, 1400, 10))
LEVELS <- 2 # presence/absence of the length


###################################################
### code chunk number 9: expr5c
###################################################
# Designing primers for high resolution melting (HRM):
TYPE <- "melt"
MIN_SIZE <- 55 # base pairs
MAX_SIZE <- 400
# the recommended values for resolution
RESOLUTION <- seq(80, 100, 0.25) # degrees Celsius
LEVELS <- 10


###################################################
### code chunk number 10: expr6
###################################################
ENZYMES <- NULL # required for sequencing
# OR select restriction enzymes to consider
data(RESTRICTION_ENZYMES)
# for this tutorial we will use the enzyme MslI
ENZYMES <- RESTRICTION_ENZYMES["MslI"]
ENZYMES


###################################################
### code chunk number 11: expr7 (eval = FALSE)
###################################################
## primers <- DesignSignatures(dbConn,
##                             type=TYPE,
##                             minProductSize=MIN_SIZE,
##                             maxProductSize=MAX_SIZE,
##                             resolution=RESOLUTION,
##                             levels=LEVELS,
##                             enzymes=ENZYMES)


###################################################
### code chunk number 12: expr8 (eval = FALSE)
###################################################
## primers[which.max(primers$score),] # best primers without digestion


###################################################
### code chunk number 13: expr9 (eval = FALSE)
###################################################
## primers[which.max(primers$digest_score),] # best primers with digestion


###################################################
### code chunk number 14: expr9 (eval = FALSE)
###################################################
## PSET <- 1 # examine the top scoring primer set overall
## 
## # select the first sequence from each group
## dna <- SearchDB(dbConn,
##                 remove="all",
##                 nameBy="id",
##                 clause="where row_names =
##                         (select min(row_names)
##                          from DNA as S
##                          where S.id = DNA.id)",
##                 verbose=FALSE)
## 
## f_primer <- DNAStringSet(primers$forward_primer[PSET])
## r_primer <- DNAStringSet(primers$reverse_primer[PSET])
## patterns <- c(f_primer,
##               reverseComplement(r_primer),
##               DNAStringSet(gsub("[^A-Z]", "", ENZYMES)))
## 
## BrowseSeqs(dna,
##            patterns=patterns)


###################################################
### code chunk number 15: expr10 (eval = FALSE)
###################################################
## PSET <- which.max(primers$score) # top scoring without digestion
## 
## f_primer <- DNAString(primers$forward_primer[PSET])
## r_primer <- DNAString(primers$reverse_primer[PSET])
## r_primer <- reverseComplement(r_primer)
## 
## ids <- dbGetQuery(dbConn, "select distinct id from DNA")$id
## 
## if (TYPE=="sequence") {
##     signatures <- matrix(0, nrow=4^RESOLUTION, ncol=length(ids))
## } else if (TYPE=="melt") {
##     signatures <- matrix(0, nrow=length(RESOLUTION), ncol=length(ids))
## } else { # TYPE=="length"
##     signatures <- matrix(0, nrow=length(RESOLUTION) - 1, ncol=length(ids))
## }
## 
## for (i in seq_along(ids)) {
##     dna <- SearchDB(dbConn, identifier=ids[i], remove="all", verbose=FALSE)
##     amplicons <- matchLRPatterns(f_primer, r_primer,
##                                  MAX_SIZE, unlist(dna),
##                                  max.Lmismatch=1, max.Rmismatch=1,
##                                  Lfixed="subject", Rfixed="subject")
##     amplicons <- as(amplicons, "DNAStringSet")
##     if (length(amplicons)==0)
##         next
##     
##     if (TYPE=="sequence") {
##         signature <- oligonucleotideFrequency(amplicons, RESOLUTION)
##         signatures[, i] <- colMeans(signature)
##     } else if (TYPE=="melt") {
##         signature <- MeltDNA(amplicons, "melt curves", RESOLUTION)
##         # weight melting curves by their amlicon's width
##         signature <- t(signature)*width(amplicons)
##         signatures[, i] <- colSums(signature)/sum(width(amplicons))
##     } else { # TYPE=="length"
##         signature <- .bincode(width(amplicons), RESOLUTION)
##         for (j in signature[which(!is.na(signature))])
##             signatures[j, i] <- signatures[j, i] + 1/length(signature)
##     }
## }
## 
## if (TYPE=="sequence") {
##     d <- dist(t(signatures))
##     h <- hclust(d)
##     plot(h, labels=ids, cex=0.7, ann=FALSE)
## } else if (TYPE=="melt") {
##     matplot(RESOLUTION, signatures, type="l",
##         xlab="Temperature (degrees Celsius)", ylab="Average Helicity")
## } else { # TYPE=="length"
##     plot(NA, xlim=c(1, length(ids)), ylim=range(RESOLUTION),
##         xlab="Group Index", ylab="Amplicon Length", yaxs="i", xaxs="i")
##     xaxs <- RESOLUTION[-1] - diff(RESOLUTION)/2 # average lengths
##     for (i in seq_along(ids)) {
##         w <- which(signatures[, i] > 0)
##         if (length(w) > 0)
##             segments(i, xaxs[w], i + 1, xaxs[w], lwd=2)
##     }
## }


###################################################
### code chunk number 16: expr11 (eval = FALSE)
###################################################
## PSET <- which.max(primers$digest_score) # top scoring with digestion
## 
## f_primer <- DNAString(primers$forward_primer[PSET])
## r_primer <- DNAString(primers$reverse_primer[PSET])
## r_primer <- reverseComplement(r_primer)
## enzyme <- RESTRICTION_ENZYMES[primers$enzyme[PSET]]
## 
## signatures[] <- 0 # initialize the results matrix used previously
## for (i in seq_along(ids)) {
##     dna <- SearchDB(dbConn, identifier=ids[i], remove="all", verbose=FALSE)
##     amplicons <- matchLRPatterns(f_primer, r_primer,
##                                  MAX_SIZE, unlist(dna),
##                                  max.Lmismatch=1, max.Rmismatch=1,
##                                  Lfixed="subject", Rfixed="subject")
##     amplicons <- as(amplicons, "DNAStringSet")
##     if (length(amplicons)==0)
##         next
##     digested <- DigestDNA(enzyme, amplicons, strand="top")
##     digested <- unlist(digested) # convert to DNAStringSet
##     
##     if (TYPE=="melt") {
##         signature <- MeltDNA(digested, "melt curves", RESOLUTION)
##         # weight melting curves by their fragment's width
##         signature <- t(signature)*width(digested)
##         signatures[, i] <- colSums(signature)/sum(width(digested))
##     } else { # TYPE=="length"
##         signature <- .bincode(width(digested), RESOLUTION)
##         for (j in signature[which(!is.na(signature))])
##             signatures[j, i] <- signatures[j, i] + 1/length(signature)
##     }
## }
## 
## if (TYPE=="melt") {
##     matplot(RESOLUTION, signatures, type="l",
##         xlab="Temperature (degrees Celsius)", ylab="Average Helicity")
## } else { # TYPE=="length"
##     plot(NA, xlim=c(1, length(ids)), ylim=range(RESOLUTION),
##         xlab="Group Index", ylab="Amplicon Length", yaxs="i", xaxs="i")
##     xaxs <- RESOLUTION[-1] - diff(RESOLUTION)/2 # average lengths
##     for (i in seq_along(ids)) {
##         w <- which(signatures[, i] > 0)
##         if (length(w) > 0)
##             segments(i, xaxs[w], i + 1, xaxs[w], lwd=2)
##     }
## }


###################################################
### code chunk number 17: sessinfo
###################################################
zzz <- dbDisconnect(dbConn)
toLatex(sessionInfo(), locale=FALSE)