## ----style, echo = FALSE, results = 'asis', message=FALSE----------------
BiocStyle::markdown()

## ----env, message=FALSE, echo=FALSE, cache=FALSE, warning=FALSE----------
library("Pbase")

## ----schema, echo=FALSE, fig.cap='Mapping proteins to a genome reference.', fig.align='center'----
Pbase:::mapplot()

## ----p, message=FALSE----------------------------------------------------
library("Pbase")
data(p)
p
seqnames(p)

## ----startend------------------------------------------------------------
pcols(p)[1, c("start", "end")]

## ----bm, cache=TRUE, message=FALSE---------------------------------------
library("biomaRt")
ens <- useMart("ensembl", "hsapiens_gene_ensembl")
ens

bm <- select(ens, keys = seqnames(p),
             keytype = "uniprot_swissprot",
             columns = c(
                 "uniprot_swissprot",
                 "hgnc_symbol",
                 "ensembl_transcript_id"))

bm

## ----enst----------------------------------------------------------------
acols(p)$ENST

## ----etris2grl-----------------------------------------------------------
grl <- etrid2grl(acols(p)$ENST, ens)
all.equal(names(grl), acols(p)$ENST,
          check.attributes=FALSE)
grl

## ----pc------------------------------------------------------------------
pcgrl <- proteinCoding(grl)
pcgrl

## ----echo=FALSE----------------------------------------------------------
pp <- p[5]
n <- elementNROWS(pranges(pp))
pepstring <- paste(unique(pcols(pp)[[1]]$pepseq), collapse = ", ")

## ------------------------------------------------------------------------
sort(pranges(p)[5])
plot(p[5])

## ------------------------------------------------------------------------
plotAsGeneRegionTrack(grl[[5]], pcgrl[[5]])

## ----protfromgenome------------------------------------------------------
library("BSgenome.Hsapiens.NCBI.GRCh38")
head(seqnames(BSgenome.Hsapiens.NCBI.GRCh38))
if (!"chr1" %in% seqnames(BSgenome.Hsapiens.NCBI.GRCh38))
    seqnames(BSgenome.Hsapiens.NCBI.GRCh38)[1:23] <-
        paste0("chr", seqnames(BSgenome.Hsapiens.NCBI.GRCh38)[1:23])
seqnames(BSgenome.Hsapiens.NCBI.GRCh38)[21:27]

## ----aaseq---------------------------------------------------------------
s <- getSeq(BSgenome.Hsapiens.NCBI.GRCh38, pcgrl[[5]])
s

if (isReverse(pcgrl[[5]]))
    s <- rev(s)

aaseq <- translate(unlist(s))
aaseq

## ----aln-----------------------------------------------------------------
writePairwiseAlignments(pairwiseAlignment(aa(p[5]), aaseq))

## ----map-----------------------------------------------------------------
res <- mapToGenome(p[5], pcgrl[5])
res[[1]]

## ----pepcoords, fig.align='center'---------------------------------------
plotAsAnnotationTrack(res[[1]], grl[[5]])

## ----msmsspectra, fig.align='center'-------------------------------------
data(pms)

library("ggplot2")
details <- function(identifier, ...) {
    p <- plot(pms[[as.numeric(identifier)]], full=TRUE, plot=FALSE) + ggtitle("") 
    p <- p + theme_bw() + theme(axis.text.y = element_blank(),
                                axis.text.x = element_blank()) + 
                                labs(x = NULL, y = NULL)
    print(p, newpage=FALSE)
}

res <- res[[1]]
deTrack <- AnnotationTrack(start = start(res),
                           end = end(res),
                           genome = "hg38", chromosom = "chrX",
                           id = pcols(p)[[5]]$acquisitionNum,
                           name = "MS2 spectra",
                           stacking = "squish", fun = details)

grTrack <- GeneRegionTrack(grl[[5]],
                           name = acols(p)$ENST[5])
ideoTrack <- IdeogramTrack(genome = "hg38",
                           chromosome = "chrX")
axisTrack <- GenomeAxisTrack()

plotTracks(list(ideoTrack, axisTrack, deTrack, grTrack),
           add53 = TRUE, add35 = TRUE)

## ----pmap----------------------------------------------------------------
pres <- pmapToGenome(p, pcgrl)
pres

## ------------------------------------------------------------------------
k <- 6
seqnames(p)[k]

## ----remindbm------------------------------------------------------------
sel <- bm$uniprot_swissprot == seqnames(p)[k]
bm[sel, ]
acols(p)$ENST[k]

## ----etris2grl2----------------------------------------------------------
eid <- bm[sel, 3]
names(eid) <- bm[sel, 1]
eid

grl <- etrid2grl(eid, ens, use.names = TRUE)
pcgrl <- proteinCoding(grl)

## ----plot5---------------------------------------------------------------
plotAsGeneRegionTrack(pcgrl)

## ----getseq2, warning=FALSE----------------------------------------------

lseq <- lapply(getSeq(BSgenome.Hsapiens.NCBI.GRCh38, pcgrl),
               function(s) translate(unlist(s)))

laln <- sapply(lseq, pairwiseAlignment, aa(p[k]))
sapply(laln, nmatch)/width(aa(p[k]))

## ----ki, echo=FALSE------------------------------------------------------
ki <- which.max(sapply(laln, nmatch))

## ----checkk--------------------------------------------------------------
ki <- which.max(sapply(laln, nmatch))
stopifnot(eid[ki] == acols(p)$ENST[k])

## ----map2----------------------------------------------------------------
res <- pmapToGenome(p[k], pcgrl[ki])

## ----pepcoords2----------------------------------------------------------
plotAsAnnotationTrack(res[[1]], pcgrl[[ki]])

## ----coordall------------------------------------------------------------
alleid <- bm[, 3]
names(alleid) <- bm[, 1]
grl <- etrid2grl(alleid, ens, use.names = TRUE)
pcgrl <- proteinCoding(grl)
res <- mapToGenome(p, pcgrl)
length(res)

## ----si------------------------------------------------------------------
sessionInfo()