\name{hmm}
\alias{hmm}

\title{Application of the Viterbi algorithm to copy number and/or
genotype data.}
\description{
  A wrapper for fitting the HMM.
}
\usage{
hmm(object, hmm.params, use.baf=FALSE, ...)
}

\arguments{

\item{object}{one of the following classes derived from eSet:
	\code{SnpSet}, \code{oligoSnpSet}, \code{CopyNumberSet},
	\code{CNSet}}

      \item{hmm.params}{\code{HmmOptionList}. See details.}

      \item{use.baf}{Logical. Whether to use the BAFs instead of the
      genotype calls and confidence scores to estimate the emission
      probabilities. See details.}

\item{\dots}{The argument \code{k} to the function \code{runmed} can be
  passed for assessing the probability of an outlier. See details.}
}

\details{

  The probability that a point estimate is an outlier is assessed by
  subtracting the running median of the copy number estimates from the
  point estimates (referred to as 'delta'). For copy number changes that
  span multiple markers, we expect delta to be near zero and to have a
  small variance.  For single point outliers, delta can be large in
  absolute value and have a large variance. We estimate the mixture
  probabilities (the probability that an observation is an outlier)
  using EM.  We plug in the estimate of the outlier probability into the
  emission probabilities.  Specifally, we model the emission
  probabilities as a Uniform-Gaussian mixture, where the Uniform
  component handles outliers.

  The function used for the running median is \code{runmed}. One can
  pass an integer value of $k$ to \code{runmed}.  Larger values of
  \code{k} should result in fewer segments with low coverage.

  For \code{oligoSnpSet} objects, the emission probability for the HMM
  is calculated as

  log emission = log GT + log CN

  where log GT is the emission probability for the genotype calls and
  log CN is the emission probability for the copy number estimates.
  When \code{use.baf} is TRUE, the emission probabilities are estimated
  from the B allele frequencies instead of the genotype calls /
  confidence scores.  Generally, use of the \code{use.baf} argument
  requires a \code{CNSet} object obtained from preprocessing with the
  \code{crlmm} package.  However, users may supply the B allele
  frequencies from external sources (e.g., BeadStudio software) when
  available.

  See the examples below for using the \code{use.baf} argument and how
  to visualize the predicted states along with the low-level genotype
  and copy number summaries.

}

\value{

	An object of class \code{RangedData}.

}

\seealso{\code{\link{hmm.setup}}, \code{\link{runmed}}, \code{\link[crlmm]{calculateRBaf}}}

\references{

    RB Scharpf et al. (2008) Hidden Markov Models for the assessment of
  chromosomal alterations using high-throughput SNP arrays, Annals of
  Applied Statistics

  }

\examples{
data(locusLevelData, package="oligoClasses")
oligoSet <- new("oligoSnpSet",
		copyNumber=log2(locusLevelData[["copynumber"]]/100),
		call=locusLevelData[["genotypes"]],
		callProbability=locusLevelData[["crlmmConfidence"]],
		annotation=locusLevelData[["platform"]])
oligoSet <- oligoSet[!is.na(chromosome(oligoSet)), ]
oligoSet <- oligoSet[order(chromosome(oligoSet), position(oligoSet)),]
oligoSet <- oligoSet[chromosome(oligoSet) < 3, ]
hmmOpts <- hmm.setup(oligoSet, is.log=TRUE)
fit <- hmm(oligoSet, hmmOpts, k=3)
xyplot(cn~x, oligoSet, range=fit[4, ], frame=2e6,pch=21, cex=0.5, panel=xypanel)

## Useful accessors for RangedData
ranges(fit)

##Log likelihood ratio comparing likelihood of predicted state to the 'normal' state
## for each segment
fit$LLR
## the number of SNPs / nonpolymorphic loci in each segment
coverage2(fit)

sampleNames(fit)
chromosome(fit)

##---------------------------------------------------------------------------
##
##  Downstream of CRLMM for genotyping
##
##---------------------------------------------------------------------------
\dontrun{
if(require("crlmm")){
	data(cnSetExample, package="crlmm")
        cnSetExample <- order(cnSetExample)
	oligoSet <- as(cnSetExample, "oligoSnpSet")
        assayDataElement(oligoSet, "baf") <- calculateRBaf(cnSetExample)[["baf"]]
        ## uses genotype emission probabilities
        hmmOpts <- HmmOptionList(oligoSet, is.log=TRUE, ICE=FALSE)
        fit2 <- hmm(oligoSet, hmmOpts, use.baf=FALSE)
        xyplot(cn ~ x | range, data=oligoSet, range=fit2[1:10, ], frame=2e6,
               panel=xypanel, cex=0.3, pch=21, border="blue",
               scales=list(x="free"),
               col.hom="lightblue", col.het="salmon", col.np="grey60", fill.np="grey60")
        xyplot(baf ~ x | range, data=oligoSet, range=fit2[1:10, ], frame=2e6,
               panel=xypanel, cex=0.3, pch=21, border="blue",
               scales=list(x="free"),
               col.hom="lightblue", col.het="salmon", col.np="grey60", fill.np="grey60")
	hmmOpts <- HmmOptionList(oligoSet, is.log=TRUE)

        ## use BAFs for emission probs.
        fit3 <- hmm(oligoSet, hmmOpts, use.baf=TRUE)
        ## plot the copy number for the first 10 ranges
        xyplot(cn ~ x | range, data=oligoSet, range=fit3[1:10, ], frame=2e6,
               panel=xypanel, cex=0.2, pch=21, border="blue",
               scales=list(x="free"),
               col.hom="lightblue", col.het="salmon", col.np="grey60", fill.np="grey60")
        ## plot the BAFs for the first 10 ranges
        xyplot(baf ~ x | range, data=oligoSet, range=fit3[1:10, ], frame=2e6,
               panel=xypanel, cex=0.2, pch=21, border="orange",
               scales=list(x="free"),
               col.hom="lightblue", col.het="salmon", col.np="grey60", fill.np="grey60")
} ## if(require("crlmm"))
} ## \dontrun

}

\author{R. Scharpf}

\keyword{models}
\keyword{manip}
\keyword{ts}