From c25c79023fddc105041ee4185bf99a03bac238a9 Mon Sep 17 00:00:00 2001 From: dochvam Date: Fri, 20 Sep 2019 11:58:59 -0700 Subject: [PATCH 01/20] Add dNmixture file --- R/dNMixture.R | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 R/dNMixture.R diff --git a/R/dNMixture.R b/R/dNMixture.R new file mode 100644 index 0000000..38d6f41 --- /dev/null +++ b/R/dNMixture.R @@ -0,0 +1,102 @@ +# dNmixture +#' N-mixture distribution for use in NIMBLE models +#' +#' \code{dNmixture} provides dynamic occupancy model distributions for NIMBLE models. +#' +#' N-mixture models +#' model abundance at a series of sites over many replicate observations. The likelihood of an observation in site s +#' at time t depends on the +#' +#' Compared to writing NIMBLE models with a discrete latent state for true occupancy status and +#' a separate scalar datum for each observation, +#' use of these distributions allows +#' one to directly sum over the discrete latent state and calculate the probability of +#' all observations from one site jointly. +#' +#' @name dNmixture +#' @aliases dDynOcc_ss dDynOcc_sv dDynOcc_vs dDynOcc_vv +#' +#' @param x count observation data, 0 and positive integers +#' @param lambda expected value of the Poisson distribution of true abundance +#' @param prob observation probability for each X +#' @param notZero 0 if datum is structural zero, 1 otherwise. Allows for zero-inflated Poisson models +#' @param minN the minimum true abundance to sum over. Set to -1 for distribution to select based on lambda +#' @param maxN the maximum true abundance to sum over. Set to -1 for distribution to select based on lambda +#' +#' @author Lauren Ponisio, Ben Goldstein, Perry de Valpine +#' +#' These are written in the format of user-defined distributions to extend NIMBLE's +#' use of the BUGS model language. More information about writing user-defined distributions can be found +#' in the NIMBLE User Manual at \code{https://r-nimble.org}. +#' +#' The first argument to a "d" function is always named \code{x} and is given on the +#' left-hand side of a (stochastic) model declaration in the BUGS model language (used by NIMBLE). +#' When using these distributions in a NIMBLE model, the user +#' should not provide the \code{log} argument. (It is always set to \code{TRUE} when used +#' in a NIMBLE model.) +#' +#' For example, in a NIMBLE model, +#' +#' \code{detections[1:T] ~ dNmixture(lambda = lambda, prob = p[1:T], notZero = 1, minN = -1, maxN = -1)} +#' +#' declares that the \code{detections[1:T]} vector follows an N-mixture model distribution +#' with parameters as indicated, assuming all the parameters have been declared elsewhere in the model. +#' +#' +#' @seealso For regular occupancy models, see documentation for dOcc. +#' @examples +#' \dontrun{} + + +NULL +#' @rdname dNmixture +#' @export +dNmixture <- nimbleFunction( + run = function(x = double(1), + lambda = double(), + prob = double(1), + notZero = double(), + minN = double(), + maxN = double(), + log = integer(0, default = 0)) { + if (lambda < 0) { + if (log) return(-Inf) + else return(0) + } + if (is.na.vec(x) | is.nan.vec(x)) { + if (log) return(-Inf) + return(0) + } + if (notZero == 0) { ## It is structural zero + if (all(x == 0)) { + if (log) return(0) + else return(1) + } else { + if (log) return(-Inf) + else return(0) + } + } + ## It is not a structural zero. + ## + ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) + ## We determine the lowest N and highest N at extreme quantiles and sum over those. + minN <- min(x + qpois(0.00001, lambda * (1 - prob))) + maxN <- max(x + qpois(0.99999, lambda * (1 - prob))) + minN <- max( max(x), minN ) ## set minN to at least the largest x + + obsProb <- 0 + if (maxN > minN) { ## should normally be true, but check in case it isn't in some corner case. + ## print("counting from ", minN, " to ", maxN, " with lambda = ", lambda) + for (N in minN:maxN) { + thisObsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) + obsProb <- obsProb + thisObsProb + } + } else { + ## return a potentially non-zero obsProb + N <- max(x) + obsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) + } + if (log) return(log(obsProb)) + else return(obsProb) + returnType(double(0)) + }) From a9d168c71674a5dbe679222f95fa694275596725 Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 29 Oct 2019 17:29:42 -0700 Subject: [PATCH 02/20] Add some Nmixture testing --- R/dNMixture.R | 64 ++++++++++++------- R/zzz.R | 22 ++++++- tests/testthat/test-Nmixture.R | 110 +++++++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+), 23 deletions(-) create mode 100644 tests/testthat/test-Nmixture.R diff --git a/R/dNMixture.R b/R/dNMixture.R index 38d6f41..e2b1a35 100644 --- a/R/dNMixture.R +++ b/R/dNMixture.R @@ -19,9 +19,19 @@ #' @param x count observation data, 0 and positive integers #' @param lambda expected value of the Poisson distribution of true abundance #' @param prob observation probability for each X -#' @param notZero 0 if datum is structural zero, 1 otherwise. Allows for zero-inflated Poisson models -#' @param minN the minimum true abundance to sum over. Set to -1 for distribution to select based on lambda -#' @param maxN the maximum true abundance to sum over. Set to -1 for distribution to select based on lambda +#' @param notZero 0 if datum is structural zero, 1 otherwise. Allows for +#' zero-inflated Poisson models +#' @param minN the minimum true abundance to sum over. Set to -1 for distribution +#' to select based on lambda. Ignored if dynamicMinMax = TRUE +#' @param maxN the maximum true abundance to sum over. Set to -1 for distribution +#' to select based on lambda. Ignored if dynamicMinMax = TRUE +#' @param dynamicMinMax 0 to use input minN and maxN, 1 to ignore +#' provided minN and maxN and algorithmically select reasonable bounds for N. +#' @param n number of random draws, each returning a vector of length +#' \code{len}. Currently only \code{n = 1} is supported, but the +#' argument exists for standardization of "\code{r}" functions. +#' @param len The length of the x vector. Only used for simulation in \code{rNmixture}, +#' ignored in \code{dNmixture} #' #' @author Lauren Ponisio, Ben Goldstein, Perry de Valpine #' @@ -53,36 +63,35 @@ NULL #' @export dNmixture <- nimbleFunction( run = function(x = double(1), - lambda = double(), - prob = double(1), - notZero = double(), - minN = double(), - maxN = double(), + lambda = double(0), + prob = double(1), # Two cases, p scalar and p vector + minN = double(0, default = -1), + maxN = double(0, default = -1), + dynamicMinMax = double(0, default = 0), + len = double(0, default = 0), log = integer(0, default = 0)) { + # Lambda cannot be negative if (lambda < 0) { if (log) return(-Inf) else return(0) } + # Check if there is any data if (is.na.vec(x) | is.nan.vec(x)) { if (log) return(-Inf) return(0) } - if (notZero == 0) { ## It is structural zero - if (all(x == 0)) { - if (log) return(0) - else return(1) - } else { - if (log) return(-Inf) - else return(0) - } + + if (minN == -1 & maxN == -1 & !dynamicMinMax) { + stop("minN and maxN must be provided if dynamicMinMax is not 1.") } - ## It is not a structural zero. - ## + ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. - minN <- min(x + qpois(0.00001, lambda * (1 - prob))) - maxN <- max(x + qpois(0.99999, lambda * (1 - prob))) - minN <- max( max(x), minN ) ## set minN to at least the largest x + if (dynamicMinMax) { + minN <- min(x + qpois(0.00001, lambda * (1 - prob))) + maxN <- max(x + qpois(0.99999, lambda * (1 - prob))) + minN <- max( max(x), minN ) ## set minN to at least the largest x + } obsProb <- 0 if (maxN > minN) { ## should normally be true, but check in case it isn't in some corner case. @@ -100,3 +109,16 @@ dNmixture <- nimbleFunction( else return(obsProb) returnType(double(0)) }) + + +rNmixture <- nimbleFunction( + run = function(n = integer(), + lambda = double(), + prob = double(1), + minN = double(), + maxN = double(), + dynamicMinMax = integer(0), + len = double()) { + stop("Not implemented yet") +}) + diff --git a/R/zzz.R b/R/zzz.R index b5381e5..2042c67 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -282,5 +282,23 @@ 'len = double()'), mixedSizes = TRUE, pqAvail = FALSE)), verbose = F - )}) -} + ) + + registerDistributions(list( + dNmixture = list( + BUGSdist = "dNmixture(lambda, prob, minN, maxN, dynamicMinMax, len)", + Rdist = "dNmixture(lambda, prob, minN, maxN, dynamicMinMax, len)", + discrete = TRUE, + types = c('value = double(1)', + 'lambda = double()', + 'prob = double(1)', + 'minN = double()', + 'maxN = double()', + 'dynamicMinMax = integer(0)', + 'len = double()'), + mixedSizes = FALSE, + pqAvail = FALSE + )), verbose = T + ) + +})} diff --git a/tests/testthat/test-Nmixture.R b/tests/testthat/test-Nmixture.R new file mode 100644 index 0000000..e172d57 --- /dev/null +++ b/tests/testthat/test-Nmixture.R @@ -0,0 +1,110 @@ +# Test the N-mixture distribution nimbleFunction. + +# ----------------------------------------------------------------------------- +# 0. Load +# Set the context for testthat +context("Testing dNmixture-related functions.") + +# ----------------------------------------------------------------------------- +# 1. Test dNmixture +test_that("dNmixture works", + { + # Uncompiled calculation + x <- c(1, 0, 1, 3, 0) + lambda <- 8 + prob <- c(0.5, 0.3, 0.5, 0.4, 0.1) + minN <- 0 + maxN <- 250 + dynamicMinMax <- 0 + + probX <- dNmixture(x, lambda, prob, minN, maxN, dynamicMinMax) + # Manually calculate the correct answer + correctProbX <- 0 + for (N in minN:maxN) { + correctProbX <- correctProbX + dpois(N, lambda) * prod(dbinom(x, N, prob)) + } + + expect_equal(probX, correctProbX) + + # Uncompiled log probability + lProbX <- dNmixture(x, lambda, prob, minN, maxN, dynamicMinMax, log = TRUE) + lCorrectProbX <- log(correctProbX) + expect_equal(lProbX, lCorrectProbX) + + # Compilation and compiled calculations + CdNmixture <- compileNimble(dNmixture) + CprobX <- CdNmixture(x, lambda, prob, minN, maxN, dynamicMinMax) + expect_equal(CprobX, probX) + + ClProbX <- CdNmixture(x, lambda, prob, minN, maxN, dynamicMinMax, log = TRUE) + expect_equal(ClProbX, lProbX) + + # Use in Nimble model + nc <- nimbleCode({ + x[1:5] ~ dNmixture(lambda = lambda, prob = prob[1:5], + minN = minN, maxN = maxN, + dynamicMinMax = dynamicMinMax, len = 5) + }) + + m <- nimbleModel(code = nc, + data = list(x = x), + inits = list(lambda = lambda, + prob = prob), + constants = list(minN = minN, maxN = maxN, + dynamicMinMax = dynamicMinMax)) + m$calculate() + MlProbX <- m$getLogProb("x") + expect_equal(MlProbX, lProbX) + + # Compiled model + cm <- compileNimble(m) + cm$calculate() + CMlProbX <- cm$getLogProb("x") + expect_equal(CMlProbX, lProbX) + + # Test imputing value for all NAs + xNA <- c(NA, NA, NA, NA, NA) + mNA <- nimbleModel(nc, data = list(x = xNA), + inits = list(lambda = lambda, + prob = prob)) + mNAConf <- configureMCMC(mNA) + mNAConf$addMonitors('x') + mNA_MCMC <- buildMCMC(mNAConf) + cmNA <- compileNimble(mNA, mNA_MCMC) + + set.seed(0) + cmNA$mNA_MCMC$run(10) + + # Did the imputed values come back? + expect_true(all(!is.na(as.matrix(cmNA$mNA_MCMC$mvSamples)[,"x[2]"]))) + + # Test simulation code + set.seed(1) + nSim <- 10 + xSim <- array(NA, dim = c(nSim, length(x))) + for(i in 1:nSim) + xSim[i,] <- rCJS_ss(1, probSurvive, probCapture, len = length(x)) + set.seed(1) + CrCJS_ss <- compileNimble(rCJS_ss) + CxSim <- array(NA, dim = c(nSim, length(x))) + for(i in 1:nSim) + CxSim[i,] <- CrCJS_ss(1, probSurvive, probCapture, len = length(x)) + expect_identical(xSim, CxSim) + + simNodes <- m$getDependencies(c('probSurvive', 'probCapture'), self = FALSE) + mxSim <- array(NA, dim = c(nSim, length(x))) + set.seed(1) + for(i in 1:nSim) { + m$simulate(simNodes, includeData = TRUE) + mxSim[i,] <- m$x + } + expect_identical(mxSim, xSim) + + CmxSim <- array(NA, dim = c(nSim, length(x))) + set.seed(1) + for(i in 1:nSim) { + cm$simulate(simNodes, includeData = TRUE) + CmxSim[i,] <- cm$x + } + expect_identical(CmxSim, mxSim) + }) From d26d485a3e72262ef794440e08960c14de8fceaf Mon Sep 17 00:00:00 2001 From: dochvam Date: Wed, 30 Oct 2019 14:18:57 -0700 Subject: [PATCH 03/20] Fixed problem in compiling Nmixture model --- R/dNMixture.R | 34 ++++++++++++++-------------------- R/zzz.R | 9 ++++----- tests/testthat/test-Nmixture.R | 19 +++++++++---------- 3 files changed, 27 insertions(+), 35 deletions(-) diff --git a/R/dNMixture.R b/R/dNMixture.R index e2b1a35..f05184b 100644 --- a/R/dNMixture.R +++ b/R/dNMixture.R @@ -63,31 +63,26 @@ NULL #' @export dNmixture <- nimbleFunction( run = function(x = double(1), - lambda = double(0), + lambda = double(), prob = double(1), # Two cases, p scalar and p vector - minN = double(0, default = -1), - maxN = double(0, default = -1), - dynamicMinMax = double(0, default = 0), - len = double(0, default = 0), + minN = double(), + maxN = double(), log = integer(0, default = 0)) { # Lambda cannot be negative if (lambda < 0) { if (log) return(-Inf) else return(0) } - # Check if there is any data - if (is.na.vec(x) | is.nan.vec(x)) { - if (log) return(-Inf) - return(0) - } - if (minN == -1 & maxN == -1 & !dynamicMinMax) { - stop("minN and maxN must be provided if dynamicMinMax is not 1.") - } + # Check if there is any data + # if (is.na.vec(x) | is.nan.vec(x)) { + # if (log) return(-Inf) + # return(0) + # } ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. - if (dynamicMinMax) { + if (minN == -1 & maxN == -1) { minN <- min(x + qpois(0.00001, lambda * (1 - prob))) maxN <- max(x + qpois(0.99999, lambda * (1 - prob))) minN <- max( max(x), minN ) ## set minN to at least the largest x @@ -107,18 +102,17 @@ dNmixture <- nimbleFunction( } if (log) return(log(obsProb)) else return(obsProb) - returnType(double(0)) + returnType(double()) }) rNmixture <- nimbleFunction( - run = function(n = integer(), + run = function(n = double(), lambda = double(), prob = double(1), minN = double(), - maxN = double(), - dynamicMinMax = integer(0), - len = double()) { - stop("Not implemented yet") + maxN = double()) { + return(rep(0, length(prob))) + returnType(double(1)) }) diff --git a/R/zzz.R b/R/zzz.R index 2042c67..7430b73 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -286,16 +286,15 @@ registerDistributions(list( dNmixture = list( - BUGSdist = "dNmixture(lambda, prob, minN, maxN, dynamicMinMax, len)", - Rdist = "dNmixture(lambda, prob, minN, maxN, dynamicMinMax, len)", + BUGSdist = "dNmixture(lambda, prob, minN, maxN)", + Rdist = "dNmixture(lambda, prob, minN, maxN)", discrete = TRUE, types = c('value = double(1)', 'lambda = double()', 'prob = double(1)', 'minN = double()', - 'maxN = double()', - 'dynamicMinMax = integer(0)', - 'len = double()'), + 'maxN = double()' + ), mixedSizes = FALSE, pqAvail = FALSE )), verbose = T diff --git a/tests/testthat/test-Nmixture.R b/tests/testthat/test-Nmixture.R index e172d57..2d19050 100644 --- a/tests/testthat/test-Nmixture.R +++ b/tests/testthat/test-Nmixture.R @@ -15,9 +15,9 @@ test_that("dNmixture works", prob <- c(0.5, 0.3, 0.5, 0.4, 0.1) minN <- 0 maxN <- 250 - dynamicMinMax <- 0 - probX <- dNmixture(x, lambda, prob, minN, maxN, dynamicMinMax) + + probX <- dNmixture(x, lambda, prob, minN, maxN) # Manually calculate the correct answer correctProbX <- 0 for (N in minN:maxN) { @@ -27,37 +27,36 @@ test_that("dNmixture works", expect_equal(probX, correctProbX) # Uncompiled log probability - lProbX <- dNmixture(x, lambda, prob, minN, maxN, dynamicMinMax, log = TRUE) + lProbX <- dNmixture(x, lambda, prob, minN, maxN, log = TRUE) lCorrectProbX <- log(correctProbX) expect_equal(lProbX, lCorrectProbX) # Compilation and compiled calculations CdNmixture <- compileNimble(dNmixture) - CprobX <- CdNmixture(x, lambda, prob, minN, maxN, dynamicMinMax) + CprobX <- CdNmixture(x, lambda, prob, minN, maxN) expect_equal(CprobX, probX) - ClProbX <- CdNmixture(x, lambda, prob, minN, maxN, dynamicMinMax, log = TRUE) + ClProbX <- CdNmixture(x, lambda, prob, minN, maxN, log = TRUE) expect_equal(ClProbX, lProbX) # Use in Nimble model nc <- nimbleCode({ x[1:5] ~ dNmixture(lambda = lambda, prob = prob[1:5], - minN = minN, maxN = maxN, - dynamicMinMax = dynamicMinMax, len = 5) + minN = minN, maxN = maxN) + }) m <- nimbleModel(code = nc, data = list(x = x), inits = list(lambda = lambda, prob = prob), - constants = list(minN = minN, maxN = maxN, - dynamicMinMax = dynamicMinMax)) + constants = list(minN = minN, maxN = maxN)) m$calculate() MlProbX <- m$getLogProb("x") expect_equal(MlProbX, lProbX) # Compiled model - cm <- compileNimble(m) + cm <- compileNimble(m, showCompilerOutput = TRUE) cm$calculate() CMlProbX <- cm$getLogProb("x") expect_equal(CMlProbX, lProbX) From 36a6057c491f61364df3782ba2e67ad07a6e22eb Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 5 Nov 2019 13:11:08 -0800 Subject: [PATCH 04/20] Implement and test dNmixture_s and dNmixture_v. --- R/dNMixture.R | 106 +++++++++++++++++++-- R/zzz.R | 26 +++++- tests/testthat/test-Nmixture.R | 165 ++++++++++++++++++++++++++++----- 3 files changed, 261 insertions(+), 36 deletions(-) diff --git a/R/dNMixture.R b/R/dNMixture.R index f05184b..9d237e3 100644 --- a/R/dNMixture.R +++ b/R/dNMixture.R @@ -14,7 +14,7 @@ #' all observations from one site jointly. #' #' @name dNmixture -#' @aliases dDynOcc_ss dDynOcc_sv dDynOcc_vs dDynOcc_vv +#' @aliases dNmixture_v dNmixture_s #' #' @param x count observation data, 0 and positive integers #' @param lambda expected value of the Poisson distribution of true abundance @@ -30,8 +30,7 @@ #' @param n number of random draws, each returning a vector of length #' \code{len}. Currently only \code{n = 1} is supported, but the #' argument exists for standardization of "\code{r}" functions. -#' @param len The length of the x vector. Only used for simulation in \code{rNmixture}, -#' ignored in \code{dNmixture} +#' @param len The length of the x vector. Needed for simulation in \code{rNmixture_*}. #' #' @author Lauren Ponisio, Ben Goldstein, Perry de Valpine #' @@ -61,13 +60,17 @@ NULL #' @rdname dNmixture #' @export -dNmixture <- nimbleFunction( +dNmixture_v <- nimbleFunction( run = function(x = double(1), lambda = double(), - prob = double(1), # Two cases, p scalar and p vector + prob = double(1), minN = double(), maxN = double(), + len = double(), log = integer(0, default = 0)) { + if (length(x) != len) stop("in dNmixture_v, len must equal length(x).") + if (length(prob) != len) stop("in dNmixture_v, len must equal length(prob).") + # Lambda cannot be negative if (lambda < 0) { if (log) return(-Inf) @@ -105,14 +108,99 @@ dNmixture <- nimbleFunction( returnType(double()) }) +NULL +#' @rdname dNmixture +#' @export +dNmixture_s <- nimbleFunction( + run = function(x = double(1), + lambda = double(), + prob = double(), + minN = double(), + maxN = double(), + len = double(), + log = integer(0, default = 0)) { + if (length(x) != len) stop("in dNmixture_v, len must equal length(x).") + + # Lambda cannot be negative + if (lambda < 0) { + if (log) return(-Inf) + else return(0) + } + + # Check if there is any data + # if (is.na.vec(x) | is.nan.vec(x)) { + # if (log) return(-Inf) + # return(0) + # } + + ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) + ## We determine the lowest N and highest N at extreme quantiles and sum over those. + if (minN == -1 & maxN == -1) { + minN <- min(x + qpois(0.00001, lambda * (1 - prob))) + maxN <- max(x + qpois(0.99999, lambda * (1 - prob))) + minN <- max( max(x), minN ) ## set minN to at least the largest x + } + + obsProb <- 0 + if (maxN > minN) { ## should normally be true, but check in case it isn't in some corner case. + ## print("counting from ", minN, " to ", maxN, " with lambda = ", lambda) + for (N in minN:maxN) { + thisObsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) + obsProb <- obsProb + thisObsProb + } + } else { + ## return a potentially non-zero obsProb + N <- max(x) + obsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) + } + if (log) return(log(obsProb)) + else return(obsProb) + returnType(double()) + }) -rNmixture <- nimbleFunction( +NULL +#' @rdname dNmixture +#' @export +rNmixture_v <- nimbleFunction( run = function(n = double(), lambda = double(), prob = double(1), minN = double(), - maxN = double()) { - return(rep(0, length(prob))) - returnType(double(1)) + maxN = double(), + len = double()) { + if (n != 1) stop("rNmixture_v only works for n = 1") + if (length(prob) != len) stop("In rNmixture_v, len must equal length(prob).") + trueN <- rpois(1, lambda) + ans <- numeric(len) + for (i in 1:len) { + ans[i] <- rbinom(n = 1, size = trueN, prob = prob[i]) + } + + return(ans) + returnType(double(1)) +}) + +NULL +#' @rdname dNmixture +#' @export +rNmixture_s <- nimbleFunction( + run = function(n = double(), + lambda = double(), + prob = double(), + minN = double(), + maxN = double(), + len = double()) { + if (n != 1) stop("rNmixture_v only works for n = 1") + trueN <- rpois(1, lambda) + ans <- numeric(len) + for (i in 1:len) { + ans[i] <- rbinom(n = 1, size = trueN, prob = prob) + } + + return(ans) + returnType(double(1)) }) + + + diff --git a/R/zzz.R b/R/zzz.R index 7430b73..e26ae76 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -285,15 +285,33 @@ ) registerDistributions(list( - dNmixture = list( - BUGSdist = "dNmixture(lambda, prob, minN, maxN)", - Rdist = "dNmixture(lambda, prob, minN, maxN)", + dNmixture_v = list( + BUGSdist = "dNmixture_v(lambda, prob, minN, maxN, len)", + Rdist = "dNmixture_v(lambda, prob, minN, maxN, len)", discrete = TRUE, types = c('value = double(1)', 'lambda = double()', 'prob = double(1)', 'minN = double()', - 'maxN = double()' + 'maxN = double()', + 'len = double()' + ), + mixedSizes = FALSE, + pqAvail = FALSE + )), verbose = T + ) + + registerDistributions(list( + dNmixture_s = list( + BUGSdist = "dNmixture_s(lambda, prob, minN, maxN, len)", + Rdist = "dNmixture_s(lambda, prob, minN, maxN, len)", + discrete = TRUE, + types = c('value = double(1)', + 'lambda = double()', + 'prob = double()', + 'minN = double()', + 'maxN = double()', + 'len = double()' ), mixedSizes = FALSE, pqAvail = FALSE diff --git a/tests/testthat/test-Nmixture.R b/tests/testthat/test-Nmixture.R index 2d19050..0d7ee5f 100644 --- a/tests/testthat/test-Nmixture.R +++ b/tests/testthat/test-Nmixture.R @@ -6,8 +6,8 @@ context("Testing dNmixture-related functions.") # ----------------------------------------------------------------------------- -# 1. Test dNmixture -test_that("dNmixture works", +# 1. Test dNmixture_v +test_that("dNmixture_v works", { # Uncompiled calculation x <- c(1, 0, 1, 3, 0) @@ -15,9 +15,9 @@ test_that("dNmixture works", prob <- c(0.5, 0.3, 0.5, 0.4, 0.1) minN <- 0 maxN <- 250 + len <- 5 - - probX <- dNmixture(x, lambda, prob, minN, maxN) + probX <- dNmixture_v(x, lambda, prob, minN, maxN, len) # Manually calculate the correct answer correctProbX <- 0 for (N in minN:maxN) { @@ -27,22 +27,22 @@ test_that("dNmixture works", expect_equal(probX, correctProbX) # Uncompiled log probability - lProbX <- dNmixture(x, lambda, prob, minN, maxN, log = TRUE) + lProbX <- dNmixture_v(x, lambda, prob, minN, maxN, len, log = TRUE) lCorrectProbX <- log(correctProbX) expect_equal(lProbX, lCorrectProbX) # Compilation and compiled calculations - CdNmixture <- compileNimble(dNmixture) - CprobX <- CdNmixture(x, lambda, prob, minN, maxN) + CdNmixture_v <- compileNimble(dNmixture_v) + CprobX <- CdNmixture_v(x, lambda, prob, minN, maxN, len) expect_equal(CprobX, probX) - ClProbX <- CdNmixture(x, lambda, prob, minN, maxN, log = TRUE) + ClProbX <- CdNmixture_v(x, lambda, prob, minN, maxN, len, log = TRUE) expect_equal(ClProbX, lProbX) # Use in Nimble model nc <- nimbleCode({ - x[1:5] ~ dNmixture(lambda = lambda, prob = prob[1:5], - minN = minN, maxN = maxN) + x[1:5] ~ dNmixture_v(lambda = lambda, prob = prob[1:5], + minN = minN, maxN = maxN, len = len) }) @@ -50,13 +50,14 @@ test_that("dNmixture works", data = list(x = x), inits = list(lambda = lambda, prob = prob), - constants = list(minN = minN, maxN = maxN)) + constants = list(minN = minN, maxN = maxN, + len = len)) m$calculate() MlProbX <- m$getLogProb("x") expect_equal(MlProbX, lProbX) # Compiled model - cm <- compileNimble(m, showCompilerOutput = TRUE) + cm <- compileNimble(m) cm$calculate() CMlProbX <- cm$getLogProb("x") expect_equal(CMlProbX, lProbX) @@ -65,7 +66,11 @@ test_that("dNmixture works", xNA <- c(NA, NA, NA, NA, NA) mNA <- nimbleModel(nc, data = list(x = xNA), inits = list(lambda = lambda, - prob = prob)) + prob = prob), + constants = list(minN = minN, maxN = maxN, + len = len)) + + mNAConf <- configureMCMC(mNA) mNAConf$addMonitors('x') mNA_MCMC <- buildMCMC(mNAConf) @@ -78,20 +83,134 @@ test_that("dNmixture works", expect_true(all(!is.na(as.matrix(cmNA$mNA_MCMC$mvSamples)[,"x[2]"]))) # Test simulation code + nSim <- 10 + xSim <- array(NA, dim = c(nSim, len)) + set.seed(1) + for (i in 1:nSim) { + xSim[i,] <- rNmixture_v(1, lambda, prob, minN, maxN, len) + } + + CrNmixture_v <- compileNimble(rNmixture_v) + CxSim <- array(NA, dim = c(nSim, len)) + set.seed(1) + for (i in 1:nSim) { + CxSim[i,] <- CrNmixture_v(1, lambda, prob, minN, maxN, len) + } + expect_identical(xSim, CxSim) + + simNodes <- m$getDependencies(c('prob', 'lambda'), self = FALSE) + mxSim <- array(NA, dim = c(nSim, len)) set.seed(1) + for(i in 1:nSim) { + m$simulate(simNodes, includeData = TRUE) + mxSim[i,] <- m$x + } + expect_identical(mxSim, xSim) + + CmxSim <- array(NA, dim = c(nSim, len)) + set.seed(1) + for(i in 1:nSim) { + cm$simulate(simNodes, includeData = TRUE) + CmxSim[i,] <- cm$x + } + expect_identical(CmxSim, mxSim) + }) + +# ----------------------------------------------------------------------------- +# 2. Test dNmixture_s +test_that("dNmixture_s works", + { + # Uncompiled calculation + x <- c(1, 0, 1, 3, 0) + lambda <- 8 + prob <- 0.4 + minN <- 0 + maxN <- 250 + len <- 5 + + probX <- dNmixture_s(x, lambda, prob, minN, maxN, len) + # Manually calculate the correct answer + correctProbX <- 0 + for (N in minN:maxN) { + correctProbX <- correctProbX + dpois(N, lambda) * prod(dbinom(x, N, prob)) + } + + expect_equal(probX, correctProbX) + + # Uncompiled log probability + lProbX <- dNmixture_s(x, lambda, prob, minN, maxN, len, log = TRUE) + lCorrectProbX <- log(correctProbX) + expect_equal(lProbX, lCorrectProbX) + + # Compilation and compiled calculations + CdNmixture_s <- compileNimble(dNmixture_s) + CprobX <- CdNmixture_s(x, lambda, prob, minN, maxN, len) + expect_equal(CprobX, probX) + + ClProbX <- CdNmixture_s(x, lambda, prob, minN, maxN, len, log = TRUE) + expect_equal(ClProbX, lProbX) + + # Use in Nimble model + nc <- nimbleCode({ + x[1:5] ~ dNmixture_s(lambda = lambda, prob = prob, + minN = minN, maxN = maxN, len = len) + + }) + + m <- nimbleModel(code = nc, + data = list(x = x), + inits = list(lambda = lambda, + prob = prob), + constants = list(minN = minN, maxN = maxN, + len = len)) + m$calculate() + MlProbX <- m$getLogProb("x") + expect_equal(MlProbX, lProbX) + + # Compiled model + cm <- compileNimble(m) + cm$calculate() + CMlProbX <- cm$getLogProb("x") + expect_equal(CMlProbX, lProbX) + + # Test imputing value for all NAs + xNA <- c(NA, NA, NA, NA, NA) + mNA <- nimbleModel(nc, data = list(x = xNA), + inits = list(lambda = lambda, + prob = prob), + constants = list(minN = minN, maxN = maxN, + len = len)) + + + mNAConf <- configureMCMC(mNA) + mNAConf$addMonitors('x') + mNA_MCMC <- buildMCMC(mNAConf) + cmNA <- compileNimble(mNA, mNA_MCMC) + + set.seed(0) + cmNA$mNA_MCMC$run(10) + + # Did the imputed values come back? + expect_true(all(!is.na(as.matrix(cmNA$mNA_MCMC$mvSamples)[,"x[2]"]))) + + # Test simulation code nSim <- 10 - xSim <- array(NA, dim = c(nSim, length(x))) - for(i in 1:nSim) - xSim[i,] <- rCJS_ss(1, probSurvive, probCapture, len = length(x)) + xSim <- array(NA, dim = c(nSim, len)) set.seed(1) - CrCJS_ss <- compileNimble(rCJS_ss) - CxSim <- array(NA, dim = c(nSim, length(x))) - for(i in 1:nSim) - CxSim[i,] <- CrCJS_ss(1, probSurvive, probCapture, len = length(x)) + for (i in 1:nSim) { + xSim[i,] <- rNmixture_s(1, lambda, prob, minN, maxN, len) + } + + CrNmixture_s <- compileNimble(rNmixture_s) + CxSim <- array(NA, dim = c(nSim, len)) + set.seed(1) + for (i in 1:nSim) { + CxSim[i,] <- CrNmixture_s(1, lambda, prob, minN, maxN, len) + } expect_identical(xSim, CxSim) - simNodes <- m$getDependencies(c('probSurvive', 'probCapture'), self = FALSE) - mxSim <- array(NA, dim = c(nSim, length(x))) + simNodes <- m$getDependencies(c('prob', 'lambda'), self = FALSE) + mxSim <- array(NA, dim = c(nSim, len)) set.seed(1) for(i in 1:nSim) { m$simulate(simNodes, includeData = TRUE) @@ -99,7 +218,7 @@ test_that("dNmixture works", } expect_identical(mxSim, xSim) - CmxSim <- array(NA, dim = c(nSim, length(x))) + CmxSim <- array(NA, dim = c(nSim, len)) set.seed(1) for(i in 1:nSim) { cm$simulate(simNodes, includeData = TRUE) From 8916c31a7372d067d9895785cb59af9390da6de9 Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 5 Nov 2019 14:15:53 -0800 Subject: [PATCH 05/20] Fixed roxygen and zzz omissions --- DESCRIPTION | 1 + NAMESPACE | 4 ++++ R/zzz.R | 6 +++--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5a95845..3c89e4c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,6 +24,7 @@ Collate: dDHMM.R dHMM.R dOcc.R + dNmixture.R zzz.R RoxygenNote: 6.1.1 Suggests: diff --git a/NAMESPACE b/NAMESPACE index b934c10..976b546 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,8 @@ export(dDynOcc_vvs) export(dDynOcc_vvv) export(dHMM) export(dHMMo) +export(dNmixture_s) +export(dNmixture_v) export(dOcc_s) export(dOcc_v) export(rCJS_ss) @@ -42,6 +44,8 @@ export(rDynOcc_vvs) export(rDynOcc_vvv) export(rHMM) export(rHMMo) +export(rNmixture_s) +export(rNmixture_v) export(rOcc_s) export(rOcc_v) import(nimble) diff --git a/R/zzz.R b/R/zzz.R index e26ae76..82cfc7f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -2,7 +2,7 @@ .onAttach <- function(libname, pkgname) { packageStartupMessage("Loading nimbleEcology. \nRegistering the following user-defined functions: ", - "\ndOcc", ", dDynOcc", ", dCJS", ", dHMM", ", dDHMM") + "\ndOcc", ", dDynOcc", ", dCJS", ", dHMM", ", dDHMM", ", dNmixture") # Register the distributions explicitly for two reasons: # 1. Avoid message to user about automatic registrations upon first use in a nimbleModel @@ -298,7 +298,7 @@ ), mixedSizes = FALSE, pqAvail = FALSE - )), verbose = T + )), verbose = F ) registerDistributions(list( @@ -315,7 +315,7 @@ ), mixedSizes = FALSE, pqAvail = FALSE - )), verbose = T + )), verbose = F ) })} From 89a2d00b2d969645bca35a5e5584ba2eda8efce1 Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 5 Nov 2019 14:29:45 -0800 Subject: [PATCH 06/20] Fix collate, add Nmixture roxygen file --- DESCRIPTION | 2 +- man/dNmixture.Rd | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 man/dNmixture.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3c89e4c..8e0b2f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,8 +19,8 @@ Encoding: UTF-8 LazyData: true URL: https://github.com/nimble-dev/nimbleEcology Collate: - dDynOcc.R dCJS.R + dDynOcc.R dDHMM.R dHMM.R dOcc.R diff --git a/man/dNmixture.Rd b/man/dNmixture.Rd new file mode 100644 index 0000000..1c5eab2 --- /dev/null +++ b/man/dNmixture.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dNmixture.R +\name{dNmixture} +\alias{dNmixture} +\alias{dNmixture_v} +\alias{dNmixture_s} +\alias{rNmixture_v} +\alias{rNmixture_s} +\title{N-mixture distribution for use in NIMBLE models} +\usage{ +dNmixture_v(x, lambda, prob, minN, maxN, len, log = 0) + +dNmixture_s(x, lambda, prob, minN, maxN, len, log = 0) + +rNmixture_v(n, lambda, prob, minN, maxN, len) + +rNmixture_s(n, lambda, prob, minN, maxN, len) +} +\arguments{ +\item{x}{count observation data, 0 and positive integers} + +\item{lambda}{expected value of the Poisson distribution of true abundance} + +\item{prob}{observation probability for each X} + +\item{minN}{the minimum true abundance to sum over. Set to -1 for distribution +to select based on lambda. Ignored if dynamicMinMax = TRUE} + +\item{maxN}{the maximum true abundance to sum over. Set to -1 for distribution +to select based on lambda. Ignored if dynamicMinMax = TRUE} + +\item{len}{The length of the x vector. Needed for simulation in \code{rNmixture_*}.} + +\item{n}{number of random draws, each returning a vector of length +\code{len}. Currently only \code{n = 1} is supported, but the +argument exists for standardization of "\code{r}" functions.} + +\item{notZero}{0 if datum is structural zero, 1 otherwise. Allows for +zero-inflated Poisson models} + +\item{dynamicMinMax}{0 to use input minN and maxN, 1 to ignore +provided minN and maxN and algorithmically select reasonable bounds for N.} +} +\description{ +\code{dNmixture} provides dynamic occupancy model distributions for NIMBLE models. +} +\details{ +N-mixture models +model abundance at a series of sites over many replicate observations. The likelihood of an observation in site s +at time t depends on the + +Compared to writing NIMBLE models with a discrete latent state for true occupancy status and +a separate scalar datum for each observation, +use of these distributions allows +one to directly sum over the discrete latent state and calculate the probability of +all observations from one site jointly. +} +\examples{ +\dontrun{} +} +\seealso{ +For regular occupancy models, see documentation for dOcc. +} +\author{ +Lauren Ponisio, Ben Goldstein, Perry de Valpine + +These are written in the format of user-defined distributions to extend NIMBLE's +use of the BUGS model language. More information about writing user-defined distributions can be found +in the NIMBLE User Manual at \code{https://r-nimble.org}. + +The first argument to a "d" function is always named \code{x} and is given on the +left-hand side of a (stochastic) model declaration in the BUGS model language (used by NIMBLE). +When using these distributions in a NIMBLE model, the user +should not provide the \code{log} argument. (It is always set to \code{TRUE} when used +in a NIMBLE model.) + +For example, in a NIMBLE model, + +\code{detections[1:T] ~ dNmixture(lambda = lambda, prob = p[1:T], notZero = 1, minN = -1, maxN = -1)} + +declares that the \code{detections[1:T]} vector follows an N-mixture model distribution +with parameters as indicated, assuming all the parameters have been declared elsewhere in the model. +} From 4fbe2c1285088a4614e7ec63f3cf932412ff854b Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 5 Nov 2019 14:32:22 -0800 Subject: [PATCH 07/20] Fix casing in dNmixture.R --- R/{dNMixture.R => dNmixture.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{dNMixture.R => dNmixture.R} (100%) diff --git a/R/dNMixture.R b/R/dNmixture.R similarity index 100% rename from R/dNMixture.R rename to R/dNmixture.R From 28151571fec72d2547372763cc9da206b44bdd35 Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 5 Nov 2019 15:12:09 -0800 Subject: [PATCH 08/20] Add Nmixture models to vignette --- inst/doc/Introduction_to_nimbleEcology.html | 58 +++++++++++++++------ 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/inst/doc/Introduction_to_nimbleEcology.html b/inst/doc/Introduction_to_nimbleEcology.html index 645c179..a0f65db 100644 --- a/inst/doc/Introduction_to_nimbleEcology.html +++ b/inst/doc/Introduction_to_nimbleEcology.html @@ -12,7 +12,7 @@ - + Introduction to nimbleEcology @@ -303,7 +303,7 @@

Introduction to nimbleEcology

Perry de Valpine and Ben Goldstein

-

2019-10-10

+

2019-11-05

@@ -346,7 +346,7 @@

Illustration: A simple occupancy model

Occupancy models are used for data from repeated visits to a set of sites, where the detection (1) or non-detection (0) of a species of interest is recorded on each visit. Define y[i, j] as the observation from site i on visit j. y[i, j] is 1 if the species was seen and 0 if not.

Typical code for for an occupancy model would be as follows. Naturally, this is written for nimble, but the same code should work for JAGS or BUGS (WinBUGS or OpenBUGS) when used as needed for those packages.

+#> dOcc, dDynOcc, dCJS, dHMM, dDHMM, dNmixture
@@ -484,15 +484,15 @@

Do it all for the new version of the model

#> [.quosures rlang #> c.quosures rlang #> print.quosures rlang -#> ── Attaching packages ───────────────────────────────────────────────────────────── tidyverse 1.2.1 ── +#> ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ── #> ✔ ggplot2 3.1.1 ✔ purrr 0.3.2 #> ✔ tibble 2.1.2 ✔ dplyr 0.8.1 #> ✔ tidyr 0.8.3 ✔ stringr 1.4.0 #> ✔ readr 1.3.1 ✔ forcats 0.4.0 -#> ── Conflicts ──────────────────────────────────────────────────────────────── tidyverse_conflicts() ── +#> ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ── #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() -

+

The posterior density plots show that the familiar, conventional version of the model yields the same posterior distribution as the new version, which uses dOcc_s.

@@ -513,7 +513,7 @@

Use the new version for something different: maximum likelihood or maximum a #> compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compilation details. #> compilation finished. optim(c(0.5, 0.5), COccLogLik$run, control = list(fnscale = -1))$par -#> [1] 0.7166556 0.1506934 +#> [1] 0.7483715 0.1229434
@@ -690,6 +690,34 @@

Dynamic occupancy models in nimbleEcology

+
+

N-mixture

+

An N-mixture model gives the probability of a set of counts from repeated visits to each of multiple sites. The N-mixture distribution in nimbleEcology gives probability calculations for data from one site.

+

Define \(y_t\) as the number of individuals counted at the site on sampling occasion (time) \(t\). Define \(\mathbf{y} = (y_1, \ldots, y_t)\). Define \(\lambda\) as the average density of individuals, such that the true number of individuals, \(N\), follows a Poisson distribution with mean \(\lambda\). Define \(p_t\) to be the detection probability for each individual at time \(t\), and \(\mathbf{p} = (p_1, \ldots, p_t)\).

+

The probability of the data given the parameters is: \[ +P(\mathbf{y} | \lambda, \mathbf{p}) = \sum_{N = 1}^\infty P(N | \lambda) \prod_{t = 1}^T P(y_t | N) +\] where \(P(N | \lambda)\) is a Poisson probability and \(P(y_t | N)\) is a binomial probability. That is, \(y_t \sim \mbox{binomial}(N, p_t)\), and the \(y_t\)s are independent.

+

In practice, the summation over \(N\) can start at a value greater than 0 and must be truncated at some value \(\lt \infty\). Two options are provided for the range of summation:

+
    +
  1. Start the summation at the largest value of \(y_t\) (there must be at least this many individuals) and truncate it at a value of \(maxN\) provided by the user.
  2. +
  3. The following heuristic can be used.
  4. +
+

If we consider a single \(y_t\), then \(N - y_t | y_t \sim \mbox{Poisson}(\lambda (1-p_t))\) (See opening example of Royle and Dorazio). Thus, a natural upper end for the summation range of \(N\) would be \(y_t\) plus a very high quantile of The \(\mbox{Poisson}(\lambda (1-p_t))\) distribution. For a set of observations, a natural choice would be the maximum of such values across the observation times. We use the 0.99999 quantile to be conservative.

+

Correspondingly, the summation can begin at smallest of the 0.00001 quantiles of \(N | y_t\). If \(p_t\) is small, this can be considerably larger than the maximum value of \(y_t\), allowing more efficient computation.

+

Describe structural zeros.

+
+

N-mixture models in nimbleEcology

+

N-mixture models are available in two distributions in nimbleEcology. They differ in whether probability of detection is visit-dependent (vector case, corresponding to dNmixture_v) or visit-independent (scalar, dNmixture_s).

+

An example is:

+

y[i, 1:T] ~ dNmixture_v(lambda = lambda, p = p[1:T], minN = minN, maxN = maxN, len = T)

+
    +
  • lambda is \(\lambda\) above.
  • +
  • p[1:T] is \(\mathbf{p}\) above.
  • +
  • len is \(T\).
  • +
  • minN and maxN provide the lower and upper bounds for the sum over Ns (option 1 above). If both are set to -1, bounds are chosen dynamically using quantiles of the Poisson distribution (option 2 above).
  • +
+
+
From a6f2efeb62037e19b640dcf9120e98c490180cca Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 5 Nov 2019 15:46:34 -0800 Subject: [PATCH 09/20] Update dNmixture documentation --- R/dNmixture.R | 140 +++++++++++++++++++++++++++++++++++---------- man/dNmixture.Rd | 145 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 223 insertions(+), 62 deletions(-) diff --git a/R/dNmixture.R b/R/dNmixture.R index 9d237e3..56966f0 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -1,20 +1,12 @@ # dNmixture #' N-mixture distribution for use in NIMBLE models #' -#' \code{dNmixture} provides dynamic occupancy model distributions for NIMBLE models. -#' -#' N-mixture models -#' model abundance at a series of sites over many replicate observations. The likelihood of an observation in site s -#' at time t depends on the -#' -#' Compared to writing NIMBLE models with a discrete latent state for true occupancy status and -#' a separate scalar datum for each observation, -#' use of these distributions allows -#' one to directly sum over the discrete latent state and calculate the probability of -#' all observations from one site jointly. +#' \code{dNmixture_s} and \code{dNmixture_v} provides dynamic occupancy model distributions for NIMBLE models. #' #' @name dNmixture -#' @aliases dNmixture_v dNmixture_s +#' @aliases dNmixture_s dNmixture_v rNmixture_s rNmixture_v +#' +#' @author Ben Goldstein, Lauren Ponisio, and Perry de Valpine #' #' @param x count observation data, 0 and positive integers #' @param lambda expected value of the Poisson distribution of true abundance @@ -27,34 +19,122 @@ #' to select based on lambda. Ignored if dynamicMinMax = TRUE #' @param dynamicMinMax 0 to use input minN and maxN, 1 to ignore #' provided minN and maxN and algorithmically select reasonable bounds for N. -#' @param n number of random draws, each returning a vector of length -#' \code{len}. Currently only \code{n = 1} is supported, but the -#' argument exists for standardization of "\code{r}" functions. #' @param len The length of the x vector. Needed for simulation in \code{rNmixture_*}. +#' @param log TRUE or 1 to return log probability. FALSE or 0 to return probability. +#' Need not be specified in the model context. +#' @param n number of random draws, each returning a vector of length +#' \code{len}. Currently only \code{n = 1} is supported, but the +#' argument exists for standardization of "\code{r}" functions. +#' +#' @details +#' These nimbleFunctions provide distributions that can be used directly in R or +#' in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} +#' and \code{\link[nimble]{nimbleModel}}). +#' +#' N-mixture models model abundance at a series of sites over many replicate observations. +#' The likelihood of an observation in site \code{s} at visit \code{t} depends on the +#' true abundance N, which is assumed to be drawn from a Poisson distribution with +#' mean \code{lambda}. It also depends on the probability of detecting an individual +#' (\code{prob} or \code{prob[t]}). The observed count \code{x[t]} has a probability +#' according to the Binomial distribution with size parameter N and probability \code{prob} +#' +#' The distribution has two forms, \code{dNmixture_s} and +#' \code{dNmixture_v}. These differentiate between whether the detection +#' probability \code{prob} is visit-dependent (vector, \code{dNmixture_v}) +#' or visit-independent (scalar, dNmixture_s). +#' +#' For more explanation, see +#' \href{../doc/Introduction_to_nimbleEcology.html}{package vignette} (or +#' \code{vignette("Introduction_to_nimbleEcology")}). +#' +#' Compared to writing \code{nimble} models with a discrete latent +#' state of abundance N and a separate scalar datum for each observation time, +#' use of these distributions allows one to directly sum (marginalize) over +#' the discrete latent state N and calculate the probability of all +#' observations for a site jointly. #' -#' @author Lauren Ponisio, Ben Goldstein, Perry de Valpine +#' These are \code{nimbleFunction}s written in the format of user-defined +#' distributions for NIMBLE's extension of the BUGS model language. More +#' information can be found in the NIMBLE User Manual at +#' \href{https://r-nimble.org}{https://r-nimble.org}. #' -#' These are written in the format of user-defined distributions to extend NIMBLE's -#' use of the BUGS model language. More information about writing user-defined distributions can be found -#' in the NIMBLE User Manual at \code{https://r-nimble.org}. +#' When using these distributions in a \code{nimble} model, the left-hand side +#' will be used as \code{x}, and the user should not provide the \code{log} +#' argument. #' -#' The first argument to a "d" function is always named \code{x} and is given on the -#' left-hand side of a (stochastic) model declaration in the BUGS model language (used by NIMBLE). -#' When using these distributions in a NIMBLE model, the user -#' should not provide the \code{log} argument. (It is always set to \code{TRUE} when used -#' in a NIMBLE model.) +#' For example, in \code{nimble} model code, #' -#' For example, in a NIMBLE model, +#' \code{observedCounts[i, 1:T] ~ dNmixture_v(lambda[i], +#' prob[i, 1:T], +#' minN, maxN, T)} #' -#' \code{detections[1:T] ~ dNmixture(lambda = lambda, prob = p[1:T], notZero = 1, minN = -1, maxN = -1)} +#' declares that the \code{observedCounts[i, 1:T]} (observed +#' counts for site \code{i}, for example) vector follows an +#' N-mixture model distribution with parameters as indicated, +#' assuming all the parameters have been declared elsewhere in the +#' model. As above, \code{lambda[i]} is the mean of the abundance distribution at site i, +#' \code{prob[i, 1:T]} is a vector of detection probabilities at site i, and +#' \code{T} is the number of observation occasions. This will invoke +#' (something like) the following call to \code{dHMM} when +#' \code{nimble} uses the model such as for MCMC: #' -#' declares that the \code{detections[1:T]} vector follows an N-mixture model distribution -#' with parameters as indicated, assuming all the parameters have been declared elsewhere in the model. +#' \code{dNmixture_v(observedCounts[i, 1:T], lambda[i], +#' prob[i, 1:T], +#' minN, maxN, T)} #' +#' If an algorithm using a \code{nimble} model with this declaration +#' needs to generate a random draw for \code{observedCounts[1:T]}, it +#' will make a similar invocation of \code{rNmixture_v}, with \code{n = 1}. +#' +#' If the observation probabilities are occasion-independent, one would use: +#' +#' \code{observedCounts[1:T] ~ dNmixture_s(observedCounts[i, 1:T], lambda[i], +#' prob[i], +#' minN, maxN, T)} +#' +#' @return +#' For \code{dNmixture_s} and \code{dNmixture_v}: the probability (or likelihood) or log +#' probability of observation vector \code{x}. +#' +#' For \code{rNmixture_s} and \code{rNmixture_v}: a simulated detection history, \code{x}. +#' +#' @seealso For occupancy models dealing with detection/nondetection data, +#' see \code{\link{dOcc}}. +#' For dynamic occupancy, see \code{\link{dDynOcc}}. +#' +#' @references D. Turek, P. de Valpine and C. J. Paciorek. 2016. Efficient Markov chain Monte +#' Carlo sampling for hierarchical hidden Markov models. Environmental and Ecological Statistics +#' 23:549–564. DOI 10.1007/s10651-016-0353-z #' -#' @seealso For regular occupancy models, see documentation for dOcc. #' @examples -#' \dontrun{} +#' \donttest{ +#' # Set up constants and initial values for defining the model +#' len <- 5 # length of dataset +#' dat <- c(1,2,0,1,5) # A vector of observations +#' lambda <- 10 # mean abundance +#' prob <- c(0.2, 0.3, 0.2, 0.1, 0.4) # A vector of detection probabilities +#' +#' # Define code for a nimbleModel +#' nc <- nimbleCode({ +#' x[1:5] ~ dNmixture_v(lambda, prob = [1:5], +#' minN = -1, maxN = -1, len = 5) +#' +#' lambda ~ dunif(0, 1000) +#' +#' for (i in 1:5) { +#' prob[i] ~ dunif(0, 1) +#' } +#' }) +#' +#' # Build the model +#' nmix <- nimbleModel(nc, +#' data = list(x = dat), +#' inits = list(lambda = lambda, +#' prob = prob)) +#' # Calculate log probability of data from the model +#' nmix_model$calculate() +#' # Use the model for a variety of other purposes... +#' } NULL diff --git a/man/dNmixture.Rd b/man/dNmixture.Rd index 1c5eab2..d35c68d 100644 --- a/man/dNmixture.Rd +++ b/man/dNmixture.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/dNmixture.R \name{dNmixture} \alias{dNmixture} -\alias{dNmixture_v} \alias{dNmixture_s} -\alias{rNmixture_v} +\alias{dNmixture_v} \alias{rNmixture_s} +\alias{rNmixture_v} \title{N-mixture distribution for use in NIMBLE models} \usage{ dNmixture_v(x, lambda, prob, minN, maxN, len, log = 0) @@ -31,6 +31,9 @@ to select based on lambda. Ignored if dynamicMinMax = TRUE} \item{len}{The length of the x vector. Needed for simulation in \code{rNmixture_*}.} +\item{log}{TRUE or 1 to return log probability. FALSE or 0 to return probability. +Need not be specified in the model context.} + \item{n}{number of random draws, each returning a vector of length \code{len}. Currently only \code{n = 1} is supported, but the argument exists for standardization of "\code{r}" functions.} @@ -41,43 +44,121 @@ zero-inflated Poisson models} \item{dynamicMinMax}{0 to use input minN and maxN, 1 to ignore provided minN and maxN and algorithmically select reasonable bounds for N.} } +\value{ +For \code{dNmixture_s} and \code{dNmixture_v}: the probability (or likelihood) or log +probability of observation vector \code{x}. + +For \code{rNmixture_s} and \code{rNmixture_v}: a simulated detection history, \code{x}. +} \description{ -\code{dNmixture} provides dynamic occupancy model distributions for NIMBLE models. +\code{dNmixture_s} and \code{dNmixture_v} provides dynamic occupancy model distributions for NIMBLE models. } \details{ -N-mixture models -model abundance at a series of sites over many replicate observations. The likelihood of an observation in site s -at time t depends on the - -Compared to writing NIMBLE models with a discrete latent state for true occupancy status and -a separate scalar datum for each observation, -use of these distributions allows -one to directly sum over the discrete latent state and calculate the probability of -all observations from one site jointly. +These nimbleFunctions provide distributions that can be used directly in R or +in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} +and \code{\link[nimble]{nimbleModel}}). + +N-mixture models model abundance at a series of sites over many replicate observations. +The likelihood of an observation in site \code{s} at visit \code{t} depends on the +true abundance N, which is assumed to be drawn from a Poisson distribution with +mean \code{lambda}. It also depends on the probability of detecting an individual +(\code{prob} or \code{prob[t]}). The observed count \code{x[t]} has a probability +according to the Binomial distribution with size parameter N and probability \code{prob} + +The distribution has two forms, \code{dNmixture_s} and +\code{dNmixture_v}. These differentiate between whether the detection +probability \code{prob} is visit-dependent (vector, \code{dNmixture_v}) +or visit-independent (scalar, dNmixture_s). + +For more explanation, see +\href{../doc/Introduction_to_nimbleEcology.html}{package vignette} (or +\code{vignette("Introduction_to_nimbleEcology")}). + +Compared to writing \code{nimble} models with a discrete latent +state of abundance N and a separate scalar datum for each observation time, +use of these distributions allows one to directly sum (marginalize) over +the discrete latent state N and calculate the probability of all +observations for a site jointly. + +These are \code{nimbleFunction}s written in the format of user-defined +distributions for NIMBLE's extension of the BUGS model language. More +information can be found in the NIMBLE User Manual at +\href{https://r-nimble.org}{https://r-nimble.org}. + +When using these distributions in a \code{nimble} model, the left-hand side +will be used as \code{x}, and the user should not provide the \code{log} +argument. + +For example, in \code{nimble} model code, + +\code{observedCounts[i, 1:T] ~ dNmixture_v(lambda[i], +prob[i, 1:T], +minN, maxN, T)} + +declares that the \code{observedCounts[i, 1:T]} (observed +counts for site \code{i}, for example) vector follows an +N-mixture model distribution with parameters as indicated, +assuming all the parameters have been declared elsewhere in the +model. As above, \code{lambda[i]} is the mean of the abundance distribution at site i, +\code{prob[i, 1:T]} is a vector of detection probabilities at site i, and +\code{T} is the number of observation occasions. This will invoke +(something like) the following call to \code{dHMM} when +\code{nimble} uses the model such as for MCMC: + +\code{dNmixture_v(observedCounts[i, 1:T], lambda[i], +prob[i, 1:T], +minN, maxN, T)} + +If an algorithm using a \code{nimble} model with this declaration +needs to generate a random draw for \code{observedCounts[1:T]}, it +will make a similar invocation of \code{rNmixture_v}, with \code{n = 1}. + +If the observation probabilities are occasion-independent, one would use: + +\code{observedCounts[1:T] ~ dNmixture_s(observedCounts[i, 1:T], lambda[i], +prob[i], +minN, maxN, T)} } \examples{ -\dontrun{} +\donttest{ +# Set up constants and initial values for defining the model +len <- 5 # length of dataset +dat <- c(1,2,0,1,5) # A vector of observations +lambda <- 10 # mean abundance +prob <- c(0.2, 0.3, 0.2, 0.1, 0.4) # A vector of detection probabilities + +# Define code for a nimbleModel + nc <- nimbleCode({ + x[1:5] ~ dNmixture_v(lambda, prob = [1:5], + minN = -1, maxN = -1, len = 5) + + lambda ~ dunif(0, 1000) + + for (i in 1:5) { + prob[i] ~ dunif(0, 1) + } + }) + +# Build the model +nmix <- nimbleModel(nc, + data = list(x = dat), + inits = list(lambda = lambda, + prob = prob)) +# Calculate log probability of data from the model +nmix_model$calculate() +# Use the model for a variety of other purposes... +} +} +\references{ +D. Turek, P. de Valpine and C. J. Paciorek. 2016. Efficient Markov chain Monte +Carlo sampling for hierarchical hidden Markov models. Environmental and Ecological Statistics +23:549–564. DOI 10.1007/s10651-016-0353-z } \seealso{ -For regular occupancy models, see documentation for dOcc. +For occupancy models dealing with detection/nondetection data, +see \code{\link{dOcc}}. +For dynamic occupancy, see \code{\link{dDynOcc}}. } \author{ -Lauren Ponisio, Ben Goldstein, Perry de Valpine - -These are written in the format of user-defined distributions to extend NIMBLE's -use of the BUGS model language. More information about writing user-defined distributions can be found -in the NIMBLE User Manual at \code{https://r-nimble.org}. - -The first argument to a "d" function is always named \code{x} and is given on the -left-hand side of a (stochastic) model declaration in the BUGS model language (used by NIMBLE). -When using these distributions in a NIMBLE model, the user -should not provide the \code{log} argument. (It is always set to \code{TRUE} when used -in a NIMBLE model.) - -For example, in a NIMBLE model, - -\code{detections[1:T] ~ dNmixture(lambda = lambda, prob = p[1:T], notZero = 1, minN = -1, maxN = -1)} - -declares that the \code{detections[1:T]} vector follows an N-mixture model distribution -with parameters as indicated, assuming all the parameters have been declared elsewhere in the model. +Ben Goldstein, Lauren Ponisio, and Perry de Valpine } From 7de6ae69ec81c30047279a25f40ccc6c31537beb Mon Sep 17 00:00:00 2001 From: dochvam Date: Fri, 8 Nov 2019 15:47:30 -0800 Subject: [PATCH 10/20] Nmixture changes - minN to Nmin, default values of -1 --- DESCRIPTION | 2 +- R/dNmixture.R | 60 +++++++++++++++++----------------- R/zzz.R | 8 ++--- tests/testthat/test-Nmixture.R | 48 +++++++++++++-------------- 4 files changed, 59 insertions(+), 59 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8e0b2f9..f96d095 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: nimbleEcology Type: Package Title: Distributions for Ecological Models in 'nimble' -Version: 0.1.1 +Version: 0.2.0 Maintainer: Benjamin R. Goldstein Authors@R: c(person("Benjamin R.", "Goldstein", role = c("aut", "cre"), email = "ben.goldstein@berkeley.edu"), diff --git a/R/dNmixture.R b/R/dNmixture.R index 56966f0..ac1bf56 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -13,12 +13,12 @@ #' @param prob observation probability for each X #' @param notZero 0 if datum is structural zero, 1 otherwise. Allows for #' zero-inflated Poisson models -#' @param minN the minimum true abundance to sum over. Set to -1 for distribution +#' @param Nmin the minimum true abundance to sum over. Set to -1 for distribution #' to select based on lambda. Ignored if dynamicMinMax = TRUE -#' @param maxN the maximum true abundance to sum over. Set to -1 for distribution +#' @param Nmax the maximum true abundance to sum over. Set to -1 for distribution #' to select based on lambda. Ignored if dynamicMinMax = TRUE -#' @param dynamicMinMax 0 to use input minN and maxN, 1 to ignore -#' provided minN and maxN and algorithmically select reasonable bounds for N. +#' @param dynamicMinMax 0 to use input Nmin and Nmax, 1 to ignore +#' provided Nmin and Nmax and algorithmically select reasonable bounds for N. #' @param len The length of the x vector. Needed for simulation in \code{rNmixture_*}. #' @param log TRUE or 1 to return log probability. FALSE or 0 to return probability. #' Need not be specified in the model context. @@ -66,7 +66,7 @@ #' #' \code{observedCounts[i, 1:T] ~ dNmixture_v(lambda[i], #' prob[i, 1:T], -#' minN, maxN, T)} +#' Nmin, Nmax, T)} #' #' declares that the \code{observedCounts[i, 1:T]} (observed #' counts for site \code{i}, for example) vector follows an @@ -80,7 +80,7 @@ #' #' \code{dNmixture_v(observedCounts[i, 1:T], lambda[i], #' prob[i, 1:T], -#' minN, maxN, T)} +#' Nmin, Nmax, T)} #' #' If an algorithm using a \code{nimble} model with this declaration #' needs to generate a random draw for \code{observedCounts[1:T]}, it @@ -90,7 +90,7 @@ #' #' \code{observedCounts[1:T] ~ dNmixture_s(observedCounts[i, 1:T], lambda[i], #' prob[i], -#' minN, maxN, T)} +#' Nmin, Nmax, T)} #' #' @return #' For \code{dNmixture_s} and \code{dNmixture_v}: the probability (or likelihood) or log @@ -117,7 +117,7 @@ #' # Define code for a nimbleModel #' nc <- nimbleCode({ #' x[1:5] ~ dNmixture_v(lambda, prob = [1:5], -#' minN = -1, maxN = -1, len = 5) +#' Nmin = -1, Nmax = -1, len = 5) #' #' lambda ~ dunif(0, 1000) #' @@ -144,8 +144,8 @@ dNmixture_v <- nimbleFunction( run = function(x = double(1), lambda = double(), prob = double(1), - minN = double(), - maxN = double(), + Nmin = integer(0, default = -1), + Nmax = integer(0, default = -1), len = double(), log = integer(0, default = 0)) { if (length(x) != len) stop("in dNmixture_v, len must equal length(x).") @@ -165,16 +165,16 @@ dNmixture_v <- nimbleFunction( ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. - if (minN == -1 & maxN == -1) { - minN <- min(x + qpois(0.00001, lambda * (1 - prob))) - maxN <- max(x + qpois(0.99999, lambda * (1 - prob))) - minN <- max( max(x), minN ) ## set minN to at least the largest x + if (Nmin == -1 & Nmax == -1) { + Nmin <- min(x + qpois(0.00001, lambda * (1 - prob))) + Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) + Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x } obsProb <- 0 - if (maxN > minN) { ## should normally be true, but check in case it isn't in some corner case. - ## print("counting from ", minN, " to ", maxN, " with lambda = ", lambda) - for (N in minN:maxN) { + if (Nmax > Nmin) { ## should normally be true, but check in case it isn't in some corner case. + ## print("counting from ", Nmin, " to ", Nmax, " with lambda = ", lambda) + for (N in Nmin:Nmax) { thisObsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) obsProb <- obsProb + thisObsProb } @@ -195,8 +195,8 @@ dNmixture_s <- nimbleFunction( run = function(x = double(1), lambda = double(), prob = double(), - minN = double(), - maxN = double(), + Nmin = integer(0, default = -1), + Nmax = integer(0, default = -1), len = double(), log = integer(0, default = 0)) { if (length(x) != len) stop("in dNmixture_v, len must equal length(x).") @@ -215,16 +215,16 @@ dNmixture_s <- nimbleFunction( ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. - if (minN == -1 & maxN == -1) { - minN <- min(x + qpois(0.00001, lambda * (1 - prob))) - maxN <- max(x + qpois(0.99999, lambda * (1 - prob))) - minN <- max( max(x), minN ) ## set minN to at least the largest x + if (Nmin == -1 & Nmax == -1) { + Nmin <- min(x + qpois(0.00001, lambda * (1 - prob))) + Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) + Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x } obsProb <- 0 - if (maxN > minN) { ## should normally be true, but check in case it isn't in some corner case. - ## print("counting from ", minN, " to ", maxN, " with lambda = ", lambda) - for (N in minN:maxN) { + if (Nmax > Nmin) { ## should normally be true, but check in case it isn't in some corner case. + ## print("counting from ", Nmin, " to ", Nmax, " with lambda = ", lambda) + for (N in Nmin:Nmax) { thisObsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) obsProb <- obsProb + thisObsProb } @@ -245,8 +245,8 @@ rNmixture_v <- nimbleFunction( run = function(n = double(), lambda = double(), prob = double(1), - minN = double(), - maxN = double(), + Nmin = integer(0, default = -1), + Nmax = integer(0, default = -1), len = double()) { if (n != 1) stop("rNmixture_v only works for n = 1") if (length(prob) != len) stop("In rNmixture_v, len must equal length(prob).") @@ -267,8 +267,8 @@ rNmixture_s <- nimbleFunction( run = function(n = double(), lambda = double(), prob = double(), - minN = double(), - maxN = double(), + Nmin = integer(0, default = -1), + Nmax = integer(0, default = -1), len = double()) { if (n != 1) stop("rNmixture_v only works for n = 1") trueN <- rpois(1, lambda) diff --git a/R/zzz.R b/R/zzz.R index 82cfc7f..6d3b009 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -292,8 +292,8 @@ types = c('value = double(1)', 'lambda = double()', 'prob = double(1)', - 'minN = double()', - 'maxN = double()', + 'Nmin = integer(0)', + 'Nmax = integer(0)', 'len = double()' ), mixedSizes = FALSE, @@ -309,8 +309,8 @@ types = c('value = double(1)', 'lambda = double()', 'prob = double()', - 'minN = double()', - 'maxN = double()', + 'Nmin = integer(0)', + 'Nmax = integer(0)', 'len = double()' ), mixedSizes = FALSE, diff --git a/tests/testthat/test-Nmixture.R b/tests/testthat/test-Nmixture.R index 0d7ee5f..d7feffc 100644 --- a/tests/testthat/test-Nmixture.R +++ b/tests/testthat/test-Nmixture.R @@ -13,36 +13,36 @@ test_that("dNmixture_v works", x <- c(1, 0, 1, 3, 0) lambda <- 8 prob <- c(0.5, 0.3, 0.5, 0.4, 0.1) - minN <- 0 - maxN <- 250 + Nmin <- 0 + Nmax <- 250 len <- 5 - probX <- dNmixture_v(x, lambda, prob, minN, maxN, len) + probX <- dNmixture_v(x, lambda, prob, Nmin, Nmax, len) # Manually calculate the correct answer correctProbX <- 0 - for (N in minN:maxN) { + for (N in Nmin:Nmax) { correctProbX <- correctProbX + dpois(N, lambda) * prod(dbinom(x, N, prob)) } expect_equal(probX, correctProbX) # Uncompiled log probability - lProbX <- dNmixture_v(x, lambda, prob, minN, maxN, len, log = TRUE) + lProbX <- dNmixture_v(x, lambda, prob, Nmin, Nmax, len, log = TRUE) lCorrectProbX <- log(correctProbX) expect_equal(lProbX, lCorrectProbX) # Compilation and compiled calculations CdNmixture_v <- compileNimble(dNmixture_v) - CprobX <- CdNmixture_v(x, lambda, prob, minN, maxN, len) + CprobX <- CdNmixture_v(x, lambda, prob, Nmin, Nmax, len) expect_equal(CprobX, probX) - ClProbX <- CdNmixture_v(x, lambda, prob, minN, maxN, len, log = TRUE) + ClProbX <- CdNmixture_v(x, lambda, prob, Nmin, Nmax, len, log = TRUE) expect_equal(ClProbX, lProbX) # Use in Nimble model nc <- nimbleCode({ x[1:5] ~ dNmixture_v(lambda = lambda, prob = prob[1:5], - minN = minN, maxN = maxN, len = len) + Nmin = Nmin, Nmax = Nmax, len = len) }) @@ -50,7 +50,7 @@ test_that("dNmixture_v works", data = list(x = x), inits = list(lambda = lambda, prob = prob), - constants = list(minN = minN, maxN = maxN, + constants = list(Nmin = Nmin, Nmax = Nmax, len = len)) m$calculate() MlProbX <- m$getLogProb("x") @@ -67,7 +67,7 @@ test_that("dNmixture_v works", mNA <- nimbleModel(nc, data = list(x = xNA), inits = list(lambda = lambda, prob = prob), - constants = list(minN = minN, maxN = maxN, + constants = list(Nmin = Nmin, Nmax = Nmax, len = len)) @@ -87,14 +87,14 @@ test_that("dNmixture_v works", xSim <- array(NA, dim = c(nSim, len)) set.seed(1) for (i in 1:nSim) { - xSim[i,] <- rNmixture_v(1, lambda, prob, minN, maxN, len) + xSim[i,] <- rNmixture_v(1, lambda, prob, Nmin, Nmax, len) } CrNmixture_v <- compileNimble(rNmixture_v) CxSim <- array(NA, dim = c(nSim, len)) set.seed(1) for (i in 1:nSim) { - CxSim[i,] <- CrNmixture_v(1, lambda, prob, minN, maxN, len) + CxSim[i,] <- CrNmixture_v(1, lambda, prob, Nmin, Nmax, len) } expect_identical(xSim, CxSim) @@ -124,36 +124,36 @@ test_that("dNmixture_s works", x <- c(1, 0, 1, 3, 0) lambda <- 8 prob <- 0.4 - minN <- 0 - maxN <- 250 + Nmin <- 0 + Nmax <- 250 len <- 5 - probX <- dNmixture_s(x, lambda, prob, minN, maxN, len) + probX <- dNmixture_s(x, lambda, prob, Nmin, Nmax, len) # Manually calculate the correct answer correctProbX <- 0 - for (N in minN:maxN) { + for (N in Nmin:Nmax) { correctProbX <- correctProbX + dpois(N, lambda) * prod(dbinom(x, N, prob)) } expect_equal(probX, correctProbX) # Uncompiled log probability - lProbX <- dNmixture_s(x, lambda, prob, minN, maxN, len, log = TRUE) + lProbX <- dNmixture_s(x, lambda, prob, Nmin, Nmax, len, log = TRUE) lCorrectProbX <- log(correctProbX) expect_equal(lProbX, lCorrectProbX) # Compilation and compiled calculations CdNmixture_s <- compileNimble(dNmixture_s) - CprobX <- CdNmixture_s(x, lambda, prob, minN, maxN, len) + CprobX <- CdNmixture_s(x, lambda, prob, Nmin, Nmax, len) expect_equal(CprobX, probX) - ClProbX <- CdNmixture_s(x, lambda, prob, minN, maxN, len, log = TRUE) + ClProbX <- CdNmixture_s(x, lambda, prob, Nmin, Nmax, len, log = TRUE) expect_equal(ClProbX, lProbX) # Use in Nimble model nc <- nimbleCode({ x[1:5] ~ dNmixture_s(lambda = lambda, prob = prob, - minN = minN, maxN = maxN, len = len) + Nmin = Nmin, Nmax = Nmax, len = len) }) @@ -161,7 +161,7 @@ test_that("dNmixture_s works", data = list(x = x), inits = list(lambda = lambda, prob = prob), - constants = list(minN = minN, maxN = maxN, + constants = list(Nmin = Nmin, Nmax = Nmax, len = len)) m$calculate() MlProbX <- m$getLogProb("x") @@ -178,7 +178,7 @@ test_that("dNmixture_s works", mNA <- nimbleModel(nc, data = list(x = xNA), inits = list(lambda = lambda, prob = prob), - constants = list(minN = minN, maxN = maxN, + constants = list(Nmin = Nmin, Nmax = Nmax, len = len)) @@ -198,14 +198,14 @@ test_that("dNmixture_s works", xSim <- array(NA, dim = c(nSim, len)) set.seed(1) for (i in 1:nSim) { - xSim[i,] <- rNmixture_s(1, lambda, prob, minN, maxN, len) + xSim[i,] <- rNmixture_s(1, lambda, prob, Nmin, Nmax, len) } CrNmixture_s <- compileNimble(rNmixture_s) CxSim <- array(NA, dim = c(nSim, len)) set.seed(1) for (i in 1:nSim) { - CxSim[i,] <- CrNmixture_s(1, lambda, prob, minN, maxN, len) + CxSim[i,] <- CrNmixture_s(1, lambda, prob, Nmin, Nmax, len) } expect_identical(xSim, CxSim) From 69b70936d1b657df6e354cd9a09d940a34e34ca8 Mon Sep 17 00:00:00 2001 From: dochvam Date: Fri, 8 Nov 2019 16:20:39 -0800 Subject: [PATCH 11/20] Fixed parameter naming problem in nmixture registration --- R/dNmixture.R | 16 ++++++++-------- R/zzz.R | 16 ++++++++-------- tests/testthat/test-Nmixture.R | 3 ++- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/R/dNmixture.R b/R/dNmixture.R index ac1bf56..d70fea1 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -144,8 +144,8 @@ dNmixture_v <- nimbleFunction( run = function(x = double(1), lambda = double(), prob = double(1), - Nmin = integer(0, default = -1), - Nmax = integer(0, default = -1), + Nmin = double(0, default = -1), + Nmax = double(0, default = -1), len = double(), log = integer(0, default = 0)) { if (length(x) != len) stop("in dNmixture_v, len must equal length(x).") @@ -195,8 +195,8 @@ dNmixture_s <- nimbleFunction( run = function(x = double(1), lambda = double(), prob = double(), - Nmin = integer(0, default = -1), - Nmax = integer(0, default = -1), + Nmin = double(0, default = -1), + Nmax = double(0, default = -1), len = double(), log = integer(0, default = 0)) { if (length(x) != len) stop("in dNmixture_v, len must equal length(x).") @@ -245,8 +245,8 @@ rNmixture_v <- nimbleFunction( run = function(n = double(), lambda = double(), prob = double(1), - Nmin = integer(0, default = -1), - Nmax = integer(0, default = -1), + Nmin = double(0, default = -1), + Nmax = double(0, default = -1), len = double()) { if (n != 1) stop("rNmixture_v only works for n = 1") if (length(prob) != len) stop("In rNmixture_v, len must equal length(prob).") @@ -267,8 +267,8 @@ rNmixture_s <- nimbleFunction( run = function(n = double(), lambda = double(), prob = double(), - Nmin = integer(0, default = -1), - Nmax = integer(0, default = -1), + Nmin = double(0, default = -1), + Nmax = double(0, default = -1), len = double()) { if (n != 1) stop("rNmixture_v only works for n = 1") trueN <- rpois(1, lambda) diff --git a/R/zzz.R b/R/zzz.R index 6d3b009..bff322c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -286,14 +286,14 @@ registerDistributions(list( dNmixture_v = list( - BUGSdist = "dNmixture_v(lambda, prob, minN, maxN, len)", - Rdist = "dNmixture_v(lambda, prob, minN, maxN, len)", + BUGSdist = "dNmixture_v(lambda, prob, Nmin, Nmax, len)", + Rdist = "dNmixture_v(lambda, prob, Nmin, Nmax, len)", discrete = TRUE, types = c('value = double(1)', 'lambda = double()', 'prob = double(1)', - 'Nmin = integer(0)', - 'Nmax = integer(0)', + 'Nmin = double(0, default = -1)', + 'Nmax = double(0, default = -1)', 'len = double()' ), mixedSizes = FALSE, @@ -303,14 +303,14 @@ registerDistributions(list( dNmixture_s = list( - BUGSdist = "dNmixture_s(lambda, prob, minN, maxN, len)", - Rdist = "dNmixture_s(lambda, prob, minN, maxN, len)", + BUGSdist = "dNmixture_s(lambda, prob, Nmin, Nmax, len)", + Rdist = "dNmixture_s(lambda, prob, Nmin, Nmax, len)", discrete = TRUE, types = c('value = double(1)', 'lambda = double()', 'prob = double()', - 'Nmin = integer(0)', - 'Nmax = integer(0)', + 'Nmin = double(0, default = -1)', + 'Nmax = double(0, default = -1)', 'len = double()' ), mixedSizes = FALSE, diff --git a/tests/testthat/test-Nmixture.R b/tests/testthat/test-Nmixture.R index d7feffc..5cfe108 100644 --- a/tests/testthat/test-Nmixture.R +++ b/tests/testthat/test-Nmixture.R @@ -42,7 +42,8 @@ test_that("dNmixture_v works", # Use in Nimble model nc <- nimbleCode({ x[1:5] ~ dNmixture_v(lambda = lambda, prob = prob[1:5], - Nmin = Nmin, Nmax = Nmax, len = len) + Nmin = Nmin, Nmax = Nmax, + len = len) }) From 57885e306d0d0db80289fc6270e4d9b43b060ef1 Mon Sep 17 00:00:00 2001 From: dochvam Date: Thu, 12 Dec 2019 09:28:48 -0800 Subject: [PATCH 12/20] New fast implementation of dNmixture --- R/dNmixture.R | 109 ++++++++++++++++++++--------------------------- man/dNmixture.Rd | 50 ++++++++++------------ 2 files changed, 70 insertions(+), 89 deletions(-) diff --git a/R/dNmixture.R b/R/dNmixture.R index d70fea1..5f99b97 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -8,18 +8,14 @@ #' #' @author Ben Goldstein, Lauren Ponisio, and Perry de Valpine #' -#' @param x count observation data, 0 and positive integers +#' @param x count observation data. 0 and positive integers #' @param lambda expected value of the Poisson distribution of true abundance #' @param prob observation probability for each X -#' @param notZero 0 if datum is structural zero, 1 otherwise. Allows for -#' zero-inflated Poisson models -#' @param Nmin the minimum true abundance to sum over. Set to -1 for distribution -#' to select based on lambda. Ignored if dynamicMinMax = TRUE -#' @param Nmax the maximum true abundance to sum over. Set to -1 for distribution -#' to select based on lambda. Ignored if dynamicMinMax = TRUE -#' @param dynamicMinMax 0 to use input Nmin and Nmax, 1 to ignore -#' provided Nmin and Nmax and algorithmically select reasonable bounds for N. -#' @param len The length of the x vector. Needed for simulation in \code{rNmixture_*}. +#' @param Nmin the minimum abundance to sum over. Set to -1 for distribution +#' to select based on lambda +#' @param Nmax the maximum abundance to sum over. Set to -1 for distribution +#' to select based on lambda +#' @param len The length of the x vector #' @param log TRUE or 1 to return log probability. FALSE or 0 to return probability. #' Need not be specified in the model context. #' @param n number of random draws, each returning a vector of length @@ -31,7 +27,7 @@ #' in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} #' and \code{\link[nimble]{nimbleModel}}). #' -#' N-mixture models model abundance at a series of sites over many replicate observations. +#' N-mixture models describe abundance at a series of sites over many replicate observations. #' The likelihood of an observation in site \code{s} at visit \code{t} depends on the #' true abundance N, which is assumed to be drawn from a Poisson distribution with #' mean \code{lambda}. It also depends on the probability of detecting an individual @@ -39,9 +35,9 @@ #' according to the Binomial distribution with size parameter N and probability \code{prob} #' #' The distribution has two forms, \code{dNmixture_s} and -#' \code{dNmixture_v}. These differentiate between whether the detection -#' probability \code{prob} is visit-dependent (vector, \code{dNmixture_v}) -#' or visit-independent (scalar, dNmixture_s). +#' \code{dNmixture_v}. These differentiate whether the detection +#' probability \code{prob} is visit-dependent (a vector, \code{dNmixture_v}) +#' or visit-independent (a scalar, dNmixture_s). #' #' For more explanation, see #' \href{../doc/Introduction_to_nimbleEcology.html}{package vignette} (or @@ -75,7 +71,7 @@ #' model. As above, \code{lambda[i]} is the mean of the abundance distribution at site i, #' \code{prob[i, 1:T]} is a vector of detection probabilities at site i, and #' \code{T} is the number of observation occasions. This will invoke -#' (something like) the following call to \code{dHMM} when +#' (something like) the following call to \code{dNmixture_v} when #' \code{nimble} uses the model such as for MCMC: #' #' \code{dNmixture_v(observedCounts[i, 1:T], lambda[i], @@ -102,9 +98,14 @@ #' see \code{\link{dOcc}}. #' For dynamic occupancy, see \code{\link{dDynOcc}}. #' -#' @references D. Turek, P. de Valpine and C. J. Paciorek. 2016. Efficient Markov chain Monte -#' Carlo sampling for hierarchical hidden Markov models. Environmental and Ecological Statistics -#' 23:549–564. DOI 10.1007/s10651-016-0353-z +#' @references +#' +#' D. Turek, P. de Valpine and C. J. Paciorek. 2016. Efficient Markov chain Monte +#' Carlo sampling for hierarchical hidden Markov models. Environmental and +#' Ecological Statistics 23:549–564. DOI 10.1007/s10651-016-0353-z +#' T. Meehan, N. Michel and H. Rue. 2017. Estimating animal abundance with N-mixture +#' models using the R-INLA package for R. arXiv. arXiv:1705.01581 +#' #' #' @examples #' \donttest{ @@ -157,34 +158,28 @@ dNmixture_v <- nimbleFunction( else return(0) } - # Check if there is any data - # if (is.na.vec(x) | is.nan.vec(x)) { - # if (log) return(-Inf) - # return(0) - # } - ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. if (Nmin == -1 & Nmax == -1) { Nmin <- min(x + qpois(0.00001, lambda * (1 - prob))) Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) - Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x } + Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x - obsProb <- 0 - if (Nmax > Nmin) { ## should normally be true, but check in case it isn't in some corner case. - ## print("counting from ", Nmin, " to ", Nmax, " with lambda = ", lambda) - for (N in Nmin:Nmax) { - thisObsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) - obsProb <- obsProb + thisObsProb - } - } else { - ## return a potentially non-zero obsProb - N <- max(x) - obsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) + logProb <- -Inf + if (Nmax > Nmin) { + fac <- 1 + ## ff = lambda prod((1-p_i)) N^(numReps - 1) + ff <- lambda * prod( (1-prob) ) + numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1 + for (i in 1:numN) { + N <- Nmax - i + 1 + fac <- 1 + fac * ff * prod(N/(N - x)) / N + } + logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log(fac) } - if (log) return(log(obsProb)) - else return(obsProb) + if (log) return(logProb) + else return(exp(logProb)) returnType(double()) }) @@ -207,34 +202,28 @@ dNmixture_s <- nimbleFunction( else return(0) } - # Check if there is any data - # if (is.na.vec(x) | is.nan.vec(x)) { - # if (log) return(-Inf) - # return(0) - # } - ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. if (Nmin == -1 & Nmax == -1) { Nmin <- min(x + qpois(0.00001, lambda * (1 - prob))) Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) - Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x } + Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x - obsProb <- 0 - if (Nmax > Nmin) { ## should normally be true, but check in case it isn't in some corner case. - ## print("counting from ", Nmin, " to ", Nmax, " with lambda = ", lambda) - for (N in Nmin:Nmax) { - thisObsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) - obsProb <- obsProb + thisObsProb - } - } else { - ## return a potentially non-zero obsProb - N <- max(x) - obsProb <- dpois(N, lambda) * prod(dbinom(x, size = N, prob = prob)) + logProb <- -Inf + if (Nmax > Nmin) { + fac <- 1 + ## ff = lambda prod((1-p_i)) N^(numReps - 1) + ff <- lambda * ((1-prob)^len) + numN <- Nmax - Nmin + 1 - 1 ## remember: +1 for the count, but -1 because the summation should run from N = maxN to N = minN + 1 + for (i in 1:numN) { + N <- Nmax - i + 1 + fac <- 1 + fac * ff * prod(N/(N - x)) / N + } + logProb <- dpois(Nmin, lambda, log = TRUE) + sum(dbinom(x, size = Nmin, prob = prob, log = TRUE)) + log(fac) } - if (log) return(log(obsProb)) - else return(obsProb) + if (log) return(logProb) + else return(exp(logProb)) returnType(double()) }) @@ -280,7 +269,3 @@ rNmixture_s <- nimbleFunction( return(ans) returnType(double(1)) }) - - - - diff --git a/man/dNmixture.Rd b/man/dNmixture.Rd index d35c68d..cae9ce8 100644 --- a/man/dNmixture.Rd +++ b/man/dNmixture.Rd @@ -8,28 +8,28 @@ \alias{rNmixture_v} \title{N-mixture distribution for use in NIMBLE models} \usage{ -dNmixture_v(x, lambda, prob, minN, maxN, len, log = 0) +dNmixture_v(x, lambda, prob, Nmin = -1, Nmax = -1, len, log = 0) -dNmixture_s(x, lambda, prob, minN, maxN, len, log = 0) +dNmixture_s(x, lambda, prob, Nmin = -1, Nmax = -1, len, log = 0) -rNmixture_v(n, lambda, prob, minN, maxN, len) +rNmixture_v(n, lambda, prob, Nmin = -1, Nmax = -1, len) -rNmixture_s(n, lambda, prob, minN, maxN, len) +rNmixture_s(n, lambda, prob, Nmin = -1, Nmax = -1, len) } \arguments{ -\item{x}{count observation data, 0 and positive integers} +\item{x}{count observation data. 0 and positive integers} \item{lambda}{expected value of the Poisson distribution of true abundance} \item{prob}{observation probability for each X} -\item{minN}{the minimum true abundance to sum over. Set to -1 for distribution -to select based on lambda. Ignored if dynamicMinMax = TRUE} +\item{Nmin}{the minimum abundance to sum over. Set to -1 for distribution +to select based on lambda} -\item{maxN}{the maximum true abundance to sum over. Set to -1 for distribution -to select based on lambda. Ignored if dynamicMinMax = TRUE} +\item{Nmax}{the maximum abundance to sum over. Set to -1 for distribution +to select based on lambda} -\item{len}{The length of the x vector. Needed for simulation in \code{rNmixture_*}.} +\item{len}{The length of the x vector} \item{log}{TRUE or 1 to return log probability. FALSE or 0 to return probability. Need not be specified in the model context.} @@ -37,12 +37,6 @@ Need not be specified in the model context.} \item{n}{number of random draws, each returning a vector of length \code{len}. Currently only \code{n = 1} is supported, but the argument exists for standardization of "\code{r}" functions.} - -\item{notZero}{0 if datum is structural zero, 1 otherwise. Allows for -zero-inflated Poisson models} - -\item{dynamicMinMax}{0 to use input minN and maxN, 1 to ignore -provided minN and maxN and algorithmically select reasonable bounds for N.} } \value{ For \code{dNmixture_s} and \code{dNmixture_v}: the probability (or likelihood) or log @@ -58,7 +52,7 @@ These nimbleFunctions provide distributions that can be used directly in R or in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} and \code{\link[nimble]{nimbleModel}}). -N-mixture models model abundance at a series of sites over many replicate observations. +N-mixture models describe abundance at a series of sites over many replicate observations. The likelihood of an observation in site \code{s} at visit \code{t} depends on the true abundance N, which is assumed to be drawn from a Poisson distribution with mean \code{lambda}. It also depends on the probability of detecting an individual @@ -66,9 +60,9 @@ mean \code{lambda}. It also depends on the probability of detecting an individua according to the Binomial distribution with size parameter N and probability \code{prob} The distribution has two forms, \code{dNmixture_s} and -\code{dNmixture_v}. These differentiate between whether the detection -probability \code{prob} is visit-dependent (vector, \code{dNmixture_v}) -or visit-independent (scalar, dNmixture_s). +\code{dNmixture_v}. These differentiate whether the detection +probability \code{prob} is visit-dependent (a vector, \code{dNmixture_v}) +or visit-independent (a scalar, dNmixture_s). For more explanation, see \href{../doc/Introduction_to_nimbleEcology.html}{package vignette} (or @@ -93,7 +87,7 @@ For example, in \code{nimble} model code, \code{observedCounts[i, 1:T] ~ dNmixture_v(lambda[i], prob[i, 1:T], -minN, maxN, T)} +Nmin, Nmax, T)} declares that the \code{observedCounts[i, 1:T]} (observed counts for site \code{i}, for example) vector follows an @@ -102,12 +96,12 @@ assuming all the parameters have been declared elsewhere in the model. As above, \code{lambda[i]} is the mean of the abundance distribution at site i, \code{prob[i, 1:T]} is a vector of detection probabilities at site i, and \code{T} is the number of observation occasions. This will invoke -(something like) the following call to \code{dHMM} when +(something like) the following call to \code{dNmixture_v} when \code{nimble} uses the model such as for MCMC: \code{dNmixture_v(observedCounts[i, 1:T], lambda[i], prob[i, 1:T], -minN, maxN, T)} +Nmin, Nmax, T)} If an algorithm using a \code{nimble} model with this declaration needs to generate a random draw for \code{observedCounts[1:T]}, it @@ -117,7 +111,7 @@ If the observation probabilities are occasion-independent, one would use: \code{observedCounts[1:T] ~ dNmixture_s(observedCounts[i, 1:T], lambda[i], prob[i], -minN, maxN, T)} +Nmin, Nmax, T)} } \examples{ \donttest{ @@ -130,7 +124,7 @@ prob <- c(0.2, 0.3, 0.2, 0.1, 0.4) # A vector of detection probabilities # Define code for a nimbleModel nc <- nimbleCode({ x[1:5] ~ dNmixture_v(lambda, prob = [1:5], - minN = -1, maxN = -1, len = 5) + Nmin = -1, Nmax = -1, len = 5) lambda ~ dunif(0, 1000) @@ -151,8 +145,10 @@ nmix_model$calculate() } \references{ D. Turek, P. de Valpine and C. J. Paciorek. 2016. Efficient Markov chain Monte -Carlo sampling for hierarchical hidden Markov models. Environmental and Ecological Statistics -23:549–564. DOI 10.1007/s10651-016-0353-z +Carlo sampling for hierarchical hidden Markov models. Environmental and +Ecological Statistics 23:549–564. DOI 10.1007/s10651-016-0353-z +T. Meehan, N. Michel and H. Rue. 2017. Estimating animal abundance with N-mixture +models using the R-INLA package for R. arXiv. arXiv:1705.01581 } \seealso{ For occupancy models dealing with detection/nondetection data, From 92bfff2efcb149fdc82165e7649c7c106355a4dd Mon Sep 17 00:00:00 2001 From: dochvam Date: Thu, 12 Dec 2019 09:35:29 -0800 Subject: [PATCH 13/20] doc fixes --- R/dNmixture.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dNmixture.R b/R/dNmixture.R index 5f99b97..6e2bec6 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -76,7 +76,7 @@ #' #' \code{dNmixture_v(observedCounts[i, 1:T], lambda[i], #' prob[i, 1:T], -#' Nmin, Nmax, T)} +#' Nmin, Nmax, T, log = TRUE)} #' #' If an algorithm using a \code{nimble} model with this declaration #' needs to generate a random draw for \code{observedCounts[1:T]}, it @@ -117,7 +117,7 @@ #' #' # Define code for a nimbleModel #' nc <- nimbleCode({ -#' x[1:5] ~ dNmixture_v(lambda, prob = [1:5], +#' x[1:5] ~ dNmixture_v(lambda, prob = prob[1:5], #' Nmin = -1, Nmax = -1, len = 5) #' #' lambda ~ dunif(0, 1000) From 9d12bbd4d35b2a2056a5ac5489f478752e15fc70 Mon Sep 17 00:00:00 2001 From: perrydv Date: Mon, 17 Feb 2020 10:57:48 -0800 Subject: [PATCH 14/20] revisions to roxygen, especially for new Nmixture --- R/dDHMM.R | 4 +-- R/dDynOcc.R | 2 +- R/dHMM.R | 2 +- R/dNmixture.R | 99 +++++++++++++++++++++++++++++---------------------- R/dOcc.R | 4 +-- 5 files changed, 62 insertions(+), 49 deletions(-) diff --git a/R/dDHMM.R b/R/dDHMM.R index 39d42b5..af30c35 100644 --- a/R/dDHMM.R +++ b/R/dDHMM.R @@ -1,7 +1,7 @@ -#' Dynamic Hidden Markov Model distribution for use in NIMBLE models +#' Dynamic Hidden Markov Model distribution for use in \code{nimble} models #' #' \code{dDHMM} and \code{dDHMMo} provide Dynamic hidden Markov model -#' distributions for NIMBLE models. +#' distributions for \code{nimble} models. #' #' @name dDHMM #' @aliases dDHMM dDHMMo rDHMM rDHMMo diff --git a/R/dDynOcc.R b/R/dDynOcc.R index 08ac0f3..54166b1 100644 --- a/R/dDynOcc.R +++ b/R/dDynOcc.R @@ -1,5 +1,5 @@ # dDynOcc -#' Dynamic occupancy distribution for use in NIMBLE models +#' Dynamic occupancy distribution for use in \code{nimble} models #' \code{dDynOcc_**} and \code{rDynOcc_**} provide dynamic occupancy #' model distributions that can be used directly from R or in \code{nimble} #' models. diff --git a/R/dHMM.R b/R/dHMM.R index 78a1711..b075ccd 100644 --- a/R/dHMM.R +++ b/R/dHMM.R @@ -1,4 +1,4 @@ -#' Hidden Markov Model distribution for use in NIMBLE models +#' Hidden Markov Model distribution for use in \code{nimble} models #' #' \code{dHMM} and \code{dHMMo} provide hidden Markov model #' distributions that can be used directly from R or in \code{nimble} diff --git a/R/dNmixture.R b/R/dNmixture.R index 6e2bec6..2043622 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -1,62 +1,73 @@ # dNmixture -#' N-mixture distribution for use in NIMBLE models +#' N-mixture distribution for use in \code{nimble} models #' -#' \code{dNmixture_s} and \code{dNmixture_v} provides dynamic occupancy model distributions for NIMBLE models. +#' \code{dNmixture_s} and \code{dNmixture_v} provide Poisson-Binomial mixture distributions of abundance ("N-mixture") for use in \code{nimble}a models. #' #' @name dNmixture #' @aliases dNmixture_s dNmixture_v rNmixture_s rNmixture_v #' #' @author Ben Goldstein, Lauren Ponisio, and Perry de Valpine #' -#' @param x count observation data. 0 and positive integers +#' @param x vector of integer counts from a series of sampling occasions. #' @param lambda expected value of the Poisson distribution of true abundance -#' @param prob observation probability for each X -#' @param Nmin the minimum abundance to sum over. Set to -1 for distribution -#' to select based on lambda -#' @param Nmax the maximum abundance to sum over. Set to -1 for distribution -#' to select based on lambda +#' @param prob detection probability (scalar for \code{dNmixture_s}, vector for \code{dNmixture_v}). +#' @param Nmin minimum abundance to sum over for the mixture probability. Set to -1 to select automatically. +#' @param Nmax maximum abundance to sum over for the mixture probability. Set to -1 to select automatically. #' @param len The length of the x vector #' @param log TRUE or 1 to return log probability. FALSE or 0 to return probability. -#' Need not be specified in the model context. #' @param n number of random draws, each returning a vector of length #' \code{len}. Currently only \code{n = 1} is supported, but the #' argument exists for standardization of "\code{r}" functions. #' -#' @details -#' These nimbleFunctions provide distributions that can be used directly in R or -#' in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} -#' and \code{\link[nimble]{nimbleModel}}). +#' @details These nimbleFunctions provide distributions that can be +#' used directly in R or in \code{nimble} hierarchical models (via +#' \code{\link[nimble]{nimbleCode}} and +#' \code{\link[nimble]{nimbleModel}}). #' -#' N-mixture models describe abundance at a series of sites over many replicate observations. -#' The likelihood of an observation in site \code{s} at visit \code{t} depends on the -#' true abundance N, which is assumed to be drawn from a Poisson distribution with -#' mean \code{lambda}. It also depends on the probability of detecting an individual -#' (\code{prob} or \code{prob[t]}). The observed count \code{x[t]} has a probability -#' according to the Binomial distribution with size parameter N and probability \code{prob} +#' An N-mixture model defines a distribution for multiple counts +#' (typically of animals, typically made at a sequence of visits to +#' the same site). The latent number of animals available to be +#' counted, N, follows a Poisson distribution with mean \code{lambda}. +#' Each count, \code{x[i]} for visit \code{i}, #' follows a binomial +#' distribution with size (number of trials) N and probability of +#' success (being counted) \code{prob[i]} #' #' The distribution has two forms, \code{dNmixture_s} and -#' \code{dNmixture_v}. These differentiate whether the detection -#' probability \code{prob} is visit-dependent (a vector, \code{dNmixture_v}) -#' or visit-independent (a scalar, dNmixture_s). +#' \code{dNmixture_v}. With \code{dNmixture_s}, detection probability +#' is a scalar, independent of visit, so \code{prob[i]} should be +#' replaced with \code{prob} above. With \code{dNmixture_v}, +#' detection probability is a vector, with one element for each visit, +#' as written above.. #' #' For more explanation, see #' \href{../doc/Introduction_to_nimbleEcology.html}{package vignette} (or #' \code{vignette("Introduction_to_nimbleEcology")}). #' #' Compared to writing \code{nimble} models with a discrete latent -#' state of abundance N and a separate scalar datum for each observation time, -#' use of these distributions allows one to directly sum (marginalize) over -#' the discrete latent state N and calculate the probability of all -#' observations for a site jointly. +#' state of abundance N and a separate scalar datum for each count, +#' use of these distributions allows one to directly sum (marginalize) +#' over the discrete latent state N and calculate the probability of +#' all observations for a site jointly. #' -#' These are \code{nimbleFunction}s written in the format of user-defined -#' distributions for NIMBLE's extension of the BUGS model language. More -#' information can be found in the NIMBLE User Manual at -#' \href{https://r-nimble.org}{https://r-nimble.org}. +#' If one knows a reasonable range for summation over possible values +#' of N, the start and end of the range can be provided as \code{Nmin} +#' and \code{Nmax}. Otherwise one can set both to -1, in which case +#' values for \code{Nmin} and \code{Nmax} will be chosen based on the +#' 0.0001 and 0.9999 quantiles of the marginal distributions of each +#' count, using the minimum over counts of the former and the maximum +#' over counts of the latter. #' -#' When using these distributions in a \code{nimble} model, the left-hand side -#' will be used as \code{x}, and the user should not provide the \code{log} -#' argument. +#' The summation over N uses the efficient method given by Meehan et +#' al. (2017). +#' +#' These are \code{nimbleFunction}s written in the format of +#' user-defined distributions for NIMBLE's extension of the BUGS model +#' language. More information can be found in the NIMBLE User Manual +#' at \href{https://r-nimble.org}{https://r-nimble.org}. +#' +#' When using these distributions in a \code{nimble} model, the +#' left-hand side will be used as \code{x}, and the user should not +#' provide the \code{log} argument. #' #' For example, in \code{nimble} model code, #' @@ -64,15 +75,16 @@ #' prob[i, 1:T], #' Nmin, Nmax, T)} #' -#' declares that the \code{observedCounts[i, 1:T]} (observed -#' counts for site \code{i}, for example) vector follows an -#' N-mixture model distribution with parameters as indicated, -#' assuming all the parameters have been declared elsewhere in the -#' model. As above, \code{lambda[i]} is the mean of the abundance distribution at site i, -#' \code{prob[i, 1:T]} is a vector of detection probabilities at site i, and -#' \code{T} is the number of observation occasions. This will invoke -#' (something like) the following call to \code{dNmixture_v} when -#' \code{nimble} uses the model such as for MCMC: +#' declares that the \code{observedCounts[i, 1:T]} (observed counts +#' for site \code{i}, for example) vector follows an N-mixture +#' distribution with parameters as indicated, assuming all the +#' parameters have been declared elsewhere in the model. As above, +#' \code{lambda[i]} is the mean of the abundance distribution at site +#' i, \code{prob[i, 1:T]} is a vector of detection probabilities at +#' site i, and \code{T} is the number of observation occasions. This +#' will invoke (something like) the following call to +#' \code{dNmixture_v} when \code{nimble} uses the model such as for +#' MCMC: #' #' \code{dNmixture_v(observedCounts[i, 1:T], lambda[i], #' prob[i, 1:T], @@ -82,7 +94,7 @@ #' needs to generate a random draw for \code{observedCounts[1:T]}, it #' will make a similar invocation of \code{rNmixture_v}, with \code{n = 1}. #' -#' If the observation probabilities are occasion-independent, one would use: +#' If the observation probabilities are visit-independent, one would use: #' #' \code{observedCounts[1:T] ~ dNmixture_s(observedCounts[i, 1:T], lambda[i], #' prob[i], @@ -103,6 +115,7 @@ #' D. Turek, P. de Valpine and C. J. Paciorek. 2016. Efficient Markov chain Monte #' Carlo sampling for hierarchical hidden Markov models. Environmental and #' Ecological Statistics 23:549–564. DOI 10.1007/s10651-016-0353-z +#' #' T. Meehan, N. Michel and H. Rue. 2017. Estimating animal abundance with N-mixture #' models using the R-INLA package for R. arXiv. arXiv:1705.01581 #' diff --git a/R/dOcc.R b/R/dOcc.R index 88638cd..d3eac2a 100644 --- a/R/dOcc.R +++ b/R/dOcc.R @@ -1,5 +1,5 @@ -#' Occupancy distribution suitable for use in code{nimble} models +#' Occupancy distribution suitable for use in \code{nimble} models #' #' \code{dOcc_*} and \code{rOcc_*} provide occupancy model #' distributions that can be used directly from R or in \code{nimble} @@ -29,7 +29,7 @@ #' in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} #' and \code{\link[nimble]{nimbleModel}}). #' -#' The probability (or likelihood) of observation vector \code{x} depends on +#' The probability of observation vector \code{x} depends on #' occupancy probability, \code{probOcc}, and detection probability, #' \code{probDetect} or \code{probDetect[t]}. #' From b6f07873ab8d3232437a6fbe2923fa247e7d61f3 Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 18 Feb 2020 20:57:13 -0800 Subject: [PATCH 15/20] Add new CJS error enforcing initial capture --- R/dCJS.R | 6 ++++++ tests/testthat/test-CJS.R | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/R/dCJS.R b/R/dCJS.R index ef79607..b2e3b78 100644 --- a/R/dCJS.R +++ b/R/dCJS.R @@ -148,6 +148,8 @@ dCJS_ss <- nimbleFunction( if (len != 0) { if (len != length(x)) stop("Argument len must match length of data, or be 0.") } + if (x[1] != 1) stop("dCJS requires specifying first capture. x[1] must equal 1.") + ## Note the calculations used here are actually in hidden Markov model form. probAliveGivenHistory <- 1 ## logProbData will be the final answer @@ -191,6 +193,7 @@ dCJS_sv <- nimbleFunction( if (len != length(x)) stop("Argument len must match length of data, or be 0.") } if (length(x) != length(probCapture)) stop("Length of probCapture does not match length of data.") + if (x[1] != 1) stop("dCJS requires specifying first capture. x[1] must equal 1.") ## Note the calculations used here are actually in hidden Markov model form. probAliveGivenHistory <- 1 @@ -238,6 +241,7 @@ dCJS_vs <- nimbleFunction( } if (length(probSurvive) < length(x) - 1) stop("Length of probSurvive must be at least length of data minus 1.") + if (x[1] != 1) stop("dCJS requires specifying first capture. x[1] must equal 1.") ## Note the calculations used here are actually in hidden Markov model form. probAliveGivenHistory <- 1 @@ -290,6 +294,8 @@ dCJS_vv <- nimbleFunction( if (length(probSurvive) < length(x) - 1) stop("Length of probSurvive must be at least length of data minus 1.") if (length(x) != length(probCapture)) stop("Length of probCapture does not match length of data.") + if (x[1] != 1) stop("dCJS requires specifying first capture. x[1] must equal 1.") + ## Note the calculations used here are actually in hidden Markov model form. probAliveGivenHistory <- 1 ## logProbData will be the final answer diff --git a/tests/testthat/test-CJS.R b/tests/testthat/test-CJS.R index 8a57963..b649965 100644 --- a/tests/testthat/test-CJS.R +++ b/tests/testthat/test-CJS.R @@ -417,6 +417,10 @@ test_that("dCJS errors", { expect_error( dCJS_ss(x = c(1,0,1,0,0), probCapture = 0.4, probSurvive = 0.5, len = 3) ) + expect_error( + dCJS_ss(x = c(0,0,1,0,0), probCapture = 0.4, probSurvive = 0.5) + ) + # dCJS_sv error checks expect_error( @@ -425,6 +429,11 @@ test_that("dCJS errors", { expect_error( dCJS_vs(x = c(1,0,1,0,0), probCapture = 0.1, probSurvive = c(0.9, 0.9, 0.4)) ) + expect_error( + dCJS_vs(x = c(0,0,1,0,0), probCapture = 0.1, probSurvive = c(0.9, 0.9, 0.4, 0.4)) + ) + + # dCJS_vs error checks expect_error( @@ -433,6 +442,9 @@ test_that("dCJS errors", { expect_error( dCJS_sv(x = c(1,0,1,0,0), probCapture = c(1,0.9, 0.9, 0.4), probSurvive = 0.8) ) + expect_error( + dCJS_sv(x = c(0,0,1,0,0), probCapture = c(1,0.9, 0.9, 0.4, 0.4), probSurvive = 0.1) + ) # dCJS_vv error checks expect_error( @@ -445,6 +457,11 @@ test_that("dCJS errors", { dCJS_vv(x = c(1,0,1,0,0), probCapture = c(1,0,1,0,0), probSurvive = c(0.9, 0.9, 0.1, 0.1), len = 2) ) + expect_error( + dCJS_vv(x = c(0,0,1,0,0), probCapture = c(1,0,1,0,0), + probSurvive = c(0.9, 0.9, 0.1, 0.1)) + ) + ### Compiled errors CdCJS_ss <- compileNimble(dCJS_ss) @@ -455,6 +472,10 @@ test_that("dCJS errors", { expect_error( CdCJS_ss(x = c(1,0,1,0,0), probCapture = 0.4, probSurvive = 0.5, len = 3) ) + expect_error( + CdCJS_ss(x = c(0,0,1,0,0), probCapture = 0.4, probSurvive = 0.5) + ) + # dCJS_sv error checks expect_error( @@ -463,6 +484,10 @@ test_that("dCJS errors", { expect_error( CdCJS_vs(x = c(1,0,1,0,0), probCapture = 0.1, probSurvive = c(0.9, 0.9, 0.4)) ) + expect_error( + CdCJS_vs(x = c(0,0,1,0,0), probCapture = 0.1, probSurvive = c(0.9, 0.9, 0.4, 0.4)) + ) + # dCJS_vs error checks expect_error( @@ -471,6 +496,10 @@ test_that("dCJS errors", { expect_error( CdCJS_sv(x = c(1,0,1,0,0), probCapture = c(1,0.9, 0.9, 0.4), probSurvive = 0.8) ) + expect_error( + CdCJS_sv(x = c(0,0,1,0,0), probCapture = c(1,0.9, 0.9, 0.4, 0.4), probSurvive = 0.1) + ) + # dCJS_vv error checks expect_error( @@ -483,5 +512,10 @@ test_that("dCJS errors", { CdCJS_vv(x = c(1,0,1,0,0), probCapture = c(1,0,1,0,0), probSurvive = c(0.9, 0.9, 0.1, 0.1), len = 2) ) + expect_error( + CdCJS_vv(x = c(0,0,1,0,0), probCapture = c(1,0,1,0,0), + probSurvive = c(0.9, 0.9, 0.1, 0.1)) + ) + }) From fa402ca9d4d12b452b12d6f3255086581f57e540 Mon Sep 17 00:00:00 2001 From: dochvam Date: Tue, 18 Feb 2020 20:57:49 -0800 Subject: [PATCH 16/20] Add .Rmd vignette file to repo but not to build; remove future-doc --- .Rbuildignore | 1 + R/dNmixture.R | 4 +- inst/doc/Introduction_to_nimbleEcology.Rmd | 479 +++++++++++++++++++++ inst/future_doc/Nmixture_section.txt | 37 -- 4 files changed, 482 insertions(+), 39 deletions(-) create mode 100644 inst/doc/Introduction_to_nimbleEcology.Rmd delete mode 100644 inst/future_doc/Nmixture_section.txt diff --git a/.Rbuildignore b/.Rbuildignore index c587e9e..7cb32fa 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,3 +7,4 @@ run_tests\.R ^\.travis\.yml$ ^doc$ ^Meta$ +\.Rmd$ diff --git a/R/dNmixture.R b/R/dNmixture.R index 2043622..c817b4c 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -37,7 +37,7 @@ #' is a scalar, independent of visit, so \code{prob[i]} should be #' replaced with \code{prob} above. With \code{dNmixture_v}, #' detection probability is a vector, with one element for each visit, -#' as written above.. +#' as written above. #' #' For more explanation, see #' \href{../doc/Introduction_to_nimbleEcology.html}{package vignette} (or @@ -59,7 +59,7 @@ #' #' The summation over N uses the efficient method given by Meehan et #' al. (2017). -#' +#' #' These are \code{nimbleFunction}s written in the format of #' user-defined distributions for NIMBLE's extension of the BUGS model #' language. More information can be found in the NIMBLE User Manual diff --git a/inst/doc/Introduction_to_nimbleEcology.Rmd b/inst/doc/Introduction_to_nimbleEcology.Rmd new file mode 100644 index 0000000..b675507 --- /dev/null +++ b/inst/doc/Introduction_to_nimbleEcology.Rmd @@ -0,0 +1,479 @@ +--- +title: "Introduction to nimbleEcology" +author: "Perry de Valpine and Ben Goldstein" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Introduction_to_nimbleEcology} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" + ,eval = TRUE ## uncomment this to build quickly without running code. +) +``` + + + +Welcome to `nimbleEcology`. This package provides distributions that can be used in NIMBLE models for common ecological model components. These include: + +- Cormack-Jolly-Seber (CJS) capture-recapture models. +- Dynamic hidden Markov models (DHMMs), which are used in multi-state and multi-event capture-recapture models. +- Occupancy models. +- Dynamic occupancy models. + +We expect to add N-mixture models in the future. + +# What is nimble? + +NIMBLE is a system for writing hierarchical statistical models and algorithms. It is distributed as R package [nimble](https://CRAN.R-project.org/package=nimble). NIMBLE stands for "Numerical Inference for statistical Models using Bayesian and Likelihood Estimation". NIMBLE includes: + +1. A dialect of the BUGS model language that is extensible. NIMBLE uses almost the same model code as WinBUGS, OpenBUGS, and JAGS. Being "extensible" means that it is possible to write new functions and distributions and use them in your models. + +2. An algorithm library including Markov chain Monte Carlo (MCMC) and other methods. + +3. A compiler that generates C++ for each model and algorithm, compiles the C++, and lets you use it from R. You don't need to know anything about C++ to use nimble. + +More information about NIMBLE can be found at [https://r-nimble.org](https://r-nimble.org). + +The paper that describes NIMBLE is [here](https://www.tandfonline.com/doi/full/10.1080/10618600.2016.1172487). + +### Getting help + +The best way to seek user support is the nimble-users list. Information on how to join can be found at [https://r-nimble.org](https://r-nimble.org). + +# The concept of using new distributions for ecological model components. + +The distributions provided in `nimbleEcology` let you simplify model code and the algorithms that use it, such as MCMC. For the ecological models in `nimbleEcology`, the simplification comes from removing some discrete latent states from the model and instead doing the corresponding probability (or likelihood) calculations in a specialized distribution. + +For each of the ecological model components provided by `nimbleEcology`, here are the discrete latent states that are replaced by use of a specialized distribution: + +- CJS (basic capture-recapture): Latent alive-or-dead states. +- HMMs: Latent individual state, such as location or breeding status, as well as alive-or-dead. +- Occupancy: Latent occupancy status of a site. +- Dynamic occupancy: latent occupancy status of a site at a time. + + +Before going further, let's illustrate how `nimbleEcology` can be used for a basic occupancy model. + +## Illustration: A simple occupancy model + +Occupancy models are used for data from repeated visits to a set of sites, where the detection (1) or non-detection (0) of a species of interest is recorded on each visit. Define `y[i, j]` as the observation from site `i` on visit `j`. `y[i, j]` is 1 if the species was seen and 0 if not. + +Typical code for for an occupancy model would be as follows. Naturally, this is written for `nimble`, but the same code should work for JAGS or BUGS (WinBUGS or OpenBUGS) when used as needed for those packages. + +```{r, results='hide', messages=FALSE,warnings=FALSE} +library(nimble) +library(nimbleEcology) +``` + +```{r} +occupancy_code <- nimbleCode({ + psi ~ dunif(0,1) + p ~ dunif (0,1) + for(i in 1:nSites) { + z[i] ~ dbern(psi) + for(j in 1:nVisits) { + y[i, j] ~ dbern(z[i] * p) + } + } +}) +``` + +In this code: + +- `psi` is occupancy probability; +- `p` is detection probability; +- `z[i]` is the latent state of whether a site is really occupied (`z[i]` = 1) or not (`z[i]` = 0); +- `nSites` is the number of sites; and +- `nVisits` is the number of sampling visits to each site. + +The new version of this model using `nimbleEcology`'s specialized occupancy distribution will only work in `nimble` (not JAGS or BUGS). It is: +```{r} +occupancy_code_new <- nimbleCode({ + psi ~ dunif(0,1) + p ~ dunif (0,1) + for(i in 1:nSites) { + y[i, 1:nVisits] ~ dOcc_s(probOcc = psi, probDetect = p, len = nVisits) + } +}) +``` + +In the new code, the vector of data from all visits to site `i`, namely `y[i, 1:nVisits]`, has its likelihood contribution calculated in one step, `dOcc_s`. This occupancy distribution calculates the total probability of the data by summing over the cases that the site is occupied or unoccupied. That means that `z[i]` is not needed in the model, and MCMC will not need to sample over `z[i]`. Details of all calculations, and discussion of the pros and cons of changing models in this way, are given later this vignette. + +The `_s` part of `dOcc_s` means that `p` is a scalar, i.e. it does not vary with visit. If it should vary with visit, a condition sometimes described as being time-dependent, it would need to be provided as a vector, and the distribution function should be `dOcc_v`. + +## MCMC with both versions of the example occupancy model. + +We can run an MCMC for this model in the following steps: + +1. Build the model. +2. Configure the MCMC. +3. Build the MCMC. +4. Compile the model and MCMC. +5. Run the MCMC. +6. Extract the samples. + +The function `nimbleMCMC` does all of these steps for you. The function `runMCMC` does steps 5-6 for you, with convenient management of options such as discarding burn-in samples. The full set of steps allows great control over how you use a model and configure and use an MCMC. We will go through the steps 1-4 and then use `runMCMC` for steps 5-6. + +In this example, we also need simulated data. We can use the same model to create simulated data, rather than writing separate R code for that purpose. + +#### Build the model + +```{r} +occupancy_model <- nimbleModel(occupancy_code, + constants = list(nSites = 50, nVisits = 5)) +``` + +#### Simulate data + +```{r} +occupancy_model$psi <- 0.7 +occupancy_model$p <- 0.15 +simNodes <- occupancy_model$getDependencies(c("psi", "p"), self = FALSE) +occupancy_model$simulate(simNodes) +occupancy_model$z +head(occupancy_model$y, 10) ## first 10 rows +occupancy_model$setData('y') ## set "y" as data +``` + +#### Configure and build the MCMC + +```{r} +MCMCconf <- configureMCMC(occupancy_model) +MCMC <- buildMCMC(occupancy_model) +``` + +#### Compile the model and MCMC + +```{r} +## These can be done in one step, but many people +## find it convenient to do it in two steps. +Coccupancy_model <- compileNimble(occupancy_model) +CMCMC <- compileNimble(MCMC, project = occupancy_model) +``` + +#### Run the MCMC and extract the samples + +```{r, results='hide', messages=FALSE,warnings=FALSE} +samples <- runMCMC(CMCMC, niter = 10000, nburnin = 500, thin = 10) +``` + +### Do it all for the new version of the model + +Next we show all of the same steps, except for simulating data, using the new version of the model. + +```{r, results='hide', messages=FALSE,warnings=FALSE} +occupancy_model_new <- nimbleModel(occupancy_code_new, + constants = list(nSites = 50, nVisits = 5), + data = list(y = occupancy_model$y), + inits = list(psi = 0.7, p = 0.15)) +MCMC_new <- buildMCMC(occupancy_model_new) ## This will use default call to configureMCMC. +Coccupancy_model_new <- compileNimble(occupancy_model_new) +CMCMC_new <- compileNimble(MCMC_new, project = occupancy_model_new) +samples_new <- runMCMC(CMCMC_new, niter = 10000, nburnin = 500, thin = 10) +``` + +We see that the results of the two versions match closely. + +```{r, echo = FALSE, results='hide', messages=FALSE,warnings=FALSE} +library(tidyverse) + +S1 <- gather(as.data.frame(samples), key = "Parameter", value = "Value") +S2 <- gather(as.data.frame(samples_new), key = "Parameter", value = "Value") +Scombo <- bind_rows(list(familiar = S1, new = S2), .id = 'Version') +library(ggplot2) +ggplot(data = Scombo, aes(x = Value)) + + geom_density(aes(group = Version, color = Version)) + + facet_grid(cols = vars(Parameter)) + +# { +# plot(density(as.data.frame(samples)$psi), col = "red", main = "psi") +# points(density(as.data.frame(samples_new)$psi), col = "blue", type = "l", add = TRUE) +# } +# +# { +# plot(density(as.data.frame(samples)$psi), col = "red", main = "psi") +# points(density(as.data.frame(samples_new)$psi), col = "blue", type = "l", add = TRUE) +# } + +``` + +The posterior density plots show that the familiar, conventional version of the model yields the same posterior distribution as the new version, which uses `dOcc_s`. + +## Use the new version for something different: maximum likelihood or maximum a posteriori estimation. + +It is useful that the new way to write the model does not have discrete latent states. Since this example also does not have other latent states or random effects, we can use it simply as a likelihood or posterior density calculator. More about how to do so can be found in the nimble User Manual. Here we illustrate making a compiled `nimbleFunction` for likelihood calculations for parameters `psi` and `p` and maximizing the likelihood using R's `optim`. + +```{r} +CalcLogLik <- nimbleFunction( + setup = function(model, nodes) + calcNodes <- model$getDependencies(nodes, self = FALSE), + run = function(v = double(1)) { + values(model, nodes) <<- v + return(model$calculate(calcNodes)) + returnType(double(0)) + } +) +OccLogLik <- CalcLogLik(occupancy_model_new, c("psi", "p")) +COccLogLik <- compileNimble(OccLogLik, project = occupancy_model_new) +optim(c(0.5, 0.5), COccLogLik$run, control = list(fnscale = -1))$par +``` + +# Distributions provided in `nimbleEcology` + +In this section, we introduce each of the `nimbleEcology` distributions in more detail. We will summarize the calculations using mathematical notation and then describe how to use the distributions in a `nimble` model. + +### A note on static typing + +Some distribution names are followed by a suffix indicating the type of some parameters, for example the `_s` in `dOcc_s`. NIMBLE uses a static typing system, meaning that a function must know in advance if a certain argument will be a scalar, vector, or matrix. There may be notation such as `sv` or `svm` if there are two or three parameters that can be time-independent (`s`) or time-dependent (`v` or `m`) in one or more dimensions. In general, `s` corresponds to a scalar argument, `v` to a vector argument, and `m` to a matrix argument. The order of these type indicators will correspond to the order of the relevant parameters, but always check the documentation when using a new distribution with the R function `?` (e.g., both `?dOcc` and `?dOcc_s` work). + +The static typing requirement may be relaxed somewhat in the future. + + +## Capture-recapture (dCJS) + +Cormack-Jolly-Seber models give the probability of a capture history for each of many individuals, conditional on first capture, based on parameters for survival and detection probability. `nimbleEcology` provides a distribution for individual capture histories, with variants for time-independent and time-dependent survival and/or detection probability. Of course, the survival and detection parameters for the CJS probability may themselves depend on other parameters and/or random effects. The rest of this summary will focus on one individual's capture history. + +Define $\phi_t$ as survival from time $t$ to $t+1$ and $\mathbf{\phi} = (\phi_1, \ldots, \phi_T)$, where $T$ is the length of the series. We use "time" and "sampling occasion" as synonyms in this context, so $T$ is the number of sampling occasions. (Be careful with indexing. Sometimes you might see $\phi_t$ defined as survival from time $t-1$ to $t$.) Define $p_t$ as detection probability at time $t$ and $\mathbf{p} = (p_1, \ldots, p_T)$. Define the capture history as $\mathbf{y} = y_{1:T} = (y_1, \ldots, y_T)$, where each $y_t$ is 0 or 1. The notation $y_{i:j}$ means the sequence of observations from time $i$ to time $j$. The first observation, $y_0 = 1$, **is included** in $\mathbf{y}$. + +There are multiple ways to write the CJS probability. We will do so in a state-space format because that leads to the more general DHMM case next. The probability of observations given parameters, $P(\mathbf{y} | \mathbf{\phi}, \mathbf{p})$, is factored as: +\[ +P(\mathbf{y} | \mathbf{\phi}, \mathbf{p}) = \prod_{t = 1}^T P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p}) +\] + +Each factor $P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p})$ is calculated as: +\[ +P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p}) = I(y_t = 1) (A_t p_t) + I(y_t = 0) (A_t (1-p_t) + (1-A_t)) +\] +The indicator function $I(y_t = 1)$ is 1 if it $y_t$ is 1, 0 otherwise, and vice versa for $I(y_t = 0)$. Here $A_t$ is the probability that the individual is alive at time $t$ given $y_{1:t-1}$, the data up to the previous time. This is calculated as: +\[ +A_t = G_{t-1} \phi_{t-1} +\] +where $G_{t}$ is the probability that the individual is alive at time $t$ given $y_{1:t}$, the data up to the current time. This is calculated as: +\[ +G_{t} = I(y_t = 1) 1 + I(y_t = 0) \frac{A_t (1-p_t)}{A_t (1-p_t) + (1-A_t)} +\] +The sequential calculation is initialized with $G_0 = 1$. For time step $t$, we calculate $A_t$, then $P(y_t | y_{1:t-1}, \mathbf{\phi}, \mathbf{p})$, then $G_t$, leaving us ready for time step $t+1$. This is a trivial hidden Markov model where the latent state, alive or dead, is not written explicitly. + +In the cases with time-independent survival or capture probability, we simply drop the time indexing for the corresponding parameter. + +### CJS distributions in `nimbleEcology` + +CJS models are available in four distributions in `nimbleEcology`. These differ only in whether survival probability and/or capture probability are time-dependent or time-independent, yielding four combinations: + +- `dCJS_ss`: Both are time-independent (scalar). +- `dCJS_sv`: Survival is time-independent (scalar). Capture probability is time-dependent (vector). +- `dCJS_vs`: Survival is time-dependent (vector). Capture probability is time-independent (scalar). +- `dCJS_vv`: Both are time-dependent (vector). + +The usage for each is similar. An example for `dCJS_vs` is: + +`y[i, 1:T] ~ dCJS_sv(probSurvive = phi, probCapture = p[i, 1:T], len = T)` + +Note the following points: + +- `y[i, 1:T]` is a vector of capture history. It is written as if `i` indexes individual, but it could be any vector in any variable in the model. +- Arguments to `dCJS_sv` are named. As in R, this is optional but helpful. Without names, the order matters. +- `probSurvive` is provided as a scalar value, assuming there is a variable called `phi`. +- In variants where `probSurvive` is a vector (`dOcc_vs` and `dOcc_vv`), the $t^{\mbox{th}}$ element of `probSurvive` is $phi_t$ above, namely the probability of survival from occasion $t$ to $t+1$. +- `probCapture` is provided as a vector value, assuming there is a matrix variable called `p`. The value of `probCapture` could be any vector from any variable in the model. +- `probCapture[t]` (i.e., the $t^{\mbox{th}}$ element of `probCapture`, which is `p[i, t]` in this example) is $p_t$ above, namely the probability of capture, if alive, at time $t$. +- The length parameter `len` is in some cases redundant (the information could be determined by the length of other inputs), but nevertheless it is required. + +## Hidden Markov models (HMMs) and Dynamic hidden Markov models (DHMMs) + +Hidden Markov models give the probability of detection history that can handle: + +- Detections in different states, such as locations or breeding states, +- Incorrect state information, such as if breeding state might not be observed correctly, +- Probabilities of transitions between states. + +In a HMM, "dead" is simply another state, and "unobserved" is a possible observation. Thus, HMMs are generalizations of the CJS model. In capture-recapture, HMMs encompass multi-state and multi-event models. + +We again use the factorization +\[ +P(\mathbf{y} | \mathbf{\phi}, \mathbf{p}) = \prod_{t = 1}^T P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p}) +\] +In the case of a HMM, $y_t$ is the recorded state at time $t$, taking an integer value, with the possibility that one possible value corresponds to "unobserved". Define $S$ as the number of possible states and $K$ as the number of possible observed states. Observed states need not correspond to real states. For example, often there is an observed state for "unobserved". Another example is that two real states might never be distinguishable in observations, so they may correpond to only one observed state. + +Define $A_{i, t}$ as the probability that the individual is in state $i$ at time $t$, given $y_{1:t-1}$, the data up to the previous time. Define $p_{i,j,t}$ as the probability that, at time $t$, an individual in state $i$ is observed in state $j$. Then $P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p})$ is calculated as: +\[ +P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p}) = \sum_{i=1}^S A_{i, t} p_{i, y_t, t} +\] +where $j = y_t$ is the observed state of the individual. + +Define $G_{i, t}$ as the probability that the individual is in state $i$ at time $t$ given $y_{1:t}$, the data up to the current time. Define $\psi_{i, j, t}$ as the probability that, from time $t$ to $t+1$, an individual in state $i$ transitions to state $j$. *Note that the time-step is different here ($t$ to $t+1$) than for the CJS model above (which uses $t-1$ to $t$).* Then the calculation of $A_{j,t}$ for each possible state $j$ is given by: +\[ +A_{j, t} = \sum_{i=1}^S G_{i, t-1} \psi_{i, j, t-1}, \mbox{ for } j=1,\ldots,S +\] + +Finally, calculation of $G_{i, t}$ for each possible state $i$ is done by conditioning on $y_t$ as follows: +\[ +G_{i, t} = \frac{A_{i, t} p_{i, y_t, t}}{P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p})}, \mbox{ for } j=1,\ldots,S +\] + +The calculation of the total probability of a detection history with uncertain state information starts by initializing $A_{i, 1}$ to some choice of initial (or prior) probability of being in each state before any observations are made. Then, starting with $t = 1$, we calculate $P(y_{t} | y_{1:t-1}, \mathbf{\phi}, \mathbf{p})$. If $t < T$, we prepare for time step $t+1$ by calculating $G_{i, t}$ for each $i$ followed by $A_{i, t+1}$ for each $i$. + +If the transition probabilities, $\psi_{i, j, t}$, actually depend on $t$, we refer to the model as "Dynamic", namely a DHMM instead of an HMM. It may also be the case that observation probabilities $p_{i, j, t}$ depend on $t$ or not. + +## HMM and DHMM distributions in `nimbleEcology` + +HMMs and DHMMs are available in four distributions in `nimbleEcology`. These differ only in whether transition and/or observation probabilities are time-dependent or time-independent, yielding four combinations: + +- `dHMM`: Both are time-independent. +- `dDHMM`: State transitions are time-dependent (dynamic). Observation probabilities are time-independent. +- `dHMMo`: Observation probabilities are time-dependent. State transitions are time-independent (not dynamic). +- `dDHMMo`: Both are time-dependent. + +In this notation, the leading `D` is for "dynamic" (time-dependent state transitions), while the trailing "o" is for "observations" being time-dependent. + +The usage for each is similar. An example for `dDHMM` is: + +`y[i, 1:T] ~ dDHMM(init = initial_probs[i, 1:T], obsProb = p[1:nStates, 1:nCat], transProb = Trans[i, 1:nStates, 1:nStates, 1:(T-1)], len = T)` + +Note the following points: + +- As above, this is written as if `i` indexes individuals in the model, but this is arbitrary as an example. +- `nStates` is $S$ above. +- `nCat` is $K$ above. +- `init[i]` is the initial probability of being in state `i`, namely $A_{i, 1}$ above. +- `obsProb[i, j]` (i.e., `p[i, j]` in this example) is probability of observing an individual who is truly in state `i` as being in observation state `j`. This is $p_{i, j}$ above if indexing by $t$ is not needed. If observation probabilities are time-dependent (in `dHMMo` and `dDHMMo`), then `obsProb[i, j, t]` is $p_{i, j, t}$ above. +- `transProb[i, j, t]` (i.e., `Trans[i, j, t]` in this example) is the probability that an individual changes from state `i` to state `j` during time step `t` to `t+1`. This is $\psi_{i,j,t}$ above. +- `len` is the length of the observation record, `T` in this example. +- `len` must match the length of the data, `y[i, 1:T]` in this example. +- The dimensions of `obsProb` must be $K \times S$ in the time-independent case (`dHMM` or `dDHMM`) or $K \times S \times T$ in the time-dependent case (`dHMMo` or `dDHMMo`). +- The dimensions of `transProb` must be $S \times S$ in the time-independent case (`dHMM` or `dHMMo`) or $S \times S \times (T-1)$ in the time-dependent case (`dDHMM` or `dDHMMo`). The last dimension is one less than $T$ because no transition to time $T+1$ is needed. + +## Occupancy + +An occupancy model gives the probability of a series of detection/non-detection records for a species during multiple visits to each of multiple site. The occupancy distributions in `nimbleEcology` give the probability of the detection history for one site, so this summary focuses on data from one site. + +Define $y_t$ to be the observation at time $t$, with $y_t = 1$ for a detection and $y_t = 0$ for a non-detection. Again, we use "time" as a synonym for "sampling occasion". Again, define the vector of observations as $\mathbf{y} = (y_1, \ldots, y_T)$, where $T$ is the number of sampling occasions. + +Define $\psi$ as the probability that a site is occupied. Define $p_t$ as the probability of a detection on sampling occasion $t$ if the site is occupied, and $\mathbf{p} = (p_1, \ldots, p_T)$. Then the probability of the data given the parameters is: +\[ +P(\mathbf{y} | \psi, \mathbf{p}) = \psi \prod_{t = 1}^T p_t^{y_t} (1-p_t)^{1-y_t} + (1-\psi) I\left(\sum_{t=1}^T y_t= 0 \right) +\] +The indicator function usage in the last term, $I(\cdot)$, is 1 if the given summation is 0, i.e. if no detections were made. Otherwise it is 0. + +### Occupancy models in `nimbleEcology` + +Occupancy models are available in two distributions in `nimbleEcology`. These differ only in whether detection probability depends on time or not: + +- `dOcc_s`: Detection probability is time-independent (scalar). +- `dOcc_v`: Detection probability is time-dependent (vector). + +An example for `dOcc_v` is: + +`y[i, 1:T] ~ dOcc_v(probOcc = psi, probDetect = p[i, 1:T], len = T)` + +Note the following points: + +- This is written as if `i` indexes site, but the variables could be arranged in other ways. +- `y[i, 1:T]` is the detection record. +- `probOcc` is the probability of occupancy, $\psi$ above. +- `probDetect` is the vector of detection probabilities, $\mathbf{p}$ above. In the case of `dOcc_s`, `probDetect` would be a scalar. +- `len` is the length of the detection record. + +## Dynamic occupancy + +Dynamic occupancy models give the probability of detection records from multiple seasons (primary periods) in each of which there were multiple sampling occasions (secondary periods) at each of multiple sites. The dynamic occupancy distribution in `nimbleEcology` provides probability calculations for data from one site at a time. + +We will use "year" for primary periods and "time" or "sampling occasion" as above for secondary periods. Define $y_{r, t}$ as the observation (1 or 0) on sampling occasion $t$ of year $r$. Define $\mathbf{y}_r$ as the detection history in year $r$, i.e. $\mathbf{y}_r = (y_{r, 1}, \ldots, y_{r, T})$ . Define $\phi_t$ as the probability of being occupied at time $t+1$ given the site was occupied at time $t$, called "persistence". Define $\gamma_t$ as the probability of being occupied at time $t+1$ given the site was unoccupied at time $t$, called "colonization". Define $p_{r, t}$ as the detection probability on sampling occasion $t$ of year $r$ given the site is occupied. + +The probability of all the data given parameters is: +\[ +P(\mathbf{y} | \mathbf{\phi}, \mathbf{\gamma}, \mathbf{p}) = \prod_{r = 1}^R P(\mathbf{y}_{r} | \mathbf{y}_{1:r-1}, \mathbf{\phi}, \mathbf{\gamma}, \mathbf{p}) +\] +Each factor $P(\mathbf{y}_{r} | \mathbf{y}_{1:r-1}, \mathbf{\phi}, \mathbf{\gamma}, \mathbf{p})$ is calculated as: +\[ +P(\mathbf{y}_{r} | \mathbf{y}_{1:r-1}, \mathbf{\phi}, \mathbf{\gamma}, \mathbf{p}) = A_{r} \prod_{t = 1}^T p_{r,t}^{y_{r,t}} (1-p_{r,t})^{1-y_{r,t}} + (1-A_{r}) I\left(\sum_{t=1}^T y_{r,t} = 0 \right) +\] +Here $A_r$ is the probability that the site is occupied in year $r$ given observations up to the previous year $\mathbf{y}_{1:r-1}$. Otherwise, this equation is just like the occupancy model above, except there are indices for year $r$ in many places. $A_r$ is calculated as: +\[ +A_r = G_{r-1} \phi_{r-1} + (1-G_{r-1}) \gamma_{r-1} +\] +Here $G_r$ is the probability that the site is occupied given the data up to time $r$, $\mathbf{y}_{1:r}$. This is calculated as +\[ +G_r = \frac{A_{r} \prod_{t = 1}^T p_{r,t}^{y_{r,t}} (1-p_{r,t})^{1-y_{r,t}}}{\mathbf{y}_{r} | \mathbf{y}_{1:r-1}, \mathbf{\phi}, \mathbf{\gamma}, \mathbf{p})} +\] + +The sequential calculation is initiated with $A_1$, which is natural to think of as "$\psi_1$", probability of occupancy in the first year. Then for year $r$, starting with $r = 1$, we calculate $P(\mathbf{y}_{r} | \mathbf{y}_{1:r-1}, \mathbf{\phi}, \mathbf{\gamma}, \mathbf{p})$. If $r < R$, we calculate $G_r$ and then $A_{r+1}$, leaving us ready to increment $r$ and iterate. + +### Dynamic occupancy models in `nimbleEcology` + +Dynamic occupancy models are available in twelve distributions in `nimbleEcology`. These differ in whether persistence and/or colonization probabilities are time-dependent, with a "s" (time-independent) and "v" (time-dependent) notation similar to the distributions above. The distributions are named by `dDynOcc_` followed by three letters. Each letter indicates the typing (or dimension) of the persistence, colonization, and detection probabilities, respectively: + +- `dDynOcc_s**` functions take time-independent (scalar) persistence probabilities, while `dDynOcc_v**`functions take time-dependent (vector) persistence probabilities +- `dDynOcc_*s*` functions take time-independent (scalar) colonization probabilities, while `dDynOcc_*v*`functions take time-dependent (vector) colonization probabilities +- `dDynOcc_**s` functions take time-independent (scalar) observation probabilities, while `dDynOcc_**v` functions take observation probabilities dependent on time step (vector) and `dDynOcc_**m` functions take observation probabilities dependent on both time step and observation event (matrix) + +Expanding these typing possibilities gives $2 \times 2 \times 3 = 12$ total functions: + +- `dDynOcc_sss` +- `dDynOcc_svs` +- `dDynOcc_vss` +- `dDynOcc_vvs` +- `dDynOcc_ssv` +- `dDynOcc_svv` +- `dDynOcc_vsv` +- `dDynOcc_vvv` +- `dDynOcc_ssm` +- `dDynOcc_svm` +- `dDynOcc_vsm` +- `dDynOcc_vvm` + +An example for `dDynOcc_svs` is: + +`y[i, 1:T] ~ dDynOcc_sv(init = psi1[i], probPersist = phi[i], probColonize = gamma[i, 1:T], p = p, len = T)` + +Note the following points: + +- As in the examples above, this is written as if `i` indexes the individual site, but the variables could be arranged in other ways. +- `y[i, 1:T]` is the detection record. +- `probPersist` is the probability of persistence, $\phi$ above. +- `probColonize` is the vector of detection probabilities, $\mathbf{\gamma}$ above. In the case of `dDynOcc_*s`, `probColonize` would be a scalar. +- `len` is the length of the detection record. +- `p` here is a single constant value of observation probability for all samples. If `p` changed with time or observation event, we would need to use a different function (`dDynOcc_**v` or `dDynOcc_**m`). + +## N-mixture + +An N-mixture model gives the probability of a set of counts from repeated visits to each of multiple sites. The N-mixture distribution in `nimbleEcology` gives probability calculations for data from one site. + +Define $y_t$ as the number of individuals counted at the site on sampling occasion (time) $t$. Define $\mathbf{y} = (y_1, \ldots, y_t)$. Define $\lambda$ as the average density of individuals, such that the true number of individuals, $N$, follows a Poisson distribution with mean $\lambda$. Define $p_t$ to be the detection probability for each individual at time $t$, and $\mathbf{p} = (p_1, \ldots, p_t)$. + +The probability of the data given the parameters is: +\[ +P(\mathbf{y} | \lambda, \mathbf{p}) = \sum_{N = 1}^\infty P(N | \lambda) \prod_{t = 1}^T P(y_t | N) +\] +where $P(N | \lambda)$ is a Poisson probability and $P(y_t | N)$ is a binomial probability. That is, $y_t \sim \mbox{binomial}(N, p_t)$, and the $y_t$s are independent. + +In practice, the summation over $N$ can start at a value greater than 0 and must be truncated at some value $\lt \infty$. Two options are provided for the range of summation: + +1. Start the summation at the largest value of $y_t$ (there must be at least this many individuals) and truncate it at a value of $maxN$ provided by the user. +2. The following heuristic can be used. + +If we consider a single $y_t$, then $N - y_t | y_t \sim \mbox{Poisson}(\lambda (1-p_t))$ (*See opening example of Royle and Dorazio*). Thus, a natural upper end for the summation range of $N$ would be $y_t$ plus a very high quantile of The $\mbox{Poisson}(\lambda (1-p_t))$ distribution. For a set of observations, a natural choice would be the maximum of such values across the observation times. We use the 0.99999 quantile to be conservative. + +Correspondingly, the summation can begin at smallest of the 0.00001 quantiles of $N | y_t$. If $p_t$ is small, this can be considerably larger than the maximum value of $y_t$, allowing more efficient computation. + +### N-mixture models in `nimbleEcology` + +N-mixture models are available in two distributions in `nimbleEcology`. They differ in whether probability of detection is visit-dependent (vector case, corresponding to `dNmixture_v`) or visit-independent (scalar, `dNmixture_s`). + +An example is: + +`y[i, 1:T] ~ dNmixture_v(lambda = lambda, p = p[1:T], minN = minN, maxN = maxN, len = T)` + +- `lambda` is $\lambda$ above. +- `p[1:T]` is $\mathbf{p}$ above. +- `len` is $T$. +- `minN` and `maxN` provide the lower and upper bounds for the sum over Ns (option 1 above). If both are set to `-1`, bounds are chosen dynamically using quantiles of the Poisson distribution (option 2 above). + + + diff --git a/inst/future_doc/Nmixture_section.txt b/inst/future_doc/Nmixture_section.txt deleted file mode 100644 index 5453a9b..0000000 --- a/inst/future_doc/Nmixture_section.txt +++ /dev/null @@ -1,37 +0,0 @@ -This contains draft text for when we add dNmixture to the package. - -## N-mixture - -An N-mixture model gives the probability of a set of counts from repeated visits to each of multiple sites. The N-mixture distribution in `nimbleEcology` gives probability calculations for data from one site. - -Define $y_t$ as the number of individuals counted at the site on sampling occasion (time) $t$. Define $\mathbf{y} = (y_1, \ldots, y_t)$. Define $\lambda$ as the average density of individuals, such that the true number of individuals, $N$, follows a Poisson distribution with mean $\lambda$. Define $p_t$ to be the detection probability for each individual at time $t$, and $\mathbf{p} = (p_1, \ldots, p_t)$. - -The probability of the data given the parameters is: -\[ -P(\mathbf{y} | \lambda, \mathbf{p}) = \sum_{N = 1}^\infty P(N | \lambda) \prod_{t = 1}^T P(y_t | N) -\] -where $P(N | \lambda)$ is a Poisson probability and $P(y_t | N)$ is a binomial probability. That is, $y_t \sim \mbox{binomial}(N, p_t)$, and the $y_t$s are independent. - -In practice, the summation over $N$ can start at a value greater than 0 and must be truncated at some value $\lt \infty$. Two options are provided for the range of summation: - -1. Start the summation at the largest value of $y_t$ (there must be at least this many individuals) and truncate it at a value of $N$ provided by the user. -2. The following heuristic can be used. - -If we consider a single $y_t$, then $N - y_t | y_t \sim \mbox{Poisson}(\lambda (1-p_t))$ (*See opening example of Royle and Dorazio*). Thus, a natural upper end for the summation range of $N$ would be $y_t$ plus a very high quantile of The $\mbox{Poisson}(\lambda (1-p_t))$ distribution. For a set of observations, a natural choice would be the maximum of such values across the observation times. We use the 0.99999 quantile to be conservative. - -Correspondingly, the summation can begin at smallest of the 0.00001 quantiles of $N | y_t$. If $p_t$ is small, this can be considerably larger than the maximum value of $y_t$, allowing more efficient computation. - -*Describe structural zeros.* - -### N-mixture models in `nimbleEcology` - -An example is: - -`y[i, 1:T] ~ dNmix(lambda = lambda, p = p[1:T], knownZero = 0, len = T)` - -- `lambda` is $\lambda$ above. -- `p[1:t]` is $\mathbf{p}$ above. -- `knownZero` is 1 if the `lambda` should be replaced with 0 and an easy answer can be returned without summing over $N$. -- `len` is $T$. - - From f840e1915b32884bf56602fdc744d38ac34a7ce1 Mon Sep 17 00:00:00 2001 From: dochvam Date: Wed, 19 Feb 2020 12:00:47 -0800 Subject: [PATCH 17/20] Update roxygen files --- man/dDHMM.Rd | 4 +- man/dDynOcc.Rd | 4 +- man/dHMM.Rd | 2 +- man/dNmixture.Rd | 110 ++++++++++++++++++++++++++--------------------- man/dOcc.Rd | 4 +- 5 files changed, 69 insertions(+), 55 deletions(-) diff --git a/man/dDHMM.Rd b/man/dDHMM.Rd index 8d04dfb..62a8523 100644 --- a/man/dDHMM.Rd +++ b/man/dDHMM.Rd @@ -5,7 +5,7 @@ \alias{dDHMMo} \alias{rDHMM} \alias{rDHMMo} -\title{Dynamic Hidden Markov Model distribution for use in NIMBLE models} +\title{Dynamic Hidden Markov Model distribution for use in \code{nimble} models} \usage{ dDHMM(x, init, probObs, probTrans, len, log = 0) @@ -52,7 +52,7 @@ For \code{rDHMM} and \code{rDHMMo}: a simulated detection history, \code{x}. } \description{ \code{dDHMM} and \code{dDHMMo} provide Dynamic hidden Markov model -distributions for NIMBLE models. +distributions for \code{nimble} models. } \details{ These nimbleFunctions provide distributions that can be used directly in R or diff --git a/man/dDynOcc.Rd b/man/dDynOcc.Rd index 44463a2..ec3ff78 100644 --- a/man/dDynOcc.Rd +++ b/man/dDynOcc.Rd @@ -26,7 +26,7 @@ \alias{rDynOcc_vss} \alias{rDynOcc_svs} \alias{rDynOcc_sss} -\title{Dynamic occupancy distribution for use in NIMBLE models +\title{Dynamic occupancy distribution for use in \code{nimble} models \code{dDynOcc_**} and \code{rDynOcc_**} provide dynamic occupancy model distributions that can be used directly from R or in \code{nimble} models.} @@ -122,7 +122,7 @@ of observation vector \code{x}. For \code{rDynOcc_***}: a simulated detection history, \code{x}. } \description{ -Dynamic occupancy distribution for use in NIMBLE models +Dynamic occupancy distribution for use in \code{nimble} models \code{dDynOcc_**} and \code{rDynOcc_**} provide dynamic occupancy model distributions that can be used directly from R or in \code{nimble} models. diff --git a/man/dHMM.Rd b/man/dHMM.Rd index 8df744f..5c8d6d5 100644 --- a/man/dHMM.Rd +++ b/man/dHMM.Rd @@ -5,7 +5,7 @@ \alias{dHMMo} \alias{rHMM} \alias{rHMMo} -\title{Hidden Markov Model distribution for use in NIMBLE models} +\title{Hidden Markov Model distribution for use in \code{nimble} models} \usage{ dHMM(x, init, probObs, probTrans, len = 0, log = 0) diff --git a/man/dNmixture.Rd b/man/dNmixture.Rd index cae9ce8..7e6e34a 100644 --- a/man/dNmixture.Rd +++ b/man/dNmixture.Rd @@ -6,7 +6,7 @@ \alias{dNmixture_v} \alias{rNmixture_s} \alias{rNmixture_v} -\title{N-mixture distribution for use in NIMBLE models} +\title{N-mixture distribution for use in \code{nimble} models} \usage{ dNmixture_v(x, lambda, prob, Nmin = -1, Nmax = -1, len, log = 0) @@ -17,22 +17,19 @@ rNmixture_v(n, lambda, prob, Nmin = -1, Nmax = -1, len) rNmixture_s(n, lambda, prob, Nmin = -1, Nmax = -1, len) } \arguments{ -\item{x}{count observation data. 0 and positive integers} +\item{x}{vector of integer counts from a series of sampling occasions.} \item{lambda}{expected value of the Poisson distribution of true abundance} -\item{prob}{observation probability for each X} +\item{prob}{detection probability (scalar for \code{dNmixture_s}, vector for \code{dNmixture_v}).} -\item{Nmin}{the minimum abundance to sum over. Set to -1 for distribution -to select based on lambda} +\item{Nmin}{minimum abundance to sum over for the mixture probability. Set to -1 to select automatically.} -\item{Nmax}{the maximum abundance to sum over. Set to -1 for distribution -to select based on lambda} +\item{Nmax}{maximum abundance to sum over for the mixture probability. Set to -1 to select automatically.} \item{len}{The length of the x vector} -\item{log}{TRUE or 1 to return log probability. FALSE or 0 to return probability. -Need not be specified in the model context.} +\item{log}{TRUE or 1 to return log probability. FALSE or 0 to return probability.} \item{n}{number of random draws, each returning a vector of length \code{len}. Currently only \code{n = 1} is supported, but the @@ -45,43 +42,58 @@ probability of observation vector \code{x}. For \code{rNmixture_s} and \code{rNmixture_v}: a simulated detection history, \code{x}. } \description{ -\code{dNmixture_s} and \code{dNmixture_v} provides dynamic occupancy model distributions for NIMBLE models. +\code{dNmixture_s} and \code{dNmixture_v} provide Poisson-Binomial mixture distributions of abundance ("N-mixture") for use in \code{nimble}a models. } \details{ -These nimbleFunctions provide distributions that can be used directly in R or -in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} -and \code{\link[nimble]{nimbleModel}}). - -N-mixture models describe abundance at a series of sites over many replicate observations. -The likelihood of an observation in site \code{s} at visit \code{t} depends on the -true abundance N, which is assumed to be drawn from a Poisson distribution with -mean \code{lambda}. It also depends on the probability of detecting an individual -(\code{prob} or \code{prob[t]}). The observed count \code{x[t]} has a probability -according to the Binomial distribution with size parameter N and probability \code{prob} +These nimbleFunctions provide distributions that can be + used directly in R or in \code{nimble} hierarchical models (via + \code{\link[nimble]{nimbleCode}} and + \code{\link[nimble]{nimbleModel}}). + +An N-mixture model defines a distribution for multiple counts +(typically of animals, typically made at a sequence of visits to +the same site). The latent number of animals available to be +counted, N, follows a Poisson distribution with mean \code{lambda}. +Each count, \code{x[i]} for visit \code{i}, #' follows a binomial +distribution with size (number of trials) N and probability of +success (being counted) \code{prob[i]} The distribution has two forms, \code{dNmixture_s} and -\code{dNmixture_v}. These differentiate whether the detection -probability \code{prob} is visit-dependent (a vector, \code{dNmixture_v}) -or visit-independent (a scalar, dNmixture_s). +\code{dNmixture_v}. With \code{dNmixture_s}, detection probability +is a scalar, independent of visit, so \code{prob[i]} should be +replaced with \code{prob} above. With \code{dNmixture_v}, +detection probability is a vector, with one element for each visit, +as written above. For more explanation, see \href{../doc/Introduction_to_nimbleEcology.html}{package vignette} (or \code{vignette("Introduction_to_nimbleEcology")}). Compared to writing \code{nimble} models with a discrete latent -state of abundance N and a separate scalar datum for each observation time, -use of these distributions allows one to directly sum (marginalize) over -the discrete latent state N and calculate the probability of all -observations for a site jointly. - -These are \code{nimbleFunction}s written in the format of user-defined -distributions for NIMBLE's extension of the BUGS model language. More -information can be found in the NIMBLE User Manual at -\href{https://r-nimble.org}{https://r-nimble.org}. - -When using these distributions in a \code{nimble} model, the left-hand side -will be used as \code{x}, and the user should not provide the \code{log} -argument. +state of abundance N and a separate scalar datum for each count, +use of these distributions allows one to directly sum (marginalize) +over the discrete latent state N and calculate the probability of +all observations for a site jointly. + +If one knows a reasonable range for summation over possible values +of N, the start and end of the range can be provided as \code{Nmin} +and \code{Nmax}. Otherwise one can set both to -1, in which case +values for \code{Nmin} and \code{Nmax} will be chosen based on the +0.0001 and 0.9999 quantiles of the marginal distributions of each +count, using the minimum over counts of the former and the maximum +over counts of the latter. + +The summation over N uses the efficient method given by Meehan et +al. (2017). + +These are \code{nimbleFunction}s written in the format of +user-defined distributions for NIMBLE's extension of the BUGS model +language. More information can be found in the NIMBLE User Manual +at \href{https://r-nimble.org}{https://r-nimble.org}. + +When using these distributions in a \code{nimble} model, the +left-hand side will be used as \code{x}, and the user should not +provide the \code{log} argument. For example, in \code{nimble} model code, @@ -89,25 +101,26 @@ For example, in \code{nimble} model code, prob[i, 1:T], Nmin, Nmax, T)} -declares that the \code{observedCounts[i, 1:T]} (observed -counts for site \code{i}, for example) vector follows an -N-mixture model distribution with parameters as indicated, -assuming all the parameters have been declared elsewhere in the -model. As above, \code{lambda[i]} is the mean of the abundance distribution at site i, -\code{prob[i, 1:T]} is a vector of detection probabilities at site i, and -\code{T} is the number of observation occasions. This will invoke -(something like) the following call to \code{dNmixture_v} when -\code{nimble} uses the model such as for MCMC: +declares that the \code{observedCounts[i, 1:T]} (observed counts +for site \code{i}, for example) vector follows an N-mixture +distribution with parameters as indicated, assuming all the +parameters have been declared elsewhere in the model. As above, +\code{lambda[i]} is the mean of the abundance distribution at site +i, \code{prob[i, 1:T]} is a vector of detection probabilities at +site i, and \code{T} is the number of observation occasions. This +will invoke (something like) the following call to +\code{dNmixture_v} when \code{nimble} uses the model such as for +MCMC: \code{dNmixture_v(observedCounts[i, 1:T], lambda[i], prob[i, 1:T], -Nmin, Nmax, T)} +Nmin, Nmax, T, log = TRUE)} If an algorithm using a \code{nimble} model with this declaration needs to generate a random draw for \code{observedCounts[1:T]}, it will make a similar invocation of \code{rNmixture_v}, with \code{n = 1}. -If the observation probabilities are occasion-independent, one would use: +If the observation probabilities are visit-independent, one would use: \code{observedCounts[1:T] ~ dNmixture_s(observedCounts[i, 1:T], lambda[i], prob[i], @@ -123,7 +136,7 @@ prob <- c(0.2, 0.3, 0.2, 0.1, 0.4) # A vector of detection probabilities # Define code for a nimbleModel nc <- nimbleCode({ - x[1:5] ~ dNmixture_v(lambda, prob = [1:5], + x[1:5] ~ dNmixture_v(lambda, prob = prob[1:5], Nmin = -1, Nmax = -1, len = 5) lambda ~ dunif(0, 1000) @@ -147,6 +160,7 @@ nmix_model$calculate() D. Turek, P. de Valpine and C. J. Paciorek. 2016. Efficient Markov chain Monte Carlo sampling for hierarchical hidden Markov models. Environmental and Ecological Statistics 23:549–564. DOI 10.1007/s10651-016-0353-z + T. Meehan, N. Michel and H. Rue. 2017. Estimating animal abundance with N-mixture models using the R-INLA package for R. arXiv. arXiv:1705.01581 } diff --git a/man/dOcc.Rd b/man/dOcc.Rd index 2d91d5f..2dc093a 100644 --- a/man/dOcc.Rd +++ b/man/dOcc.Rd @@ -6,7 +6,7 @@ \alias{dOcc_v} \alias{rOcc_s} \alias{rOcc_v} -\title{Occupancy distribution suitable for use in code{nimble} models} +\title{Occupancy distribution suitable for use in \code{nimble} models} \usage{ dOcc_s(x, probOcc, probDetect, len = 0, log = 0) @@ -49,7 +49,7 @@ These nimbleFunctions provide distributions that can be used directly in R or in \code{nimble} hierarchical models (via \code{\link[nimble]{nimbleCode}} and \code{\link[nimble]{nimbleModel}}). -The probability (or likelihood) of observation vector \code{x} depends on +The probability of observation vector \code{x} depends on occupancy probability, \code{probOcc}, and detection probability, \code{probDetect} or \code{probDetect[t]}. From 87f4127214f2a83d8e88e59f751cfaa45a266e07 Mon Sep 17 00:00:00 2001 From: dochvam Date: Wed, 19 Feb 2020 12:24:41 -0800 Subject: [PATCH 18/20] manual edit of NAMESPACE to resolve cran check note --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 976b546..322fa58 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -52,3 +52,4 @@ import(nimble) importFrom(stats,dbinom) importFrom(stats,rbinom) importFrom(stats,runif) +importFrom("stats", "dpois", "qpois", "rpois") From e5cc9d7132b14c3fe928eb13009b308ec861fc6f Mon Sep 17 00:00:00 2001 From: dochvam Date: Thu, 20 Feb 2020 10:27:37 -0800 Subject: [PATCH 19/20] fix date, fix DHMM double reg, separate dynamic selection of Nmin and Nmax --- DESCRIPTION | 2 +- R/dDHMM.R | 31 ------------------------------- R/dNmixture.R | 11 +++++++---- 3 files changed, 8 insertions(+), 36 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f96d095..8dbf61f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,7 @@ Authors@R: c(person("Benjamin R.", "Goldstein", role = c("aut", "cre"), person("Daniel", "Turek", role = "aut"), person("Lauren", "Ponisio", role = "aut"), person("Perry", "de Valpine", role = "aut")) -Date: 2019-09-24 +Date: 2020-02-20 Description: Common ecological distributions for 'nimble' models in the form of nimbleFunction objects. Includes Cormack-Jolly-Seber, occupancy, dynamic occupancy, hidden Markov, and dynamic hidden Markov models. (Jolly (1965) , Seber (1965) <10.2307/2333827>, Turek et al. (2016) ). diff --git a/R/dDHMM.R b/R/dDHMM.R index af30c35..48c6cb2 100644 --- a/R/dDHMM.R +++ b/R/dDHMM.R @@ -330,34 +330,3 @@ rDHMMo <- nimbleFunction( # } # return(ans) # }) - - - -registerDistributions(list( - dDHMM = list( - BUGSdist = "dDHMM(init, probObs, probTrans, len)", - Rdist = "dDHMM(init, probObs, probTrans, len)", - discrete = TRUE, - types = c('value = double(1)', - 'init = double(1)', - 'probObs = double(2)', - 'probTrans = double(3)', - 'len = double()'), - mixedSizes = TRUE, - pqAvail = FALSE)) - ) - -registerDistributions(list( - dDHMMo = list( - BUGSdist = "dDHMMo(init, probObs, probTrans, len)", - Rdist = "dDHMMo(init, probObs, probTrans, len)", - discrete = TRUE, - types = c('value = double(1)', - 'init = double(1)', - 'probObs = double(3)', - 'probTrans = double(3)', - 'len = double()'), - mixedSizes = TRUE, - pqAvail = FALSE)) - ) - diff --git a/R/dNmixture.R b/R/dNmixture.R index c817b4c..042b33b 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -173,11 +173,12 @@ dNmixture_v <- nimbleFunction( ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. - if (Nmin == -1 & Nmax == -1) { + if (Nmin == -1) { Nmin <- min(x + qpois(0.00001, lambda * (1 - prob))) - Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) } - Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x + if (Nmax == -1) { + Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) + } Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x logProb <- -Inf if (Nmax > Nmin) { @@ -217,8 +218,10 @@ dNmixture_s <- nimbleFunction( ## For each x, the conditional distribution of (N - x | x) is pois(lambda * (1-p)) ## We determine the lowest N and highest N at extreme quantiles and sum over those. - if (Nmin == -1 & Nmax == -1) { + if (Nmin == -1) { Nmin <- min(x + qpois(0.00001, lambda * (1 - prob))) + } + if (Nmax == -1) { Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) } Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x From 9d65989f318231d07d41b6b524abd3a235783793 Mon Sep 17 00:00:00 2001 From: dochvam Date: Thu, 20 Feb 2020 10:42:47 -0800 Subject: [PATCH 20/20] Fix syntax glitch in dNmixture --- R/dNmixture.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/dNmixture.R b/R/dNmixture.R index 042b33b..932168c 100644 --- a/R/dNmixture.R +++ b/R/dNmixture.R @@ -178,7 +178,8 @@ dNmixture_v <- nimbleFunction( } if (Nmax == -1) { Nmax <- max(x + qpois(0.99999, lambda * (1 - prob))) - } Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x + } + Nmin <- max( max(x), Nmin ) ## set Nmin to at least the largest x logProb <- -Inf if (Nmax > Nmin) {