Introduction to the multiocc package

library(interp)
library(MCMCpack)
#> Loading required package: coda
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:interp':
#> 
#>     area
#> ##
#> ## Markov Chain Monte Carlo Package (MCMCpack)
#> ## Copyright (C) 2003-2024 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
#> ##
#> ## Support provided by the U.S. National Science Foundation
#> ## (Grants SES-0350646 and SES-0350613)
#> ##
library(tmvtnorm)
#> Loading required package: mvtnorm
#> Loading required package: Matrix
#> Loading required package: stats4
#> Loading required package: gmm
#> Loading required package: sandwich
library(truncnorm)
library(multiocc)
library(MASS)
library(corrplot)
#> corrplot 0.95 loaded
library(fields)
#> Loading required package: spam
#> Spam version 2.11-0 (2024-10-03) is loaded.
#> Type 'help( Spam)' or 'demo( spam)' for a short introduction 
#> and overview of this package.
#> Help for individual functions is also obtained by adding the
#> suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
#> 
#> Attaching package: 'spam'
#> The following object is masked from 'package:stats4':
#> 
#>     mle
#> The following object is masked from 'package:Matrix':
#> 
#>     det
#> The following objects are masked from 'package:mvtnorm':
#> 
#>     rmvnorm, rmvt
#> The following objects are masked from 'package:base':
#> 
#>     backsolve, forwardsolve
#> Loading required package: viridisLite
#> 
#> Try help(fields) to get started.
data(detection)
data(occupancy)
data(coords)
DataNames <- list("species"=colnames(detection)[4:9],
             "detection"=c("duration"),"occupancy"=c("forest","elev"))
model.input <- multioccbuild(detection, occupancy, coords, DataNames, threshold = 15000)
#> Warning: Rows in detection with missing covariates have been removed for purposes of fitting the model, but the site/season combination is retained in occupancy and therefore predictions will be outputted.

Perform some exploratory data analysis

par(mfrow=c(1,3))
hist(occupancy$forest, main="", xlab="Forest")
hist(occupancy$elev, main="", xlab="Elevation")
hist(detection$duration, main="", xlab="Duration")


par(mfrow=c(3,2), mar=c(3,3,3,1))
quilt.plot(coords[,2:3], occupancy$forest[1:267], main="Forest Cover", zlim=c(-1.5,3))
fit <- Tps(coords[,2:3], occupancy$forest[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Forest Cover (interpolated)", zlim=c(-1.5,2))

quilt.plot(coords[,2:3], occupancy$elev[1:267], main="Elevation", zlim=c(-1.5,3.5))
fit <- Tps(coords[,2:3], occupancy$elev[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Elevation (interpolated)", zlim=c(-1.5,2))

quilt.plot(coords[,2:3], detection$duration[1:267], main="Duration", zlim=c(-2.5,3))
fit <- Tps(coords[,2:3], detection$duration[1:267])
out <- predictSurface(fit, df=100)
image.plot(out, main="Duration (Survey 1)", zlim=c(-2.5,2.5))

A short run for demonstration purposes

## Shorter run for demonstration purposes.
## library(tmvtnorm)
mcmc.out <- GibbsSampler(M.iter=10, M.burn=1, M.thin=1, model.input, q=10, sv=FALSE)
#>   |                                                                              |                                                                      |   0%  |                                                                              |=======                                                               |  10%  |                                                                              |==============                                                        |  20%  |                                                                              |=====================                                                 |  30%  |                                                                              |============================                                          |  40%  |                                                                              |===================================                                   |  50%  |                                                                              |==========================================                            |  60%  |                                                                              |=================================================                     |  70%  |                                                                              |========================================================              |  80%  |                                                                              |===============================================================       |  90%  |                                                                              |======================================================================| 100%

A longer run (not executed) for scientific results

mcmc.out <- GibbsSampler(M.iter=50000, M.burn=20000, M.thin=1, model.input, q=10, sv=FALSE)

Can summarize output

summary(mcmc.out$samples$alpha)
#> 
#> Iterations = 1:9
#> Thinning interval = 1 
#> Number of chains = 1 
#> Sample size per chain = 9 
#> 
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#> 
#>                         Mean      SD Naive SE Time-series SE
#> Great.tit Int       0.664507 0.11075 0.036916       0.081620
#> Great.tit forest   -0.118752 0.04866 0.016219       0.016219
#> Great.tit elev     -0.167027 0.04858 0.016193       0.016193
#> Blue.tit Int        0.463593 0.07907 0.026356       0.051144
#> Blue.tit forest    -0.086730 0.04049 0.013498       0.025407
#> Blue.tit elev      -0.180707 0.05828 0.019425       0.044951
#> Coal.tit Int        0.842897 0.12628 0.042093       0.080916
#> Coal.tit forest     0.035070 0.03465 0.011550       0.026307
#> Coal.tit elev      -0.087794 0.01848 0.006158       0.006158
#> Crested.tit Int     0.495038 0.11430 0.038100       0.077029
#> Crested.tit forest  0.001835 0.02039 0.006796       0.006796
#> Crested.tit elev   -0.074102 0.03392 0.011307       0.027073
#> Marsh.tit Int       0.338417 0.07638 0.025459       0.057529
#> Marsh.tit forest   -0.088214 0.04591 0.015302       0.029091
#> Marsh.tit elev     -0.191131 0.08448 0.028159       0.051510
#> Willow.tit Int      0.119088 0.09062 0.030207       0.067062
#> Willow.tit forest   0.071454 0.02853 0.009509       0.009509
#> Willow.tit elev     0.026708 0.03919 0.013062       0.013062
#> 
#> 2. Quantiles for each variable:
#> 
#>                         2.5%       25%       50%      75%     97.5%
#> Great.tit Int       0.481878  0.603732  0.686106  0.76909  0.784381
#> Great.tit forest   -0.165803 -0.149343 -0.141156 -0.10002 -0.029327
#> Great.tit elev     -0.221076 -0.194745 -0.178713 -0.15260 -0.076318
#> Blue.tit Int        0.316416  0.452976  0.485722  0.52217  0.533991
#> Blue.tit forest    -0.122633 -0.113188 -0.099776 -0.09175 -0.016477
#> Blue.tit elev      -0.259867 -0.207950 -0.184398 -0.13169 -0.099175
#> Coal.tit Int        0.612555  0.796265  0.883635  0.92140  0.982480
#> Coal.tit forest    -0.009808 -0.002403  0.044837  0.05543  0.081896
#> Coal.tit elev      -0.110029 -0.101054 -0.086311 -0.07509 -0.060466
#> Crested.tit Int     0.303900  0.419475  0.516428  0.59600  0.616600
#> Crested.tit forest -0.034747 -0.006012  0.007059  0.01663  0.023240
#> Crested.tit elev   -0.117518 -0.105674 -0.070689 -0.05614 -0.022984
#> Marsh.tit Int       0.220051  0.296131  0.342714  0.38716  0.435156
#> Marsh.tit forest   -0.138403 -0.113807 -0.105561 -0.06586 -0.005722
#> Marsh.tit elev     -0.293386 -0.254950 -0.212310 -0.12270 -0.074429
#> Willow.tit Int     -0.017547  0.074008  0.121839  0.20712  0.227712
#> Willow.tit forest   0.036466  0.051122  0.067315  0.09852  0.110228
#> Willow.tit elev    -0.044982  0.018749  0.035007  0.04120  0.079975
summary(mcmc.out$samples$rho)
#> 
#> Iterations = 1:9
#> Thinning interval = 1 
#> Number of chains = 1 
#> Sample size per chain = 9 
#> 
#> 1. Empirical mean and standard deviation for each variable,
#>    plus standard error of the mean:
#> 
#>                   Mean      SD Naive SE Time-series SE
#> Great.tit rho   0.7845 0.19424  0.06475        0.12235
#> Blue.tit rho    0.8327 0.14076  0.04692        0.04692
#> Coal.tit rho    0.4729 0.12688  0.04229        0.04229
#> Crested.tit rho 0.8425 0.17926  0.05975        0.05975
#> Marsh.tit rho   0.8038 0.14265  0.04755        0.04755
#> Willow.tit rho  0.9083 0.08547  0.02849        0.02849
#> 
#> 2. Quantiles for each variable:
#> 
#>                   2.5%    25%    50%    75%  97.5%
#> Great.tit rho   0.4402 0.6363 0.8994 0.9066 0.9496
#> Blue.tit rho    0.5778 0.7563 0.9002 0.9331 0.9403
#> Coal.tit rho    0.3373 0.3729 0.4598 0.5385 0.7023
#> Crested.tit rho 0.4756 0.8382 0.9001 0.9438 0.9807
#> Marsh.tit rho   0.5165 0.8037 0.8242 0.8729 0.9340
#> Willow.tit rho  0.7472 0.8662 0.9440 0.9606 0.9930

Visualize correlation matrix

par(mfrow=c(1,1), mar=c(3,3,3,1))
sigout <- mcmc.out$samples$sig
Sig <- matrix(colMeans(sigout),6,6)
SpeciesCor <- cov2cor(Sig)
rownames(SpeciesCor) <- DataNames$species
colnames(SpeciesCor) <- DataNames$species
corrplot::corrplot(SpeciesCor)

Make predictions from fitted model

y.agg1 <-  aggregate(model.input$y[,1], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot1 <- 1*(y.agg1$x>0)

y.agg2 <- aggregate(model.input$y[,2], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot2 <- 1*(y.agg2$x>0)

y.agg3 <- aggregate(model.input$y[,3], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot3 <- 1*(y.agg3$x>0)

y.agg4 <- aggregate(model.input$y[,4], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot4 <- 1*(y.agg4$x>0)

y.agg5 <- aggregate(model.input$y[,5], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot5 <- 1*(y.agg5$x>0)

y.agg6 <- aggregate(model.input$y[,6], by=list(model.input$detection.info$siteID, 
                                              model.input$detection.info$season), FUN=sum, na.rm=TRUE)
y.plot6 <- 1*(y.agg6$x>0)

for (yr in c(1,4,7,10)){
  print(yr)

  range <- which(model.input$occupancy.info$season == yr)

  psiout <- mcmc.out$samples$psi
  #pout <- mcmc.out$p
  dim(psiout)

  psi1 <- apply(psiout[,0*2670+range],2,mean)
  psi2 <- apply(psiout[,1*2670+range],2,mean)
  psi3 <- apply(psiout[,2*2670+range],2,mean)
  psi4 <- apply(psiout[,3*2670+range],2,mean)
  psi5 <- apply(psiout[,4*2670+range],2,mean)
  psi6 <- apply(psiout[,5*2670+range],2,mean)

  par(mfrow=c(3,2), mar=c(1,3,3,1))
  fit <- Tps(coords[1:267,2:3], psi1)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Great Tit", zlim=c(-0.01,1.01))
  mtext(paste("Year",yr), side=3, line=-2, outer=TRUE)

  y.plot1.in <- y.plot1[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot1.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi2)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Blue Tit", zlim=c(-0.01,1.01))

  y.plot2.in <- y.plot2[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot2.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi3)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Coal Tit", zlim=c(-0.01,1.01))

  y.plot3.in <- y.plot3[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot3.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi4)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Crested Tit", zlim=c(-0.01,1.01))

  y.plot4.in <- y.plot4[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot4.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi5)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Marsh Tit", zlim=c(-0.01,1.01))

  y.plot5.in <- y.plot5[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot5.in==1),2:3])

  fit <- Tps(coords[1:267,2:3], psi6)
  out <- predictSurface(fit, df=100)
  image.plot(out, main="Willow Tit", zlim=c(-0.01,1.01))

  y.plot6.in <- y.plot6[which(model.input$occupancy.info$season ==yr)]
  points(coords[which(y.plot6.in==1),2:3])
}
#> [1] 1

#> [1] 4

#> [1] 7
#> Warning: 
#> Grid searches over lambda (nugget and sill variances) with  minima at the endpoints: 
#>   (GCV) Generalized Cross-Validation 
#>    minimum at  right endpoint  lambda  =  3.015679e-06 (eff. df= 253.65 )

#> [1] 10