x = 1:20
a = 2
b = 1
y_det = a .+ b * x
y = rand.(Normal.(y_det, 2))
y_ols = lm(hcat(ones(20),x),y)
scatter(x,y,labels=false)
plot!(x,y_det,labels="true")
Plots.abline!(coef(y_ols)[2], coef(y_ols)[1],labels="best fit")Notes on Ch. 5 of “Ecological Models and Data in R”
This chapter is about stochastic simulation and power analysis. We start with examples of stochastic simulation.
using Distributions
using GLM
using StatsFuns
using Optim
using PlotsLinear model example
This is hardly worthy of the term stochastic simulation but here it is.
The model is \(Y \sim N(a+bx,\sigma^2)\).
Hyperbolic negative binomial
The second model is less trivial. The implementation of the negative binomial distribution in Distributions.jl uses a “successes, probability” parameterization of that distribution, so we define a function nbinom_pars to go to the more “ecological” parameterization in terms of mean and dispersion (arising from a Gamma-Poisson mixture).
The stochastic model is \(Y \sim NegBin(\mu = ab/(b+x), k)\). The dispersion parameter is \(k\) (as \(k \rightarrow \infty\) it becomes a Poisson distribution), \(a\) is an “intercept term” such that when \(x=0\) we get \(\mu = a\).
a = 20
b = 1
k = 5
x = rand(Uniform(0,5), 50)
x = sort(x)
y_det = @. a * b/(b + x)
# go from (mu,disperson) to (n,p) parameterization
function nbinom_pars(μ,k)
p = @. k/(k+μ)
n = @. μ * p/(1-p)
return n,p
end
# go to Distribution's (n,p) parameterization
n, p = nbinom_pars(y_det, k)
y = rand.(NegativeBinomial.(n,p))
# fit the model
# negative loglike of model
function mfun(θ)
a, b, k = exp.(θ)
μ = @. a/(b+x)
n, p = nbinom_pars(μ, k)
return -sum(logpdf.(NegativeBinomial.(n, p), y))
end
m1 = optimize(mfun, log.([20.0, 1.0, 1.0]))
a_opt, b_opt, k_opt = exp.(Optim.minimizer(m1))
y_opt = @. a_opt * b_opt/(b_opt + x)
scatter(x,y,labels=false)
plot!(x,y_det,labels="true")
plot!(x,y_opt,labels="best fit")We can do the same thing but with “groups” (fixed effects).
g = fill(1,50)
g[sample(1:50,25,replace=false)] .= 2
a = [20, 10]
b = [1, 2]
k = 5
y_det = @. a[g]/(b[g] + x)
# go to Distribution's (n,p) parameterization
p = @. k/(k+y_det)
n = @. y_det * p/(1-p)
y = rand.(NegativeBinomial.(n,p))
# "true" relationship without noise
y_grp1 = @. a[1]/(b[1] + x)
y_grp2 = @. a[2]/(b[2] + x)
scatter(x[g.==1],y[g.==1],labels="group 1",seriescolor=1)
scatter!(x[g.==2],y[g.==2],labels="group 2",seriescolor=2)
plot!(x,y_grp1,label="",seriescolor=1)
plot!(x,y_grp2,label="",seriescolor=2)Reef fish settlement
This model has “two levels” of stochastic effects. The first is random variation in settlement (density of larvae arriving on a given anemone), and subsequent random variation in density-dependent recruitment (number of settlers surviving on the anemone).
The first level is simulated from a zero-inflated negative binomial model (implemented as rzinbinom). Then given some number \(S\) of settlers on each anemone, the number of recruits is \(Y \sim Bin(n = S, p = \frac{a}{1+(a/b)S})\). The parameter \(p\) is a decreasing function of density \(S\), hence the density-dependent moniker.
N = 603
a = 0.696
b = 9.79
mu = 25.32
zprob = 0.123
k = 0.932
function recrprob(S)
a/(1+(a/b)*S)
end
function rzinbinom(N, mu, k, zprob)
n, p = nbinom_pars(mu, k)
samp = zeros(Int64, N)
nzero = rand(Binomial(N,zprob))
nonzero = sample(1:N, N-nzero, replace = false)
samp[nonzero] .= rand(NegativeBinomial(n,p), N-nzero)
return samp
end
settlers = rzinbinom(N,mu,k,zprob)
recr = @. rand(Binomial(settlers, recrprob(settlers)))
p1 = histogram(settlers,legend=false,xlabel="Settlers",ylabel="Frequency",breaks=40)
p2 = scatter(settlers,recr,xlabel="Settlers",ylabel="Recruits",legend=false)
p2 = plot!(x->a*x/(1+(a/b)*x))
plot(p1,p2)Pigweed Poisson cluster process
The most complex stochastic simulation considered was a process where locations of parent plants were given as a Poisson process, each with the same number offspr_per_parent of offspring, which were distributed according to an isotropic distribution around each parent, where displacement followed an exponential distribution.
Each plant has a “competition index” \(C\), counting the number of other plants within a 2m neighborhood. This is used to calculate the end of year mass \(M\), which decays with increased density. The relationship was \(M \sim Gamma(shape = m/(1+C), scale = \alpha)\).
L = 30
nparents = 50
offspr_per_parent = 10
noffspr = nparents*offspr_per_parent
dispdist = 2
parent_x = rand(Uniform(0,L), nparents)
parent_y = rand(Uniform(0,L), nparents)
angle = rand(Uniform(0,2π), noffspr)
dist = rand(Exponential(dispdist), noffspr)
offspr_x = vcat([repeat([x],offspr_per_parent) for x in parent_x]...)
offspr_x += @. cos(angle)*dist
offspr_y = vcat([repeat([y],offspr_per_parent) for y in parent_y]...)
offspr_y += @. sin(angle)*dist
pos = hcat(offspr_x,offspr_y)
ndist = [sqrt(sum((pos[i,:] - pos[j,:]).^2)) for i in axes(pos,1), j in axes(pos,1)]
nbrcrowd = [sum(ndist[i,:] .< 2)-1 for i in axes(pos,1)]
ci = nbrcrowd*3
M = 2.3
alpha = 0.49
mass_det = @. M/(1+ci)
mass = @. rand(Gamma(alpha, mass_det))
b = 271.6
k = 0.569
seed_det = b*mass
n, p = nbinom_pars(seed_det,k)
seed = @. rand(NegativeBinomial(n,p))
# θ are the parameters on unconstrained scale
function negbin_ll(θ,y)
n = exp(θ[1])
p = logistic(θ[2])
-loglikelihood(NegativeBinomial(n,p),y)
end
f = optimize(x -> negbin_ll(x,nbrcrowd), [log(5),logit(0.5)])
fn = exp(Optim.minimizer(f)[1])
fp = logistic(Optim.minimizer(f)[2])
# p1
p1 = scatter(offspr_x,offspr_y,legend=false,alpha=0.5)
# p2
p2 = histogram(nbrcrowd,bins=max(nbrcrowd...),xlabel="Number of neighbors",ylabel="Proportion",normalize=true,legend=false,seriescolor=1)
p2 = scatter!(0:max(nbrcrowd...), map(x -> pdf(NegativeBinomial(fn,fp), x),0:max(nbrcrowd...)),seriescolor=2)
p2 = plot!(0:max(nbrcrowd...), map(x -> pdf(NegativeBinomial(fn,fp), x),0:max(nbrcrowd...)),seriescolor=2)
# p3
p3 = scatter(ci,mass,yscale = :log10,xlabel="Competition index",ylabel="Biomass (g)",legend=false,alpha=0.5)
p3 = plot!(x -> M/(1+x)*alpha)
# p4
function negbin_qt(μ,k,q)
n, p = nbinom_pars(μ,k)
return quantile(NegativeBinomial(n,p), q)
end
p4 = scatter(mass,1 .+ seed,xscale=:log10,yscale=:log10,xlabel="Mass",ylabel="1+Seed set",legend=false,alpha=0.5)
p4 = plot!(x -> b*x+1,xscale=:log10,yscale=:log10,seriescolor=2)
p4 = plot!(x -> negbin_qt(b*x,k,0.975)+1,linestyle=:dash,xscale=:log10,yscale=:log10,seriescolor=2)
p4 = plot!(x -> negbin_qt(b*x,k,0.025)+1,linestyle=:dash,xscale=:log10,yscale=:log10,seriescolor=2)
plot(p1,p2,p3,p4,layout=4)