Before moving forward, be sure to first check out the Xcertainty vignette on how to use the
independent_length_sampler()
for a proper introduction on
how to use Xcertainty
. This vignette focuses on the
growth_curve_sampler()
, which uses data containing
individuals with replicate body length measurements and age information
over time. This model fits a Von Bertalanffy-Putter growth curve to
observations and incorporates measurement uncertainty associated with
multiple drones following Pirotta and Bierlich et al.,
2024.
Von Bertalanffy-Putter growth curve equations involve three parameters: growth rate (k), asymptotic length (A), and the (theoretical) age when size is equal to 0 (t0).
In this vignette, we’ll use the growth_curve_sampler()
to reproduce results derived from the data and methods described by Pirotta and Bierlich et al.,
2024.
We’ll first load the Xcertainty package, as well as other packages we will use throughout this example.
## ℹ Updating Xcertainty documentation
## ✖ Installed roxygen2 is older than the version used with this package
## ℹ You have "7.3.1" but you need "7.3.2"
## ℹ Loading Xcertainty
library(Xcertainty)
library(tidyverse)
library(ggdist)
library(coda)
We will use calibration and observation (whale) length and age data from Pirotta and Bierlich et al., 2024. This data includes imagery collected by five different UAS. Whale age’s are estimated using photo-identification history and are labeled as either a ‘known age’ (if seen as a calf) or a ‘minimum age’ (based on the date of the first sighting).
We’ll use a calibration dataset consisting of measurements of a 1 m wooden board collected by five different UAS at various altitudes (13-62 m).
data('calibration')
# sample size for each UAS
table(calibration$uas)
##
## I2F I2O P3P P4P P4S
## 259 198 32 155 13
We’ll use parse_observations()
to prepare the
calibration and whale data. Measurements are often recorded in a
wide-format dataframe, so parse_observations() converts to long-format
data.
# parse calibration study
calibration_data = parse_observations(
x = calibration,
subject_col = 'CO.ID',
meas_col = 'Lpix',
tlen_col = 'CO.L',
image_col = 'image',
barometer_col = 'Baro_Alt',
laser_col = 'Laser_Alt',
flen_col = 'Focal_Length',
iwidth_col = 'Iw',
swidth_col = 'Sw',
uas_col = 'uas'
)
This creates a list of four elements:
* calibration_data$pixel_counts
.
* calibration_data$training_objects
.
* calibration_data$image_info
. *
calibration_data$prediction_objects
Next, we’ll use parse_observations()
to prepare the
whale data. Note, that the timepoint_col
is set to year
since we are interested in the total body length of each individual
summarized at the yearly scale.
data('whales')
# parse field study
whale_data = parse_observations(
x = whales,
subject_col = 'whale_ID',
meas_col = 'TL.pix',
image_col = 'Image',
barometer_col = 'AltitudeBarometer',
laser_col = 'AltitudeLaser',
flen_col = 'FocalLength',
iwidth_col = 'ImageWidth',
swidth_col = 'SensorWidth',
uas_col = 'UAS',
timepoint_col = 'year'
)
Now the calibration and whale data are both ready. Time to set up the sampler.
Note that combine_observations()
is used to combine the
parse_calibrations()
outputs, calibration_data
and whale_data
. We will use non-informative priors
here.
sampler = growth_curve_sampler(
data = combine_observations(calibration_data, whale_data),
priors = list(
image_altitude = c(min = 0.1, max = 130),
altimeter_bias = rbind(
data.frame(altimeter = 'Barometer', mean = 0, sd = 1e2),
data.frame(altimeter = 'Laser', mean = 0, sd = 1e2)
),
altimeter_variance = rbind(
data.frame(altimeter = 'Barometer', shape = .01, rate = .01),
data.frame(altimeter = 'Laser', shape = .01, rate = .01)
),
altimeter_scaling = rbind(
data.frame(altimeter = 'Barometer', mean = 0, sd = 1e1),
data.frame(altimeter = 'Laser', mean = 0, sd = 1e1)
),
pixel_variance = c(shape = .01, rate = .01),
# priors from Agbayani et al.
zero_length_age = c(mean = -5.09, sd = 0.4),
growth_rate = c(mean = .18, sd = .01),
# additional priors
group_asymptotic_size = rbind(
Female = c(mean = 12, sd = .5),
Male = c(mean = 12, sd = .5)
),
group_asymptotic_size_trend = rbind(
Female = c(mean = 0, sd = 1),
Male = c(mean = 0, sd = 1)
),
subject_group_distribution = c(Female = .5, Male = .5),
asymptotic_size_sd = c(min = 0, max = 10),
min_calf_length = 3.5,
# To model break points between 1990 and 2015
group_size_shift_start_year = c(min = 1990, max = 2015)
),
subject_info = whale_info
)
## Joining with `by = join_by(altimeter)`
## Joining with `by = join_by(altimeter)`
## Joining with `by = join_by(altimeter)`
## Joining with `by = join_by(UAS, altimeter)`
## Defining model
## [Note] Detected use of non-constant indexes: subject_group[1], subject_group[2], subject_group[3], subject_group[4],
## subject_group[5], subject_group[6], subject_group[7], subject_group[8], subject_group[9], subject_group[10], subject_group[11],
## subject_group[12], subject_group[13], subject_group[14], subject_group[15], subject_group[16], subject_group[17],
## subject_group[18], subject_group[19], subject_group[20], subject_group[21], subject_group[22], subject_group[24],
## subject_group[25], subject_group[26], subject_group[27], subject_group[28], subject_group[29], subject_group[31],
## subject_group[32], subject_group[33], subject_group[34], subject_group[35], subject_group[36], subject_group[37],
## subject_group[38], subject_group[39], subject_group[40], subject_group[41], subject_group[42], subject_group[43],
## subject_group[44], subject_group[45], subject_group[46], subject_group[47], subject_group[48], subject_group[49],
## subject_group[50], ... For computational efficiency we recommend specifying these in 'constants'.
## Building model
## Setting data and initial values
## Running calculate on model [Note] Any error reports that follow may simply reflect missing values in model variables.
## Checking model sizes and dimensions
## Compiling [Note] This may take a minute. [Note] Use 'showCompilerOutput = TRUE' to see C++ compilation details.
## ===== Monitors =====
## thin = 1: altimeter_bias, altimeter_scaling, altimeter_variance, asymptotic_size_sd, group_asymptotic_size, group_asymptotic_size_trend, group_size_shift_start_year, growth_rate, image_altitude, pixel_variance, subject_age_offset, subject_group, zero_length_age
## ===== Samplers =====
## RW sampler (1452)
## - image_altitude[] (1302 elements)
## - zero_length_age
## - growth_rate
## - subject_age_offset[] (130 elements)
## - asymptotic_size_sd
## - group_size_shift_start_year
## - subject_asymptotic_size[] (8 elements)
## - object_length[] (8 elements)
## conjugate sampler (151)
## - altimeter_bias[] (8 elements)
## - altimeter_scaling[] (8 elements)
## - altimeter_variance[] (8 elements)
## - pixel_variance
## - group_asymptotic_size[] (2 elements)
## - group_asymptotic_size_trend[] (2 elements)
## - subject_asymptotic_size[] (122 elements)
## categorical sampler (32)
## - subject_group[] (32 elements)
## thin = 1: altimeter_bias, altimeter_scaling, altimeter_variance, asymptotic_size_sd, group_asymptotic_size, group_asymptotic_size_trend, group_size_shift_start_year, growth_rate, image_altitude, non_calf_length_age, object_length, pixel_variance, subject_age_offset, subject_asymptotic_size, subject_birth_year, subject_group, zero_length_age
## Compiling
## [Note] This may take a minute.
## [Note] Use 'showCompilerOutput = TRUE' to see C++ compilation details.
$nbsp;
Now run the sampler! When exploring data outputs, 1e4 or 1e5 can be good place for exploration, as this won’t take too much time to run. We recommend using 1e6 for the final analysis since 1e6 MCMC samples is often enough to get a reasonable posterior effective sample size.
output_growth = sampler(niter = 1e4)
## Sampling
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## Extracting altimeter output
## Extracting image output
## Extracting pixel error output
## Extracting object output
## Extracting growth curve model output
## Extracting summaries
Our saved output_growth
contains all the posterior
samples and summaries from the sampler. Note, that there are many
objects stored in output_growth
, so it is best to view
specific selections rather than viewing all of the objects stored in
output_growth
at once, as this can take a very long time to
load and cause R to freeze.
You can view the posterior summaries (mean, sd, etc.) of total body
length (TL) for each individual (Subject) at each Timepoint (year). Note
that the lower
and upper
represent the 95%
highest posterior density intervals (HPDI) of the posterior distribution
for TL.
head(output_growth$summaries$objects)
## Subject Measurement Timepoint parameter mean sd lower upper ESS PSS
## 1 6 TL.pix 2020 length 13.54185 0.1985569 13.20282 13.96650 3.648892 5001
## 2 6 TL.pix 2021 length 13.54277 0.1985309 13.20351 13.96758 3.644688 5001
## 3 84 TL.pix 2017 length 11.89351 0.2092283 11.58025 12.28971 4.847506 5001
## 4 84 TL.pix 2018 length 11.89626 0.2091701 11.58301 12.29174 4.853066 5001
## 5 84 TL.pix 2020 length 11.90045 0.2090914 11.58433 12.29310 4.870251 5001
## 6 87 TL.pix 2018 length 11.53187 0.3861897 10.93358 12.26832 3.589867 5001
We can view the posterior summaries (mean, sd, and lower and upper 95% HPDI) of the posterior distribution.
output_growth$summaries$altimeters
## UAS altimeter parameter mean sd lower upper ESS PSS
## 1 I2F Barometer bias -0.6932355 0.398973712 -1.4717383 0.04456479 149.88080 5001
## 2 I2F Barometer variance 3.6449296 0.300490967 3.0821436 4.25333109 235.36005 5001
## 3 I2F Barometer scaling 0.9844252 0.009450076 0.9667009 1.00300162 136.50978 5001
## 4 I2F Laser bias -0.3053682 0.922372408 -1.9943995 1.61419416 93.22852 5001
## 5 I2F Laser variance 5.0352375 0.617755578 3.9220960 6.28957815 340.96167 5001
## 6 I2F Laser scaling 0.9609646 0.021442989 0.9184164 1.00242382 87.81790 5001
## 7 I2O Barometer bias 2.7492515 0.461517851 1.8254702 3.59784737 95.32337 5001
## 8 I2O Barometer variance 2.8543303 0.226039156 2.4083769 3.28752072 871.56550 5001
## 9 I2O Barometer scaling 0.9075059 0.013143414 0.8819154 0.93216352 85.63239 5001
## 10 I2O Laser bias 1.8976043 0.516853188 0.8995315 2.94631381 123.86688 5001
## 11 I2O Laser variance 2.4426358 0.285409345 1.9167921 3.02070223 262.26641 5001
## 12 I2O Laser scaling 0.9174396 0.014856556 0.8872060 0.94596523 108.58109 5001
## 13 P3P Barometer bias 8.3631562 1.415399460 5.6000919 11.27420769 64.48248 5001
## 14 P3P Barometer variance 5.9105083 0.932656799 4.1953405 7.75633616 196.92264 5001
## 15 P3P Barometer scaling 0.7332374 0.056165102 0.6170686 0.84242168 60.80028 5001
## 16 P4P Barometer bias 11.4961974 0.714895359 10.1917164 12.93181275 51.48820 5001
## 17 P4P Barometer variance 3.9201840 0.302881357 3.3538498 4.53706521 1038.37449 5001
## 18 P4P Barometer scaling 0.6160033 0.028945303 0.5602674 0.67162133 54.17208 5001
## 19 P4P Laser bias 8.6725336 4.962632714 -0.9395999 18.56536773 61.59509 5001
## 20 P4P Laser variance 36.5550353 7.106835707 23.7180099 50.13285478 3903.88387 5001
## 21 P4P Laser scaling 0.8072348 0.199088092 0.3966834 1.17697114 60.97122 5001
## 22 P4S Barometer bias 19.1403361 2.104913863 15.0893382 23.02336886 71.31543 5001
## 23 P4S Barometer variance 9.6368671 1.736014463 6.4241676 12.92739681 3044.87824 5001
## 24 P4S Barometer scaling 0.3228527 0.083469238 0.1620232 0.48128905 72.41114 5001
We can view the posterior summaries of the pixel variance
output_growth$pixel_error$summary
## error parameter mean sd lower upper ESS PSS
## 1 pixel variance 12.08068 2.548511 7.561632 17.31028 35.25218 5001
Let’s view a preview of the posterior summaries of the growth curve parameters, including k, t0, A for males and females, and individual’s birth year.
output_growth$summaries$growth_curve[1:10,]
## parameter mean sd lower upper ESS PSS
## 1 zero_length_age -6.31087884 0.178573714 -6.59999010 -6.08336690 1.668323 5001
## 2 growth_rate 0.18366117 0.001650781 0.18050765 0.18656065 3.583295 5001
## 3 Female group_asymptotic_size 12.90739369 0.127315172 12.64422380 13.15208812 136.792747 5001
## 4 Male group_asymptotic_size 12.09146517 0.122350016 11.85525341 12.33455322 577.459902 5001
## 5 Female group_asymptotic_size_trend -0.18008073 0.086315748 -0.33468881 -0.05831142 50.890964 5001
## 6 Male group_asymptotic_size_trend 0.03375674 0.056447975 -0.05420489 0.14708012 22.131012 5001
## 7 6 birth_year 1983.43579868 2.435478244 1979.51369403 1984.99976719 103.142927 5001
## 8 84 birth_year 1986.12884733 5.989339943 1973.40716535 1988.99870115 26.972659 5001
## 9 87 birth_year 1990.26150797 2.456269926 1985.93629851 1991.99989038 130.553853 5001
## 10 89 birth_year 1989.45978298 4.770082798 1980.73838239 1991.99940113 47.713679 5001
Let’s now create a data frame with the posterior summaries for each
individual’s length. We will first save the total length output
summaries, then save the birth year outputs, then combine both of these
dataframes by Subject
, and then join with
whale_info
to sync sex and AgeType Finally, we’ll calculate
the estimated age from the sample year from the estimated birth
year.
# total length summary outputs for each subject
sums_L <- output_growth$summaries$objects
# birth year summary outputs for each subject
birth_year <- output_growth$growth_curve$birth_year$summary %>% rename_with(~str_c("birth_year_", .), everything()) %>%
separate(birth_year_parameter, c("Subject", "Parameter"), sep = " ") %>% dplyr::select(!"Parameter")
# combine total length and birth year summary outputs. Then join with whale info to get sex, AgeType. Finally, calculated new estimated age.
sum <- sums_L %>%
left_join(birth_year, by = "Subject") %>% mutate(Year = as.integer(Timepoint)) %>%
left_join(whale_info %>% mutate(Subject = as.factor(Subject)) %>% rename(sex = Group), by = c("Subject", "Year")) %>%
relocate(Year, .before = Timepoint) %>%
mutate(Age_est_mean = Year - birth_year_mean,
Age_est_lower = Year - birth_year_upper,
Age_est_upper = Year - birth_year_lower)
We can view these results and plot the uncertainty associated with total body length (solid vertical line) and estimated age (dashed horizontal line).
ggplot() + theme_bw() +
geom_pointrange(data = sum, aes(x = Age_est_mean, y = mean, ymin = lower, ymax = upper)) +
geom_errorbarh(data = sum,
aes(xmin = Age_est_lower, xmax = Age_est_upper, y = mean), lty = 2) +
xlab("estimated age") + ylab("total body length (m)")
Now we’ll calculate the expected growth for male and females separately between ages 1-40. We’ll first save the posterior samplers of the growth parameters and sex-based asymptotic lengths in a dataframe. We’ll then calculate the expected length for male and females at each age using the Von Bertalanffy-Putter growth equation for each MCMC iteration. This will generate a distribution for the expected length at each age for male and females separately. We can then summarize these distributions to get the mean expected length and uncertainty as the 95% HPDI.
# create a dataframe of the output growth parameters
pred_growth <- rbind(data.frame(t0 = output_growth$growth_curve$zero_length_age$samples,
k = output_growth$growth_curve$growth_rate$samples,
sex = "Female",
A = output_growth$growth_curve$group_asymptotic_size$samples[,1]),
data.frame(
t0 = output_growth$growth_curve$zero_length_age$samples,
k = output_growth$growth_curve$growth_rate$samples,
sex = "Male",
A = output_growth$growth_curve$group_asymptotic_size$samples[,2]))
# write a loop to calculate the expected length for male and females between ages 1-40 for each MCMC iteration to create a distribution. Then calculate the mean and HPDIs from the distribution for each expected length.
age_list <- seq(from = 1, to = 40, by = 1)
sex_list <- c("Male", "Female")
age_df <- data.frame()
full_df <- data.frame()
for (s in sex_list){
s_x = s
for (i in age_list){
yr0 = i
growth_filt = pred_growth %>% filter(sex == s_x)
Exp.L = growth_filt$A * (1-exp(-growth_filt$k * (yr0 - growth_filt$t0)))
Exp.L_mean = mean(Exp.L)
Exp.L_lower = HPDinterval(mcmc(Exp.L))[1]
Exp.L_upper = HPDinterval(mcmc(Exp.L))[2]
temp_df <- data.frame(age = yr0, sex = s_x,
Exp.L_mean, Exp.L_lower = Exp.L_lower, Exp.L_upper = Exp.L_upper)
age_df <- rbind(age_df, temp_df)
}
full_df <- rbind(full_df, age_df)
}
$nbsp;
Now we can plot the results
ggplot() + theme_bw() +
xlab("estimated age") + ylab("total body length (m)") +
geom_ribbon(data = full_df, aes(x = age, ymin = Exp.L_lower, ymax = Exp.L_upper, fill = sex), alpha = 0.4) +
geom_line(data = full_df, aes(x = age, y = Exp.L_mean, color = sex)) +
scale_color_manual(values = c(Female = "lightblue3", Male = "darkorange")) +
scale_fill_manual(values = c(Female = "lightblue3", Male = "darkorange")) +
geom_pointrange(data = sum %>% filter(!is.na(sex)),
aes(x = Age_est_mean, y = mean, ymin = lower, ymax = upper, color = sex)) +
geom_errorbarh(data = sum,
aes(xmin = Age_est_lower, xmax = Age_est_upper, y = mean, color = sex), lty = 3)
Note, you can create custom samplers to use different growth equations (i.e., Gompertz, etc.). See Xcertainty to learn more.