Xcertainty Growth Curves

Introduction

Before moving forward, be sure to first check out the Xcertainty vignette on how to use the independent_length_sampler() for a proper introduction on how to use Xcertainty. This vignette focuses on the growth_curve_sampler(), which uses data containing individuals with replicate body length measurements and age information over time. This model fits a Von Bertalanffy-Putter growth curve to observations and incorporates measurement uncertainty associated with multiple drones following Pirotta and Bierlich et al., 2024.

Von Bertalanffy-Putter growth curve equations involve three parameters: growth rate (k), asymptotic length (A), and the (theoretical) age when size is equal to 0 (t0).

In this vignette, we’ll use the growth_curve_sampler() to reproduce results derived from the data and methods described by Pirotta and Bierlich et al., 2024.

We’ll first load the Xcertainty package, as well as other packages we will use throughout this example.

## ℹ Updating Xcertainty documentation
## ✖ Installed roxygen2 is older than the version used with this package
## ℹ You have "7.3.1" but you need "7.3.2"
## ℹ Loading Xcertainty

library(Xcertainty)

library(tidyverse)
library(ggdist)
library(coda)

Data

We will use calibration and observation (whale) length and age data from Pirotta and Bierlich et al., 2024. This data includes imagery collected by five different UAS. Whale age’s are estimated using photo-identification history and are labeled as either a ‘known age’ (if seen as a calf) or a ‘minimum age’ (based on the date of the first sighting).

Calibration data

We’ll use a calibration dataset consisting of measurements of a 1 m wooden board collected by five different UAS at various altitudes (13-62 m).

data('calibration')

# sample size for each UAS
table(calibration$uas)

## 
## I2F I2O P3P P4P P4S 
## 259 198  32 155  13

parse_observations()

We’ll use parse_observations() to prepare the calibration and whale data. Measurements are often recorded in a wide-format dataframe, so parse_observations() converts to long-format data.

# parse calibration study
calibration_data = parse_observations(
  x = calibration, 
  subject_col = 'CO.ID',
  meas_col = 'Lpix', 
  tlen_col = 'CO.L', 
  image_col = 'image', 
  barometer_col = 'Baro_Alt',
  laser_col = 'Laser_Alt', 
  flen_col = 'Focal_Length', 
  iwidth_col = 'Iw', 
  swidth_col = 'Sw',
  uas_col = 'uas'
)

This creates a list of four elements:
* calibration_data$pixel_counts.
* calibration_data$training_objects.
* calibration_data$image_info. * calibration_data$prediction_objects

Next, we’ll use parse_observations() to prepare the whale data. Note, that the timepoint_col is set to year since we are interested in the total body length of each individual summarized at the yearly scale.

data('whales')

# parse field study
whale_data = parse_observations(
  x = whales, 
  subject_col = 'whale_ID',
  meas_col = 'TL.pix', 
  image_col = 'Image', 
  barometer_col = 'AltitudeBarometer',
  laser_col = 'AltitudeLaser', 
  flen_col = 'FocalLength', 
  iwidth_col = 'ImageWidth', 
  swidth_col = 'SensorWidth', 
  uas_col = 'UAS',
  timepoint_col = 'year'
)

Now the calibration and whale data are both ready. Time to set up the sampler.

The sampler

Note that combine_observations() is used to combine the parse_calibrations() outputs, calibration_data and whale_data. We will use non-informative priors here.

sampler = growth_curve_sampler(
  data = combine_observations(calibration_data, whale_data),
  priors = list(
    image_altitude = c(min = 0.1, max = 130),
    altimeter_bias = rbind(
      data.frame(altimeter = 'Barometer', mean = 0, sd = 1e2),
      data.frame(altimeter = 'Laser', mean = 0, sd = 1e2)
    ),
    altimeter_variance = rbind(
      data.frame(altimeter = 'Barometer', shape = .01, rate = .01),
      data.frame(altimeter = 'Laser', shape = .01, rate = .01)
    ),
    altimeter_scaling = rbind(
      data.frame(altimeter = 'Barometer', mean = 0, sd = 1e1),
      data.frame(altimeter = 'Laser', mean = 0, sd = 1e1)
    ),
    pixel_variance = c(shape = .01, rate = .01),
    # priors from Agbayani et al. 
    zero_length_age = c(mean = -5.09, sd = 0.4),
    growth_rate = c(mean = .18, sd = .01),
    # additional priors
    group_asymptotic_size = rbind(
      Female = c(mean = 12, sd = .5),
      Male = c(mean = 12, sd = .5)
    ),
    group_asymptotic_size_trend = rbind(
      Female = c(mean = 0, sd = 1),
      Male = c(mean = 0, sd = 1)
    ),
    subject_group_distribution = c(Female = .5, Male = .5),
    asymptotic_size_sd = c(min = 0, max = 10),
    min_calf_length = 3.5,
    # To model break points between 1990 and 2015
    group_size_shift_start_year = c(min = 1990, max = 2015)
  ),
  subject_info = whale_info
)

## Joining with `by = join_by(altimeter)`
## Joining with `by = join_by(altimeter)`
## Joining with `by = join_by(altimeter)`
## Joining with `by = join_by(UAS, altimeter)`
## Defining model
## [Note] Detected use of non-constant indexes: subject_group[1], subject_group[2], subject_group[3], subject_group[4],
## subject_group[5], subject_group[6], subject_group[7], subject_group[8], subject_group[9], subject_group[10], subject_group[11],
## subject_group[12], subject_group[13], subject_group[14], subject_group[15], subject_group[16], subject_group[17],
## subject_group[18], subject_group[19], subject_group[20], subject_group[21], subject_group[22], subject_group[24],
## subject_group[25], subject_group[26], subject_group[27], subject_group[28], subject_group[29], subject_group[31],
## subject_group[32], subject_group[33], subject_group[34], subject_group[35], subject_group[36], subject_group[37],
## subject_group[38], subject_group[39], subject_group[40], subject_group[41], subject_group[42], subject_group[43],
## subject_group[44], subject_group[45], subject_group[46], subject_group[47], subject_group[48], subject_group[49],
## subject_group[50], ... For computational efficiency we recommend specifying these in 'constants'.
## Building model
## Setting data and initial values
## Running calculate on model [Note] Any error reports that follow may simply reflect missing values in model variables.
## Checking model sizes and dimensions
## Compiling [Note] This may take a minute. [Note] Use 'showCompilerOutput = TRUE' to see C++ compilation details.

## ===== Monitors =====
## thin = 1: altimeter_bias, altimeter_scaling, altimeter_variance, asymptotic_size_sd, group_asymptotic_size, group_asymptotic_size_trend, group_size_shift_start_year, growth_rate, image_altitude, pixel_variance, subject_age_offset, subject_group, zero_length_age
## ===== Samplers =====
## RW sampler (1452)
##   - image_altitude[]  (1302 elements)
##   - zero_length_age
##   - growth_rate
##   - subject_age_offset[]  (130 elements)
##   - asymptotic_size_sd
##   - group_size_shift_start_year
##   - subject_asymptotic_size[]  (8 elements)
##   - object_length[]  (8 elements)
## conjugate sampler (151)
##   - altimeter_bias[]  (8 elements)
##   - altimeter_scaling[]  (8 elements)
##   - altimeter_variance[]  (8 elements)
##   - pixel_variance
##   - group_asymptotic_size[]  (2 elements)
##   - group_asymptotic_size_trend[]  (2 elements)
##   - subject_asymptotic_size[]  (122 elements)
## categorical sampler (32)
##   - subject_group[]  (32 elements)
## thin = 1: altimeter_bias, altimeter_scaling, altimeter_variance, asymptotic_size_sd, group_asymptotic_size, group_asymptotic_size_trend, group_size_shift_start_year, growth_rate, image_altitude, non_calf_length_age, object_length, pixel_variance, subject_age_offset, subject_asymptotic_size, subject_birth_year, subject_group, zero_length_age

## Compiling
##   [Note] This may take a minute.
##   [Note] Use 'showCompilerOutput = TRUE' to see C++ compilation details.

$nbsp;

Now run the sampler! When exploring data outputs, 1e4 or 1e5 can be good place for exploration, as this won’t take too much time to run. We recommend using 1e6 for the final analysis since 1e6 MCMC samples is often enough to get a reasonable posterior effective sample size.

output_growth = sampler(niter = 1e4)

## Sampling

## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|

## Extracting altimeter output

## Extracting image output

## Extracting pixel error output

## Extracting object output

## Extracting growth curve model output

## Extracting summaries

View Sampler Outputs

Our saved output_growth contains all the posterior samples and summaries from the sampler. Note, that there are many objects stored in output_growth, so it is best to view specific selections rather than viewing all of the objects stored in output_growth at once, as this can take a very long time to load and cause R to freeze.

You can view the posterior summaries (mean, sd, etc.) of total body length (TL) for each individual (Subject) at each Timepoint (year). Note that the lower and upper represent the 95% highest posterior density intervals (HPDI) of the posterior distribution for TL.

head(output_growth$summaries$objects)

##   Subject Measurement Timepoint parameter     mean        sd    lower    upper      ESS  PSS
## 1       6      TL.pix      2020    length 13.54185 0.1985569 13.20282 13.96650 3.648892 5001
## 2       6      TL.pix      2021    length 13.54277 0.1985309 13.20351 13.96758 3.644688 5001
## 3      84      TL.pix      2017    length 11.89351 0.2092283 11.58025 12.28971 4.847506 5001
## 4      84      TL.pix      2018    length 11.89626 0.2091701 11.58301 12.29174 4.853066 5001
## 5      84      TL.pix      2020    length 11.90045 0.2090914 11.58433 12.29310 4.870251 5001
## 6      87      TL.pix      2018    length 11.53187 0.3861897 10.93358 12.26832 3.589867 5001

We can view the posterior summaries (mean, sd, and lower and upper 95% HPDI) of the posterior distribution.

output_growth$summaries$altimeters

##    UAS altimeter parameter       mean          sd      lower       upper        ESS  PSS
## 1  I2F Barometer      bias -0.6932355 0.398973712 -1.4717383  0.04456479  149.88080 5001
## 2  I2F Barometer  variance  3.6449296 0.300490967  3.0821436  4.25333109  235.36005 5001
## 3  I2F Barometer   scaling  0.9844252 0.009450076  0.9667009  1.00300162  136.50978 5001
## 4  I2F     Laser      bias -0.3053682 0.922372408 -1.9943995  1.61419416   93.22852 5001
## 5  I2F     Laser  variance  5.0352375 0.617755578  3.9220960  6.28957815  340.96167 5001
## 6  I2F     Laser   scaling  0.9609646 0.021442989  0.9184164  1.00242382   87.81790 5001
## 7  I2O Barometer      bias  2.7492515 0.461517851  1.8254702  3.59784737   95.32337 5001
## 8  I2O Barometer  variance  2.8543303 0.226039156  2.4083769  3.28752072  871.56550 5001
## 9  I2O Barometer   scaling  0.9075059 0.013143414  0.8819154  0.93216352   85.63239 5001
## 10 I2O     Laser      bias  1.8976043 0.516853188  0.8995315  2.94631381  123.86688 5001
## 11 I2O     Laser  variance  2.4426358 0.285409345  1.9167921  3.02070223  262.26641 5001
## 12 I2O     Laser   scaling  0.9174396 0.014856556  0.8872060  0.94596523  108.58109 5001
## 13 P3P Barometer      bias  8.3631562 1.415399460  5.6000919 11.27420769   64.48248 5001
## 14 P3P Barometer  variance  5.9105083 0.932656799  4.1953405  7.75633616  196.92264 5001
## 15 P3P Barometer   scaling  0.7332374 0.056165102  0.6170686  0.84242168   60.80028 5001
## 16 P4P Barometer      bias 11.4961974 0.714895359 10.1917164 12.93181275   51.48820 5001
## 17 P4P Barometer  variance  3.9201840 0.302881357  3.3538498  4.53706521 1038.37449 5001
## 18 P4P Barometer   scaling  0.6160033 0.028945303  0.5602674  0.67162133   54.17208 5001
## 19 P4P     Laser      bias  8.6725336 4.962632714 -0.9395999 18.56536773   61.59509 5001
## 20 P4P     Laser  variance 36.5550353 7.106835707 23.7180099 50.13285478 3903.88387 5001
## 21 P4P     Laser   scaling  0.8072348 0.199088092  0.3966834  1.17697114   60.97122 5001
## 22 P4S Barometer      bias 19.1403361 2.104913863 15.0893382 23.02336886   71.31543 5001
## 23 P4S Barometer  variance  9.6368671 1.736014463  6.4241676 12.92739681 3044.87824 5001
## 24 P4S Barometer   scaling  0.3228527 0.083469238  0.1620232  0.48128905   72.41114 5001

We can view the posterior summaries of the pixel variance

output_growth$pixel_error$summary

##   error parameter     mean       sd    lower    upper      ESS  PSS
## 1 pixel  variance 12.08068 2.548511 7.561632 17.31028 35.25218 5001

Let’s view a preview of the posterior summaries of the growth curve parameters, including k, t0, A for males and females, and individual’s birth year.

output_growth$summaries$growth_curve[1:10,]

##                             parameter          mean          sd         lower         upper        ESS  PSS
## 1                     zero_length_age   -6.31087884 0.178573714   -6.59999010   -6.08336690   1.668323 5001
## 2                         growth_rate    0.18366117 0.001650781    0.18050765    0.18656065   3.583295 5001
## 3        Female group_asymptotic_size   12.90739369 0.127315172   12.64422380   13.15208812 136.792747 5001
## 4          Male group_asymptotic_size   12.09146517 0.122350016   11.85525341   12.33455322 577.459902 5001
## 5  Female group_asymptotic_size_trend   -0.18008073 0.086315748   -0.33468881   -0.05831142  50.890964 5001
## 6    Male group_asymptotic_size_trend    0.03375674 0.056447975   -0.05420489    0.14708012  22.131012 5001
## 7                        6 birth_year 1983.43579868 2.435478244 1979.51369403 1984.99976719 103.142927 5001
## 8                       84 birth_year 1986.12884733 5.989339943 1973.40716535 1988.99870115  26.972659 5001
## 9                       87 birth_year 1990.26150797 2.456269926 1985.93629851 1991.99989038 130.553853 5001
## 10                      89 birth_year 1989.45978298 4.770082798 1980.73838239 1991.99940113  47.713679 5001

Let’s now create a data frame with the posterior summaries for each individual’s length. We will first save the total length output summaries, then save the birth year outputs, then combine both of these dataframes by Subject, and then join with whale_info to sync sex and AgeType Finally, we’ll calculate the estimated age from the sample year from the estimated birth year.

# total length summary outputs for each subject
sums_L <- output_growth$summaries$objects

# birth year summary outputs for each subject
birth_year <- output_growth$growth_curve$birth_year$summary %>% rename_with(~str_c("birth_year_", .), everything()) %>%
  separate(birth_year_parameter, c("Subject", "Parameter"), sep = " ") %>% dplyr::select(!"Parameter")

# combine total length and birth year summary outputs. Then join with whale info to get sex, AgeType. Finally, calculated new estimated age. 
sum <- sums_L %>% 
  left_join(birth_year, by = "Subject") %>% mutate(Year = as.integer(Timepoint)) %>%
  left_join(whale_info %>% mutate(Subject = as.factor(Subject)) %>% rename(sex = Group), by = c("Subject", "Year")) %>% 
  relocate(Year, .before = Timepoint) %>% 
  mutate(Age_est_mean = Year - birth_year_mean, 
         Age_est_lower = Year - birth_year_upper, 
         Age_est_upper = Year - birth_year_lower)

Plot Growth Curves

We can view these results and plot the uncertainty associated with total body length (solid vertical line) and estimated age (dashed horizontal line).

ggplot() + theme_bw() + 
  geom_pointrange(data = sum, aes(x = Age_est_mean, y = mean, ymin = lower, ymax = upper)) +
  geom_errorbarh(data = sum, 
                 aes(xmin = Age_est_lower, xmax = Age_est_upper, y = mean), lty = 2) + 
  xlab("estimated age") + ylab("total body length (m)")

plot of chunk Xc_Gr_length_age

Now we’ll calculate the expected growth for male and females separately between ages 1-40. We’ll first save the posterior samplers of the growth parameters and sex-based asymptotic lengths in a dataframe. We’ll then calculate the expected length for male and females at each age using the Von Bertalanffy-Putter growth equation for each MCMC iteration. This will generate a distribution for the expected length at each age for male and females separately. We can then summarize these distributions to get the mean expected length and uncertainty as the 95% HPDI.

# create a dataframe of the output growth parameters
pred_growth <- rbind(data.frame(t0 = output_growth$growth_curve$zero_length_age$samples,
           k = output_growth$growth_curve$growth_rate$samples,
           sex = "Female",
           A = output_growth$growth_curve$group_asymptotic_size$samples[,1]),
           data.frame(
           t0 = output_growth$growth_curve$zero_length_age$samples,
           k = output_growth$growth_curve$growth_rate$samples,
           sex = "Male",
           A = output_growth$growth_curve$group_asymptotic_size$samples[,2]))

# write a loop to calculate the expected length for male and females between ages 1-40 for each MCMC iteration to create a distribution. Then calculate the mean and HPDIs from the distribution for each expected length.
age_list <- seq(from  = 1, to = 40, by = 1)
sex_list <- c("Male", "Female")

age_df <- data.frame()
full_df <- data.frame()

for (s in sex_list){
  s_x = s
for (i in age_list){
  yr0 = i
  growth_filt = pred_growth %>% filter(sex == s_x)
  Exp.L = growth_filt$A * (1-exp(-growth_filt$k * (yr0 - growth_filt$t0)))
  Exp.L_mean = mean(Exp.L)
  Exp.L_lower = HPDinterval(mcmc(Exp.L))[1]
  Exp.L_upper = HPDinterval(mcmc(Exp.L))[2]
  temp_df <- data.frame(age = yr0, sex = s_x, 
                        Exp.L_mean, Exp.L_lower = Exp.L_lower, Exp.L_upper = Exp.L_upper) 
  age_df <- rbind(age_df, temp_df)
}
  full_df <- rbind(full_df, age_df)
}

$nbsp;

Now we can plot the results

ggplot() + theme_bw() + 
  xlab("estimated age") + ylab("total body length (m)") + 
  geom_ribbon(data = full_df, aes(x = age, ymin = Exp.L_lower, ymax = Exp.L_upper, fill = sex), alpha = 0.4) + 
  geom_line(data = full_df, aes(x = age, y = Exp.L_mean, color = sex)) +
  scale_color_manual(values = c(Female = "lightblue3", Male = "darkorange")) +
  scale_fill_manual(values = c(Female = "lightblue3", Male = "darkorange")) +
  geom_pointrange(data = sum %>% filter(!is.na(sex)),
                  aes(x = Age_est_mean, y = mean, ymin = lower, ymax = upper, color = sex)) +
  geom_errorbarh(data = sum, 
                 aes(xmin = Age_est_lower, xmax = Age_est_upper, y = mean, color = sex), lty = 3)

plot of chunk Xc_Gr_growth_curves

Note, you can create custom samplers to use different growth equations (i.e., Gompertz, etc.). See Xcertainty to learn more.