This brief vignette shows an example of a basic workflow for selecting recordings from different times of day by site and year.
First, we’ll load the packages we want to work with.
Next we’ll prepare our metadata on the recordings, by cleaning, adding site-level information and calculating the time to sunrise/sunset for each file. We’ll also define recordings as either ‘early’ (occurring before 6am) or ‘late’ (occurring after 6am).
# Build a cleaned site index from the example site data; the deployment
# window for each site is given by the start/end date-time columns.
s <- clean_site_index(
  example_sites_clean,
  name_date = c("date_time_start", "date_time_end")
)
# Clean the file metadata, join in the site-level information, and compute
# the time to sunrise/sunset for every recording. Then classify each
# recording by time of day and extract the year of the recording date.
m <- clean_metadata(project_files = example_files) |>
  add_sites(s) |>
  calc_sun() |>
  mutate(
    # Recordings from 06:00 onward are "late"; anything earlier is "early"
    time_period = if_else(hour(date_time) >= 6, "late", "early"),
    year = year(date)
  )
#> Extracting ARU info...
#> Extracting Dates and Times...
#> Joining by columns `date_time_start` and `date_time_end`
# Inspect the cleaned metadata: one row per recording file, now including
# site, sun-time (t2sr/t2ss), time_period, and year columns
m
#> # A tibble: 42 × 18
#> file_name type path aru_id manufacturer model aru_type site_id tz_offset
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 P01_1_202005… wav a_BA… BARLT… Frontier La… BAR-… BARLT P01_1 -0400
#> 2 P01_1_202005… wav a_BA… BARLT… Frontier La… BAR-… BARLT P01_1 -0400
#> 3 P02_1_202005… wav a_S4… S4A01… Wildlife Ac… Song… SongMet… P02_1 <NA>
#> 4 P02_1_202005… wav a_S4… S4A01… Wildlife Ac… Song… SongMet… P02_1 <NA>
#> # ℹ 38 more rows
#> # ℹ 9 more variables: date_time <dttm>, date <date>, longitude <dbl>,
#> # latitude <dbl>, tz <chr>, t2sr <dbl>, t2ss <dbl>, time_period <chr>,
#> # year <dbl>
Time to do some sampling!
First we define the selection parameters for each time frame we’re interested in sampling. This might be “dawn” and “dusk”, or in this example, “early” and “late” morning.
This function will also simulate the selection weights so we can see what we’ve defined.
# Define the selection parameters for each time frame. Calling
# sim_selection_weights() also simulates the selection weights, so the
# chosen parameter sets can be inspected before sampling.
p <- list(
  early = sim_selection_weights(min_range = c(-70, 240)),
  late = sim_selection_weights(min_range = c(100, 300), min_mean = 200)
)
# Inspect the simulated selection-weight parameters for each time frame
p
#> $early
#> $early$min_range
#> [1] -70 240
#>
#> $early$min_mean
#> [1] 30
#>
#> $early$min_sd
#> [1] 60
#>
#> $early$day_range
#> [1] 120 201
#>
#> $early$day_mean
#> [1] 161
#>
#> $early$day_sd
#> [1] 20
#>
#> $early$offset
#> [1] 0
#>
#> $early$return_log
#> [1] TRUE
#>
#> $early$selection_fun
#> [1] "norm"
#>
#>
#> $late
#> $late$min_range
#> [1] 100 300
#>
#> $late$min_mean
#> [1] 200
#>
#> $late$min_sd
#> [1] 60
#>
#> $late$day_range
#> [1] 120 201
#>
#> $late$day_mean
#> [1] 161
#>
#> $late$day_sd
#> [1] 20
#>
#> $late$offset
#> [1] 0
#>
#> $late$return_log
#> [1] TRUE
#>
#> $late$selection_fun
#> [1] "norm"
Now we can calculate selection weights
Here we’ll calculate a separate set of selection weights for early and late recordings in each year. Then we’ll group recordings by site, year, and time period.
# Calculate a separate set of selection weights for each time period and
# year: nest the recordings by group, attach the matching parameter set,
# and apply calc_selection_weights() to each nested data set.
w <- m |>
  nest(data = c(-time_period, -year)) |>
  mutate(
    # Match parameter sets to groups by name rather than by position, so
    # the pairing stays correct regardless of the order (or number) of
    # groups produced by nest(). The original `params = p` relied on the
    # nested rows happening to appear in the same order as the list `p`.
    params = p[time_period],
    sel = map2(data, params, calc_selection_weights)
  ) |>
  unnest(sel) |>
  select(-"data", -"params") |>
  # A unique label per site/year/time-period combination, used later as the
  # stratification variable for sampling
  mutate(selection_group = glue("{site_id}_{year}_{time_period}"))
# Preview the weighted recordings: original columns plus the psel_* measures
# of selection probability and the selection_group label
w
#> # A tibble: 21 × 27
#> time_period year file_name type path aru_id manufacturer model aru_type
#> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 early 2020 P01_1_202005… wav a_BA… BARLT… Frontier La… BAR-… BARLT
#> 2 early 2020 P02_1_202005… wav a_S4… S4A01… Wildlife Ac… Song… SongMet…
#> 3 early 2020 P06_1_202005… wav a_BA… BARLT… Frontier La… BAR-… BARLT
#> 4 early 2020 P07_1_202005… wav a_S4… S4A01… Wildlife Ac… Song… SongMet…
#> # ℹ 17 more rows
#> # ℹ 18 more variables: site_id <chr>, tz_offset <chr>, date_time <dttm>,
#> # date <date>, longitude <dbl>, latitude <dbl>, tz <chr>, t2sr <dbl>,
#> # t2ss <dbl>, doy <dbl>, psel_by <chr>, psel_min <dbl>, psel_doy <dbl>,
#> # psel <dbl>, psel_scaled <dbl>, psel_std <dbl>, psel_normalized <dbl>,
#> # selection_group <glue>
This `w` data set contains the original recordings, but now also new
columns containing various measures of the probability of selection.
We’ll define the number of samples we’d like to have.
# Define the number of samples we'd like from each selection group.
n <- w |>
  # One row per selection group; time_period is kept for the rules below
  summarize(n_recordings = n(), .by = c("selection_group", "time_period")) |>
  mutate(
    # Target 5 samples for early-morning groups and 2 for late-morning groups
    n = if_else(time_period == "early", 5, 2),
    # Over-samples: roughly a third of the target, but never more than the
    # recordings left once the target is met, and never negative.
    # (A previous, immediately-overwritten assignment of n_os using
    # floor(n / 3) / floor(n / 4) was dead code and has been removed.)
    n_os = pmax(0, pmin(n_recordings - n, round(n / 3))),
    # Cap the target at the number of recordings actually available
    n = pmin(n, n_recordings)
  )
# Preview the per-group sample sizes (n) and over-sample counts (n_os)
n
#> # A tibble: 7 × 5
#> selection_group time_period n_recordings n n_os
#> <glue> <chr> <int> <dbl> <dbl>
#> 1 P01_1_2020_early early 3 3 0
#> 2 P02_1_2020_early early 3 3 0
#> 3 P06_1_2020_early early 3 3 0
#> 4 P07_1_2020_early early 3 3 0
#> # ℹ 3 more rows
And finally sample the recordings!
# Draw the sample: recordings are stratified by selection_group and
# weighted by the normalized selection probability.
g <- sample_recordings(
  w,
  n,
  col_site_id = selection_group,
  col_sel_weights = psel_normalized
)
# Print the sampling summary: counts of Base vs Over samples, in total and
# per stratum (selection group)
g
#> Summary of Site Counts:
#>
#> siteuse by total:
#> Base Over
#> total 19 2
#>
#> siteuse by stratum:
#> Base Over
#> P01_1_2020_early 3 0
#> P02_1_2020_early 3 0
#> P03_1_2020_late 2 1
#> P06_1_2020_early 3 0
#> P07_1_2020_early 3 0
#> P08_1_2020_late 2 1
#> P09_1_2020_early 3 0
The recordings selected for sampling…
# The recordings selected for the base sample, returned as an sf
# (simple features) point collection with all metadata columns attached
g$sites_base
#> Simple feature collection with 19 features and 35 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 124 ymin: -53.21667 xmax: 132 ymax: 238.3167
#> Projected CRS: WGS 84 / World Mercator
#> First 10 features:
#> siteID siteuse replsite lon_WGS84 lat_WGS84 stratum wgt ip
#> 1 sample-01 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
#> 2 sample-02 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
#> 3 sample-03 Base None 0.001113911 -4.812753e-04 P01_1_2020_early 1 1
#> 4 sample-04 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
#> 5 sample-05 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
#> 6 sample-06 Base None 0.001122894 -4.273146e-04 P02_1_2020_early 1 1
#> 7 sample-07 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
#> 8 sample-08 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
#> 9 sample-09 Base None 0.001167810 3.240657e-05 P06_1_2020_early 1 1
#> 10 sample-10 Base None 0.001167810 -3.701886e-04 P07_1_2020_early 1 1
#> caty aux time_period year file_name type
#> 1 None 0.001 early 2020 P01_1_20200503T052000-0400_ARU.wav wav
#> 2 None 0.001 early 2020 P01_1_20200503T052000-0400_ARU.wav wav
#> 3 None 0.001 early 2020 P01_1_20200503T052000-0400_ARU.wav wav
#> 4 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
#> 5 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
#> 6 None 0.001 early 2020 P02_1_20200504T052500_ARU.wav wav
#> 7 None 0.001 early 2020 P06_1_20200509T052000-0400_ARU.wav wav
#> 8 None 0.001 early 2020 P06_1_20200509T052000-0400_ARU.wav wav
#> 9 None 0.001 early 2020 P06_1_20200509T052000-0400_ARU.wav wav
#> 10 None 0.001 early 2020 P07_1_20200509T052500_ARU.wav wav
#> path aru_id
#> 1 a_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
#> 2 j_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
#> 3 o_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
#> 4 a_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav S4A01234
#> 5 j_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav S4A01234
#> 6 o_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav S4A01234
#> 7 a_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
#> 8 j_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
#> 9 o_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
#> 10 a_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav S4A01234
#> manufacturer model aru_type site_id tz_offset
#> 1 Frontier Labs BAR-LT BARLT P01_1 -0400
#> 2 Frontier Labs BAR-LT BARLT P01_1 -0400
#> 3 Frontier Labs BAR-LT BARLT P01_1 -0400
#> 4 Wildlife Acoustics Song Meter 4 SongMeter P02_1 <NA>
#> 5 Wildlife Acoustics Song Meter 4 SongMeter P02_1 <NA>
#> 6 Wildlife Acoustics Song Meter 4 SongMeter P02_1 <NA>
#> 7 Frontier Labs BAR-LT BARLT P06_1 -0400
#> 8 Frontier Labs BAR-LT BARLT P06_1 -0400
#> 9 Frontier Labs BAR-LT BARLT P06_1 -0400
#> 10 Wildlife Acoustics Song Meter 4 SongMeter P07_1 <NA>
#> date_time date longitude latitude tz t2ss
#> 1 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto 498.4167
#> 2 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto 498.4167
#> 3 2020-05-03 05:20:00 2020-05-03 -85.03 50.01 America/Toronto 498.4167
#> 4 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto 483.4167
#> 5 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto 483.4167
#> 6 2020-05-04 05:25:00 2020-05-04 -87.45 52.68 America/Toronto 483.4167
#> 7 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg 521.9333
#> 8 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg 521.9333
#> 9 2020-05-09 05:20:00 2020-05-09 -90.08 52.00 America/Winnipeg 521.9333
#> 10 2020-05-09 05:25:00 2020-05-09 -86.03 50.45 America/Toronto 488.7500
#> psel_by psel_min psel_doy psel psel_scaled psel_std psel_normalized
#> 1 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1 0.001
#> 2 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1 0.001
#> 3 t2sr -0.5359972 -0.9351720 0.2296568 0.8502302 1 0.001
#> 4 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1 0.001
#> 5 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1 0.001
#> 6 t2sr -0.5240265 -0.9200039 0.2359748 0.8736204 1 0.001
#> 7 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1 0.001
#> 8 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1 0.001
#> 9 t2sr -0.4585242 -0.8503970 0.2701113 1.0000000 1 0.001
#> 10 t2sr -0.5129536 -0.8503970 0.2558023 0.9470254 1 0.001
#> selection_group geometry
#> 1 P01_1_2020_early POINT (124 -53.21667)
#> 2 P01_1_2020_early POINT (124 -53.21667)
#> 3 P01_1_2020_early POINT (124 -53.21667)
#> 4 P02_1_2020_early POINT (125 -47.25)
#> 5 P02_1_2020_early POINT (125 -47.25)
#> 6 P02_1_2020_early POINT (125 -47.25)
#> 7 P06_1_2020_early POINT (130 3.583333)
#> 8 P06_1_2020_early POINT (130 3.583333)
#> 9 P06_1_2020_early POINT (130 3.583333)
#> 10 P07_1_2020_early POINT (130 -40.93333)