Repeated Measurements

The extract_rcov() function is a practical tool for extracting the residual variance-covariance matrix from a repeated measurement ASReml model. This function is particularly useful when dealing with various covariance structures, including but not limited to the uniform correlation (corv), power or exponential (expv), antedependence (ante), unstructured (US), and Autoregressive Correlation 1 (ar1).

Watch the tutorial: A good guide on fitting repeated measurement models in ASReml by VSNi. However, it might leave you wondering how to actually extract the fitted residual variance-covariance matrix. That’s where extract_rcov() comes into play.

This vignette utilizes the same dataset featured in the video and incorporates a segment of the code to showcase the functionality of extract_rcov(). Additionally, we provide insightful figures that aid in exploring the results.

library(ggpubr)
library(agriutilities)
library(tidyr)
library(dplyr)
library(tibble)

if (requireNamespace("asreml", quietly = TRUE)) {
  library(asreml)
  head(grassUV) %>% print()
  grassUV %>%
    ggplot(
      aes(x = Time, y = y, group = Plant, color = Plant)
    ) +
    geom_point() +
    geom_line() +
    facet_wrap(~Tmt) +
    theme_minimal(base_size = 15)
}
#>   Tmt Plant Time HeightID    y
#> 1 MAV     1    1       y1 21.0
#> 2 MAV     1    3       y3 39.7
#> 3 MAV     1    5       y5 47.0
#> 4 MAV     1    7       y7 53.0
#> 5 MAV     1   10      y10 55.0
#> 6 MAV     2    1       y1 32.0

Exploration

if (requireNamespace("asreml", quietly = TRUE)) {
  tmp <- grassUV %>%
    group_by(Time, Plant) %>%
    summarise(mean = mean(y, na.rm = TRUE)) %>%
    spread(Time, mean) %>%
    column_to_rownames("Plant")

  a <- covcor_heat(
    matrix = cor(tmp),
    legend = "none",
    size = 4.5
  ) +
    ggtitle(label = "Pearson Correlation")

  b <- tmp %>%
    cor(use = "pairwise.complete.obs") %>%
    as.data.frame() %>%
    rownames_to_column(var = "Time") %>%
    gather("DAP2", "corr", -1) %>%
    type.convert(as.is = FALSE) %>%
    mutate(corr = ifelse(Time < DAP2, NA, corr)) %>%
    mutate(DAP2 = as.factor(DAP2)) %>%
    ggplot(
      aes(x = Time, y = corr, group = DAP2, color = DAP2)
    ) +
    geom_point() +
    geom_line() +
    theme_minimal(base_size = 15) +
    color_palette(palette = "jco") +
    labs(color = "Time", y = "Pearson Correlation") +
    theme(legend.position = "top")
  ggarrange(a, b)
}

Modeling

Let’s fit several models with different variance-covariance structures:

if (requireNamespace("asreml", quietly = TRUE)) {
  # Identity variance model.
  model_0 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):idv(Time),
    data = grassUV
  )

  # Simple correlation model; homogeneous variance form.
  model_1 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):corv(Time),
    data = grassUV
  )

  # Exponential (or power) model; homogeneous variance form.
  model_2 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):expv(Time),
    data = grassUV
  )

  # Exponential (or power) model; heterogeneous variance form.
  model_3 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):exph(Time),
    data = grassUV
  )

  # Antedependence variance model of order 1
  model_4 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):ante(Time),
    data = grassUV
  )

  # Autoregressive model of order 1; homogeneous variance form.
  model_5 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):ar1v(Time),
    data = grassUV
  )

  # Autoregressive model of order 1; heterogeneous variance form.
  model_6 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):ar1h(Time),
    data = grassUV
  )

  # Unstructured variance model.
  model_7 <- asreml(
    fixed = y ~ Time + Tmt + Tmt:Time,
    residual = ~ id(Plant):us(Time),
    data = grassUV
  )
}

Model Comparison

We can use the Akaike Information Criterion (AIC)(Akaike, 1974) or the Bayesian Information Criterion (BIC)(Stone, 1979) for comparing the fitted models. A lower AIC or BIC value indicates a better fit.

if (requireNamespace("asreml", quietly = TRUE)) {
  models <- list(
    "idv" = model_0,
    "corv" = model_1,
    "expv" = model_2,
    "exph" = model_3,
    "ante" = model_4,
    "ar1v" = model_5,
    "ar1h" = model_6,
    "us" = model_7
  )

  summary_models <- data.frame(
    model = names(models),
    aic = unlist(lapply(models, function(x) summary(x)$aic)),
    bic = unlist(lapply(models, function(x) summary(x)$bic)),
    loglik = unlist(lapply(models, function(x) summary(x)$loglik)),
    nedf = unlist(lapply(models, function(x) summary(x)$nedf)),
    param = unlist(lapply(models, function(x) attr(summary(x)$aic, "param"))),
    row.names = NULL
  )

  summary_models %>% print()

  summary_models %>%
    ggplot(
      aes(x = reorder(model, -bic), y = bic, group = 1)
    ) +
    geom_point(size = 2) +
    geom_text(aes(x = model, y = bic + 5, label = param), size = 5) +
    geom_line() +
    theme_minimal(base_size = 15) +
    labs(x = NULL, y = "BIC")
}
#>   model      aic      bic    loglik nedf param
#> 1   idv 420.8836 422.9779 -209.4418   60     1
#> 2  corv 397.7535 401.9422 -196.8768   60     2
#> 3  expv 369.9577 374.1464 -182.9788   60     2
#> 4  exph 354.9984 367.5645 -171.4992   60     6
#> 5  ante 338.7387 357.5878 -160.3694   60     9
#> 6  ar1v 366.1259 370.3146 -181.0630   60     2
#> 7  ar1h 351.1073 363.6734 -169.5536   60     6
#> 8    us 346.0712 377.4863 -158.0356   60    15

In this specific scenario, the antedependence model emerges as the optimal choice, as indicated by the Bayesian Information Criteria (BIC). The 1-factor antedependence structure elegantly models the variance-covariance matrix \(\Sigma^{\omega \times\omega}\) with the following decomposition:

\[ \Sigma ^{-1} = UDU' \] where \(U^{\omega \times\omega}\) is a unit upper triangular matrix and \(D = diag(d_1, ..., d_{\omega})\). The extract_rcov() retrieves these matrices for a closer inspection of the results.

Extracting Variance Covariance Matrix

As an example of extracting the variance-covariance matrix, let’s take the best model according to the BIC, and run this line of code: extract_rcov(model_4).

if (requireNamespace("asreml", quietly = TRUE)) {
  mat <- extract_rcov(model_4, time = "Time", plot = "Plant")
  print(mat)

  # Plotting  Matrix
  p1 <- covcor_heat(matrix = mat$corr, legend = "none", size = 4.5) +
    ggtitle(label = "Correlation Matrix (ante)")
  p1

  p2 <- covcor_heat(
    matrix = mat$vcov,
    corr = FALSE,
    legend = "none",
    size = 4.5,
    digits = 1
  ) +
    ggtitle(label = "Covariance Matrix (ante)")
  p2
  ggarrange(p1, p2)
}
#> $corr_mat
#>            1         3         5         7        10
#> 1  1.0000000 0.5949600 0.3551374 0.3117083 0.3042604
#> 3  0.5949600 1.0000000 0.5969097 0.5239148 0.5113965
#> 5  0.3551374 0.5969097 1.0000000 0.8777119 0.8567400
#> 7  0.3117083 0.5239148 0.8777119 1.0000000 0.9761062
#> 10 0.3042604 0.5113965 0.8567400 0.9761062 1.0000000
#> 
#> $vcov_mat
#>           1        3         5         7        10
#> 1  37.22092 23.38866  34.87475  44.65979  43.22207
#> 3  23.38866 41.51911  61.90901  79.27923  76.72701
#> 5  34.87475 61.90901 259.08503 331.77822 321.09738
#> 7  44.65979 79.27923 331.77822 551.50497 533.75052
#> 10 43.22207 76.72701 321.09738 533.75052 542.16700
#> 
#> $vc
#> [1] "ante"
#> 
#> $U
#>    1         3         5         7         10
#> 1  1 -0.628374  0.000000  0.000000  0.0000000
#> 3  0  1.000000 -1.491097  0.000000  0.0000000
#> 5  0  0.000000  1.000000 -1.280577  0.0000000
#> 7  0  0.000000  0.000000  1.000000 -0.9678073
#> 10 0  0.000000  0.000000  0.000000  1.0000000
#> 
#> $D
#>             1          3           5           7         10
#> 1  0.02686661 0.00000000 0.000000000 0.000000000 0.00000000
#> 3  0.00000000 0.03728243 0.000000000 0.000000000 0.00000000
#> 5  0.00000000 0.00000000 0.005996185 0.000000000 0.00000000
#> 7  0.00000000 0.00000000 0.000000000 0.007896552 0.00000000
#> 10 0.00000000 0.00000000 0.000000000 0.000000000 0.03906346

Comparison

The plot below unveils the correlation matrices, comparing the raw data matrix with the one derived post-application of the antedependence model.

if (requireNamespace("asreml", quietly = TRUE)) {
  ggarrange(a, p1)
}

Final Results

if (requireNamespace("asreml", quietly = TRUE)) {
  pvals <- predict(model_4, classify = "Tmt:Time")$pvals
  grassUV %>%
    ggplot(
      aes(x = Time, y = y, group = Tmt, color = Tmt)
    ) +
    geom_point(alpha = 0.5, size = 3) +
    geom_line(data = pvals, mapping = aes(y = predicted.value)) +
    scale_color_manual(values = c("red", "grey50")) +
    facet_wrap(~Tmt) +
    theme_minimal(base_size = 15) +
    theme(legend.position = "none")
}