The sample data on migration between Spain, Sweden and the Netherlands were prepared by Claudio Bosco (European Commission), Daniela Ghio (European Commission), Maurizio Teobaldelli (European Commission) and Sabine Zinn (German Socio-Economic Panel, Humboldt University Berlin). EUROSTAT data were used as the data source. The sample was intentionally kept small. MicSim can also handle larger numbers of cases, but to be efficient in terms of run times, this requires a bit more computing power with more CPUs.
# Load library
library(MicSim)
#> Lade nötiges Paket: snowfall
#> Lade nötiges Paket: snow
#> Lade nötiges Paket: rlecuyer
# Defining simulation horizon
startDate <- 20140101 # yyyymmdd
endDate <- 20181231 # yyyymmdd
simHorizon <- c(startDate=startDate, endDate=endDate)
# Seed for random number generator
set.seed(12)
# Definition of maximal age (sharp, i.e. max age is 100.0)
maxAge <- 100
# Definition of state space
sex <- c("m","f")
country_R <- c("NL","ES","SE")
fert <- c("0","1","2","3","4+","999")
stateSpace <- expand.grid(sex=sex,country_R=country_R,fert=fert)
# Definition of nonabsorbing and absorbing states
absStates <- c("dead","rest")
# initial population included in the MicSim package
initPop <- initPopMigrExp
head(initPop)
#> ID birthDate initState
#> 1 1 19411206 m/NL/999
#> 2 2 20070608 m/SE/999
#> 3 3 19750709 m/ES/0
#> 4 4 19220312 f/NL/999
#> 5 5 19740425 f/ES/0
#> 6 6 20110828 m/ES/999
# population of immigrants entering to virtual population over simulation time
immigrPop <- immigrPopMigrExp # included in the MicSim package
head(immigrPop)
#> ID immigrDate birthDate immigrInitState
#> 1 72966 20180925 19830404 m/ES/0
#> 2 72967 20171208 20100319 f/ES/0
#> 3 72968 20151217 19910316 f/NL/0
#> 4 72969 20170105 20070602 m/NL/0
#> 5 72970 20140609 19880623 f/ES/0
#> 6 72971 20171003 19881129 m/NL/0
# Definition of initial states for newborns
fixInitStates <- 2 # give indices for attribute/substate that will be taken over from the mother, here: nat (nationality)
varInitStates <- rbind(c("m","ES","0"), c("f","ES","0"), # to have possibility to define distinct sex ratios in distinct countries,
c("m","NL","0"), c("f","NL","0"), # hold substate that are inherited by mother in the init state (i.e. nationality)
c("m","SE","0"), c("f","SE","0"))
initStatesProb <- c(0.5151,0.4849, # probabilities for female / male newborns for each nationality separately
0.5124,0.4876,
0.5146,0.4854)
Beware: Rates have to be given at least for age [0,maxAge) and for all years within the simulation horizon. At this the exact value of maxAge is excluded, i.e. here 100.00 but not e.g. age=99.9999.
# Load rates from data included in the MicSim package (one column for one transition)
rates <- migrExpRates
# Define function to easily transform rates in function format required by MicSim
require(glue)
#> Lade nötiges Paket: glue
for (i in 1:length(names(rates))) {
script_var = names(rates[i])
eval(parse(text = glue("{script_var} <- unlist(as.numeric(rates[,{i}]))")))
}
# --------------------------------------------------------------------------
# Fertility Rates
# --------------------------------------------------------------------------
fertRates_NL_0_1 <- function(age,calTime, duration){
rate <- fert_NL_0_1[as.integer(age)+1]
return(rate)
}
fertRates_NL_1_2 <- function(age,calTime, duration){
rate <- fert_NL_1_2[as.integer(age)+1]
return(rate)
}
fertRates_NL_2_3 <- function(age,calTime, duration){
rate <- fert_NL_2_3[as.integer(age)+1]
return(rate)
}
fertRates_NL_3_4 <- function(age,calTime, duration){
rate <- fert_NL_3_4[as.integer(age)-+1]
return(rate)
}
fertRates_ES_0_1 <- function(age,calTime, duration){
rate <- fert_ES_0_1[as.integer(age)+1]
return(rate)
}
fertRates_ES_1_2 <- function(age,calTime, duration){
rate <- fert_ES_1_2[as.integer(age)+1]
return(rate)
}
fertRates_ES_2_3 <- function(age,calTime, duration){
rate <- fert_ES_2_3[as.integer(age)+1]
return(rate)
}
fertRates_ES_3_4 <- function(age,calTime, duration){
rate <- fert_ES_3_4[as.integer(age)+1]
return(rate)
}
fertRates_SE_0_1 <- function(age,calTime, duration){
rate <- fert_SE_0_1[as.integer(age)+1]
return(rate)
}
fertRates_SE_1_2 <- function(age,calTime, duration){
rate <- fert_SE_1_2[as.integer(age)+1]
return(rate)
}
fertRates_SE_2_3 <- function(age,calTime, duration){
rate <- fert_SE_2_3[as.integer(age)+1]
return(rate)
}
fertRates_SE_3_4 <- function(age,calTime, duration){
rate <- fert_SE_3_4[as.integer(age)+1]
return(rate)
}
# --------------------------------------------------------------------------
# Internal Migration Rates
# --------------------------------------------------------------------------
`%!in%` <- Negate(`%in%`)
for(i in 1:length(country_R)) {
other_provinces = country_R[which(country_R %!in% country_R[i])]
for(k in 1:length(other_provinces)) {
eq = paste(sprintf('%s_%s_rates', glue("{country_R[i]}"), glue("{other_provinces[k]}")),
'<- function(age,calTime, duration)', '{',
sprintf('rate <- rate_%s_%s[as.integer(age)+1]', glue("{country_R[i]}"),
glue("{other_provinces[k]}")), "\n ", 'return(rate)','}')
eval(parse(text = eq))
}
}
# --------------------------------------------------------------------------
# Mortality Rates
# --------------------------------------------------------------------------
# Female mortality
for(i in 1:length(country_R)) {
eq = paste(sprintf('mortRates_f_%s', glue("{country_R[i]}")), '<- function(age,calTime, duration)',
'{',
sprintf('rate <- mort_f_%s[as.integer(age)+1]', glue("{country_R[i]}")),
"\n ",
'return(rate)',
'}')
eval(parse(text = eq))
}
# Male mortality
for(i in 1:length(country_R)) {
eq = paste(sprintf('mortRates_m_%s', glue("{country_R[i]}")), '<- function(age,calTime, duration)',
'{',
sprintf('rate <- mort_m_%s[as.integer(age)+1]', glue("{country_R[i]}")),
"\n ",
'return(rate)',
'}')
eval(parse(text = eq))
}
# ---------------------------------------------------------------------------
# Emigration rates
# ---------------------------------------------------------------------------
# Emigration rates for females
for(i in 1:length(country_R)) {
eq = paste(sprintf('emigrRates_f_%s', glue("{country_R[i]}")), '<- function(age,calTime, duration)',
'{',
sprintf('rate <- emig_f_%s[as.integer(age)+1]', glue("{country_R[i]}")),
"\n ",
'return(rate)',
'}')
eval(parse(text = eq))
}
# Emigration rates for males
for(i in 1:length(country_R)) {
eq = paste(sprintf('emigrRates_m_%s', glue("{country_R[i]}")), '<- function(age,calTime, duration)',
'{',
sprintf('rate <- emig_m_%s[as.integer(age)+1]', glue("{country_R[i]}")),
"\n ",
'return(rate)',
'}')
eval(parse(text = eq))
}
# ---------------------------------------------------------------------------
# Transition matrix for fertility
# ---------------------------------------------------------------------------
fertTrMatrix <- cbind(c("f/ES/0->f/ES/1","f/ES/1->f/ES/2","f/ES/2->f/ES/3","f/ES/3->f/ES/4+",
"f/SE/0->f/SE/1","f/SE/1->f/SE/2","f/SE/2->f/SE/3","f/SE/3->f/SE/4+",
"f/NL/0->f/NL/1","f/NL/1->f/NL/2","f/NL/2->f/NL/3","f/NL/3->f/NL/4+"),
c("fertRates_ES_0_1", "fertRates_ES_1_2", "fertRates_ES_2_3", "fertRates_ES_3_4",
"fertRates_SE_0_1", "fertRates_SE_1_2", "fertRates_SE_2_3", "fertRates_SE_3_4",
"fertRates_NL_0_1", "fertRates_NL_1_2", "fertRates_NL_2_3", "fertRates_NL_3_4"))
# ---------------------------------------------------------------------------
# Transition matrix for migration
# ---------------------------------------------------------------------------
# First: make names for transition matrix, i.e. stateOfOrigin->stateOfDestination
testo1 <- "intmigTrMatrix <- cbind(c("
for(i in 1:length(country_R)) {
for(m in 1:length(country_R)) {
if(m != i) {
eq1 = paste(sprintf("'%s->%s'", glue("{country_R[i]}"), glue("{country_R[m]}")))
if (i == length(country_R) & m == (i-1)) {
testo1 = paste(testo1,eq1)
} else {
testo1 = paste(testo1 ,eq1, ",")
}
}
if (m == i & m == length(country_R)){
testo1 = paste(testo1, "),")
}
}
}
#Second: set names for transition functions
testo1 = paste (testo1,"c(")
for(i in 1:length(country_R)) {
for(m in 1:length(country_R)) {
if(m != i) {
eq1 = paste(sprintf("'%s_%s_rates'", glue("{country_R[i]}"), glue("{country_R[m]}")))
if (i == length(country_R) & m == (i-1)) {
testo1 = paste(testo1,eq1)
} else {
testo1 = paste(testo1 ,eq1, ",")
}
}
if (m == i & m == length(country_R)){
testo1 = paste(testo1, "))")
}
}
}
eval(parse(text = testo1))
# ---------------------------------------------------------------------------
# Transition matrix for mortality and emigration
# ---------------------------------------------------------------------------
testo <- "absTransitions <- rbind("
for(i in 1:length(country_R)) {
for(m in 1:length(sex)) {
eq1 = paste(sprintf("c('%s/%s/dead', 'mortRates_%s_%s')", glue("{sex[m]}"), glue("{country_R[i]}") ,
glue("{sex[m]}"), glue("{country_R[i]}")),',',
sprintf("c('%s/%s/rest', 'emigrRates_%s_%s')", glue("{sex[m]}"),glue("{country_R[i]}") ,
glue("{sex[m]}"), glue("{country_R[i]}")))
if(i == length(country_R) & m == length(sex)) {
testo = paste(testo, eq1, ")")
} else {
testo = paste(testo,eq1, ",")
}
}
}
eval(parse(text = testo))
# ---------------------------------------------------------------------------
# Combine all
# ---------------------------------------------------------------------------
allTransitions <- rbind(fertTrMatrix, intmigTrMatrix)
transitionMatrix <- buildTransitionMatrix(allTransitions=allTransitions,
absTransitions=absTransitions,
stateSpace=stateSpace)
# ---------------------------------------------------------------------------
# Define transitions triggering a birth event
# ---------------------------------------------------------------------------
fertTr <- fertTrMatrix[,1]
For illustration purpose, the subsequent run is limited to the first 500 people and to 100 migrants. However, just remove the restriction to run the whole sample, i.e. using initPop instead of initPop[1:500,] and immigrPop instead of immigrPop[1:100,].
pop <- micSim(initPop=initPop[1:500,], immigrPop=immigrPop[1:100,],
transitionMatrix=transitionMatrix, absStates=absStates,
varInitStates=varInitStates, initStatesProb=initStatesProb,
fixInitStates=fixInitStates,
maxAge=maxAge, simHorizon=simHorizon,fertTr=fertTr)
#> Initialization ...
#> [1] "Starting at: 2024-01-23 14:07:50.88455"
#> [1] "Ending at: 2024-01-23 14:07:51.858173"
#> Simulation is running ...
#> Year: 2014
#> Year: 2015
#> Year: 2016
#> Year: 2017
#> Year: 2018
#> Simulation has finished.
#> ------------------
head(pop)
#> ID birthDate initState From To transitionTime transitionAge motherID
#> 1 1 19411206 m/NL/999 <NA> <NA> <NA> NA <NA>
#> 112 2 20070608 m/SE/999 <NA> <NA> <NA> NA <NA>
#> 225 3 19750709 m/ES/0 <NA> <NA> <NA> NA <NA>
#> 337 4 19220312 f/NL/999 f/NL/999 dead 20150626 93.29 <NA>
#> 449 5 19740425 f/ES/0 <NA> <NA> <NA> NA <NA>
#> 461 6 20110828 m/ES/999 <NA> <NA> <NA> NA <NA>
popLong <- convertToLongFormat(pop, migr=TRUE)
head(popLong)
#> ID birthDate Tstart Tstop statusEntry statusExit OD ns Episode sex
#> 1 1 19411206 20140101 20181231 0 0 cens 1 1 m
#> 114 2 20070608 20140101 20181231 0 0 cens 1 1 m
#> 231 3 19750709 20140101 20181231 0 0 cens 1 1 m
#> 348 4 19220312 20140101 20150626 0 1 dead 1 1 f
#> 465 5 19740425 20140101 20181231 0 0 cens 1 1 f
#> 478 6 20110828 20140101 20181231 0 0 cens 1 1 m
#> country_R fert
#> 1 NL 999
#> 114 SE 999
#> 231 ES 0
#> 348 NL 999
#> 465 ES 0
#> 478 ES 999
popWide <- convertToWideFormat(pop)
head(popWide)
#> ID birthDate initState ns From.1 To.1 transitionTime.1 transitionAge.1
#> 1 1 19411206 m/NL/999 0 <NA> <NA> <NA> NA
#> 112 2 20070608 m/SE/999 0 <NA> <NA> <NA> NA
#> 225 3 19750709 m/ES/0 0 <NA> <NA> <NA> NA
#> 337 4 19220312 f/NL/999 1 f/NL/999 dead 20150626 93.29
#> 449 5 19740425 f/ES/0 0 <NA> <NA> <NA> NA
#> 461 6 20110828 m/ES/999 0 <NA> <NA> <NA> NA
#> From.2 To.2 transitionTime.2 transitionAge.2
#> 1 <NA> <NA> <NA> NA
#> 112 <NA> <NA> <NA> NA
#> 225 <NA> <NA> <NA> NA
#> 337 <NA> <NA> <NA> NA
#> 449 <NA> <NA> <NA> NA
#> 461 <NA> <NA> <NA> NA
Try this to speed up your simulation run. The more cores you can use the faster the simulation will be executed. This example uses three cores. Give as many seeds as you use cores. That way you can replicate your results.
cores <- 3
seeds <- c(34,145,97)
pop <- micSimParallel(initPop=initPop, immigrPop=immigrPop,
transitionMatrix=transitionMatrix, absStates=absStates,
varInitStates=varInitStates, initStatesProb=initStatesProb,
fixInitStates=fixInitStates,
maxAge=maxAge, simHorizon=simHorizon,fertTr=fertTr,
cores=cores, seeds=seeds)
#> Starting at [1] "2024-01-23 14:07:52 CET"
#> Warning in searchCommandline(parallel, cpus = cpus, type = type, socketHosts =
#> socketHosts, : Unknown option on commandline: tools::buildVignettes(dir
#> R Version: R version 4.3.2 (2023-10-31 ucrt)
#> snowfall 1.84-6.3 initialized (using snow 0.4-4): parallel execution on 3 CPUs.
#>
#> Stopping cluster
#> Stopped at [1] "2024-01-23 14:09:42 CET"
head(pop)
#> ID birthDate initState From To transitionTime transitionAge
#> 1 1 19411206 m/NL/999 <NA> <NA> <NA> NA
#> 11220 2 20070608 m/SE/999 <NA> <NA> <NA> NA
#> 16690 3 19750709 m/ES/0 <NA> <NA> <NA> NA
#> 17812 4 19220312 f/NL/999 f/NL/999 dead 20160929 94.55
#> 18937 5 19740425 f/ES/0 <NA> <NA> <NA> NA
#> 20057 6 20110828 m/ES/999 m/ES/999 rest 20180917 7.05
#> motherID
#> 1 <NA>
#> 11220 <NA>
#> 16690 <NA>
#> 17812 <NA>
#> 18937 <NA>
#> 20057 <NA>