Main analysis: Average Principal Causal Effects (APCE)
To estimate and visualize the APCE using the proposed method, the functions below are used in the following order:
AiEvalmcmc()
: Sample the parameters using Gibbs sampling. See Appendix S5 for more details.CalAPCE()
orCalAPCEparallel()
: Compute APCE for each posterior sample from mcmc.APCEsummary()
: Summarize the results based on the average and 95% quantile of the estimated APCE(s) for each sample from mcmc.PlotAPCE()
: Visualize the results as in Figure 4.
AiEvalmcmc()
AiEvalmcmc()
samples the parameters using Gibbs sampler
specified in Appendix S5. Basically, you need to specify the data and
the number of mcmc iterations (n.mcmc
).
data
: Adata.frame
of which columns consists of pre-treatment covariates, a binary treatment (Z), an ordinal decision (D), and an outcome variable (Y). The column names of the latter three should be specified as “Z”, “D”, and “Y” respectively (e.g.,data(synth)
)n.mcmc
: The total number of MCMC iterations. It should be large enough to guarantee the convergence. You may use Gelman-Rubin statistic for convergence diagnostics. In the paper, we run five Markov chains of 50,000 iterations, retain the second half of each chain and combine them to be used for our analysis. See the replication codes in the later section for more details.
Please check the documentation of the function using the code
?AiEvalmcmc
for other arguments. The example code is as
below:
data(synth)
= AiEvalmcmc(data = synth, n.mcmc = 10)
sample_mcmc #> 10/10 done.
# increase n.mcmc in your analysis
CalAPCE()
or CalAPCEparallel()
CalAPCE()
computes APCE for each posterior sample in the
output of AiEvalmcmc()
. CalAPCEparallel()
uses
parallel computing to reduce its computation time. You may need three
main inputs:
data
: The samedata.frame
that is used forAiEvalmcmc()
.mcmc.re
: The output ofAiEvalmcmc()
.subgroup
: A list of numeric vectors for the index of each of the five subgroups.
For CalAPCEparallel()
, you also need to specify
size
(the number of parallel computing). I recommend you to
check the number of CPU cores in your computer using
parallel::detectCores()
and divide it by 4 to determine the
size
. If the number of CPU cores is 4, use
CalAPCE()
without parallel computing.
Note that the output is in form of list
. Please check
the documentation of the function using the code ?CalAPCE
or ?CalAPCEparallel
for other arguments and the details of
outputs. The example code is as below:
= list(1:nrow(synth),
subgroup_synth which(synth$Sex==0),
which(synth$Sex==1),
which(synth$Sex==1&synth$White==0),
which(synth$Sex==1&synth$White==1))
= CalAPCE(data = synth,
sample_apce mcmc.re = sample_mcmc,
subgroup = subgroup_synth)
# Or using parallel computing (results are the same):
# sample_apce = CalAPCEparallel(data = synth,
# mcmc.re = sample_mcmc,
# subgroup = subgroup_synth,
# burnin = 0,
# size = 2)
APCEsummary()
APCEsummary()
computes the average and 95% quantile of
APCE of each sample. You only need the output of CalAPCE()
or CalAPCEparallel()
.
= APCEsummary(sample_apce[["APCE.mcmc"]]) sample_apce_summary
PlotAPCE()
(Figure 4)
PlotAPCE()
visualizes the results as in Figure 4. See
the example below:
PlotAPCE(sample_apce_summary,
y.max = 0.25,
decision.labels = c("signature","small cash","middle cash","large cash"),
shape.values = c(16, 17, 15, 18),
col.values = c("blue", "black", "red", "brown", "purple"),
label = FALSE)
Replication codes
The codes below is used for the main analysis and the visualization of Figure 4:
## Main analysis
# MCMC
library(coda)
= function(data, rho=0,
PSADiag_ordinal ## mcmc parameters
n.mcmc = 5*10^4, verbose = TRUE, out.length = 500){
set.seed(111)
= AiEvalmcmc(data,rho=rho,n.mcmc=n.mcmc,verbose=TRUE,out.length=out.length)
mcmc1 set.seed(222)
= AiEvalmcmc(data,rho=rho,n.mcmc=n.mcmc,verbose=TRUE,out.length=out.length)
mcmc2 set.seed(333)
= AiEvalmcmc(data,rho=rho,n.mcmc=n.mcmc,verbose=TRUE,out.length=out.length)
mcmc3 set.seed(444)
= AiEvalmcmc(data,rho=rho,n.mcmc=n.mcmc,verbose=TRUE,out.length=out.length)
mcmc4 set.seed(555)
= AiEvalmcmc(data,rho=rho,n.mcmc=n.mcmc,verbose=TRUE,out.length=out.length)
mcmc5
= mcmc.list(mcmc1,mcmc2,mcmc3,mcmc4, mcmc5)
mcmc.PSA return(mcmc.PSA)
}
= PSADiag_ordinal(FTAdata,n.mcmc=50000,verbose=TRUE,out.length=500)
PSA.ordinal.diag.fta = PSADiag_ordinal(NCAdata,n.mcmc=50000,verbose=TRUE,out.length=500)
PSA.ordinal.diag.nca = PSADiag_ordinal(NVCAdata,n.mcmc=50000,verbose=TRUE,out.length=500)
PSA.ordinal.diag.nvca
<- lapply(PSA.ordinal.diag.fta, function(x) x[-(1: floor(0.5*dim(x)[1])),])
FTAmcmc <- do.call("rbind", FTAmcmc)
FTAmcmc <- mcmc(FTAmcmc)
FTAmcmc
<- lapply(PSA.ordinal.diag.nca, function(x) x[-(1: floor(0.5*dim(x)[1])),])
NCAmcmc <- do.call("rbind", NCAmcmc)
NCAmcmc <- mcmc(NCAmcmc)
NCAmcmc
<- lapply(PSA.ordinal.diag.nvca, function(x) x[-(1: floor(0.5*dim(x)[1])),])
NVCAmcmc <- do.call("rbind", NVCAmcmc)
NVCAmcmc <- mcmc(NVCAmcmc)
NVCAmcmc
# APCE
= list(1:nrow(FTAdata),
subg which(FTAdata$Sex==0),
which(FTAdata$Sex==1),
which(FTAdata$Sex==1&FTAdata$White==0),
which(FTAdata$Sex==1&FTAdata$White==1))
<- CalAPCEparallel(FTAdata,FTAmcmc,subgroup=subg,rho=0,burnin=0,size=5)
FTAace <- APCEsummary(FTAace[["APCE.mcmc"]])
FTAqoi
<- CalAPCEparallel(NCAdata,NCAmcmc,subgroup=subg,rho=0,burnin=0,size=5)
NCAace <- APCEsummary(NCAace[["APCE.mcmc"]])
NCAqoi
<- CalAPCEparallel(NVCAdata,NVCAmcmc,subgroup=subg,rho=0,burnin=0,size=5)
NVCAace <- APCEsummary(NVCAace[["APCE.mcmc"]])
NVCAqoi
# Figure 4
pdf("figs/qoi_fta.pdf", width = 9, height = 3)
PlotAPCE(FTAqoi, y.max = 0.166, top.margin = 0.05, bottom.margin = 0.05,
label.position = c("top", "bottom", "top", "bottom")) +
labs(title = "Failure to Appear (FTA)") +
theme(legend.position = "none")
dev.off()
pdf("figs/qoi_nca.pdf", width = 9, height = 3)
PlotAPCE(NCAqoi, y.max = 0.166, label = FALSE) +
labs(title = "New Criminal Activity (NCA)") +
theme(legend.position = "none")
dev.off()
pdf("figs/qoi_nvca.pdf", width = 9, height = 3.5)
PlotAPCE(NVCAqoi, y.max = 0.166, label = FALSE) +
labs(title = "New Violent Criminal Activity (NVCA)")
dev.off()
For subgroup analysis for age groups in Figure S7:
# APCE
= list(which(FTAdata$Age<23),
subg.age which(FTAdata$Age>=23&FTAdata$Age<29),
which(FTAdata$Age>=29&FTAdata$Age<36),
which(FTAdata$Age>=36&FTAdata$Age<46),
which(FTAdata$Age>=46))
= c("17~22", "23~28", "29~35", "36~45", "46~")
subg.age.lab <- CalAPCEparallel(FTAdata,FTAmcmc,subgroup=subg.age,
FTAace.age name.group=subg.age.lab,
rho=0,burnin=0,size=5)
<- APCEsummary(FTAace.age[["APCE.mcmc"]])
FTAqoi.age
<- CalAPCEparallel(NCAdata,NCAmcmc,subgroup=subg.age,
NCAace.age name.group=subg.age.lab,
rho=0,burnin=0,size=5)
<- APCEsummary(NCAace.age[["APCE.mcmc"]])
NCAqoi.age
<- CalAPCEparallel(NVCAdata,NVCAmcmc,subgroup=subg.age,
NVCAace.age name.group=subg.age.lab,
rho=0,burnin=0,size=5)
<- APCEsummary(NVCAace.age[["APCE.mcmc"]])
NVCAqoi.age
# Figure S7
pdf("figs/qoi_fta_age.pdf", width = 9, height = 3)
PlotAPCE(FTAqoi.age, y.max = 0.124, top.margin = 0.03, bottom.margin = 0.03,
label.position = c("top", "bottom", "top", "bottom"),
name.group = subg.age.lab) +
labs(title = "Failure to Appear (FTA)") +
theme(legend.position = "none")
dev.off()
pdf("figs/qoi_nca_age.pdf", width = 9, height = 3)
PlotAPCE(NCAqoi.age, y.max = 0.124, label = FALSE, name.group = subg.age.lab) +
labs(title = "New Criminal Activity (NCA)") +
theme(legend.position = "none")
dev.off()
pdf("figs/qoi_nvca_age.pdf", width = 9, height = 3.5)
PlotAPCE(NVCAqoi.age, y.max = 0.124, label = FALSE, name.group = subg.age.lab) +
labs(title = "New Violent Criminal Activity (NVCA)")
dev.off()