I'm moving to Apollo after using STATA and am struggling with my most simple MNL model. I've noticed a discrepancy between the Apollo and STATA results, but only with the opt-out parameter. When I switch the reference level for my 'b_times' variable from 'b_7times' to 'b_2times', the opt-out parameter's sign flips, indicating a likely error in my code. I suspect I'm miscoding the opt-out and would greatly appreciate any help with this.
ref: b_7times
Code: Select all
rm(list=ls())
### DCE DATA ANALYSIS-----
library(apollo)
library(dplyr)
library(knitr)
### Initialise code
apollo_initialise()
### Set core controls
apollo_control = list(
modelName = "MNL_DCE_Surgeryb_7times",
modelDescr = "Simple MNL model on UC surgery with med optout and all categorical",
indivID = "X",
outputDirectory = "output"
)
### Loading data from package
database <- read.csv("apollodata.csv")
## DEFINE MODEL PARAMETERS
### Vector of parameters, including any that are kept fixed in estimation
apollo_beta=c(asc_medoptout=0,
b_mild = 0,
b_moderate = 0,
b_high = 0,
b_7times = 0,
b_6times = 0,
b_5times = 0,
b_4times = 0,
b_3times = 0,
b_2times = 0,
b_hurry = 0,
b_15min = 0,
b_delay = 0,
b_nostoma = 0,
b_tempstoma = 0,
b_revstoma = 0,
b_permstoma = 0,
b_norisk = 0,
b_10y5 = 0,
b_10y10 = 0,
b_10y20 = 0,
b_s5 = 0,
b_s10 = 0,
b_s20 = 0,
b_s30 = 0,
b_w19 = 0,
b_w25 = 0,
b_w40 = 0,
b_w43 = 0)
### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c( "b_mild" , "b_7times", "b_hurry" , "b_nostoma" , "b_norisk" , "b_s5", "b_w19")
#### GROUP AND VALIDATE INPUTS ---
apollo_inputs = apollo_validateInputs()
#### DEFINE MODEL AND LIKELIHOOD FUNCTION ---
## this code is standard and always the same
apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
P = list()
##write utility function for each of the alternatives
### List of utilities: these must use the same names as in mnl_settings, order is irrelevant
V = list()
V[["SurgeryA"]] = b_mild * (`A1_1`==0) + b_moderate * (`A1_1`==1) + b_high * (`A1_1`==2) +
b_7times * (`A1_2`==0) + b_6times * (`A1_2`==1) + b_5times * (`A1_2`==2) + b_4times * (`A1_2`==3)+ b_3times * (`A1_2`==4)+ b_2times * (`A1_2`==5) +
b_hurry * (`A1_3`==0) + b_15min * (`A1_3`==1) + b_delay * (`A1_3`==2) +
b_nostoma * (`A1_4`==0) + b_tempstoma * (`A1_4`==1) + b_revstoma * (`A1_4`==2) + b_permstoma * (`A1_4`==3) +
b_norisk * (`A1_5`==0) + b_10y5 * (`A1_5`==1) + b_10y10 * (`A1_5`==2) + b_10y20 * (`A1_5`==3) +
b_s5 * (`A1_6`==0) + b_s10 * (`A1_6`==1) + b_s20 * (`A1_6`==2) + b_s30 * (`A1_6`==3) +
b_w19 * (`A1_7`==0) + b_w25* (`A1_7`==1) + b_w40 * (`A1_7`==2) + b_w43 * (`A1_7`==3)
V[["SurgeryB"]] = b_mild * (`A2_1`==0) + b_moderate * (`A2_1`==1) + b_high * (`A2_1`==2) +
b_7times * (`A2_2`==0) + b_6times * (`A2_2`==1) + b_5times * (`A2_2`==2) + b_4times * (`A2_2`==3)+ b_3times * (`A2_2`==4)+ b_2times * (`A2_2`==5) +
b_hurry * (`A2_3`==0) + b_15min * (`A2_3`==1) + b_delay * (`A2_3`==2) +
b_nostoma * (`A2_4`==0) + b_tempstoma* (`A2_4`==1) + b_revstoma *(`A2_4`==2) + b_permstoma * (`A2_4`==3) +
b_norisk * (`A2_5`==0) + b_10y5 * (`A2_5`==1) + b_10y10 * (`A2_5`==2) + b_10y20 * (`A2_5`==3) +
b_s5 * (`A2_6`==0) + b_s10 * (`A2_6`==1) + b_s20 * (`A2_6`==2) + b_s30 * (`A2_6`==3) +
b_w19 * (`A2_7`==0) + b_w25* (`A2_7`==1) + b_w40* (`A2_7`==2) + b_w43* (`A2_7`==3)
V[['medication']] = asc_medoptout
### Define settings for MNL model component
mnl_settings = list(
alternatives = c(SurgeryA="Surgery A", SurgeryB="Surgery B",medication="Medication"),
avail = 1,
choiceVar = choice,
utilities = V
)
##now we have finished creating all the inputs for the model
### Compute probabilities using MNL model
##putting the probabilities inside a list called P with a element called model
P[["model"]] = apollo_mnl(mnl_settings, functionality)
## if the data set has 1000 respondents, then it would calculate 1000 probabilities
##essentially the program is collapsing all observations for each ID. this is looking at the likelihood of the
## sequence of choices that each person makes
### Take product across observation for same individual
P = apollo_panelProd(P, apollo_inputs, functionality)
##this line is always there, just preparing the output
### Prepare and return outputs of function
P = apollo_prepareProb(P, apollo_inputs, functionality)
return(P)
}
apollo_probabilities(apollo_beta, apollo_inputs, functionality="estimate")
# ################################################################# #
#### MODEL ESTIMATION ####
# ################################################################# #
model = apollo_estimate(apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs)
# ################################################################# #
#### MODEL OUTPUTS ####
# ################################################################# #
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN) ----
# ----------------------------------------------------------------- #
#model <- readRDS("output/MNL_DCE_Surgery_model.rds")
result_df <- data.frame(apollo_modelOutput(model, modelOutput_settings = list(printPVal=2)))
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name) ----
# ----------------------------------------------------------------- #
##this saves the output
#apollo_saveOutput(model, saveOutput_settings = list(printPVal=2))
# ----------------------------------------------------------------- #
#---- format output to qmd ----
# ----------------------------------------------------------------- #
result_df <- result_df %>%
mutate(Parameter = c(
"Medication",
"QoL: Mild",
"QoL: Moderate",
"QoL: High",
"Bowel frequency: 7 times",
"Bowel frequency: 6 times",
"Bowel frequency: 5 times",
"Bowel frequency: 4 times",
"Bowel frequency: 3 times",
"Bowel frequency: 2 times",
"Bowel urgency: Need to hurry",
"Bowel urgency: Delay for 15 mins",
"Bowel urgency: Delay as long as needed",
"No stoma",
"Temporary stoma",
"Reversible stoma",
"Permanent stoma",
"10 year stoma risk: No risk-5%",
"10 year stoma risk: 5%",
"10 year stoma risk: 10%",
"10 year stoma risk: 20%",
"Short-term risks: 5%-10%",
"Short-term risks: 10%",
"Short-term risks: 20%",
"Short-term risks: 30%",
"Fertility: Women:19%; Men:0%",
"Fertility: Women:25%; Men:0%",
"Fertility: Women:40%; Men:0%",
"Fertility: Women:43%; Men:15%"
))%>%
mutate(Parameter = factor(Parameter, levels = rev(Parameter))) # Convert to factor with reversed levels
knitr::kable(result_df)
# ----------------------------------------------------------------- #
#---- switch off writing to file ----
# ----------------------------------------------------------------- #
apollo_sink()
Code: Select all
Model name : MNL_DCE_Surgeryb_7times
Model description : Simple MNL model on UC surgery with med optout and all categorical
Model run at : 2025-03-19 11:53:12.742942
Estimation method : bgw
Model diagnosis : Relative function convergence
Optimisation diagnosis : Maximum found
hessian properties : Negative definite
maximum eigenvalue : -19.813778
reciprocal of condition number : 0.0120985
Number of individuals : 350
Number of rows in database : 3150
Number of modelled outcomes : 3150
Number of cores used : 1
Model without mixing
LL(start) : -3460.63
LL at equal shares, LL(0) : -3460.63
LL at observed shares, LL(C) : -3417.55
LL(final) : -3035.66
Rho-squared vs equal shares : 0.1228
Adj.Rho-squared vs equal shares : 0.1164
Rho-squared vs observed shares : 0.1117
Adj.Rho-squared vs observed shares : 0.1059
AIC : 6115.33
BIC : 6248.54
Estimated parameters : 22
Time taken (hh:mm:ss) : 00:00:4.13
pre-estimation : 00:00:0.72
estimation : 00:00:2.35
post-estimation : 00:00:1.05
Iterations : 8
Unconstrained optimisation.
Estimates:
Estimate s.e. t.rat.(0) p(2-sided) Rob.s.e. Rob.t.rat.(0) p(2-sided)
asc_medoptout 0.602902 0.14504 4.15668 3.229e-05 0.17089 3.52798 4.1875e-04
b_mild 0.000000 NA NA NA NA NA NA
b_moderate 0.410471 0.06616 6.20465 5.482e-10 0.06719 6.10879 1.004e-09
b_high 0.561723 0.07473 7.51652 5.618e-14 0.08108 6.92804 4.267e-12
b_7times 0.000000 NA NA NA NA NA NA
b_6times -0.005449 0.11095 -0.04911 0.960829 0.12290 -0.04434 0.964634
b_5times 0.300900 0.10161 2.96119 0.003065 0.09310 3.23185 0.001230
b_4times 0.582452 0.10622 5.48362 4.167e-08 0.10877 5.35485 8.563e-08
b_3times 0.716727 0.10749 6.66784 2.596e-11 0.11659 6.14725 7.884e-10
b_2times 0.818915 0.11124 7.36202 1.812e-13 0.10800 7.58227 3.397e-14
b_hurry 0.000000 NA NA NA NA NA NA
b_15min 0.915395 0.08566 10.68687 0.000000 0.09306 9.83632 0.000000
b_delay 1.292746 0.08098 15.96431 0.000000 0.09582 13.49087 0.000000
b_nostoma 0.000000 NA NA NA NA NA NA
b_tempstoma -0.244353 0.07314 -3.34073 8.3559e-04 0.06749 -3.62063 2.9388e-04
b_revstoma -0.506400 0.07049 -7.18401 6.770e-13 0.06841 -7.40206 1.341e-13
b_permstoma -1.488192 0.13067 -11.38883 0.000000 0.13769 -10.80860 0.000000
b_norisk 0.000000 NA NA NA NA NA NA
b_10y5 -0.043169 0.08083 -0.53405 0.593309 0.07523 -0.57380 0.566101
b_10y10 -0.266822 0.08619 -3.09565 0.001964 0.09531 -2.79956 0.005117
b_10y20 -0.324723 0.08734 -3.71785 2.0092e-04 0.08373 -3.87838 1.0516e-04
b_s5 0.000000 NA NA NA NA NA NA
b_s10 0.052309 0.08580 0.60968 0.542073 0.08537 0.61276 0.540033
b_s20 -0.239892 0.08063 -2.97519 0.002928 0.08261 -2.90398 0.003684
b_s30 -0.267704 0.08163 -3.27945 0.001040 0.08170 -3.27657 0.001051
b_w19 0.000000 NA NA NA NA NA NA
b_w25 -0.056392 0.08378 -0.67309 0.500892 0.08617 -0.65446 0.512813
b_w40 -0.200839 0.08005 -2.50883 0.012113 0.07892 -2.54480 0.010934
b_w43 -0.259383 0.08002 -3.24168 0.001188 0.07491 -3.46241 5.3537e-04Code: Select all
### DCE DATA ANALYSIS-----
rm(list=ls())
library(apollo)
library(dplyr)
library(knitr)
### Initialise code
apollo_initialise()
### Set core controls
apollo_control = list(
modelName = "MNL_DCE_Surgeryb_2times",
modelDescr = "Simple MNL model on surgery vs med optout and all categorical",
indivID = "X",
outputDirectory = "output"
)
### Loading data from package
database <- read.csv("apollodata.csv")
## DEFINE MODEL PARAMETERS
### Vector of parameters, including any that are kept fixed in estimation
apollo_beta=c(asc_medoptout=0,
b_mild = 0,
b_moderate = 0,
b_high = 0,
b_7times = 0,
b_6times = 0,
b_5times = 0,
b_4times = 0,
b_3times = 0,
b_2times = 0,
b_hurry = 0,
b_15min = 0,
b_delay = 0,
b_nostoma = 0,
b_tempstoma = 0,
b_revstoma = 0,
b_permstoma = 0,
b_norisk = 0,
b_10y5 = 0,
b_10y10 = 0,
b_10y20 = 0,
b_s5 = 0,
b_s10 = 0,
b_s20 = 0,
b_s30 = 0,
b_w19 = 0,
b_w25 = 0,
b_w40 = 0,
b_w43 = 0)
### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c( "b_mild" , "b_2times", "b_hurry" , "b_nostoma" , "b_norisk" , "b_s5", "b_w19")
#### GROUP AND VALIDATE INPUTS ---
apollo_inputs = apollo_validateInputs()
#### DEFINE MODEL AND LIKELIHOOD FUNCTION ---
## this code is standard and always the same
apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
P = list()
##write utility function for each of the alternatives
### List of utilities: these must use the same names as in mnl_settings, order is irrelevant
V = list()
V[["SurgeryA"]] = b_mild * (`A1_1`==0) + b_moderate * (`A1_1`==1) + b_high * (`A1_1`==2) +
b_7times * (`A1_2`==0) + b_6times * (`A1_2`==1) + b_5times * (`A1_2`==2) + b_4times * (`A1_2`==3)+ b_3times * (`A1_2`==4)+ b_2times * (`A1_2`==5) +
b_hurry * (`A1_3`==0) + b_15min * (`A1_3`==1) + b_delay * (`A1_3`==2) +
b_nostoma * (`A1_4`==0) + b_tempstoma * (`A1_4`==1) + b_revstoma * (`A1_4`==2) + b_permstoma * (`A1_4`==3) +
b_norisk * (`A1_5`==0) + b_10y5 * (`A1_5`==1) + b_10y10 * (`A1_5`==2) + b_10y20 * (`A1_5`==3) +
b_s5 * (`A1_6`==0) + b_s10 * (`A1_6`==1) + b_s20 * (`A1_6`==2) + b_s30 * (`A1_6`==3) +
b_w19 * (`A1_7`==0) + b_w25* (`A1_7`==1) + b_w40 * (`A1_7`==2) + b_w43 * (`A1_7`==3)
V[["SurgeryB"]] = b_mild * (`A2_1`==0) + b_moderate * (`A2_1`==1) + b_high * (`A2_1`==2) +
b_7times * (`A2_2`==0) + b_6times * (`A2_2`==1) + b_5times * (`A2_2`==2) + b_4times * (`A2_2`==3)+ b_3times * (`A2_2`==4)+ b_2times * (`A2_2`==5) +
b_hurry * (`A2_3`==0) + b_15min * (`A2_3`==1) + b_delay * (`A2_3`==2) +
b_nostoma * (`A2_4`==0) + b_tempstoma* (`A2_4`==1) + b_revstoma *(`A2_4`==2) + b_permstoma * (`A2_4`==3) +
b_norisk * (`A2_5`==0) + b_10y5 * (`A2_5`==1) + b_10y10 * (`A2_5`==2) + b_10y20 * (`A2_5`==3) +
b_s5 * (`A2_6`==0) + b_s10 * (`A2_6`==1) + b_s20 * (`A2_6`==2) + b_s30 * (`A2_6`==3) +
b_w19 * (`A2_7`==0) + b_w25* (`A2_7`==1) + b_w40* (`A2_7`==2) + b_w43* (`A2_7`==3)
V[['medication']] = asc_medoptout
### Define settings for MNL model component
mnl_settings = list(
alternatives = c(SurgeryA="Surgery A", SurgeryB="Surgery B",medication="Medication"),
avail = 1,
choiceVar = choice,
utilities = V
)
##now we have finished creating all the inputs for the model
### Compute probabilities using MNL model
##putting the probabilities inside a list called P with a element called model
P[["model"]] = apollo_mnl(mnl_settings, functionality)
## if the data set has 1000 respondents, then it would calculate 1000 probabilities
##essentially the program is collapsing all observations for each ID. this is looking at the likelihood of the
## sequence of choices that each person makes
### Take product across observation for same individual
P = apollo_panelProd(P, apollo_inputs, functionality)
##this line is always there, just preparing the output
### Prepare and return outputs of function
P = apollo_prepareProb(P, apollo_inputs, functionality)
return(P)
}
# ################################################################# #
#### MODEL ESTIMATION ####
# ################################################################# #
model = apollo_estimate(apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs)
# ################################################################# #
#### MODEL OUTPUTS ####
# ################################################################# #
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN) ----
# ----------------------------------------------------------------- #
#model <- readRDS("output/MNL_DCE_Surgery_model.rds")
result_df <- data.frame(apollo_modelOutput(model, modelOutput_settings = list(printPVal=2)))
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name) ----
# ----------------------------------------------------------------- #
##this saves the output
#apollo_saveOutput(model, saveOutput_settings = list(printPVal=2))
# ----------------------------------------------------------------- #
#---- format output to qmd ----
# ----------------------------------------------------------------- #
result_df <- result_df %>%
mutate(Parameter = c(
"Medication",
"QoL: Mild",
"QoL: Moderate",
"QoL: High",
"Bowel frequency: 7 times",
"Bowel frequency: 6 times",
"Bowel frequency: 5 times",
"Bowel frequency: 4 times",
"Bowel frequency: 3 times",
"Bowel frequency: 2 times",
"Bowel urgency: Need to hurry",
"Bowel urgency: Delay for 15 mins",
"Bowel urgency: Delay as long as needed",
"No stoma",
"Temporary stoma",
"Reversible stoma",
"Permanent stoma",
"10 year stoma risk: No risk-5%",
"10 year stoma risk: 5%",
"10 year stoma risk: 10%",
"10 year stoma risk: 20%",
"Short-term risks: 5%-10%",
"Short-term risks: 10%",
"Short-term risks: 20%",
"Short-term risks: 30%",
"Fertility: Women:19%; Men:0%",
"Fertility: Women:25%; Men:0%",
"Fertility: Women:40%; Men:0%",
"Fertility: Women:43%; Men:15%"
))%>%
mutate(Parameter = factor(Parameter, levels = rev(Parameter))) # Convert to factor with reversed levels
knitr::kable(result_df)
# ----------------------------------------------------------------- #
#---- switch off writing to file ----
# ----------------------------------------------------------------- #
apollo_sink()
Code: Select all
Model name : MNL_DCE_Surgeryb_2times
Model description : Simple MNL model on UC surgery with med optout and all categorical
Model run at : 2025-03-19 11:44:58.292033
Estimation method : bgw
Model diagnosis : Relative function convergence
Optimisation diagnosis : Maximum found
hessian properties : Negative definite
maximum eigenvalue : -25.090522
reciprocal of condition number : 0.0152937
Number of individuals : 350
Number of rows in database : 3150
Number of modelled outcomes : 3150
Number of cores used : 1
Model without mixing
LL(start) : -3460.63
LL at equal shares, LL(0) : -3460.63
LL at observed shares, LL(C) : -3417.55
LL(final) : -3035.66
Rho-squared vs equal shares : 0.1228
Adj.Rho-squared vs equal shares : 0.1164
Rho-squared vs observed shares : 0.1117
Adj.Rho-squared vs observed shares : 0.1059
AIC : 6115.33
BIC : 6248.54
Estimated parameters : 22
Time taken (hh:mm:ss) : 00:00:7.79
pre-estimation : 00:00:0.83
estimation : 00:00:4.33
post-estimation : 00:00:2.63
Iterations : 8
Unconstrained optimisation.
Estimates:
Estimate s.e. t.rat.(0) p(2-sided) Rob.s.e.
asc_medoptout -0.21601 0.13689 -1.5780 0.114575 0.16732
b_mild 0.00000 NA NA NA NA
b_moderate 0.41047 0.06616 6.2047 5.482e-10 0.06719
b_high 0.56172 0.07473 7.5165 5.618e-14 0.08108
b_7times -0.81892 0.11124 -7.3620 1.812e-13 0.10800
b_6times -0.82436 0.10599 -7.7774 7.327e-15 0.11168
b_5times -0.51801 0.09349 -5.5411 3.006e-08 0.08451
b_4times -0.23646 0.09551 -2.4759 0.013291 0.09310
b_3times -0.10219 0.09662 -1.0576 0.290247 0.08328
b_2times 0.00000 NA NA NA NA
b_hurry 0.00000 NA NA NA NA
b_15min 0.91540 0.08566 10.6869 0.000000 0.09306
b_delay 1.29275 0.08098 15.9643 0.000000 0.09582
b_nostoma 0.00000 NA NA NA NA
b_tempstoma -0.24435 0.07314 -3.3407 8.3561e-04 0.06749
b_revstoma -0.50640 0.07049 -7.1840 6.770e-13 0.06841
b_permstoma -1.48819 0.13067 -11.3888 0.000000 0.13769
b_norisk 0.00000 NA NA NA NA
b_10y5 -0.04317 0.08083 -0.5341 0.593305 0.07523
b_10y10 -0.26682 0.08619 -3.0956 0.001964 0.09531
b_10y20 -0.32472 0.08734 -3.7179 2.0092e-04 0.08373
b_s5 0.00000 NA NA NA NA
b_s10 0.05231 0.08580 0.6097 0.542075 0.08537
b_s20 -0.23989 0.08063 -2.9752 0.002928 0.08261
b_s30 -0.26770 0.08163 -3.2794 0.001040 0.08170
b_w19 0.00000 NA NA NA NA
b_w25 -0.05639 0.08378 -0.6731 0.500888 0.08617
b_w40 -0.20084 0.08005 -2.5088 0.012113 0.07892
b_w43 -0.25938 0.08002 -3.2417 0.001188 0.07491
Rob.t.rat.(0) p(2-sided)
asc_medoptout -1.2910 0.196703
b_mild NA NA
b_moderate 6.1088 1.004e-09
b_high 6.9280 4.267e-12
b_7times -7.5823 3.397e-14
b_6times -7.3818 1.563e-13
b_5times -6.1293 8.829e-10
b_4times -2.5399 0.011089
b_3times -1.2270 0.219807
b_2times NA NA
b_hurry NA NA
b_15min 9.8363 0.000000
b_delay 13.4909 0.000000
b_nostoma NA NA
b_tempstoma -3.6206 2.9389e-04
b_revstoma -7.4021 1.341e-13
b_permstoma -10.8086 0.000000
b_norisk NA NA
b_10y5 -0.5738 0.566097
b_10y10 -2.7996 0.005117
b_10y20 -3.8784 1.0515e-04
b_s5 NA NA
b_s10 0.6128 0.540035
b_s20 -2.9040 0.003684
b_s30 -3.2766 0.001051
b_w19 NA NA
b_w25 -0.6545 0.512809
b_w40 -2.5448 0.010934
b_w43 -3.4624 5.3534e-04Code: Select all
apollo_fixed = c( "b_mild" , "b_7times", "b_hurry" , "b_nostoma" , "b_norisk" , "b_s5", "b_w19")Code: Select all
apollo_fixed = c( "b_mild" , "b_2times", "b_hurry" , "b_nostoma" , "b_norisk" , "b_s5", "b_w19")Thanks in advance!
Tara