Fitting LC mod to simulated data--NA values for SE
Posted: 09 Mar 2022, 22:50
I am trying to code a power analysis for a discrete choice experiment that I am planning. I have generated fake data simulating the choices of three latent classes of respondents. Each class has its own known beta values and a MNL predicting their choices from three alternatives and two opt-out options. ( i.e. choice A, choice B, choice C, something not listed, don't do the activity)
I want to test different sample sizes, so I produced simulated choices for 5 versions of the experiment with increasing numbers of respondents (500, 800, 1000, 1200, and 1500).
When I fit this simulated data with a simple MNL, the estimation is fine. Obviously it doesn't accommodate the preference heterogeneity, but it gives me values that make sense and reasonable SEs.
When I fit a LC model, the estimation is all over the place. When the estimation runs for the different sample sizes, I will get appropriate beta estimates and SEs about 25% of the time. For the rest of the runs, I get wild parameter values and NA values for standard errors. I have done a lot of troubleshooting checking the process for generating the data, adjusting the different class's parameter values to make them more distinctive, and looking for syntax errors. When I tried to use a bootstrapping method to estimate the SEs instead, I got a really strange error message saying, "The scaling of the model led to differences in the results, and was thus not used. This is unexpected. You may want to contact the developers."
I am trying to fit the LC model under basically optimal conditions, so it's worrying that the estimation is so unstable. I assume I am doing somethign wrong but am so far unable to find it. Below is the code I am using to estimate the LC model (without bootstrapping the SEs). I'd really appreciate if anyone could take a look!
# ################################################################# #
#### LOAD LIBRARY AND DEFINE CORE SETTINGS ####
# ################################################################# #
### Clear memory
rm(list = ls())
### Load Apollo library
library(apollo)
library(tidyverse)
# read in simulated data
data=read.csv(here::here("simulated data","choices.simplified.design.60.blocks.csv"))
data.a=data[,-1]
n.versions=max(data.a$version)
# adding loop for power analysis of different sample sizes
for(i in 1:n.versions){
### Initialise code
apollo_initialise()
apollo_control = list(
modelName = paste("LC_test_version",i,sep="_"),
modelDescr = "LC model without covariates on simulated choice data--power analysis",
indivID = "resp.id",
nCores = 3,
outputDirectory = paste("output_version",i, sep="_"),
panelData = TRUE
)
# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS ####
# ################################################################# #
### Loading data from package
### if data is to be loaded from a file (e.g. called data.csv),
### the code would be: database = read.csv("data.csv",header=TRUE)
database = dplyr::filter(data.a, version==i)
# ################################################################# #
#### DEFINE MODEL PARAMETERS ####
# ################################################################# #
### Vector of parameters, including any that are kept fixed in estimation
apollo_beta = c(
b.asc1.a= 0,
b.asc2.a= 0,
b.travel.a=0,
b.wcatch.a=0,
b.bcatch.a=0,
b.pcatch.a=0,
b.wsize.10.a=0,
b.wsize.14.a=0,
b.wsize.16.a=0,
b.wsize.20.a=0,
b.wsize.24.a=0,
b.psize.7.a=0,
b.psize.8.a=0,
b.psize.10.a=0,
b.bsize.12.a=0,
b.bsize.14.a=0,
b.bsize.16.a=0,
b.bsize.18.a=0,
b.bsize.20.a=0,
b.asc1.b= 0,
b.asc2.b= 0,
b.travel.b=0,
b.wcatch.b=0,
b.bcatch.b=0,
b.pcatch.b=0,
b.wsize.10.b=0,
b.wsize.14.b=0,
b.wsize.16.b=0,
b.wsize.20.b=0,
b.wsize.24.b=0,
b.psize.7.b=0,
b.psize.8.b=0,
b.psize.10.b=0,
b.bsize.12.b=0,
b.bsize.14.b=0,
b.bsize.16.b=0,
b.bsize.18.b=0,
b.bsize.20.b=0,
b.asc1.c= 0,
b.asc2.c= 0,
b.travel.c=0,
b.wcatch.c=0,
b.bcatch.c=0,
b.pcatch.c=0,
b.wsize.10.c=0,
b.wsize.14.c=0,
b.wsize.16.c=0,
b.wsize.20.c=0,
b.wsize.24.c=0,
b.psize.7.c=0,
b.psize.8.c=0,
b.psize.10.c=0,
b.bsize.12.c=0,
b.bsize.14.c=0,
b.bsize.16.c=0,
b.bsize.18.c=0,
b.bsize.20.c=0,
delta_a=0,
delta_b=0,
delta_c=0)
### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c()
# ################################################################# #
#### DEFINE LATENT CLASS COMPONENTS ####
# ################################################################# #
apollo_lcPars=function(apollo_beta, apollo_inputs){
lcpars = list()
lcpars[["b.asc1"]] = list(b.asc1.a, b.asc1.b, b.asc1.c)
lcpars[["b.asc2"]] = list(b.asc2.a, b.asc2.b, b.asc2.c)
lcpars[["b.travel"]] = list(b.travel.a, b.travel.b, b.travel.c)
lcpars[["b.wcatch"]] = list(b.wcatch.a, b.wcatch.b, b.wcatch.c)
lcpars[["b.bcatch"]] = list(b.bcatch.a, b.bcatch.b, b.bcatch.c)
lcpars[["b.pcatch"]] = list(b.pcatch.a, b.pcatch.b, b.pcatch.c)
lcpars[["b.wsize.10"]] = list(b.wsize.10.a, b.wsize.10.b, b.wsize.10.c)
lcpars[["b.wsize.14"]] = list(b.wsize.14.a, b.wsize.14.b, b.wsize.14.c)
lcpars[["b.wsize.16"]] = list(b.wsize.16.a, b.wsize.16.b, b.wsize.16.c)
lcpars[["b.wsize.20"]] = list(b.wsize.20.a, b.wsize.20.b, b.wsize.20.c)
lcpars[["b.wsize.24"]] = list(b.wsize.24.a, b.wsize.24.b, b.wsize.24.c)
lcpars[["b.psize.7"]] = list(b.psize.7.a, b.psize.7.b, b.psize.7.c)
lcpars[["b.psize.8"]] = list(b.psize.8.a, b.psize.8.b, b.psize.8.c)
lcpars[["b.psize.10"]] = list(b.psize.10.a, b.psize.10.b, b.psize.10.c)
lcpars[["b.bsize.12"]] = list(b.bsize.12.a, b.bsize.12.b, b.bsize.12.c)
lcpars[["b.bsize.14"]] = list(b.bsize.14.a, b.bsize.14.b, b.bsize.14.c)
lcpars[["b.bsize.16"]] = list(b.bsize.16.a, b.bsize.16.b, b.bsize.16.c)
lcpars[["b.bsize.18"]] = list(b.bsize.18.a, b.bsize.18.b, b.bsize.18.c)
lcpars[["b.bsize.20"]] = list(b.bsize.20.a, b.bsize.20.b, b.bsize.20.c)
### Utilities of class allocation model
V=list()
V[["class_a"]] = delta_a
V[["class_b"]] = delta_b
V[["class_c"]] = delta_c
### Settings for class allocation models
classAlloc_settings = list(
classes = c(class_a=1, class_b=2 ,
class_c=3),
utilities = V
)
lcpars[["pi_values"]] = apollo_classAlloc(classAlloc_settings)
return(lcpars)
}
# ################################################################# #
#### GROUP AND VALIDATE INPUTS ####
# ################################################################# #
apollo_inputs = apollo_validateInputs()
# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION ####
# ################################################################# #
apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
P = list()
### Define settings for MNL model component that are generic across classes
mnl_settings = list(
alternatives = c(alt1=1, alt2=2, alt3=3, fishElse=4, noFish=5),
choiceVar = choice
)
### Loop over classes
for(s in 1:3){
### Compute class-specific utilities
V=list()
V[["alt1"]] = b.travel[[s]]*alt1.travel+b.wcatch[[s]]*alt1.wcatch+b.pcatch[[s]]*alt1.pcatch+b.bcatch[[s]]*alt1.bcatch+
b.wsize.10[[s]]*alt1.wsize.10+b.wsize.14[[s]]*alt1.wsize.14+b.wsize.16[[s]]*alt1.wsize.16+b.wsize.20[[s]]*alt1.wsize.20 + b.wsize.24[[s]]*alt1.wsize.24+
b.psize.7[[s]]*alt1.psize.7+b.psize.8[[s]]*alt1.psize.8+b.psize.10[[s]]*alt1.psize.10+
b.bsize.12[[s]]*alt1.bsize.12+b.bsize.14[[s]]*alt1.bsize.14+b.bsize.16[[s]]*alt1.bsize.16+b.bsize.18[[s]]*alt1.bsize.18+b.bsize.20[[s]]*alt1.bsize.20
V[["alt2"]] = b.travel[[s]]*alt2.travel+b.wcatch[[s]]*alt2.wcatch+b.pcatch[[s]]*alt2.pcatch+b.bcatch[[s]]*alt2.bcatch+
b.wsize.10[[s]]*alt2.wsize.10+b.wsize.14[[s]]*alt2.wsize.14+b.wsize.16[[s]]*alt2.wsize.16+b.wsize.20[[s]]*alt2.wsize.20 + b.wsize.24[[s]]*alt2.wsize.24+
b.psize.7[[s]]*alt2.psize.7+b.psize.8[[s]]*alt2.psize.8+b.psize.10[[s]]*alt2.psize.10+
b.bsize.12[[s]]*alt2.bsize.12+b.bsize.14[[s]]*alt2.bsize.14+b.bsize.16[[s]]*alt2.bsize.16+b.bsize.18[[s]]*alt2.bsize.18+b.bsize.20[[s]]*alt2.bsize.20
V[["alt3"]] = b.travel[[s]]*alt3.travel+b.wcatch[[s]]*alt3.wcatch+b.pcatch[[s]]*alt3.pcatch+b.bcatch[[s]]*alt3.bcatch+
b.wsize.10[[s]]*alt3.wsize.10+b.wsize.14[[s]]*alt3.wsize.14+b.wsize.16[[s]]*alt3.wsize.16+b.wsize.20[[s]]*alt3.wsize.20 + b.wsize.24[[s]]*alt3.wsize.24+
b.psize.7[[s]]*alt3.psize.7+b.psize.8[[s]]*alt3.psize.8+b.psize.10[[s]]*alt3.psize.10+
b.bsize.12[[s]]*alt3.bsize.12+b.bsize.14[[s]]*alt3.bsize.14+b.bsize.16[[s]]*alt3.bsize.16+b.bsize.18[[s]]*alt3.bsize.18+b.bsize.20[[s]]*alt3.bsize.20
V[["fishElse"]] = b.asc1[[s]]
V[["noFish"]] = b.asc2[[s]]
mnl_settings$utilities = V
mnl_settings$componentName = paste0("Class_",s)
### Compute within-class choice probabilities using MNL model
P[[paste0("Class_",s)]] = apollo_mnl(mnl_settings, functionality)
### Take product across observation for same individual
P[[paste0("Class_",s)]] = apollo_panelProd(P[[paste0("Class_",s)]], apollo_inputs ,functionality)
}
### Compute latent class model probabilities
lc_settings = list(inClassProb = P, classProb=pi_values)
P[["model"]] = apollo_lc(lc_settings, apollo_inputs, functionality)
### Prepare and return outputs of function
P = apollo_prepareProb(P, apollo_inputs, functionality)
return(P)
}
# ################################################################# #
#### MODEL ESTIMATION ####
# ################################################################# #
### Estimate model
model = apollo_estimate(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
estimate_settings=list(bootstrapSE=30))
### Show output in screen
apollo_modelOutput(model)
### Save output to file(s)
apollo_saveOutput(model)
}
I want to test different sample sizes, so I produced simulated choices for 5 versions of the experiment with increasing numbers of respondents (500, 800, 1000, 1200, and 1500).
When I fit this simulated data with a simple MNL, the estimation is fine. Obviously it doesn't accommodate the preference heterogeneity, but it gives me values that make sense and reasonable SEs.
When I fit a LC model, the estimation is all over the place. When the estimation runs for the different sample sizes, I will get appropriate beta estimates and SEs about 25% of the time. For the rest of the runs, I get wild parameter values and NA values for standard errors. I have done a lot of troubleshooting checking the process for generating the data, adjusting the different class's parameter values to make them more distinctive, and looking for syntax errors. When I tried to use a bootstrapping method to estimate the SEs instead, I got a really strange error message saying, "The scaling of the model led to differences in the results, and was thus not used. This is unexpected. You may want to contact the developers."
I am trying to fit the LC model under basically optimal conditions, so it's worrying that the estimation is so unstable. I assume I am doing somethign wrong but am so far unable to find it. Below is the code I am using to estimate the LC model (without bootstrapping the SEs). I'd really appreciate if anyone could take a look!
# ################################################################# #
#### LOAD LIBRARY AND DEFINE CORE SETTINGS ####
# ################################################################# #
### Clear memory
rm(list = ls())
### Load Apollo library
library(apollo)
library(tidyverse)
# read in simulated data
data=read.csv(here::here("simulated data","choices.simplified.design.60.blocks.csv"))
data.a=data[,-1]
n.versions=max(data.a$version)
# adding loop for power analysis of different sample sizes
for(i in 1:n.versions){
### Initialise code
apollo_initialise()
apollo_control = list(
modelName = paste("LC_test_version",i,sep="_"),
modelDescr = "LC model without covariates on simulated choice data--power analysis",
indivID = "resp.id",
nCores = 3,
outputDirectory = paste("output_version",i, sep="_"),
panelData = TRUE
)
# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS ####
# ################################################################# #
### Loading data from package
### if data is to be loaded from a file (e.g. called data.csv),
### the code would be: database = read.csv("data.csv",header=TRUE)
database = dplyr::filter(data.a, version==i)
# ################################################################# #
#### DEFINE MODEL PARAMETERS ####
# ################################################################# #
### Vector of parameters, including any that are kept fixed in estimation
apollo_beta = c(
b.asc1.a= 0,
b.asc2.a= 0,
b.travel.a=0,
b.wcatch.a=0,
b.bcatch.a=0,
b.pcatch.a=0,
b.wsize.10.a=0,
b.wsize.14.a=0,
b.wsize.16.a=0,
b.wsize.20.a=0,
b.wsize.24.a=0,
b.psize.7.a=0,
b.psize.8.a=0,
b.psize.10.a=0,
b.bsize.12.a=0,
b.bsize.14.a=0,
b.bsize.16.a=0,
b.bsize.18.a=0,
b.bsize.20.a=0,
b.asc1.b= 0,
b.asc2.b= 0,
b.travel.b=0,
b.wcatch.b=0,
b.bcatch.b=0,
b.pcatch.b=0,
b.wsize.10.b=0,
b.wsize.14.b=0,
b.wsize.16.b=0,
b.wsize.20.b=0,
b.wsize.24.b=0,
b.psize.7.b=0,
b.psize.8.b=0,
b.psize.10.b=0,
b.bsize.12.b=0,
b.bsize.14.b=0,
b.bsize.16.b=0,
b.bsize.18.b=0,
b.bsize.20.b=0,
b.asc1.c= 0,
b.asc2.c= 0,
b.travel.c=0,
b.wcatch.c=0,
b.bcatch.c=0,
b.pcatch.c=0,
b.wsize.10.c=0,
b.wsize.14.c=0,
b.wsize.16.c=0,
b.wsize.20.c=0,
b.wsize.24.c=0,
b.psize.7.c=0,
b.psize.8.c=0,
b.psize.10.c=0,
b.bsize.12.c=0,
b.bsize.14.c=0,
b.bsize.16.c=0,
b.bsize.18.c=0,
b.bsize.20.c=0,
delta_a=0,
delta_b=0,
delta_c=0)
### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c()
# ################################################################# #
#### DEFINE LATENT CLASS COMPONENTS ####
# ################################################################# #
apollo_lcPars=function(apollo_beta, apollo_inputs){
lcpars = list()
lcpars[["b.asc1"]] = list(b.asc1.a, b.asc1.b, b.asc1.c)
lcpars[["b.asc2"]] = list(b.asc2.a, b.asc2.b, b.asc2.c)
lcpars[["b.travel"]] = list(b.travel.a, b.travel.b, b.travel.c)
lcpars[["b.wcatch"]] = list(b.wcatch.a, b.wcatch.b, b.wcatch.c)
lcpars[["b.bcatch"]] = list(b.bcatch.a, b.bcatch.b, b.bcatch.c)
lcpars[["b.pcatch"]] = list(b.pcatch.a, b.pcatch.b, b.pcatch.c)
lcpars[["b.wsize.10"]] = list(b.wsize.10.a, b.wsize.10.b, b.wsize.10.c)
lcpars[["b.wsize.14"]] = list(b.wsize.14.a, b.wsize.14.b, b.wsize.14.c)
lcpars[["b.wsize.16"]] = list(b.wsize.16.a, b.wsize.16.b, b.wsize.16.c)
lcpars[["b.wsize.20"]] = list(b.wsize.20.a, b.wsize.20.b, b.wsize.20.c)
lcpars[["b.wsize.24"]] = list(b.wsize.24.a, b.wsize.24.b, b.wsize.24.c)
lcpars[["b.psize.7"]] = list(b.psize.7.a, b.psize.7.b, b.psize.7.c)
lcpars[["b.psize.8"]] = list(b.psize.8.a, b.psize.8.b, b.psize.8.c)
lcpars[["b.psize.10"]] = list(b.psize.10.a, b.psize.10.b, b.psize.10.c)
lcpars[["b.bsize.12"]] = list(b.bsize.12.a, b.bsize.12.b, b.bsize.12.c)
lcpars[["b.bsize.14"]] = list(b.bsize.14.a, b.bsize.14.b, b.bsize.14.c)
lcpars[["b.bsize.16"]] = list(b.bsize.16.a, b.bsize.16.b, b.bsize.16.c)
lcpars[["b.bsize.18"]] = list(b.bsize.18.a, b.bsize.18.b, b.bsize.18.c)
lcpars[["b.bsize.20"]] = list(b.bsize.20.a, b.bsize.20.b, b.bsize.20.c)
### Utilities of class allocation model
V=list()
V[["class_a"]] = delta_a
V[["class_b"]] = delta_b
V[["class_c"]] = delta_c
### Settings for class allocation models
classAlloc_settings = list(
classes = c(class_a=1, class_b=2 ,
class_c=3),
utilities = V
)
lcpars[["pi_values"]] = apollo_classAlloc(classAlloc_settings)
return(lcpars)
}
# ################################################################# #
#### GROUP AND VALIDATE INPUTS ####
# ################################################################# #
apollo_inputs = apollo_validateInputs()
# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION ####
# ################################################################# #
apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
P = list()
### Define settings for MNL model component that are generic across classes
mnl_settings = list(
alternatives = c(alt1=1, alt2=2, alt3=3, fishElse=4, noFish=5),
choiceVar = choice
)
### Loop over classes
for(s in 1:3){
### Compute class-specific utilities
V=list()
V[["alt1"]] = b.travel[[s]]*alt1.travel+b.wcatch[[s]]*alt1.wcatch+b.pcatch[[s]]*alt1.pcatch+b.bcatch[[s]]*alt1.bcatch+
b.wsize.10[[s]]*alt1.wsize.10+b.wsize.14[[s]]*alt1.wsize.14+b.wsize.16[[s]]*alt1.wsize.16+b.wsize.20[[s]]*alt1.wsize.20 + b.wsize.24[[s]]*alt1.wsize.24+
b.psize.7[[s]]*alt1.psize.7+b.psize.8[[s]]*alt1.psize.8+b.psize.10[[s]]*alt1.psize.10+
b.bsize.12[[s]]*alt1.bsize.12+b.bsize.14[[s]]*alt1.bsize.14+b.bsize.16[[s]]*alt1.bsize.16+b.bsize.18[[s]]*alt1.bsize.18+b.bsize.20[[s]]*alt1.bsize.20
V[["alt2"]] = b.travel[[s]]*alt2.travel+b.wcatch[[s]]*alt2.wcatch+b.pcatch[[s]]*alt2.pcatch+b.bcatch[[s]]*alt2.bcatch+
b.wsize.10[[s]]*alt2.wsize.10+b.wsize.14[[s]]*alt2.wsize.14+b.wsize.16[[s]]*alt2.wsize.16+b.wsize.20[[s]]*alt2.wsize.20 + b.wsize.24[[s]]*alt2.wsize.24+
b.psize.7[[s]]*alt2.psize.7+b.psize.8[[s]]*alt2.psize.8+b.psize.10[[s]]*alt2.psize.10+
b.bsize.12[[s]]*alt2.bsize.12+b.bsize.14[[s]]*alt2.bsize.14+b.bsize.16[[s]]*alt2.bsize.16+b.bsize.18[[s]]*alt2.bsize.18+b.bsize.20[[s]]*alt2.bsize.20
V[["alt3"]] = b.travel[[s]]*alt3.travel+b.wcatch[[s]]*alt3.wcatch+b.pcatch[[s]]*alt3.pcatch+b.bcatch[[s]]*alt3.bcatch+
b.wsize.10[[s]]*alt3.wsize.10+b.wsize.14[[s]]*alt3.wsize.14+b.wsize.16[[s]]*alt3.wsize.16+b.wsize.20[[s]]*alt3.wsize.20 + b.wsize.24[[s]]*alt3.wsize.24+
b.psize.7[[s]]*alt3.psize.7+b.psize.8[[s]]*alt3.psize.8+b.psize.10[[s]]*alt3.psize.10+
b.bsize.12[[s]]*alt3.bsize.12+b.bsize.14[[s]]*alt3.bsize.14+b.bsize.16[[s]]*alt3.bsize.16+b.bsize.18[[s]]*alt3.bsize.18+b.bsize.20[[s]]*alt3.bsize.20
V[["fishElse"]] = b.asc1[[s]]
V[["noFish"]] = b.asc2[[s]]
mnl_settings$utilities = V
mnl_settings$componentName = paste0("Class_",s)
### Compute within-class choice probabilities using MNL model
P[[paste0("Class_",s)]] = apollo_mnl(mnl_settings, functionality)
### Take product across observation for same individual
P[[paste0("Class_",s)]] = apollo_panelProd(P[[paste0("Class_",s)]], apollo_inputs ,functionality)
}
### Compute latent class model probabilities
lc_settings = list(inClassProb = P, classProb=pi_values)
P[["model"]] = apollo_lc(lc_settings, apollo_inputs, functionality)
### Prepare and return outputs of function
P = apollo_prepareProb(P, apollo_inputs, functionality)
return(P)
}
# ################################################################# #
#### MODEL ESTIMATION ####
# ################################################################# #
### Estimate model
model = apollo_estimate(apollo_beta, apollo_fixed,
apollo_probabilities, apollo_inputs,
estimate_settings=list(bootstrapSE=30))
### Show output in screen
apollo_modelOutput(model)
### Save output to file(s)
apollo_saveOutput(model)
}