Page 1 of 1

Error when using apollo_lcEM()

Posted: 26 Oct 2024, 16:47
by nsmith
Hi,

I am trying to estimate a latent class logit model using the `apollo_lcEM()` function. I am re-running code that I had previously (successfully) compiled on June 4, 2024. When trying to estimate using the EM algorithm, apollo says that none of my parameters influence the log-likelihood of the model.

Code: Select all

# ################################################################# #
#### LOAD LIBRARY AND DEFINE CORE SETTINGS                       ####
# ################################################################# #
### Initialise code
apollo_initialise()

### Set core controls
apollo_control = list(
  modelName       = "LCA_5class",
  modelDescr      = "LCA on orchid data, EM, with cov",
  indivID         = "responseId",
  nCores          = 2
)

# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS                     ####
# ################################################################# #

### Loading data from package
### if data is to be loaded from a file (e.g. called data.csv), 
### the code would be: database = read.csv("data.csv",header=TRUE)
lcaData = idByTask %>% select(responseId, choicetask, choiceSituation, choice, block, saw150v125, gender, familyPlan,
                              starts_with("opt"), 
                              age, highHealthInsLit01, percentFPL_100k, rxDevice01, chronicCount, chronicCount1v0, chronicCount2v0, chronicCount3v0) %>%
  
  arrange(responseId, choicetask) %>%
  
  filter(!is.na(age) & !is.na(highHealthInsLit01) & !is.na(percentFPL_100k) & !is.na(rxDevice01) & !is.na(chronicCount1v0) & !is.na(chronicCount2v0) & !is.na(chronicCount3v0)) %>%
  mutate(weights=1)

lcaData <- remove_all_labels(lcaData)

database <- lcaData


# ################################################################# #
#### DEFINE MODEL PARAMETERS                                     ####
# ################################################################# #

### Vector of parameters, including any that are kept fixed in estimation
apollo_beta = c(asc_opt_1_a = -0.03, 
                asc_opt_1_b = -0.02, 
                asc_opt_1_c = -0.003, 
                asc_opt_1_d = -0.24, 
                asc_opt_1_e = -0.24, 
                
                                
                asc_opt_2_a = 0, 
                asc_opt_2_b = 0,
                asc_opt_2_c = 0, 
                asc_opt_2_d = 0, 
                asc_opt_2_e = 0, 
                
                asc_opt_3_a =1.73, 
                asc_opt_3_b = -3.2, 
                asc_opt_3_c = -2.9, 
                asc_opt_3_d = -0.015, 
                asc_opt_3_e = 0.02, 
                
                # oop
                b_oop_1v0_a = -0.57,
                b_oop_2v0_a = -1.81, 
                b_oop_3v0_a = -2.23, 
                
                b_oop_1v0_b = -0.53,
                b_oop_2v0_b = -2.67, 
                b_oop_3v0_b = -3.35, 
                
                b_oop_1v0_c = 0.44,
                b_oop_2v0_c = 0.276, 
                b_oop_3v0_c = -0.067, 
                
                b_oop_1v0_d = -0.58,
                b_oop_2v0_d = -1.48, 
                b_oop_3v0_d = -4.37, 
                
                b_oop_1v0_e = -0.56,
                b_oop_2v0_e = -0.86, 
                b_oop_3v0_e = -0.81, 
                
                
                # premium
                b_premium_1v0_a = 0.079, 
                b_premium_2v0_a = -0.46, 
                
                b_premium_1v0_b = -0.84, 
                b_premium_2v0_b = -0.87, 

                b_premium_1v0_c = 0.12, 
                b_premium_2v0_c = -0.18, 
                
                b_premium_1v0_d = 0.13, 
                b_premium_2v0_d = -1.52, 
                
                b_premium_1v0_e = 0.76, 
                b_premium_2v0_e = -0.19, 
                
                
                # plan type
                b_plan_1v0_a = 0.21, 
                b_plan_2v0_a = -0.13, 
                
                b_plan_1v0_b = 0.23, 
                b_plan_2v0_b = 0.07, 
                
                b_plan_1v0_c = -0.013, 
                b_plan_2v0_c = 0.008, 
                
                b_plan_1v0_d = -0.51, 
                b_plan_2v0_d = 0.04, 
                
                b_plan_1v0_e = -0.56, 
                b_plan_2v0_e = 0.28, 
                
                
                
                # pcp network
                b_pcpNetwork_1v0_a = 1.39, 
                b_pcpNetwork_2v0_a = 1.77, 
                
                b_pcpNetwork_1v0_b = 1.68, 
                b_pcpNetwork_2v0_b = 1.26, 

                b_pcpNetwork_1v0_c = 0.44, 
                b_pcpNetwork_2v0_c = 0.39, 
              
                b_pcpNetwork_1v0_d = 4.37, 
                b_pcpNetwork_2v0_d = 6.4, 
                
                b_pcpNetwork_1v0_e = 0.42, 
                b_pcpNetwork_2v0_e = 0.60, 
                
                
                # rx coverage
                b_drugDevice_1v0_a = 1.09, 
                b_drugDevice_2v0_a = 1.53, 
                
                b_drugDevice_1v0_b = 1.13, 
                b_drugDevice_2v0_b = 1.415, 
                
                b_drugDevice_1v0_c = 0.18, 
                b_drugDevice_2v0_c = 0.42, 
                
                b_drugDevice_1v0_d = 1.29, 
                b_drugDevice_2v0_d = 1.91, 
                
                b_drugDevice_1v0_e = 0.66, 
                b_drugDevice_2v0_e = 0.83, 
                
                
                
                delta_a = -0.31, 
                delta_b = 0,
                delta_c = -0.67,
                delta_d = -0.97,
                delta_e = -0.14,
                
                
                # allow class allocation to vary based on covariates
                b_age_a = 0, 
                b_highHealthInsLit_a = -0.67, 
                b_pctFPL_a = -0.000845, 
                b_rxDevice_a = -0.029,
                b_countChronic1v0_a = 0,
                b_countChronic2v0_a = 0,
                b_countChronic3v0_a = 0,
                
                b_age_b = 0, 
                b_highHealthInsLit_b = 0, 
                b_pctFPL_b = 0, 
                b_rxDevice_b = 0,
                b_countChronic1v0_b = 0, 
                b_countChronic2v0_b = 0, 
                b_countChronic3v0_b = 0, 
                
                b_age_c = 0, 
                b_highHealthInsLit_c = 0.002, 
                b_pctFPL_c = 0.00119, 
                b_rxDevice_c = 0.07,
                b_countChronic1v0_c = 0, 
                b_countChronic2v0_c = 0, 
                b_countChronic3v0_c = 0, 
                
                b_age_d = 0, 
                b_highHealthInsLit_d = 0.03, 
                b_pctFPL_d = 0.0000357, 
                b_rxDevice_d = 0.46,
                b_countChronic1v0_d = 0, 
                b_countChronic2v0_d = 0, 
                b_countChronic3v0_d = 0, 
                
                
                b_age_e = 0, 
                b_highHealthInsLit_e = 0.139, 
                b_pctFPL_e = 0.000012, 
                b_rxDevice_e = 0.429,
                b_countChronic1v0_e = 0, 
                b_countChronic2v0_e = 0, 
                b_countChronic3v0_e = 0                
                
                
                
                
)

### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c("asc_opt_2_a", "asc_opt_2_b", "asc_opt_2_c", "asc_opt_2_d", "asc_opt_2_e", "delta_b", "b_age_b", "b_highHealthInsLit_b", "b_pctFPL_b", "b_rxDevice_b", "b_countChronic1v0_b", "b_countChronic2v0_b", "b_countChronic3v0_b")

# exp(1.4079)/(exp(0)+exp(1.4079))

# ################################################################# #
#### DEFINE LATENT CLASS COMPONENTS                              ####
# ################################################################# #

apollo_lcPars=function(apollo_beta, apollo_inputs){
  lcpars = list()
  lcpars[["asc_opt_1"]] = list(asc_opt_1_a, asc_opt_1_b, asc_opt_1_c, asc_opt_1_d, asc_opt_1_e)
  lcpars[["asc_opt_2"]] = list(asc_opt_2_a, asc_opt_2_b, asc_opt_2_c, asc_opt_2_d, asc_opt_2_e)
  lcpars[["asc_opt_3"]] = list(asc_opt_3_a, asc_opt_3_b, asc_opt_3_c, asc_opt_3_d, asc_opt_3_e)
  
  lcpars[["b_oop_1v0"]] = list(b_oop_1v0_a, b_oop_1v0_b, b_oop_1v0_c, b_oop_1v0_d, b_oop_1v0_e)
  lcpars[["b_oop_2v0"]] = list(b_oop_2v0_a, b_oop_2v0_b, b_oop_2v0_c, b_oop_2v0_d, b_oop_2v0_e)
  lcpars[["b_oop_3v0"]] = list(b_oop_3v0_a, b_oop_3v0_b, b_oop_3v0_c, b_oop_3v0_d, b_oop_3v0_e)
  
  lcpars[["b_premium_1v0"]] = list(b_premium_1v0_a, b_premium_1v0_b, b_premium_1v0_c, b_premium_1v0_d, b_premium_1v0_e)
  lcpars[["b_premium_2v0"]] = list(b_premium_2v0_a, b_premium_2v0_b, b_premium_2v0_c, b_premium_2v0_d, b_premium_2v0_e)

  lcpars[["b_plan_1v0"]] = list(b_plan_1v0_a, b_plan_1v0_b, b_plan_1v0_c, b_plan_1v0_d, b_plan_1v0_e)
  lcpars[["b_plan_2v0"]] = list(b_plan_2v0_a, b_plan_2v0_b, b_plan_2v0_c, b_plan_2v0_d, b_plan_2v0_e)

  lcpars[["b_pcpNetwork_1v0"]] = list(b_pcpNetwork_1v0_a, b_pcpNetwork_1v0_b, b_pcpNetwork_1v0_c, b_pcpNetwork_1v0_d, b_pcpNetwork_1v0_e)
  lcpars[["b_pcpNetwork_2v0"]] = list(b_pcpNetwork_2v0_a, b_pcpNetwork_2v0_b, b_pcpNetwork_2v0_c, b_pcpNetwork_2v0_d, b_pcpNetwork_2v0_e)
  
  lcpars[["b_drugDevice_1v0"]] = list(b_drugDevice_1v0_a, b_drugDevice_1v0_b, b_drugDevice_1v0_c, b_drugDevice_1v0_d, b_drugDevice_1v0_e)
  lcpars[["b_drugDevice_2v0"]] = list(b_drugDevice_2v0_a, b_drugDevice_2v0_b, b_drugDevice_2v0_c, b_drugDevice_2v0_d, b_drugDevice_2v0_e)
  
  
  V=list()
  V[["class_a"]] = delta_a + b_age_a*age + b_highHealthInsLit_a*(highHealthInsLit01==1) + b_pctFPL_a*percentFPL_100k + b_rxDevice_a*(rxDevice01 == 1) + b_countChronic1v0_a*(chronicCount1v0 == 1) + b_countChronic2v0_a*(chronicCount2v0 == 1) + b_countChronic3v0_a*(chronicCount3v0 == 1)
  V[["class_b"]] = delta_b + b_age_b*age + b_highHealthInsLit_b*(highHealthInsLit01==1) + b_pctFPL_b*percentFPL_100k + b_rxDevice_b*(rxDevice01 == 1) + b_countChronic1v0_b*(chronicCount1v0 == 1) + b_countChronic2v0_b*(chronicCount2v0 == 1) + b_countChronic3v0_b*(chronicCount3v0 == 1)
  V[["class_c"]] = delta_c + b_age_c*age + b_highHealthInsLit_c*(highHealthInsLit01==1) + b_pctFPL_c*percentFPL_100k + b_rxDevice_c*(rxDevice01 == 1) + b_countChronic1v0_c*(chronicCount1v0 == 1) + b_countChronic2v0_c*(chronicCount2v0 == 1) + b_countChronic3v0_c*(chronicCount3v0 == 1)
  V[["class_d"]] = delta_d + b_age_d*age + b_highHealthInsLit_d*(highHealthInsLit01==1) + b_pctFPL_d*percentFPL_100k + b_rxDevice_d*(rxDevice01 == 1) + b_countChronic1v0_d*(chronicCount1v0 == 1) + b_countChronic2v0_d*(chronicCount2v0 == 1) + b_countChronic3v0_d*(chronicCount3v0 == 1)
  V[["class_e"]] = delta_e + b_age_e*age + b_highHealthInsLit_e*(highHealthInsLit01==1) + b_pctFPL_e*percentFPL_100k + b_rxDevice_e*(rxDevice01 == 1) + b_countChronic1v0_e*(chronicCount1v0 == 1) + b_countChronic2v0_e*(chronicCount2v0 == 1) + b_countChronic3v0_e*(chronicCount3v0 == 1)
  
  classAlloc_settings = list(
    classes      = c(class_a=1, class_b=2, class_c=3, class_d=4, class_e=5), 
    utilities    = V
  )

  lcpars[["pi_values"]] = apollo_classAlloc(classAlloc_settings)
  
  return(lcpars)
}

# ################################################################# #
#### GROUP AND VALIDATE INPUTS                                   ####
# ################################################################# #

apollo_inputs = apollo_validateInputs()

# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION                        ####
# ################################################################# #

apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
  
  ### Attach inputs and detach after function exit
  apollo_attach(apollo_beta, apollo_inputs)
  on.exit(apollo_detach(apollo_beta, apollo_inputs))
  
  ### Create list of probabilities P
  P = list()
  
  ### Define settings for MNL model component that are generic across classes
  mnl_settings = list(alternatives  = c(opt1=1, opt2=2, opt3=3), 
                      choiceVar     = choice
  )
  
  ### Loop over classes
  for(s in 1:length(pi_values)){
    
    ### Compute class-specific utilities
    V=list() 
    V[['opt1']] = asc_opt_1[[s]] + b_oop_1v0[[s]]*(opt1.x1 == 1) + b_oop_2v0[[s]]*(opt1.x1 == 2) + b_oop_3v0[[s]]*(opt1.x1 == 3) + b_premium_1v0[[s]]*(opt1.x2 == 1) + b_premium_2v0[[s]]*(opt1.x2 == 2) + b_plan_1v0[[s]]*(opt1.x3 ==1) + b_plan_2v0[[s]]*(opt1.x3 == 2) + b_pcpNetwork_1v0[[s]]*(opt1.x4 == 1) + b_pcpNetwork_2v0[[s]]*(opt1.x4 == 2) + b_drugDevice_1v0[[s]]*(opt1.x5 == 1) + b_drugDevice_2v0[[s]]*(opt1.x5 == 2)
    V[['opt2']] = asc_opt_2[[s]] + b_oop_1v0[[s]]*(opt2.x1 == 1) + b_oop_2v0[[s]]*(opt2.x1 == 2) + b_oop_3v0[[s]]*(opt2.x1 == 3) + b_premium_1v0[[s]]*(opt2.x2 == 1) + b_premium_2v0[[s]]*(opt2.x2 == 2) + b_plan_1v0[[s]]*(opt2.x3 ==1) + b_plan_2v0[[s]]*(opt2.x3 == 2) + b_pcpNetwork_1v0[[s]]*(opt2.x4 == 1) + b_pcpNetwork_2v0[[s]]*(opt2.x4 == 2) + b_drugDevice_1v0[[s]]*(opt2.x5 == 1) + b_drugDevice_2v0[[s]]*(opt2.x5 == 2) 
    V[['opt3']] = asc_opt_3[[s]]
    
    mnl_settings$utilities = V
    mnl_settings$componentName = paste0("Class_",s)
    
    ### Compute within-class choice probabilities using MNL model
    P[[paste0("Class_",s)]] = apollo_mnl(mnl_settings, functionality)
    
    ### Take product across observation for same individual
    P[[paste0("Class_",s)]] = apollo_panelProd(P[[paste0("Class_",s)]], apollo_inputs ,functionality)
  }
  
  ### Compute latent class model probabilities
  lc_settings   = list(inClassProb = P, classProb=pi_values)
  P[["model"]] = apollo_lc(lc_settings, apollo_inputs, functionality)
  
  ### Prepare and return outputs of function
  P = apollo_prepareProb(P, apollo_inputs, functionality)
  return(P)
}

# ################################################################# #
#### MODEL ESTIMATION                                            ####
# ################################################################# #

### Optional starting values search
# apollo_beta=apollo_searchStart(apollo_beta, apollo_fixed,apollo_probabilities, apollo_inputs)

model=apollo_lcEM(apollo_beta, apollo_fixed, apollo_probabilities, 
                  apollo_inputs, lcEM_settings = list(EMmaxIterations=100)) 
                  
In terms of output, `apollo_validateInputs()` works fine:

Code: Select all

Several observations per individual detected based on the value of responseId. Setting
  panelData in apollo_control set to TRUE.
All checks on apollo_control completed.
All checks on database completed.
But the estimation fails:

Code: Select all

INFORMATION: The use of apollo_lcEM has a number of requirements. No checks are run for these, so the
  user needs to ensure these conditions are met by their model: 
1: This function is only suitable for single component models, i.e. no use of
  apollo_combineModels or manual multiplication of model components.
2: Any parameters that vary across classes need to be included in the definition of random
  parameters in apollo_lcPars.
3: The entries in the lists in apollo_lcPars need to be individual parameters, not functions
  thereof.

Validating inputs of likelihood function (apollo_probabilities)

Error in apollo_estimate(apollo_beta, apollo_fixed, apollo_probabilities,  : 
! SPECIFICATION ISSUE - Parameters asc_opt_1_a, asc_opt_1_b, asc_opt_1_c, asc_opt_1_d, asc_opt_1_e, asc_opt_3_a, asc_opt_3_b, asc_opt_3_c, asc_opt_3_d, asc_opt_3_e, b_oop_1v0_a, b_oop_2v0_a, b_oop_3v0_a, b_oop_1v0_b, b_oop_2v0_b, b_oop_3v0_b, b_oop_1v0_c, b_oop_2v0_c, b_oop_3v0_c, b_oop_1v0_d, b_oop_2v0_d, b_oop_3v0_d, b_oop_1v0_e, b_oop_2v0_e, b_oop_3v0_e, b_premium_1v0_a, b_premium_2v0_a, b_premium_1v0_b, b_premium_2v0_b, b_premium_1v0_c, b_premium_2v0_c, b_premium_1v0_d, b_premium_2v0_d, b_premium_1v0_e, b_premium_2v0_e, b_plan_1v0_a, b_plan_2v0_a, b_plan_1v0_b, b_plan_2v0_b, b_plan_1v0_c, b_plan_2v0_c, b_plan_1v0_d, b_plan_2v0_d, b_plan_1v0_e, b_plan_2v0_e, b_pcpNetwork_1v0_a, b_pcpNetwork_2v0_a, b_pcpNetwork_1v0_b, b_pcpNetwork_2v0_b, b_pcpNetwork_1v0_c, b_pcpNetwork_2v0_c, b_pcpNetwork_1v0_d, b_pcpNetwork_2v0_d, b_pcpNetwork_1v0_e, b_pcpNetwork_2v0_e, b_drugDevice_1v0_a, b_drugDevice_2v0_a, b_drugDevice_1v0_b, b_drugDevice_2v0_b, b_drugDevice_1v0_c, b_drugDevice_2v0_c, b_drugDevice_1v0_d, b_drugDevice_2v0_d, b_drugDevice_1v0_e, b_drugDevice_2v0_e do not influence the log-likelihood of your model!
 
Were there any updates to the newest apollo version 0.3.4 that would be creating an issue with this code? It's a bit confusing because I have not touched any of the model specifications since the last run.

Note that the LC logit model works fine when I remove the length(pi_vals) requirement in the for loop and use regular apollo_estimate()

Re: Error when using apollo_lcEM()

Posted: 04 Dec 2024, 12:47
by stephanehess
Hi

apologies for the slow reply. Could you share your code and data with me outside the forum and I'll have a look?

Thanks

Stephane