Page 1 of 1

NaN values in hybrid choice model

Posted: 05 Mar 2023, 12:29
by sethyash52
Hi Stephane,

I have attached my code and result below; this is a hybrid choice model with two choices. I do not know why I am getting NaN values. Can you please look into it?

Code: Select all

# ################################################################# #
#### LOAD LIBRARY AND DEFINE CORE SETTINGS                       ####
# ################################################################# #

### Clear memory
rm(list = ls())

### Load Apollo library
library(apollo)

### Initialise code
apollo_initialise()

### Set core controls
apollo_control = list(
  modelName       = "Hybrid_with_OL_8",
  modelDescr      = "Hybrid choice model on drug choice data, using ordered measurement model for indicators",
  indivID         = "Person_ID",
  mixing          = TRUE,
  nCores          = 22, 
  outputDirectory = "New"
)

# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS                     ####
# ################################################################# #
setwd("")

getwd()

### Loading data from package
### if data is to be loaded from a file (e.g. called data.csv), 
### the code would be: database = read.csv("data.csv",header=TRUE)
database = read.csv("[size=150][size=50][/size][/size]",header=TRUE)
### for data dictionary, use ?apollo_drugChoiceData

# ################################################################# #
#### ANALYSIS OF CHOICES                                         ####
# ################################################################# #

### Illustration of how to use apollo_choiceAnalysis with user-defined alternatives.
### This is useful in cases where the alternatives in the data differ 
### across tasks. The same approach can then also be used with unlabelled data

# choiceAnalysis_settings <- list(
#   alternatives = c(alt1=1, alt2=2),
#   avail        = with(database,
#                       list(Artemis     = brand_1=="Artemis"     | brand_2=="Artemis"    , 
#                            Novum       = brand_1=="Novum"       | brand_2=="Novum"      ,
#                            BestValue   = brand_3=="BestValue"   | brand_4=="BestValue"  ,
#                            Supermarket = brand_3=="Supermarket" | brand_4=="Supermarket",
#                            PainAway    = brand_3=="PainAway"    | brand_4=="PainAway"   )
#   ),
#   choiceVar    = with(database,
#                       11*((best==1 & brand_1=="Artemis"    ) | (best==2 & brand_2=="Artemis"    )) + 
#                         12*((best==1 & brand_1=="Novum"      ) | (best==2 & brand_2=="Novum"      )) + 
#                         21*((best==3 & brand_3=="BestValue"  ) | (best==4 & brand_4=="BestValue"  )) + 
#                         22*((best==3 & brand_3=="Supermarket") | (best==4 & brand_4=="Supermarket")) + 
#                         23*((best==3 & brand_3=="PainAway"   ) | (best==4 & brand_4=="PainAway"   ))),
#   explanators  = database[,c("regular_user","university_educated","over_50")]
# )
# 
# apollo_choiceAnalysis(choiceAnalysis_settings, apollo_control, database)

# ################################################################# #
#### DEFINE MODEL PARAMETERS                                     ####
# ################################################################# #

### Vector of parameters, including any that are kept fixed in estimation
apollo_beta = c(asc_CV               = 0,
                asc_EV               = 0,
                b_pp                 = 0,
                b_oc                 = 0,
                b_ct                 = 0,
                b_r                  = 0,
                b_cf                 = 0,
                b_e                  = 0,
                b_Male               = 0,
                b_female             = 0,
                b_ageAvg             = 0,
                b_dailyDistanceAvg   = 0,
                b_knowledgeLow       = 0,
                b_knowledgeMiddle    = 0,
                b_knowledgeHigh      = 0,
                b_priorityLanes      = 0,
                b_freeParking        = 0,
                b_tollExemption      = 0,  
                lambda               = 1, 
                sigma_eta            = 3,
                zeta_socIma_1        = 1, 
                zeta_socIma_2        = 1, 
                zeta_socIma_3        = 1, 
                zeta_socIma_4        = 1, 
                tau_socIma_1_1       = 0, 
                tau_socIma_1_2       = 2, 
                tau_socIma_1_3       = 3, 
                tau_socIma_1_4       = 4, 
                tau_socIma_2_1       = 0, 
                tau_socIma_2_2       = 2, 
                tau_socIma_2_3       = 3, 
                tau_socIma_2_4       = 4, 
                tau_socIma_3_1       = 0, 
                tau_socIma_3_2       = 2, 
                tau_socIma_3_3       = 3, 
                tau_socIma_3_4       = 4, 
                tau_socIma_4_1       = 0, 
                tau_socIma_4_2       = 2, 
                tau_socIma_4_3       = 3, 
                tau_socIma_4_4       = 4)

### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c("asc_CV","b_female","b_knowledgeHigh","b_knowledgeLow","b_priorityLanes","tau_socIma_1_1","tau_socIma_2_1","tau_socIma_3_1","tau_socIma_4_1","zeta_socIma_1")

# ################################################################# #
#### DEFINE RANDOM COMPONENTS                                    ####
# ################################################################# #

### Set parameters for generating draws
apollo_draws = list(
  interDrawsType="halton", 
  interNDraws=500,          
  interUnifDraws=c(),      
  interNormDraws=c("eta"), 
  
  intraDrawsType="",
  intraNDraws=0,          
  intraUnifDraws=c(),     
  intraNormDraws=c()      
)

### Create random parameters
apollo_randCoeff=function(apollo_beta, apollo_inputs){
  randcoeff = list()
  
  randcoeff[["LV"]] = b_Male*genderCoded + b_ageAvg*ageAvg + b_dailyDistanceAvg*dailyDistanceAvg + b_knowledgeLow*knowledgeLow + b_knowledgeMiddle*knowledgeMiddle  + b_knowledgeHigh*knowledgeHigh + b_priorityLanes*priorityLanes + b_freeParking*freeParking + b_tollExemption*tollExemption  + sigma_eta*eta
  
  return(randcoeff)
}

# ################################################################# #
#### GROUP AND VALIDATE INPUTS                                   ####
# ################################################################# #

apollo_inputs = apollo_validateInputs()

# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION                        ####
# ################################################################# #

apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
  
  ### Attach inputs and detach after function exit
  apollo_attach(apollo_beta, apollo_inputs)
  on.exit(apollo_detach(apollo_beta, apollo_inputs))
  
  ### Create list of probabilities P
  P = list()
  
  ### Likelihood of indicators
  ol_settings1 = list(outcomeOrdered = socIma_1, 
                      V              = zeta_socIma_1*LV, 
                      tau            = list(tau_socIma_1_1, tau_socIma_1_2, tau_socIma_1_3, tau_socIma_1_4),
                      #rows           = (task==1),
                      componentName  = "indic_socIma_1")
  ol_settings2 = list(outcomeOrdered = socIma_2, 
                      V              = zeta_socIma_2*LV, 
                      tau            = list(tau_socIma_2_1, tau_socIma_2_2, tau_socIma_2_3, tau_socIma_2_4), 
                      #rows           = (task==1),
                      componentName  = "indic_socIma_2")
  ol_settings3 = list(outcomeOrdered = socIma_3, 
                      V              = zeta_socIma_3*LV, 
                      tau            = list(tau_socIma_3_1, tau_socIma_3_2, tau_socIma_3_3, tau_socIma_3_4), 
                      #rows           = (task==1),
                      componentName  = "indic_socIma_3")
  ol_settings4 = list(outcomeOrdered = socIma_4, 
                      V              = zeta_socIma_4*LV, 
                      tau            = list(tau_socIma_4_1, tau_socIma_4_2, tau_socIma_4_3, tau_socIma_4_4), 
                      #rows           = (task==1),
                      componentName  = "indic_socIma_4")
  
  P[["indic_socIma_1"]]     = apollo_ol(ol_settings1, functionality)
  P[["indic_socIma_2"]]     = apollo_ol(ol_settings2, functionality)
  P[["indic_socIma_3"]]     = apollo_ol(ol_settings3, functionality)
  P[["indic_socIma_4"]]     = apollo_ol(ol_settings4, functionality)
  
  ### Likelihood of choices
  ### List of utilities: these must use the same names as in mnl_settings, order is irrelevant
  V = list()
  V[["alt1"]] = asc_EV + ( b_pp*(ppEV*incomeAvg*6/10000) + b_oc*ocEV + b_ct*ctEV/60 + b_r*rEV/100 + b_cf*cfEV + b_e*eEV)
  V[["alt2"]] = asc_CV + ( b_pp*(ppCV*incomeAvg*6/10000) + b_oc*ocCV                + b_r*rCV/100             + b_e*eCV + lambda*LV )
  
  
  ### Define settings for MNL model component
  mnl_settings = list(
    alternatives  = c(alt1=1, alt2=2),
    avail         = list(alt1=1, alt2=1),
    choiceVar     = choiceVehicle,
    utilities     = V,
    componentName = "choice"
  )
  
  ### Compute probabilities for MNL model component
  P[["choice"]] = apollo_mnl(mnl_settings, functionality)
  
  ### Likelihood of the whole model
  P = apollo_combineModels(P, apollo_inputs, functionality)
  
  ### Take product across observation for same individual
  P = apollo_panelProd(P, apollo_inputs, functionality)
  
  ### Average across inter-individual draws
  P = apollo_avgInterDraws(P, apollo_inputs, functionality)
  
  ### Prepare and return outputs of function
  P = apollo_prepareProb(P, apollo_inputs, functionality)
  return(P)
}

# ################################################################# #
#### MODEL ESTIMATION                                            ####
# ################################################################# #

### Optional: calculate LL before model estimation
# apollo_llCalc(apollo_beta, apollo_probabilities, apollo_inputs)

### Estimate model
model = apollo_estimate(apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs, estimate_settings = list(scaleAfterConvergence=FALSE))

# ################################################################# #
#### MODEL OUTPUTS                                               ####
# ################################################################# #

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN)                               ----
# ----------------------------------------------------------------- #

apollo_modelOutput(model)

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name)               ----
# ----------------------------------------------------------------- #

# apollo_saveOutput(model)
# 
# # ################################################################# #
# ##### POST-PROCESSING                                            ####
# # ################################################################# #
# 
# ### Print outputs of additional diagnostics to new output file (remember to close file writing when complete)
# apollo_sink()
# 
# # ----------------------------------------------------------------- #
# #---- MODEL PREDICTIONS                                          ----
# # ----------------------------------------------------------------- #
# 
# forecast <- apollo_prediction(model, apollo_probabilities, apollo_inputs,
#                               prediction_settings=list(modelComponent="indic_socIma_1"))
# 
# # ----------------------------------------------------------------- #
# #---- CONDITIONALS AND UNCONDITIONALS                            ----
# # ----------------------------------------------------------------- #
# 
# conditionals <- apollo_conditionals(model,apollo_probabilities,apollo_inputs)
# 
# summary(conditionals)
# 
# unconditionals <- apollo_unconditionals(model,apollo_probabilities,apollo_inputs)
# 
# mean(unconditionals[[1]])
# sd(unconditionals[[1]])
# 
# # ----------------------------------------------------------------- #
# #---- switch off writing to file                                 ----
# # ----------------------------------------------------------------- #

#apollo_sink()
Result:

Estimates:
Estimate s.e. t.rat.(0) Rob.s.e. Rob.t.rat.(0)
asc_CV 0.000000 NA NA NA NA
asc_EV -1.025121 0.167326 -6.1265 0.173804 -5.8981
b_pp -0.001716 0.001152 -1.4897 0.001393 -1.2323
b_oc -0.075440 0.019672 -3.8348 0.019860 -3.7986
b_ct -0.037906 0.009120 -4.1565 0.009177 -4.1304
b_r 0.069384 0.021257 3.2641 0.020702 3.3516
b_cf 0.131794 0.070104 1.8800 0.067765 1.9449
b_e -0.212597 0.075244 -2.8254 0.074317 -2.8607
gamma_Male 0.544943 NaN NaN 0.355694 1.5321
gamma_female 0.000000 NA NA NA NA
gamma_ageAvg 0.143203 8.8341e-04 162.1027 0.002240 63.9361
gamma_dailyDistanceAvg 0.023027 NaN NaN 0.002336 9.8560
gamma_knowledgeLow 0.000000 NA NA NA NA
gamma_knowledgeMiddle -0.478236 NaN NaN 0.236279 -2.0240
gamma_knowledgeHigh 0.000000 NA NA NA NA
gamma_priorityLanes 0.000000 NA NA NA NA
gamma_freeParking -0.002232 0.009990 -0.2234 0.003201 -0.6974
gamma_tollExemption -0.026160 0.011973 -2.1849 0.008803 -2.9715
lambda -0.082885 0.010544 -7.8612 0.013505 -6.1373
sigma_eta 3.100390 0.007788 398.0773 0.045471 68.1838
zeta_socIma_1 1.000000 NA NA NA NA
zeta_socIma_2 1.267577 0.019854 63.8439 0.104517 12.1279
zeta_socIma_3 36.832629 NaN NaN 2.094309 17.5870
zeta_socIma_4 0.976893 0.002921 334.4190 0.086863 11.2464
tau_socIma_1_1 0.000000 NA NA NA NA
tau_socIma_1_2 0.863087 0.026476 32.5984 0.165433 5.2171
tau_socIma_1_3 2.866195 NaN NaN 0.289460 9.9019
tau_socIma_1_4 4.793235 NaN NaN 0.351097 13.6522
tau_socIma_2_1 0.000000 NA NA NA NA
tau_socIma_2_2 1.471296 0.074326 19.7953 0.173003 8.5045
tau_socIma_2_3 4.641857 0.110819 41.8867 0.282999 16.4024
tau_socIma_2_4 6.837769 0.132717 51.5215 0.332935 20.5378
tau_socIma_3_1 0.000000 NA NA NA NA
tau_socIma_3_2 69.790572 NaN NaN 5.670822 12.3070
tau_socIma_3_3 137.449843 2.409439 57.0464 7.814254 17.5896
tau_socIma_3_4 179.130207 1.636146 109.4830 6.974380 25.6840
tau_socIma_4_1 0.000000 NA NA NA NA
tau_socIma_4_2 0.835836 0.051060 16.3697 0.101882 8.2039
tau_socIma_4_3 2.868006 0.077597 36.9602 0.160207 17.9019
tau_socIma_4_4 5.102360 0.098138 51.9917 0.201249 25.3535

Re: NaN values in hybrid choice model

Posted: 06 Mar 2023, 06:58
by stephanehess
Hi

there could be many reasons

First, why are you fixing one tau to zero in each ordered model?

Stephane

Re: NaN values in hybrid choice model

Posted: 07 Mar 2023, 10:34
by sethyash52
The reason for fixing tau as zero is that the model could be overspecified. Also, in one of your papers published in 2012 ("Using ordered attitudinal indicators in a latent variable choice model: a study of the impact of security on rail travel behavior"), one tau of each indicator is fixed to zero.

Re: NaN values in hybrid choice model

Posted: 08 Mar 2023, 10:14
by stephanehess
Hi

in that paper, a constant is estimated in the ordered model, which is why one threshold is fixed to zero. That's not the case in yours, so you should estimate K-1 thresholds, where K is the number of levels of your indicator

Stephane