Socio-demographics and effect size
Posted: 26 Oct 2023, 10:50
Hi,
I am analysing a mixed logit model of our discrete choice model and have two questions. (I included the code below):
1. Adding socio-demographics: I want to add Age & Gender. However, I only have a categorical variable for age. Do I need to specify the categories? I also defined the socio-demographics in the model parameters (apollo_beta) and then included them in the list of utilities. Is this the correct approach? It worked; however, when I also included the socio-demographics in the status quo (V[['sq']]), I only got the estimated coefficients, while everything else (s.e., t.rat., p...) was just NA.
2. Interpretation of results: Are the results standardized, so I can directly compare the different variables? And how can I analyse the effect size?
Thank you so much in advance!
Mirjam
#Set WD
### Load Apollo library
library(apollo)
library(dplyr)
### Initialise Apollo before any model-specific settings are defined
apollo_initialise()

### Core controls: model label/description, mixing switch, column holding the
### individual identifier, number of cores and the folder for output files
apollo_control <- list(
  modelName       = "commute_dce_mlogit_all commuters",
  modelDescr      = "Mixed Logit model on carpooling data, commuting",
  mixing          = TRUE,
  indivID         = "m",
  nCores          = 5,
  outputDirectory = "output_mixedlogit_commuting_NEU"
)
# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS ####
# ################################################################# #
### Derive the attribute columns used in the utility functions
database <- new_df_commuting
# database <- subset(new_df_commuting, comm_level == 2)

### Every derived column is built in a single mutate() call, in the same order
### as before. The traffic_* scores are written as explicit case_when()
### mappings instead of partial `database$col[mask] <- value` assignments into
### columns that do not exist yet, so the complete mapping (including the NA
### left for unlisted levels) is visible in one place and does not depend on
### whatever the column held beforehand.
database <- mutate(
  database,
  ## --- status quo and option 1 ---
  cost_SQ = ifelse(att2_3 == 0, 1, 0),
  # cost levels 1/2/3 are recoded to 0.5/0.36/0.08; any other level becomes 0
  cost_alt1 = ifelse(att2_1 == 1, 0.5,
    ifelse(att2_1 == 2, 0.36,
      ifelse(att2_1 == 3, 0.08, 0)
    )
  ),
  strong_SQ = ifelse(att6_3 == 0, 1, 0),
  medium_alt1 = ifelse(att6_1 == 1, 1, 0),
  weak_alt1 = ifelse(att6_1 == 2, 1, 0),
  # traffic score: 2 = medium, 1 = weak, NA for any other level
  traffic_alt1 = case_when(medium_alt1 == 1 ~ 2, weak_alt1 == 1 ~ 1),
  # NOTE(review): traffic_sq is created here but is not referenced by the
  # utility functions further down in this script - confirm this is intended
  traffic_sq = case_when(strong_SQ == 1 ~ 3),
  ## --- option 2 ---
  cost_alt2 = ifelse(att2_2 == 1, 0.5,
    ifelse(att2_2 == 2, 0.36,
      ifelse(att2_2 == 3, 0.08, 0)
    )
  ),
  medium_alt2 = ifelse(att6_2 == 1, 1, 0),
  weak_alt2 = ifelse(att6_2 == 2, 1, 0),
  traffic_alt2 = case_when(medium_alt2 == 1 ~ 2, weak_alt2 == 1 ~ 1)
)

### traffic_alt1 / traffic_alt2 are multiplied by b_traffic in the utilities,
### so an NA here would propagate into the utilities. Surface it early.
if (anyNA(database$traffic_alt1) || anyNA(database$traffic_alt2)) {
  warning(
    "traffic_alt1/traffic_alt2 contain NA: att6_1/att6_2 hold levels other than 1 (medium) and 2 (weak)",
    call. = FALSE
  )
}
# ################################################################# #
#### DEFINE MODEL PARAMETERS ####
# ################################################################# #
### Starting values for every parameter, including those kept fixed.
### Each random coefficient has a mean (m_*) and a standard deviation (s_*),
### listed side by side; GENDER and AGE enter the utilities as plain
### (non-random) coefficients.
apollo_beta <- c(
  m_asc_sq        = 0, s_asc_sq        = 0,
  m_passenger     = 0, s_passenger     = 0,
  m_cost          = 0, s_cost          = 0,
  m_known         = 0, s_known         = 0,
  m_unknown       = 0, s_unknown       = 0,
  m_stranger      = 0, s_stranger      = 0,
  m_freeparking   = 0, s_freeparking   = 0,
  m_points        = 0, s_points        = 0,
  m_freeundpoints = 0, s_freeundpoints = 0,
  m_res           = 0, s_res           = 0,
  m_traffic       = 0, s_traffic       = 0,
  GENDER          = 0,
  AGE             = 0
)

### Names (in quotes) of parameters kept at their starting value in
### apollo_beta during estimation; use apollo_fixed = c() if none
apollo_fixed <- c(
  "m_known", "s_known",            # reference category/value
  "m_freeparking", "s_freeparking" # reference category/value
)
# ################################################################# #
#### DEFINE RANDOM COMPONENTS ####
# ################################################################# #
### Settings for the random draws: 1000 inter-individual MLHS draws, one set of
### normal draws per random coefficient, and no intra-individual draws
apollo_draws <- list(
  interDrawsType = "mlhs",
  interNDraws    = 1000,
  interUnifDraws = c(),
  interNormDraws = paste0(
    "draws_",
    c(
      "asc_sq", "passenger", "cost", "known", "unknown", "stranger",
      "points", "freeparking", "freeundpoints", "res", "traffic"
    )
  ),
  intraDrawsType = "mlhs",
  intraNDraws    = 0,
  intraUnifDraws = c(),
  intraNormDraws = c()
)
### Create random parameters
### Builds the named list of random coefficients used in the utility functions.
### Every coefficient has the form mean + sd * draw (m_x + s_x * draws_x), where
### the draws_* names are the ones listed under interNormDraws in apollo_draws.
### Arguments: apollo_beta, apollo_inputs - neither is referenced by name in the
###   body; the m_*, s_* and draws_* symbols are presumably made visible by
###   Apollo before this function is called (TODO confirm in the Apollo manual).
### Returns: list with elements b_asc_sq, b_passenger, b_cost, b_known,
###   b_unknown, b_stranger, b_points, b_freeparking, b_freeundpoints, b_res
###   and b_traffic.
apollo_randCoeff = function(apollo_beta, apollo_inputs){
randcoeff = list()
randcoeff[["b_asc_sq"]] = m_asc_sq + s_asc_sq * draws_asc_sq
randcoeff[["b_passenger"]] = m_passenger + s_passenger * draws_passenger
randcoeff[["b_cost"]] = m_cost + s_cost* draws_cost
### m_known and s_known are listed in apollo_fixed with starting value 0, so b_known stays at 0 (reference level)
randcoeff[["b_known"]] = m_known + s_known * draws_known
randcoeff[["b_unknown"]] = m_unknown + s_unknown * draws_unknown
randcoeff[["b_stranger"]] = m_stranger + s_stranger * draws_stranger
randcoeff[["b_points"]] = m_points + s_points * draws_points
### m_freeparking and s_freeparking are likewise fixed at 0, so b_freeparking stays at 0 (reference level)
randcoeff[["b_freeparking"]] = m_freeparking + s_freeparking * draws_freeparking
randcoeff[["b_freeundpoints"]] = m_freeundpoints + s_freeundpoints * draws_freeundpoints
randcoeff[["b_res"]] = m_res + s_res * draws_res
randcoeff[["b_traffic"]] = m_traffic + s_traffic * draws_traffic
return(randcoeff)
}
# ################################################################# #
#### GROUP AND VALIDATE INPUTS ####
# ################################################################# #
### Called without arguments, so the inputs defined above are taken from the workspace
apollo_inputs <- apollo_validateInputs()
# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION ####
# ################################################################# #
### Likelihood function handed to apollo_estimate.
### Arguments:
###   apollo_beta   - parameter vector, passed on to apollo_attach/apollo_detach
###   apollo_inputs - list produced by apollo_validateInputs()
###   functionality - mode string forwarded unchanged to every Apollo helper
###                   called below (default "estimate")
### Returns: P, after apollo_mnl, apollo_panelProd, apollo_avgInterDraws and
###   apollo_prepareProb have been applied in that order.
apollo_probabilities = function(apollo_beta, apollo_inputs, functionality = "estimate"){
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
P = list()
### List of utilities: these must use the same names as in mnl_settings, order is irrelevant
### AGE*(Q2) and GENDER*(Q1) enter alt1 and alt2 only, so they measure a shift
### relative to the status quo. Adding the identical terms to V[['sq']] as well
### would add the same amount to every utility; that cancels out of the logit
### probabilities, so AGE and GENDER could then no longer be identified.
### NOTE(review): Q2 enters linearly as its numeric code. If Q2 is a categorical
### age band (as the accompanying post states), one dummy per category minus a
### reference level may be more appropriate - confirm with the data coding.
V = list()
V[['alt1']] = b_passenger*(att1_1 == 1) + b_cost*cost_alt1 + b_known*(att3_1 == 1) + b_unknown*(att3_1 == 2) + b_stranger*(att3_1 == 3) + b_freeparking*(att4_1 == 1) + b_points*(att4_1 == 2) + b_freeundpoints*(att4_1 == 3) + b_res*(att5_1 == 1) + b_traffic*traffic_alt1 + AGE*(Q2) + GENDER*(Q1)
V[['alt2']] = b_passenger*(att1_2 == 1) + b_cost*cost_alt2 + b_known*(att3_2 == 1) + b_unknown*(att3_2 == 2) + b_stranger*(att3_2 == 3) + b_freeparking*(att4_2 == 1) + b_points*(att4_2 == 2) + b_freeundpoints*(att4_2 == 3) + b_res*(att5_2 == 1) + b_traffic*traffic_alt2 + AGE*(Q2) + GENDER*(Q1)
### Status quo: alternative-specific constant plus the cost term only
V[['sq']] = b_asc_sq + b_cost*cost_SQ
### Define settings for MNL model component
### alternatives maps each utility name to the value of `choice` that selects it;
### avail = 1 is a single value for all alternatives (presumably "always available" - confirm)
mnl_settings = list(
alternatives = c(alt1 = 1, alt2 = 2, sq = 3),
avail = 1,
choiceVar = choice,
V = V
)
### Compute probabilities using MNL model
P[['model']] = apollo_mnl(mnl_settings, functionality)
### Take product across observation for same individual
P = apollo_panelProd(P, apollo_inputs, functionality)
### Average across inter-individual draws
P = apollo_avgInterDraws(P, apollo_inputs, functionality)
### Prepare and return outputs of function
P = apollo_prepareProb(P, apollo_inputs, functionality)
return(P)
}
# ################################################################# #
#### MODEL ESTIMATION ####
# ################################################################# #
### Estimate the model. writeIter is spelled out as FALSE rather than F,
### because F is an ordinary variable that can be reassigned.
### NOTE(review): writeIter = FALSE presumably disables writing per-iteration
### parameter values to file - confirm in the Apollo manual.
model <- apollo_estimate(
  apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs,
  estimate_settings = list(writeIter = FALSE)
)
# ################################################################# #
#### MODEL OUTPUTS ####
# ################################################################# #
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN) ----
# ----------------------------------------------------------------- #
apollo_modelOutput(model)
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name) ----
# ----------------------------------------------------------------- #
### NOTE(review): printPVal = 2 presumably requests two-sided p-values in the
### saved output - confirm in the Apollo manual.
apollo_saveOutput(model, saveOutput_settings = list(printPVal = 2))
I am analysing a mixed logit model of our discrete choice model and have two questions. (I included the code below):
1. Adding socio-demographics: I want to add Age & Gender. However, I only have a categorical variable for age. Do I need to specify the categories? I also defined the socio-demographics in the model parameters (apollo_beta) and then included them in the list of utilities. Is this the correct approach? It worked; however, when I also included the socio-demographics in the status quo (V[['sq']]), I only got the estimated coefficients, while everything else (s.e., t.rat., p...) was just NA.
2. Interpretation of results: Are the results standardized, so I can directly compare the different variables? And how can I analyse the effect size?
Thank you so much in advance!
Mirjam
#Set WD
### Load Apollo library
library(apollo)
library(dplyr)
### Initialise Apollo before any model-specific settings are defined
apollo_initialise()

### Core controls: model label/description, mixing switch, column holding the
### individual identifier, number of cores and the folder for output files
apollo_control <- list(
  modelName       = "commute_dce_mlogit_all commuters",
  modelDescr      = "Mixed Logit model on carpooling data, commuting",
  mixing          = TRUE,
  indivID         = "m",
  nCores          = 5,
  outputDirectory = "output_mixedlogit_commuting_NEU"
)
# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS ####
# ################################################################# #
### Derive the attribute columns used in the utility functions
database <- new_df_commuting
# database <- subset(new_df_commuting, comm_level == 2)

### Every derived column is built in a single mutate() call, in the same order
### as before. The traffic_* scores are written as explicit case_when()
### mappings instead of partial `database$col[mask] <- value` assignments into
### columns that do not exist yet, so the complete mapping (including the NA
### left for unlisted levels) is visible in one place and does not depend on
### whatever the column held beforehand.
database <- mutate(
  database,
  ## --- status quo and option 1 ---
  cost_SQ = ifelse(att2_3 == 0, 1, 0),
  # cost levels 1/2/3 are recoded to 0.5/0.36/0.08; any other level becomes 0
  cost_alt1 = ifelse(att2_1 == 1, 0.5,
    ifelse(att2_1 == 2, 0.36,
      ifelse(att2_1 == 3, 0.08, 0)
    )
  ),
  strong_SQ = ifelse(att6_3 == 0, 1, 0),
  medium_alt1 = ifelse(att6_1 == 1, 1, 0),
  weak_alt1 = ifelse(att6_1 == 2, 1, 0),
  # traffic score: 2 = medium, 1 = weak, NA for any other level
  traffic_alt1 = case_when(medium_alt1 == 1 ~ 2, weak_alt1 == 1 ~ 1),
  # NOTE(review): traffic_sq is created here but is not referenced by the
  # utility functions further down in this script - confirm this is intended
  traffic_sq = case_when(strong_SQ == 1 ~ 3),
  ## --- option 2 ---
  cost_alt2 = ifelse(att2_2 == 1, 0.5,
    ifelse(att2_2 == 2, 0.36,
      ifelse(att2_2 == 3, 0.08, 0)
    )
  ),
  medium_alt2 = ifelse(att6_2 == 1, 1, 0),
  weak_alt2 = ifelse(att6_2 == 2, 1, 0),
  traffic_alt2 = case_when(medium_alt2 == 1 ~ 2, weak_alt2 == 1 ~ 1)
)

### traffic_alt1 / traffic_alt2 are multiplied by b_traffic in the utilities,
### so an NA here would propagate into the utilities. Surface it early.
if (anyNA(database$traffic_alt1) || anyNA(database$traffic_alt2)) {
  warning(
    "traffic_alt1/traffic_alt2 contain NA: att6_1/att6_2 hold levels other than 1 (medium) and 2 (weak)",
    call. = FALSE
  )
}
# ################################################################# #
#### DEFINE MODEL PARAMETERS ####
# ################################################################# #
### Starting values for every parameter, including those kept fixed.
### Each random coefficient has a mean (m_*) and a standard deviation (s_*),
### listed side by side; GENDER and AGE enter the utilities as plain
### (non-random) coefficients.
apollo_beta <- c(
  m_asc_sq        = 0, s_asc_sq        = 0,
  m_passenger     = 0, s_passenger     = 0,
  m_cost          = 0, s_cost          = 0,
  m_known         = 0, s_known         = 0,
  m_unknown       = 0, s_unknown       = 0,
  m_stranger      = 0, s_stranger      = 0,
  m_freeparking   = 0, s_freeparking   = 0,
  m_points        = 0, s_points        = 0,
  m_freeundpoints = 0, s_freeundpoints = 0,
  m_res           = 0, s_res           = 0,
  m_traffic       = 0, s_traffic       = 0,
  GENDER          = 0,
  AGE             = 0
)

### Names (in quotes) of parameters kept at their starting value in
### apollo_beta during estimation; use apollo_fixed = c() if none
apollo_fixed <- c(
  "m_known", "s_known",            # reference category/value
  "m_freeparking", "s_freeparking" # reference category/value
)
# ################################################################# #
#### DEFINE RANDOM COMPONENTS ####
# ################################################################# #
### Settings for the random draws: 1000 inter-individual MLHS draws, one set of
### normal draws per random coefficient, and no intra-individual draws
apollo_draws <- list(
  interDrawsType = "mlhs",
  interNDraws    = 1000,
  interUnifDraws = c(),
  interNormDraws = paste0(
    "draws_",
    c(
      "asc_sq", "passenger", "cost", "known", "unknown", "stranger",
      "points", "freeparking", "freeundpoints", "res", "traffic"
    )
  ),
  intraDrawsType = "mlhs",
  intraNDraws    = 0,
  intraUnifDraws = c(),
  intraNormDraws = c()
)
### Create random parameters
### Builds the named list of random coefficients used in the utility functions.
### Every coefficient has the form mean + sd * draw (m_x + s_x * draws_x), where
### the draws_* names are the ones listed under interNormDraws in apollo_draws.
### Arguments: apollo_beta, apollo_inputs - neither is referenced by name in the
###   body; the m_*, s_* and draws_* symbols are presumably made visible by
###   Apollo before this function is called (TODO confirm in the Apollo manual).
### Returns: list with elements b_asc_sq, b_passenger, b_cost, b_known,
###   b_unknown, b_stranger, b_points, b_freeparking, b_freeundpoints, b_res
###   and b_traffic.
apollo_randCoeff = function(apollo_beta, apollo_inputs){
randcoeff = list()
randcoeff[["b_asc_sq"]] = m_asc_sq + s_asc_sq * draws_asc_sq
randcoeff[["b_passenger"]] = m_passenger + s_passenger * draws_passenger
randcoeff[["b_cost"]] = m_cost + s_cost* draws_cost
### m_known and s_known are listed in apollo_fixed with starting value 0, so b_known stays at 0 (reference level)
randcoeff[["b_known"]] = m_known + s_known * draws_known
randcoeff[["b_unknown"]] = m_unknown + s_unknown * draws_unknown
randcoeff[["b_stranger"]] = m_stranger + s_stranger * draws_stranger
randcoeff[["b_points"]] = m_points + s_points * draws_points
### m_freeparking and s_freeparking are likewise fixed at 0, so b_freeparking stays at 0 (reference level)
randcoeff[["b_freeparking"]] = m_freeparking + s_freeparking * draws_freeparking
randcoeff[["b_freeundpoints"]] = m_freeundpoints + s_freeundpoints * draws_freeundpoints
randcoeff[["b_res"]] = m_res + s_res * draws_res
randcoeff[["b_traffic"]] = m_traffic + s_traffic * draws_traffic
return(randcoeff)
}
# ################################################################# #
#### GROUP AND VALIDATE INPUTS ####
# ################################################################# #
### Called without arguments, so the inputs defined above are taken from the workspace
apollo_inputs <- apollo_validateInputs()
# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION ####
# ################################################################# #
### Likelihood function handed to apollo_estimate.
### Arguments:
###   apollo_beta   - parameter vector, passed on to apollo_attach/apollo_detach
###   apollo_inputs - list produced by apollo_validateInputs()
###   functionality - mode string forwarded unchanged to every Apollo helper
###                   called below (default "estimate")
### Returns: P, after apollo_mnl, apollo_panelProd, apollo_avgInterDraws and
###   apollo_prepareProb have been applied in that order.
apollo_probabilities = function(apollo_beta, apollo_inputs, functionality = "estimate"){
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
P = list()
### List of utilities: these must use the same names as in mnl_settings, order is irrelevant
### AGE*(Q2) and GENDER*(Q1) enter alt1 and alt2 only, so they measure a shift
### relative to the status quo. Adding the identical terms to V[['sq']] as well
### would add the same amount to every utility; that cancels out of the logit
### probabilities, so AGE and GENDER could then no longer be identified.
### NOTE(review): Q2 enters linearly as its numeric code. If Q2 is a categorical
### age band (as the accompanying post states), one dummy per category minus a
### reference level may be more appropriate - confirm with the data coding.
V = list()
V[['alt1']] = b_passenger*(att1_1 == 1) + b_cost*cost_alt1 + b_known*(att3_1 == 1) + b_unknown*(att3_1 == 2) + b_stranger*(att3_1 == 3) + b_freeparking*(att4_1 == 1) + b_points*(att4_1 == 2) + b_freeundpoints*(att4_1 == 3) + b_res*(att5_1 == 1) + b_traffic*traffic_alt1 + AGE*(Q2) + GENDER*(Q1)
V[['alt2']] = b_passenger*(att1_2 == 1) + b_cost*cost_alt2 + b_known*(att3_2 == 1) + b_unknown*(att3_2 == 2) + b_stranger*(att3_2 == 3) + b_freeparking*(att4_2 == 1) + b_points*(att4_2 == 2) + b_freeundpoints*(att4_2 == 3) + b_res*(att5_2 == 1) + b_traffic*traffic_alt2 + AGE*(Q2) + GENDER*(Q1)
### Status quo: alternative-specific constant plus the cost term only
V[['sq']] = b_asc_sq + b_cost*cost_SQ
### Define settings for MNL model component
### alternatives maps each utility name to the value of `choice` that selects it;
### avail = 1 is a single value for all alternatives (presumably "always available" - confirm)
mnl_settings = list(
alternatives = c(alt1 = 1, alt2 = 2, sq = 3),
avail = 1,
choiceVar = choice,
V = V
)
### Compute probabilities using MNL model
P[['model']] = apollo_mnl(mnl_settings, functionality)
### Take product across observation for same individual
P = apollo_panelProd(P, apollo_inputs, functionality)
### Average across inter-individual draws
P = apollo_avgInterDraws(P, apollo_inputs, functionality)
### Prepare and return outputs of function
P = apollo_prepareProb(P, apollo_inputs, functionality)
return(P)
}
# ################################################################# #
#### MODEL ESTIMATION ####
# ################################################################# #
### Estimate the model. writeIter is spelled out as FALSE rather than F,
### because F is an ordinary variable that can be reassigned.
### NOTE(review): writeIter = FALSE presumably disables writing per-iteration
### parameter values to file - confirm in the Apollo manual.
model <- apollo_estimate(
  apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs,
  estimate_settings = list(writeIter = FALSE)
)
# ################################################################# #
#### MODEL OUTPUTS ####
# ################################################################# #
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN) ----
# ----------------------------------------------------------------- #
apollo_modelOutput(model)
# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name) ----
# ----------------------------------------------------------------- #
### NOTE(review): printPVal = 2 presumably requests two-sided p-values in the
### saved output - confirm in the Apollo manual.
apollo_saveOutput(model, saveOutput_settings = list(printPVal = 2))