Page 1 of 1

latent class with best-worst data

Posted: 19 Oct 2020, 22:27
by wj5tu
Hi Prof. Hess,

Please ignore my last post regrading "latent class with best-worst data", as I believe the specification is wrong (also attached here). So previously what I have done is, first look at within class probability of best choices, average across all classes, then the within class probability for worst choices, average across classes, lastly combine the best and worst component.

Code: Select all

# 1. Specification 1. --------------------------------------------------------

# ################################################################# #
#### LOAD LIBRARY AND DEFINE CORE SETTINGS                     
# ################################################################# #

### Clear memory
rm(list = ls())

### Load Apollo library
library(apollo)

### Initialise code
apollo_initialise()

### Set core controls
apollo_control = list(
  modelName  = "Apollo_example_23",
  modelDescr = "Best-worst model on drug choice data, latent class",
  indivID    = "ID"
)

# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS                  
# ################################################################# #

database = read.csv("apollo_drugChoiceData.csv",header=TRUE)

# ################################################################# #
#### DEFINE MODEL PARAMETERS                                   
# ################################################################# #

### Vector of parameters, including any that are kept fixed in estimation
apollo_beta = c(b_risk_a   = 0, 
                b_price_a  = 0,
                b_risk_b   = 0, 
                b_price_b  = 0,
                delta_a    = 0.03,
                delta_b    = 0,
                mu_worst   = 1)

### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c("delta_b")

# ################################################################# #
#### DEFINE LATENT CLASS COMPONENTS                             
# ################################################################# #

apollo_lcPars=function(apollo_beta, apollo_inputs){
  lcpars = list()
  
  lcpars[["b_risk"]]  = list(b_risk_a, b_risk_b)
  lcpars[["b_price"]] = list(b_price_a, b_price_b)
  
  V=list()
  V[["class_a"]] = delta_a
  V[["class_b"]] = delta_b
  
  mnl_settings = list(
    alternatives = c(class_a=1, class_b=2), 
    avail        = 1, 
    choiceVar    = NA, 
    V            = V
  )
  
  lcpars[["pi_values"]] = apollo_mnl(mnl_settings, functionality="raw")
  
  lcpars[["pi_values"]] = apollo_firstRow(lcpars[["pi_values"]], apollo_inputs)
  
  return(lcpars)
}

# ################################################################# #
#### GROUP AND VALIDATE INPUTS                                 
# ################################################################# #

apollo_inputs = apollo_validateInputs()

# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION                       
# ################################################################# #

apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
  
  ### Attach inputs and detach after function exit
  apollo_attach(apollo_beta, apollo_inputs)
  on.exit(apollo_detach(apollo_beta, apollo_inputs))
  
  ### Create list of probabilities P
  P = list()
  
  ### Define settings for MNL model component that are generic across classes
  mnl_settings = list(
    alternatives = c(alt1=1, alt2=2, alt3=3, alt4=4)
  )
  
  P_best = list() 
  
  ### Loop over classes
  s=1
  while(s<=2){
    
    ### Compute class-specific utilities
    V=list()
    V[['alt1']]  = (b_risk[[s]] * side_effects_1 + b_price[[s]] * price_1)
    V[['alt2']]  = (b_risk[[s]] * side_effects_2 + b_price[[s]] * price_2)
    V[['alt3']]  = (b_risk[[s]] * side_effects_3 + b_price[[s]] * price_3)
    V[['alt4']]  = (b_risk[[s]] * side_effects_4 + b_price[[s]] * price_4)
    
    ### Compute probabilities for 'best' choice using MNL model
    mnl_settings$avail = list(alt1=1, alt2=1, alt3=1, alt4=1)
    mnl_settings$choiceVar = best
    mnl_settings$V = V
    mnl_settings$componentName = paste0("Class_",s)
    
    ### Compute within-class choice probabilities using MNL model
    P_best[[paste0("Class_",s)]] = apollo_mnl(mnl_settings, functionality)
    
    ### Take product across observation for same individual
    P_best[[paste0("Class_",s)]] = apollo_panelProd(P_best[[paste0("Class_",s)]], apollo_inputs ,functionality)
    
    s=s+1
  }
  
  ### Compute latent class model probabilities
  lc_settings   = list(inClassProb = P_best, classProb = pi_values)
  P[["best"]]  = apollo_lc(lc_settings, apollo_inputs, functionality)
  

  P_worst = list()
  
  ### Loop over classes
  s=1
  while(s<=2){
    
    ### Compute class-specific utilities
    V=list()
    V[['alt1']]  = (b_risk[[s]] * side_effects_1 + b_price[[s]] * price_1)
    V[['alt2']]  = (b_risk[[s]] * side_effects_2 + b_price[[s]] * price_2)
    V[['alt3']]  = (b_risk[[s]] * side_effects_3 + b_price[[s]] * price_3)
    V[['alt4']]  = (b_risk[[s]] * side_effects_4 + b_price[[s]] * price_4)
    
    ### Compute probabilities for 'worst' choice using MNL model
    mnl_settings$avail        = list(alt1=(best!=1), alt2=(best!=2), alt3=(best!=3), alt4=(best!=4))
    mnl_settings$choiceVar    = worst
    mnl_settings$V            = lapply(V,"*",-mu_worst)
    mnl_settings$componentName = paste0("Class_",s)
    
    ### Compute within-class choice probabilities using MNL model
    P_worst[[paste0("Class_",s)]] = apollo_mnl(mnl_settings, functionality)
    
    ### Take product across observation for same individual
    P_worst[[paste0("Class_",s)]] = apollo_panelProd(P_worst[[paste0("Class_",s)]], apollo_inputs ,functionality)
    
    
    s=s+1
  }
  
  ### Compute latent class model probabilities
  lc_settings   = list(inClassProb = P_worst, classProb = pi_values)
  P[["worst"]]  = apollo_lc(lc_settings, apollo_inputs, functionality)
  

  ### Likelihood of the whole model
  P = apollo_combineModels(P, apollo_inputs, functionality)
  
  ### Prepare and return outputs of function
  P = apollo_prepareProb(P, apollo_inputs, functionality)
  return(P)
  
}


# ################################################################# #
#### MODEL ESTIMATION                                           
# ################################################################# #

# apollo_beta=apollo_searchStart(apollo_beta, apollo_fixed,apollo_probabilities, apollo_inputs)
  
model = apollo_estimate(apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs)
  
# ################################################################# #
#### MODEL OUTPUTS                                              
# ################################################################# #

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN)                              
# ----------------------------------------------------------------- #

apollo_modelOutput(model)

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name)             
# ----------------------------------------------------------------- #

apollo_saveOutput(model)
I revised my specification (see attached code for specification 2 below), first taking the product of conditional probability for all choices for an individual, then average across all classes. However, I got "Error in sum(inClassProb[[c]]) : invalid 'type' (list) of argument". I suspect the error is potentially from apollo_combineModels().

Code: Select all

# ################################################################# #
#### LOAD LIBRARY AND DEFINE CORE SETTINGS                     
# ################################################################# #

### Clear memory
rm(list = ls())

### Load Apollo library
library(apollo)

### Initialise code
apollo_initialise()

### Set core controls
apollo_control = list(
  modelName  = "Apollo_example_23",
  modelDescr = "Best-worst model on drug choice data, latent class",
  indivID    = "ID"
)

# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS                  
# ################################################################# #

database = read.csv("apollo_drugChoiceData.csv",header=TRUE)

# ################################################################# #
#### DEFINE MODEL PARAMETERS                                   
# ################################################################# #

### Vector of parameters, including any that are kept fixed in estimation
apollo_beta = c(b_risk_a   = 0, 
                b_price_a  = 0,
                b_risk_b   = 0, 
                b_price_b  = 0,
                delta_a    = 0.03,
                delta_b    = 0,
                mu_worst   = 1)

### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c("delta_b")

# ################################################################# #
#### DEFINE LATENT CLASS COMPONENTS                             
# ################################################################# #

apollo_lcPars=function(apollo_beta, apollo_inputs){
  lcpars = list()
  
  lcpars[["b_risk"]]  = list(b_risk_a, b_risk_b)
  lcpars[["b_price"]] = list(b_price_a, b_price_b)
  
  V=list()
  V[["class_a"]] = delta_a
  V[["class_b"]] = delta_b
  
  mnl_settings = list(
    alternatives = c(class_a=1, class_b=2), 
    avail        = 1, 
    choiceVar    = NA, 
    V            = V
  )
  
  lcpars[["pi_values"]] = apollo_mnl(mnl_settings, functionality="raw")
  
  lcpars[["pi_values"]] = apollo_firstRow(lcpars[["pi_values"]], apollo_inputs)
  
  return(lcpars)
}

# ################################################################# #
#### GROUP AND VALIDATE INPUTS                                 
# ################################################################# #

apollo_inputs = apollo_validateInputs()

# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION                       
# ################################################################# #

apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
  
  ### Attach inputs and detach after function exit
  apollo_attach(apollo_beta, apollo_inputs)
  on.exit(apollo_detach(apollo_beta, apollo_inputs))
  
  ### Create list of probabilities P
  P = list()
  P_bw = list()
  
  ### Define settings for MNL model component that are generic across classes
  mnl_settings = list(
    alternatives = c(alt1=1, alt2=2, alt3=3, alt4=4)
  )
  
  
  ### Loop over classes
  s=1
  while(s<=2){
    
    ### Compute class-specific utilities
    V=list()
    V[['alt1']]  = (b_risk[[s]] * side_effects_1 + b_price[[s]] * price_1)
    V[['alt2']]  = (b_risk[[s]] * side_effects_2 + b_price[[s]] * price_2)
    V[['alt3']]  = (b_risk[[s]] * side_effects_3 + b_price[[s]] * price_3)
    V[['alt4']]  = (b_risk[[s]] * side_effects_4 + b_price[[s]] * price_4)
    
    ### Compute probabilities for 'best' choice using MNL model
    mnl_settings$avail = list(alt1=1, alt2=1, alt3=1, alt4=1)
    mnl_settings$choiceVar = best
    mnl_settings$V = V
    mnl_settings$componentName = paste0("Class_",s)
    
    ### Compute within-class choice probabilities using MNL model
    P_bw[["best"]] = apollo_mnl(mnl_settings, functionality)
    
    ### Take product across observation for same individual
    P_bw[["best"]] = apollo_panelProd(P_bw[["best"]], apollo_inputs ,functionality)
    
    
    ### Compute probabilities for 'worst' choice using MNL model
    mnl_settings$avail        = list(alt1=(best!=1), alt2=(best!=2), alt3=(best!=3), alt4=(best!=4))
    mnl_settings$choiceVar    = worst
    mnl_settings$V            = lapply(V,"*",-mu_worst)
    mnl_settings$componentName = paste0("Class_",s)
    
    ### Compute within-class choice probabilities using MNL model
    P_bw[["worst"]] = apollo_mnl(mnl_settings, functionality)
    
    ### Take product across observation for same individual
    P_bw[["worst"]] = apollo_panelProd(P_bw[["worst"]], apollo_inputs ,functionality)
    
    P[[paste0("Class_",s)]] = apollo_combineModels(P_bw, apollo_inputs, functionality)
     
    s=s+1
  }
  
  ### Compute latent class model probabilities
  lc_settings   = list(inClassProb = P, classProb = pi_values)
  P[["model"]]  = apollo_lc(lc_settings, apollo_inputs, functionality)
  
  ### Prepare and return outputs of function
  P = apollo_prepareProb(P, apollo_inputs, functionality)
  
  return(P)
  
}


# ################################################################# #
#### MODEL ESTIMATION                                           
# ################################################################# #

# apollo_beta=apollo_searchStart(apollo_beta, apollo_fixed,apollo_probabilities, apollo_inputs)

model = apollo_estimate(apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs)

# ################################################################# #
#### MODEL OUTPUTS                                              
# ################################################################# #

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN)                              
# ----------------------------------------------------------------- #

apollo_modelOutput(model)

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name)             
# ----------------------------------------------------------------- #

apollo_saveOutput(model)
I finally figure out how to implement this in apollo, by reshaping the dataset to long format (just stack best and worst choices), so there is no need to combine. There is a minor issue, though, I got the error " Error in rowsum.default(log(P), group = indivID) : incorrect length for 'group' ". Somehow this error can be solved when I reload the data. I am not sure this error is because of tidyr or apollo.

Code: Select all

# ################################################################# #
#### LOAD LIBRARY AND DEFINE CORE SETTINGS                     
# ################################################################# #

### Clear memory
rm(list = ls())

### Load Apollo library
library(apollo)
library(dplyr)
library(tidyr)

### Initialise code
apollo_initialise()

### Set core controls
apollo_control = list(
  modelName  = "Apollo_example_23",
  modelDescr = "Best-worst model on drug choice data, latent class",
  indivID    = "ID"
)

# ################################################################# #
#### LOAD DATA AND APPLY ANY TRANSFORMATIONS                  
# ################################################################# #

# from wide to long format
database <- read.csv("apollo_drugChoiceData.csv",header=TRUE) %>%
  pivot_longer(cols = c(best, worst), names_to = "bw", values_to = "choice") %>%
  mutate(av_worst = ifelse(bw == "best", choice, lag(choice))) # av_worst represents the best alternative chosen by respondent n in choice scenario t; it is used for identifying alternative availability condition for the worst choice scenario

# must reload the new data, otherwise "Error in rowsum.default(log(P), group = indivID) : incorrect length for 'group' "
write.csv(database, file = "newdata.csv")
database <- read.csv("newdata.csv",header=TRUE)

# ################################################################# #
#### DEFINE MODEL PARAMETERS                                   
# ################################################################# #

### Vector of parameters, including any that are kept fixed in estimation
apollo_beta = c(b_risk_a   = 0, 
                b_price_a  = 0,
                b_risk_b   = 0, 
                b_price_b  = 0,
                delta_a    = 0.03,
                delta_b    = 0,
                mu_worst   = 1)

### Vector with names (in quotes) of parameters to be kept fixed at their starting value in apollo_beta, use apollo_beta_fixed = c() if none
apollo_fixed = c("delta_b")

# ################################################################# #
#### DEFINE LATENT CLASS COMPONENTS                             
# ################################################################# #

apollo_lcPars=function(apollo_beta, apollo_inputs){
  lcpars = list()
  
  lcpars[["b_risk"]]  = list(b_risk_a, b_risk_b)
  lcpars[["b_price"]] = list(b_price_a, b_price_b)
  
  V=list()
  V[["class_a"]] = delta_a
  V[["class_b"]] = delta_b
  
  mnl_settings = list(
    alternatives = c(class_a=1, class_b=2), 
    avail        = 1, 
    choiceVar    = NA, 
    V            = V
  )
  
  lcpars[["pi_values"]] = apollo_mnl(mnl_settings, functionality="raw")
  
  lcpars[["pi_values"]] = apollo_firstRow(lcpars[["pi_values"]], apollo_inputs)
  
  return(lcpars)
}

# ################################################################# #
#### GROUP AND VALIDATE INPUTS                                 
# ################################################################# #

apollo_inputs = apollo_validateInputs()

# ################################################################# #
#### DEFINE MODEL AND LIKELIHOOD FUNCTION                       
# ################################################################# #

apollo_probabilities=function(apollo_beta, apollo_inputs, functionality="estimate"){
  
  ### Attach inputs and detach after function exit
  apollo_attach(apollo_beta, apollo_inputs)
  on.exit(apollo_detach(apollo_beta, apollo_inputs))
  
  ### Create list of probabilities P
  P = list()

  ### Loop over classes
  s=1
  while(s<=2){
    
    ### Compute class-specific utilities
    V=list()
    V[['alt1']]  = (b_risk[[s]] * side_effects_1 + b_price[[s]] * price_1)
    V[['alt2']]  = (b_risk[[s]] * side_effects_2 + b_price[[s]] * price_2)
    V[['alt3']]  = (b_risk[[s]] * side_effects_3 + b_price[[s]] * price_3)
    V[['alt4']]  = (b_risk[[s]] * side_effects_4 + b_price[[s]] * price_4)
    
    # ### Compute probabilities for 'best' choice using MNL model
    mnl_settings = list(
      alternatives = c(alt1 = 1, alt2=2, alt3=3, alt4=4),
      avail        = list(alt1 = (bw == "best") + (bw == "worst") * (av_worst != 1),
                          alt2 = (bw == "best") + (bw == "worst") * (av_worst != 2),
                          alt3 = (bw == "best") + (bw == "worst") * (av_worst != 3),
                          alt4 = (bw == "best") + (bw == "worst") * (av_worst != 4)),
      choiceVar    = choice,
      V            = lapply(V, "*", 1 * (bw == "best") - mu_worst * (bw == "worst")),
      componentName = paste0("Class_",s)
    )
    
    ### Compute within-class choice probabilities using MNL model
    P[[paste0("Class_",s)]] = apollo_mnl(mnl_settings, functionality)
    
    ### Take product across observation for same individual
    P[[paste0("Class_",s)]] = apollo_panelProd(P[[paste0("Class_",s)]], apollo_inputs ,functionality)
    
    s=s+1
  }
  
  ### Compute latent class model probabilities
  lc_settings   = list(inClassProb = P, classProb = pi_values)
  P[["model"]]  = apollo_lc(lc_settings, apollo_inputs, functionality)
  
  ### Prepare and return outputs of function
  P = apollo_prepareProb(P, apollo_inputs, functionality)
  
  return(P)
  
}


# ################################################################# #
#### MODEL ESTIMATION                                           
# ################################################################# #

# apollo_beta=apollo_searchStart(apollo_beta, apollo_fixed,apollo_probabilities, apollo_inputs)

model = apollo_estimate(apollo_beta, apollo_fixed, apollo_probabilities, apollo_inputs)

# ################################################################# #
#### MODEL OUTPUTS                                              
# ################################################################# #

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO SCREEN)                              
# ----------------------------------------------------------------- #

apollo_modelOutput(model)

# ----------------------------------------------------------------- #
#---- FORMATTED OUTPUT (TO FILE, using model name)             
# ----------------------------------------------------------------- #

apollo_saveOutput(model)

Re: latent class with best-worst data

Posted: 21 Oct 2020, 18:28
by stephanehess
Hi

sorry for the slow reply. Yes, using combineModels inside LC will cause some issues as combineModels keeps all the individual components, not just the product. I can't guarantee whether this will work, but can you try replacing

Code: Select all

P[[paste0("Class_",s)]] = apollo_combineModels(P_bw, apollo_inputs, functionality)
by

Code: Select all

P[[paste0("Class_",s)]] = apollo_combineModels(P_bw, apollo_inputs, functionality)$model
Please let us know if this works

Stephane

Re: latent class with best-worst data

Posted: 21 Oct 2020, 21:08
by wj5tu
Hi Prof. Hess,

Thanks! it works! So there is no need to transform the dataset.

By the way, the result is the same as when I transform the dataset. The only difference is BIC value, as after transforming the dataset, the number of observations doubles.

Best,
Wenjian

Re: latent class with best-worst data

Posted: 09 Nov 2020, 17:58
by stephanehess
Hi Wenjian

thanks for this, you found a bug. The number of observations in this case is not correct as Apollo uses one observation per row, despite you using apollo_combineModels. We will fix this in the next version

Stephane