Page 1 of 1

Dynamic ICLV Code Generation

Posted: 14 Apr 2025, 11:57
by kemakino
Hi all,

I have been trying to dynamically generate Apollo code for ICLV model estimation to make it easier to test multiple model specifications (e.g., number of choices), but it fails with an error.

Assume that we have the following independent variables: age (18-34, 35-54*, 55-), gender (men*, women), and income (-49,999, 50,000-99,999*, 100,000-149,999, 150,000-) (* stands for the base level), three latent variables, 10 indicators.

Before starting the ICLV model estimation, we examine which independent variables are associated with each of the latent variables, represented by a factor score. Assume we have this result:

Code: Select all

r$> regression_results = factor_regression()
[1] "Regression of the factor 1 is performed. The significant coefficients are: age, income"
[1] "Regression of the factor 2 is performed. The significant coefficients are: income"
[1] "Regression of the factor 3 is performed. The significant coefficients are: income"
For instance, we use age and income for the first latent variable in the structural equation.

Also, we have the following association between the latent variables and indicators.

Code: Select all

factor_attitude_connection = get_factor_attitude_connection(
  n_factors,
  n_sample
)

Code: Select all

r$> factor_attitude_connection
# A tibble: 10 x 1
   string
   <chr> 
 1 01_3  
 2 02_1  
 3 03_2  
 4 04_1  
 5 05_2
 6 06_1
 7 07_1
 8 08_3
 9 09_3
10 10_1
For instance, the first, eighth, and nineth indicators are associated with the third latent variable. Using the pre-supplied information, I tried this code:

Code: Select all

# test_forum.R
source(file.path("iclv", "create_variable_name_vector.R"))
source(file.path("iclv", "get_factor_attitude_connection.R"))
source(file.path("iclv", "get_factor_variable_connection.R"))
source(file.path("iclv", "find_fixed_beta.R"))

model_name <- "test_forum"

database <- readRDS(
  file.path(
    "data",
    "database_forum.rds"
  )
)

apollo_initialise()

apollo_control <- list(
  modelName = model_name,
  modelDescr = "",
  indivID = "row_index",
  outputDirectory = file.path("dist", "iclv"),
  nCores = 8
)

n_clusters = 5
n_factors = 3
n_sample = 1166

iv_list = list(
  "age_18_34" = 1,
  "age_55plus" = 3,
  "gender_women" = 1,
  "income_50minus" = 1,
  "income_100_149" = 3,
  "income_150plus" = 4
)

factor_attitude_connection = get_factor_attitude_connection(
  n_factors,
  n_sample
)

variables = c(
  paste(
    "asc",
    2:n_clusters,
    sep = "_"
  ),
  expand_grid(
    "b",
    2:n_clusters,
    iv_list %>%
      names()
  ) %>%
    create_variable_name_vector(),
  expand_grid(
    "l",
    2:n_clusters,
    1:n_factors
  ) %>%
    create_variable_name_vector(),
  expand_grid(
    "d",
    factor_attitude_connection
  ) %>%
    create_variable_name_vector(),
  expand_grid(
    "s",
    factor_attitude_connection
  ) %>%
    create_variable_name_vector(),
  get_factor_variable_connection(
    regression_results,
    iv_list
  )
)

apollo_beta <- setNames(
  rep(0, length(variables)), variables
)

apollo_beta[
  apollo_beta %>%
    names() %>%
    str_detect(
      .,
      "^[d|s]_"
    )
] = 1

apollo_fixed <- find_fixed_beta(variables)

apollo_draws <- list(
  interDrawsType = "mlhs",
  interNDraws = 200,
  interUnifDraws = c(),
  interNormDraws = paste(
    "n",
    1:n_factors,
    sep = "_"
  ),
  intraDrawsType = "",
  intraNDraws = 0,
  intraUnifDraws = c(),
  intraNormDraws = c()
)

Code: Select all

# create_variable_name_vector.R
create_variable_name_vector = function(table) {
  vector = table %>%
    unite(
      "name",
      everything(),
      sep = "_"
    ) %>%
    pull()

  return(vector)
}

Code: Select all

# get_factor_attitude_connection.R
get_factor_attitude_connection = function(n_factors, n) {
  read_csv(
    file.path(
      "dist",
      "factor",
      paste0(
        "loadings_a_",
        n_factors,
        "_n_",
        n,
        ".csv"
      )
    ),
  ) %>%
    select(
      -statement
    ) %>%
    pivot_longer(
      cols = -index,
      names_to = "factor",
      values_to = "value"
    ) %>%
    mutate(
      index = sprintf("%02d", index),
      factor = str_replace(factor, "PA", ""),
      value = abs(value),
    ) %>%
    group_by(index) %>%
    slice_max(
      order_by = value,
      n = 1,
      with_ties = FALSE
    ) %>%
    select(
      index,
      factor
    ) %>%
    unite(
      "string",
      everything(),
      sep = "_"
    ) %>%
    return()
}

Code: Select all

# get_factor_variable_connection.R
get_factor_variable_connection = function(
    regression_results,
    iv_list) {
  connections = c()
  for (i in 1:length(regression_results)) {
    for (j in 1:length(regression_results[[i]])) {
      var_index = iv_list %>%
        names() %>%
        str_detect(
          .,
          paste0("^", regression_results[[i]][j], "_")
        ) %>%
        which()

      connections = c(
        connections,
        paste(
          "a",
          i,
          iv_list[var_index] %>% names(),
          sep = "_"
        )
      )
    }
  }

  return(connections)
}

Code: Select all

# find_fixed_beta.R
find_fixed_beta = function(variables) {
  fixed_beta = variables[
    variables %>%
      str_detect("^d")
  ] %>%
    tibble() %>%
    mutate(
      factor =
        str_extract(
          .,
          "\\d{1,2}$"
        )
    ) %>%
    group_by(factor) %>%
    slice(1) %>%
    ungroup() %>%
    select(-factor) %>%
    pull()

  return(fixed_beta)
}

It generates the apollo_beta variable correctly:

Code: Select all

r$> apollo_beta
             asc_2              asc_3              asc_4              asc_5      b_2_age_18_34     b_2_age_55plus   b_2_gender_women b_2_income_50minus b_2_income_100_149 
                 0                  0                  0                  0                  0                  0                  0                  0                  0 
b_2_income_150plus      b_3_age_18_34     b_3_age_55plus   b_3_gender_women b_3_income_50minus b_3_income_100_149 b_3_income_150plus      b_4_age_18_34     b_4_age_55plus 
                 0                  0                  0                  0                  0                  0                  0                  0                  0 
  b_4_gender_women b_4_income_50minus b_4_income_100_149 b_4_income_150plus      b_5_age_18_34     b_5_age_55plus   b_5_gender_women b_5_income_50minus b_5_income_100_149 
                 0                  0                  0                  0                  0                  0                  0                  0                  0 
b_5_income_150plus              l_2_1              l_2_2              l_2_3              l_3_1              l_3_2              l_3_3              l_4_1              l_4_2 
                 0                  0                  0                  0                  0                  0                  0                  0                  0        
             l_4_3              l_5_1              l_5_2              l_5_3             d_01_3             d_02_1             d_03_2             d_04_1             d_05_2        
                 0                  0                  0                  0                  1                  1                  1                  1                  1        
            d_06_1             d_07_1             d_08_3             d_09_3             d_10_1             s_01_3             s_02_1             s_03_2             s_04_1        
                 1                  1                  1                  1                  1                  1                  1                  1                  1        
            s_05_2             s_06_1             s_07_1             s_08_3             s_09_3             s_10_1      a_1_age_18_34     a_1_age_55plus a_1_income_50minus        
                 1                  1                  1                  1                  1                  1                  0                  0                  0        
a_1_income_100_149 a_1_income_150plus a_2_income_50minus a_2_income_100_149 a_2_income_150plus a_3_income_50minus a_3_income_100_149 a_3_income_150plus
                 0                  0                  0                  0                  0                  0                  0                  0
However, when I try generating the randcoeffs with the following code

Code: Select all

# test_forum.R
apollo_randCoeff <- function(apollo_beta, apollo_inputs) {
  randcoeff <- list()

  for (i in 1:n_factors) {
    term_list <- list()

    term_list[[1]] <- as.name(paste0("n_", i))

    for (j in regression_results[[i]]) {
      keys <- names(iv_list)[
        str_detect(
          names(iv_list),
          paste0("^", j, "_")
        )
      ]

      for (key in keys) {
        condition <- call("==", as.name(j), iv_list[[key]])

        coeff_name <- as.name(paste0("a_", i, "_", key))

        term_mult <- call("*", coeff_name, condition)

        term_list[[length(term_list) + 1]] <- term_mult
      }
    }

    expr_lv <- Reduce(function(x, y) call("+", x, y), term_list)

    randcoeff[[paste0("lv_", i)]] <- eval(expr_lv,
      envir = apollo_inputs
    )
  }

  return(randcoeff)
}
instead of

Code: Select all

# test_forum.R
apollo_randCoeff <- function(apollo_beta, apollo_inputs) {
  randcoeff <- list()

  randcoeff[["lv_1"]] <- n_1 +
    a_1_age_18_34 * (age == 1) +
    a_1_age_55plus * (age == 3) +
    a_1_income_50minus * (income == 1) +
    a_1_income_100_149 * (income == 3) +
    a_1_income_150plus * (income == 4)

  randcoeff[["lv_2"]] <- n_2 +
    a_2_income_50minus * (income == 1) +
    a_2_income_100_149 * (income == 3) +
    a_2_income_150plus * (income == 4)

  randcoeff[["lv_3"]] <- n_3 +
    a_3_income_50minus * (income == 1) +
    a_3_income_100_149 * (income == 3) +
    a_3_income_150plus * (income == 4)

  return(randcoeff)
}

It shows an error:

Code: Select all

Preparing user-defined functions.
Error in value[[3L]](cond) : unused argument (cond)
and I'm stuck. Is it not possible to go this way?

Re: Dynamic ICLV Code Generation

Posted: 18 Apr 2025, 21:42
by dpalma
Hi,

I am afraid using the function "eval" inside apollo_probabilities, apollo_randCoeff, or apollo_lcPars will not work. Before running the model, Apollo does a series of modifications to these functions, to both optimise them and allow for the use of analytical gradients. That is why most formulas must be written explicitly, so their derivatives can be calculated. The use of "eval" will brake this.

The only supported way to iteratively code utilities, latente variables, or classes inside apollo_probabilities, apollo_randCoeff, or apollo_lcPars is by using a mix of "get" and "paste0", as shown in example "MNL_iterative_coding"

Best wishes,
David