SENDAs Agreement 1 Update 2010-2022 (Prediction, step 2.2, shap-informed models)

This module develops and validates optimized Cox Proportional Hazards models for post-discharge outcomes (readmission and all-cause mortality) in SUD patients (2010–2020). The selection of variables is grounded in SHAP importance tiers from XGBoost models, bridging machine learning discovery with survival analysis. Key refinements include the management of Non-Proportional Hazards via stratification, the identification of highly discriminative interaction terms through an Effect-Size & Stability Rule, and the modeling of non-linear effects using Restricted Cubic Splines.

Author

Andrés González Santa Cruz

Published

March 28, 2026

SENDAs Agreement 1 Update 2010-2022 (Prediction, step 2.2, shap-informed models)

This notebook was converted automatically from prediction22.qmd.

Quarto-specific YAML/options were preserved in the first raw cell. I also inserted setup cells at the beginning for project-root detection, loading pred21_ndp_2026_03_26.Rdata, plot sizing, and optional renv repair.

Code

# --- Setup: project root, data load, plot size/resolution, and renv activation ---

# adjust plot size and resolution for high-quality output

# --- Load pred21_ndp_2026_03_26.Rdata (simple, project-root aware, no functions) --- 
# 1) Locate the file anywhere under the project root (current WD with a trailing "/cons" trimmed)
project_root <- sub("(/)?cons/?$", "", normalizePath(getwd(), winslash = "/", mustWork = FALSE))
fname        <- "pred21_ndp_2026_03_26.Rdata"
hits <- list.files(project_root, pattern = paste0("^", fname, "$"),
                   recursive = TRUE, full.names = TRUE, ignore.case = TRUE)
if (!length(hits)) stop("File not found: ", fname, "\nSearched under: ", project_root)
path <- hits[1]  # take first match
message("Loading: ", path)

# 2) Load into a dedicated environment (keeps .GlobalEnv clean)
env_pred <- new.env(parent = emptyenv())
loaded_objects <- load(path, envir = env_pred)
message("Loaded ", length(loaded_objects), " object(s): ", paste(loaded_objects, collapse = ", "))

# 3) Quick sanity check for each loaded object
for (obj_name in loaded_objects) {
  obj <- get(obj_name, envir = env_pred)
  message("\n--- ", obj_name, " ---")
  if (is.data.frame(obj)) {
    message("Rows: ", nrow(obj), " | Cols: ", ncol(obj))
  } else {
    message("Class: ", paste(class(obj), collapse = ", "), " | Length/Size: ", length(obj))
  }
  # print(str(obj))
}

# 4) Extract all loaded objects to global environment
for (obj_name in loaded_objects) {
  assign(obj_name, get(obj_name, envir = env_pred), envir = .GlobalEnv)
}

# Clean up temporary environment
rm(env_pred)

options(jupyter.max_output_size = Inf)
options(repr.plot.width = 16 * 1.5, repr.plot.height = 12 * 1.5)
options(repr.plot.res = 300)

Sys.setenv(RENV_PROJECT = project_root)
source(paste0(project_root, "/renv/activate.R"))

getwd()
Sys.getenv("RENV_PROJECT")
.libPaths()

Loading: G:/My Drive/Alvacast/SISTRAT 2023/data/20241015_out/pred21_ndp_2026_03_26.Rdata

Warning: namespace ‘ggplot2’ is not available and has been replaced
by .GlobalEnv when processing object ‘cal_readmit2’
Warning: namespace ‘ggpubr’ is not available and has been replaced
by .GlobalEnv when processing object ‘plot_cloglog_comb’
Warning: namespace ‘survminer’ is not available and has been replaced
by .GlobalEnv when processing object ‘plot_cloglog_comb’
Warning: namespace ‘scales’ is not available and has been replaced
by .GlobalEnv when processing object ‘plot_cloglog_comb’
Warning: namespace ‘patchwork’ is not available and has been replaced
by .GlobalEnv when processing object ‘plot_cloglog_comb’

Loaded 281 object(s): infectious_set, find_latest_file, cal_readmit2, mode_pick_int, cal_readmit3, python_columns, nm, cal_death_full_ici, collapse_first_sub_used_other, .cph_boot_extract_ibs, char_variables, secret_file_path, cols_to_exclude, total_cpus, decrypt_column, n1b_cases, api_key, plot_cloglog_comb, encrypt_column, renviron_path, .mi_cv_build_level_mapping, subkey_to_label, hits, missing_pct, target_cols, py_split, aicc_death_upd2, dummy_formula, apply_ordered_mappings, t_grid, cif_d_before, m4_cases, calibrate_cph_grouped, m3_runs, violations, ph_test_death, root, ci_first, cal_readmit, existing_lines, obj, .Random.seed, n_before, results_readmit2_ibs, formula_readmit_updated2, decrypt_chr, DUMMY_REFERENCE, .mi_cv_normalize_call_name, pairs, key_exists, encrypt_chr, sum_dates_polars, normalize_txt, find_python_rel, .mi_cv_collect_strata_term_labels, time_before_dedup2, numerical_vars2, lab_first_index, print.cph_grouped_calibration, evaluate_mi_cv_safe_strata, var2_table, ci2, install_docker, m4_runs_red, py, heavy_to_remove, results_readmit3_ibs, pat, char_variables_present, ph_test_readmit, rdata_path, safe_min_where_date, .mi_cv_extract_strata_args, plot_schoenfeld_residuals, predictors, dt_numeric, formula_death_updated, meta_cols, split2125, loaded_objects, mis_colores, vars, time_labels, cal_death2, observed_foreign_n, cal_death3, expected_n_train, .cph_boot_normalize_time, results_death_ibs, get_mode_numeric, rn, n0_cases, .QuartoInlineRender, tot_variables_present, format_cells, aicc_death_upd, paks, taus, .mi_cv_make_strata_signature_safe, is_cat, .end_date, result_df, cif_d_after, fit_stratified_readmit, cox_readmit2, n0_runs, t_end, compare_mi_models_d1, dt, formula_readmit, results_readmit_ibs, sf_km, get_all_oobs, factor_vars, categorical_vars, tot_variables, print.cscox_aj_grouped_calibration, in_r_not_py, horiz, n_patients_meet, high_uncertainty_imps, .censor_date, in_py_not_r, cox_columns, m4_runs, obj_name, check_docker_running, dit90, extra_num, i, set_reference_levels, preprocess_features_robust, results_death2_ibs, df_pred, p, lab_final, .mi_cv_sanitize_level_name, out, plot.cscox_aj_grouped_calibration, p_death_eng, icd10_broad, formula_death, num_vars, .cph_boot_prepare_model_split, supergroups_levels, results_death2, results_death3, is_stata_ok, .mi_cv_prepare_model_split, other_spec_set, most_frequent, safe_min_date, n1b_patients, .cph_boot_event01, labels_vec, safe_min_where_int, gr, cox_death2, pair_str, cal_death, m3_cases_red, sf_c1, corrected_datasets_nondum_filt, sf_c2, .mi_cv_collect_explicit_factor_vars, all_required_cols, toString2, lab_excl_final, total_table, lab_after, time_cut, extra_cat, .mi_cv_mean_or_na, mixed_assoc, lab_orig, id_centro_vec, lab_orig_clean, dummified_lvls_vars, .use_discharge_origin, .mi_cv_rewrite_strata_calls, to01, py_corrected_datasets, p_esp_readm, cal_readmit_full_aj_rcv, .mi_cv_quantile_or_na, output_dir, sum_dates, m3, fit_stratified_death, categorical_vars2, m4, .t0, coress, combined_plots, classify_dx_vec, plot.cph_grouped_calibration, aicc_death, cph_evaluate_boot_oob_mi_corrected, formula_death_updated2, violations_readm, wdpath, predictor_formula, col_name, nzv, results_death, idx, formula_readmit_updated, check_quarto_version, is_num, p_esp_death, lab_post_orig_clean, escape_regex, df_surv, .mi_cv_coerce_rowwise_value, n_drop_predeath, threshold_mb, keys, km_df, eta_num_cat, r_data, sesion_info, figs_out, .cph_boot_worker, table_one_clean_print, risk_at, ibs_ipcw_train, calibrate_cscox_aj_grouped, results_readmit2, fname, results_readmit3, data_out, km, get_cif_by_code, ftime_first, injury_set, pairs_gt_02, m4_cases_red, cox_readmit, .main, .mi_cv_format_level_sets, results_readmit, table_one_clean, aicc_readmit_upd, leak_time_cols, path, compute_mi_aicc, m3_cases, vars_check, organ_set, replicates_n, lab_flag, lab_proc, envpath, lab_post_orig, .start_date, sample_n_with_seed, lab_discard_single, tp_first, m3_runs_red, .mi_cv_expr_label, cpus, fstatus_first, cramers_v, results_death3_ibs, labels_map, tidy_cph, time_breaks, lab_discard_first, aicc_readmit_upd2, aicc_readmit, var1_table, var1, .mi_cv_collect_observed_levels, numerical_vars, cat_vars, var2, project_root, cox_death, AJ_at, make_dummies, time_event_vars

--- infectious_set ---
Class: character | Length/Size: 3

--- find_latest_file ---
Class: function | Length/Size: 1

--- cal_readmit2 ---
Class: cph_grouped_calibration | Length/Size: 12

--- mode_pick_int ---
Class: function | Length/Size: 1

--- cal_readmit3 ---
Class: cph_grouped_calibration | Length/Size: 12

--- python_columns ---
Class: character | Length/Size: 69

--- nm ---
Class: character | Length/Size: 1

--- cal_death_full_ici ---
Class: cph_grouped_calibration | Length/Size: 12

--- collapse_first_sub_used_other ---
Class: function | Length/Size: 1

--- .cph_boot_extract_ibs ---
Class: function | Length/Size: 1

--- char_variables ---
Class: character | Length/Size: 54

--- secret_file_path ---
Class: character | Length/Size: 1

--- cols_to_exclude ---
Class: character | Length/Size: 6

--- total_cpus ---
Class: integer | Length/Size: 1

--- decrypt_column ---
Class: function | Length/Size: 1

--- n1b_cases ---
Class: integer | Length/Size: 1

--- api_key ---
Class: character | Length/Size: 1

--- plot_cloglog_comb ---
Class: patchwork, gg, ggplot | Length/Size: 12

--- encrypt_column ---
Class: function | Length/Size: 1

--- renviron_path ---
Class: character | Length/Size: 1

--- .mi_cv_build_level_mapping ---
Class: function | Length/Size: 1

--- subkey_to_label ---
Class: function | Length/Size: 1

--- hits ---
Class: character | Length/Size: 1

--- missing_pct ---
Class: numeric | Length/Size: 73

--- target_cols ---
Class: character | Length/Size: 4

--- py_split ---
Rows: 88152 | Cols: 5

--- aicc_death_upd2 ---
Class: list | Length/Size: 2

--- dummy_formula ---
Class: formula | Length/Size: 2

--- apply_ordered_mappings ---
Class: function | Length/Size: 1

--- t_grid ---
Class: numeric | Length/Size: 4966

--- cif_d_before ---
Rows: 4484 | Cols: 2

--- m4_cases ---
Class: integer | Length/Size: 1

--- calibrate_cph_grouped ---
Class: function | Length/Size: 1

--- m3_runs ---
Class: integer | Length/Size: 1

--- violations ---
Class: character | Length/Size: 12

--- ph_test_death ---
Class: cox.zph | Length/Size: 8

--- root ---
Class: character | Length/Size: 1

--- ci_first ---
Class: cuminc | Length/Size: 2

--- cal_readmit ---
Class: cph_grouped_calibration | Length/Size: 12

--- existing_lines ---
Class: character | Length/Size: 1

--- obj ---
Class: function | Length/Size: 1

--- .Random.seed ---
Class: integer | Length/Size: 626

--- n_before ---
Class: integer | Length/Size: 1

--- results_readmit2_ibs ---
Class: list | Length/Size: 9

--- formula_readmit_updated2 ---
Class: formula | Length/Size: 3

--- decrypt_chr ---
Class: function | Length/Size: 1

--- DUMMY_REFERENCE ---
Class: character | Length/Size: 24

--- .mi_cv_normalize_call_name ---
Class: function | Length/Size: 1

--- pairs ---
Class: list | Length/Size: 2701

--- key_exists ---
Class: logical | Length/Size: 1

--- encrypt_chr ---
Class: function | Length/Size: 1

--- sum_dates_polars ---
Class: function | Length/Size: 1

--- normalize_txt ---
Class: function | Length/Size: 1

--- find_python_rel ---
Class: function | Length/Size: 1

--- .mi_cv_collect_strata_term_labels ---
Class: function | Length/Size: 1

--- time_before_dedup2 ---
Class: POSIXct, POSIXt | Length/Size: 1

--- numerical_vars2 ---
Class: character | Length/Size: 17

--- lab_first_index ---
Class: character | Length/Size: 1

--- print.cph_grouped_calibration ---
Class: function | Length/Size: 1

--- evaluate_mi_cv_safe_strata ---
Class: function | Length/Size: 1

--- var2_table ---
Class: table | Length/Size: 10

--- ci2 ---
Class: cuminc | Length/Size: 2

--- install_docker ---
Class: function | Length/Size: 1

--- m4_runs_red ---
Class: integer | Length/Size: 1

--- py ---
Class: character | Length/Size: 1

--- heavy_to_remove ---
Class: character | Length/Size: 13

--- results_readmit3_ibs ---
Class: list | Length/Size: 9

--- pat ---
Class: character | Length/Size: 1

--- char_variables_present ---
Class: character | Length/Size: 56

--- ph_test_readmit ---
Class: cox.zph | Length/Size: 8

--- rdata_path ---
Class: character | Length/Size: 1

--- safe_min_where_date ---
Class: function | Length/Size: 1

--- .mi_cv_extract_strata_args ---
Class: function | Length/Size: 1

--- plot_schoenfeld_residuals ---
Class: function | Length/Size: 1

--- predictors ---
Class: character | Length/Size: 65

--- dt_numeric ---
Class: numeric | Length/Size: 1

--- formula_death_updated ---
Class: formula | Length/Size: 3

--- meta_cols ---
Class: character | Length/Size: 6

--- split2125 ---
Rows: 88152 | Cols: 4

--- loaded_objects ---
Class: character | Length/Size: 154

--- mis_colores ---
Class: character | Length/Size: 5

--- vars ---
Class: character | Length/Size: 74

--- time_labels ---
Class: character | Length/Size: 5

--- cal_death2 ---
Class: cph_grouped_calibration | Length/Size: 12

--- observed_foreign_n ---
Class: integer | Length/Size: 1

--- cal_death3 ---
Class: cph_grouped_calibration | Length/Size: 12

--- expected_n_train ---
Class: numeric | Length/Size: 1

--- .cph_boot_normalize_time ---
Class: function | Length/Size: 1

--- results_death_ibs ---
Class: list | Length/Size: 9

--- get_mode_numeric ---
Class: function | Length/Size: 1

--- rn ---
Class: character | Length/Size: 176

--- n0_cases ---
Class: integer | Length/Size: 1

--- .QuartoInlineRender ---
Class: function | Length/Size: 1

--- tot_variables_present ---
Class: character | Length/Size: 73

--- format_cells ---
Class: function | Length/Size: 1

--- aicc_death_upd ---
Class: list | Length/Size: 2

--- paks ---
Class: character | Length/Size: 30

--- taus ---
Class: numeric | Length/Size: 4

--- .mi_cv_make_strata_signature_safe ---
Class: function | Length/Size: 1

--- is_cat ---
Class: function | Length/Size: 1

--- .end_date ---
Class: Date | Length/Size: 1

--- result_df ---
Rows: 5 | Cols: 4

--- cif_d_after ---
Rows: 1416 | Cols: 2

--- fit_stratified_readmit ---
Class: survfit | Length/Size: 17

--- cox_readmit2 ---
Class: coxph | Length/Size: 21

--- n0_runs ---
Class: integer | Length/Size: 1

--- t_end ---
Class: numeric | Length/Size: 1

--- compare_mi_models_d1 ---
Class: function | Length/Size: 1

--- dt ---
Class: difftime | Length/Size: 1

--- formula_readmit ---
Class: formula | Length/Size: 3

--- results_readmit_ibs ---
Class: list | Length/Size: 9

--- sf_km ---
Class: stepfun, function | Length/Size: 1

--- get_all_oobs ---
Class: function | Length/Size: 1

--- factor_vars ---
Class: character | Length/Size: 24

--- categorical_vars ---
Class: character | Length/Size: 56

--- tot_variables ---
Class: character | Length/Size: 73

--- print.cscox_aj_grouped_calibration ---
Class: function | Length/Size: 1

--- in_r_not_py ---
Rows: 0 | Cols: 47

--- horiz ---
Rows: 4 | Cols: 5

--- n_patients_meet ---
Class: integer | Length/Size: 1

--- high_uncertainty_imps ---
Class: character | Length/Size: 3

--- .censor_date ---
Class: Date | Length/Size: 1

--- in_py_not_r ---
Rows: 0 | Cols: 5

--- cox_columns ---
Class: character | Length/Size: 69

--- m4_runs ---
Class: integer | Length/Size: 1

--- obj_name ---
Class: character | Length/Size: 1

--- check_docker_running ---
Class: function | Length/Size: 1

--- dit90 ---
Class: numeric | Length/Size: 1

--- extra_num ---
Class: character | Length/Size: 0

--- i ---
Class: integer | Length/Size: 1

--- set_reference_levels ---
Class: function | Length/Size: 1

--- preprocess_features_robust ---
Class: function | Length/Size: 1

--- results_death2_ibs ---
Class: list | Length/Size: 9

--- df_pred ---
Rows: 88504 | Cols: 45

--- p ---
Class: ggsurvplot, ggsurv, list | Length/Size: 3

--- lab_final ---
Class: character | Length/Size: 1

--- .mi_cv_sanitize_level_name ---
Class: function | Length/Size: 1

--- out ---
Rows: 4966 | Cols: 5

--- plot.cscox_aj_grouped_calibration ---
Class: function | Length/Size: 1

--- p_death_eng ---
Class: ggsurvplot, ggsurv, list | Length/Size: 3

--- icd10_broad ---
Class: function | Length/Size: 1

--- formula_death ---
Class: formula | Length/Size: 3

--- num_vars ---
Class: character | Length/Size: 7

--- .cph_boot_prepare_model_split ---
Class: function | Length/Size: 1

--- supergroups_levels ---
Class: character | Length/Size: 6

--- results_death2 ---
Class: list | Length/Size: 11

--- results_death3 ---
Class: list | Length/Size: 11

--- is_stata_ok ---
Class: function | Length/Size: 1

--- .mi_cv_prepare_model_split ---
Class: function | Length/Size: 1

--- other_spec_set ---
Class: character | Length/Size: 4

--- most_frequent ---
Class: function | Length/Size: 1

--- safe_min_date ---
Class: function | Length/Size: 1

--- n1b_patients ---
Class: integer | Length/Size: 1

--- .cph_boot_event01 ---
Class: function | Length/Size: 1

--- labels_vec ---
Class: character | Length/Size: 73

--- safe_min_where_int ---
Class: function | Length/Size: 1

--- gr ---
Class: grViz, htmlwidget | Length/Size: 8

--- cox_death2 ---
Class: coxph | Length/Size: 21

--- pair_str ---
Class: function | Length/Size: 1

--- cal_death ---
Class: cph_grouped_calibration | Length/Size: 12

--- m3_cases_red ---
Class: integer | Length/Size: 1

--- sf_c1 ---
Class: stepfun, function | Length/Size: 1

--- corrected_datasets_nondum_filt ---
Class: list | Length/Size: 5

--- sf_c2 ---
Class: stepfun, function | Length/Size: 1

--- .mi_cv_collect_explicit_factor_vars ---
Class: function | Length/Size: 1

--- all_required_cols ---
Class: character | Length/Size: 75

--- toString2 ---
Class: function | Length/Size: 1

--- lab_excl_final ---
Class: character | Length/Size: 1

--- total_table ---
Class: table | Length/Size: 5

--- lab_after ---
Class: character | Length/Size: 1

--- time_cut ---
Class: factor | Length/Size: 70521

--- extra_cat ---
Class: character | Length/Size: 0

--- .mi_cv_mean_or_na ---
Class: function | Length/Size: 1

--- mixed_assoc ---
Class: function | Length/Size: 1

--- lab_orig ---
Class: character | Length/Size: 1

--- id_centro_vec ---
Class: numeric | Length/Size: 88504

--- lab_orig_clean ---
Class: character | Length/Size: 1

--- dummified_lvls_vars ---
Class: character | Length/Size: 27

--- .use_discharge_origin ---
Class: logical | Length/Size: 1

--- .mi_cv_rewrite_strata_calls ---
Class: function | Length/Size: 1

--- to01 ---
Class: function | Length/Size: 1

--- py_corrected_datasets ---
Class: list | Length/Size: 5

--- p_esp_readm ---
Class: ggsurvplot, ggsurv, list | Length/Size: 3

--- cal_readmit_full_aj_rcv ---
Class: cscox_aj_grouped_calibration | Length/Size: 11

--- .mi_cv_quantile_or_na ---
Class: function | Length/Size: 1

--- output_dir ---
Class: character | Length/Size: 1

--- sum_dates ---
Class: function | Length/Size: 1

--- m3 ---
Class: list | Length/Size: 5

--- fit_stratified_death ---
Class: survfit | Length/Size: 17

--- categorical_vars2 ---
Class: character | Length/Size: 54

--- m4 ---
Class: list | Length/Size: 4

--- .t0 ---
Class: POSIXct, POSIXt | Length/Size: 1

--- coress ---
Class: numeric | Length/Size: 1

--- combined_plots ---
Class: ggcoxzph, ggsurv, list | Length/Size: 12

--- classify_dx_vec ---
Class: function | Length/Size: 1

--- plot.cph_grouped_calibration ---
Class: function | Length/Size: 1

--- aicc_death ---
Class: list | Length/Size: 2

--- cph_evaluate_boot_oob_mi_corrected ---
Class: function | Length/Size: 1

--- formula_death_updated2 ---
Class: formula | Length/Size: 3

--- violations_readm ---
Class: character | Length/Size: 21

--- wdpath ---
Class: character | Length/Size: 1

--- predictor_formula ---
Class: formula | Length/Size: 3

--- col_name ---
Class: character | Length/Size: 1

--- nzv ---
Rows: 72 | Cols: 4

--- results_death ---
Class: list | Length/Size: 11

--- idx ---
Class: integer | Length/Size: 5

--- formula_readmit_updated ---
Class: formula | Length/Size: 3

--- check_quarto_version ---
Class: function | Length/Size: 1

--- is_num ---
Class: function | Length/Size: 1

--- p_esp_death ---
Class: ggsurvplot, ggsurv, list | Length/Size: 3

--- lab_post_orig_clean ---
Class: character | Length/Size: 1

--- escape_regex ---
Class: function | Length/Size: 1

--- df_surv ---
Rows: 70521 | Cols: 76

--- .mi_cv_coerce_rowwise_value ---
Class: function | Length/Size: 1

--- n_drop_predeath ---
Class: integer | Length/Size: 1

--- threshold_mb ---
Class: numeric | Length/Size: 1

--- keys ---
Class: character | Length/Size: 76

--- km_df ---
Rows: 4966 | Cols: 2

--- eta_num_cat ---
Class: function | Length/Size: 1

--- r_data ---
Rows: 88152 | Cols: 47

--- sesion_info ---
Class: session_info, list | Length/Size: 2

--- figs_out ---
Class: character | Length/Size: 1

--- .cph_boot_worker ---
Class: function | Length/Size: 1

--- table_one_clean_print ---
Class: matrix, array | Length/Size: 1936

--- risk_at ---
Class: function | Length/Size: 1

--- ibs_ipcw_train ---
Class: function | Length/Size: 1

--- calibrate_cscox_aj_grouped ---
Class: function | Length/Size: 1

--- results_readmit2 ---
Class: list | Length/Size: 11

--- fname ---
Class: character | Length/Size: 1

--- results_readmit3 ---
Class: list | Length/Size: 11

--- data_out ---
Class: character | Length/Size: 1

--- km ---
Class: survfit | Length/Size: 16

--- get_cif_by_code ---
Class: function | Length/Size: 1

--- ftime_first ---
Class: numeric | Length/Size: 88504

--- injury_set ---
Class: character | Length/Size: 1

--- pairs_gt_02 ---
Rows: 197 | Cols: 3

--- m4_cases_red ---
Class: integer | Length/Size: 1

--- cox_readmit ---
Class: coxph | Length/Size: 21

--- .main ---
Class: function | Length/Size: 1

--- .mi_cv_format_level_sets ---
Class: function | Length/Size: 1

--- results_readmit ---
Class: list | Length/Size: 11

--- table_one_clean ---
Class: TableOne | Length/Size: 3

--- aicc_readmit_upd ---
Class: list | Length/Size: 2

--- leak_time_cols ---
Class: character | Length/Size: 2

--- path ---
Class: character | Length/Size: 1

--- compute_mi_aicc ---
Class: function | Length/Size: 1

--- m3_cases ---
Class: integer | Length/Size: 1

--- vars_check ---
Class: character | Length/Size: 73

--- organ_set ---
Class: character | Length/Size: 5

--- replicates_n ---
Class: numeric | Length/Size: 1

--- lab_flag ---
Class: character | Length/Size: 1

--- lab_proc ---
Class: character | Length/Size: 1

--- envpath ---
Class: character | Length/Size: 1

--- lab_post_orig ---
Class: character | Length/Size: 1

--- .start_date ---
Class: Date | Length/Size: 1

--- sample_n_with_seed ---
Class: function | Length/Size: 1

--- lab_discard_single ---
Class: character | Length/Size: 1

--- tp_first ---
Class: list | Length/Size: 2

--- m3_runs_red ---
Class: integer | Length/Size: 1

--- .mi_cv_expr_label ---
Class: function | Length/Size: 1

--- cpus ---
Class: numeric | Length/Size: 1

--- fstatus_first ---
Class: integer | Length/Size: 88504

--- cramers_v ---
Class: function | Length/Size: 1

--- results_death3_ibs ---
Class: list | Length/Size: 9

--- labels_map ---
Class: character | Length/Size: 76

--- tidy_cph ---
Class: function | Length/Size: 1

--- time_breaks ---
Class: numeric | Length/Size: 6

--- lab_discard_first ---
Class: character | Length/Size: 1

--- aicc_readmit_upd2 ---
Class: list | Length/Size: 2

--- aicc_readmit ---
Class: list | Length/Size: 2

--- var1_table ---
Class: table | Length/Size: 10

--- var1 ---
Class: character | Length/Size: 1

--- .mi_cv_collect_observed_levels ---
Class: function | Length/Size: 1

--- numerical_vars ---
Class: character | Length/Size: 17

--- cat_vars ---
Class: character | Length/Size: 35

--- var2 ---
Class: character | Length/Size: 1

--- project_root ---
Class: character | Length/Size: 1

--- cox_death ---
Class: coxph | Length/Size: 21

--- AJ_at ---
Class: function | Length/Size: 1

--- make_dummies ---
Class: function | Length/Size: 1

--- time_event_vars ---
Class: character | Length/Size: 4
- Project 'G:/My Drive/Alvacast/SISTRAT 2023' loaded. [renv 1.1.5]

[1] "G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32"              
[2] "C:/Users/andre/AppData/Local/R/cache/R/renv/sandbox/windows/R-4.4/x86_64-w64-mingw32/e0da0d43"

Code

# --- Optional renv cleanup / repair ---
# Run this only if you actually need to repair the project library.

unlink("renv/staging", recursive = TRUE, force = TRUE)
unlink("renv/library/windows/R-4.4/x86_64-w64-mingw32/geepack", recursive = TRUE, force = TRUE)
unlink("renv/library/windows/R-4.4/x86_64-w64-mingw32/V8", recursive = TRUE, force = TRUE)

renv::status()
renv::restore(exclude = c("polars"))

No issues found -- the project is in a consistent state.
- The library is already synchronized with the lockfile.

Data Loading and Exploration

Loading Packages and uniting databases

Code

#renv falls back to copying rather than symlinking, which is evidently very slow in this configuration.
renv::settings$use.cache(FALSE)
#only use explicit dependencies (in DESCRIPTION)
renv::settings$snapshot.type("implicit")
#check if rstools is installed
if(Sys.info()["sysname"]=="Windows"){
try(installr::install.Rtools(check_r_update=F))
}
check_quarto_version <- function(required = "1.7.29", comparator = c("ge","gt","le","lt","eq")) {
  comparator <- match.arg(comparator)
  current <- package_version(paste(unlist(quarto::quarto_version()), collapse = "."))
  req     <- package_version(required)
  ok <- switch(comparator,
               ge = current >= req,
               gt = current >  req,
               le = current <= req,
               lt = current <  req,
               eq = current == req)
  if (!ok) {
    stop(sprintf("Quarto version check failed: need %s %s (installed: %s).",
                 comparator, required, current), call. = FALSE)
  }
  invisible(TRUE)
}
check_quarto_version("1.7.29", "ge") 
#change repository to CL
local({
  r <- getOption("repos")
  r["CRAN"] <- "https://cran.dcc.uchile.cl/"
  options(repos=r)
})
if(!require(pacman)){install.packages("pacman");require(pacman)}
if(!require(pak)){install.packages("pak");require(pak)}
pacman::p_unlock(lib.loc = .libPaths()) #para no tener problemas reinstalando paquetes
if(Sys.info()["sysname"]=="Windows"){
if (getRversion() != "4.4.1") { stop("Requires R version 4.4.1; Actual: ", getRversion()) }
}
#check docker
check_docker_running <- function() {
  # Try running 'docker info' to check if Docker is running
  system("docker info", intern = TRUE, ignore.stderr = TRUE)
}
if(Sys.info()["sysname"]=="Windows"){
  install_docker <- function() {
    # Open the Docker Desktop download page in the browser for installation
    browseURL("https://www.docker.com/products/docker-desktop")
  }
  # Main logic
  if (inherits(try(check_docker_running(), silent = TRUE), "try-error")) {
    liftr::install_docker()
  } else {
    message("Docker is running.")
  }
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PACKAGES#######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
unlink("*_cache", recursive=T)
pak::pak_sitrep()
# pak::sysreqs_check_installed(unique(unlist(paks)))
#pak::lockfile_create(unique(unlist(paks)),  "dependencies_duplicates24.lock", dependencies=T)
#pak::lockfile_install("dependencies_duplicates24.lock")
#https://rdrr.io/cran/pak/man/faq.html
#pak::cache_delete()
library(tidytable)
library(ggplot2)
library(readr)
library(tableone)
library(survivalmodels)
#renv::install("patchwork@1.2.0")
library(rms)
library(survidm)
library(caret)
library(survival)
library(caret)
library(dplyr)
library(survex)
library(pec)
library(future)
library(future.apply)
library(parallel)
library(mice)
library(riskRegression)
library(tidyr)
library(doFuture)
library(vcd)
library(survAUC)

#flexsurv
#if(!require(compareCstat)){install.pacakges("compareCstat")}
# library(shapr)#https://norskregnesentral.github.io/shapr/
# library(SemiMarkov)#https://www.degruyterbrill.com/document/doi/10.1515/ijb-2020-0083/html?lang=en
# #https://www.jclinepi.com/article/S0895-4356(24)00142-2/fulltext
#_#_#_#_#_#_#_#_#_#_#_#_#_-----------------------------
# 3. Activate polars code completion (safe to try even if it fails)
#_#_#_#_#_#_#_#_#_#_#_#_#_-----------------------------
#try(polars_code_completion_activate())
#_#_#_#_#_#_#_#_#_#_#_#_#_-----------------------------
# 4. BPMN from GitHub (not on CRAN, so install via devtools if missing)
#_#_#_#_#_#_#_#_#_#_#_#_#_-----------------------------
if (!requireNamespace("bpmn", quietly = TRUE)) {
  devtools::install_github("bergant/bpmn")
}
#_#_#_#_#_#_#_#_#_#_#_#_#_-----------------------------
# 5. PhantomJS Check (use webshot if PhantomJS is missing)
#_#_#_#_#_#_#_#_#_#_#_#_#_-----------------------------
# if (!webshot::is_phantomjs_installed()) {
#   webshot::install_phantomjs()
# }
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#FUNCTIONS######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#
# NO MORE DEBUGS
options(error = NULL)        # si antes tenías options(error = recover) o browser)
options(browserNLdisabled = FALSE)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#
#NAs are replaced with "" in knitr kable
options(knitr.kable.NA = '')
pander::panderOptions('big.mark', ',')
pander::panderOptions('decimal.mark', '.')
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#
#to format rows in bold
format_cells <- function(df, rows ,cols, value = c("italics", "bold", "strikethrough")){
  # select the correct markup
  # one * for italics, two ** for bold
  map <- setNames(c("*", "**", "~~"), c("italics", "bold", "strikethrough"))
  markup <- map[value]  
  for (r in rows){
    for(c in cols){
      # Make sure values are not factors
      df[[c]] <- as.character( df[[c]])
      # Update formatting
      df[r, c] <- ifelse(nchar(df[r, c])==0,"",paste0(markup, gsub(" ", "", df[r, c]), markup))
    }
  }
  return(df)
}
#To produce line breaks in messages and warnings
knitr::knit_hooks$set(
   error = function(x, options) {
     paste('\n\n<div class="alert alert-danger" style="font-size: small !important;">',
           gsub('##', '\n', gsub('^##\ Error', '**Error**', x)),
           '</div>', sep = '\n')
   },
   warning = function(x, options) {
     paste('\n\n<div class="alert alert-warning" style="font-size: small !important;">',
           gsub('##', '\n', gsub('^##\ Warning:', '**Warning**', x)),
           '</div>', sep = '\n')
   },
   message = function(x, options) {
     paste('<div class="message" style="font-size: small !important;">',
           gsub('##', '\n', x),
           '</div>', sep = '\n')
   }
)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#
sum_dates <- function(x){
  cbind.data.frame(
    min= as.Date(min(unclass(as.Date(x)), na.rm=T), origin = "1970-01-01"),
    p001= as.Date(quantile(unclass(as.Date(x)), .001, na.rm=T), origin = "1970-01-01"),
    p005= as.Date(quantile(unclass(as.Date(x)), .005, na.rm=T), origin = "1970-01-01"),
    p025= as.Date(quantile(unclass(as.Date(x)), .025, na.rm=T), origin = "1970-01-01"),
    p25= as.Date(quantile(unclass(as.Date(x)), .25, na.rm=T), origin = "1970-01-01"),
    p50= as.Date(quantile(unclass(as.Date(x)), .5, na.rm=T), origin = "1970-01-01"),
    p75= as.Date(quantile(unclass(as.Date(x)), .75, na.rm=T), origin = "1970-01-01"),
    p975= as.Date(quantile(unclass(as.Date(x)), .975, na.rm=T), origin = "1970-01-01"),
    p995= as.Date(quantile(unclass(as.Date(x)), .995, na.rm=T), origin = "1970-01-01"),
    p999= as.Date(quantile(unclass(as.Date(x)), .999, na.rm=T), origin = "1970-01-01"),
    max= as.Date(max(unclass(as.Date(x)), na.rm=T), origin = "1970-01-01")
  )
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Define the function adapted for Polars
sum_dates_polars <- function(df, date_col) {
  # Create the list of quantiles
  quantiles <- c(0.001, 0.005, 0.025, 0.25, 0.5, 0.75, 0.975, 0.995, 0.999)
  # Create expressions to calculate min and max
  expr_list <- list(
    pl$col(date_col)$min()$alias("min"),
    pl$col(date_col)$max()$alias("max")
  )
  # Add expressions for quantiles
  for (q in quantiles) {
    expr_list <- append(expr_list, pl$col(date_col)$quantile(q)$alias(paste0("p", sub("\\.", "", as.character(q)))))
  }
  # Apply the expressions and return a DataFrame with the results
  df$select(expr_list)
}
# Custom function for sampling with a seed
sample_n_with_seed <- function(data, size, seed) {
  set.seed(seed)
  dplyr::sample_n(data, size)
}
# Function to get the most frequent value 
most_frequent <- function(x) { 
  uniq_vals <- unique(x)
  freq_vals <- sapply(uniq_vals, function(val) sum(x == val))
  most_freq <- uniq_vals[which(freq_vals == max(freq_vals))]
  
  if (length(most_freq) == 1) {
    return(most_freq)
  } else {
    return(NA)
  }
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#CONFIG #######################################################################
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
options(scipen=2) #display numbers rather scientific number
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# Define the function first
#joins these values with semicolons and optionally truncates the result if it exceeds a specified width.
toString2 <- function(x, width = NULL, ...) {
    string <- paste(x, collapse = "; ")
    if (missing(width) || is.null(width) || width == 0) 
        return(string)
    if (width < 0) 
        stop("'width' must be positive")
    if (nchar(string, type = "w") > width) {
        width <- max(6, width)
        string <- paste0(substr(string, 1, width - 3), "...")
    }
    string
}
normalize_txt <- function(x) {
  x|>
    stringi::stri_trans_general("Latin-ASCII")|>
    tolower()|>
    trimws()
}
pair_str <- function(main, sub) {
  main <- as.character(main)
  sub  <- as.character(sub)
  both_na <- is.na(main) & is.na(sub)
  out <- paste0(
    tidytable::coalesce(main, "NA"),
    "::",
    tidytable::coalesce(sub,  "NA")
  )
  out[both_na] <- NA_character_
  out
}
# ── Helpers ────────────────────────────────────────────────────────────
mode_pick_int <- function(x){
  x <- x[!is.na(x)]
  if(length(x)==0) return(NA_integer_)
  tx <- sort(table(x), decreasing = TRUE)
  as.integer(names(tx)[1L])
}
subkey_to_label <- function(x){
  y <- gsub("_"," ", tolower(x))
  y <- gsub("amphetamine type stimulants","amphetamine-type stimulants", y)
  y <- gsub("tranquilizers hypnotics","tranquilizers/hypnotics", y)
  y
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# ── Encryption ────────────────────────────────────────────────────────
# Pack: [24-byte nonce][ciphertext] -> hex
encrypt_chr <- function(x, key) {
    vapply(x, function(v) {
        if (is.na(v)) return(NA_character_)
        ct <- sodium::data_encrypt(charToRaw(as.character(v)), key)
        nonce <- attr(ct, "nonce")                 # 24 bytes
        sodium::bin2hex(c(nonce, ct))              # pack nonce + ct
    }, FUN.VALUE = character(1))
}
# Unpack: hex -> [nonce|ciphertext], restore attr(nonce), then decrypt
decrypt_chr <- function(x, key) {
    vapply(x, function(v) {
        if (is.na(v)) return(NA_character_)
        buf <- sodium::hex2bin(v)
        if (length(buf) < 25L) stop("Ciphertext too short or corrupted.")
        nonce <- buf[1:24]
        ct    <- buf[-(1:24)]
        attr(ct, "nonce") <- nonce
        rawToChar(sodium::data_decrypt(ct, key))
    }, FUN.VALUE = character(1))
}
# Function to encrypt a character vector
encrypt_column <- function(x, key) {
  sapply(x, function(item) {
    if (is.na(item) || item == "") {
      return(NA_character_)
    }
    encrypted_raw <- sodium::data_encrypt(charToRaw(item), key)
    base64enc::base64encode(encrypted_raw)  # Convert to base64 for storage
  }, USE.NAMES = FALSE)
}
decrypt_column <- function(x, key) {
  sapply(x, function(item) {
    if (is.na(item)) return(NA_character_)
    encrypted_raw <- base64enc::base64decode(item)
    rawToChar(sodium::data_decrypt(encrypted_raw, key))
  }, USE.NAMES = FALSE)
}
is_stata_ok <- function(x) {
  nchar(x) <= 32 & grepl("^[A-Za-z][A-Za-z0-9_]*$", x)
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# ── Tidy RMS ────────────────────────────────────────────────────────
tidy_cph <- function(model) {
  if (!inherits(model, "cph")) stop("Model must be an rms::cph object")
  
  # run summary
  s <- summary(model)
  df_s <- as.data.frame(unclass(s))
  df_s$rown <- rownames(s)
  
  # find rows that are "Hazard Ratio"
  hr_rows <- trimws(df_s$rown) == "Hazard Ratio"
  
  # build tidy tibble
  tibble::tibble(
    term     = trimws(rownames(s)[which(hr_rows) - 1L]), # previous row = variable name
    estimate = df_s$Effect[hr_rows],                     # Hazard Ratio
    conf.low = df_s$`Lower 0.95`[hr_rows],
    conf.high= df_s$`Upper 0.95`[hr_rows],
    p.value  = df_s$P[hr_rows]
  )
}

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
find_latest_file <- function(project_root, prefix) {
  # Get all files matching the pattern
  pattern <- paste0(prefix, "_\\d{8}_\\d{4}\\.csv$")
  files <- list.files(
    path = project_root,
    pattern = pattern,
    full.names = TRUE
  )
  if (length(files) == 0) {
    stop(paste("No files found matching pattern:", pattern))
  }
  # Extract timestamps from filenames (format: YYYYMMDD_HHMM)
  extract_timestamp <- function(filename) {
    matches <- regmatches(
      basename(filename),
      gregexpr("\\d{8}_\\d{4}", basename(filename))
    )
    if (length(matches[[1]]) == 0) {
      return(NA)
    }
    timestamp_str <- matches[[1]][length(matches[[1]])]
    return(timestamp_str)
  }
  timestamps <- sapply(files, extract_timestamp)
  # Remove any files where timestamp extraction failed
  valid_idx <- !is.na(timestamps)
  files <- files[valid_idx]
  timestamps <- timestamps[valid_idx]
  if (length(files) == 0) {
    stop("No valid timestamps found in filenames")
  }
  # Sort by timestamp (descending) and return the latest
  latest_idx <- order(timestamps, decreasing = TRUE)[1]
  return(files[latest_idx])
}

Installing package into ‘G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32’
(as ‘lib’ is unspecified)

trying URL 'https://packagemanager.posit.co/cran/latest/bin/windows/contrib/4.4/pkgbuild_1.4.8.zip'
Content type 'binary/octet-stream' length 211749 bytes (206 KB)
downloaded 206 KB

package ‘pkgbuild’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\andre\AppData\Local\Temp\RtmpYDfCbF\downloaded_packages
Loading required package: pkgbuild
No need to install Rtools - You've got the relevant version of Rtools installed
Loading required package: pacman
Loading required package: pak

No 00LOCK detected in:
 G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32
No 00LOCK detected in:
 C:/Users/andre/AppData/Local/R/cache/R/renv/sandbox/windows/R-4.4/x86_64-w64-mingw32/e0da0d43
Docker is running.

Warning message:
In system("docker info", intern = TRUE, ignore.stderr = TRUE) :
  running command 'docker info' had status 1

* pak version:
- 0.8.0.1
* Version information:
- pak platform: x86_64-w64-mingw32 (current: x86_64-w64-mingw32, compatible)
- pak repository: - (local install?)
* Optional packages installed:
- pillar
* Library path:
- G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32
- C:/Users/andre/AppData/Local/R/cache/R/renv/sandbox/windows/R-4.4/x86_64-w64-mingw32/e0da0d43
* pak is installed at G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32/pak.
* Dependency versions:
- callr      3.7.6      
- cli        3.6.2      
- curl       5.2.1      
- desc       1.4.3      
- filelock   1.0.3      
- jsonlite   1.8.8      
- lpSolve    5.6.23.9000
- pkgbuild   1.4.4      
- pkgcache   2.2.2.9000 
- pkgdepends 0.7.2.9000 
- pkgsearch  3.1.3.9000 
- processx   3.8.4      
- ps         1.7.6      
- R6         2.5.1      
- zip        2.3.1      
* Dependencies can be loaded

Attaching package: ‘tidytable’

The following objects are masked from ‘package:stats’:

    dt, filter, lag

The following object is masked from ‘package:base’:

    %in%

Loading required package: Hmisc

Attaching package: ‘Hmisc’

The following object is masked from ‘package:tidytable’:

    summarize

The following objects are masked from ‘package:base’:

    format.pval, units

Loading required package: lattice

Attaching package: ‘survival’

The following object is masked from ‘package:caret’:

    cluster


Attaching package: ‘dplyr’

The following objects are masked from ‘package:Hmisc’:

    src, summarize

The following objects are masked from ‘package:tidytable’:

    across, add_count, add_tally, anti_join, arrange, between, bind_cols, bind_rows, c_across,
    case_match, case_when, coalesce, consecutive_id, count, cross_join, cume_dist, cur_column,
    cur_data, cur_group_id, cur_group_rows, dense_rank, desc, distinct, filter, first, full_join,
    group_by, group_cols, group_split, group_vars, if_all, if_any, if_else, inner_join,
    is_grouped_df, lag, last, lead, left_join, min_rank, mutate, n, n_distinct, na_if, nest_by,
    nest_join, nth, percent_rank, pick, pull, recode, reframe, relocate, rename, rename_with,
    right_join, row_number, rowwise, select, semi_join, slice, slice_head, slice_max, slice_min,
    slice_sample, slice_tail, summarise, summarize, tally, top_n, transmute, tribble, ungroup

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Attaching package: ‘survex’

The following object is masked from ‘package:dplyr’:

    explain

Loading required package: prodlim

Attaching package: ‘pec’

The following object is masked from ‘package:caret’:

    R2

The following object is masked from ‘package:survivalmodels’:

    cindex


Attaching package: ‘future’

The following object is masked from ‘package:survival’:

    cluster

The following object is masked from ‘package:caret’:

    cluster


Attaching package: ‘mice’

The following objects are masked from ‘package:tidytable’:

    complete, filter

The following object is masked from ‘package:stats’:

    filter

The following objects are masked from ‘package:base’:

    cbind, rbind

Registered S3 method overwritten by 'riskRegression':
  method        from 
  nobs.multinom broom
riskRegression version 2026.02.13

Attaching package: ‘riskRegression’

The following objects are masked from ‘package:pec’:

    ipcw, selectCox


Attaching package: ‘tidyr’

The following objects are masked from ‘package:tidytable’:

    complete, crossing, drop_na, expand, expand_grid, extract, fill, nest, nesting, pivot_longer,
    pivot_wider, replace_na, separate, separate_longer_delim, separate_rows, separate_wider_delim,
    separate_wider_regex, tribble, uncount, unite, unnest, unnest_longer, unnest_wider

Loading required package: foreach
Loading required package: grid

Code


root <- here::here()

project_root <- gsub("/cons$", "", root) 

data_out <- file.path(project_root, "data", "20241015_out")
figs_out <- file.path(root, "_figs")

Note

Aims - To identify key prognostic and predictive factors associated with the risk of, and time to, the first readmission to treatment among patients aged 18-64 undergoing initial treatment for Substance Use Disorder (SUD) in Chile between 2010 and 2020. - To identify key prognostic and predictive factors associated with the risk of, and time to, all-cause mortality among the same cohort of patients undergoing initial treatment for SUD in Chile. - To predict the risk and timing of first treatment readmission and all-cause mortality simultaneously, treating these outcomes within a semi-competing risks framework. This will allow for the estimation of cumulative incidence for each event in the presence of the other for patients aged 18-64 who initiated SUD treatment in Chile between 2010 and 2020. This study has three primary objectives. First, we aim to identify key prognostic and predictive factors associated with the risk and timing of two critical outcomes: (i) first readmission to treatment and (ii) all-cause mortality. Second, using a semi-competing risks framework, we will develop a model to simultaneously predict these two outcomes, accounting for the fact that death precludes the possibility of readmission. Finally, this model will be used to estimate the probability of various patient trajectories over time following initial treatment. The study population includes all patients aged 18-64 who began treatment for Substance Use Disorder (SUD) in Chile between 2010 and 2020.

Code

gc()

total_cpus <- parallel::detectCores()
cpus <- ifelse(total_cpus > 30, 16, 7)

predictor_tiers <- readxl::read_excel(
  file.path(project_root, "cons", "_out",
            "xgb9_dual_predictor_analysis_no_clinical_20260306_1821.xlsx"),
  sheet = "Predictors"
) |>
  dplyr::filter(!grepl("MINOR",importance_tier))


predictor_tiers |> knitr::kable(
  caption = "Predictor tiers based on SHAP importance from XGBoost models"
)



Table: Predictor tiers based on SHAP importance from XGBoost models

|outcome     |feature                                       | mean_abs_shap_log_hazard| rank| rel_to_max_pct|importance_tier       | mean_rank| cv_rank_pct| rank_ci2p5| rank_ci97p5| freq_in_top_45_pct|stable_freq_ge_90pct |direction_method     | delta_point| delta_ci_low| delta_ci_high| direction_n|direction_label |
|:-----------|:---------------------------------------------|------------------------:|----:|--------------:|:---------------------|---------:|-----------:|----------:|-----------:|------------------:|:--------------------|:--------------------|-----------:|------------:|-------------:|-----------:|:---------------|
|Readmission |primary_sub_mod_cocaine_paste                 |                0.0900146|    1|      100.00000|DOMINANT (>=50% max)  |     1.000|   0.0000000|      1.000|     1.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.1847968|    0.1843984|     0.1851865|       70521|Risk-Increasing |
|Readmission |adm_age_rec3                                  |                0.0787429|    2|       87.47793|DOMINANT (>=50% max)  |     2.000|   0.0000000|      2.000|     2.00000|                100|TRUE                 |quartile_q4_minus_q1 |  -0.1631505|   -0.1663790|    -0.1599303|       35281|Risk-Decreasing |
|Readmission |porc_pobr                                     |                0.0730503|    3|       81.15382|DOMINANT (>=50% max)  |     3.000|   0.0000000|      3.000|     3.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.1191467|    0.1174144|     0.1208729|       35755|Risk-Increasing |
|Readmission |sex_rec_woman                                 |                0.0704735|    4|       78.29120|DOMINANT (>=50% max)  |     4.018|   3.3121996|      4.000|     4.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.1836441|    0.1831994|     0.1841494|       70521|Risk-Increasing |
|Readmission |plan_type_corr_pg_pr                          |                0.0697222|    5|       77.45656|DOMINANT (>=50% max)  |     4.982|   2.6713004|      5.000|     5.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.3249241|    0.3233632|     0.3265350|       70521|Risk-Increasing |
|Readmission |plan_type_corr_m_pr                           |                0.0560735|    6|       62.29379|DOMINANT (>=50% max)  |     6.006|   1.2871176|      6.000|     6.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.5916164|    0.5872290|     0.5960696|       70521|Risk-Increasing |
|Readmission |ethnicity                                     |                0.0548934|    7|       60.98278|DOMINANT (>=50% max)  |     6.994|   1.1052943|      7.000|     7.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.4423104|    0.4402313|     0.4442766|       70521|Risk-Increasing |
|Readmission |dit_m                                         |                0.0475426|    8|       52.81654|DOMINANT (>=50% max)  |     8.000|   0.0000000|      8.000|     8.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.0786650|    0.0773707|     0.0798642|       35358|Risk-Increasing |
|Readmission |eva_consumo                                   |                0.0465654|    9|       51.73095|DOMINANT (>=50% max)  |     9.000|   0.0000000|      9.000|     9.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.1301311|    0.1297658|     0.1305044|       50240|Risk-Increasing |
|Readmission |ed_attainment_corr                            |                0.0415941|   10|       46.20817|MAJOR (20-50% max)    |    10.000|   0.0000000|     10.000|    10.00000|                100|TRUE                 |quartile_q4_minus_q1 |  -0.1052401|   -0.1057542|    -0.1047200|       70369|Risk-Decreasing |
|Readmission |occupation_condition_corr24_unemployed        |                0.0364542|   11|       40.49811|MAJOR (20-50% max)    |    11.000|   0.0000000|     11.000|    11.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.0807971|    0.0805471|     0.0810768|       70521|Risk-Increasing |
|Readmission |dg_psiq_cie_10_dg                             |                0.0283792|   12|       31.52734|MAJOR (20-50% max)    |    12.480|   4.0072170|     12.000|    13.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.0574774|    0.0573485|     0.0576134|       70521|Risk-Increasing |
|Readmission |sub_dep_icd10_status_drug_dependence          |                0.0283749|   13|       31.52256|MAJOR (20-50% max)    |    12.520|   3.9944139|     12.000|    13.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.0721797|    0.0718555|     0.0725015|       70521|Risk-Increasing |
|Readmission |polysubstance_strict                          |                0.0252768|   14|       28.08078|MAJOR (20-50% max)    |    14.000|   0.0000000|     14.000|    14.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.0659369|    0.0656352|     0.0662654|       70521|Risk-Increasing |
|Readmission |primary_sub_mod_alcohol                       |                0.0245149|   15|       27.23433|MAJOR (20-50% max)    |    15.000|   0.0000000|     15.000|    15.00000|                100|TRUE                 |binary_x1_minus_x0   |  -0.0568163|   -0.0570478|    -0.0566023|       70521|Risk-Decreasing |
|Readmission |eva_sm                                        |                0.0233596|   16|       25.95087|MAJOR (20-50% max)    |    16.008|   0.5570546|     16.000|    16.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.0282714|    0.0278977|     0.0286293|       70512|Risk-Increasing |
|Readmission |primary_sub_mod_cocaine_powder                |                0.0230913|   17|       25.65284|MAJOR (20-50% max)    |    16.992|   0.5247958|     17.000|    17.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.0717887|    0.0714701|     0.0721056|       70521|Risk-Increasing |
|Readmission |evaluacindelprocesoteraputico                 |                0.0216488|   18|       24.05037|MAJOR (20-50% max)    |    18.260|   2.4045599|     18.000|    19.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.0327175|    0.0324136|     0.0330536|       70520|Risk-Increasing |
|Readmission |tr_outcome_adm_discharge_rule_violation_undet |                0.0215616|   19|       23.95340|MAJOR (20-50% max)    |    18.740|   2.3429704|     18.000|    19.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.1433829|    0.1425113|     0.1443133|       70521|Risk-Increasing |
|Readmission |prim_sub_freq_rec                             |                0.0194629|   20|       21.62193|MAJOR (20-50% max)    |    20.000|   0.0000000|     20.000|    20.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.0379864|    0.0377285|     0.0382766|       70310|Risk-Increasing |
|Readmission |eva_transgnorma                               |                0.0177401|   21|       19.70807|MODERATE (10-20% max) |    21.000|   0.0000000|     21.000|    21.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |eva_ocupacion                                 |                0.0161344|   22|       17.92424|MODERATE (10-20% max) |    22.532|   2.2167320|     22.000|    23.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |tr_outcome_referral                           |                0.0161336|   23|       17.92328|MODERATE (10-20% max) |    22.498|   2.4612090|     22.000|    24.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |adm_motive_justice_sector                     |                0.0158754|   24|       17.63643|MODERATE (10-20% max) |    24.068|   1.4843544|     23.000|    25.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |cohabitation_with_couple_children             |                0.0157128|   25|       17.45588|MODERATE (10-20% max) |    24.902|   1.1951352|     24.000|    25.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |tenure_status_household                       |                0.0148924|   26|       16.54448|MODERATE (10-20% max) |    26.000|   0.0000000|     26.000|    26.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |any_violence_1_domestic_violence_sex_abuse    |                0.0143226|   27|       15.91138|MODERATE (10-20% max) |    27.002|   0.1656222|     27.000|    27.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |eva_fam                                       |                0.0141707|   28|       15.74271|MODERATE (10-20% max) |    27.998|   0.1597304|     28.000|    28.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |dx_f6_personality                             |                0.0119968|   29|       13.32759|MODERATE (10-20% max) |    29.040|   0.6754676|     29.000|    30.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |tr_outcome_dropout                            |                0.0119004|   30|       13.22051|MODERATE (10-20% max) |    29.960|   0.6547256|     29.000|    30.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |any_phys_dx                                   |                0.0113077|   31|       12.56213|MODERATE (10-20% max) |    31.000|   0.0000000|     31.000|    31.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |adm_motive_sanitary_sector                    |                0.0107342|   32|       11.92494|MODERATE (10-20% max) |    32.000|   0.0000000|     32.000|    32.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |urbanicity_cat                                |                0.0104549|   33|       11.61464|MODERATE (10-20% max) |    33.026|   0.4823315|     33.000|    33.52502|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |eva_relinterp                                 |                0.0103098|   34|       11.45350|MODERATE (10-20% max) |    33.974|   0.4688727|     33.475|    34.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |occupation_condition_corr24_inactive          |                0.0101498|   35|       11.27578|MODERATE (10-20% max) |    35.000|   0.0000000|     35.000|    35.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Readmission |first_sub_used_alcohol                        |                0.0092657|   36|       10.29352|MODERATE (10-20% max) |    36.000|   0.0000000|     36.000|    36.00000|                100|TRUE                 |not_evaluated        |            |             |              |            |Not evaluated   |
|Death       |adm_age_rec3                                  |                0.4974409|    1|      100.00000|DOMINANT (>=50% max)  |     1.000|   0.0000000|      1.000|     1.00000|                100|TRUE                 |quartile_q4_minus_q1 |   1.3729876|    1.3684313|     1.3772584|       35281|Risk-Increasing |
|Death       |primary_sub_mod_alcohol                       |                0.2671214|    2|       53.69913|DOMINANT (>=50% max)  |     2.000|   0.0000000|      2.000|     2.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.5560125|    0.5550792|     0.5569872|       70521|Risk-Increasing |
|Death       |prim_sub_freq_rec                             |                0.1144876|    3|       23.01532|MAJOR (20-50% max)    |     3.000|   0.0000000|      3.000|     3.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.2309214|    0.2302723|     0.2315510|       70310|Risk-Increasing |
|Death       |occupation_condition_corr24_unemployed        |                0.0998760|    4|       20.07795|MAJOR (20-50% max)    |     4.000|   0.0000000|      4.000|     4.00000|                100|TRUE                 |binary_x1_minus_x0   |   0.2178780|    0.2171895|     0.2185878|       70521|Risk-Increasing |
|Death       |any_phys_dx                                   |                0.0926062|    5|       18.61653|MODERATE (10-20% max) |     5.000|   0.0000000|      5.000|     5.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.0443566|    0.0434224|     0.0453359|      134297|Risk-Increasing |
|Death       |eva_ocupacion                                 |                0.0748866|    6|       15.05438|MODERATE (10-20% max) |     6.000|   0.0000000|      6.000|     6.00000|                100|TRUE                 |quartile_q4_minus_q1 |   0.1937338|    0.1931856|     0.1942887|       51704|Risk-Increasing |
|Death       |cohabitation_with_couple_children             |                0.0581903|    7|       11.69793|MODERATE (10-20% max) |     7.000|   0.0000000|      7.000|     7.00000|                100|TRUE                 |binary_x1_minus_x0   |  -0.1176721|   -0.1179907|    -0.1173096|       70521|Risk-Decreasing |

Code

gc()

options(future.globals.maxSize = 8L * 1024^3)  # 8 GiB

# # Linear time interaction
# coxph(Surv(time, death) ~ var + var:time + other_covariates, data = df)
# # Log-time interaction
# coxph(Surv(time, death) ~ var + var:log(time) + other_covariates, data = df)
# # Step function at 1 year (365 days)
# df$late_period <- as.numeric(df$time > 365)
# coxph(Surv(time, death) ~ var + var:late_period + other_covariates, data = df)
# # Using splines (requires splines package)
# library(splines)
# coxph(Surv(time, death) ~ var + var:ns(time, df=3) + other_covariates, data = df)
vars_shap_r <- c(
  "primary_sub_mod_cocaine_paste",
  "adm_age_rec3",
  "porc_pobr",
  "sex_rec_woman",
  #"plan_type_corr_pg_pr", # Mantenido en el código como strata()
  #"plan_type_corr_m_pr",  # Mantenido en el código como strata()
  "ethnicity",
  "dit_m",
  #"eva_consumo",
  "eva_consumo_logro_intermedio",
  "eva_consumo_logro_minimo",
  #"ed_attainment_corr",
  "ed_attainment_corr_2_completed_high_school_or_less",
  "ed_attainment_corr_3_completed_primary_school_or_less",
  "sub_dep_icd10_status_drug_dependence",
  "occupation_condition_corr24_unemployed",
  "polysubstance_strict",
  "primary_sub_mod_cocaine_powder",
  #"evaluacindelprocesoteraputico",
  "evaluacindelprocesoteraputico_logro_intermedio",
  "evaluacindelprocesoteraputico_logro_minimo", 
  "dg_psiq_cie_10_dg",
  "tr_outcome_adm_discharge_rule_violation_undet",
  #"eva_sm",
  "eva_sm_logro_intermedio",
  "eva_sm_logro_minimo",   
  
  # --- NO LONGER IN TOP 20 MORTALITY (SHAP) ---
  #"adm_motive_justice_sector",
  #"tenure_status_household_others",
  #"tenure_status_household_renting",
  #"tenure_status_household_stays_temporarily_with_a_relative",
  #"tenure_status_household_illegal_settlement",
  #"tr_outcome_referral",
  #"eva_ocupacion_logro_intermedio",
  #"eva_ocupacion_logro_minimo",   
  #"eva_transgnorma_logro_intermedio",
  #"eva_transgnorma_logro_minimo",    
  
  #"prim_sub_freq_rec",
  "prim_sub_freq_rec_2_2_6_days_wk",
  "prim_sub_freq_rec_3_daily",  
  
  # --- NO LONGER IN TOP 20 MORTALITY  (SHAP) ---
  #"cohabitation_with_couple_children",
  
  "primary_sub_mod_alcohol"
  
  # --- NO LONGER IN TOP 20 MORTALITY (SHAP) ---
  #"eva_relinterp_logro_intermedio",
  #"eva_relinterp_logro_minimo",  
  #"tr_outcome_dropout",
  #"dx_f6_personality",
  #"any_phys_dx",
  #"any_violence_1_domestic_violence_sex_abuse",
  #"adm_motive_sanitary_sector",
  #"eva_fam_logro_intermedio",
  #"eva_fam_logro_minimo"
)

vars_shap_d <- c(
  "adm_age_rec3",
  "primary_sub_mod_cocaine_paste",
  #"prim_sub_freq_rec", 
  "prim_sub_freq_rec_2_2_6_days_wk",
  "prim_sub_freq_rec_3_daily",
  
  # --- NO LONGER IN TOP 20 MORTALITY (SHAP 18:22) ---
  #"primary_sub_mod_cocaine_powder",
  
  "occupation_condition_corr24_unemployed",
  "any_phys_dx",
  #"eva_ocupacion",
  "eva_ocupacion_logro_intermedio",
  "eva_ocupacion_logro_minimo",  
  "first_sub_used_alcohol",
  "sex_rec_woman",
  #"eva_fisica"
  "eva_fisica_logro_intermedio",
  "eva_fisica_logro_minimo"
)
formula_shap_readmit <- as.formula(
  paste("Surv(readmit_time_from_disch_m, readmit_event) ~", 
        paste(vars_shap_r, collapse = " + "), "+ strata(plan_type_strata)")
)
formula_shap_death <- as.formula(
  paste("Surv(death_time_from_disch_m, death_event) ~", 
        paste(vars_shap_d, collapse = " + "), "+ strata(plan_type_strata)")
)

cat("Removing collineal variable\n")
formula_shap_readmit_clean <- as.formula(
  paste(gsub("evaluacindelprocesoteraputico_logro_minimo \\+\\s*","",paste(deparse(formula_shap_readmit), collapse = " "))))

results_readmit_shap <- evaluate_mi_cv_safe_strata(
  formula = formula_shap_readmit_clean, 
  imputed_list = py_corrected_datasets, 
  time_col = "readmit_time_from_disch_m", 
  event_col = "readmit_event",
  k = 5,
  n_repeats = 20
)

results_death_shap <- evaluate_mi_cv_safe_strata(
  formula = formula_shap_death, 
  imputed_list = py_corrected_datasets, 
  time_col = "death_time_from_disch_m", 
  event_col = "death_event",
  k = 5,
  n_repeats = 20
)

Removing collineal variable
Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 99.10 months | C-tau: 94.14 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 99.10 months
Pooled Uno's C-Index: 0.6058 (repeat-quantile interval 0.6056-0.6061)
Pooled UnoC (survAUC::UnoC): 0.6784 (repeat-quantile interval 0.6473-0.6962)
Pooled IBS: 0.1602 (repeat-quantile interval 0.1601-0.1602)
Pooled IBS (IPCW-train): 0.3481 (repeat-quantile interval 0.3479-0.3483)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.6294 (repeat-quantile interval 0.6291-0.6296, valid repeats=20)
  t=36 m: C=0.6135 (repeat-quantile interval 0.6133-0.6138, valid repeats=20)
  t=60 m: C=0.6087 (repeat-quantile interval 0.6085-0.6090, valid repeats=20)
============================================================

Total runtime: 837.89 sec (13.96 min)
Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 107.94 months | C-tau: 102.54 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 107.94 months
Pooled Uno's C-Index: 0.7365 (repeat-quantile interval 0.7360-0.7369)
Pooled UnoC (survAUC::UnoC): 0.8068 (repeat-quantile interval 0.7240-0.8582)
Pooled IBS: 0.0356 (repeat-quantile interval 0.0356-0.0356)
Pooled IBS (IPCW-train): 0.0903 (repeat-quantile interval 0.0902-0.0905)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.7377 (repeat-quantile interval 0.7362-0.7390, valid repeats=20)
  t=36 m: C=0.7507 (repeat-quantile interval 0.7502-0.7513, valid repeats=20)
  t=60 m: C=0.7503 (repeat-quantile interval 0.7498-0.7509, valid repeats=20)
============================================================

Total runtime: 810.82 sec (13.51 min)

Code

dplyr::bind_rows(
  rms::vif(survival::coxph(formula_shap_death, data = py_corrected_datasets[[1]])) |>
    as.data.frame() |>
    tibble::rownames_to_column("Variable") |>
    dplyr::rename(Gvif = 2) |>
    dplyr::arrange(dplyr::desc(dplyr::pick(2)[[1]])) |>
    dplyr::filter(Gvif > 10) |>
    dplyr::mutate(Outcome = "Death"),
  rms::vif(survival::coxph(formula_shap_readmit, data = py_corrected_datasets[[1]])) |>
    as.data.frame() |>
    tibble::rownames_to_column("Variable") |>
    dplyr::rename(Gvif = 2) |>
    dplyr::filter(Gvif > 10) |>
    dplyr::mutate(Outcome = "Readmission")
) |>
  knitr::kable("markdown", caption = "GVIF for SHAP-based Cox models, using one imputed dataset")



Table: GVIF for SHAP-based Cox models, using one imputed dataset

|Variable                                   |     Gvif|Outcome     |
|:------------------------------------------|--------:|:-----------|
|evaluacindelprocesoteraputico_logro_minimo | 10.64565|Readmission |

Code

formula_shap_readmit_clean

Surv(readmit_time_from_disch_m, readmit_event) ~ primary_sub_mod_cocaine_paste + 
    adm_age_rec3 + porc_pobr + sex_rec_woman + ethnicity + dit_m + 
    eva_consumo_logro_intermedio + eva_consumo_logro_minimo + 
    ed_attainment_corr_2_completed_high_school_or_less + ed_attainment_corr_3_completed_primary_school_or_less + 
    sub_dep_icd10_status_drug_dependence + occupation_condition_corr24_unemployed + 
    polysubstance_strict + primary_sub_mod_cocaine_powder + evaluacindelprocesoteraputico_logro_intermedio + 
    dg_psiq_cie_10_dg + tr_outcome_adm_discharge_rule_violation_undet + 
    eva_sm_logro_intermedio + eva_sm_logro_minimo + prim_sub_freq_rec_2_2_6_days_wk + 
    prim_sub_freq_rec_3_daily + primary_sub_mod_alcohol + strata(plan_type_strata)

Code

options(future.globals.maxSize = 12 * 1024^3)

gc()

results_readmit_shap_ibs <- cph_evaluate_boot_oob_mi_corrected(
    formula = formula_shap_readmit_clean,
    imputed_list = py_corrected_datasets, # List of data.frames
    time_col = "readmit_time_from_disch_m", 
    event_col = "readmit_event",
    tau = 99,                # Evaluate up to 60 months
    B = 500,
    cpus = cpus,
    verbose = TRUE,
    min_strata_events = 1
)
results_death_shap_ibs <- cph_evaluate_boot_oob_mi_corrected(
    formula = formula_shap_death,
    imputed_list = py_corrected_datasets, # List of data.frames
    time_col = "death_time_from_disch_m", 
    event_col = "death_event",
    tau = 107,                # Evaluate up to 60 months
    B = 500,
    cpus = cpus,
    verbose = TRUE,
    min_strata_events = 1
)

aicc_readmit_shap <- compute_mi_aicc(formula_shap_readmit_clean, py_corrected_datasets)#282719.30
cat(sprintf("\nDifference in Mean AIC: %.2f\n", aicc_readmit_shap$mean_aic - aicc_readmit_upd2$mean_aic))
#Difference in Mean AIC: 10170.15
aicc_death_shap <- compute_mi_aicc(formula_shap_death, py_corrected_datasets)
#54321.99
cat(sprintf("\nDifference in Mean AIC: %.2f\n", aicc_death_shap$mean_aic - aicc_death_upd2$mean_aic))
#Difference in Mean AIC: 2825.75
cat("Comparing readmission w/o collinear var and only one strata(plan)\n")
compare_mi_models_d1(formula_readmit_updated,formula_shap_readmit_clean, py_corrected_datasets)

compare_mi_models_d1(formula_death_updated,formula_shap_death, py_corrected_datasets)

Strata variables: plan_type_strata
Categorical vars: plan_type_strata
Bootstrap OOB + MI: B=500, M=5, tau=99.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.3480 (95% CI: 0.3359 - 0.3610)
Valid bootstrap replicates: 500 / 500
============================================================
Strata variables: plan_type_strata
Categorical vars: plan_type_strata
Bootstrap OOB + MI: B=500, M=5, tau=107.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.0903 (95% CI: 0.0837 - 0.0974)
Valid bootstrap replicates: 500 / 500
============================================================

======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  282719.24
Mean AICc: 282719.30
Correction added (Penalty): 0.066474
Parameters (k): 22 | Effective Sample Size (Events): 15247
========================================================

Difference in Mean AIC: 10170.15

======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  54321.89
Mean AICc: 54321.99
Correction added (Penalty): 0.103106
Parameters (k): 12 | Effective Sample Size (Events): 3039
========================================================

Difference in Mean AIC: 2825.75
Comparing readmission w/o collinear var and only one strata(plan)
Fitting the FULL model on 5 imputations...
Fitting the REDUCED model on 5 imputations...
Executing the Wald Test (D1)...

============================================================
MODEL COMPARISON RESULT (D1 Statistic)
============================================================
   test statistic df1      df2 dfcom      p.value        riv
 1 ~~ 2  2.705153  42 11680.04 15183 1.835904e-08 0.05960578
============================================================
Fitting the FULL model on 5 imputations...
Fitting the REDUCED model on 5 imputations...
Executing the Wald Test (D1)...

============================================================
MODEL COMPARISON RESULT (D1 Statistic)
============================================================
   test statistic df1      df2 dfcom     p.value        riv
 1 ~~ 2  9.059786  52 2901.688  2975 6.69787e-63 0.04231255
============================================================

Code

gc()
cal_readmit_shap<- 
  calibrate_cph_grouped(
    formula = formula_shap_readmit_clean, # Original formula with months variable
    data = py_corrected_datasets, # Original data (months)
    times = c(12, 36, 60, 96),
    seed = 2125,
    validation = "cv",
    n_repeats = 20,
    n_imputations = 5,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE
)
print(cal_readmit_shap)

cal_death_shap<- 
    calibrate_cph_grouped(
    formula = formula_shap_death, # Original formula with months variable
    data = py_corrected_datasets, # Original data (months)
    times = c(12, 36, 60, 96),
    seed = 2125,
    validation = "cv",
    n_repeats = 20,
    n_imputations = 5,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE
)
print(cal_death_shap)

Detected time variable: readmit_time_from_disch_m
Detected event variable: readmit_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done
Cox Death Calibration Analysis
==============================
Time variable: readmit_time_from_disch_m
Event variable: readmit_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months  ici_mean      ici_sd  ici_p025  ici_p975  e50_mean      e50_sd  e50_p025  e50_p975  e90_mean      e90_sd  e90_p025  e90_p975
          12        NA          NA        NA        NA        NA          NA        NA        NA        NA          NA        NA        NA
          36        NA          NA        NA        NA        NA          NA        NA        NA        NA          NA        NA        NA
          60 0.1825182 0.001543724 0.1795637 0.1856268 0.1740643 0.001461813 0.1712492 0.1769219 0.2580269 0.002336779 0.2535524 0.2626968
          96        NA          NA        NA        NA        NA          NA        NA        NA        NA          NA        NA        NA
 emax_mean     emax_sd emax_p025 emax_p975    ece_mean       ece_sd    ece_p025    ece_p975   eo_mean         eo_sd   eo_p025   eo_p975 n_runs
        NA          NA        NA        NA 0.003307777 0.0005058810 0.002219688 0.004030118 0.9914441 0.00010839579 0.9912467 0.9916397     20
        NA          NA        NA        NA 0.005409745 0.0005309648 0.004408466 0.006187693 0.9883840 0.00008063061 0.9882823 0.9885214     20
 0.5033302 0.008794312 0.4852321 0.5181039 0.006736495 0.0006970693 0.005192256 0.008086500 0.9837819 0.00007967117 0.9836820 0.9839257     20
        NA          NA        NA        NA 0.006036953 0.0007183795 0.004459733 0.007260127 0.9760987 0.00009534760 0.9759566 0.9762694     20
 n_imp
     5
     5
     5
     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.
Detected time variable: death_time_from_disch_m
Detected event variable: death_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done
Cox Death Calibration Analysis
==============================
Time variable: death_time_from_disch_m
Event variable: death_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months   ici_mean       ici_sd   ici_p025   ici_p975   e50_mean       e50_sd  e50_p025   e50_p975   e90_mean       e90_sd   e90_p025
          12         NA           NA         NA         NA         NA           NA        NA         NA         NA           NA         NA
          36         NA           NA         NA         NA         NA           NA        NA         NA         NA           NA         NA
          60 0.04321168 0.0004084107 0.04256993 0.04430497 0.02732389 0.0002203186 0.0269899 0.02792679 0.09533956 0.0009852984 0.09381361
          96         NA           NA         NA         NA         NA           NA        NA         NA         NA           NA         NA
  e90_p975 emax_mean    emax_sd emax_p025 emax_p975    ece_mean        ece_sd     ece_p025    ece_p975  eo_mean        eo_sd  eo_p025  eo_p975
        NA        NA         NA        NA        NA 0.001005219 0.00009029763 0.0008244139 0.001168472 1.008615 0.0003244808 1.007975 1.009514
        NA        NA         NA        NA        NA 0.002213938 0.00017671999 0.0018275769 0.002518330 1.015327 0.0002537604 1.014855 1.015914
 0.0979637 0.5513428 0.01463271 0.5323835 0.5810292 0.002960441 0.00023512625 0.0025732116 0.003402164 1.028405 0.0002210925 1.027915 1.028752
        NA        NA         NA        NA        NA 0.003036708 0.00040795843 0.0023970221 0.003811732 1.064773 0.0002388191 1.064141 1.065291
 n_runs n_imp
     20     5
     20     5
     20     5
     20     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.

Code

cox.zph(coxph(formula_shap_death, data= py_corrected_datasets[[1]]))

                                        chisq df        p
adm_age_rec3                            0.510  1  0.47509
primary_sub_mod_cocaine_paste           1.640  1  0.20030
prim_sub_freq_rec_2_2_6_days_wk         1.000  1  0.31738
prim_sub_freq_rec_3_daily               1.750  1  0.18593
occupation_condition_corr24_unemployed  1.426  1  0.23238
any_phys_dx                            15.394  1 0.000087
eva_ocupacion_logro_intermedio          0.540  1  0.46247
eva_ocupacion_logro_minimo              3.688  1  0.05481
first_sub_used_alcohol                  0.510  1  0.47531
sex_rec_woman                           0.422  1  0.51597
eva_fisica_logro_intermedio             2.624  1  0.10526
eva_fisica_logro_minimo                11.163  1  0.00083
GLOBAL                                 33.829 12  0.00072

Code

cox.zph(coxph(formula_shap_readmit_clean, data= py_corrected_datasets[[1]]))

                                                         chisq df       p
primary_sub_mod_cocaine_paste                           0.1033  1 0.74795
adm_age_rec3                                            5.5339  1 0.01865
porc_pobr                                               0.4708  1 0.49262
sex_rec_woman                                           1.0806  1 0.29857
ethnicity                                               4.3431  1 0.03716
dit_m                                                  21.5576  1 3.4e-06
eva_consumo_logro_intermedio                            1.5075  1 0.21953
eva_consumo_logro_minimo                                6.2784  1 0.01222
ed_attainment_corr_2_completed_high_school_or_less      8.0121  1 0.00465
ed_attainment_corr_3_completed_primary_school_or_less  11.6986  1 0.00063
sub_dep_icd10_status_drug_dependence                    2.5565  1 0.10984
occupation_condition_corr24_unemployed                  0.0845  1 0.77127
polysubstance_strict                                   18.5106  1 1.7e-05
primary_sub_mod_cocaine_powder                          0.0515  1 0.82041
evaluacindelprocesoteraputico_logro_intermedio          3.6610  1 0.05570
dg_psiq_cie_10_dg                                       0.2978  1 0.58529
tr_outcome_adm_discharge_rule_violation_undet          58.6456  1 1.9e-14
eva_sm_logro_intermedio                                 2.0845  1 0.14880
eva_sm_logro_minimo                                     7.1323  1 0.00757
prim_sub_freq_rec_2_2_6_days_wk                         2.8902  1 0.08912
prim_sub_freq_rec_3_daily                               4.5277  1 0.03335
primary_sub_mod_alcohol                                 0.2439  1 0.62138
GLOBAL                                                142.7928 22 < 2e-16

Code

# Residuals: discharge to readmission
res <- resid(coxph(formula_shap_readmit_clean, data= py_corrected_datasets[[1]]), "scaledsch")
time <- as.numeric(dimnames(res)[[1]])
z2 <- loess(res[,"tr_outcome_adm_discharge_rule_violation_undet"] ~ time, span=0.50)

plot(time, fitted(z2), type="l", col="blue",
     main="Sch. res. – Completion (Violation of tules of the center)",
     xlab="Time", ylab="Residuals")
lines(supsmu(time, res[,"tr_outcome_adm_discharge_rule_violation_undet"]), lty=2, col="darkgray")

formula_shap_readmit_clean_updated <- update(
  formula_shap_readmit_clean,
  . ~ . - tr_outcome_adm_discharge_rule_violation_undet + strat(tr_outcome_adm_discharge_rule_violation_undet)
)

Code

formula_shap_readmit_clean_updated

Surv(readmit_time_from_disch_m, readmit_event) ~ primary_sub_mod_cocaine_paste + 
    adm_age_rec3 + porc_pobr + sex_rec_woman + ethnicity + dit_m + 
    eva_consumo_logro_intermedio + eva_consumo_logro_minimo + 
    ed_attainment_corr_2_completed_high_school_or_less + ed_attainment_corr_3_completed_primary_school_or_less + 
    sub_dep_icd10_status_drug_dependence + occupation_condition_corr24_unemployed + 
    polysubstance_strict + primary_sub_mod_cocaine_powder + evaluacindelprocesoteraputico_logro_intermedio + 
    dg_psiq_cie_10_dg + eva_sm_logro_intermedio + eva_sm_logro_minimo + 
    prim_sub_freq_rec_2_2_6_days_wk + prim_sub_freq_rec_3_daily + 
    primary_sub_mod_alcohol + strata(plan_type_strata) + strat(tr_outcome_adm_discharge_rule_violation_undet)

Code

fit <- coxph(formula_shap_readmit_clean, data = py_corrected_datasets[[1]])
zph <- cox.zph(fit, transform = "km")  # or "rank", "identity"

# Plot with confidence bands
plot(zph, var = "tr_outcome_adm_discharge_rule_violation_undet", 
     main = "Schoenfeld Residuals - Violation of Rules")

Code

#Function to print results in a nice format
source(paste0(project_root,"/cons/_hist_scripts/print_results_readmit_shap.R"))

#prs(results_readmit_shap)

print_results_readmit_shap.R loaded. Functions: print_pooled_results, view_pooled_results, prs

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================

Global Evaluation Time (tau): 99.1 months

Pooled Uno C-Index: 0.6058 (repeat-quantile interval 0.6056 - 0.6061 )
Pooled UnoC (survAUC::UnoC): 0.6784 (repeat-quantile interval 0.6473 - 0.6962 )
Pooled IBS: 0.1602 (repeat-quantile interval 0.1601 - 0.1602 )
Pooled IBS (IPCW-train): 0.3481 (repeat-quantile interval 0.3479 - 0.3483 )

RMST-based Uno C-index by horizon:
  t=12 m: C=0.6294 (repeat-quantile interval 0.6291-0.6296, valid repeats=20)
  t=36 m: C=0.6135 (repeat-quantile interval 0.6133-0.6138, valid repeats=20)
  t=60 m: C=0.6087 (repeat-quantile interval 0.6085-0.6090, valid repeats=20)
============================================================

Additional strata to readmission

Code

gc()

results_readmit_shap2 <- evaluate_mi_cv_safe_strata(
  formula = as.formula(
  gsub(
    "strat\\(",
    "strata(",
    paste(deparse(formula_shap_readmit_clean_updated), collapse = " ")
  )
), 
  imputed_list = py_corrected_datasets, 
  time_col = "readmit_time_from_disch_m", 
  event_col = "readmit_event",
  k = 5,
  n_repeats = 20
)

Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 99.10 months | C-tau: 94.14 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 99.10 months
Pooled Uno's C-Index: 0.6058 (repeat-quantile interval 0.6056-0.6061)
Pooled UnoC (survAUC::UnoC): 0.6784 (repeat-quantile interval 0.6473-0.6962)
Pooled IBS: 0.1602 (repeat-quantile interval 0.1601-0.1602)
Pooled IBS (IPCW-train): 0.3481 (repeat-quantile interval 0.3479-0.3483)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.6294 (repeat-quantile interval 0.6291-0.6296, valid repeats=20)
  t=36 m: C=0.6135 (repeat-quantile interval 0.6133-0.6138, valid repeats=20)
  t=60 m: C=0.6087 (repeat-quantile interval 0.6085-0.6090, valid repeats=20)
============================================================

Total runtime: 887.72 sec (14.80 min)
Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 107.94 months | C-tau: 102.54 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 107.94 months
Pooled Uno's C-Index: 0.7365 (repeat-quantile interval 0.7360-0.7369)
Pooled UnoC (survAUC::UnoC): 0.8068 (repeat-quantile interval 0.7240-0.8582)
Pooled IBS: 0.0356 (repeat-quantile interval 0.0356-0.0356)
Pooled IBS (IPCW-train): 0.0903 (repeat-quantile interval 0.0902-0.0905)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.7377 (repeat-quantile interval 0.7362-0.7390, valid repeats=20)
  t=36 m: C=0.7507 (repeat-quantile interval 0.7502-0.7513, valid repeats=20)
  t=60 m: C=0.7503 (repeat-quantile interval 0.7498-0.7509, valid repeats=20)
============================================================

Total runtime: 802.76 sec (13.38 min)

Warning message:
In formula.character(object, env = baseenv()) :
  Using formula(x) is deprecated when x is a character vector of length > 1.
  Consider formula(paste(x, collapse = " ")) instead.

Code

results_readmit_shap_ibs <- cph_evaluate_boot_oob_mi_corrected(
  formula = as.formula(gsub("strat\\(", "strata(", deparse(formula_shap_readmit_clean_updated))),
  imputed_list = py_corrected_datasets, # List of data.frames
  time_col = "readmit_time_from_disch_m", 
  event_col = "readmit_event",
  tau = 99,                # Evaluate up to 60 months
  B = 500,
  cpus = cpus,
  verbose = TRUE,
  min_strata_events = 1
)

aicc_readmit_shap <- compute_mi_aicc(as.formula(gsub("strat\\(", "strata(", deparse(formula_shap_readmit_clean_updated))), py_corrected_datasets)
cat(sprintf("\nDifference in Mean AIC: %.2f\n", aicc_readmit_shap$mean_aic - aicc_readmit_upd2$mean_aic))

Strata variables: plan_type_strata, tr_outcome_adm_discharge_rule_violation_undet
Categorical vars: plan_type_strata, tr_outcome_adm_discharge_rule_violation_undet
Bootstrap OOB + MI: B=500, M=5, tau=99.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.3479 (95% CI: 0.3357 - 0.3609)
Valid bootstrap replicates: 500 / 500
============================================================

Warning message:
In formula.character(object, env = baseenv()) :
  Using formula(x) is deprecated when x is a character vector of length > 1.
  Consider formula(paste(x, collapse = " ")) instead.

Strata variables: plan_type_strata
Categorical vars: plan_type_strata
Bootstrap OOB + MI: B=500, M=5, tau=107.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.0903 (95% CI: 0.0837 - 0.0974)
Valid bootstrap replicates: 500 / 500
============================================================

Warning message:
In formula.character(object, env = baseenv()) :
  Using formula(x) is deprecated when x is a character vector of length > 1.
  Consider formula(paste(x, collapse = " ")) instead.


======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  272588.32
Mean AICc: 272588.38
Correction added (Penalty): 0.060690
Parameters (k): 21 | Effective Sample Size (Events): 15247
========================================================

Warning message:
In formula.character(object, env = baseenv()) :
  Using formula(x) is deprecated when x is a character vector of length > 1.
  Consider formula(paste(x, collapse = " ")) instead.


Difference in Mean AIC: 39.23

======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  54321.89
Mean AICc: 54321.99
Correction added (Penalty): 0.103106
Parameters (k): 12 | Effective Sample Size (Events): 3039
========================================================

Warning message:
In formula.character(object, env = baseenv()) :
  Using formula(x) is deprecated when x is a character vector of length > 1.
  Consider formula(paste(x, collapse = " ")) instead.


Difference in Mean AIC: 2825.75

Code

compare_mi_models_d1(formula_shap_readmit_clean_updated,formula_shap_readmit_clean, py_corrected_datasets)

Fitting the FULL model on 5 imputations...
Fitting the REDUCED model on 5 imputations...
Executing the Wald Test (D1)...

============================================================
MODEL COMPARISON RESULT (D1 Statistic)
============================================================
   test statistic df1 df2 dfcom     p.value           riv
 1 ~~ 2  47.39744   1   4 15225 0.002332909 0.00001724682
============================================================

Code

cal_readmit_shap2<- 
  calibrate_cph_grouped(
    formula = as.formula(gsub("strat\\(", "strata(", deparse(formula_shap_readmit_clean_updated))),  # Original formula with months variable
    data = py_corrected_datasets, # Original data (months)
    times = c(12, 36, 60, 96),
    seed = 2125,
    validation = "cv",
    n_repeats = 20,
    n_imputations = 5,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE
  )
print(cal_readmit_shap2)

Detected time variable: readmit_time_from_disch_m
Detected event variable: readmit_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done

Warning message:
In formula.character(object, env = baseenv()) :
  Using formula(x) is deprecated when x is a character vector of length > 1.
  Consider formula(paste(x, collapse = " ")) instead.

Cox Death Calibration Analysis
==============================
Time variable: readmit_time_from_disch_m
Event variable: readmit_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months  ici_mean      ici_sd  ici_p025  ici_p975  e50_mean      e50_sd  e50_p025  e50_p975  e90_mean      e90_sd  e90_p025  e90_p975
          12        NA          NA        NA        NA        NA          NA        NA        NA        NA          NA        NA        NA
          36        NA          NA        NA        NA        NA          NA        NA        NA        NA          NA        NA        NA
          60 0.1827935 0.001649309 0.1800823 0.1866863 0.1745213 0.001552957 0.1719348 0.1781487 0.2578218 0.002438123 0.2536995 0.2633381
          96        NA          NA        NA        NA        NA          NA        NA        NA        NA          NA        NA        NA
 emax_mean    emax_sd emax_p025 emax_p975    ece_mean       ece_sd    ece_p025    ece_p975   eo_mean         eo_sd   eo_p025   eo_p975 n_runs
        NA         NA        NA        NA 0.003444884 0.0003839767 0.002823730 0.004137820 0.9906404 0.00010952313 0.9904459 0.9908274     20
        NA         NA        NA        NA 0.005489165 0.0005031797 0.004728414 0.006648987 0.9883615 0.00007827687 0.9882534 0.9884980     20
 0.4944125 0.01017298 0.4733057 0.5153062 0.006566355 0.0006383476 0.005412974 0.008044239 0.9846647 0.00006777110 0.9845880 0.9848005     20
        NA         NA        NA        NA 0.006415337 0.0008763259 0.004597261 0.007859738 0.9771143 0.00008475560 0.9769940 0.9772526     20
 n_imp
     5
     5
     5
     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.
Detected time variable: death_time_from_disch_m
Detected event variable: death_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done

Warning message:
In formula.character(object, env = baseenv()) :
  Using formula(x) is deprecated when x is a character vector of length > 1.
  Consider formula(paste(x, collapse = " ")) instead.

Cox Death Calibration Analysis
==============================
Time variable: death_time_from_disch_m
Event variable: death_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months   ici_mean       ici_sd   ici_p025   ici_p975   e50_mean       e50_sd  e50_p025   e50_p975   e90_mean       e90_sd   e90_p025
          12         NA           NA         NA         NA         NA           NA        NA         NA         NA           NA         NA
          36         NA           NA         NA         NA         NA           NA        NA         NA         NA           NA         NA
          60 0.04321168 0.0004084107 0.04256993 0.04430497 0.02732389 0.0002203186 0.0269899 0.02792679 0.09533956 0.0009852984 0.09381361
          96         NA           NA         NA         NA         NA           NA        NA         NA         NA           NA         NA
  e90_p975 emax_mean    emax_sd emax_p025 emax_p975    ece_mean        ece_sd     ece_p025    ece_p975  eo_mean        eo_sd  eo_p025  eo_p975
        NA        NA         NA        NA        NA 0.001005219 0.00009029763 0.0008244139 0.001168472 1.008615 0.0003244808 1.007975 1.009514
        NA        NA         NA        NA        NA 0.002213938 0.00017671999 0.0018275769 0.002518330 1.015327 0.0002537604 1.014855 1.015914
 0.0979637 0.5513428 0.01463271 0.5323835 0.5810292 0.002960441 0.00023512625 0.0025732116 0.003402164 1.028405 0.0002210925 1.027915 1.028752
        NA        NA         NA        NA        NA 0.003036708 0.00040795843 0.0023970221 0.003811732 1.064773 0.0002388191 1.064141 1.065291
 n_runs n_imp
     20     5
     20     5
     20     5
     20     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.

With stratification, individuals in different strata have different baseline hazards, and therefore different predicted survival probabilities. This creates some discrimination purely from the stratum membership.

For the Wald test (𝐷1) to be valid, the ‘Reduced’ model must be a perfect subset of the ‘Full’ model. You cannot change the underlying structure (strata).

Interactions

Code

global_df <- read_csv(
  find_latest_file(
    file.path(project_root, "cons", "_out"),
    "xgb11_dual_interactions_global_top_enriched"),
  show_col_types = FALSE
)
time_dep_df <- read_csv(
  find_latest_file(
    file.path(project_root, "cons", "_out"), 
    "xgb11_dual_interactions_time_dependent"),
  show_col_types = FALSE
)

# 2. Merge on unique keys
merged_df <- global_df %>%
  left_join(time_dep_df, by = c("outcome", "main_feature", "interactor"))

# 3. Apply the Effect-Size & Stability Rule
cox_discriminative_interactions <- merged_df %>%
  dplyr::filter(
    # Ignore FDR completely. Use joint clinical/linear magnitude:
    abs_corr >= 0.10,
    !is.na(smd_binarized) & abs(smd_binarized) >= 0.15,
    
    # Enforce standard Cox Proportional Hazards assumptions:
    time_dependent_flag == FALSE,
    direction_flip_flag == FALSE
  ) %>%
  dplyr::select(
    outcome, 
    main_feature, 
    interactor, 
    abs_corr, 
    smd_binarized, 
    direction_q4_q1
  ) %>%
  dplyr::arrange(outcome, desc(abs_corr))

# Print results
cat("Number of highly discriminative candidates for Cox model:", nrow(cox_discriminative_interactions), "\n")
knitr::kable(cox_discriminative_interactions, "markdown", caption = "Highly discriminative interactions for Cox models based on Effect-Size & Stability Rule")

Number of highly discriminative candidates for Cox model: 18



Table: Highly discriminative interactions for Cox models based on Effect-Size & Stability Rule

|outcome     |main_feature                           |interactor                     |  abs_corr| smd_binarized|direction_q4_q1 |
|:-----------|:--------------------------------------|:------------------------------|---------:|-------------:|:---------------|
|Death       |primary_sub_mod_cocaine_paste          |primary_sub_mod_cocaine_powder | 0.3565791|    -0.9621838|Negative        |
|Death       |primary_sub_mod_cocaine_paste          |primary_sub_mod_alcohol        | 0.3000330|     0.6639645|Positive        |
|Death       |any_phys_dx                            |primary_sub_mod_alcohol        | 0.1516792|    -0.3239456|Negative        |
|Death       |primary_sub_mod_alcohol                |adm_age_rec3                   | 0.1455185|    -0.2719126|Negative        |
|Death       |cohabitation_with_couple_children      |adm_age_rec3                   | 0.1273963|     0.2131233|Positive        |
|Death       |first_sub_used_alcohol                 |primary_sub_mod_alcohol        | 0.1188216|    -0.2526242|Negative        |
|Death       |primary_sub_mod_cocaine_paste          |polysubstance_strict           | 0.1151229|    -0.2614787|Negative        |
|Death       |dit_m                                  |evaluacindelprocesoteraputico  | 0.1088313|     0.3241464|Positive        |
|Death       |first_sub_used_alcohol                 |adm_age_rec3                   | 0.1074641|    -0.1882724|Negative        |
|Death       |first_sub_used_alcohol                 |polysubstance_strict           | 0.1066656|     0.2420396|Positive        |
|Death       |occupation_condition_corr24_unemployed |adm_age_rec3                   | 0.1051251|     0.1814427|Positive        |
|Death       |dit_m                                  |eva_sm                         | 0.1033194|     0.3033179|Positive        |
|Death       |cohabitation_with_couple_children      |cohabitation_family_of_origin  | 0.1030656|    -0.2146514|Negative        |
|Death       |sex_rec_woman                          |plan_type_corr_m_pr            | 0.1017854|     0.5214039|Positive        |
|Death       |any_phys_dx                            |first_sub_used_alcohol         | 0.1006109|    -0.2015981|Negative        |
|Readmission |eva_consumo                            |tr_outcome_referral            | 0.1431817|     0.4541617|Positive        |
|Readmission |sex_rec_woman                          |primary_sub_mod_others         | 0.1187151|    -0.8945712|Negative        |
|Readmission |eva_consumo                            |plan_type_corr_pg_pr           | 0.1046228|     0.3365649|Positive        |

Code

formula_shap_readmit_interact <- stats::update(
  formula_shap_readmit_clean_updated,
  . ~ . +
    tr_outcome_referral +
    primary_sub_mod_cocaine_paste:polysubstance_strict +
    eva_consumo_logro_intermedio:tr_outcome_referral +
    eva_consumo_logro_minimo:tr_outcome_referral
)
formula_shap_death_interact <- stats::update(
  formula_shap_death,
  . ~ . +
    polysubstance_strict +
    cohabitation_with_couple_children +
    occupation_condition_corr24_inactive +
    primary_sub_mod_cocaine_paste:polysubstance_strict +
    cohabitation_with_couple_children:adm_age_rec3 +
    occupation_condition_corr24_unemployed:adm_age_rec3 +
    sex_rec_woman:occupation_condition_corr24_inactive +
    primary_sub_mod_cocaine_paste:first_sub_used_alcohol
)
# Convertir a fórmula
formula_shap_readmit_interact <- as.formula(gsub("strat\\(", "strata(", paste(deparse(formula_shap_readmit_interact), collapse = " ")))
formula_shap_death_interact <- as.formula(
  gsub("strat\\(", "strata(", 
       paste(deparse(formula_shap_death_interact), collapse = " "))
)
results_readmit_int_shap <- evaluate_mi_cv_safe_strata(
    formula = formula_shap_readmit_interact, 
    imputed_list = py_corrected_datasets, 
    time_col = "readmit_time_from_disch_m", 
    event_col = "readmit_event",
    k = 5,
    n_repeats = 20
)
results_death_int_shap <- evaluate_mi_cv_safe_strata(
    formula = formula_shap_death_interact, 
    imputed_list = py_corrected_datasets, 
    time_col = "death_time_from_disch_m", 
    event_col = "death_event",
    k = 5,
    n_repeats = 20
)

Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 99.10 months | C-tau: 94.14 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 99.10 months
Pooled Uno's C-Index: 0.6069 (repeat-quantile interval 0.6067-0.6071)
Pooled UnoC (survAUC::UnoC): 0.6776 (repeat-quantile interval 0.6462-0.6958)
Pooled IBS: 0.1601 (repeat-quantile interval 0.1601-0.1602)
Pooled IBS (IPCW-train): 0.3478 (repeat-quantile interval 0.3476-0.3480)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.6306 (repeat-quantile interval 0.6303-0.6308, valid repeats=20)
  t=36 m: C=0.6145 (repeat-quantile interval 0.6143-0.6147, valid repeats=20)
  t=60 m: C=0.6098 (repeat-quantile interval 0.6096-0.6101, valid repeats=20)
============================================================

Total runtime: 874.01 sec (14.57 min)
Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 107.94 months | C-tau: 102.54 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 107.94 months
Pooled Uno's C-Index: 0.7395 (repeat-quantile interval 0.7390-0.7401)
Pooled UnoC (survAUC::UnoC): 0.8185 (repeat-quantile interval 0.7371-0.8696)
Pooled IBS: 0.0352 (repeat-quantile interval 0.0352-0.0352)
Pooled IBS (IPCW-train): 0.0893 (repeat-quantile interval 0.0892-0.0895)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.7424 (repeat-quantile interval 0.7410-0.7434, valid repeats=20)
  t=36 m: C=0.7548 (repeat-quantile interval 0.7541-0.7556, valid repeats=20)
  t=60 m: C=0.7549 (repeat-quantile interval 0.7541-0.7555, valid repeats=20)
============================================================

Total runtime: 763.78 sec (12.73 min)

Code

results_readmit_shap_int_ibs <- cph_evaluate_boot_oob_mi_corrected(
    formula = formula_shap_readmit_interact,
    imputed_list = py_corrected_datasets, # List of data.frames
    time_col = "readmit_time_from_disch_m", 
    event_col = "readmit_event",
    tau = 99,                # Evaluate up to 60 months
    B = 500,
    cpus = cpus,
    verbose = TRUE,
    min_strata_events = 1
)
results_death_shap_int_ibs <- cph_evaluate_boot_oob_mi_corrected(
    formula = formula_shap_death_interact,
    imputed_list = py_corrected_datasets, # List of data.frames
    time_col = "death_time_from_disch_m", 
    event_col = "death_event",
    tau = 107,                # Evaluate up to 60 months
    B = 500,
    cpus = cpus,
    verbose = TRUE,
    min_strata_events = 1
)
# ====
aicc_readmit_shap_int <- compute_mi_aicc(formula_shap_readmit_interact, py_corrected_datasets)
aicc_readmit_shap_int
aicc_death_shap_int <- compute_mi_aicc(formula_shap_death_interact, py_corrected_datasets)
aicc_death_shap_int

Strata variables: plan_type_strata, tr_outcome_adm_discharge_rule_violation_undet
Categorical vars: plan_type_strata, tr_outcome_adm_discharge_rule_violation_undet
Bootstrap OOB + MI: B=500, M=5, tau=99.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.3476 (95% CI: 0.3357 - 0.3607)
Valid bootstrap replicates: 500 / 500
============================================================

Warning messages:
1: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  24 ; coefficient may be infinite. 
2: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  25 ; coefficient may be infinite. 
3: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  25 ; coefficient may be infinite. 
4: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  25 ; coefficient may be infinite. 
5: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  24 ; coefficient may be infinite. 
6: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  24 ; coefficient may be infinite.

Strata variables: plan_type_strata
Categorical vars: plan_type_strata
Bootstrap OOB + MI: B=500, M=5, tau=107.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.0893 (95% CI: 0.0828 - 0.0964)
Valid bootstrap replicates: 500 / 500
============================================================

Warning messages:
1: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  20 ; coefficient may be infinite. 
2: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  14 ; coefficient may be infinite. 
3: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  19 ; coefficient may be infinite. 
4: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  19 ; coefficient may be infinite. 
5: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  19 ; coefficient may be infinite. 
6: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  19 ; coefficient may be infinite. 
7: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  19 ; coefficient may be infinite. 
8: In fitter(X, Y, strata = Strata, offset = offset, weights = weights,  :
  Loglik converged before variable  14 ; coefficient may be infinite.


======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  272559.54
Mean AICc: 272559.63
Correction added (Penalty): 0.085408
Parameters (k): 25 | Effective Sample Size (Events): 15247
========================================================

======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  54186.03
Mean AICc: 54186.30
Correction added (Penalty): 0.278330
Parameters (k): 20 | Effective Sample Size (Events): 3039
========================================================

$mean_aicc
[1] 54186.3

$full_results
       aic correction     aicc  k n_events
1 54174.44    0.27833 54174.72 20     3039
2 54196.28    0.27833 54196.56 20     3039
3 54192.15    0.27833 54192.43 20     3039
4 54187.93    0.27833 54188.21 20     3039
5 54179.33    0.27833 54179.61 20     3039

Code

compare_mi_models_d1(formula_shap_readmit_interact, formula_shap_readmit_clean_updated, py_corrected_datasets)
compare_mi_models_d1(formula_shap_death_interact, formula_shap_death, py_corrected_datasets)

Fitting the FULL model on 5 imputations...
Fitting the REDUCED model on 5 imputations...
Executing the Wald Test (D1)...

============================================================
MODEL COMPARISON RESULT (D1 Statistic)
============================================================
   test statistic df1      df2 dfcom      p.value          riv
 1 ~~ 2  9.437032   4 15219.27 15222 1.289769e-07 0.0001703197
============================================================
Fitting the FULL model on 5 imputations...
Fitting the REDUCED model on 5 imputations...
Executing the Wald Test (D1)...

============================================================
MODEL COMPARISON RESULT (D1 Statistic)
============================================================
   test statistic df1      df2 dfcom      p.value        riv
 1 ~~ 2  18.53821   8 2464.506  3019 3.187697e-27 0.04475468
============================================================

Code

cal_readmit_shap_int<- 
    calibrate_cph_grouped(
    formula = formula_shap_readmit_interact,
    data = py_corrected_datasets, # Original data (months)
    times = c(12, 36, 60, 96),
    seed = 2125,
    validation = "cv",
    n_repeats = 20,
    n_imputations = 5,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE
  )
print(cal_readmit_shap_int)
cal_death_shap_int<- 
    calibrate_cph_grouped(
    formula = formula_shap_death_interact,
    data = py_corrected_datasets, # Original data (months)
    times = c(12, 36, 60, 96),
    seed = 2125,
    validation = "cv",
    n_repeats = 20,
    n_imputations = 5,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE
  )
print(cal_death_shap_int)

Detected time variable: readmit_time_from_disch_m
Detected event variable: readmit_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done
Cox Death Calibration Analysis
==============================
Time variable: readmit_time_from_disch_m
Event variable: readmit_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months  ici_mean      ici_sd  ici_p025  ici_p975  e50_mean      e50_sd e50_p025  e50_p975  e90_mean      e90_sd  e90_p025  e90_p975
          12        NA          NA        NA        NA        NA          NA       NA        NA        NA          NA        NA        NA
          36        NA          NA        NA        NA        NA          NA       NA        NA        NA          NA        NA        NA
          60 0.1831702 0.001817819 0.1797368 0.1861172 0.1757238 0.001702743 0.172482 0.1783286 0.2586306 0.002721368 0.2535846 0.2632895
          96        NA          NA        NA        NA        NA          NA       NA        NA        NA          NA        NA        NA
 emax_mean     emax_sd emax_p025 emax_p975    ece_mean       ece_sd    ece_p025    ece_p975   eo_mean         eo_sd   eo_p025   eo_p975 n_runs
        NA          NA        NA        NA 0.003001021 0.0003369692 0.002287333 0.003437138 0.9906880 0.00012746222 0.9904516 0.9909466     20
        NA          NA        NA        NA 0.005867970 0.0005601209 0.004811135 0.007081326 0.9884368 0.00008620816 0.9882896 0.9886366     20
 0.5091392 0.009586629 0.4920651 0.5224705 0.007090911 0.0007753402 0.005781774 0.008549793 0.9846760 0.00007807727 0.9845338 0.9848708     20
        NA          NA        NA        NA 0.005645329 0.0006797829 0.004520184 0.006928501 0.9769051 0.00009298499 0.9767476 0.9770717     20
 n_imp
     5
     5
     5
     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.
Detected time variable: death_time_from_disch_m
Detected event variable: death_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done
Cox Death Calibration Analysis
==============================
Time variable: death_time_from_disch_m
Event variable: death_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months   ici_mean      ici_sd   ici_p025   ici_p975   e50_mean       e50_sd   e50_p025   e50_p975   e90_mean     e90_sd  e90_p025
          12         NA          NA         NA         NA         NA           NA         NA         NA         NA         NA        NA
          36         NA          NA         NA         NA         NA           NA         NA         NA         NA         NA        NA
          60 0.04167132 0.000699679 0.04042281 0.04251655 0.02581193 0.0003524503 0.02515597 0.02623736 0.09135118 0.00170566 0.0883194
          96         NA          NA         NA         NA         NA           NA         NA         NA         NA         NA        NA
   e90_p975 emax_mean    emax_sd emax_p025 emax_p975    ece_mean       ece_sd     ece_p025    ece_p975  eo_mean        eo_sd  eo_p025  eo_p975
         NA        NA         NA        NA        NA 0.001126534 0.0001253561 0.0008277553 0.001445871 1.009536 0.0003504561 1.009060 1.010438
         NA        NA         NA        NA        NA 0.002835688 0.0001536318 0.0025044129 0.003156477 1.017751 0.0003054098 1.017188 1.018211
 0.09367817 0.6434865 0.01351595 0.6168928 0.6646445 0.003250129 0.0001672949 0.0029722910 0.003659011 1.034863 0.0002362822 1.034487 1.035257
         NA        NA         NA        NA        NA 0.003918789 0.0003470501 0.0032002291 0.004582667 1.083428 0.0002501773 1.082870 1.083745
 n_runs n_imp
     20     5
     20     5
     20     5
     20     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.

Functional forms

Code

#Build spline versions of your current final formulas

# helper: replace term inside formula text
replace_term_in_formula <- function(formula_obj, old_term, new_term) {
  f_txt <- paste(deparse(formula_obj), collapse = " ")
  f_txt <- base::gsub(
    pattern = paste0("(?<![[:alnum:]_])", old_term, "(?![[:alnum:]_])"),
    replacement = new_term,
    x = f_txt,
    perl = TRUE
  )
  stats::as.formula(f_txt)
}
add_boot_splines <- function(df) {
  a <- splines::ns(df$adm_age_rec3, df = 3)
  colnames(a) <- paste0("adm_age_ns", 1:3)
  d <- splines::ns(df$dit_m, df = 4)
  colnames(d) <- paste0("dit_ns", 1:4)
  p <- splines::ns(df$porc_pobr, df = 3)
  colnames(p) <- paste0("porc_pobr_ns", 1:3)
  cbind(df, as.data.frame(a), as.data.frame(d), as.data.frame(p))
}
py_corrected_datasets_boot <- lapply(py_corrected_datasets, add_boot_splines)

#-----------------------------
# READMISSION
#-----------------------------
# READMISSION: only main effects need replacement
formula_readmit_int_spline <- update(
  formula_shap_readmit_interact,
  . ~ . -
    adm_age_rec3 -
    dit_m -
    porc_pobr +
    adm_age_ns1 + adm_age_ns2 + adm_age_ns3 +
    dit_ns1 + dit_ns2 + dit_ns3 + dit_ns4 +
    porc_pobr_ns1 + porc_pobr_ns2 + porc_pobr_ns3
)
# optional extra interaction
formula_readmit_int_spline_i1 <- update(
  formula_readmit_int_spline,
  . ~ .
)

#-----------------------------
# MORTALITY
#-----------------------------
# DEATH: main effect + both age interactions must be expanded
formula_death_int_spline <- update(
  formula_shap_death_interact,
  . ~ . -
    adm_age_rec3 -
    adm_age_rec3:cohabitation_with_couple_children -
    adm_age_rec3:occupation_condition_corr24_unemployed +
    adm_age_ns1 + adm_age_ns2 + adm_age_ns3 +
    adm_age_ns1:cohabitation_with_couple_children +
    adm_age_ns2:cohabitation_with_couple_children +
    adm_age_ns3:cohabitation_with_couple_children +
    adm_age_ns1:occupation_condition_corr24_unemployed +
    adm_age_ns2:occupation_condition_corr24_unemployed +
    adm_age_ns3:occupation_condition_corr24_unemployed
)

Final SHAP model

Code

gc()

             used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    6421303  343.0   10743420   573.8   10743420   573.8
Vcells 1279867518 9764.7 2083736386 15897.7 1769896875 13503.3

Code

#2. Rank models across your imputed dataset list
safe_cox <- function(formula, data) {
  base::tryCatch(
    survival::coxph(
      formula,
      data = data,
      ties = "efron",
      x = TRUE,
      y = TRUE
    ),
    error = function(e) NULL
  )
}
rank_models <- function(forms, data_list) {
  purrr::map_dfr(names(forms), function(nm) {
    fits <- purrr::map(data_list, ~ safe_cox(forms[[nm]], .x))
    aics <- purrr::map_dbl(fits,~ if (base::is.null(.x)) NA_real_ else stats::AIC(.x))
    tibble::tibble(
      model = nm,
      n_fit = base::sum(!base::is.na(aics)),
      median_AIC = stats::median(aics, na.rm = TRUE),
      mean_AIC = base::mean(aics, na.rm = TRUE)
    )
  }) |>
    dplyr::arrange(median_AIC)
}

Code

#4. Plug directly into your existing CV evaluation

readm_forms <- list(
    base_shap_int = formula_shap_readmit_interact,
    spline_main   = formula_readmit_int_spline#,
    #spline_i1     = formula_readmit_int_spline_i1
)


death_forms <- list(
    base_shap_int = formula_shap_death_interact,
    spline_main   = formula_death_int_spline
)

rank_models(readm_forms, py_corrected_datasets_boot)

rank_models(death_forms, py_corrected_datasets_boot)

# A tibble: 2 × 4
  model         n_fit median_AIC mean_AIC
  <chr>         <int>      <dbl>    <dbl>
1 spline_main       5     54159.   54156.
2 base_shap_int     5     54188.   54186.

Code

compare_mi_models_d1(formula_readmit_int_spline, formula_shap_readmit_interact, py_corrected_datasets_boot)
compare_mi_models_d1(formula_death_int_spline, formula_shap_death_interact, py_corrected_datasets_boot)

Fitting the FULL model on 5 imputations...
Fitting the REDUCED model on 5 imputations...
Executing the Wald Test (D1)...

============================================================
MODEL COMPARISON RESULT (D1 Statistic)
============================================================
   test statistic df1   df2 dfcom      p.value           riv
 1 ~~ 2   21.7288  10 15213 15215 8.057224e-41 0.00002704545
============================================================
Fitting the FULL model on 5 imputations...
Fitting the REDUCED model on 5 imputations...
Executing the Wald Test (D1)...

============================================================
MODEL COMPARISON RESULT (D1 Statistic)
============================================================
   test statistic df1      df2 dfcom       p.value         riv
 1 ~~ 2  113.0172   9 3009.028  3013 4.982305e-183 0.002491375
============================================================

Code

options(future.globals.maxSize = 12 * 1024^3)

#formula_death_int_spline
results_readmit_int_shap_spl <- evaluate_mi_cv_safe_strata(
    formula = formula_readmit_int_spline, 
    imputed_list = py_corrected_datasets_boot, 
    time_col = "readmit_time_from_disch_m", 
    event_col = "readmit_event",
    k = 5,
    n_repeats = 20
)#
results_death_int_shap_spl <- evaluate_mi_cv_safe_strata(
    formula = formula_death_int_spline, 
    imputed_list = py_corrected_datasets_boot, 
    time_col = "death_time_from_disch_m", 
    event_col = "death_event",
    k = 5,
    n_repeats = 20
)
results_readmit_shap_int_spl_ibs <- cph_evaluate_boot_oob_mi_corrected(
    formula = formula_readmit_int_spline,
    imputed_list = py_corrected_datasets_boot, # List of data.frames
    time_col = "readmit_time_from_disch_m", 
    event_col = "readmit_event",
    tau = 99,                # Evaluate up to 60 months
    B = 500,
    cpus = cpus,
    verbose = TRUE,
    min_strata_events = 1
)
results_death_shap_int_spl_ibs <- cph_evaluate_boot_oob_mi_corrected(
    formula = formula_death_int_spline,
    imputed_list = py_corrected_datasets_boot, # List of data.frames
    time_col = "death_time_from_disch_m", 
    event_col = "death_event",
    tau = 107,                # Evaluate up to 60 months
    B = 500,
    cpus = cpus,
    verbose = TRUE,
    min_strata_events = 1
)
# ====
aicc_readmit_shap_int_spl <- compute_mi_aicc(formula_readmit_int_spline, py_corrected_datasets_boot)
aicc_readmit_shap_int_spl
aicc_death_shap_int_spl <- compute_mi_aicc(formula_death_int_spline, py_corrected_datasets_boot)
aicc_death_shap_int_spl

Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 99.10 months | C-tau: 94.14 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 99.10 months
Pooled Uno's C-Index: 0.6096 (repeat-quantile interval 0.6093-0.6100)
Pooled UnoC (survAUC::UnoC): 0.6764 (repeat-quantile interval 0.6436-0.6923)
Pooled IBS: 0.1605 (repeat-quantile interval 0.1604-0.1605)
Pooled IBS (IPCW-train): 0.3480 (repeat-quantile interval 0.3479-0.3482)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.6310 (repeat-quantile interval 0.6306-0.6313, valid repeats=20)
  t=36 m: C=0.6165 (repeat-quantile interval 0.6162-0.6168, valid repeats=20)
  t=60 m: C=0.6126 (repeat-quantile interval 0.6122-0.6129, valid repeats=20)
============================================================

Total runtime: 869.90 sec (14.50 min)
Starting Evaluation: 5 Imputations x 5 Folds x 20 Repeats = 500 Total Models
------------------------------------------------------------
Global evaluation tau: 107.94 months | C-tau: 102.54 months
Booting up 16 CPU cores using future backend...

============================================================
FINAL POOLED RESULTS (Cross-Validated + Multiply Imputed)
============================================================
Global Evaluation Time (tau): 107.94 months
Pooled Uno's C-Index: 0.7403 (repeat-quantile interval 0.7397-0.7409)
Pooled UnoC (survAUC::UnoC): 0.8146 (repeat-quantile interval 0.7311-0.8689)
Pooled IBS: 0.0352 (repeat-quantile interval 0.0352-0.0352)
Pooled IBS (IPCW-train): 0.0892 (repeat-quantile interval 0.0891-0.0894)
RMST-based Uno C-index by horizon:
  t=12 m: C=0.7434 (repeat-quantile interval 0.7417-0.7449, valid repeats=20)
  t=36 m: C=0.7556 (repeat-quantile interval 0.7550-0.7565, valid repeats=20)
  t=60 m: C=0.7559 (repeat-quantile interval 0.7551-0.7566, valid repeats=20)
============================================================

Total runtime: 833.07 sec (13.88 min)
Strata variables: plan_type_strata, tr_outcome_adm_discharge_rule_violation_undet
Categorical vars: plan_type_strata, tr_outcome_adm_discharge_rule_violation_undet
Bootstrap OOB + MI: B=500, M=5, tau=99.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.3479 (95% CI: 0.3361 - 0.3611)
Valid bootstrap replicates: 500 / 500
============================================================

There were 50 or more warnings (use warnings() to see the first 50)

Strata variables: plan_type_strata
Categorical vars: plan_type_strata
Bootstrap OOB + MI: B=500, M=5, tau=107.00
Using 8 worker(s)...

============================================================
FINAL POOLED RESULTS (OOB Bootstrap + MI)
============================================================
Pooled OOB IBS: 0.0893 (95% CI: 0.0826 - 0.0963)
Valid bootstrap replicates: 500 / 500
============================================================

There were 16 warnings (use warnings() to see them)


======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  272443.79
Mean AICc: 272443.93
Correction added (Penalty): 0.138820
Parameters (k): 32 | Effective Sample Size (Events): 15247
========================================================

======================================================
AICc ANALYSIS FOR MULTIPLY IMPUTED DATA
========================================================
Mean AIC:  54156.44
Mean AICc: 54156.90
Correction added (Penalty): 0.466135
Parameters (k): 26 | Effective Sample Size (Events): 3039
========================================================

$mean_aicc
[1] 54156.9

$full_results
       aic correction     aicc  k n_events
1 54144.71  0.4661355 54145.17 26     3039
2 54166.42  0.4661355 54166.88 26     3039
3 54161.88  0.4661355 54162.34 26     3039
4 54158.60  0.4661355 54159.06 26     3039
5 54150.58  0.4661355 54151.05 26     3039

Code

cal_readmit_shap_int_spl_ici <- calibrate_cph_grouped(
    formula = formula_readmit_int_spline,
    data = py_corrected_datasets_boot,
    times = c(12, 36, 60),
    validation = "cv",
    folds = 5,
    n_repeats = 20,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE
)
print(cal_readmit_shap_int_spl_ici)

Detected time variable: readmit_time_from_disch_m
Detected event variable: readmit_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done
Cox Death Calibration Analysis
==============================
Time variable: readmit_time_from_disch_m
Event variable: readmit_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months  ici_mean      ici_sd  ici_p025  ici_p975  e50_mean      e50_sd e50_p025  e50_p975  e90_mean      e90_sd  e90_p025
          12        NA          NA        NA        NA        NA          NA       NA        NA        NA          NA        NA
          36        NA          NA        NA        NA        NA          NA       NA        NA        NA          NA        NA
          60 0.1946144 0.002386226 0.1897261 0.1987201 0.1862922 0.002275681  0.18167 0.1902379 0.2781687 0.003588227 0.2709322
  e90_p975 emax_mean    emax_sd emax_p025 emax_p975    ece_mean       ece_sd    ece_p025    ece_p975   eo_mean         eo_sd   eo_p025
        NA        NA         NA        NA        NA 0.002879792 0.0003398131 0.002229429 0.003350318 0.9882398 0.00013663119 0.9880099
        NA        NA         NA        NA        NA 0.004524420 0.0004704997 0.003576915 0.005273577 0.9843719 0.00009559645 0.9841849
 0.2845125 0.5639945 0.01501757 0.5331512  0.595554 0.004486050 0.0004950966 0.003840764 0.005323307 0.9788791 0.00008757258 0.9787004
   eo_p975 n_runs n_imp
 0.9884925     20     5
 0.9845638     20     5
 0.9790629     20     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.

Code

gc()
cal_readmit_aj_rcv <- calibrate_cscox_aj_grouped(
    formula_readmit = formula_readmit_int_spline,
    formula_death = formula_death_int_spline,
    data = py_corrected_datasets_boot,
    times = c(12, 36, 60),
    n_imputations = 5,
    validation = "cv",
    folds = 5,
    n_repeats = 20,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE,
    g = 10,
    min_bin_n = 20,
    seed = 2125
)
print(cal_readmit_aj_rcv)
plot(cal_readmit_aj_rcv)


cal_death_ici <- calibrate_cph_grouped(
    formula = formula_death_int_spline,
    data = py_corrected_datasets_boot,
    times = c(12, 36, 60),
    validation = "cv",
    folds = 5,
    n_repeats = 20,
    compute_ici = TRUE,
    ici_times = 60,
    make_plots = TRUE
)
print(cal_death_ici)
plot(cal_death_ici)

# Methods
# We fitted cause-specific Cox models for readmission and death, and combined them to estimate the predicted cumulative incidence of readmission at 12, 36, and 60 months. Calibration was assessed by comparing grouped predicted risks against observed Aalen–Johansen estimates of readmission, thereby accounting for the competing risk of death. To reduce optimism, calibration was evaluated using 5-fold cross-validation across imputations.
# 
# Results
# The cause-specific Cox models showed excellent calibration for readmission risk at all evaluated horizons. Cross-validated expected-to-observed ratios were 0.99 at 12 months, 0.99 at 36 months, and 0.98 at 60 months, indicating minimal miscalibration in the large. Grouped expected calibration error was also very low across horizons (12 months: 0.0029; 36 months: 0.0041; 60 months: 0.0040).
# One thing: in your message, the printed output corresponds to one fitted object, but the second call with formula_readmit_int_spline2 and formula_death_int_spline does not show its printed result yet. So I can interpret the first object with certainty; for the second one, I’d need the printed summary too.

Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done
Grouped calibration: cause-specific Cox vs Aalen-Johansen
========================================================
Validation : cv 
Folds      : 5 
Repetitions: 20 
Imputations: 5 
Runs total : 20 
Groups     : 10 
Min bin n  : 20 
ICI        : enabled 
ICI times  : 60 
ICI df     : 4 

Variability across repetitions:
 time_months    ici_mean       ici_sd    ici_p025    ici_p975     e50_mean       e50_sd     e50_p025    e50_p975    e90_mean       e90_sd
          12          NA           NA          NA          NA           NA           NA           NA          NA          NA           NA
          36          NA           NA          NA          NA           NA           NA           NA          NA          NA           NA
          60 0.001667115 0.0001225854 0.001446627 0.001908856 0.0008305015 0.0001491575 0.0005229563 0.001010742 0.003521011 0.0004012575
    e90_p025    e90_p975    ece_mean       ece_sd    ece_p025    ece_p975   eo_mean         eo_sd   eo_p025   eo_p975 n_runs n_imp
          NA          NA 0.003020429 0.0006156440 0.001899168 0.004392540 0.9894582 0.00011035047 0.9892696 0.9897163     20     5
          NA          NA 0.005034069 0.0007408328 0.003795272 0.006158427 0.9859025 0.00008115265 0.9857816 0.9860370     20     5
 0.002908731 0.004522435 0.004571945 0.0007436927 0.003304690 0.006612703 0.9807375 0.00008022764 0.9806223 0.9808836     20     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - ICI, E50, and E90 are only computed when requested.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.
Detected time variable: death_time_from_disch_m
Detected event variable: death_event
Using 5 imputed dataset(s).
Validation: cv
Repetitions: 20 | same fold split across imputations within each repetition.
ICI: enabled at horizons 60.
Predicting repetition 1/20... done
Predicting repetition 2/20... done
Predicting repetition 3/20... done
Predicting repetition 4/20... done
Predicting repetition 5/20... done
Predicting repetition 6/20... done
Predicting repetition 7/20... done
Predicting repetition 8/20... done
Predicting repetition 9/20... done
Predicting repetition 10/20... done
Predicting repetition 11/20... done
Predicting repetition 12/20... done
Predicting repetition 13/20... done
Predicting repetition 14/20... done
Predicting repetition 15/20... done
Predicting repetition 16/20... done
Predicting repetition 17/20... done
Predicting repetition 18/20... done
Predicting repetition 19/20... done
Predicting repetition 20/20... done
Calibrating repetition 1/20... done
Calibrating repetition 2/20... done
Calibrating repetition 3/20... done
Calibrating repetition 4/20... done
Calibrating repetition 5/20... done
Calibrating repetition 6/20... done
Calibrating repetition 7/20... done
Calibrating repetition 8/20... done
Calibrating repetition 9/20... done
Calibrating repetition 10/20... done
Calibrating repetition 11/20... done
Calibrating repetition 12/20... done
Calibrating repetition 13/20... done
Calibrating repetition 14/20... done
Calibrating repetition 15/20... done
Calibrating repetition 16/20... done
Calibrating repetition 17/20... done
Calibrating repetition 18/20... done
Calibrating repetition 19/20... done
Calibrating repetition 20/20... done
Cox Death Calibration Analysis
==============================
Time variable: death_time_from_disch_m
Event variable: death_event
Validation: cv
Folds: 5
Repetitions: 20
Imputations: 5
Runs total: 20
Groups: 10
Min bin n: 20
ICI: enabled
ICI times: 60
ICI df: 4

Variability across repetitions:
 time_months   ici_mean       ici_sd   ici_p025   ici_p975   e50_mean       e50_sd   e50_p025   e50_p975   e90_mean      e90_sd   e90_p025
          12         NA           NA         NA         NA         NA           NA         NA         NA         NA          NA         NA
          36         NA           NA         NA         NA         NA           NA         NA         NA         NA          NA         NA
          60 0.04182577 0.0007379223 0.03996377 0.04267822 0.02514862 0.0003708956 0.02424979 0.02559342 0.09626168 0.001669974 0.09223143
   e90_p975 emax_mean    emax_sd emax_p025 emax_p975    ece_mean        ece_sd     ece_p025    ece_p975  eo_mean        eo_sd  eo_p025
         NA        NA         NA        NA        NA 0.001096574 0.00009404802 0.0009656311 0.001270391 1.009547 0.0003051483 1.009164
         NA        NA         NA        NA        NA 0.002879313 0.00013131935 0.0026673066 0.003153210 1.017916 0.0003427558 1.017240
 0.09803166 0.5708059 0.01895727 0.5378863 0.6142696 0.003223805 0.00021573009 0.0029020586 0.003737602 1.034990 0.0002871116 1.034498
  eo_p975 n_runs n_imp
 1.010265     20     5
 1.018520     20     5
 1.035487     20     5

Notes:
  - Each repetition uses the same fold split across all imputations.
  - Models are fit separately within each imputation; held-out predictions are averaged across imputations.
  - Thin gray lines are repetition-specific calibration curves.
  - Gray ribbon is the pointwise 2.5th-97.5th percentile band across repetitions.
  - Blue points/line are the mean calibration curve across repetitions.
  - Grouped observed risk is estimated with Kaplan-Meier.
  - ICI, E50, E90, and Emax are from a Cox-based smooth calibration model.
  - These summaries describe split-to-split variability, not formal bootstrap optimism correction.
  - ECE is grouped expected calibration error.
  - E/O is mean predicted / observed probability.

Session info

Code

message(paste0("R library: ", Sys.getenv("R_LIBS_USER")))
message(paste0("Date: ",withr::with_locale(new = c('LC_TIME' = 'C'), code =Sys.time())))
message(paste0("Editor context: ", path))
cat("quarto version: "); quarto::quarto_version()
sesion_info <- devtools::session_info()

library(htmltools)
tabla <- dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) |> 
  DT::datatable(
    filter = 'top',
    colnames = c('Row number' = 1, 'Package' = 2, 'Version' = 3),
    caption = htmltools::tags$caption(
      style = 'caption-side: top; text-align: left;',
      '', htmltools::em('R packages')
    ),
    options = list(
      pageLength = 10,
      initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '70%', 
            'white-space': 'nowrap',
            'line-height': '0.75em'
        });",
        "}"
      )
    )
  )

# Guardar el HTML
htmlwidgets::saveWidget(tabla, "tabla_packages.html", selfcontained = TRUE)

# Leer y mostrar el HTML (copia esto)
#cat(readLines("tabla_packages.html"), sep = "\n")

R library: G:/My Drive/Alvacast/SISTRAT 2023/renv/library/windows/R-4.4/x86_64-w64-mingw32
Date: 2026-03-27 08:20:26.463439
Editor context: G:/My Drive/Alvacast/SISTRAT 2023/data/20241015_out/251001_c1_1025_df_prev1y.rds
quarto version:

Warning message:
In system2("quarto", "-V", stdout = TRUE, env = paste0("TMPDIR=",  :
  running command '"quarto" TMPDIR=C:/Users/andre/AppData/Local/Temp/RtmpYDfCbF/file8fdc570b54bc -V' had status 1

<!DOCTYPE html> datatables

Save

Code

wdpath<-
paste0(gsub("/cons","",gsub("cons","",paste0(getwd(),"/cons"))))
envpath<- if(regmatches(wdpath, regexpr("[A-Za-z]+", wdpath))=="G"){"G:/Mi unidad/Alvacast/SISTRAT 2023/"}else{"E:/Mi unidad/Alvacast/SISTRAT 2023/"}
paste0(getwd(),"/cons")
file.path(paste0(wdpath,"data/20241015_out"))
file.path(paste0(envpath,"data/20241015_out"))
# Save
rdata_path <- file.path(wdpath, "data/20241015_out", paste0("pred22_ndp_", format(Sys.time(), "%Y_%m_%d"), ".Rdata"))
save.image(rdata_path)
cat("Saved in:",
    rdata_path)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
if (Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv")) {
  password <- Sys.getenv("PASSWORD_SECRET")
} else {
  if (interactive()) {
    utils::savehistory(tempfile())
    Sys.setenv(PASSWORD_SECRET = readLines(paste0(wdpath, "secret.txt"), warn = FALSE))
    utils::loadhistory()
  }
  Sys.setenv(PASSWORD_SECRET = readLines(paste0(wdpath, "secret.txt"), warn = FALSE))
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
save.image(paste0(rdata_path,".enc"))
# Encriptar el archivo en el mismo lugar
httr2::secret_encrypt_file(path = paste0(rdata_path,".enc"), key = "PASSWORD_SECRET")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
cat("Copy renv lock into cons folder\n")
if (Sys.getenv("RSTUDIO_SESSION_TYPE") == "server" || file.exists("/.dockerenv")) {
  message("Running on RStudio Server or inside Docker. Folder copy skipped.")
} else {
    
  source_folder <- 
  destination_folder <- paste0(wdpath,"cons/renv")
  
  # Copy the folder recursively
    file.copy(paste0(wdpath,"renv.lock"), paste0(wdpath,"cons/renv.lock"), overwrite = TRUE)
  
  message("Renv lock copy performed.")
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
time_after_dedup2<-Sys.time()

Saved in: g:/My Drive/Alvacast/SISTRAT 2023///data/20241015_out/pred22_ndp_2026_03_27.RdataCopy renv lock into cons folder
Renv lock copy performed.