SENDAs Agreement 1 Codebook

Author

Andrés González Santa Cruz

Published

October 28, 2025


Code
# Restore the objects stored in the saved .Rdata image into the session.
# NOTE(review): presumably this is what provides
# SISTRAT23_c1_2010_2024_df_prev1y, which is used further down but is not
# created in the visible part of this script -- confirm.
# NOTE(review): the absolute "G:/My Drive/..." path is machine-specific, so the
# document will only render where that drive layout exists.
load("G:/My Drive/Alvacast/SISTRAT 2023/data/20241015_out/26_ndp_2025_10_01.Rdata")

library(dplyr)

Adjuntando el paquete: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Code
library(codebook)
library(labelled)

Adjuntando el paquete: 'labelled'
The following object is masked from 'package:codebook':

    to_factor
Code
# Variable labels for the main analysis variables ----
# Named list: each name is a column of SISTRAT23_c1_2010_2024_df_prev1y
# (hash_key included) and each value is the label shown in the codebook.
# Labels are based on the TSV descriptions where available and inferred from
# the processing notes otherwise.
labels_map <- list(
  "TABLE" = "Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., \"2023\", \"2015\"). No transformation applied. Made in import_c1_top_data_adm_25.qmd",
  "TABLE_rec_series" = "Recoded series of TABLE",
  "rn" = "Row number or treatment episode sequence per patient. Created by `row_number() over (partition by hash_key order by adm_date)`, Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user.",
  "rn_series" = "Series of row number",
  "num_trat_ant" = "Number of prior treatments reported by patient. Direct import. May not be reliable. Not used.",
  "fecha_ultimo_tratamiento" = "Patient-reported time since last treatment (e.g., \"3 a 4 anos\"). Direct import. Categorical, not numeric. Many missing values (~50%)",
  "hash_key" = "Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letter. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation.",
  "min_adm_age_rec3" = "min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages.",
  "adm_age_rec3" = "Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat]",
  "birth_date_rec" = "Final authoritative date of birth. Created by selecting/imputing best `birth_date` per `hash_key` (via `kNN`, logic, or min/max validation). Used to recalculate `adm_age_rec`.",
  "adm_date_num_rec2" = "Numeric admission date recoded version 2",
  "adm_date_rec2" = "Intermediate admission date Created in Step 2 (0.c) for the resolution of overlaps",
  # Unit corrected from "months" to "days": the derived variables named in the
  # label itself are day-based (treat_days_pow2/pow3, "<90days").
  # TODO confirm against the step that builds dit_rec6.
  "dit_rec6" = "Length of stay in treatment (days) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90]",
  "disch_date_rec6" = "Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling.",
  "disch_date_num_rec6_trans" = "Imputed discharge based on reference date Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms.",
  "def_date" = "Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a).",
  "adm_motive" = "Admission motive",
  "tr_compliance_rec7" = "Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing treatment motive but with a posterior treatment after. Replaced for dropout (they had incomplete discharge information)",
  "adm_disch_reason" = "Placeholder for administrative discharge reason (mostly empty). Created as copy of `motivo_de_egreso_alta_administrativa`. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge.",
  "referral_type" = "Recoded type of referral center. Created from `tipo_centro_derivacion` (translation): e.g., \"otro centro\" , \"other facility\", \"cosam\", \"primary health care\".",
  "plan_type" = "Recoded treatment plan type. Created as copy and translation of `tipo_de_plan` (step 1). Used in collapsing logic and outcome modeling.",
  "id_centro" = "Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive `centro_muj` (women-specific centers).",
  # NOTE(review): the last sentence of the "senda" label is garbled ("may be in
  # SENDA Yes programs may anyways part of ..."). Left untouched because the
  # intended meaning cannot be determined from this script -- please reword.
  "senda" = "Indicator if treatment was under SENDA agreement (\"si\"/\"no\"). Direct import. Used to derive `senda_adm_date` and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients may be in SENDA Yes programs may anyways part of SENDA programs in some cases",
  "pub_center" = "Binary indicator for public center. Created as `tipo_centro == \"publico\"`. Used in institutional comparisons.",
  "primary_sub" = "Primary substance of use",
  "second_sub1" = "Normalized secondary substances at admission (1)",
  "second_sub2" = "Normalized secondary substances at admission (2)",
  "second_sub3" = "Normalized secondary substances at admission (3)",
  "prim_sub_freq" = "Frequency of primary substance use at admission (recoded: prim_sub_freq_rec)",
  "prim_sub_route" = "Recoded route of administration. Created from `via_administracion_sustancia_principal` into standardized terms.",
  "LB_age_primary_onset_rec2" = "Lower bound age of primary substance onset recoded 2",
  "UB_age_primary_onset_rec2" = "Upper bound age of primary substance onset recoded 2",
  "age_primary_onset_rec2" = "Created in Step 3.",
  "first_sub_used" = "Normalized first substance used. Created as `tolower(gsub(\"_\", \" \", sustancia_de_inicio))`. Input for resolving inconsistencies, prioritizing most vulnerable value = `sus_ini_mod_mvv`. Despite this, this variable was normalized in Step 3 for inconsistent values in patients.",
  "sus_ini_mod_mvv" = "Sus ini mod mvv",
  "sus_ini_1" = "Sus ini 1",
  "sus_ini_2" = "Sus ini 2",
  "sus_ini_3" = "Sus ini 3",
  "LB_age_subs_onset_rec2" = "Lower bound age of substance onset recoded 2",
  "UB_age_subs_onset_rec2" = "Upper bound age of substance onset recoded 2",
  "age_subs_onset_rec2" = "Created in Step 3.",
  "biopsych_comp" = "Recoded biopsychosocial severity. Created from `compromiso_biopsicosocial`: e.g., \"moderado\", \"2-moderate\".",
  "mod_psiq_cie_10" = "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
  "mod_psiq_dsm_iv" = "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
  "diagnostico_trs_fisico" = "Physical health diagnosis. Direct import. Often concatenated (e.g., \"Hepatitis alcoholica: cardiopatías\"). Preserved as-is for clinical profiling.",
  "otros_probl_at_sm_or" = "Other mental health issues (e.g., \"abuso sexual\"). Direct import. Recoded to `otros_probl_at_sm_or`. Used in vulnerability and trauma profiling.",
  "sub_dep_icd10_status" = "Severity of Substance Use Disorder (SUD)",
  "evaluacindelprocesoteraputico" = "Therapeutic process evaluation (e.g., \"logro alto\"). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis.",
  "eva_consumo" = "Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly.",
  "eva_fam" = "Family situation evaluation at discharge. Direct import.",
  "eva_relinterp" = "Interpersonal relations evaluation at discharge. Direct import.",
  "eva_ocupacion" = "Occupational situation evaluation at discharge. Direct import.",
  "eva_sm" = "Mental health evaluation at discharge. Direct import.",
  "eva_fisica" = "Physical health evaluation at discharge. Direct import.",
  "eva_transgnorma" = "Social norm transgression evaluation at discharge. Direct import.",
  "dg_global_nec_int_soc_or" = "Global social integration need at admission. Direct import. Recoded to `dg_global_nec_int_soc_or`. Categorized as \"altas\", \"medias\", \"bajas\". Used in social vulnerability index.",
  "dg_nec_int_soc_cap_hum_or" = "Human capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_hum_or`. Part of social integration profile.",
  "dg_nec_int_soc_cap_fis_or" = "Physical capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_fis_or`.",
  "dg_nec_int_soc_cap_soc_or" = "Social capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_soc_or`.",
  "dg_global_nec_int_soc_egr_or" = "Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis.",
  "dg_nec_int_soc_cap_hum_egr_or" = "Human capital need at discharge. Direct import.",
  "dg_nec_int_soc_cap_fis_egr_or" = "Physical capital need at discharge. Direct import.",
  "dg_nec_int_soc_cap_soc_egr_or" = "Social capital need at discharge. Direct import.",
  "usuario_tribunal_trat_droga" = "Court-referred to drug treatment (very unbalanced, ~1% yes)",
  "nationality_cons" = "Recoded/standardized from nacionalidad or pais_nacimiento.",
  "ethnicity_c1_c6_historic" = "Concatenated unique non-Chilean/non-\"no pertenece\" values from etnia (C1) and ethnicity (C6), grouped by hash_key.",
  "discapacidad" = "Indicator of disability (\"si\"/\"no\"). Direct import. Used with `opcion_discapacidad` for disability profiling. Starts coding in 2019",
  "opcion_discapacidad" = "Type of disability (e.g., \"de causa psiquica\"). Direct import. Only populated if `discapacidad == \"si\"`. Used in accessibility and needs assessment.Starts coding in 2019",
  "sex_rec" = "Sex (often used for biological categorization)",
  "identidad_de_genero" = "Gender identity (e.g., \"masculino\", \"femenino\"). Direct import. Recoded to `identidad_de_genero` factor. Used to validate/override `sexo` in `sex_rec` (Step 2). Starts coding in 2019",
  "orientacion_sexual" = "Sexual orientation (e.g., \"heterosexual\"). Direct import. Sparse. Preserved for inclusivity analysis.Starts coding in 2022",
  "pregnant" = "Recoded pregnancy status at admission. Created from `se_trata_de_una_mujer_embarazada` (NA = \"no\"). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to be introduced in 2011.",
  "pregnant_disch" = "Recoded pregnancy status at discharge. Created from `ha_estado_embarazada_egreso`. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge.",
  "marital_status" = "Marital status",
  "tiene_menores_de_edad_a_cargo" = "Indicator if responsible for minors (\"si\"/\"no\"). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index.",
  "num_hijos_trat_res" = "Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag `hijos_trat_res` (\"Had children in treatments\").",
  "numero_de_hijos" = "Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in `kNN` imputation and to derive `tiene_menores_de_edad_a_cargo`.",
  "con_quien_vive" = "Living arrangement/Cohabitation status",
  "tipo_de_vivienda" = "Type of housing",
  "precariedad_vivienda" = "Precarious housing. Direct import. Available since 2022 databases.",
  "tenure_status_household" = "Housing situation",
  "servicios_basicos_95" = "Basic sanitation services. Direct import. Available since 2022 databases.",
  "perso_dormitorio_vivienda" = "Persons per dwelling bedroom Direct import. Available since 2022 databases.",
  "ed_attainment" = "Recoded education level. Created from `escolaridad_ultimo_ano_cursado` into standardized categories (e.g., \"media incompleta\" = \"2-Completed high school or less\").",
  "occupation_condition" = "Recoded employment status. Created from `condicion_ocupacional`: e.g., \"cesante\"= \"unemployed\" (in Step 1).",
  "occupation_status" = "Recoded employment category. Created from `categoria_ocupacional`. Sparse; often NA.",
  "rubro_trabaja" = "Occupational sector (e.g., \"trabajadores no calificados\"). Direct import. Recoded to `rubro_trabaja_mod`. 68% missing. 13.8% among employed",
  "laboral_ingresos" = "Monthly income. Direct import. Available since 2022 databases.",
  "yr_block" = "Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding `birth_date` year to nearest decade (e.g., `floor(year(birth_date)/10)*10`). To look for data similarities (Step 1)",
  "OBS_series" = "OBS series",
  "senda_series" = "SENDA series",
  "pub_center_series" = "Public center series",
  "id_centro_series" = "Center ID series",
  "disch_date_num_rec6" = "Numeric discharge date recoded 6",
  "dg_psiq_cie_10_instudy" = "Psychiatric comorbidity (ICD-10)",
  "dg_psiq_cie_10_dg" = "Mutually exclusive (as corrected in Step 3)",
  "dg_psiq_dsm_iv_instudy" = "Psychiatric comorbidity (DSM-IV)",
  "dg_psiq_dsm_iv_dg" = "Mutually exclusive (as corrected in Step 3)",
  "plan_type_series" = "Plan type series",
  "tr_compliance_rec7_series" = "Treatment compliance recoded 7 series",
  "referral_type_series" = "Referral type series",
  "adm_age_rec3_series" = "Admission age recoded 3 series",
  "adm_date_rec2_series" = "Admission date recoded 2 series",
  "disch_date_rec6_series" = "Discharge date recoded 6 series",
  "diagnostico_trs_fisico_series" = "Physical diagnosis series",
  "otros_probl_at_sm_or_series" = "Other mental health problems series",
  "OBS" = "Observed flags or notes from cleaning (e.g., \"Duplicated Cases\"). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. It is separated by semi-colons",
  "occupation_status_corr" = "Corrected employment status Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as 'other' if missing. Depends on occupation_condition_inferred, occupation_status.",
  "occupation_condition_corr24" = "Employment status",
  "plan_type_corr" = "Treatment modality",
  "ed_attainment_corr" = "Educational attainment",
  "polysubstance_strict" = "Polysubstance use",
  "icd10_diag1" = "ICD10 diagnosis 1 Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
  "icd10_diag2" = "ICD10 diagnosis 2 Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
  "icd10_diag3" = "ICD10 diagnosis 3 Created in Step 4. Separated from mod_psiq_cie_10 \"::\". Depends on mod_psiq_cie_10.",
  "dsmiv_diag1" = "DSMIV diagnosis 1 Created in Step 4. Separated from mod_psiq_dsm_iv \"::\". Depends on mod_psiq_dsm_iv.",
  "dsmiv_diag2" = "DSMIV diagnosis 2 Created in Step 4. Separated from mod_psiq_dsm_iv \"::\". Depends on mod_psiq_dsm_iv.",
  "dsmiv_diag3" = "DSMIV diagnosis 3 Created in Step 4. Separated from mod_psiq_dsm_iv \"::\". Depends on mod_psiq_dsm_iv.",
  "adm_year" = "Admission year",
  "porc_pobr" = "Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6]",
  "clasificacion" = "Urbanization level of the commune of residence",
  # The two km2 labels previously repeated their text, and the log label also
  # carried the centering formula; each now states only its own transformation.
  "km2_c" = "Centered km^2. Created in Step 4. km_2 - mean(km_2). Depends on km_2.",
  "km2_log" = "Log km^2. Created in Step 4. log1p(km_2). Depends on km_2.",
  "porc_pobr_log" = "Log poverty Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr.",
  "porc_pobr_c" = "Centered poverty Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr."
  # Removed "disch_date_real" as it doesn't exist in the dataframe
)


# Attach the labels to the data frame; `labels_map` is a named list, so each
# label is paired with the column of the same name.
var_label(SISTRAT23_c1_2010_2024_df_prev1y) <- labels_map

# The codebook covers exactly the labelled variables, in the order they are
# listed in `labels_map`. Deriving the vector from the map (instead of keeping
# a second hand-written copy of the same 125 names) means a variable added to
# or removed from the map can no longer get out of sync with the subset.
main_vars <- names(labels_map)

# all_of() makes the selection strict: a name missing from the data frame is
# an error rather than being silently dropped.
df_subset <- SISTRAT23_c1_2010_2024_df_prev1y %>%
  select(all_of(main_vars))

# Build the object handed to the codebook generator ----
codebook_data <- df_subset

# Dataset-level metadata (reported in the "Metadata" section of the codebook).
# NOTE(review): datePublished is 2024-01-01, while the loaded data file name
# and the document date point to 2025 -- confirm this date is intended.
metadata(codebook_data)$datePublished <- "2024-01-01"
metadata(codebook_data)$temporalCoverage <- "2010-2024"
metadata(codebook_data)$spatialCoverage <- "Chile"

# Authors: one entry per person, each with an organizational affiliation.
# The constructors live inside local() so they do not leak into the session.
metadata(codebook_data)$creator <- local({
  organization <- function(org_name) {
    list("@type" = "Organization", name = org_name)
  }
  # `email` is optional; when absent the field is omitted altogether, and when
  # present it sits between familyName and affiliation.
  person <- function(given, family, org_name, email = NULL) {
    c(
      list("@type" = "Person", givenName = given, familyName = family),
      if (!is.null(email)) list(email = email),
      list(affiliation = organization(org_name))
    )
  }
  list(
    person(
      "Álvaro", "Castillo-Carniglia",
      "Universidad San Sebastián, Chile"
    ),
    person(
      "Andrés", "González-Santa Cruz",
      "Universidad de Chile, Public Health",
      email = "gonzalez.santacruz.andres@gmail.com"
    ),
    person(
      "Amaru", "Agüero Jiménez",
      "Universidad del Desarrollo, Chile"
    )
  )
})

# Citation, project URL and descriptive fields.
metadata(codebook_data)$citation <- "Castillo-Carniglia, Á., González-Santa Cruz, A., & Agüero Jiménez, A. (2024). Workflow of data management and analysis - Chilean substance use treatment administrative data. Consolidation of Agreement 1 Databases from 2010 to 2024. Funded by FONDECYT regular 1191282."
metadata(codebook_data)$url <- "https://github.com/FONDECYTACC/cons2025"
metadata(codebook_data)$name <- "Agreement 1 SENDA"
metadata(codebook_data)$description <- "Information About Agreement 1 of SENDA and MINSAL"
Code
# Render the codebook for the labelled subset: the dataset metadata set above,
# followed by a per-variable section (label, distribution, summary statistics).
codebook(codebook_data)

Metadata

Description

Dataset name: Agreement 1 SENDA

Information About Agreement 1 of SENDA and MINSAL

Metadata for search engines
  • Temporal Coverage: 2010-2024

  • Spatial Coverage: Chile

  • Citation: Castillo-Carniglia, Á., González-Santa Cruz, A., & Agüero Jiménez, A. (2024). Workflow of data management and analysis - Chilean substance use treatment administrative data. Consolidation of Agreement 1 Databases from 2010 to 2024. Funded by FONDECYT regular 1191282.

  • URL: https://github.com/FONDECYTACC/cons2025

  • Date published: 2024-01-01

  • Creator:

name value
1 Person , Álvaro , Castillo-Carniglia , Organization , Universidad San Sebastián, Chile
2 Person , Andrés , González-Santa Cruz , gonzalez.santacruz.andres@gmail.com, Organization , Universidad de Chile, Public Health
3 Person , Amaru , Agüero Jiménez , Organization , Universidad del Desarrollo, Chile
x
TABLE
TABLE_rec_series
rn
rn_series
num_trat_ant
fecha_ultimo_tratamiento
hash_key
min_adm_age_rec3
adm_age_rec3
birth_date_rec
adm_date_num_rec2
adm_date_rec2
dit_rec6
disch_date_rec6
disch_date_num_rec6_trans
def_date
adm_motive
tr_compliance_rec7
adm_disch_reason
referral_type
plan_type
id_centro
senda
pub_center
primary_sub
second_sub1
second_sub2
second_sub3
prim_sub_freq
prim_sub_route
LB_age_primary_onset_rec2
UB_age_primary_onset_rec2
age_primary_onset_rec2
first_sub_used
sus_ini_mod_mvv
sus_ini_1
sus_ini_2
sus_ini_3
LB_age_subs_onset_rec2
UB_age_subs_onset_rec2
age_subs_onset_rec2
biopsych_comp
mod_psiq_cie_10
mod_psiq_dsm_iv
diagnostico_trs_fisico
otros_probl_at_sm_or
sub_dep_icd10_status
evaluacindelprocesoteraputico
eva_consumo
eva_fam
eva_relinterp
eva_ocupacion
eva_sm
eva_fisica
eva_transgnorma
dg_global_nec_int_soc_or
dg_nec_int_soc_cap_hum_or
dg_nec_int_soc_cap_fis_or
dg_nec_int_soc_cap_soc_or
dg_global_nec_int_soc_egr_or
dg_nec_int_soc_cap_hum_egr_or
dg_nec_int_soc_cap_fis_egr_or
dg_nec_int_soc_cap_soc_egr_or
usuario_tribunal_trat_droga
nationality_cons
ethnicity_c1_c6_historic
discapacidad
opcion_discapacidad
sex_rec
identidad_de_genero
orientacion_sexual
pregnant
pregnant_disch
marital_status
tiene_menores_de_edad_a_cargo
num_hijos_trat_res
numero_de_hijos
con_quien_vive
tipo_de_vivienda
precariedad_vivienda
tenure_status_household
servicios_basicos_95
perso_dormitorio_vivienda
ed_attainment
occupation_condition
occupation_status
rubro_trabaja
laboral_ingresos
yr_block
OBS_series
senda_series
pub_center_series
id_centro_series
disch_date_num_rec6
dg_psiq_cie_10_instudy
dg_psiq_cie_10_dg
dg_psiq_dsm_iv_instudy
dg_psiq_dsm_iv_dg
plan_type_series
tr_compliance_rec7_series
referral_type_series
adm_age_rec3_series
adm_date_rec2_series
disch_date_rec6_series
diagnostico_trs_fisico_series
otros_probl_at_sm_or_series
OBS
occupation_status_corr
occupation_condition_corr24
plan_type_corr
ed_attainment_corr
polysubstance_strict
icd10_diag1
icd10_diag2
icd10_diag3
dsmiv_diag1
dsmiv_diag2
dsmiv_diag3
adm_year
porc_pobr
clasificacion
km2_c
km2_log
porc_pobr_log
porc_pobr_c

Variables

TABLE

Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., “2023”, “2015”). No transformation applied. Made in import_c1_top_data_adm_25.qmd

Distribution

Distribution of values for TABLE

Distribution of values for TABLE

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
TABLE Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., “2023”, “2015”). No transformation applied. Made in import_c1_top_data_adm_25.qmd character 0 1 15 0 4 4 0

TABLE_rec_series

Recoded series of TABLE

Distribution

Distribution of values for TABLE_rec_series

Distribution of values for TABLE_rec_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
TABLE_rec_series Recoded series of TABLE character 0 1 400 0 8 50 0

rn

Row number or treatment episode sequence per patient. Created by row_number() over (partition by hash_key order by adm_date), Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user.

Distribution

Distribution of values for rn

Distribution of values for rn

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
rn Row number or treatment episode sequence per patient. Created by row_number() over (partition by hash_key order by adm_date), Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user. numeric 0 1 1 122890 259395 128243.1 78217.81 ▇▇▇▆▇

rn_series

Series of row number

Distribution

Distribution of values for rn_series

Distribution of values for rn_series

153200 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
rn_series Series of row number character 153200 0.0595284 9697 0 8 48 0

num_trat_ant

Number of prior treatments reported by patient. Direct import. May not be reliable. Not used.

Distribution

Distribution of values for num_trat_ant

Distribution of values for num_trat_ant

9266 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
num_trat_ant Number of prior treatments reported by patient. Direct import. May not be reliable. Not used. numeric 9266 0.9431174 0 1 11 0.898842 1.172059 ▇▁▁▁▁

fecha_ultimo_tratamiento

Patient-reported time since last treatment (e.g., “3 a 4 anos”). Direct import. Categorical, not numeric. Many missing values (~50%)

Distribution

Distribution of values for fecha_ultimo_tratamiento

Distribution of values for fecha_ultimo_tratamiento

80571 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
fecha_ultimo_tratamiento Patient-reported time since last treatment (e.g., “3 a 4 anos”). Direct import. Categorical, not numeric. Many missing values (~50%) character 80571 0.5053868 6 0 10 15 0

hash_key

Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letter. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation.

Distribution

Distribution of values for hash_key

Distribution of values for hash_key

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
hash_key Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letter. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation. character 0 1 121299 0 64 64 0

min_adm_age_rec3

min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages.

Distribution

Distribution of values for min_adm_age_rec3

Distribution of values for min_adm_age_rec3

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
min_adm_age_rec3 min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages. numeric 0 1 13 34 89 35.95135 11.04595 ▅▇▃▁▁

adm_age_rec3

Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat]

Distribution

Distribution of values for adm_age_rec3

Distribution of values for adm_age_rec3

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
adm_age_rec3 Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat] numeric 0 1 13 35 89 36.91851 11.07224 ▅▇▃▁▁

birth_date_rec

Final authoritative date of birth. Created by selecting/imputing best birth_date per hash_key (via kNN, logic, or min/max validation). Used to recalculate adm_age_rec.

Distribution

19660  unique, categorical values, so not shown.

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique min median max
birth_date_rec Final authoritative date of birth. Created by selecting/imputing best birth_date per hash_key (via kNN, logic, or min/max validation). Used to recalculate adm_age_rec. Date 0 1 19660 1928-09-18 1982-01-02 2008-04-16

adm_date_num_rec2

Numeric admission date recoded version 2

Distribution

Distribution of values for adm_date_num_rec2

Distribution of values for adm_date_num_rec2

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
adm_date_num_rec2 Numeric admission date recoded version 2 numeric 0 1 10957 17345 20077 17378.37 1528.861 ▁▁▆▇▆

adm_date_rec2

Intermediate admission date Created in Step 2 (0.c) for the resolution of overlaps

Distribution

5085  unique, categorical values, so not shown.

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique min median max
adm_date_rec2 Intermediate admission date Created in Step 2 (0.c) for the resolution of overlaps Date 0 1 5085 2000-01-01 2017-06-28 2024-12-20

dit_rec6

Length of stay in treatment (months) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90]

Distribution

Distribution of values for dit_rec6

Distribution of values for dit_rec6

3103 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
dit_rec6 Length of stay in treatment (months) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90] numeric 3103 0.9809512 0 180 1988 236.0513 199.5927 ▇▁▁▁▁

disch_date_rec6

Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling.

Distribution

5335  unique, categorical values, so not shown.

3270 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique min median max
disch_date_rec6 Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling. Date 3270 0.979926 5335 2003-01-01 2017-12-27 2025-05-28

disch_date_num_rec6_trans

Imputed discharge based on reference date Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms.

Distribution

Distribution of values for disch_date_num_rec6_trans

Distribution of values for disch_date_num_rec6_trans

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
disch_date_num_rec6_trans Imputed discharge based on reference date Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms. numeric 0 1 12053 17563 20236 17619.79 1539.775 ▁▂▇▇▇
Warning: Setting row names on a tibble is deprecated.

def_date

Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a).

Distribution

2289  unique, categorical values, so not shown.

157391 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique min median max row expected actual
def_date Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a). Date 157391 0.0338005 2289 2010-01-04 2018-04-19 2020-12-31 7864 date like %d-%m-%Y 15-NA-2008

adm_motive

Admission motive

Distribution

Distribution of values for adm_motive

Distribution of values for adm_motive

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
adm_motive Admission motive character 0 1 5 0 5 46 0

tr_compliance_rec7

Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing treatment motive but with a subsequent treatment episode; these were replaced with dropout (they had incomplete discharge information).

Distribution

Distribution of values for tr_compliance_rec7

Distribution of values for tr_compliance_rec7

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
tr_compliance_rec7 Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing treatment motive but with a subsequent treatment episode; these were replaced with dropout (they had incomplete discharge information). character 0 1 8 0 8 19 0

adm_disch_reason

Placeholder for administrative discharge reason (mostly empty). Created as copy of motivo_de_egreso_alta_administrativa. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge.

Distribution

Distribution of values for adm_disch_reason

Distribution of values for adm_disch_reason

156643 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
adm_disch_reason Placeholder for administrative discharge reason (mostly empty). Created as copy of motivo_de_egreso_alta_administrativa. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge. character 156643 0.0383924 4 0 5 16 0

referral_type

Recoded type of referral center. Created from tipo_centro_derivacion (translation): e.g., “otro centro” , “other facility”, “cosam”, “primary health care”.

Distribution

Distribution of values for referral_type

Distribution of values for referral_type

158565 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
referral_type Recoded type of referral center. Created from tipo_centro_derivacion (translation): e.g., “otro centro” , “other facility”, “cosam”, “primary health care”. character 158565 0.0265935 5 0 14 25 0

plan_type

Recoded treatment plan type. Created as copy and translation of tipo_de_plan (step 1). Used in collapsing logic and outcome modeling.

Distribution

Distribution of values for plan_type

Distribution of values for plan_type

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
plan_type Recoded treatment plan type. Created as copy and translation of tipo_de_plan (step 1). Used in collapsing logic and outcome modeling. character 0 1 6 0 4 6 0

id_centro

Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive centro_muj (women-specific centers).

Distribution

Distribution of values for id_centro

Distribution of values for id_centro

39 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
id_centro Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive centro_muj (women-specific centers). numeric 39 0.9997606 104 258 899 310.5774 177.9022 ▇▃▂▂▁

senda

Indicator if treatment was under SENDA agreement (“si”/“no”). Direct import. Used to derive senda_adm_date and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients not recorded in SENDA “si” programs may nevertheless be part of SENDA programs in some cases.

Distribution

Distribution of values for senda

Distribution of values for senda

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
senda Indicator if treatment was under SENDA agreement (“si”/“no”). Direct import. Used to derive senda_adm_date and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients not recorded in SENDA “si” programs may nevertheless be part of SENDA programs in some cases. character 0 1 2 0 2 2 0

pub_center

Binary indicator for public center. Created as tipo_centro == "publico". Used in institutional comparisons.

Distribution

Distribution of values for pub_center

Distribution of values for pub_center

39 missing values.

Summary statistics

name label data_type ordered value_labels n_missing complete_rate n_unique top_counts
pub_center Binary indicator for public center. Created as tipo_centro == "publico". Used in institutional comparisons. factor FALSE 1. FALSE,
2. TRUE
39 0.9997606 2 TRU: 112242, FAL: 50616

primary_sub

Primary substance of use

Distribution

Distribution of values for primary_sub

Distribution of values for primary_sub

4 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
primary_sub Primary substance of use character 4 0.9999754 11 0 6 27 0

second_sub1

Normalized secondary substances at admission (1)

Distribution

Distribution of values for second_sub1

Distribution of values for second_sub1

45932 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
second_sub1 Normalized secondary substances at admission (1) character 45932 0.7180304 12 0 6 33 0

second_sub2

Normalized secondary substances at admission (2)

Distribution

Distribution of values for second_sub2

Distribution of values for second_sub2

101461 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
second_sub2 Normalized secondary substances at admission (2) character 101461 0.3771463 12 0 6 33 0

second_sub3

Normalized secondary substances at admission (3)

Distribution

Distribution of values for second_sub3

Distribution of values for second_sub3

142611 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
second_sub3 Normalized secondary substances at admission (3) character 142611 0.1245327 12 0 6 33 0

prim_sub_freq

Frequency of primary substance use at admission (recoded: prim_sub_freq_rec)

Distribution

Distribution of values for prim_sub_freq

Distribution of values for prim_sub_freq

808 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
prim_sub_freq Frequency of primary substance use at admission (recoded: prim_sub_freq_rec) character 808 0.9950398 5 0 8 25 0

prim_sub_route

Recoded route of administration. Created from via_administracion_sustancia_principal into standardized terms.

Distribution

Distribution of values for prim_sub_route

Distribution of values for prim_sub_route

25 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
prim_sub_route Recoded route of administration. Created from via_administracion_sustancia_principal into standardized terms. character 25 0.9998465 5 0 6 41 0

LB_age_primary_onset_rec2

Lower bound age of primary substance onset recoded 2

Distribution

Distribution of values for LB_age_primary_onset_rec2

Distribution of values for LB_age_primary_onset_rec2

91849 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
LB_age_primary_onset_rec2 Lower bound age of primary substance onset recoded 2 numeric 91849 0.4361529 5 14 71 13.38267 4.697013 ▇▁▁▁▁

UB_age_primary_onset_rec2

Upper bound age of primary substance onset recoded 2

Distribution

Distribution of values for UB_age_primary_onset_rec2

Distribution of values for UB_age_primary_onset_rec2

91849 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
UB_age_primary_onset_rec2 Upper bound age of primary substance onset recoded 2 numeric 91849 0.4361529 13 31 82 32.69701 9.823931 ▅▇▃▁▁

age_primary_onset_rec2

Created in Step 3.

Distribution

Distribution of values for age_primary_onset_rec2

Distribution of values for age_primary_onset_rec2

1254 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
age_primary_onset_rec2 Created in Step 3. numeric 1254 0.9923019 5 18 75 20.2022 7.575139 ▇▅▁▁▁

first_sub_used

Normalized first substance used. Created as tolower(gsub("_", " ", sustancia_de_inicio)). Input for resolving inconsistencies, prioritizing most vulnerable value = sus_ini_mod_mvv. Despite this, this variable was normalized in Step 3 for inconsistent values in patients.

Distribution

Distribution of values for first_sub_used

Distribution of values for first_sub_used

10508 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
first_sub_used Normalized first substance used. Created as tolower(gsub("_", " ", sustancia_de_inicio)). Input for resolving inconsistencies, prioritizing most vulnerable value = sus_ini_mod_mvv. Despite this, this variable was normalized in Step 3 for inconsistent values in patients. character 10508 0.935493 11 0 6 27 0

sus_ini_mod_mvv

Sus ini mod mvv

Distribution

Distribution of values for sus_ini_mod_mvv

Distribution of values for sus_ini_mod_mvv

6684 missing values.

Summary statistics

name label data_type ordered value_labels n_missing complete_rate n_unique top_counts
sus_ini_mod_mvv Sus ini mod mvv factor FALSE 1. cocaine paste,
2. cocaine powder,
3. alcohol,
4. marijuana,
5. others
6684 0.9589679 5 alc: 98027, mar: 33833, coc: 12128, coc: 9512

sus_ini_1

Sus ini 1

Distribution

Distribution of values for sus_ini_1

Distribution of values for sus_ini_1

121960 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
sus_ini_1 Sus ini 1 character 121960 0.251306 10 0 6 27 0

sus_ini_2

Sus ini 2

Distribution

Distribution of values for sus_ini_2

Distribution of values for sus_ini_2

128480 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
sus_ini_2 Sus ini 2 character 128480 0.2112807 10 0 6 27 0

sus_ini_3

Sus ini 3

Distribution

Distribution of values for sus_ini_3

Distribution of values for sus_ini_3

157971 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
sus_ini_3 Sus ini 3 character 157971 0.03024 10 0 6 27 0

LB_age_subs_onset_rec2

Lower bound age of substance onset recoded 2

Distribution

Distribution of values for LB_age_subs_onset_rec2

Distribution of values for LB_age_subs_onset_rec2

91846 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
LB_age_subs_onset_rec2 Lower bound age of substance onset recoded 2 numeric 91846 0.4361713 5 5 5 5 0 ▁▁▇▁▁

UB_age_subs_onset_rec2

Upper bound age of substance onset recoded 2

Distribution

Distribution of values for UB_age_subs_onset_rec2

Distribution of values for UB_age_subs_onset_rec2

91846 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
UB_age_subs_onset_rec2 Upper bound age of substance onset recoded 2 numeric 91846 0.4361713 13 31 82 32.69688 9.823752 ▅▇▃▁▁

age_subs_onset_rec2

Created in Step 3.

Distribution

Distribution of values for age_subs_onset_rec2

Distribution of values for age_subs_onset_rec2

6675 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
age_subs_onset_rec2 Created in Step 3. numeric 6675 0.9590232 5 15 74 15.52318 5.12464 ▇▁▁▁▁

biopsych_comp

Recoded biopsychosocial severity. Created from compromiso_biopsicosocial: e.g., “moderado”, “2-moderate”.

Distribution

Distribution of values for biopsych_comp

Distribution of values for biopsych_comp

1947 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
biopsych_comp Recoded biopsychosocial severity. Created from compromiso_biopsicosocial: e.g., “moderado”, “2-moderate”. character 1947 0.9880477 3 0 6 10 0

mod_psiq_cie_10

Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses

Distribution

Distribution of values for mod_psiq_cie_10

Distribution of values for mod_psiq_cie_10

83784 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
mod_psiq_cie_10 Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses character 83784 0.4856627 1083 0 18 525 0

mod_psiq_dsm_iv

Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses

Distribution

Distribution of values for mod_psiq_dsm_iv

Distribution of values for mod_psiq_dsm_iv

148878 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
mod_psiq_dsm_iv Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses character 148878 0.0860605 744 0 6 403 0

diagnostico_trs_fisico

Physical health diagnosis. Direct import. Often concatenated (e.g., “Hepatitis alcoholica: cardiopatías”). Preserved as-is for clinical profiling.

Distribution

Distribution of values for diagnostico_trs_fisico

Distribution of values for diagnostico_trs_fisico

310 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
diagnostico_trs_fisico Physical health diagnosis. Direct import. Often concatenated (e.g., “Hepatitis alcoholica: cardiopatías”). Preserved as-is for clinical profiling. character 310 0.998097 15 0 3 60 0

otros_probl_at_sm_or

Other mental health issues (e.g., “abuso sexual”). Direct import. Recoded to otros_probl_at_sm_or. Used in vulnerability and trauma profiling.

Distribution

Distribution of values for otros_probl_at_sm_or

Distribution of values for otros_probl_at_sm_or

34796 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
otros_probl_at_sm_or Other mental health issues (e.g., “abuso sexual”). Direct import. Recoded to otros_probl_at_sm_or. Used in vulnerability and trauma profiling. character 34796 0.7863926 10 0 3 37 0

sub_dep_icd10_status

Severity of Substance Use Disorder (SUD)

Distribution

Distribution of values for sub_dep_icd10_status

Distribution of values for sub_dep_icd10_status

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
sub_dep_icd10_status Severity of Substance Use Disorder (SUD) character 0 1 2 0 15 21 0

evaluacindelprocesoteraputico

Therapeutic process evaluation (e.g., “logro alto”). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis.

Distribution

Distribution of values for evaluacindelprocesoteraputico

Distribution of values for evaluacindelprocesoteraputico

3599 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
evaluacindelprocesoteraputico Therapeutic process evaluation (e.g., “logro alto”). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis. character 3599 0.9779063 3 0 10 16 0

eva_consumo

Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly.

Distribution

Distribution of values for eva_consumo

Distribution of values for eva_consumo

3621 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
eva_consumo Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly. character 3621 0.9777712 3 0 10 16 0

eva_fam

Family situation evaluation at discharge. Direct import.

Distribution

Distribution of values for eva_fam

Distribution of values for eva_fam

3621 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
eva_fam Family situation evaluation at discharge. Direct import. character 3621 0.9777712 3 0 10 16 0

eva_relinterp

Interpersonal relations evaluation at discharge. Direct import.

Distribution

Distribution of values for eva_relinterp

Distribution of values for eva_relinterp

3621 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
eva_relinterp Interpersonal relations evaluation at discharge. Direct import. character 3621 0.9777712 3 0 10 16 0

eva_ocupacion

Occupational situation evaluation at discharge. Direct import.

Distribution

Distribution of values for eva_ocupacion

Distribution of values for eva_ocupacion

3621 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
eva_ocupacion Occupational situation evaluation at discharge. Direct import. character 3621 0.9777712 3 0 10 16 0

eva_sm

Mental health evaluation at discharge. Direct import.

Distribution

Distribution of values for eva_sm

Distribution of values for eva_sm

3621 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
eva_sm Mental health evaluation at discharge. Direct import. character 3621 0.9777712 3 0 10 16 0

eva_fisica

Physical health evaluation at discharge. Direct import.

Distribution

Distribution of values for eva_fisica

Distribution of values for eva_fisica

3621 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
eva_fisica Physical health evaluation at discharge. Direct import. character 3621 0.9777712 3 0 10 16 0

eva_transgnorma

Social norm transgression evaluation at discharge. Direct import.

Distribution

Distribution of values for eva_transgnorma

Distribution of values for eva_transgnorma

3621 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
eva_transgnorma Social norm transgression evaluation at discharge. Direct import. character 3621 0.9777712 3 0 10 16 0

dg_global_nec_int_soc_or

Global social integration need at admission. Direct import. Recoded to dg_global_nec_int_soc_or. Categorized as “altas”, “medias”, “bajas”. Used in social vulnerability index.

Distribution

Distribution of values for dg_global_nec_int_soc_or

Distribution of values for dg_global_nec_int_soc_or

47767 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_global_nec_int_soc_or Global social integration need at admission. Direct import. Recoded to dg_global_nec_int_soc_or. Categorized as “altas”, “medias”, “bajas”. Used in social vulnerability index. character 47767 0.7067656 5 0 5 12 0

dg_nec_int_soc_cap_hum_or

Human capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_hum_or. Part of social integration profile.

Distribution

Distribution of values for dg_nec_int_soc_cap_hum_or

Distribution of values for dg_nec_int_soc_cap_hum_or

47768 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_nec_int_soc_cap_hum_or Human capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_hum_or. Part of social integration profile. character 47768 0.7067595 5 0 5 12 0

dg_nec_int_soc_cap_fis_or

Physical capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_fis_or.

Distribution

Distribution of values for dg_nec_int_soc_cap_fis_or

Distribution of values for dg_nec_int_soc_cap_fis_or

47767 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_nec_int_soc_cap_fis_or Physical capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_fis_or. character 47767 0.7067656 5 0 5 12 0

dg_nec_int_soc_cap_soc_or

Social capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_soc_or.

Distribution

Distribution of values for dg_nec_int_soc_cap_soc_or

Distribution of values for dg_nec_int_soc_cap_soc_or

47767 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_nec_int_soc_cap_soc_or Social capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_soc_or. character 47767 0.7067656 5 0 5 12 0

dg_global_nec_int_soc_egr_or

Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis.

Distribution

Distribution of values for dg_global_nec_int_soc_egr_or

Distribution of values for dg_global_nec_int_soc_egr_or

45847 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_global_nec_int_soc_egr_or Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis. character 45847 0.7185522 5 0 5 12 0

dg_nec_int_soc_cap_hum_egr_or

Human capital need at discharge. Direct import.

Distribution

Distribution of values for dg_nec_int_soc_cap_hum_egr_or

Distribution of values for dg_nec_int_soc_cap_hum_egr_or

45165 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_nec_int_soc_cap_hum_egr_or Human capital need at discharge. Direct import. character 45165 0.7227389 5 0 5 12 0

dg_nec_int_soc_cap_fis_egr_or

Physical capital need at discharge. Direct import.

Distribution

Distribution of values for dg_nec_int_soc_cap_fis_egr_or

Distribution of values for dg_nec_int_soc_cap_fis_egr_or

45144 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_nec_int_soc_cap_fis_egr_or Physical capital need at discharge. Direct import. character 45144 0.7228678 5 0 5 12 0

dg_nec_int_soc_cap_soc_egr_or

Social capital need at discharge. Direct import.

Distribution

Distribution of values for dg_nec_int_soc_cap_soc_egr_or

Distribution of values for dg_nec_int_soc_cap_soc_egr_or

45285 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dg_nec_int_soc_cap_soc_egr_or Social capital need at discharge. Direct import. character 45285 0.7220022 5 0 5 12 0

usuario_tribunal_trat_droga

Court-referred to drug treatment (very unbalanced, ~1% yes)

Distribution

Distribution of values for usuario_tribunal_trat_droga

Distribution of values for usuario_tribunal_trat_droga

5888 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
usuario_tribunal_trat_droga Court-referred to drug treatment (very unbalanced, ~1% yes) character 5888 0.9638545 2 0 2 2 0

nationality_cons

Recoded/standardized from nacionalidad or pais_nacimiento.

Distribution

Distribution of values for nationality_cons

Distribution of values for nationality_cons

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
nationality_cons Recoded/standardized from nacionalidad or pais_nacimiento. character 0 1 83 0 4 31 0

ethnicity_c1_c6_historic

Concatenated unique non-Chilean/non-“no pertenece” values from etnia (C1) and ethnicity (C6), grouped by hash_key.

Distribution

Distribution of values for ethnicity_c1_c6_historic

Distribution of values for ethnicity_c1_c6_historic

150808 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
ethnicity_c1_c6_historic Concatenated unique non-Chilean/non-“no pertenece” values from etnia (C1) and ethnicity (C6), grouped by hash_key. character 150808 0.0742125 31 0 5 22 0

discapacidad

Indicator of disability (“si”/“no”). Direct import. Used with opcion_discapacidad for disability profiling. Starts coding in 2019

Distribution

Distribution of values for discapacidad

Distribution of values for discapacidad

99545 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
discapacidad Indicator of disability (“si”/“no”). Direct import. Used with opcion_discapacidad for disability profiling. Starts coding in 2019 character 99545 0.3889083 2 0 2 2 0

opcion_discapacidad

Type of disability (e.g., “de causa psiquica”). Direct import. Only populated if discapacidad == "si". Used in accessibility and needs assessment. Starts coding in 2019

Distribution

Distribution of values for opcion_discapacidad

Distribution of values for opcion_discapacidad

160052 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
opcion_discapacidad Type of disability (e.g., “de causa psiquica”). Direct import. Only populated if discapacidad == "si". Used in accessibility and needs assessment. Starts coding in 2019 character 160052 0.017465 5 0 16 21 0

sex_rec

Sex (often used for biological categorization)

Distribution

Distribution of values for sex_rec

Distribution of values for sex_rec

4 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
sex_rec Sex (often used for biological categorization) character 4 0.9999754 2 0 5 6 0

identidad_de_genero

Gender identity (e.g., “masculino”, “femenino”). Direct import. Recoded to identidad_de_genero factor. Used to validate/override sexo in sex_rec (Step 2). Starts coding in 2019

Distribution

Distribution of values for identidad_de_genero

Distribution of values for identidad_de_genero

99545 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
identidad_de_genero Gender identity (e.g., “masculino”, “femenino”). Direct import. Recoded to identidad_de_genero factor. Used to validate/override sexo in sex_rec (Step 2). Starts coding in 2019 character 99545 0.3889083 8 0 8 19 0

orientacion_sexual

Sexual orientation (e.g., “heterosexual”). Direct import. Sparse. Preserved for inclusivity analysis. Starts coding in 2022

Distribution

Distribution of values for orientacion_sexual

Distribution of values for orientacion_sexual

129745 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
orientacion_sexual Sexual orientation (e.g., “heterosexual”). Direct import. Sparse. Preserved for inclusivity analysis. Starts coding in 2022 character 129745 0.2035151 5 0 7 12 0

pregnant

Recoded pregnancy status at admission. Created from se_trata_de_una_mujer_embarazada (NA = “no”). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to be introduced in 2011.

Distribution

Distribution of values for pregnant

Distribution of values for pregnant

87172 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
pregnant Recoded pregnancy status at admission. Created from se_trata_de_una_mujer_embarazada (NA = “no”). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to be introduced in 2011. character 87172 0.4648643 2 0 2 3 0

pregnant_disch

Recoded pregnancy status at discharge. Created from ha_estado_embarazada_egreso. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge.

Distribution

Distribution of values for pregnant_disch

Distribution of values for pregnant_disch

100935 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
pregnant_disch Recoded pregnancy status at discharge. Created from ha_estado_embarazada_egreso. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge. character 100935 0.3803753 2 0 2 3 0

marital_status

Marital status

Distribution

Distribution of values for marital_status

Distribution of values for marital_status

317 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
marital_status Marital status character 317 0.998054 4 0 6 27 0

tiene_menores_de_edad_a_cargo

Indicator if responsible for minors (“si”/“no”). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index.

Distribution

Distribution of values for tiene_menores_de_edad_a_cargo

Distribution of values for tiene_menores_de_edad_a_cargo

61317 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
tiene_menores_de_edad_a_cargo Indicator if responsible for minors (“si”/“no”). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index. character 61317 0.6235842 2 0 2 2 0

num_hijos_trat_res

Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag hijos_trat_res (“Had children in treatments”).

Distribution

Distribution of values for num_hijos_trat_res

Distribution of values for num_hijos_trat_res

9266 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
num_hijos_trat_res Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag hijos_trat_res (“Had children in treatments”). numeric 9266 0.9431174 0 0 11 0.0478289 0.2730079 ▇▁▁▁▁

numero_de_hijos

Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in kNN imputation and to derive tiene_menores_de_edad_a_cargo.

Distribution

Distribution of values for numero_de_hijos

Distribution of values for numero_de_hijos

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
numero_de_hijos Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in kNN imputation and to derive tiene_menores_de_edad_a_cargo. numeric 0 1 0 1 71 1.663032 1.705014 ▇▁▁▁▁

con_quien_vive

Living arrangement/Cohabitation status

Distribution

Distribution of values for con_quien_vive

Distribution of values for con_quien_vive

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
con_quien_vive Living arrangement/Cohabitation status character 0 1 15 0 4 53 0

tipo_de_vivienda

Type of housing

Distribution

Distribution of values for tipo_de_vivienda

Distribution of values for tipo_de_vivienda

12176 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
tipo_de_vivienda Type of housing character 12176 0.9252534 10 0 4 37 0

precariedad_vivienda

Precarious housing. Direct import. Available since 2022 databases.

Distribution

Distribution of values for precariedad_vivienda

Distribution of values for precariedad_vivienda

130860 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
precariedad_vivienda Precarious housing. Direct import. Available since 2022 databases. character 130860 0.1966703 2 0 37 82 0

tenure_status_household

Housing situation

Distribution

Distribution of values for tenure_status_household

Distribution of values for tenure_status_household

7738 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
tenure_status_household Housing situation character 7738 0.9524976 5 0 6 42 0

servicios_basicos_95

Basic sanitation services. Direct import. Available since 2022 databases.

Distribution

Distribution of values for servicios_basicos_95

Distribution of values for servicios_basicos_95

130860 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
servicios_basicos_95 Basic sanitation services. Direct import. Available since 2022 databases. character 130860 0.1966703 2 0 55 135 0

perso_dormitorio_vivienda

Persons per dwelling bedroom. Direct import. Available since 2022 databases.

Distribution

Distribution of values for perso_dormitorio_vivienda

Distribution of values for perso_dormitorio_vivienda

130860 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
perso_dormitorio_vivienda Persons per dwelling bedroom. Direct import. Available since 2022 databases. character 130860 0.1966703 2 0 20 20 0

ed_attainment

Recoded education level. Created from escolaridad_ultimo_ano_cursado into standardized categories (e.g., “media incompleta” = “2-Completed high school or less”).

Distribution

Distribution of values for ed_attainment

Distribution of values for ed_attainment

1105 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
ed_attainment Recoded education level. Created from escolaridad_ultimo_ano_cursado into standardized categories (e.g., “media incompleta” = “2-Completed high school or less”). character 1105 0.9932166 3 0 23 34 0

occupation_condition

Recoded employment status. Created from condicion_ocupacional: e.g., “cesante”= “unemployed” (in Step 1).

Distribution

Distribution of values for occupation_condition

Distribution of values for occupation_condition

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
occupation_condition Recoded employment status. Created from condicion_ocupacional: e.g., “cesante”= “unemployed” (in Step 1). character 0 1 3 0 8 10 0

occupation_status

Recoded employment category. Created from categoria_ocupacional. Sparse; often NA.

Distribution

Distribution of values for occupation_status

Distribution of values for occupation_status

107379 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
occupation_status Recoded employment category. Created from categoria_ocupacional. Sparse; often NA. character 107379 0.3408166 6 0 5 20 0

rubro_trabaja

Occupational sector (e.g., “trabajadores no calificados”). Direct import. Recoded to rubro_trabaja_mod. 68% missing. 13.8% among employed

Distribution

Distribution of values for rubro_trabaja

Distribution of values for rubro_trabaja

109056 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
rubro_trabaja Occupational sector (e.g., “trabajadores no calificados”). Direct import. Recoded to rubro_trabaja_mod. 68% missing. 13.8% among employed character 109056 0.3305217 11 0 21 77 0

laboral_ingresos

Monthly income. Direct import. Available since 2022 databases.

Distribution

Distribution of values for laboral_ingresos

Distribution of values for laboral_ingresos

148124 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
laboral_ingresos Monthly income. Direct import. Available since 2022 databases. character 148124 0.0906892 11 0 16 19 0

yr_block

Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding birth_date year to nearest decade (e.g., floor(year(birth_date)/10)*10). To look for data similarities (Step 1)

Distribution

Distribution of values for yr_block

Distribution of values for yr_block

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
yr_block Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding birth_date year to nearest decade (e.g., floor(year(birth_date)/10)*10). To look for data similarities (Step 1) numeric 0 1 1925 1980 2005 1978.161 11.40454 ▁▁▅▇▃

OBS_series

OBS series

Distribution

Distribution of values for OBS_series

Distribution of values for OBS_series

153200 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
OBS_series OBS series character 153200 0.0595284 993 0 45 999 0

senda_series

SENDA series

Distribution

Distribution of values for senda_series

Distribution of values for senda_series

153200 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
senda_series SENDA series character 153200 0.0595284 4 0 2 5 0

pub_center_series

Public center series

Distribution

Distribution of values for pub_center_series

Distribution of values for pub_center_series

153200 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
pub_center_series Public center series character 153200 0.0595284 4 0 4 10 0

id_centro_series

Center ID series

Distribution

Distribution of values for id_centro_series

Distribution of values for id_centro_series

153200 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
id_centro_series Center ID series character 153200 0.0595284 2773 0 3 15 0

disch_date_num_rec6

Numeric discharge date recoded 6

Distribution

Distribution of values for disch_date_num_rec6

Distribution of values for disch_date_num_rec6

3270 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
disch_date_num_rec6 Numeric discharge date recoded 6 numeric 3270 0.979926 12053 17527 20236 17566.2 1508.771 ▁▂▇▇▇

dg_psiq_cie_10_instudy

Psychiatric comorbidity (ICD-10)

Distribution

Distribution of values for dg_psiq_cie_10_instudy

Distribution of values for dg_psiq_cie_10_instudy

0 missing values.

Summary statistics

name label data_type n_missing complete_rate count mean
dg_psiq_cie_10_instudy Psychiatric comorbidity (ICD-10) logical 0 1 FAL: 136904, TRU: 25993 0.1595671

dg_psiq_cie_10_dg

Mutually exclusive (as corrected in Step 3)

Distribution

Distribution of values for dg_psiq_cie_10_dg

Distribution of values for dg_psiq_cie_10_dg

0 missing values.

Summary statistics

name label data_type n_missing complete_rate count mean
dg_psiq_cie_10_dg Mutually exclusive (as corrected in Step 3) logical 0 1 FAL: 83784, TRU: 79113 0.4856627

dg_psiq_dsm_iv_instudy

Psychiatric comorbidity (DSM-IV)

Distribution

Distribution of values for dg_psiq_dsm_iv_instudy

Distribution of values for dg_psiq_dsm_iv_instudy

0 missing values.

Summary statistics

name label data_type n_missing complete_rate count mean
dg_psiq_dsm_iv_instudy Psychiatric comorbidity (DSM-IV) logical 0 1 FAL: 91558, TRU: 71339 0.4379393

dg_psiq_dsm_iv_dg

Mutually exclusive (as corrected in Step 3)

Distribution

Distribution of values for dg_psiq_dsm_iv_dg

Distribution of values for dg_psiq_dsm_iv_dg

0 missing values.

Summary statistics

name label data_type n_missing complete_rate count mean
dg_psiq_dsm_iv_dg Mutually exclusive (as corrected in Step 3) logical 0 1 FAL: 148878, TRU: 14019 0.0860605

plan_type_series

Plan type series

Distribution

Distribution of values for plan_type_series

Distribution of values for plan_type_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
plan_type_series Plan type series character 0 1 184 0 8 51 0

tr_compliance_rec7_series

Treatment compliance recoded 7 series

Distribution

Distribution of values for tr_compliance_rec7_series

Distribution of values for tr_compliance_rec7_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
tr_compliance_rec7_series Treatment compliance recoded 7 series character 0 1 40 0 12 77 0

referral_type_series

Referral type series

Distribution

Distribution of values for referral_type_series

Distribution of values for referral_type_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
referral_type_series Referral type series character 0 1 127 0 6 108 0

adm_age_rec3_series

Admission age recoded 3 series

Distribution

Distribution of values for adm_age_rec3_series

Distribution of values for adm_age_rec3_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
adm_age_rec3_series Admission age recoded 3 series character 0 1 15333 0 6 51 0

adm_date_rec2_series

Admission date recoded 2 series

Distribution

Distribution of values for adm_date_rec2_series

Distribution of values for adm_date_rec2_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
adm_date_rec2_series Admission date recoded 2 series character 0 1 14635 0 14 86 0

disch_date_rec6_series

Discharge date recoded 6 series

Distribution

Distribution of values for disch_date_rec6_series

Distribution of values for disch_date_rec6_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
disch_date_rec6_series Discharge date recoded 6 series character 0 1 14768 0 6 86 0

diagnostico_trs_fisico_series

Physical diagnosis series

Distribution

Distribution of values for diagnostico_trs_fisico_series

Distribution of values for diagnostico_trs_fisico_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
diagnostico_trs_fisico_series Physical diagnosis series character 0 1 362 0 6 198 0

otros_probl_at_sm_or_series

Other mental health problems series

Distribution

Distribution of values for otros_probl_at_sm_or_series

Distribution of values for otros_probl_at_sm_or_series

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
otros_probl_at_sm_or_series Other mental health problems series character 0 1 431 0 6 168 0

OBS

Observed flags or notes from cleaning (e.g., “Duplicated Cases”). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. It is separated by semi-colons

Distribution

Distribution of values for OBS

Distribution of values for OBS

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
OBS Observed flags or notes from cleaning (e.g., “Duplicated Cases”). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. It is separated by semi-colons character 0 1 1053 1 0 634 0

occupation_status_corr

Corrected employment status. Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as ‘other’ if missing. Depends on occupation_condition_inferred, occupation_status.

Distribution

Distribution of values for occupation_status_corr

Distribution of values for occupation_status_corr

85961 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
occupation_status_corr Corrected employment status. Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as ‘other’ if missing. Depends on occupation_condition_inferred, occupation_status. character 85961 0.4722984 6 0 5 20 0

occupation_condition_corr24

Employment status

Distribution

Distribution of values for occupation_condition_corr24

Distribution of values for occupation_condition_corr24

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
occupation_condition_corr24 Employment status character 0 1 3 0 8 10 0

plan_type_corr

Treatment modality

Distribution

Distribution of values for plan_type_corr

Distribution of values for plan_type_corr

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
plan_type_corr Treatment modality character 0 1 5 0 4 6 0

ed_attainment_corr

Educational attainment

Distribution

Distribution of values for ed_attainment_corr

Distribution of values for ed_attainment_corr

1105 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
ed_attainment_corr Educational attainment character 1105 0.9932166 3 0 23 34 0

polysubstance_strict

Polysubstance use

Distribution

Distribution of values for polysubstance_strict

Distribution of values for polysubstance_strict

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
polysubstance_strict Polysubstance use numeric 0 1 0 1 1 0.7185522 0.4497068 ▃▁▁▁▇

icd10_diag1

ICD10 diagnosis 1. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10.

Distribution

Distribution of values for icd10_diag1

Distribution of values for icd10_diag1

148878 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
icd10_diag1 ICD10 diagnosis 1. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10. character 148878 0.0860605 94 0 6 149 0

icd10_diag2

ICD10 diagnosis 2. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10.

Distribution

Distribution of values for icd10_diag2

Distribution of values for icd10_diag2

161349 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
icd10_diag2 ICD10 diagnosis 2. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10. character 161349 0.0095029 84 0 7 202 0

icd10_diag3

ICD10 diagnosis 3. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10.

Distribution

Distribution of values for icd10_diag3

Distribution of values for icd10_diag3

162566 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
icd10_diag3 ICD10 diagnosis 3. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10. character 162566 0.002032 46 0 25 129 0

dsmiv_diag1

DSMIV diagnosis 1. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv.

Distribution

Distribution of values for dsmiv_diag1

Distribution of values for dsmiv_diag1

148878 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dsmiv_diag1 DSMIV diagnosis 1. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv. character 148878 0.0860605 94 0 6 149 0

dsmiv_diag2

DSMIV diagnosis 2. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv.

Distribution

Distribution of values for dsmiv_diag2

Distribution of values for dsmiv_diag2

161349 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dsmiv_diag2 DSMIV diagnosis 2. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv. character 161349 0.0095029 84 0 7 202 0

dsmiv_diag3

DSMIV diagnosis 3. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv.

Distribution

Distribution of values for dsmiv_diag3

Distribution of values for dsmiv_diag3

162566 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
dsmiv_diag3 DSMIV diagnosis 3. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv. character 162566 0.002032 46 0 25 129 0

adm_year

Admission year

Distribution

Distribution of values for adm_year

Distribution of values for adm_year

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
adm_year Admission year numeric 0 1 2000 2017 2024 2017.116 4.182583 ▁▁▆▇▆

porc_pobr

Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6]

Distribution

Distribution of values for porc_pobr

Distribution of values for porc_pobr

3 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
porc_pobr Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6] numeric 3 0.9999816 0.0017 0.13 0.64 0.1418689 0.0648752 ▇▇▁▁▁

clasificacion

Urbanization level of the commune of residence

Distribution

Distribution of values for clasificacion

Distribution of values for clasificacion

1 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
clasificacion Urbanization level of the commune of residence character 1 0.9999939 3 0 5 6 0

km2_c

Centered km^2. Created in Step 4. km_2 - mean(km_2). Depends on km_2.

Distribution

Distribution of values for km2_c

Distribution of values for km2_c

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
km2_c Centered km^2. Created in Step 4. km_2 - mean(km_2). Depends on km_2. numeric 1 0.9999939 -2206 -1981 46483 0 5897.095 ▇▁▁▁▁

km2_log

Log km^2. Created in Step 4. log1p(km_2). Depends on km_2.

Distribution

Distribution of values for km2_log

Distribution of values for km2_log

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
km2_log Log km^2. Created in Step 4. log1p(km_2). Depends on km_2. numeric 1 0.9999939 2.1 5.5 11 5.555289 2.179414 ▆▇▇▃▂

porc_pobr_log

Log poverty. Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr.

Distribution

Distribution of values for porc_pobr_log

Distribution of values for porc_pobr_log

3 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
porc_pobr_log Log poverty. Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr. numeric 3 0.9999816 0.0017 0.12 0.49 0.1311123 0.0552621 ▅▇▂▁▁

porc_pobr_c

Centered poverty. Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr.

Distribution

Distribution of values for porc_pobr_c

Distribution of values for porc_pobr_c

3 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
porc_pobr_c Centered poverty. Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr. numeric 3 0.9999816 -0.14 -0.0098 0.49 0 0.0648752 ▇▇▁▁▁

Missingness report

Codebook table

JSON-LD metadata

The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.

{
  "datePublished": "2024-01-01",
  "temporalCoverage": "2010-2024",
  "spatialCoverage": "Chile",
  "creator": [
    {
      "@type": "Person",
      "givenName": "Álvaro",
      "familyName": "Castillo-Carniglia",
      "affiliation": {
        "@type": "Organization",
        "name": "Universidad San Sebastián, Chile"
      }
    },
    {
      "@type": "Person",
      "givenName": "Andrés",
      "familyName": "González-Santa Cruz",
      "email": "gonzalez.santacruz.andres@gmail.com",
      "affiliation": {
        "@type": "Organization",
        "name": "Universidad de Chile, Public Health"
      }
    },
    {
      "@type": "Person",
      "givenName": "Amaru",
      "familyName": "Agüero Jiménez",
      "affiliation": {
        "@type": "Organization",
        "name": "Universidad del Desarrollo, Chile"
      }
    }
  ],
  "citation": "Castillo-Carniglia, Á., González-Santa Cruz, A., & Agüero Jiménez, A. (2024). Workflow of data management and analysis - Chilean substance use treatment administrative data. Consolidation of Agreement 1 Databases from 2010 to 2024. Funded by FONDECYT regular 1191282.",
  "url": "https://github.com/FONDECYTACC/cons2025",
  "name": "Agreement 1 SENDA",
  "description": "Information About Agreement 1 of SENDA and MINSAL\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n[truncated]\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.6).",
  "keywords": ["TABLE", "TABLE_rec_series", "rn", "rn_series", "num_trat_ant", "fecha_ultimo_tratamiento", "hash_key", "min_adm_age_rec3", "adm_age_rec3", "birth_date_rec", "adm_date_num_rec2", "adm_date_rec2", "dit_rec6", "disch_date_rec6", "disch_date_num_rec6_trans", "def_date", "adm_motive", "tr_compliance_rec7", "adm_disch_reason", "referral_type", "plan_type", "id_centro", "senda", "pub_center", "primary_sub", "second_sub1", "second_sub2", "second_sub3", "prim_sub_freq", "prim_sub_route", "LB_age_primary_onset_rec2", "UB_age_primary_onset_rec2", "age_primary_onset_rec2", "first_sub_used", "sus_ini_mod_mvv", "sus_ini_1", "sus_ini_2", "sus_ini_3", "LB_age_subs_onset_rec2", "UB_age_subs_onset_rec2", "age_subs_onset_rec2", "biopsych_comp", "mod_psiq_cie_10", "mod_psiq_dsm_iv", "diagnostico_trs_fisico", "otros_probl_at_sm_or", "sub_dep_icd10_status", "evaluacindelprocesoteraputico", "eva_consumo", "eva_fam", "eva_relinterp", "eva_ocupacion", "eva_sm", "eva_fisica", "eva_transgnorma", "dg_global_nec_int_soc_or", "dg_nec_int_soc_cap_hum_or", "dg_nec_int_soc_cap_fis_or", "dg_nec_int_soc_cap_soc_or", "dg_global_nec_int_soc_egr_or", "dg_nec_int_soc_cap_hum_egr_or", "dg_nec_int_soc_cap_fis_egr_or", "dg_nec_int_soc_cap_soc_egr_or", "usuario_tribunal_trat_droga", "nationality_cons", "ethnicity_c1_c6_historic", "discapacidad", "opcion_discapacidad", "sex_rec", "identidad_de_genero", "orientacion_sexual", "pregnant", "pregnant_disch", "marital_status", "tiene_menores_de_edad_a_cargo", "num_hijos_trat_res", "numero_de_hijos", "con_quien_vive", "tipo_de_vivienda", "precariedad_vivienda", "tenure_status_household", "servicios_basicos_95", "perso_dormitorio_vivienda", "ed_attainment", "occupation_condition", "occupation_status", "rubro_trabaja", "laboral_ingresos", "yr_block", "OBS_series", "senda_series", "pub_center_series", "id_centro_series", "disch_date_num_rec6", "dg_psiq_cie_10_instudy", "dg_psiq_cie_10_dg", "dg_psiq_dsm_iv_instudy", "dg_psiq_dsm_iv_dg", 
"plan_type_series", "tr_compliance_rec7_series", "referral_type_series", "adm_age_rec3_series", "adm_date_rec2_series", "disch_date_rec6_series", "diagnostico_trs_fisico_series", "otros_probl_at_sm_or_series", "OBS", "occupation_status_corr", "occupation_condition_corr24", "plan_type_corr", "ed_attainment_corr", "polysubstance_strict", "icd10_diag1", "icd10_diag2", "icd10_diag3", "dsmiv_diag1", "dsmiv_diag2", "dsmiv_diag3", "adm_year", "porc_pobr", "clasificacion", "km2_c", "km2_log", "porc_pobr_log", "porc_pobr_c"],
  "@context": "https://schema.org/",
  "@type": "Dataset",
  "variableMeasured": [
    {
      "name": "TABLE",
      "description": "Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., \"2023\", \"2015\"). No transformation applied. Made in import_c1_top_data_adm_25.qmd",
      "@type": "propertyValue"
    },
    {
      "name": "TABLE_rec_series",
      "description": "Recoded series of TABLE",
      "@type": "propertyValue"
    },
    {
      "name": "rn",
      "description": "Row number or treatment episode sequence per patient. Created by `row_number() over (partition by hash_key order by adm_date)`, Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user.",
      "@type": "propertyValue"
    },
    {
      "name": "rn_series",
      "description": "Series of row number",
      "@type": "propertyValue"
    },
    {
      "name": "num_trat_ant",
      "description": "Number of prior treatments reported by patient. Direct import. May not be reliable. Not used.",
      "@type": "propertyValue"
    },
    {
      "name": "fecha_ultimo_tratamiento",
      "description": "Patient-reported time since last treatment (e.g., \"3 a 4 anos\"). Direct import. Categorical, not numeric. Many missing values (~50%)",
      "@type": "propertyValue"
    },
    {
      "name": "hash_key",
      "description": "Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letters. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation.",
      "@type": "propertyValue"
    },
    {
      "name": "min_adm_age_rec3",
      "description": "min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages.",
      "@type": "propertyValue"
    },
    {
      "name": "adm_age_rec3",
      "description": "Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat]",
      "@type": "propertyValue"
    },
    {
      "name": "birth_date_rec",
      "description": "Final authoritative date of birth. Created by selecting/imputing best `birth_date` per `hash_key` (via `kNN`, logic, or min/max validation). Used to recalculate `adm_age_rec`.",
      "@type": "propertyValue"
    },
    {
      "name": "adm_date_num_rec2",
      "description": "Numeric admission date recoded version 2",
      "@type": "propertyValue"
    },
    {
      "name": "adm_date_rec2",
      "description": "Intermediate admission date. Created in Step 2 (0.c) for the resolution of overlaps",
      "@type": "propertyValue"
    },
    {
      "name": "dit_rec6",
      "description": "Length of stay in treatment (months) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90]",
      "@type": "propertyValue"
    },
    {
      "name": "disch_date_rec6",
      "description": "Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling.",
      "@type": "propertyValue"
    },
    {
      "name": "disch_date_num_rec6_trans",
      "description": "Imputed discharge based on reference date. Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms.",
      "@type": "propertyValue"
    },
    {
      "name": "def_date",
      "description": "Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a).",
      "@type": "propertyValue"
    },
    {
      "name": "adm_motive",
      "description": "Admission motive",
      "@type": "propertyValue"
    },
    {
      "name": "tr_compliance_rec7",
      "description": "Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing-treatment motive that were followed by a later treatment; replaced with dropout (they had incomplete discharge information)",
      "@type": "propertyValue"
    },
    {
      "name": "adm_disch_reason",
      "description": "Placeholder for administrative discharge reason (mostly empty). Created as copy of `motivo_de_egreso_alta_administrativa`. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge.",
      "@type": "propertyValue"
    },
    {
      "name": "referral_type",
      "description": "Recoded type of referral center. Created from `tipo_centro_derivacion` (translation): e.g., \"otro centro\" , \"other facility\", \"cosam\", \"primary health care\".",
      "@type": "propertyValue"
    },
    {
      "name": "plan_type",
      "description": "Recoded treatment plan type. Created as copy and translation of `tipo_de_plan` (step 1). Used in collapsing logic and outcome modeling.",
      "@type": "propertyValue"
    },
    {
      "name": "id_centro",
      "description": "Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive `centro_muj` (women-specific centers).",
      "@type": "propertyValue"
    },
    {
      "name": "senda",
      "description": "Indicator if treatment was under SENDA agreement (\"si\"/\"no\"). Direct import. Used to derive `senda_adm_date` and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients may be in SENDA Yes programs may anyways part of SENDA programs in some cases",
      "@type": "propertyValue"
    },
    {
      "name": "pub_center",
      "description": "Binary indicator for public center. Created as `tipo_centro == \"publico\"`. Used in institutional comparisons.",
      "value": "1. FALSE,\n2. TRUE",
      "@type": "propertyValue"
    },
    {
      "name": "primary_sub",
      "description": "Primary substance of use",
      "@type": "propertyValue"
    },
    {
      "name": "second_sub1",
      "description": "Normalized secondary substances at admission (1)",
      "@type": "propertyValue"
    },
    {
      "name": "second_sub2",
      "description": "Normalized secondary substances at admission (2)",
      "@type": "propertyValue"
    },
    {
      "name": "second_sub3",
      "description": "Normalized secondary substances at admission (3)",
      "@type": "propertyValue"
    },
    {
      "name": "prim_sub_freq",
      "description": "Frequency of primary substance use at admission (recoded: prim_sub_freq_rec)",
      "@type": "propertyValue"
    },
    {
      "name": "prim_sub_route",
      "description": "Recoded route of administration. Created from `via_administracion_sustancia_principal` into standardized terms.",
      "@type": "propertyValue"
    },
    {
      "name": "LB_age_primary_onset_rec2",
      "description": "Lower bound age of primary substance onset recoded 2",
      "@type": "propertyValue"
    },
    {
      "name": "UB_age_primary_onset_rec2",
      "description": "Upper bound age of primary substance onset recoded 2",
      "@type": "propertyValue"
    },
    {
      "name": "age_primary_onset_rec2",
      "description": "Created in Step 3.",
      "@type": "propertyValue"
    },
    {
      "name": "first_sub_used",
      "description": "Normalized first substance used. Created as `tolower(gsub(\"_\", \" \", sustancia_de_inicio))`. Input for resolving inconsistencies, prioritizing most vulnerable value = `sus_ini_mod_mvv`. Despite this, this variable was normalized in Step 3 for inconsistent values in patients.",
      "@type": "propertyValue"
    },
    {
      "name": "sus_ini_mod_mvv",
      "description": "Sus ini mod mvv",
      "value": "1. cocaine paste,\n2. cocaine powder,\n3. alcohol,\n4. marijuana,\n5. others",
      "@type": "propertyValue"
    },
    {
      "name": "sus_ini_1",
      "description": "Sus ini 1",
      "@type": "propertyValue"
    },
    {
      "name": "sus_ini_2",
      "description": "Sus ini 2",
      "@type": "propertyValue"
    },
    {
      "name": "sus_ini_3",
      "description": "Sus ini 3",
      "@type": "propertyValue"
    },
    {
      "name": "LB_age_subs_onset_rec2",
      "description": "Lower bound age of substance onset recoded 2",
      "@type": "propertyValue"
    },
    {
      "name": "UB_age_subs_onset_rec2",
      "description": "Upper bound age of substance onset recoded 2",
      "@type": "propertyValue"
    },
    {
      "name": "age_subs_onset_rec2",
      "description": "Created in Step 3.",
      "@type": "propertyValue"
    },
    {
      "name": "biopsych_comp",
      "description": "Recoded biopsychosocial severity. Created from `compromiso_biopsicosocial`: e.g., \"moderado\", \"2-moderate\".",
      "@type": "propertyValue"
    },
    {
      "name": "mod_psiq_cie_10",
      "description": "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
      "@type": "propertyValue"
    },
    {
      "name": "mod_psiq_dsm_iv",
      "description": "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
      "@type": "propertyValue"
    },
    {
      "name": "diagnostico_trs_fisico",
      "description": "Physical health diagnosis. Direct import. Often concatenated (e.g., \"Hepatitis alcoholica: cardiopatías\"). Preserved as-is for clinical profiling.",
      "@type": "propertyValue"
    },
    {
      "name": "otros_probl_at_sm_or",
      "description": "Other mental health issues (e.g., \"abuso sexual\"). Direct import. Recoded to `otros_probl_at_sm_or`. Used in vulnerability and trauma profiling.",
      "@type": "propertyValue"
    },
    {
      "name": "sub_dep_icd10_status",
      "description": "Severity of Substance Use Disorder (SUD)",
      "@type": "propertyValue"
    },
    {
      "name": "evaluacindelprocesoteraputico",
      "description": "Therapeutic process evaluation (e.g., \"logro alto\"). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis.",
      "@type": "propertyValue"
    },
    {
      "name": "eva_consumo",
      "description": "Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly.",
      "@type": "propertyValue"
    },
    {
      "name": "eva_fam",
      "description": "Family situation evaluation at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "eva_relinterp",
      "description": "Interpersonal relations evaluation at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "eva_ocupacion",
      "description": "Occupational situation evaluation at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "eva_sm",
      "description": "Mental health evaluation at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "eva_fisica",
      "description": "Physical health evaluation at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "eva_transgnorma",
      "description": "Social norm transgression evaluation at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_global_nec_int_soc_or",
      "description": "Global social integration need at admission. Direct import. Recoded to `dg_global_nec_int_soc_or`. Categorized as \"altas\", \"medias\", \"bajas\". Used in social vulnerability index.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_nec_int_soc_cap_hum_or",
      "description": "Human capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_hum_or`. Part of social integration profile.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_nec_int_soc_cap_fis_or",
      "description": "Physical capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_fis_or`.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_nec_int_soc_cap_soc_or",
      "description": "Social capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_soc_or`.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_global_nec_int_soc_egr_or",
      "description": "Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_nec_int_soc_cap_hum_egr_or",
      "description": "Human capital need at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_nec_int_soc_cap_fis_egr_or",
      "description": "Physical capital need at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "dg_nec_int_soc_cap_soc_egr_or",
      "description": "Social capital need at discharge. Direct import.",
      "@type": "propertyValue"
    },
    {
      "name": "usuario_tribunal_trat_droga",
      "description": "Court-referred to drug treatment (very unbalanced, ~1% yes)",
      "@type": "propertyValue"
    },
    {
      "name": "nationality_cons",
      "description": "Recoded/standardized from nacionalidad or pais_nacimiento.",
      "@type": "propertyValue"
    },
    {
      "name": "ethnicity_c1_c6_historic",
      "description": "Concatenated unique non-Chilean/non-\"no pertenece\" values from etnia (C1) and ethnicity (C6), grouped by hash_key.",
      "@type": "propertyValue"
    },
    {
      "name": "discapacidad",
      "description": "Indicator of disability (\"si\"/\"no\"). Direct import. Used with `opcion_discapacidad` for disability profiling. Starts coding in 2019",
      "@type": "propertyValue"
    },
    {
      "name": "opcion_discapacidad",
      "description": "Type of disability (e.g., \"de causa psiquica\"). Direct import. Only populated if `discapacidad == \"si\"`. Used in accessibility and needs assessment.Starts coding in 2019",
      "@type": "propertyValue"
    },
    {
      "name": "sex_rec",
      "description": "Sex (often used for biological categorization)",
      "@type": "propertyValue"
    },
    {
      "name": "identidad_de_genero",
      "description": "Gender identity (e.g., \"masculino\", \"femenino\"). Direct import. Recoded to `identidad_de_genero` factor. Used to validate/override `sexo` in `sex_rec` (Step 2). Starts coding in 2019",
      "@type": "propertyValue"
    },
    {
      "name": "orientacion_sexual",
      "description": "Sexual orientation (e.g., \"heterosexual\"). Direct import. Sparse. Preserved for inclusivity analysis.Starts coding in 2022",
      "@type": "propertyValue"
    },
    {
      "name": "pregnant",
      "description": "Recoded pregnancy status at admission. Created from `se_trata_de_una_mujer_embarazada` (NA = \"no\"). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to beintroduced in 2011.",
      "@type": "propertyValue"
    },
    {
      "name": "pregnant_disch",
      "description": "Recoded pregnancy status at discharge. Created from `ha_estado_embarazada_egreso`. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge.",
      "@type": "propertyValue"
    },
    {
      "name": "marital_status",
      "description": "Marital status",
      "@type": "propertyValue"
    },
    {
      "name": "tiene_menores_de_edad_a_cargo",
      "description": "Indicator if responsible for minors (\"si\"/\"no\"). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index.",
      "@type": "propertyValue"
    },
    {
      "name": "num_hijos_trat_res",
      "description": "Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag `hijos_trat_res` (\"Had children in treatments\").",
      "@type": "propertyValue"
    },
    {
      "name": "numero_de_hijos",
      "description": "Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in `kNN` imputation and to derive `tiene_menores_de_edad_a_cargo`.",
      "@type": "propertyValue"
    },
    {
      "name": "con_quien_vive",
      "description": "Living arrangement/Cohabitation status",
      "@type": "propertyValue"
    },
    {
      "name": "tipo_de_vivienda",
      "description": "Type of housing",
      "@type": "propertyValue"
    },
    {
      "name": "precariedad_vivienda",
      "description": "Precarious housing. Direct import. Available since 2022 databases.",
      "@type": "propertyValue"
    },
    {
      "name": "tenure_status_household",
      "description": "Housing situation",
      "@type": "propertyValue"
    },
    {
      "name": "servicios_basicos_95",
      "description": "Basic sanitation services. Direct import. Available since 2022 databases.",
      "@type": "propertyValue"
    },
    {
      "name": "perso_dormitorio_vivienda",
      "description": "Persons per dwelling bedroom Direct import. Available since 2022 databases.",
      "@type": "propertyValue"
    },
    {
      "name": "ed_attainment",
      "description": "Recoded education level. Created from `escolaridad_ultimo_ano_cursado` into standardized categories (e.g., \"media incompleta\" = \"2-Completed high school or less\").",
      "@type": "propertyValue"
    },
    {
      "name": "occupation_condition",
      "description": "Recoded employment status. Created from `condicion_ocupacional`: e.g., \"cesante\"= \"unemployed\" (in Step 1).",
      "@type": "propertyValue"
    },
    {
      "name": "occupation_status",
      "description": "Recoded employment category. Created from `categoria_ocupacional`. Sparse; often NA.",
      "@type": "propertyValue"
    },
    {
      "name": "rubro_trabaja",
      "description": "Occupational sector (e.g., \"trabajadores no calificados\"). Direct import. Recoded to `rubro_trabaja_mod`. 68% missing. 13.8% among employeed",
      "@type": "propertyValue"
    },
    {
      "name": "laboral_ingresos",
      "description": "Monthly income. Direct import. Available since 2022 databases.",
      "@type": "propertyValue"
    },
    {
      "name": "yr_block",
      "description": "Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding `birth_date` year to nearest decade (e.g., `floor(year(birth_date)/10)*10`). To look for data similarities (Step 1)",
      "@type": "propertyValue"
    },
    {
      "name": "OBS_series",
      "description": "OBS series",
      "@type": "propertyValue"
    },
    {
      "name": "senda_series",
      "description": "SENDA series",
      "@type": "propertyValue"
    },
    {
      "name": "pub_center_series",
      "description": "Public center series",
      "@type": "propertyValue"
    },
    {
      "name": "id_centro_series",
      "description": "Center ID series",
      "@type": "propertyValue"
    },
    {
      "name": "disch_date_num_rec6",
      "description": "Numeric discharge date recoded 6",
      "@type": "propertyValue"
    },
    {
      "name": "dg_psiq_cie_10_instudy",
      "description": "Psychiatric comorbidity (ICD-10)",
      "@type": "propertyValue"
    },
    {
      "name": "dg_psiq_cie_10_dg",
      "description": "Mutually exclusive (as corrected in Step 3)",
      "@type": "propertyValue"
    },
    {
      "name": "dg_psiq_dsm_iv_instudy",
      "description": "Psychiatric comorbidity (DSM-IV)",
      "@type": "propertyValue"
    },
    {
      "name": "dg_psiq_dsm_iv_dg",
      "description": "Mutually exclusive (as corrected in Step 3)",
      "@type": "propertyValue"
    },
    {
      "name": "plan_type_series",
      "description": "Plan type series",
      "@type": "propertyValue"
    },
    {
      "name": "tr_compliance_rec7_series",
      "description": "Treatment compliance recoded 7 series",
      "@type": "propertyValue"
    },
    {
      "name": "referral_type_series",
      "description": "Referral type series",
      "@type": "propertyValue"
    },
    {
      "name": "adm_age_rec3_series",
      "description": "Admission age recoded 3 series",
      "@type": "propertyValue"
    },
    {
      "name": "adm_date_rec2_series",
      "description": "Admission date recoded 2 series",
      "@type": "propertyValue"
    },
    {
      "name": "disch_date_rec6_series",
      "description": "Discharge date recoded 6 series",
      "@type": "propertyValue"
    },
    {
      "name": "diagnostico_trs_fisico_series",
      "description": "Physical diagnosis series",
      "@type": "propertyValue"
    },
    {
      "name": "otros_probl_at_sm_or_series",
      "description": "Other mental health problems series",
      "@type": "propertyValue"
    },
    {
      "name": "OBS",
      "description": "Observed flags or notes from cleaning (e.g., \"Duplicated Cases\"). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. It is separated by semi-colons",
      "@type": "propertyValue"
    },
    {
      "name": "occupation_status_corr",
      "description": "Corrected employment status Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as 'other' if missing. Depends on occupation_condition_inferred, occupation_status.",
      "@type": "propertyValue"
    },
    {
      "name": "occupation_condition_corr24",
      "description": "Employment status",
      "@type": "propertyValue"
    },
    {
      "name": "plan_type_corr",
      "description": "Treatment modality",
      "@type": "propertyValue"
    },
    {
      "name": "ed_attainment_corr",
      "description": "Educational attainment",
      "@type": "propertyValue"
    },
    {
      "name": "polysubstance_strict",
      "description": "Polysubstance use",
      "@type": "propertyValue"
    },
    {
      "name": "icd10_diag1",
      "description": "ICD10 diagnosis 1 Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
      "@type": "propertyValue"
    },
    {
      "name": "icd10_diag2",
      "description": "ICD10 diagnosis 2 Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
      "@type": "propertyValue"
    },
    {
      "name": "icd10_diag3",
      "description": "ICD10 diagnosis 3 Created in Step 4. Separated from mod_psiq_cie_10 \"::\". Depends on mod_psiq_cie_10.",
      "@type": "propertyValue"
    },
    {
      "name": "dsmiv_diag1",
      "description": "DSMIV diagnosis 1 Created in Step 4. Separated from mod_psiq_dsm_iv \"::\". Depends on mod_psiq_dsm_iv.",
      "@type": "propertyValue"
    },
    {
      "name": "dsmiv_diag2",
      "description": "DSMIV diagnosis 2 Created in Step 4. Separated from mod_psiq_dsm_iv \"::\". Depends on mod_psiq_dsm_iv.",
      "@type": "propertyValue"
    },
    {
      "name": "dsmiv_diag3",
      "description": "DSMIV diagnosis 3 Created in Step 4. Separated from mod_psiq_dsm_iv \"::\". Depends on mod_psiq_dsm_iv.",
      "@type": "propertyValue"
    },
    {
      "name": "adm_year",
      "description": "Admission year",
      "@type": "propertyValue"
    },
    {
      "name": "porc_pobr",
      "description": "Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6]",
      "@type": "propertyValue"
    },
    {
      "name": "clasificacion",
      "description": "Urbanization level of the commune of residence",
      "@type": "propertyValue"
    },
    {
      "name": "km2_c",
      "description": "Centered km^2 Created in Step 4. Created in Step 4. km_2 - mean(km_2). Depends on km_2.km_2 - mean(km_2). Depends on km_2.",
      "@type": "propertyValue"
    },
    {
      "name": "km2_log",
      "description": "Log km^2 Created in Step 4. Created in Step 4. km_2 - mean(km_2). Depends on km_2.log1p(km_2). Depends on km_2.",
      "@type": "propertyValue"
    },
    {
      "name": "porc_pobr_log",
      "description": "Log poverty Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr.",
      "@type": "propertyValue"
    },
    {
      "name": "porc_pobr_c",
      "description": "Centered poverty Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr.",
      "@type": "propertyValue"
    }
  ]
}`
Back to top