SENDAs Agreement 1 Codebook
Code
# Load the saved workspace from an absolute, machine-specific path.
# NOTE(review): presumably this provides SISTRAT23_c1_2010_2024_df_prev1y, which
# is used further down but never created in the visible code — confirm.
load("G:/My Drive/Alvacast/SISTRAT 2023/data/20241015_out/26_ndp_2025_10_01.Rdata")
library(dplyr)
Adjuntando el paquete: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Code
library(codebook)
library(labelled)
Adjuntando el paquete: 'labelled'
The following object is masked from 'package:codebook':
to_factor
Code
# Manual variable labels for the main codebook variables.
# Based on TSV descriptions where available, inferred from provided information
# for others. Includes hash_key.
# The list is keyed by column name; each value is the label text shown in the
# codebook for that column of SISTRAT23_c1_2010_2024_df_prev1y.
labels_map <- list(
  "TABLE" = "Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., \"2023\", \"2015\"). No transformation applied. Made in import_c1_top_data_adm_25.qmd",
  "TABLE_rec_series" = "Recoded series of TABLE",
  # NOTE(review): the rendered codebook shows rn spanning 1 to 259395 (median
  # 122890), which looks like a dataset-wide row index rather than a
  # per-patient sequence — confirm this label.
  "rn" = "Row number or treatment episode sequence per patient. Created by `row_number() over (partition by hash_key order by adm_date)`, Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user.",
  "rn_series" = "Series of row number",
  "num_trat_ant" = "Number of prior treatments reported by patient. Direct import. May not be reliable. Not used.",
  "fecha_ultimo_tratamiento" = "Patient-reported time since last treatment (e.g., \"3 a 4 anos\"). Direct import. Categorical, not numeric. Many missing values (~50%)",
  "hash_key" = "Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letter. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation.",
  "min_adm_age_rec3" = "min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages.",
  "adm_age_rec3" = "Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat]",
  "birth_date_rec" = "Final authoritative date of birth. Created by selecting/imputing best `birth_date` per `hash_key` (via `kNN`, logic, or min/max validation). Used to recalculate `adm_age_rec`.",
  "adm_date_num_rec2" = "Numeric admission date recoded version 2",
  "adm_date_rec2" = "Intermediate admission date Created in Step 2 (0.c) for the resolution of overlaps",
  # Unit is days: the derived variables are treat_days_* and the <90days flag.
  "dit_rec6" = "Length of stay in treatment (days) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90]",
  "disch_date_rec6" = "Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling.",
  "disch_date_num_rec6_trans" = "Imputed discharge based on reference date Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms.",
  "def_date" = "Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a).",
  "adm_motive" = "Admission motive",
  "tr_compliance_rec7" = "Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing treatment motive but with a posterior treatment after. Replaced for dropout (they had incomplete discharge information)",
  "adm_disch_reason" = "Placeholder for administrative discharge reason (mostly empty). Created as copy of `motivo_de_egreso_alta_administrativa`. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge.",
  "referral_type" = "Recoded type of referral center. Created from `tipo_centro_derivacion` (translation): e.g., \"otro centro\" , \"other facility\", \"cosam\", \"primary health care\".",
  "plan_type" = "Recoded treatment plan type. Created as copy and translation of `tipo_de_plan` (step 1). Used in collapsing logic and outcome modeling.",
  "id_centro" = "Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive `centro_muj` (women-specific centers).",
  # NOTE(review): the last sentence of this label is garbled ("may be in SENDA
  # Yes programs may anyways part of") — the intended meaning needs confirming
  # with the authors, so the text is left as it was.
  "senda" = "Indicator if treatment was under SENDA agreement (\"si\"/\"no\"). Direct import. Used to derive `senda_adm_date` and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients may be in SENDA Yes programs may anyways part of SENDA programs in some cases",
  "pub_center" = "Binary indicator for public center. Created as `tipo_centro == \"publico\"`. Used in institutional comparisons.",
  "primary_sub" = "Primary substance of use",
  "second_sub1" = "Normalized secondary substances at admission (1)",
  "second_sub2" = "Normalized secondary substances at admission (2)",
  "second_sub3" = "Normalized secondary substances at admission (3)",
  "prim_sub_freq" = "Frequency of primary substance use at admission (recoded: prim_sub_freq_rec)",
  "prim_sub_route" = "Recoded route of administration. Created from `via_administracion_sustancia_principal` into standardized terms.",
  "LB_age_primary_onset_rec2" = "Lower bound age of primary substance onset recoded 2",
  "UB_age_primary_onset_rec2" = "Upper bound age of primary substance onset recoded 2",
  "age_primary_onset_rec2" = "Created in Step 3.",
  "first_sub_used" = "Normalized first substance used. Created as `tolower(gsub(\"_\", \" \", sustancia_de_inicio))`. Input for resolving inconsistencies, prioritizing most vulnerable value = `sus_ini_mod_mvv`. Despite this, this variable was normalized in Step 3 for inconsistent values in patients.",
  "sus_ini_mod_mvv" = "Sus ini mod mvv",
  "sus_ini_1" = "Sus ini 1",
  "sus_ini_2" = "Sus ini 2",
  "sus_ini_3" = "Sus ini 3",
  "LB_age_subs_onset_rec2" = "Lower bound age of substance onset recoded 2",
  "UB_age_subs_onset_rec2" = "Upper bound age of substance onset recoded 2",
  "age_subs_onset_rec2" = "Created in Step 3.",
  "biopsych_comp" = "Recoded biopsychosocial severity. Created from `compromiso_biopsicosocial`: e.g., \"moderado\", \"2-moderate\".",
  "mod_psiq_cie_10" = "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
  "mod_psiq_dsm_iv" = "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
  "diagnostico_trs_fisico" = "Physical health diagnosis. Direct import. Often concatenated (e.g., \"Hepatitis alcoholica: cardiopatías\"). Preserved as-is for clinical profiling.",
  "otros_probl_at_sm_or" = "Other mental health issues (e.g., \"abuso sexual\"). Direct import. Recoded to `otros_probl_at_sm_or`. Used in vulnerability and trauma profiling.",
  "sub_dep_icd10_status" = "Severity of Substance Use Disorder (SUD)",
  "evaluacindelprocesoteraputico" = "Therapeutic process evaluation (e.g., \"logro alto\"). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis.",
  "eva_consumo" = "Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly.",
  "eva_fam" = "Family situation evaluation at discharge. Direct import.",
  "eva_relinterp" = "Interpersonal relations evaluation at discharge. Direct import.",
  "eva_ocupacion" = "Occupational situation evaluation at discharge. Direct import.",
  "eva_sm" = "Mental health evaluation at discharge. Direct import.",
  "eva_fisica" = "Physical health evaluation at discharge. Direct import.",
  "eva_transgnorma" = "Social norm transgression evaluation at discharge. Direct import.",
  "dg_global_nec_int_soc_or" = "Global social integration need at admission. Direct import. Recoded to `dg_global_nec_int_soc_or`. Categorized as \"altas\", \"medias\", \"bajas\". Used in social vulnerability index.",
  "dg_nec_int_soc_cap_hum_or" = "Human capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_hum_or`. Part of social integration profile.",
  "dg_nec_int_soc_cap_fis_or" = "Physical capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_fis_or`.",
  "dg_nec_int_soc_cap_soc_or" = "Social capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_soc_or`.",
  "dg_global_nec_int_soc_egr_or" = "Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis.",
  "dg_nec_int_soc_cap_hum_egr_or" = "Human capital need at discharge. Direct import.",
  "dg_nec_int_soc_cap_fis_egr_or" = "Physical capital need at discharge. Direct import.",
  "dg_nec_int_soc_cap_soc_egr_or" = "Social capital need at discharge. Direct import.",
  "usuario_tribunal_trat_droga" = "Court-referred to drug treatment (very unbalanced, ~1% yes)",
  "nationality_cons" = "Recoded/standardized from nacionalidad or pais_nacimiento.",
  "ethnicity_c1_c6_historic" = "Concatenated unique non-Chilean/non-\"no pertenece\" values from etnia (C1) and ethnicity (C6), grouped by hash_key.",
  "discapacidad" = "Indicator of disability (\"si\"/\"no\"). Direct import. Used with `opcion_discapacidad` for disability profiling. Starts coding in 2019",
  "opcion_discapacidad" = "Type of disability (e.g., \"de causa psiquica\"). Direct import. Only populated if `discapacidad == \"si\"`. Used in accessibility and needs assessment. Starts coding in 2019",
  "sex_rec" = "Sex (often used for biological categorization)",
  "identidad_de_genero" = "Gender identity (e.g., \"masculino\", \"femenino\"). Direct import. Recoded to `identidad_de_genero` factor. Used to validate/override `sexo` in `sex_rec` (Step 2). Starts coding in 2019",
  "orientacion_sexual" = "Sexual orientation (e.g., \"heterosexual\"). Direct import. Sparse. Preserved for inclusivity analysis. Starts coding in 2022",
  "pregnant" = "Recoded pregnancy status at admission. Created from `se_trata_de_una_mujer_embarazada` (NA = \"no\"). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to be introduced in 2011.",
  "pregnant_disch" = "Recoded pregnancy status at discharge. Created from `ha_estado_embarazada_egreso`. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge.",
  "marital_status" = "Marital status",
  "tiene_menores_de_edad_a_cargo" = "Indicator if responsible for minors (\"si\"/\"no\"). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index.",
  "num_hijos_trat_res" = "Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag `hijos_trat_res` (\"Had children in treatments\").",
  "numero_de_hijos" = "Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in `kNN` imputation and to derive `tiene_menores_de_edad_a_cargo`.",
  "con_quien_vive" = "Living arrangement/Cohabitation status",
  "tipo_de_vivienda" = "Type of housing",
  "precariedad_vivienda" = "Precarious housing. Direct import. Available since 2022 databases.",
  "tenure_status_household" = "Housing situation",
  "servicios_basicos_95" = "Basic sanitation services. Direct import. Available since 2022 databases.",
  "perso_dormitorio_vivienda" = "Persons per dwelling bedroom Direct import. Available since 2022 databases.",
  "ed_attainment" = "Recoded education level. Created from `escolaridad_ultimo_ano_cursado` into standardized categories (e.g., \"media incompleta\" = \"2-Completed high school or less\").",
  "occupation_condition" = "Recoded employment status. Created from `condicion_ocupacional`: e.g., \"cesante\"= \"unemployed\" (in Step 1).",
  "occupation_status" = "Recoded employment category. Created from `categoria_ocupacional`. Sparse; often NA.",
  "rubro_trabaja" = "Occupational sector (e.g., \"trabajadores no calificados\"). Direct import. Recoded to `rubro_trabaja_mod`. 68% missing. 13.8% among employed",
  "laboral_ingresos" = "Monthly income. Direct import. Available since 2022 databases.",
  "yr_block" = "Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding `birth_date` year to nearest decade (e.g., `floor(year(birth_date)/10)*10`). To look for data similarities (Step 1)",
  "OBS_series" = "OBS series",
  "senda_series" = "SENDA series",
  "pub_center_series" = "Public center series",
  "id_centro_series" = "Center ID series",
  "disch_date_num_rec6" = "Numeric discharge date recoded 6",
  "dg_psiq_cie_10_instudy" = "Psychiatric comorbidity (ICD-10)",
  "dg_psiq_cie_10_dg" = "Mutually exclusive (as corrected in Step 3)",
  "dg_psiq_dsm_iv_instudy" = "Psychiatric comorbidity (DSM-IV)",
  "dg_psiq_dsm_iv_dg" = "Mutually exclusive (as corrected in Step 3)",
  "plan_type_series" = "Plan type series",
  "tr_compliance_rec7_series" = "Treatment compliance recoded 7 series",
  "referral_type_series" = "Referral type series",
  "adm_age_rec3_series" = "Admission age recoded 3 series",
  "adm_date_rec2_series" = "Admission date recoded 2 series",
  "disch_date_rec6_series" = "Discharge date recoded 6 series",
  "diagnostico_trs_fisico_series" = "Physical diagnosis series",
  "otros_probl_at_sm_or_series" = "Other mental health problems series",
  "OBS" = "Observed flags or notes from cleaning (e.g., \"Duplicated Cases\"). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. It is separated by semi-colons",
  "occupation_status_corr" = "Corrected employment status Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as 'other' if missing. Depends on occupation_condition_inferred, occupation_status.",
  "occupation_condition_corr24" = "Employment status",
  "plan_type_corr" = "Treatment modality",
  "ed_attainment_corr" = "Educational attainment",
  "polysubstance_strict" = "Polysubstance use",
  "icd10_diag1" = "ICD10 diagnosis 1 Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
  "icd10_diag2" = "ICD10 diagnosis 2 Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
  "icd10_diag3" = "ICD10 diagnosis 3 Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
  "dsmiv_diag1" = "DSMIV diagnosis 1 Created in Step 4. Separated from mod_psiq_dsm_iv by \"::\". Depends on mod_psiq_dsm_iv.",
  "dsmiv_diag2" = "DSMIV diagnosis 2 Created in Step 4. Separated from mod_psiq_dsm_iv by \"::\". Depends on mod_psiq_dsm_iv.",
  "dsmiv_diag3" = "DSMIV diagnosis 3 Created in Step 4. Separated from mod_psiq_dsm_iv by \"::\". Depends on mod_psiq_dsm_iv.",
  "adm_year" = "Admission year",
  "porc_pobr" = "Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6]",
  "clasificacion" = "Urbanization level of the commune of residence",
  "km2_c" = "Centered km^2 Created in Step 4. km_2 - mean(km_2). Depends on km_2.",
  "km2_log" = "Log km^2 Created in Step 4. log1p(km_2). Depends on km_2.",
  "porc_pobr_log" = "Log poverty Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr.",
  "porc_pobr_c" = "Centered poverty Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr."
  # Removed "disch_date_real" as it doesn't exist in the dataframe
)
# Attach each label in labels_map to the column of the same name
var_label(SISTRAT23_c1_2010_2024_df_prev1y) <- labels_map

# Codebook variables: exactly the labelled variables, in the order they appear
# in labels_map. Deriving the vector from labels_map (instead of repeating the
# 125 names by hand) keeps the label list and the subset from drifting apart;
# "disch_date_real" stays excluded because it has no label entry.
main_vars <- names(labels_map)

# Keep only those columns; all_of() fails loudly if any of them is missing
df_subset <- SISTRAT23_c1_2010_2024_df_prev1y %>% select(all_of(main_vars))
# Input for the HTML codebook: the labelled subset plus dataset-level metadata
codebook_data <- df_subset

# Publication date and coverage
# NOTE(review): datePublished falls before the end of the stated temporal
# coverage (2010-2024) — confirm the intended publication date.
metadata(codebook_data)$datePublished <- "2024-01-01"
metadata(codebook_data)$temporalCoverage <- "2010-2024"
metadata(codebook_data)$spatialCoverage <- "Chile"

# Authors: one entry per person, each with an organizational affiliation
# (email only where one is provided)
metadata(codebook_data)$creator <- local({
  organization <- function(org_name) {
    list("@type" = "Organization", name = org_name)
  }
  list(
    list(
      "@type" = "Person",
      givenName = "Álvaro",
      familyName = "Castillo-Carniglia",
      affiliation = organization("Universidad San Sebastián, Chile")
    ),
    list(
      "@type" = "Person",
      givenName = "Andrés",
      familyName = "González-Santa Cruz",
      email = "gonzalez.santacruz.andres@gmail.com",
      affiliation = organization("Universidad de Chile, Public Health")
    ),
    list(
      "@type" = "Person",
      givenName = "Amaru",
      familyName = "Agüero Jiménez",
      affiliation = organization("Universidad del Desarrollo, Chile")
    )
  )
})

# Citation, project URL and dataset name
metadata(codebook_data)$citation <- "Castillo-Carniglia, Á., González-Santa Cruz, A., & Agüero Jiménez, A. (2024). Workflow of data management and analysis - Chilean substance use treatment administrative data. Consolidation of Agreement 1 Databases from 2010 to 2024. Funded by FONDECYT regular 1191282."
metadata(codebook_data)$url <- "https://github.com/FONDECYTACC/cons2025"
metadata(codebook_data)$name <- "Agreement 1 SENDA"
metadata(codebook_data)$description <- "Information About Agreement 1 of SENDA and MINSAL"
Code
codebook(codebook_data)
Metadata
Description
Dataset name: Agreement 1 SENDA
Information About Agreement 1 of SENDA and MINSAL
Metadata for search engines
Temporal Coverage: 2010-2024
Spatial Coverage: Chile
Citation: Castillo-Carniglia, Á., González-Santa Cruz, A., & Agüero Jiménez, A. (2024). Workflow of data management and analysis - Chilean substance use treatment administrative data. Consolidation of Agreement 1 Databases from 2010 to 2024. Funded by FONDECYT regular 1191282.
Date published: 2024-01-01
Creator:
| name | value |
|---|---|
| 1 | Person , Álvaro , Castillo-Carniglia , Organization , Universidad San Sebastián, Chile |
| 2 | Person , Andrés , González-Santa Cruz , gonzalez.santacruz.andres@gmail.com, Organization , Universidad de Chile, Public Health |
| 3 | Person , Amaru , Agüero Jiménez , Organization , Universidad del Desarrollo, Chile |
|
Variables
TABLE
Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., “2023”, “2015”). No transformation applied. Made in import_c1_top_data_adm_25.qmd
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| TABLE | Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., “2023”, “2015”). No transformation applied. Made in import_c1_top_data_adm_25.qmd | character | 0 | 1 | 15 | 0 | 4 | 4 | 0 |
TABLE_rec_series
Recoded series of TABLE
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| TABLE_rec_series | Recoded series of TABLE | character | 0 | 1 | 400 | 0 | 8 | 50 | 0 |
rn
Row number or treatment episode sequence per patient. Created by row_number() over (partition by hash_key order by adm_date), Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| rn | Row number or treatment episode sequence per patient. Created by row_number() over (partition by hash_key order by adm_date), Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user. | numeric | 0 | 1 | 1 | 122890 | 259395 | 128243.1 | 78217.81 | ▇▇▇▆▇ |
rn_series
Series of row number
Distribution
153200 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| rn_series | Series of row number | character | 153200 | 0.0595284 | 9697 | 0 | 8 | 48 | 0 |
num_trat_ant
Number of prior treatments reported by patient. Direct import. May not be reliable. Not used.
Distribution
9266 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| num_trat_ant | Number of prior treatments reported by patient. Direct import. May not be reliable. Not used. | numeric | 9266 | 0.9431174 | 0 | 1 | 11 | 0.898842 | 1.172059 | ▇▁▁▁▁ |
fecha_ultimo_tratamiento
Patient-reported time since last treatment (e.g., “3 a 4 anos”). Direct import. Categorical, not numeric. Many missing values (~50%)
Distribution
80571 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| fecha_ultimo_tratamiento | Patient-reported time since last treatment (e.g., “3 a 4 anos”). Direct import. Categorical, not numeric. Many missing values (~50%) | character | 80571 | 0.5053868 | 6 | 0 | 10 | 15 | 0 |
hash_key
Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letter. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| hash_key | Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letter. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation. | character | 0 | 1 | 121299 | 0 | 64 | 64 | 0 |
min_adm_age_rec3
min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| min_adm_age_rec3 | min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages. | numeric | 0 | 1 | 13 | 34 | 89 | 35.95135 | 11.04595 | ▅▇▃▁▁ |
adm_age_rec3
Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat]
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| adm_age_rec3 | Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat] | numeric | 0 | 1 | 13 | 35 | 89 | 36.91851 | 11.07224 | ▅▇▃▁▁ |
birth_date_rec
Final authoritative date of birth. Created by selecting/imputing best birth_date per hash_key (via kNN, logic, or min/max validation). Used to recalculate adm_age_rec.
Distribution
19660 unique, categorical values, so not shown.
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | min | median | max |
|---|---|---|---|---|---|---|---|---|
| birth_date_rec | Final authoritative date of birth. Created by selecting/imputing best birth_date per hash_key (via kNN, logic, or min/max validation). Used to recalculate adm_age_rec. | Date | 0 | 1 | 19660 | 1928-09-18 | 1982-01-02 | 2008-04-16 |
adm_date_num_rec2
Numeric admission date recoded version 2
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| adm_date_num_rec2 | Numeric admission date recoded version 2 | numeric | 0 | 1 | 10957 | 17345 | 20077 | 17378.37 | 1528.861 | ▁▁▆▇▆ |
adm_date_rec2
Intermediate admission date Created in Step 2 (0.c) for the resolution of overlaps
Distribution
5085 unique, categorical values, so not shown.
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | min | median | max |
|---|---|---|---|---|---|---|---|---|
| adm_date_rec2 | Intermediate admission date Created in Step 2 (0.c) for the resolution of overlaps | Date | 0 | 1 | 5085 | 2000-01-01 | 2017-06-28 | 2024-12-20 |
dit_rec6
Length of stay in treatment (months) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90]
Distribution
3103 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| dit_rec6 | Length of stay in treatment (months) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90] | numeric | 3103 | 0.9809512 | 0 | 180 | 1988 | 236.0513 | 199.5927 | ▇▁▁▁▁ |
disch_date_rec6
Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling.
Distribution
5335 unique, categorical values, so not shown.
3270 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | min | median | max |
|---|---|---|---|---|---|---|---|---|
| disch_date_rec6 | Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling. | Date | 3270 | 0.979926 | 5335 | 2003-01-01 | 2017-12-27 | 2025-05-28 |
disch_date_num_rec6_trans
Imputed discharge based on reference date Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| disch_date_num_rec6_trans | Imputed discharge based on reference date Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms. | numeric | 0 | 1 | 12053 | 17563 | 20236 | 17619.79 | 1539.775 | ▁▂▇▇▇ |
Warning: Setting row names on a tibble is deprecated.
def_date
Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a).
Distribution
2289 unique, categorical values, so not shown.
157391 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | min | median | max | row | expected | actual |
|---|---|---|---|---|---|---|---|---|---|---|---|
| def_date | Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a). | Date | 157391 | 0.0338005 | 2289 | 2010-01-04 | 2018-04-19 | 2020-12-31 | 7864 | date like %d-%m-%Y | 15-NA-2008 |
adm_motive
Admission motive
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| adm_motive | Admission motive | character | 0 | 1 | 5 | 0 | 5 | 46 | 0 |
tr_compliance_rec7
Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing treatment motive but with a posterior treatment after. Replaced for dropout (they had incomplete discharge information)
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| tr_compliance_rec7 | Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing treatment motive but with a posterior treatment after. Replaced for dropout (they had incomplete discharge information) | character | 0 | 1 | 8 | 0 | 8 | 19 | 0 |
adm_disch_reason
Placeholder for administrative discharge reason (mostly empty). Created as copy of motivo_de_egreso_alta_administrativa. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge.
Distribution
156643 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| adm_disch_reason | Placeholder for administrative discharge reason (mostly empty). Created as copy of motivo_de_egreso_alta_administrativa. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge. | character | 156643 | 0.0383924 | 4 | 0 | 5 | 16 | 0 |
referral_type
Recoded type of referral center. Created from tipo_centro_derivacion (translation): e.g., “otro centro”, “other facility”, “cosam”, “primary health care”.
Distribution
158565 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| referral_type | Recoded type of referral center. Created from tipo_centro_derivacion (translation): e.g., “otro centro”, “other facility”, “cosam”, “primary health care”. | character | 158565 | 0.0265935 | 5 | 0 | 14 | 25 | 0 |
plan_type
Recoded treatment plan type. Created as copy and translation of tipo_de_plan (step 1). Used in collapsing logic and outcome modeling.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| plan_type | Recoded treatment plan type. Created as copy and translation of tipo_de_plan (step 1). Used in collapsing logic and outcome modeling. | character | 0 | 1 | 6 | 0 | 4 | 6 | 0 |
id_centro
Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive centro_muj (women-specific centers).
Distribution
39 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| id_centro | Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive centro_muj (women-specific centers). | numeric | 39 | 0.9997606 | 104 | 258 | 899 | 310.5774 | 177.9022 | ▇▃▂▂▁ |
senda
Indicator if treatment was under SENDA agreement (“si”/“no”). Direct import. Used to derive senda_adm_date and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients not recorded under SENDA “Yes” programs may nonetheless be part of SENDA programs in some cases.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| senda | Indicator if treatment was under SENDA agreement (“si”/“no”). Direct import. Used to derive senda_adm_date and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients not recorded under SENDA “Yes” programs may nonetheless be part of SENDA programs in some cases. | character | 0 | 1 | 2 | 0 | 2 | 2 | 0 |
pub_center
Binary indicator for public center. Created as tipo_centro == "publico". Used in institutional comparisons.
Distribution
39 missing values.
Summary statistics
| name | label | data_type | ordered | value_labels | n_missing | complete_rate | n_unique | top_counts |
|---|---|---|---|---|---|---|---|---|
| pub_center | Binary indicator for public center. Created as tipo_centro == "publico". Used in institutional comparisons. | factor | FALSE | 1. FALSE, 2. TRUE | 39 | 0.9997606 | 2 | TRU: 112242, FAL: 50616 |
primary_sub
Primary substance of use
Distribution
4 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| primary_sub | Primary substance of use | character | 4 | 0.9999754 | 11 | 0 | 6 | 27 | 0 |
second_sub1
Normalized secondary substances at admission (1)
Distribution
45932 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| second_sub1 | Normalized secondary substances at admission (1) | character | 45932 | 0.7180304 | 12 | 0 | 6 | 33 | 0 |
second_sub2
Normalized secondary substances at admission (2)
Distribution
101461 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| second_sub2 | Normalized secondary substances at admission (2) | character | 101461 | 0.3771463 | 12 | 0 | 6 | 33 | 0 |
second_sub3
Normalized secondary substances at admission (3)
Distribution
142611 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| second_sub3 | Normalized secondary substances at admission (3) | character | 142611 | 0.1245327 | 12 | 0 | 6 | 33 | 0 |
prim_sub_freq
Frequency of primary substance use at admission (recoded: prim_sub_freq_rec)
Distribution
808 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| prim_sub_freq | Frequency of primary substance use at admission (recoded: prim_sub_freq_rec) | character | 808 | 0.9950398 | 5 | 0 | 8 | 25 | 0 |
prim_sub_route
Recoded route of administration. Created from via_administracion_sustancia_principal into standardized terms.
Distribution
25 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| prim_sub_route | Recoded route of administration. Created from via_administracion_sustancia_principal into standardized terms. | character | 25 | 0.9998465 | 5 | 0 | 6 | 41 | 0 |
LB_age_primary_onset_rec2
Lower bound age of primary substance onset recoded 2
Distribution
91849 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| LB_age_primary_onset_rec2 | Lower bound age of primary substance onset recoded 2 | numeric | 91849 | 0.4361529 | 5 | 14 | 71 | 13.38267 | 4.697013 | ▇▁▁▁▁ |
UB_age_primary_onset_rec2
Upper bound age of primary substance onset recoded 2
Distribution
91849 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| UB_age_primary_onset_rec2 | Upper bound age of primary substance onset recoded 2 | numeric | 91849 | 0.4361529 | 13 | 31 | 82 | 32.69701 | 9.823931 | ▅▇▃▁▁ |
age_primary_onset_rec2
Created in Step 3.
Distribution
1254 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age_primary_onset_rec2 | Created in Step 3. | numeric | 1254 | 0.9923019 | 5 | 18 | 75 | 20.2022 | 7.575139 | ▇▅▁▁▁ |
first_sub_used
Normalized first substance used. Created as tolower(gsub("_", " ", sustancia_de_inicio)). Input for resolving inconsistencies, prioritizing most vulnerable value = sus_ini_mod_mvv. Despite this, this variable was normalized in Step 3 for inconsistent values in patients.
Distribution
10508 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| first_sub_used | Normalized first substance used. Created as tolower(gsub("_", " ", sustancia_de_inicio)). Input for resolving inconsistencies, prioritizing most vulnerable value = sus_ini_mod_mvv. Despite this, this variable was normalized in Step 3 for inconsistent values in patients. | character | 10508 | 0.935493 | 11 | 0 | 6 | 27 | 0 |
sus_ini_mod_mvv
Sus ini mod mvv
Distribution
6684 missing values.
Summary statistics
| name | label | data_type | ordered | value_labels | n_missing | complete_rate | n_unique | top_counts |
|---|---|---|---|---|---|---|---|---|
| sus_ini_mod_mvv | Sus ini mod mvv | factor | FALSE | 1. cocaine paste, 2. cocaine powder, 3. alcohol, 4. marijuana, 5. others | 6684 | 0.9589679 | 5 | alc: 98027, mar: 33833, coc: 12128, coc: 9512 |
sus_ini_1
Sus ini 1
Distribution
121960 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| sus_ini_1 | Sus ini 1 | character | 121960 | 0.251306 | 10 | 0 | 6 | 27 | 0 |
sus_ini_2
Sus ini 2
Distribution
128480 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| sus_ini_2 | Sus ini 2 | character | 128480 | 0.2112807 | 10 | 0 | 6 | 27 | 0 |
sus_ini_3
Sus ini 3
Distribution
157971 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| sus_ini_3 | Sus ini 3 | character | 157971 | 0.03024 | 10 | 0 | 6 | 27 | 0 |
LB_age_subs_onset_rec2
Lower bound age of substance onset recoded 2
Distribution
91846 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| LB_age_subs_onset_rec2 | Lower bound age of substance onset recoded 2 | numeric | 91846 | 0.4361713 | 5 | 5 | 5 | 5 | 0 | ▁▁▇▁▁ |
UB_age_subs_onset_rec2
Upper bound age of substance onset recoded 2
Distribution
91846 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| UB_age_subs_onset_rec2 | Upper bound age of substance onset recoded 2 | numeric | 91846 | 0.4361713 | 13 | 31 | 82 | 32.69688 | 9.823752 | ▅▇▃▁▁ |
age_subs_onset_rec2
Created in Step 3.
Distribution
6675 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age_subs_onset_rec2 | Created in Step 3. | numeric | 6675 | 0.9590232 | 5 | 15 | 74 | 15.52318 | 5.12464 | ▇▁▁▁▁ |
biopsych_comp
Recoded biopsychosocial severity. Created from compromiso_biopsicosocial: e.g., “moderado”, “2-moderate”.
Distribution
1947 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| biopsych_comp | Recoded biopsychosocial severity. Created from compromiso_biopsicosocial: e.g., “moderado”, “2-moderate”. | character | 1947 | 0.9880477 | 3 | 0 | 6 | 10 | 0 |
mod_psiq_cie_10
Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses
Distribution
83784 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| mod_psiq_cie_10 | Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses | character | 83784 | 0.4856627 | 1083 | 0 | 18 | 525 | 0 |
mod_psiq_dsm_iv
Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses
Distribution
148878 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| mod_psiq_dsm_iv | Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses | character | 148878 | 0.0860605 | 744 | 0 | 6 | 403 | 0 |
diagnostico_trs_fisico
Physical health diagnosis. Direct import. Often concatenated (e.g., “Hepatitis alcoholica: cardiopatías”). Preserved as-is for clinical profiling.
Distribution
310 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| diagnostico_trs_fisico | Physical health diagnosis. Direct import. Often concatenated (e.g., “Hepatitis alcoholica: cardiopatías”). Preserved as-is for clinical profiling. | character | 310 | 0.998097 | 15 | 0 | 3 | 60 | 0 |
otros_probl_at_sm_or
Other mental health issues (e.g., “abuso sexual”). Direct import. Recoded to otros_probl_at_sm_or. Used in vulnerability and trauma profiling.
Distribution
34796 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| otros_probl_at_sm_or | Other mental health issues (e.g., “abuso sexual”). Direct import. Recoded to otros_probl_at_sm_or. Used in vulnerability and trauma profiling. | character | 34796 | 0.7863926 | 10 | 0 | 3 | 37 | 0 |
sub_dep_icd10_status
Severity of Substance Use Disorder (SUD)
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| sub_dep_icd10_status | Severity of Substance Use Disorder (SUD) | character | 0 | 1 | 2 | 0 | 15 | 21 | 0 |
evaluacindelprocesoteraputico
Therapeutic process evaluation (e.g., “logro alto”). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis.
Distribution
3599 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| evaluacindelprocesoteraputico | Therapeutic process evaluation (e.g., “logro alto”). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis. | character | 3599 | 0.9779063 | 3 | 0 | 10 | 16 | 0 |
eva_consumo
Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly.
Distribution
3621 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| eva_consumo | Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly. | character | 3621 | 0.9777712 | 3 | 0 | 10 | 16 | 0 |
eva_fam
Family situation evaluation at discharge. Direct import.
Distribution
3621 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| eva_fam | Family situation evaluation at discharge. Direct import. | character | 3621 | 0.9777712 | 3 | 0 | 10 | 16 | 0 |
eva_relinterp
Interpersonal relations evaluation at discharge. Direct import.
Distribution
3621 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| eva_relinterp | Interpersonal relations evaluation at discharge. Direct import. | character | 3621 | 0.9777712 | 3 | 0 | 10 | 16 | 0 |
eva_ocupacion
Occupational situation evaluation at discharge. Direct import.
Distribution
3621 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| eva_ocupacion | Occupational situation evaluation at discharge. Direct import. | character | 3621 | 0.9777712 | 3 | 0 | 10 | 16 | 0 |
eva_sm
Mental health evaluation at discharge. Direct import.
Distribution
3621 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| eva_sm | Mental health evaluation at discharge. Direct import. | character | 3621 | 0.9777712 | 3 | 0 | 10 | 16 | 0 |
eva_fisica
Physical health evaluation at discharge. Direct import.
Distribution
3621 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| eva_fisica | Physical health evaluation at discharge. Direct import. | character | 3621 | 0.9777712 | 3 | 0 | 10 | 16 | 0 |
eva_transgnorma
Social norm transgression evaluation at discharge. Direct import.
Distribution
3621 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| eva_transgnorma | Social norm transgression evaluation at discharge. Direct import. | character | 3621 | 0.9777712 | 3 | 0 | 10 | 16 | 0 |
dg_global_nec_int_soc_or
Global social integration need at admission. Direct import. Recoded to dg_global_nec_int_soc_or. Categorized as “altas”, “medias”, “bajas”. Used in social vulnerability index.
Distribution
47767 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_global_nec_int_soc_or | Global social integration need at admission. Direct import. Recoded to dg_global_nec_int_soc_or. Categorized as “altas”, “medias”, “bajas”. Used in social vulnerability index. | character | 47767 | 0.7067656 | 5 | 0 | 5 | 12 | 0 |
dg_nec_int_soc_cap_hum_or
Human capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_hum_or. Part of social integration profile.
Distribution
47768 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_nec_int_soc_cap_hum_or | Human capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_hum_or. Part of social integration profile. | character | 47768 | 0.7067595 | 5 | 0 | 5 | 12 | 0 |
dg_nec_int_soc_cap_fis_or
Physical capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_fis_or.
Distribution
47767 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_nec_int_soc_cap_fis_or | Physical capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_fis_or. | character | 47767 | 0.7067656 | 5 | 0 | 5 | 12 | 0 |
dg_nec_int_soc_cap_soc_or
Social capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_soc_or.
Distribution
47767 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_nec_int_soc_cap_soc_or | Social capital need at admission. Direct import. Recoded to dg_nec_int_soc_cap_soc_or. | character | 47767 | 0.7067656 | 5 | 0 | 5 | 12 | 0 |
dg_global_nec_int_soc_egr_or
Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis.
Distribution
45847 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_global_nec_int_soc_egr_or | Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis. | character | 45847 | 0.7185522 | 5 | 0 | 5 | 12 | 0 |
dg_nec_int_soc_cap_hum_egr_or
Human capital need at discharge. Direct import.
Distribution
45165 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_nec_int_soc_cap_hum_egr_or | Human capital need at discharge. Direct import. | character | 45165 | 0.7227389 | 5 | 0 | 5 | 12 | 0 |
dg_nec_int_soc_cap_fis_egr_or
Physical capital need at discharge. Direct import.
Distribution
45144 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_nec_int_soc_cap_fis_egr_or | Physical capital need at discharge. Direct import. | character | 45144 | 0.7228678 | 5 | 0 | 5 | 12 | 0 |
dg_nec_int_soc_cap_soc_egr_or
Social capital need at discharge. Direct import.
Distribution
45285 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dg_nec_int_soc_cap_soc_egr_or | Social capital need at discharge. Direct import. | character | 45285 | 0.7220022 | 5 | 0 | 5 | 12 | 0 |
usuario_tribunal_trat_droga
Court-referred to drug treatment (very unbalanced, ~1% yes)
Distribution
5888 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| usuario_tribunal_trat_droga | Court-referred to drug treatment (very unbalanced, ~1% yes) | character | 5888 | 0.9638545 | 2 | 0 | 2 | 2 | 0 |
nationality_cons
Recoded/standardized from nacionalidad or pais_nacimiento.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| nationality_cons | Recoded/standardized from nacionalidad or pais_nacimiento. | character | 0 | 1 | 83 | 0 | 4 | 31 | 0 |
ethnicity_c1_c6_historic
Concatenated unique non-Chilean/non-“no pertenece” values from etnia (C1) and ethnicity (C6), grouped by hash_key.
Distribution
150808 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| ethnicity_c1_c6_historic | Concatenated unique non-Chilean/non-“no pertenece” values from etnia (C1) and ethnicity (C6), grouped by hash_key. | character | 150808 | 0.0742125 | 31 | 0 | 5 | 22 | 0 |
discapacidad
Indicator of disability (“si”/“no”). Direct import. Used with opcion_discapacidad for disability profiling. Starts coding in 2019
Distribution
99545 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| discapacidad | Indicator of disability (“si”/“no”). Direct import. Used with opcion_discapacidad for disability profiling. Starts coding in 2019 | character | 99545 | 0.3889083 | 2 | 0 | 2 | 2 | 0 |
opcion_discapacidad
Type of disability (e.g., “de causa psiquica”). Direct import. Only populated if discapacidad == "si". Used in accessibility and needs assessment. Starts coding in 2019
Distribution
160052 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| opcion_discapacidad | Type of disability (e.g., “de causa psiquica”). Direct import. Only populated if discapacidad == "si". Used in accessibility and needs assessment. Starts coding in 2019 | character | 160052 | 0.017465 | 5 | 0 | 16 | 21 | 0 |
sex_rec
Sex (often used for biological categorization)
Distribution
4 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| sex_rec | Sex (often used for biological categorization) | character | 4 | 0.9999754 | 2 | 0 | 5 | 6 | 0 |
identidad_de_genero
Gender identity (e.g., “masculino”, “femenino”). Direct import. Recoded to identidad_de_genero factor. Used to validate/override sexo in sex_rec (Step 2). Starts coding in 2019
Distribution
99545 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| identidad_de_genero | Gender identity (e.g., “masculino”, “femenino”). Direct import. Recoded to identidad_de_genero factor. Used to validate/override sexo in sex_rec (Step 2). Starts coding in 2019 | character | 99545 | 0.3889083 | 8 | 0 | 8 | 19 | 0 |
orientacion_sexual
Sexual orientation (e.g., “heterosexual”). Direct import. Sparse. Preserved for inclusivity analysis. Starts coding in 2022
Distribution
129745 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| orientacion_sexual | Sexual orientation (e.g., “heterosexual”). Direct import. Sparse. Preserved for inclusivity analysis. Starts coding in 2022 | character | 129745 | 0.2035151 | 5 | 0 | 7 | 12 | 0 |
pregnant
Recoded pregnancy status at admission. Created from se_trata_de_una_mujer_embarazada (NA = “no”). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to have been introduced in 2011.
Distribution
87172 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| pregnant | Recoded pregnancy status at admission. Created from se_trata_de_una_mujer_embarazada (NA = “no”). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to have been introduced in 2011. | character | 87172 | 0.4648643 | 2 | 0 | 2 | 3 | 0 |
pregnant_disch
Recoded pregnancy status at discharge. Created from ha_estado_embarazada_egreso. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge.
Distribution
100935 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| pregnant_disch | Recoded pregnancy status at discharge. Created from ha_estado_embarazada_egreso. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge. | character | 100935 | 0.3803753 | 2 | 0 | 2 | 3 | 0 |
marital_status
Marital status
Distribution
317 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| marital_status | Marital status | character | 317 | 0.998054 | 4 | 0 | 6 | 27 | 0 |
tiene_menores_de_edad_a_cargo
Indicator if responsible for minors (“si”/“no”). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index.
Distribution
61317 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| tiene_menores_de_edad_a_cargo | Indicator if responsible for minors (“si”/“no”). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index. | character | 61317 | 0.6235842 | 2 | 0 | 2 | 2 | 0 |
num_hijos_trat_res
Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag hijos_trat_res (“Had children in treatments”).
Distribution
9266 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| num_hijos_trat_res | Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag hijos_trat_res (“Had children in treatments”). | numeric | 9266 | 0.9431174 | 0 | 0 | 11 | 0.0478289 | 0.2730079 | ▇▁▁▁▁ |
numero_de_hijos
Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in kNN imputation and to derive tiene_menores_de_edad_a_cargo.
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| numero_de_hijos | Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in kNN imputation and to derive tiene_menores_de_edad_a_cargo. | numeric | 0 | 1 | 0 | 1 | 71 | 1.663032 | 1.705014 | ▇▁▁▁▁ |
con_quien_vive
Living arrangement/Cohabitation status
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| con_quien_vive | Living arrangement/Cohabitation status | character | 0 | 1 | 15 | 0 | 4 | 53 | 0 |
tipo_de_vivienda
Type of housing
Distribution
12176 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| tipo_de_vivienda | Type of housing | character | 12176 | 0.9252534 | 10 | 0 | 4 | 37 | 0 |
precariedad_vivienda
Precarious housing. Direct import. Available since 2022 databases.
Distribution
130860 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| precariedad_vivienda | Precarious housing. Direct import. Available since 2022 databases. | character | 130860 | 0.1966703 | 2 | 0 | 37 | 82 | 0 |
tenure_status_household
Housing situation
Distribution
7738 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| tenure_status_household | Housing situation | character | 7738 | 0.9524976 | 5 | 0 | 6 | 42 | 0 |
servicios_basicos_95
Basic sanitation services. Direct import. Available since 2022 databases.
Distribution
130860 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| servicios_basicos_95 | Basic sanitation services. Direct import. Available since 2022 databases. | character | 130860 | 0.1966703 | 2 | 0 | 55 | 135 | 0 |
perso_dormitorio_vivienda
Persons per dwelling bedroom. Direct import. Available since 2022 databases.
Distribution
130860 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| perso_dormitorio_vivienda | Persons per dwelling bedroom. Direct import. Available since 2022 databases. | character | 130860 | 0.1966703 | 2 | 0 | 20 | 20 | 0 |
ed_attainment
Recoded education level. Created from escolaridad_ultimo_ano_cursado into standardized categories (e.g., “media incompleta” = “2-Completed high school or less”).
Distribution
1105 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| ed_attainment | Recoded education level. Created from escolaridad_ultimo_ano_cursado into standardized categories (e.g., “media incompleta” = “2-Completed high school or less”). | character | 1105 | 0.9932166 | 3 | 0 | 23 | 34 | 0 |
occupation_condition
Recoded employment status. Created from condicion_ocupacional: e.g., “cesante”= “unemployed” (in Step 1).
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| occupation_condition | Recoded employment status. Created from condicion_ocupacional: e.g., “cesante”= “unemployed” (in Step 1). | character | 0 | 1 | 3 | 0 | 8 | 10 | 0 |
occupation_status
Recoded employment category. Created from categoria_ocupacional. Sparse; often NA.
Distribution
107379 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| occupation_status | Recoded employment category. Created from categoria_ocupacional. Sparse; often NA. | character | 107379 | 0.3408166 | 6 | 0 | 5 | 20 | 0 |
rubro_trabaja
Occupational sector (e.g., “trabajadores no calificados”). Direct import. Recoded to rubro_trabaja_mod. 68% missing. 13.8% among employed
Distribution
109056 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| rubro_trabaja | Occupational sector (e.g., “trabajadores no calificados”). Direct import. Recoded to rubro_trabaja_mod. 68% missing. 13.8% among employed | character | 109056 | 0.3305217 | 11 | 0 | 21 | 77 | 0 |
laboral_ingresos
Monthly income. Direct import. Available since 2022 databases.
Distribution
148124 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| laboral_ingresos | Monthly income. Direct import. Available since 2022 databases. | character | 148124 | 0.0906892 | 11 | 0 | 16 | 19 | 0 |
yr_block
Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding birth_date year to nearest decade (e.g., floor(year(birth_date)/10)*10). To look for data similarities (Step 1)
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| yr_block | Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding birth_date year to nearest decade (e.g., floor(year(birth_date)/10)*10). To look for data similarities (Step 1) | numeric | 0 | 1 | 1925 | 1980 | 2005 | 1978.161 | 11.40454 | ▁▁▅▇▃ |
OBS_series
OBS series
Distribution
153200 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| OBS_series | OBS series | character | 153200 | 0.0595284 | 993 | 0 | 45 | 999 | 0 |
senda_series
SENDA series
Distribution
153200 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| senda_series | SENDA series | character | 153200 | 0.0595284 | 4 | 0 | 2 | 5 | 0 |
pub_center_series
Public center series
Distribution
153200 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| pub_center_series | Public center series | character | 153200 | 0.0595284 | 4 | 0 | 4 | 10 | 0 |
id_centro_series
Center ID series
Distribution
153200 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| id_centro_series | Center ID series | character | 153200 | 0.0595284 | 2773 | 0 | 3 | 15 | 0 |
disch_date_num_rec6
Numeric discharge date recoded 6
Distribution
3270 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| disch_date_num_rec6 | Numeric discharge date recoded 6 | numeric | 3270 | 0.979926 | 12053 | 17527 | 20236 | 17566.2 | 1508.771 | ▁▂▇▇▇ |
dg_psiq_cie_10_instudy
Psychiatric comorbidity (ICD-10)
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | count | mean |
|---|---|---|---|---|---|---|
| dg_psiq_cie_10_instudy | Psychiatric comorbidity (ICD-10) | logical | 0 | 1 | FAL: 136904, TRU: 25993 | 0.1595671 |
dg_psiq_cie_10_dg
Mutually exclusive (as corrected in Step 3)
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | count | mean |
|---|---|---|---|---|---|---|
| dg_psiq_cie_10_dg | Mutually exclusive (as corrected in Step 3) | logical | 0 | 1 | FAL: 83784, TRU: 79113 | 0.4856627 |
dg_psiq_dsm_iv_instudy
Psychiatric comorbidity (DSM-IV)
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | count | mean |
|---|---|---|---|---|---|---|
| dg_psiq_dsm_iv_instudy | Psychiatric comorbidity (DSM-IV) | logical | 0 | 1 | FAL: 91558, TRU: 71339 | 0.4379393 |
dg_psiq_dsm_iv_dg
Mutually exclusive (as corrected in Step 3)
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | count | mean |
|---|---|---|---|---|---|---|
| dg_psiq_dsm_iv_dg | Mutually exclusive (as corrected in Step 3) | logical | 0 | 1 | FAL: 148878, TRU: 14019 | 0.0860605 |
plan_type_series
Plan type series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| plan_type_series | Plan type series | character | 0 | 1 | 184 | 0 | 8 | 51 | 0 |
tr_compliance_rec7_series
Treatment compliance recoded 7 series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| tr_compliance_rec7_series | Treatment compliance recoded 7 series | character | 0 | 1 | 40 | 0 | 12 | 77 | 0 |
referral_type_series
Referral type series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| referral_type_series | Referral type series | character | 0 | 1 | 127 | 0 | 6 | 108 | 0 |
adm_age_rec3_series
Admission age recoded 3 series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| adm_age_rec3_series | Admission age recoded 3 series | character | 0 | 1 | 15333 | 0 | 6 | 51 | 0 |
adm_date_rec2_series
Admission date recoded 2 series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| adm_date_rec2_series | Admission date recoded 2 series | character | 0 | 1 | 14635 | 0 | 14 | 86 | 0 |
disch_date_rec6_series
Discharge date recoded 6 series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| disch_date_rec6_series | Discharge date recoded 6 series | character | 0 | 1 | 14768 | 0 | 6 | 86 | 0 |
diagnostico_trs_fisico_series
Physical diagnosis series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| diagnostico_trs_fisico_series | Physical diagnosis series | character | 0 | 1 | 362 | 0 | 6 | 198 | 0 |
otros_probl_at_sm_or_series
Other mental health problems series
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| otros_probl_at_sm_or_series | Other mental health problems series | character | 0 | 1 | 431 | 0 | 6 | 168 | 0 |
OBS
Observed flags or notes from cleaning (e.g., “Duplicated Cases”). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. It is separated by semi-colons
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| OBS | Observed flags or notes from cleaning (e.g., “Duplicated Cases”). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. It is separated by semi-colons | character | 0 | 1 | 1053 | 1 | 0 | 634 | 0 |
occupation_status_corr
Corrected employment status. Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as ‘other’ if missing. Depends on occupation_condition_inferred, occupation_status.
Distribution
85961 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| occupation_status_corr | Corrected employment status. Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as ‘other’ if missing. Depends on occupation_condition_inferred, occupation_status. | character | 85961 | 0.4722984 | 6 | 0 | 5 | 20 | 0 |
occupation_condition_corr24
Employment status
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| occupation_condition_corr24 | Employment status | character | 0 | 1 | 3 | 0 | 8 | 10 | 0 |
plan_type_corr
Treatment modality
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| plan_type_corr | Treatment modality | character | 0 | 1 | 5 | 0 | 4 | 6 | 0 |
ed_attainment_corr
Educational attainment
Distribution
1105 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| ed_attainment_corr | Educational attainment | character | 1105 | 0.9932166 | 3 | 0 | 23 | 34 | 0 |
polysubstance_strict
Polysubstance use
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| polysubstance_strict | Polysubstance use | numeric | 0 | 1 | 0 | 1 | 1 | 0.7185522 | 0.4497068 | ▃▁▁▁▇ |
icd10_diag1
ICD10 diagnosis 1. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10.
Distribution
148878 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| icd10_diag1 | ICD10 diagnosis 1. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10. | character | 148878 | 0.0860605 | 94 | 0 | 6 | 149 | 0 |
icd10_diag2
ICD10 diagnosis 2. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10.
Distribution
161349 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| icd10_diag2 | ICD10 diagnosis 2. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10. | character | 161349 | 0.0095029 | 84 | 0 | 7 | 202 | 0 |
icd10_diag3
ICD10 diagnosis 3. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10.
Distribution
162566 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| icd10_diag3 | ICD10 diagnosis 3. Created in Step 4. Separated from mod_psiq_cie_10 by “::”. Depends on mod_psiq_cie_10. | character | 162566 | 0.002032 | 46 | 0 | 25 | 129 | 0 |
dsmiv_diag1
DSMIV diagnosis 1. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv.
Distribution
148878 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dsmiv_diag1 | DSMIV diagnosis 1. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv. | character | 148878 | 0.0860605 | 94 | 0 | 6 | 149 | 0 |
dsmiv_diag2
DSMIV diagnosis 2. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv.
Distribution
161349 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dsmiv_diag2 | DSMIV diagnosis 2. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv. | character | 161349 | 0.0095029 | 84 | 0 | 7 | 202 | 0 |
dsmiv_diag3
DSMIV diagnosis 3. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv.
Distribution
162566 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| dsmiv_diag3 | DSMIV diagnosis 3. Created in Step 4. Separated from mod_psiq_dsm_iv by “::”. Depends on mod_psiq_dsm_iv. | character | 162566 | 0.002032 | 46 | 0 | 25 | 129 | 0 |
adm_year
Admission year
Distribution
0 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| adm_year | Admission year | numeric | 0 | 1 | 2000 | 2017 | 2024 | 2017.116 | 4.182583 | ▁▁▆▇▆ |
porc_pobr
Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6]
Distribution
3 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| porc_pobr | Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6] | numeric | 3 | 0.9999816 | 0.0017 | 0.13 | 0.64 | 0.1418689 | 0.0648752 | ▇▇▁▁▁ |
clasificacion
Urbanization level of the commune of residence
Distribution
1 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| clasificacion | Urbanization level of the commune of residence | character | 1 | 0.9999939 | 3 | 0 | 5 | 6 | 0 |
km2_c
Centered km^2. Created in Step 4. km_2 - mean(km_2). Depends on km_2.
Distribution
1 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| km2_c | Centered km^2. Created in Step 4. km_2 - mean(km_2). Depends on km_2. | numeric | 1 | 0.9999939 | -2206 | -1981 | 46483 | 0 | 5897.095 | ▇▁▁▁▁ |
km2_log
Log km^2. Created in Step 4. log1p(km_2). Depends on km_2.
Distribution
1 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| km2_log | Log km^2. Created in Step 4. log1p(km_2). Depends on km_2. | numeric | 1 | 0.9999939 | 2.1 | 5.5 | 11 | 5.555289 | 2.179414 | ▆▇▇▃▂ |
porc_pobr_log
Log poverty. Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr.
Distribution
3 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| porc_pobr_log | Log poverty. Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr. | numeric | 3 | 0.9999816 | 0.0017 | 0.12 | 0.49 | 0.1311123 | 0.0552621 | ▅▇▂▁▁ |
porc_pobr_c
Centered poverty. Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr.
Distribution
3 missing values.
Summary statistics
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| porc_pobr_c | Centered poverty. Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr. | numeric | 3 | 0.9999816 | -0.14 | -0.0098 | 0.49 | 0 | 0.0648752 | ▇▇▁▁▁ |
Missingness report
Codebook table
JSON-LD metadata
The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.
{
"datePublished": "2024-01-01",
"temporalCoverage": "2010-2024",
"spatialCoverage": "Chile",
"creator": [
{
"@type": "Person",
"givenName": "Álvaro",
"familyName": "Castillo-Carniglia",
"affiliation": {
"@type": "Organization",
"name": "Universidad San Sebastián, Chile"
}
},
{
"@type": "Person",
"givenName": "Andrés",
"familyName": "González-Santa Cruz",
"email": "gonzalez.santacruz.andres@gmail.com",
"affiliation": {
"@type": "Organization",
"name": "Universidad de Chile, Public Health"
}
},
{
"@type": "Person",
"givenName": "Amaru",
"familyName": "Agüero Jiménez",
"affiliation": {
"@type": "Organization",
"name": "Universidad del Desarrollo, Chile"
}
}
],
"citation": "Castillo-Carniglia, Á., González-Santa Cruz, A., & Agüero Jiménez, A. (2024). Workflow of data management and analysis - Chilean substance use treatment administrative data. Consolidation of Agreement 1 Databases from 2010 to 2024. Funded by FONDECYT regular 1191282.",
"url": "https://github.com/FONDECYTACC/cons2025",
"name": "Agreement 1 SENDA",
"description": "Information About Agreement 1 of SENDA and MINSAL\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n[truncated]\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.6).",
"keywords": ["TABLE", "TABLE_rec_series", "rn", "rn_series", "num_trat_ant", "fecha_ultimo_tratamiento", "hash_key", "min_adm_age_rec3", "adm_age_rec3", "birth_date_rec", "adm_date_num_rec2", "adm_date_rec2", "dit_rec6", "disch_date_rec6", "disch_date_num_rec6_trans", "def_date", "adm_motive", "tr_compliance_rec7", "adm_disch_reason", "referral_type", "plan_type", "id_centro", "senda", "pub_center", "primary_sub", "second_sub1", "second_sub2", "second_sub3", "prim_sub_freq", "prim_sub_route", "LB_age_primary_onset_rec2", "UB_age_primary_onset_rec2", "age_primary_onset_rec2", "first_sub_used", "sus_ini_mod_mvv", "sus_ini_1", "sus_ini_2", "sus_ini_3", "LB_age_subs_onset_rec2", "UB_age_subs_onset_rec2", "age_subs_onset_rec2", "biopsych_comp", "mod_psiq_cie_10", "mod_psiq_dsm_iv", "diagnostico_trs_fisico", "otros_probl_at_sm_or", "sub_dep_icd10_status", "evaluacindelprocesoteraputico", "eva_consumo", "eva_fam", "eva_relinterp", "eva_ocupacion", "eva_sm", "eva_fisica", "eva_transgnorma", "dg_global_nec_int_soc_or", "dg_nec_int_soc_cap_hum_or", "dg_nec_int_soc_cap_fis_or", "dg_nec_int_soc_cap_soc_or", "dg_global_nec_int_soc_egr_or", "dg_nec_int_soc_cap_hum_egr_or", "dg_nec_int_soc_cap_fis_egr_or", "dg_nec_int_soc_cap_soc_egr_or", "usuario_tribunal_trat_droga", "nationality_cons", "ethnicity_c1_c6_historic", "discapacidad", "opcion_discapacidad", "sex_rec", "identidad_de_genero", "orientacion_sexual", "pregnant", "pregnant_disch", "marital_status", "tiene_menores_de_edad_a_cargo", "num_hijos_trat_res", "numero_de_hijos", "con_quien_vive", "tipo_de_vivienda", "precariedad_vivienda", "tenure_status_household", "servicios_basicos_95", "perso_dormitorio_vivienda", "ed_attainment", "occupation_condition", "occupation_status", "rubro_trabaja", "laboral_ingresos", "yr_block", "OBS_series", "senda_series", "pub_center_series", "id_centro_series", "disch_date_num_rec6", "dg_psiq_cie_10_instudy", "dg_psiq_cie_10_dg", "dg_psiq_dsm_iv_instudy", "dg_psiq_dsm_iv_dg", 
"plan_type_series", "tr_compliance_rec7_series", "referral_type_series", "adm_age_rec3_series", "adm_date_rec2_series", "disch_date_rec6_series", "diagnostico_trs_fisico_series", "otros_probl_at_sm_or_series", "OBS", "occupation_status_corr", "occupation_condition_corr24", "plan_type_corr", "ed_attainment_corr", "polysubstance_strict", "icd10_diag1", "icd10_diag2", "icd10_diag3", "dsmiv_diag1", "dsmiv_diag2", "dsmiv_diag3", "adm_year", "porc_pobr", "clasificacion", "km2_c", "km2_log", "porc_pobr_log", "porc_pobr_c"],
"@context": "https://schema.org/",
"@type": "Dataset",
"variableMeasured": [
{
"name": "TABLE",
"description": "Year of data collection (original SISTRAT table/year identifier). Represents the year the record was entered (e.g., \"2023\", \"2015\"). No transformation applied. Made in import_c1_top_data_adm_25.qmd",
"@type": "propertyValue"
},
{
"name": "TABLE_rec_series",
"description": "Recoded series of TABLE",
"@type": "propertyValue"
},
{
"name": "rn",
"description": "Row number or treatment episode sequence per patient. Created by `row_number() over (partition by hash_key order by adm_date)`, Step 1 (SISTRAT23_c1_2010_2024_df2). Identifies first, second, etc., treatment for each user.",
"@type": "propertyValue"
},
{
"name": "rn_series",
"description": "Series of row number",
"@type": "propertyValue"
},
{
"name": "num_trat_ant",
"description": "Number of prior treatments reported by patient. Direct import. May not be reliable. Not used.",
"@type": "propertyValue"
},
{
"name": "fecha_ultimo_tratamiento",
"description": "Patient-reported time since last treatment (e.g., \"3 a 4 anos\"). Direct import. Categorical, not numeric. Many missing values (~50%)",
"@type": "propertyValue"
},
{
"name": "hash_key",
"description": "Unique anonymized identifier for each individual (user/patient RUT derivative). Direct import from raw SISTRAT C1. Originally in uppercase letter. Generated by hashing the RUT (national ID) by MINSAL. Used to link multiple treatment episodes for the same person across years. No transformation.",
"@type": "propertyValue"
},
{
"name": "min_adm_age_rec3",
"description": "min(adm_age_rec3) grouped by hash_key. Critical benchmark for validating/imputing onset ages.",
"@type": "propertyValue"
},
{
"name": "adm_age_rec3",
"description": "Admission age [log1p(x)=adm_age_log, centered= adm_age_c,^2=adm_age_pow2, ^3=adm_age_pow3, three groups= adm_age_rec3_cat]",
"@type": "propertyValue"
},
{
"name": "birth_date_rec",
"description": "Final authoritative date of birth. Created by selecting/imputing best `birth_date` per `hash_key` (via `kNN`, logic, or min/max validation). Used to recalculate `adm_age_rec`.",
"@type": "propertyValue"
},
{
"name": "adm_date_num_rec2",
"description": "Numeric admission date recoded version 2",
"@type": "propertyValue"
},
{
"name": "adm_date_rec2",
"description": "Intermediate admission date Created in Step 2 (0.c) for the resolution of overlaps",
"@type": "propertyValue"
},
{
"name": "dit_rec6",
"description": "Length of stay in treatment (months) [log1p(x)=treat_log, ^2=treat_days_pow2, ^3=treat_days_pow3, <90days=treat_lt_90]",
"@type": "propertyValue"
},
{
"name": "disch_date_rec6",
"description": "Final discharge date for longitudinal analysis. Created as selected consistent version for time-to-event modeling.",
"@type": "propertyValue"
},
{
"name": "disch_date_num_rec6_trans",
"description": "Imputed discharge based on reference date. Created in Step 3. Missing discharge dates were filled using a default value calculated from a fixed retrieval date (e.g., May 28, 2025), ensuring consistency across all treatment records and uncovering censorship mechanisms.",
"@type": "propertyValue"
},
{
"name": "def_date",
"description": "Date of death (if applicable, mostly NA). Created from external mortality linkage (not in provided scripts). Parsed from mortality registry (dia_def, mes_def, ano_def), imputing missing day as 15; earliest date kept when multiple. Used to censor discharges after death. See Duplicates_25_2.qmd (pre-00.a).",
"@type": "propertyValue"
},
{
"name": "adm_motive",
"description": "Admission motive",
"@type": "propertyValue"
},
{
"name": "tr_compliance_rec7",
"description": "Treatment compliance category. Created in Step 4. Removed cases (n=3) with an ongoing treatment motive but with a posterior treatment after. Replaced for dropout (they had incomplete discharge information)",
"@type": "propertyValue"
},
{
"name": "adm_disch_reason",
"description": "Placeholder for administrative discharge reason (mostly empty). Created as copy of `motivo_de_egreso_alta_administrativa`. Derived/translated from motivo_de_egreso_alta_administrativa; listed among deduplication key variables in Step 2. 47%-56% missing in patients with adm. discharge.",
"@type": "propertyValue"
},
{
"name": "referral_type",
"description": "Recoded type of referral center. Created from `tipo_centro_derivacion` (translation): e.g., \"otro centro\" , \"other facility\", \"cosam\", \"primary health care\".",
"@type": "propertyValue"
},
{
"name": "plan_type",
"description": "Recoded treatment plan type. Created as copy and translation of `tipo_de_plan` (step 1). Used in collapsing logic and outcome modeling.",
"@type": "propertyValue"
},
{
"name": "id_centro",
"description": "Center ID number. Direct import. Used to link center attributes, collapse duplicates, and derive `centro_muj` (women-specific centers).",
"@type": "propertyValue"
},
{
"name": "senda",
"description": "Indicator if treatment was under SENDA agreement (\"si\"/\"no\"). Direct import. Used to derive `senda_adm_date` and flag SENDA-specific episodes. No transformation initially. According to SENDA professionals, patients may be in SENDA Yes programs may anyways part of SENDA programs in some cases",
"@type": "propertyValue"
},
{
"name": "pub_center",
"description": "Binary indicator for public center. Created as `tipo_centro == \"publico\"`. Used in institutional comparisons.",
"value": "1. FALSE,\n2. TRUE",
"@type": "propertyValue"
},
{
"name": "primary_sub",
"description": "Primary substance of use",
"@type": "propertyValue"
},
{
"name": "second_sub1",
"description": "Normalized secondary substances at admission (1)",
"@type": "propertyValue"
},
{
"name": "second_sub2",
"description": "Normalized secondary substances at admission (2)",
"@type": "propertyValue"
},
{
"name": "second_sub3",
"description": "Normalized secondary substances at admission (3)",
"@type": "propertyValue"
},
{
"name": "prim_sub_freq",
"description": "Frequency of primary substance use at admission (recoded: prim_sub_freq_rec)",
"@type": "propertyValue"
},
{
"name": "prim_sub_route",
"description": "Recoded route of administration. Created from `via_administracion_sustancia_principal` into standardized terms.",
"@type": "propertyValue"
},
{
"name": "LB_age_primary_onset_rec2",
"description": "Lower bound age of primary substance onset recoded 2",
"@type": "propertyValue"
},
{
"name": "UB_age_primary_onset_rec2",
"description": "Upper bound age of primary substance onset recoded 2",
"@type": "propertyValue"
},
{
"name": "age_primary_onset_rec2",
"description": "Created in Step 3.",
"@type": "propertyValue"
},
{
"name": "first_sub_used",
"description": "Normalized first substance used. Created as `tolower(gsub(\"_\", \" \", sustancia_de_inicio))`. Input for resolving inconsistencies, prioritizing most vulnerable value = `sus_ini_mod_mvv`. Despite this, this variable was normalized in Step 3 for inconsistent values in patients.",
"@type": "propertyValue"
},
{
"name": "sus_ini_mod_mvv",
"description": "Sus ini mod mvv",
"value": "1. cocaine paste,\n2. cocaine powder,\n3. alcohol,\n4. marijuana,\n5. others",
"@type": "propertyValue"
},
{
"name": "sus_ini_1",
"description": "Sus ini 1",
"@type": "propertyValue"
},
{
"name": "sus_ini_2",
"description": "Sus ini 2",
"@type": "propertyValue"
},
{
"name": "sus_ini_3",
"description": "Sus ini 3",
"@type": "propertyValue"
},
{
"name": "LB_age_subs_onset_rec2",
"description": "Lower bound age of substance onset recoded 2",
"@type": "propertyValue"
},
{
"name": "UB_age_subs_onset_rec2",
"description": "Upper bound age of substance onset recoded 2",
"@type": "propertyValue"
},
{
"name": "age_subs_onset_rec2",
"description": "Created in Step 3.",
"@type": "propertyValue"
},
{
"name": "biopsych_comp",
"description": "Recoded biopsychosocial severity. Created from `compromiso_biopsicosocial`: e.g., \"moderado\", \"2-moderate\".",
"@type": "propertyValue"
},
{
"name": "mod_psiq_cie_10",
"description": "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
"@type": "propertyValue"
},
{
"name": "mod_psiq_dsm_iv",
"description": "Created in Step 3, concatenated main and sub-dg to allow multiple diagnoses",
"@type": "propertyValue"
},
{
"name": "diagnostico_trs_fisico",
"description": "Physical health diagnosis. Direct import. Often concatenated (e.g., \"Hepatitis alcoholica: cardiopatías\"). Preserved as-is for clinical profiling.",
"@type": "propertyValue"
},
{
"name": "otros_probl_at_sm_or",
"description": "Other mental health issues (e.g., \"abuso sexual\"). Direct import. Recoded to `otros_probl_at_sm_or`. Used in vulnerability and trauma profiling.",
"@type": "propertyValue"
},
{
"name": "sub_dep_icd10_status",
"description": "Severity of Substance Use Disorder (SUD)",
"@type": "propertyValue"
},
{
"name": "evaluacindelprocesoteraputico",
"description": "Therapeutic process evaluation (e.g., \"logro alto\"). Direct import. Recoded to standardized categories. Used in outcome and program effectiveness analysis.",
"@type": "propertyValue"
},
{
"name": "eva_consumo",
"description": "Evaluation of consumption pattern at discharge. Direct import. Part of multi-domain outcome assessment. Recoded similarly.",
"@type": "propertyValue"
},
{
"name": "eva_fam",
"description": "Family situation evaluation at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "eva_relinterp",
"description": "Interpersonal relations evaluation at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "eva_ocupacion",
"description": "Occupational situation evaluation at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "eva_sm",
"description": "Mental health evaluation at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "eva_fisica",
"description": "Physical health evaluation at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "eva_transgnorma",
"description": "Social norm transgression evaluation at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "dg_global_nec_int_soc_or",
"description": "Global social integration need at admission. Direct import. Recoded to `dg_global_nec_int_soc_or`. Categorized as \"altas\", \"medias\", \"bajas\". Used in social vulnerability index.",
"@type": "propertyValue"
},
{
"name": "dg_nec_int_soc_cap_hum_or",
"description": "Human capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_hum_or`. Part of social integration profile.",
"@type": "propertyValue"
},
{
"name": "dg_nec_int_soc_cap_fis_or",
"description": "Physical capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_fis_or`.",
"@type": "propertyValue"
},
{
"name": "dg_nec_int_soc_cap_soc_or",
"description": "Social capital need at admission. Direct import. Recoded to `dg_nec_int_soc_cap_soc_or`.",
"@type": "propertyValue"
},
{
"name": "dg_global_nec_int_soc_egr_or",
"description": "Global social integration need at discharge. Direct import. Recoded similarly to admission version (_60). Used for delta/change analysis.",
"@type": "propertyValue"
},
{
"name": "dg_nec_int_soc_cap_hum_egr_or",
"description": "Human capital need at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "dg_nec_int_soc_cap_fis_egr_or",
"description": "Physical capital need at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "dg_nec_int_soc_cap_soc_egr_or",
"description": "Social capital need at discharge. Direct import.",
"@type": "propertyValue"
},
{
"name": "usuario_tribunal_trat_droga",
"description": "Court-referred to drug treatment (very unbalanced, ~1% yes)",
"@type": "propertyValue"
},
{
"name": "nationality_cons",
"description": "Recoded/standardized from nacionalidad or pais_nacimiento.",
"@type": "propertyValue"
},
{
"name": "ethnicity_c1_c6_historic",
"description": "Concatenated unique non-Chilean/non-\"no pertenece\" values from etnia (C1) and ethnicity (C6), grouped by hash_key.",
"@type": "propertyValue"
},
{
"name": "discapacidad",
"description": "Indicator of disability (\"si\"/\"no\"). Direct import. Used with `opcion_discapacidad` for disability profiling. Starts coding in 2019",
"@type": "propertyValue"
},
{
"name": "opcion_discapacidad",
"description": "Type of disability (e.g., \"de causa psiquica\"). Direct import. Only populated if `discapacidad == \"si\"`. Used in accessibility and needs assessment. Starts coding in 2019",
"@type": "propertyValue"
},
{
"name": "sex_rec",
"description": "Sex (often used for biological categorization)",
"@type": "propertyValue"
},
{
"name": "identidad_de_genero",
"description": "Gender identity (e.g., \"masculino\", \"femenino\"). Direct import. Recoded to `identidad_de_genero` factor. Used to validate/override `sexo` in `sex_rec` (Step 2). Starts coding in 2019",
"@type": "propertyValue"
},
{
"name": "orientacion_sexual",
"description": "Sexual orientation (e.g., \"heterosexual\"). Direct import. Sparse. Preserved for inclusivity analysis. Starts coding in 2022",
"@type": "propertyValue"
},
{
"name": "pregnant",
"description": "Recoded pregnancy status at admission. Created from `se_trata_de_una_mujer_embarazada` (NA = \"no\"). Used in gender-specific analysis (Step 3). This variable (indicating if a woman is pregnant) seems to be introduced in 2011.",
"@type": "propertyValue"
},
{
"name": "pregnant_disch",
"description": "Recoded pregnancy status at discharge. Created from `ha_estado_embarazada_egreso`. Used in gender-specific analysis (Step 3). Sparse. Seems to be only accurately collected starting in 2017. Created to capture pregnancy status specifically at treatment discharge.",
"@type": "propertyValue"
},
{
"name": "marital_status",
"description": "Marital status",
"@type": "propertyValue"
},
{
"name": "tiene_menores_de_edad_a_cargo",
"description": "Indicator if responsible for minors (\"si\"/\"no\"). Direct import. Mostly missing before 2015 (Duplicates_25.txt). Recoded to binary flag. Used in family vulnerability index.",
"@type": "propertyValue"
},
{
"name": "num_hijos_trat_res",
"description": "Number of children entering residential treatment with patient. Direct import. Often missing or 0. Later used to derive binary flag `hijos_trat_res` (\"Had children in treatments\").",
"@type": "propertyValue"
},
{
"name": "numero_de_hijos",
"description": "Number of children reported by patient. Direct import. Values >11 flagged as implausible (Duplicates_25.txt). Used in `kNN` imputation and to derive `tiene_menores_de_edad_a_cargo`.",
"@type": "propertyValue"
},
{
"name": "con_quien_vive",
"description": "Living arrangement/Cohabitation status",
"@type": "propertyValue"
},
{
"name": "tipo_de_vivienda",
"description": "Type of housing",
"@type": "propertyValue"
},
{
"name": "precariedad_vivienda",
"description": "Precarious housing. Direct import. Available since 2022 databases.",
"@type": "propertyValue"
},
{
"name": "tenure_status_household",
"description": "Housing situation",
"@type": "propertyValue"
},
{
"name": "servicios_basicos_95",
"description": "Basic sanitation services. Direct import. Available since 2022 databases.",
"@type": "propertyValue"
},
{
"name": "perso_dormitorio_vivienda",
"description": "Persons per dwelling bedroom. Direct import. Available since 2022 databases.",
"@type": "propertyValue"
},
{
"name": "ed_attainment",
"description": "Recoded education level. Created from `escolaridad_ultimo_ano_cursado` into standardized categories (e.g., \"media incompleta\" = \"2-Completed high school or less\").",
"@type": "propertyValue"
},
{
"name": "occupation_condition",
"description": "Recoded employment status. Created from `condicion_ocupacional`: e.g., \"cesante\"= \"unemployed\" (in Step 1).",
"@type": "propertyValue"
},
{
"name": "occupation_status",
"description": "Recoded employment category. Created from `categoria_ocupacional`. Sparse; often NA.",
"@type": "propertyValue"
},
{
"name": "rubro_trabaja",
"description": "Occupational sector (e.g., \"trabajadores no calificados\"). Direct import. Recoded to `rubro_trabaja_mod`. 68% missing. 13.8% among employed",
"@type": "propertyValue"
},
{
"name": "laboral_ingresos",
"description": "Monthly income. Direct import. Available since 2022 databases.",
"@type": "propertyValue"
},
{
"name": "yr_block",
"description": "Birth year block (e.g., 1980, 1990) for anonymization or grouping. Created by rounding the `birth_date` year down to the start of its decade (e.g., `floor(year(birth_date)/10)*10`). Used to look for data similarities (Step 1).",
"@type": "propertyValue"
},
{
"name": "OBS_series",
"description": "OBS series",
"@type": "propertyValue"
},
{
"name": "senda_series",
"description": "SENDA series",
"@type": "propertyValue"
},
{
"name": "pub_center_series",
"description": "Public center series",
"@type": "propertyValue"
},
{
"name": "id_centro_series",
"description": "Center ID series",
"@type": "propertyValue"
},
{
"name": "disch_date_num_rec6",
"description": "Numeric discharge date recoded 6",
"@type": "propertyValue"
},
{
"name": "dg_psiq_cie_10_instudy",
"description": "Psychiatric comorbidity (ICD-10)",
"@type": "propertyValue"
},
{
"name": "dg_psiq_cie_10_dg",
"description": "Mutually exclusive (as corrected in Step 3)",
"@type": "propertyValue"
},
{
"name": "dg_psiq_dsm_iv_instudy",
"description": "Psychiatric comorbidity (DSM-IV)",
"@type": "propertyValue"
},
{
"name": "dg_psiq_dsm_iv_dg",
"description": "Mutually exclusive (as corrected in Step 3)",
"@type": "propertyValue"
},
{
"name": "plan_type_series",
"description": "Plan type series",
"@type": "propertyValue"
},
{
"name": "tr_compliance_rec7_series",
"description": "Treatment compliance recoded 7 series",
"@type": "propertyValue"
},
{
"name": "referral_type_series",
"description": "Referral type series",
"@type": "propertyValue"
},
{
"name": "adm_age_rec3_series",
"description": "Admission age recoded 3 series",
"@type": "propertyValue"
},
{
"name": "adm_date_rec2_series",
"description": "Admission date recoded 2 series",
"@type": "propertyValue"
},
{
"name": "disch_date_rec6_series",
"description": "Discharge date recoded 6 series",
"@type": "propertyValue"
},
{
"name": "diagnostico_trs_fisico_series",
"description": "Physical diagnosis series",
"@type": "propertyValue"
},
{
"name": "otros_probl_at_sm_or_series",
"description": "Other mental health problems series",
"@type": "propertyValue"
},
{
"name": "OBS",
"description": "Observed flags or notes from cleaning (e.g., \"Duplicated Cases\"). Created during duplicate detection and resolution. Contains manual or automated flags for data quality issues, removed entries, etc. Multiple flags are separated by semicolons.",
"@type": "propertyValue"
},
{
"name": "occupation_status_corr",
"description": "Corrected employment status. Created in Step 4. Only kept for employed; nulled for inactive/unemployed; filled as 'other' if missing. Depends on occupation_condition_inferred, occupation_status.",
"@type": "propertyValue"
},
{
"name": "occupation_condition_corr24",
"description": "Employment status",
"@type": "propertyValue"
},
{
"name": "plan_type_corr",
"description": "Treatment modality",
"@type": "propertyValue"
},
{
"name": "ed_attainment_corr",
"description": "Educational attainment",
"@type": "propertyValue"
},
{
"name": "polysubstance_strict",
"description": "Polysubstance use",
"@type": "propertyValue"
},
{
"name": "icd10_diag1",
"description": "ICD10 diagnosis 1. Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
"@type": "propertyValue"
},
{
"name": "icd10_diag2",
"description": "ICD10 diagnosis 2. Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
"@type": "propertyValue"
},
{
"name": "icd10_diag3",
"description": "ICD10 diagnosis 3. Created in Step 4. Separated from mod_psiq_cie_10 by \"::\". Depends on mod_psiq_cie_10.",
"@type": "propertyValue"
},
{
"name": "dsmiv_diag1",
"description": "DSMIV diagnosis 1. Created in Step 4. Separated from mod_psiq_dsm_iv by \"::\". Depends on mod_psiq_dsm_iv.",
"@type": "propertyValue"
},
{
"name": "dsmiv_diag2",
"description": "DSMIV diagnosis 2. Created in Step 4. Separated from mod_psiq_dsm_iv by \"::\". Depends on mod_psiq_dsm_iv.",
"@type": "propertyValue"
},
{
"name": "dsmiv_diag3",
"description": "DSMIV diagnosis 3. Created in Step 4. Separated from mod_psiq_dsm_iv by \"::\". Depends on mod_psiq_dsm_iv.",
"@type": "propertyValue"
},
{
"name": "adm_year",
"description": "Admission year",
"@type": "propertyValue"
},
{
"name": "porc_pobr",
"description": "Poverty index of the commune of residence [log1p(x)=porc_pobr_log, centered= porc_pobr_c, six quantile groups= porc_pobr_c_cat6]",
"@type": "propertyValue"
},
{
"name": "clasificacion",
"description": "Urbanization level of the commune of residence",
"@type": "propertyValue"
},
{
"name": "km2_c",
"description": "Centered km^2. Created in Step 4. km_2 - mean(km_2). Depends on km_2.",
"@type": "propertyValue"
},
{
"name": "km2_log",
"description": "Log km^2. Created in Step 4. log1p(km_2). Depends on km_2.",
"@type": "propertyValue"
},
{
"name": "porc_pobr_log",
"description": "Log poverty. Created in Step 3. Re-joined in Step 4. log1p(porc_pobr). Depends on porc_pobr.",
"@type": "propertyValue"
},
{
"name": "porc_pobr_c",
"description": "Centered poverty. Created in Step 3. Re-joined in Step 4. porc_pobr - mean(porc_pobr). Depends on porc_pobr.",
"@type": "propertyValue"
}
]
}`