Balance
We selected treatments at baseline for each user, leaving 85,048
observations. Then, we distinguished between residential (n= 12,706) and
ambulatory (n= 72,267) treatments. We imputed cases that did not have a
defined treatment assigned (n=75).
We selected the following variables of interest:
- “Primary Substance at Admission”
(
sus_principal_mod
)
- “First Substance Used” (
sus_ini_mod_mvv
)
- “Marital Status” (
estado_conyugal_2
)
- “Educational Attainment” (
escolaridad_rec
)
- “Substance Use Onset Age” (
edad_ini_cons
)
- “Primary Substance at Admission Usage Frequency”
(
freq_cons_sus_prin
)
- “Treatment Admission Motive” (
origen_ingreso_mod
)
- “Psychiatric comorbidity” (
dg_cie_10_rec
)
- “Drug Dependence Diagnosis” (
dg_trs_cons_sus_or
)
- “Regional Location of Center” (
nombre_region
)
- “Type of Center (Public)” (
tipo_centro_pub
)
- “Sex” (
sexo_2
)
- “Admission Age” (
edad_al_ing
)
- “Admission Date” (
fech_ing_num
)
- “Evaluation of the Therapeutic Process” (*)
(
evaluacindelprocesoteraputico
)
- “Early Dropout (Against Staff Advice)”
(
abandono_temprano_rec
) (Y)
- “Residential Setting” (
tipo_de_plan_res
) (Z)
- “Tenure status of household”
(
tenencia_de_la_vivienda_mod
) (*)
- “Employment Status” (
condicion_ocupacional_corr
)
match.on_tot <- c("row", "hash_key","sus_principal_mod","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","abandono_temprano_rec","tipo_de_plan_res","duplicates_filtered","dg_trs_cons_sus_or","evaluacindelprocesoteraputico", "tenencia_de_la_vivienda_mod", "condicion_ocupacional_corr")
#dg_trs_cons_sus_or
CONS_C1_df_dup_SEP_2020_match<-
CONS_C1_df_dup_SEP_2020 %>%
dplyr::filter(dup==1) %>% #, tipo_de_plan_2 %in% c("PG-PR","M-PR","PG-PAI","M-PAI","PG-PAB","M-PAB")
dplyr::mutate(tipo_de_plan_res=dplyr::case_when(grepl("PR",as.character(tipo_de_plan_2))~1,
grepl("PAI",as.character(tipo_de_plan_2))~0,
grepl("PAB",as.character(tipo_de_plan_2))~0,
TRUE~NA_real_)) %>%
dplyr::mutate(tipo_de_plan_res=factor(tipo_de_plan_res)) %>%
dplyr::mutate(abandono_temprano_rec=factor(if_else(as.character(motivodeegreso_mod_imp)=="Early Drop-out",TRUE,FALSE,NA))) %>%
dplyr::mutate(dg_trs_cons_sus_or=factor(if_else(as.character(dg_trs_cons_sus_or)=="Drug dependence",TRUE,FALSE,NA))) %>%
dplyr::mutate(tipo_centro_pub=factor(if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA))) %>%
dplyr::mutate(condicion_ocupacional_corr=factor(condicion_ocupacional_corr),cat_ocupacional_corr=factor(cat_ocupacional_corr)) %>%
dplyr::mutate(dg_trs_fis_rec=factor(dplyr::case_when(as.character(diagnostico_trs_fisico)=="En estudio"~"Diagnosis unknown (under study)",as.character(diagnostico_trs_fisico)=="Sin trastorno"~'Without physical comorbidity',cnt_diagnostico_trs_fisico>0 ~'With physical comorbidity',
TRUE~NA_character_)))%>%
dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered=T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Did not use', 'Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::mutate(evaluacindelprocesoteraputico=dplyr::case_when(grepl("1",as.character(evaluacindelprocesoteraputico))~'1-High Achievement',grepl("2",as.character(evaluacindelprocesoteraputico))~'2-Medium Achievement',grepl("3",as.character(evaluacindelprocesoteraputico))~'3-Minimum Achievement', TRUE~as.character(evaluacindelprocesoteraputico))) %>%
dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::mutate(tenencia_de_la_vivienda_mod=
factor(dplyr::case_when(tenencia_de_la_vivienda_mod=="Allegado"~"Stays temporarily with a relative",
tenencia_de_la_vivienda_mod=="Arrienda"~"Renting",
tenencia_de_la_vivienda_mod=="Cedida"~"Owner/Transferred dwellings/Pays Dividends",
tenencia_de_la_vivienda_mod=="Ocupación Irregular"~"Illegal Settlement",
tenencia_de_la_vivienda_mod=="Otros"~"Others",
tenencia_de_la_vivienda_mod=="Paga dividendo"~"Owner/Transferred dwellings/Pays Dividends",
tenencia_de_la_vivienda_mod=="Propia"~"Owner/Transferred dwellings/Pays Dividends",
T~NA_character_))) %>%
dplyr::select_(.dots = match.on_tot) %>%
dplyr::mutate(more_one_treat=factor(ifelse(duplicates_filtered>1,1,0))) %>%
#APRIL 2022
dplyr::mutate(freq_cons_sus_prin=dplyr::case_when(freq_cons_sus_prin=="1 day a week or less"~"1 day a week or more",T~as.character(freq_cons_sus_prin))) %>%
dplyr::mutate(freq_cons_sus_prin=ordered(freq_cons_sus_prin,levels=c("Did not use", "Less than 1 day a week", "1 day a week or more", "2 to 3 days a week","4 to 6 days a week", "Daily"))) %>%
data.table::data.table()
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
#CONS_C1_df_dup_SEP_2020_match %>%
#dplyr::group_by(dg_trs_fis) %>% dplyr::summarise(q1=quantile(dias_treat_imp_sin_na,.25),q2=quantile(dias_treat_imp_sin_na,.5),q3=quantile(dias_treat_imp_sin_na,.75)) ---> las distribuciones por días de tratamiento de las categorías de respuesta tienden a ser bastante similares, aunquequienes tienen una comorbiliad física definida tienen más tiempo en el estudio.
invisible("La diferencia en días de tratamiento entre las categorías de enfermedad psiquiátrica, indica que quienes se encuentran en estudio tienen muchos menos días en tratamiento que quienes no tienen una comorbilidad o quienes tienen una definida. No es lo mismo con el caso de la enfermedad física, en donde tienden a ser bastante similares")
invisible("Decidí no incluir diagnóstico de enferemedad física, porque hay algunas condiciones que son crónicas o que pueden serlo, y que no tengo cómo validarlas a lo largo del tratamiento")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
attr(CONS_C1_df_dup_SEP_2020_match$sus_principal_mod,"label")<-"Primary Substance at Admission"
attr(CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,"label")<-"First Substance Used"
attr(CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons,"label")<-"Substance Use Onset Age"
attr(CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,"label")<-"Primary Substance at Admission Usage Frequency"
attr(CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,"label")<-"Treatment Admission Motive"
attr(CONS_C1_df_dup_SEP_2020_match$dg_cie_10_rec,"label")<-"Psychiatric co-morbidity"
attr(CONS_C1_df_dup_SEP_2020_match$nombre_region,"label")<-"Regional Location of Center"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub,"label")<-"Type of Center (Public)"
attr(CONS_C1_df_dup_SEP_2020_match$sexo_2,"label")<-"Sex"
attr(CONS_C1_df_dup_SEP_2020_match$edad_al_ing,"label")<-"Admission Age"
attr(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,"label")<-"Admission Date"
attr(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,"label")<-"Early Dropout"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res,"label")<-"Residential Setting"
attr(CONS_C1_df_dup_SEP_2020_match$duplicates_filtered,"label")<-"No. of Treatments in the Database"
attr(CONS_C1_df_dup_SEP_2020_match$dg_trs_cons_sus_or,"label")<-"Drug Dependence Diagnosis"
attr(CONS_C1_df_dup_SEP_2020_match$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"
attr(CONS_C1_df_dup_SEP_2020_match$condicion_ocupacional_corr,"label")<-"Employment Status"
attr(CONS_C1_df_dup_SEP_2020_match$tenencia_de_la_vivienda_mod,"label")<-"Tenure status of household"
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
table1_all <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_principal_mod+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ duplicates_filtered+ dg_trs_cons_sus_or+ evaluacindelprocesoteraputico+ condicion_ocupacional_corr+ tenencia_de_la_vivienda_mod, method= c(
sus_principal_mod=3,
sus_ini_mod_mvv=3,
estado_conyugal_2=3,
escolaridad_rec=3,
edad_ini_cons=3,
freq_cons_sus_prin=3,
origen_ingreso_mod=3,
dg_cie_10_rec=3,
dg_trs_cons_sus_or=3,
nombre_region=3,
tipo_centro_pub=3,
sexo_2=3,
dg_trs_cons_sus_or=3,
edad_al_ing=2,
fech_ing_num=2,
abandono_temprano_rec=3,
duplicates_filtered=3,
condicion_ocupacional_corr=3,
evaluacindelprocesoteraputico=3,
tenencia_de_la_vivienda_mod=3),
data = CONS_C1_df_dup_SEP_2020_match,
include.miss = T,
var.equal=T)
)
table1_more_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_principal_mod+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico+ condicion_ocupacional_corr+ tenencia_de_la_vivienda_mod, method= c(
sus_principal_mod=3,
sus_ini_mod_mvv=3,
estado_conyugal_2=3,
escolaridad_rec=3,
edad_ini_cons=3,
freq_cons_sus_prin=3,
origen_ingreso_mod=3,
dg_cie_10_rec=3,
dg_trs_cons_sus_or=3,
nombre_region=3,
tipo_centro_pub=3,
sexo_2=3,
dg_trs_cons_sus_or=3,
edad_al_ing=2,
fech_ing_num=2,
abandono_temprano_rec=3,
condicion_ocupacional_corr=3,
evaluacindelprocesoteraputico=3,
tenencia_de_la_vivienda_mod=3),
data = CONS_C1_df_dup_SEP_2020_match,
include.miss = T,
var.equal=T,
subset= more_one_treat==1)
)
table1_only_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_principal_mod+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico+ tenencia_de_la_vivienda_mod, method= c(
sus_principal_mod=3,
sus_ini_mod_mvv=3,
estado_conyugal_2=3,
escolaridad_rec=3,
edad_ini_cons=3,
freq_cons_sus_prin=3,
origen_ingreso_mod=3,
dg_cie_10_rec=3,
dg_trs_cons_sus_or=3,
nombre_region=3,
tipo_centro_pub=3,
sexo_2=3,
dg_trs_cons_sus_or=3,
edad_al_ing=2,
fech_ing_num=2,
abandono_temprano_rec=3,
evaluacindelprocesoteraputico=3,
tenencia_de_la_vivienda_mod=3),
data = CONS_C1_df_dup_SEP_2020_match,
include.miss = T,
var.equal=T,
subset= more_one_treat==0)
)
#Possible values are: 1 - for analysis as "normal-distributed"; 2 - forces analysis as "continuous non-normal"; 3 - forces analysis as "categorical"; and 4 - NA, which performs a Shapiro-Wilks test to decide between normal or non-normal.
restab1_all <- createTable(table1_all, show.p.overall = T)
restab1_more_one <- createTable(table1_more_one, show.p.overall = T)
restab1_only_one <- createTable(table1_only_one, show.p.overall = T)
pvals1 <- getResults(table1_all)
#p.adjust(pvals, method = "BH")
export2md(restab1_all, size=11, first.strip=T, hide.no="no", position="center",
format="html",caption= "Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019",col.names=c("Variables","Residential", "Ambulatory", "p-value"))%>%
kableExtra::add_footnote(c("Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;", "Categorical variables are presented as number (%)"), notation = "none")%>%
kableExtra::scroll_box(width = "100%", height = "375px") %>%
kableExtra::kable_classic()
Table 1. Summary descriptives at baseline, between Users with
Residential and Ambulatory Treatments from 2010-2019
Variables
|
Residential
|
Ambulatory
|
p-value
|
|
N=72267
|
N=12706
|
|
Primary Substance at Admission:
|
|
|
.
|
Alcohol
|
26410 (36.5%)
|
2431 (19.1%)
|
|
Cocaine hydrochloride
|
14481 (20.0%)
|
1660 (13.1%)
|
|
Marijuana
|
5372 (7.43%)
|
394 (3.10%)
|
|
Other
|
1381 (1.91%)
|
201 (1.58%)
|
|
Cocaine paste
|
24622 (34.1%)
|
8020 (63.1%)
|
|
‘Missing’
|
1 (0.00%)
|
0 (0.00%)
|
|
First Substance Used:
|
|
|
0.000
|
Alcohol
|
41507 (57.4%)
|
5080 (40.0%)
|
|
Cocaine hydrochloride
|
2682 (3.71%)
|
477 (3.75%)
|
|
Marijuana
|
18412 (25.5%)
|
4556 (35.9%)
|
|
Other
|
1669 (2.31%)
|
318 (2.50%)
|
|
Cocaine paste
|
2767 (3.83%)
|
1086 (8.55%)
|
|
‘Missing’
|
5230 (7.24%)
|
1189 (9.36%)
|
|
Marital Status:
|
|
|
<0.001
|
Married/Shared living arrangements
|
26185 (36.2%)
|
2910 (22.9%)
|
|
Separated/Divorced
|
7721 (10.7%)
|
1320 (10.4%)
|
|
Single
|
37343 (51.7%)
|
8328 (65.5%)
|
|
Widower
|
869 (1.20%)
|
133 (1.05%)
|
|
‘Missing’
|
149 (0.21%)
|
15 (0.12%)
|
|
Educational Attainment:
|
|
|
<0.001
|
3-Completed primary school or less
|
20062 (27.8%)
|
3862 (30.4%)
|
|
2-Completed high school or less
|
39565 (54.7%)
|
7044 (55.4%)
|
|
1-More than high school
|
12279 (17.0%)
|
1777 (14.0%)
|
|
‘Missing’
|
361 (0.50%)
|
23 (0.18%)
|
|
Primary Substance at Admission Usage Frequency:
|
|
|
0.000
|
Did not use
|
1095 (1.52%)
|
85 (0.67%)
|
|
Less than 1 day a week
|
2862 (3.96%)
|
133 (1.05%)
|
|
1 day a week or more
|
5335 (7.38%)
|
272 (2.14%)
|
|
2 to 3 days a week
|
22372 (31.0%)
|
1329 (10.5%)
|
|
4 to 6 days a week
|
12258 (17.0%)
|
1654 (13.0%)
|
|
Daily
|
27938 (38.7%)
|
9219 (72.6%)
|
|
‘Missing’
|
407 (0.56%)
|
14 (0.11%)
|
|
Treatment Admission Motive:
|
|
|
0.000
|
Spontaneous
|
33720 (46.7%)
|
4273 (33.6%)
|
|
Assisted Referral
|
4950 (6.85%)
|
3013 (23.7%)
|
|
Other
|
3766 (5.21%)
|
740 (5.82%)
|
|
Justice Sector
|
7159 (9.91%)
|
812 (6.39%)
|
|
Health Sector
|
22672 (31.4%)
|
3868 (30.4%)
|
|
Psychiatric co-morbidity:
|
|
|
<0.001
|
Without psychiatric comorbidity
|
29070 (40.2%)
|
3245 (25.5%)
|
|
Diagnosis unknown (under study)
|
13310 (18.4%)
|
2771 (21.8%)
|
|
With psychiatric comorbidity
|
29887 (41.4%)
|
6690 (52.7%)
|
|
Type of Center (Public):
|
|
|
0.000
|
FALSE
|
14964 (20.7%)
|
9066 (71.4%)
|
|
TRUE
|
57300 (79.3%)
|
3623 (28.5%)
|
|
‘Missing’
|
3 (0.00%)
|
17 (0.13%)
|
|
Sex:
|
|
|
<0.001
|
Men
|
54806 (75.8%)
|
8761 (69.0%)
|
|
Women
|
17461 (24.2%)
|
3945 (31.0%)
|
|
Drug Dependence Diagnosis:
|
|
|
0.000
|
FALSE
|
22150 (30.7%)
|
1049 (8.26%)
|
|
TRUE
|
50116 (69.3%)
|
11657 (91.7%)
|
|
‘Missing’
|
1 (0.00%)
|
0 (0.00%)
|
|
Admission Age
|
34.4 [27.5;43.5]
|
32.6 [26.3;40.9]
|
<0.001
|
Admission Date
|
16577 [15730;17359]
|
16154 [15342;17023]
|
<0.001
|
Early Dropout:
|
|
|
<0.001
|
FALSE
|
61074 (84.5%)
|
10201 (80.3%)
|
|
TRUE
|
11190 (15.5%)
|
2499 (19.7%)
|
|
‘Missing’
|
3 (0.00%)
|
6 (0.05%)
|
|
No. of Treatments in the Database:
|
|
|
.
|
1
|
58708 (81.2%)
|
8533 (67.2%)
|
|
2
|
10087 (14.0%)
|
2804 (22.1%)
|
|
3
|
2471 (3.42%)
|
927 (7.30%)
|
|
4
|
714 (0.99%)
|
295 (2.32%)
|
|
5
|
192 (0.27%)
|
94 (0.74%)
|
|
6
|
67 (0.09%)
|
36 (0.28%)
|
|
7
|
23 (0.03%)
|
11 (0.09%)
|
|
8
|
4 (0.01%)
|
6 (0.05%)
|
|
10
|
1 (0.00%)
|
0 (0.00%)
|
|
Drug Dependence Diagnosis:
|
|
|
0.000
|
FALSE
|
22150 (30.7%)
|
1049 (8.26%)
|
|
TRUE
|
50116 (69.3%)
|
11657 (91.7%)
|
|
‘Missing’
|
1 (0.00%)
|
0 (0.00%)
|
|
Evaluation of the Therapeutic Process:
|
|
|
<0.001
|
1-High Achievement
|
14081 (19.5%)
|
2831 (22.3%)
|
|
2-Medium Achievement
|
21728 (30.1%)
|
4237 (33.3%)
|
|
3-Minimum Achievement
|
31549 (43.7%)
|
5302 (41.7%)
|
|
‘Missing’
|
4909 (6.79%)
|
336 (2.64%)
|
|
Employment Status:
|
|
|
.
|
Employed
|
39613 (54.8%)
|
1773 (14.0%)
|
|
Inactive
|
7715 (10.7%)
|
1191 (9.37%)
|
|
Looking for a job for the first time
|
172 (0.24%)
|
21 (0.17%)
|
|
No activity
|
2672 (3.70%)
|
1823 (14.3%)
|
|
Not seeking for work
|
495 (0.68%)
|
335 (2.64%)
|
|
Unemployed
|
21599 (29.9%)
|
7563 (59.5%)
|
|
‘Missing’
|
1 (0.00%)
|
0 (0.00%)
|
|
Tenure status of household:
|
|
|
<0.001
|
Illegal Settlement
|
596 (0.82%)
|
309 (2.43%)
|
|
Others
|
1946 (2.69%)
|
408 (3.21%)
|
|
Owner/Transferred dwellings/Pays Dividends
|
25537 (35.3%)
|
4378 (34.5%)
|
|
Renting
|
12917 (17.9%)
|
1640 (12.9%)
|
|
Stays temporarily with a relative
|
27786 (38.4%)
|
4786 (37.7%)
|
|
‘Missing’
|
3485 (4.82%)
|
1185 (9.33%)
|
|
Note. Continuous variables are presented as Medians and
Percentiles 25 and 75 were shown;
|
Categorical variables are presented as number (%)
|
Of the 85,048 users, we selected 85,048 that fulfilled the conditions
stated above (100%).
#Additionally, we generated a correlation plot to get an overview of heterogeneous correlations between the different variables.
#Computes a heterogenous correlation matrix, consisting of Pearson product-moment correlations between numeric variables, polyserial correlations between numeric and ordinal variables, and polychoric correlations between
tiempo_antes_hetcor<-Sys.time()
hetcor_mat<-hetcor(CONS_C1_df_dup_SEP_2020_match[,-c("hash_key","row","more_one_treat","duplicates_filtered")], ML = T, std.err =T, use="pairwise.complete.obs", bins=3, pd=TRUE)
tiempo_despues_hetcor<-Sys.time()
tiempo_hetcor<-tiempo_despues_hetcor-tiempo_antes_hetcor
for (i in 1:length(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")])){
x<- match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")][i]
attr(hetcor_mat$correlations,"dimnames")[[2]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
attr(hetcor_mat$correlations,"dimnames")[[1]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
attr(hetcor_mat$tests,"dimnames")[[2]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
attr(hetcor_mat$tests,"dimnames")[[1]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
}
hetcor_mat$tests[is.na(hetcor_mat$tests)]<-1
ggcorrplot<-
ggcorrplot::ggcorrplot(hetcor_mat$correlations,
ggtheme = ggplot2::theme_void,
insig = "blank",
pch=1,
pch.cex=3,
tl.srt = 45,
#pch="ns",
p.mat = hetcor_mat$tests, # replacement has 144 rows, data has 169
#type = "lower",
colors = c("#6D9EC1", "white", "#E46726"),
tl.cex=8,
lab=F)+
#scale_x_discrete(labels = var_lbls_p345, drop = F) +
#scale_y_discrete(labels = var_lbls_p345, drop = F) +
theme(axis.text.x = element_blank())+
#theme(axis.text.y = element_text(size=7.5,color ="black", hjust = 1))+
theme(axis.text.y = element_blank())+
theme(legend.position="bottom")
ggplotly(ggcorrplot, height = 800, width=800)%>%
layout(xaxis= list(showticklabels = FALSE)) %>%
layout(annotations =
list(x = .1, y = -0.031, text = "",
showarrow = F, xref='paper', yref='paper',
#xanchor='center', yanchor='auto', xshift=0, yshift=-0,
font=list(size=11, color="darkblue"))
)
Imputation
We generated a plot to see all the missing values in the sample.
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
library(dplyr)
library(ggplot2)
missing.values<-
CONS_C1_df_dup_SEP_2020_match %>%
rowwise %>%
dplyr::mutate_at(.vars = vars(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]),
.funs = ~ifelse(is.na(.), 1, 0)) %>%
dplyr::ungroup() %>%
dplyr::summarise_at(vars(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]),~sum(.))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
n_miss_baseline<-
CONS_C1_df_dup_SEP_2020_match %>%
rowwise %>%
dplyr::mutate_at(.vars = vars(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]),
.funs = ~ifelse(is.na(.), 1, 0)) %>%
dplyr::ungroup() %>%
dplyr::mutate(sumVar=rowSums(dplyr::select(., match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]))) %>%
dplyr::filter(sumVar>0) %>%
nrow()
paste0("users with at least one missing value at baseline ",format(n_miss_baseline, big.mark=","))
## [1] "users with at least one missing value at baseline 16,466"
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
plot_miss<-
missing.values %>%
data.table::melt() %>% #condicion_ocupacional_corr
dplyr::mutate(perc= value/sum(nrow(CONS_C1_df_dup_SEP_2020_match))) %>%
dplyr::mutate(label_text= paste0("Variable= ",variable,"<br>n= ",value,"<br>",scales::percent(round(perc,3)))) %>%
dplyr::mutate(perc=perc*100) %>%
ggplot() +
geom_bar(aes(x=factor(variable), y=perc,label= label_text), stat = 'identity') +
theme_classic()+
# scale_y_continuous(limits=c(0,1), labels=percent)+
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
labs(x=NULL, y="% of Missing Values", caption=paste0("Nota. Porcentaje de perdidos del total (",sum(nrow(CONS_C1_df_dup_SEP_2020_match)),")"))
ggplotly(plot_miss, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T), height = 600, width=800) %>% layout(yaxis = list(tickformat='%', range = c(0, 8)))
#</div>
From the figure above, we could see that the first substance used
(sus_ini_mvv
), the substance use onset age
(edad_ini_cons
), the evaluation of the therapeutic process
(evaluacindelprocesoteraputico
) counted for approximately
6% of the missing data. These values should be imputed. We first focused
on the substance use onset age It is important to consider that the
evaluation of the therapeutic process could be distorted due to
censoring (many users did not finish their treatment, and did not have
this evaluation in the study period).
#origen_ingreso #dg_global_nec_int_soc_or_1 "Diagnóstico global de necesidades de integración social" #evaluacindelprocesoteraputico "Evaluación del proceso terapéutico" #escolaridad_rec "macrozona"
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
match.on_tot2 <- c("row", "hash_key","sus_principal_mod","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","abandono_temprano_rec","tipo_de_plan_res","duplicates_filtered","edad_ini_sus_prin","via_adm_sus_prin_act","dg_trs_cons_sus_or","dup","evaluacindelprocesoteraputico","tenencia_de_la_vivienda_mod","condicion_ocupacional_corr","motivodeegreso_mod_imp")
#HACER BASE ESPECIAL QUE CONTENGA UNA VARIABLE DE EDAD DE INICIO DE CONSUMO DE SUSTANCIA PRINCIPAL PARA EQUIPARAR
CONS_C1_df_dup_SEP_2020_match_miss<-
CONS_C1_df_dup_SEP_2020 %>%
#tuve que sacar casos que no tenían tipo de tratamiento definido. Vale la pena que en est etapa los seleccione????????????r
#dplyr::filter(tipo_de_plan_2 %in% c("PG-PR","M-PR","PG-PAI","M-PAI","PG-PAB","M-PAB")) %>%
#:#:#:#:#:
dplyr::filter(hash_key %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match$hash_key))) %>%
#dplyr::group_by(hash_key) %>%
#dplyr::mutate(rn=row_number()) %>%
#dplyr::ungroup() %>%
#:#:#:#:#:#:#:#:#:#:#:
#GENERAR VARIABLES GENERADAS PARA LA MUESTRA PARA MATCH
dplyr::mutate(tipo_de_plan_res=dplyr::case_when(grepl("PR",as.character(tipo_de_plan_2))~1,
grepl("PAI",as.character(tipo_de_plan_2))~0,
grepl("PAB",as.character(tipo_de_plan_2))~0,
TRUE~NA_real_)) %>%
dplyr::mutate(tipo_de_plan_res=factor(tipo_de_plan_res)) %>%
dplyr::mutate(abandono_temprano_rec=factor(if_else(as.character(motivodeegreso_mod_imp)=="Early Drop-out",TRUE,FALSE,NA))) %>%
dplyr::mutate(dg_trs_cons_sus_or=factor(if_else(as.character(dg_trs_cons_sus_or)=="Drug dependence",TRUE,FALSE,NA))) %>%
dplyr::mutate(tipo_centro_pub=factor(if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA))) %>%
#:#:#:#:#:#:#:#:#:#:#:
# ORDINALIZAR LAS VARIABLES ORDINALES: escolaridad_rec freq_cons_sus_prin dg_cie_10_rec
dplyr::mutate(dg_cie_10_rec=parse_factor(as.character(dg_cie_10_rec),levels=c('Without psychiatric comorbidity','Diagnosis unknown (under study)', 'With psychiatric comorbidity'), ordered =T,trim_ws=T,include_na =F)) %>% #, locale=locale(encoding = "Latin1")
dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Did not use', 'Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =F,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
dplyr::mutate(dg_trs_fis_rec=factor(dplyr::case_when(as.character(diagnostico_trs_fisico)=="En estudio"~"Diagnosis unknown (under study)",as.character(diagnostico_trs_fisico)=="Sin trastorno"~'Without physical comorbidity',cnt_diagnostico_trs_fisico>0 ~'With physical comorbidity',TRUE~NA_character_)))%>%
dplyr::mutate(dg_trs_fis_rec=parse_factor(as.character(dg_trs_fis_rec),levels=c('Without physical comorbidity','Diagnosis unknown (under study)', 'With physical comorbidity'), ordered =F,trim_ws=T,include_na =F)) %>%
#, locale=locale(encoding = "Latin1")
dplyr::mutate(evaluacindelprocesoteraputico=dplyr::case_when(grepl("1",as.character(evaluacindelprocesoteraputico))~'1-High Achievement',grepl("2",as.character(evaluacindelprocesoteraputico))~'2-Medium Achievement',grepl("3",as.character(evaluacindelprocesoteraputico))~'3-Minimum Achievement', TRUE~as.character(evaluacindelprocesoteraputico))) %>%
dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::mutate(tenencia_de_la_vivienda_mod=
factor(dplyr::case_when(tenencia_de_la_vivienda_mod=="Allegado"~"Stays temporarily with a relative",
tenencia_de_la_vivienda_mod=="Arrienda"~"Renting",
tenencia_de_la_vivienda_mod=="Cedida"~"Owner/Transferred dwellings/Pays Dividends",
tenencia_de_la_vivienda_mod=="Ocupación Irregular"~"Illegal Settlement",
tenencia_de_la_vivienda_mod=="Otros"~"Others",
tenencia_de_la_vivienda_mod=="Paga dividendo"~"Owner/Transferred dwellings/Pays Dividends",
tenencia_de_la_vivienda_mod=="Propia"~"Owner/Transferred dwellings/Pays Dividends",
T~NA_character_))) %>%
#:#:#:#:#:#:#:#:#:#:#:
dplyr::select_(.dots = match.on_tot2) %>%
dplyr::mutate(more_one_treat=factor(ifelse(duplicates_filtered>1,1,0))) %>%
#dplyr::mutate(motivodeegreso_mod_imp=as.character(motivodeegreso_mod_imp)) %>%
#dplyr::mutate(tr_completion=factor(dplyr::case_when(
#motivodeegreso_mod_imp=="Therapeutic discharge" ~1,
#motivodeegreso_mod_imp=="Ongoing treatment" ~0,
#TRUE~2),labels=c("Ongoing treatment", "Treatment completion","Treatment non-completion"))) %>%
#dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>%
dplyr::mutate(motivodeegreso_mod_imp=factor(dplyr::case_when(
grepl("Drop-out",motivodeegreso_mod_imp)~"Drop-out",
motivodeegreso_mod_imp=="Ongoing treatment" ~NA_character_,
TRUE~as.character(motivodeegreso_mod_imp)))) %>%
dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>%
data.table::data.table()
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
set.seed(2125)
amelia_fit <- amelia(CONS_C1_df_dup_SEP_2020_match_miss[,-c("abandono_temprano_rec", "more_one_treat")],
m=30, parallel = "multicore", #noms = "row",
idvars="row",#"hash_key","rn"
noms= c("estado_conyugal_2", "via_adm_sus_prin_act", "origen_ingreso_mod", "nombre_region", "sexo_2","dg_trs_cons_sus_or","sus_principal_mod","sus_ini_mod_mvv","dg_cie_10_rec", "tipo_centro_pub","tipo_de_plan_res","motivodeegreso_mod_imp","tenencia_de_la_vivienda_mod", "condicion_ocupacional_corr"),
ords= c("escolaridad_rec", "freq_cons_sus_prin","evaluacindelprocesoteraputico"),
cs = "hash_key",
ts = "dup",
incheck = TRUE)
#amelia_fit$imputations$imp1
#CONS_C1_df_dup_SEP_2020_match[!complete.cases(CONS_C1_df_dup_SEP_2020_match[,..match.on_tot])]
Admission Age
We started looking over the missing values in the age at admission
(n= 16). Since there were not cases with more than one treatment that
did not have an admission age, we did not have to impute taking into
account serial dependencies in the dates of treatment.
compare.density(amelia_fit,var="edad_al_ing")
As seen in the Figure above, distributions seem to differ. However,
considering the low amount of missing values in this variable, we
proceeded with the imputation of the mean, despite the minor differences
found. The imputed values must not be greater than the age of onset of
drug use and may not be less than 16 years old. Values lower than this
age may be considered less likely to receive treatment within adult
population, so it is probable that it is incorrect that they would be in
this database.
#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){w
res <- {
setTimeLimit(nn_K*500)
ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_al_ing")
}
}
paste0("Users that had more than one treatment with no date of admission:",CONS_C1_df_dup_SEP_2020_match_miss %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(na_edad_ing=sum(is.na(edad_al_ing))) %>%
dplyr::ungroup() %>%
dplyr::filter(na_edad_ing>0) %>%
dplyr::group_by(hash_key) %>%
dplyr::summarise(n=n()) %>% dplyr::filter(n>1) %>% nrow())
## [1] "Users that had more than one treatment with no date of admission:0"
#Hay poca relación en las imputaciones.
#table(is.na(CONS_C1_df_dup_SEP_2020_match_not_miss$edad_al_ing),exclude=NULL)
edad_al_ing_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$edad_al_ing,
amelia_fit$imputations$imp2$edad_al_ing,
amelia_fit$imputations$imp3$edad_al_ing,
amelia_fit$imputations$imp4$edad_al_ing,
amelia_fit$imputations$imp5$edad_al_ing,
amelia_fit$imputations$imp6$edad_al_ing,
amelia_fit$imputations$imp7$edad_al_ing,
amelia_fit$imputations$imp8$edad_al_ing,
amelia_fit$imputations$imp9$edad_al_ing,
amelia_fit$imputations$imp10$edad_al_ing,
amelia_fit$imputations$imp11$edad_al_ing,
amelia_fit$imputations$imp12$edad_al_ing,
amelia_fit$imputations$imp13$edad_al_ing,
amelia_fit$imputations$imp14$edad_al_ing,
amelia_fit$imputations$imp15$edad_al_ing,
amelia_fit$imputations$imp16$edad_al_ing,
amelia_fit$imputations$imp17$edad_al_ing,
amelia_fit$imputations$imp18$edad_al_ing,
amelia_fit$imputations$imp19$edad_al_ing,
amelia_fit$imputations$imp20$edad_al_ing,
amelia_fit$imputations$imp21$edad_al_ing,
amelia_fit$imputations$imp22$edad_al_ing,
amelia_fit$imputations$imp23$edad_al_ing,
amelia_fit$imputations$imp24$edad_al_ing,
amelia_fit$imputations$imp25$edad_al_ing,
amelia_fit$imputations$imp26$edad_al_ing,
amelia_fit$imputations$imp27$edad_al_ing,
amelia_fit$imputations$imp28$edad_al_ing,
amelia_fit$imputations$imp29$edad_al_ing,
amelia_fit$imputations$imp30$edad_al_ing
)
#select the value of the age at admission that had no inconsistencies with the age of onset of drug use
edad_al_ing_imputed<-
edad_al_ing_imputed %>%
janitor::clean_names() %>%
dplyr::left_join(CONS_C1_df_dup_SEP_2020_match_miss[,c("row","edad_ini_cons")],by=c("amelia_fit_imputations_imp1_row"="row")) %>%
melt(id.vars = "amelia_fit_imputations_imp1_row") %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(edad_ini_cons=max(value[variable=="edad_ini_cons"],na.rm=T)) %>%
dplyr::ungroup() %>%
# la edad de ingreso a imputar debe ser mayor o igual a la edad de inicio de sustancias, más de 16 o más
dplyr::filter(variable!="edad_ini_cons",value>=edad_ini_cons, value>=16) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(avg_edad_al_ing_imp= mean(value,na.rm=T),min_edad_al_ing_imp=min(value,na.rm=T))
# Reemplazo los valores perdidos:
CONS_C1_df_dup_SEP_2020_match_miss0<-
CONS_C1_df_dup_SEP_2020_match_miss %>%
dplyr::left_join(edad_al_ing_imputed,by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#si la edad al ingreso no existe, el valor promedio imutado es
dplyr::mutate(edad_al_ing=dplyr::case_when(is.na(edad_al_ing)~as.numeric(avg_edad_al_ing_imp),TRUE~as.numeric(edad_al_ing))) %>%
dplyr::select(-avg_edad_al_ing_imp,-min_edad_al_ing_imp)
no_mostrar=0
if(no_mostrar==1){
try(
CONS_C1_df_dup_SEP_2020_match_miss0 %>%
dplyr::left_join(edad_al_ing_imputed,by=c("row"="v1")) %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(min_edad_ini_cons=min(edad_ini_cons,na.rm=T),min_edad_ini_cons=ifelse(is.infinite(min_edad_ini_cons), NA, min_edad_ini_cons)) %>%
dplyr::ungroup() %>%
dplyr::filter(is.na(edad_al_ing)) %>%
dplyr::select(hash_key, edad_al_ing, avg_edad_ing, Min_edad_ing,min_edad_ini_cons,edad_ini_sus_prin)
)
}
no_mostrar=0
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_match_miss0 %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(na_edad_ing=sum(is.na(edad_al_ing))) %>%
dplyr::ungroup() %>%
dplyr::filter(na_edad_ing>0) %>%
dplyr::group_by(hash_key) %>%
summarise(n())
}
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss12$edad_al_ing))
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss0)-nrow(CONS_C1_df_dup_SEP_2020_match_miss)>0){
warning("Some rows were added in the imputation")}
After the imputation, there were no missing cases left.
Substance Use Onset Age
Another variable worth imputing is the Substance Use Onset Age (n=
6,592).
compare.density(amelia_fit,var="edad_ini_cons")
Based on the figure above, the substance use onset age was similar
between the imputed values and the observed. However, we followed the
rules stated in the Duplicates
process (link).
There were three logical conditions to fulfill in order to adequately
replace these values in the database: the age of onset must not be
greater than the substance use onset age in the primary substance at
admission (1), the age of onset may not be greater than the age of
admission to treatment (2), and the substance use onset age must be
greater than 4 years. Then, we selected the minimum value of substance
use onset age among the imputed values, because one user could not have
more than one value.
#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){
res <- {
setTimeLimit(nn_K*500)
ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_ini_cons")
}
}
#Hay poca relación en las imputaciones.
# Ver si alguno de los usuarios con valores perdidos tiene de todas formas datos en esta variable.
edad_ini_sus_prin_for_imp<-
CONS_C1_df_dup_SEP_2020 %>%
dplyr::filter(hash_key %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[which(!complete.cases(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons)),"hash_key"]))) %>%
dplyr::filter(is.na(edad_ini_cons)) %>%
dplyr::group_by(hash_key) %>%
dplyr::summarise(min_edad_ini_sus_prin=min(edad_ini_sus_prin, na.rm=T),
min_edad_ini_sus_prin=ifelse(is.infinite(min_edad_ini_sus_prin),NA,min_edad_ini_sus_prin),edad_al_ing_min=min(edad_al_ing,na.rm=T))
cumplimiento_errores<-data.frame()
for (i in paste0("imp",1:30)){
rn_cum_err<-data.frame(amelia_fit$imputations[i]) %>%
dplyr::rename(hash_key = 2) %>%
dplyr::rename(edad_ini_cons = 7) %>%
dplyr::mutate(hash_key=as.character(hash_key)) %>%
dplyr::left_join(edad_ini_sus_prin_for_imp, by="hash_key") %>%
dplyr::filter(edad_al_ing_min<edad_ini_cons|edad_ini_cons>min_edad_ini_sus_prin) %>% # edad de inicio de consumo no debe ser mayor a la menor edad a la ingreso de cada usuario, y la mínima edad de inicio de consumo de sustancia principal es menor a la edad de inicio de consumo
nrow()
rn_cum_err<-cbind(i,rn_cum_err)
cumplimiento_errores<- rbind(cumplimiento_errores,rn_cum_err)
}
colnames(cumplimiento_errores)<- c("imp","no_errors_age_of_onset_drug_use")
message(paste0("Number of users that had more than one different age of onset of drug use before replacement: ",CONS_C1_df_dup_SEP_2020_match %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(n_dis=n_distinct(edad_ini_cons)) %>%
dplyr::ungroup() %>%
dplyr::filter(n_dis>1) %>%
nrow()))
n_miss_edad_ini_cons<-nrow(CONS_C1_df_dup_SEP_2020_match[which(!complete.cases(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons)),"hash_key"])
plot_imps<-
cumplimiento_errores %>%
dplyr::mutate(no_errors_age_of_onset_drug_use=as.numeric(no_errors_age_of_onset_drug_use)) %>%
dplyr::mutate(imp=factor(imp, levels =paste0("imp",1:30))) %>%
dplyr::mutate(perc= no_errors_age_of_onset_drug_use/n_miss_edad_ini_cons) %>%
dplyr::mutate(label_text= paste0("Variable= ",imp,"<br>n= ",no_errors_age_of_onset_drug_use,"<br>",scales::percent(round(perc,2)))) %>%
dplyr::mutate(perc=perc*100) %>%
ggplot() +
geom_bar(aes(x=imp, y=perc,label= label_text), stat = 'identity') +
theme_classic()+
scale_y_continuous(limits=c(0,5))+
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
labs(x=NULL, y="% of Imputed Values With Logical Discrepancies")+
theme(aspect.ratio = 2/1)
#plotly
ggplotly(plot_imps, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T)) %>% layout(yaxis = list(tickformat='%', range = c(0, 5)), height = 400, width=533)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
edad_ini_cons_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$edad_ini_cons,
amelia_fit$imputations$imp2$edad_ini_cons,
amelia_fit$imputations$imp3$edad_ini_cons,
amelia_fit$imputations$imp4$edad_ini_cons,
amelia_fit$imputations$imp5$edad_ini_cons,
amelia_fit$imputations$imp6$edad_ini_cons,
amelia_fit$imputations$imp7$edad_ini_cons,
amelia_fit$imputations$imp8$edad_ini_cons,
amelia_fit$imputations$imp9$edad_ini_cons,
amelia_fit$imputations$imp10$edad_ini_cons,
amelia_fit$imputations$imp11$edad_ini_cons,
amelia_fit$imputations$imp12$edad_ini_cons,
amelia_fit$imputations$imp13$edad_ini_cons,
amelia_fit$imputations$imp14$edad_ini_cons,
amelia_fit$imputations$imp15$edad_ini_cons,
amelia_fit$imputations$imp16$edad_ini_cons,
amelia_fit$imputations$imp17$edad_ini_cons,
amelia_fit$imputations$imp18$edad_ini_cons,
amelia_fit$imputations$imp19$edad_ini_cons,
amelia_fit$imputations$imp20$edad_ini_cons,
amelia_fit$imputations$imp21$edad_ini_cons,
amelia_fit$imputations$imp22$edad_ini_cons,
amelia_fit$imputations$imp23$edad_ini_cons,
amelia_fit$imputations$imp24$edad_ini_cons,
amelia_fit$imputations$imp25$edad_ini_cons,
amelia_fit$imputations$imp26$edad_ini_cons,
amelia_fit$imputations$imp27$edad_ini_cons,
amelia_fit$imputations$imp28$edad_ini_cons,
amelia_fit$imputations$imp29$edad_ini_cons,
amelia_fit$imputations$imp30$edad_ini_cons
)
min_edad_al_ing_edad_ini_sus_prin<-
CONS_C1_df_dup_SEP_2020_match_miss0 %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(min_edad_al_ing=min(edad_al_ing,na.rm=T),min_edad_ini_sus_prin=min(edad_ini_sus_prin,na.rm=T),min_edad_ini_sus_prin=ifelse(is.infinite(min_edad_ini_sus_prin),100,min_edad_ini_sus_prin)) %>%
dplyr::ungroup() %>%
dplyr::select(row, hash_key,min_edad_al_ing,min_edad_ini_sus_prin)
#a- aquellos casos en que la edad de inicio de consumo está perdida, y el promedio imputado es menor o igual a la mínima edad de inicio de consumo de sustancia en la principal en la admisión, y sea menor a la primera fecha de ingreso a un programa de consumo de sustancias, se reemplazará por el promedio
#b- aquellos casos en que la edad de inicio de consumo está perdida, el promedio imputado es mayor a la mínima edad de inicio de consumo o mayor a la mínima edad de inicio a tratamiento, y el mínimo imputado es menor o igual a la mínima edad de inicio de consumo de sustancia en la principal en la admisión, y sea menor a la primera fecha de ingreso a un programa de consumo de sustancias, se reemplazará por el valor mínimo
#c- aquellos en que la edad de inicio de consumo está perdida y la edad de inicio de consumo en la sustancia principal también esté perdida,
#d- aquellos en que la edad de inicio de consumo está perdida y la edad de inicio de consumo en la sustancia principal también esté perdida,
edad_ini_cons_imputed_after<-
cbind(data.table(edad_ini_cons_imputed[,1]),data.table(edad_ini_cons_imputed[,2:31]))%>%
janitor::clean_names() %>%
melt(id.vars = "v1") %>%
dplyr::left_join(min_edad_al_ing_edad_ini_sus_prin,by=c("v1"="row")) %>%
dplyr::arrange(v1) %>%
dplyr::filter(value>=5,value<=min_edad_al_ing, value<=min_edad_ini_sus_prin) %>% # get candidate values for imputation over 5 years, according to SENDAs guidelines; get substance use onset ages that are lower or equal than the minimum admission age of each patient, and lower or equal than the minimum primary substance at admission onset age
dplyr::group_by(hash_key) %>% #changed in april 2022, from row to hash
dplyr::summarise(AVG=round(mean(value,na.rm=T),0),Min=round(min(value,na.rm=T),0)) %>%
dplyr::ungroup()
#summary(edad_ini_cons_imputed$Min)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
paste("Cases with more than missing one age of onset:",CONS_C1_df_dup_SEP_2020_match_miss0 %>%dplyr::mutate(na_edad_ini_cons=dplyr::case_when(is.na(edad_ini_cons)~1,TRUE~0)) %>% dplyr::group_by(hash_key) %>% dplyr::mutate(sum_na_edad_ini_cons=sum(na_edad_ini_cons)) %>%
dplyr::ungroup() %>% dplyr::filter(sum_na_edad_ini_cons>1) %>% nrow()
)
## [1] "Cases with more than missing one age of onset: 515"
# REPLACE MISSING VALUES:
#:#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss1<-
CONS_C1_df_dup_SEP_2020_match_miss0 %>%
dplyr::left_join(edad_ini_cons_imputed_after,by=c("hash_key"="hash_key")) %>% #changed form row to hash (apr 2022)
janitor::clean_names() %>%
dplyr::mutate(edad_ini_cons=dplyr::case_when(is.na(edad_ini_cons) ~as.numeric(avg),
TRUE~as.numeric(edad_ini_cons))) #%>%
#FROM APRIL 2022, THIS SEEMS NO LONGER NECESSARY
# #me quedo con un promedio por usuario de aquellos valores imputados por usuario.
# dplyr::group_by(hash_key) %>%
# dplyr::mutate(edad_ini_cons=round(mean(edad_ini_cons,na.rm=T),0)) %>%
# dplyr::ungroup() %>%
# dplyr::select(-avg,-min)
#is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & is.na(min_edad_al_ing)~as.numeric(avg),
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss1$edad_ini_cons))
message(paste0("Number of rows with values that did not fulfill the conditions: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>%
dplyr::filter(is.na(edad_ini_cons)) %>%
dplyr::select(hash_key, edad_ini_cons, edad_al_ing,edad_ini_sus_prin) %>% nrow())
)
#Lo importante es tener en cuenta que las imputaciones se hicieron por filas; no, en cambio, ahora debemos reemplazar aquellos casos que tienen perdidos (no cumplieron con las condiciones) con el valor mínimo
message(paste0("Number of rows with values that did not fulfill the conditions after replacement with the minimum by users: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>%
dplyr::filter(is.na(edad_ini_cons)) %>%
dplyr::select(hash_key, edad_ini_cons, edad_al_ing,edad_ini_sus_prin) %>% nrow())
)
message(paste0("Number of users that had different age of onset of drug use after replacement: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(n_dis=n_distinct(edad_ini_cons)) %>%
dplyr::ungroup() %>%
dplyr::filter(n_dis>1) %>%
nrow())
)
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss1)-nrow(CONS_C1_df_dup_SEP_2020_match_miss0)>0){
warning("Some rows were added in the imputation")}
There were 0 cases of imputed substance use onset ages that did not
fulfill the conditions necessary to replace the missing values with the
imputed ones.
First Substance Used
Then we selected the most vulnerable value among the candidates of
the imputations of the starting substance (First, Cocaine paste, Cocaine
hydrochloride or snort cocaine, Marijuana, Alcohol, and Other).
# Ver distintos valores propuestos para sustancia de inciio
sus_ini_mod_mvv_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$sus_ini_mod_mvv,
amelia_fit$imputations$imp2$sus_ini_mod_mvv,
amelia_fit$imputations$imp3$sus_ini_mod_mvv,
amelia_fit$imputations$imp4$sus_ini_mod_mvv,
amelia_fit$imputations$imp5$sus_ini_mod_mvv,
amelia_fit$imputations$imp6$sus_ini_mod_mvv,
amelia_fit$imputations$imp7$sus_ini_mod_mvv,
amelia_fit$imputations$imp8$sus_ini_mod_mvv,
amelia_fit$imputations$imp9$sus_ini_mod_mvv,
amelia_fit$imputations$imp10$sus_ini_mod_mvv,
amelia_fit$imputations$imp11$sus_ini_mod_mvv,
amelia_fit$imputations$imp12$sus_ini_mod_mvv,
amelia_fit$imputations$imp13$sus_ini_mod_mvv,
amelia_fit$imputations$imp14$sus_ini_mod_mvv,
amelia_fit$imputations$imp15$sus_ini_mod_mvv,
amelia_fit$imputations$imp16$sus_ini_mod_mvv,
amelia_fit$imputations$imp17$sus_ini_mod_mvv,
amelia_fit$imputations$imp18$sus_ini_mod_mvv,
amelia_fit$imputations$imp19$sus_ini_mod_mvv,
amelia_fit$imputations$imp20$sus_ini_mod_mvv,
amelia_fit$imputations$imp21$sus_ini_mod_mvv,
amelia_fit$imputations$imp22$sus_ini_mod_mvv,
amelia_fit$imputations$imp23$sus_ini_mod_mvv,
amelia_fit$imputations$imp24$sus_ini_mod_mvv,
amelia_fit$imputations$imp25$sus_ini_mod_mvv,
amelia_fit$imputations$imp26$sus_ini_mod_mvv,
amelia_fit$imputations$imp27$sus_ini_mod_mvv,
amelia_fit$imputations$imp28$sus_ini_mod_mvv,
amelia_fit$imputations$imp29$sus_ini_mod_mvv,
amelia_fit$imputations$imp30$sus_ini_mod_mvv
)
sus_ini_mod_mvv_imputed<-
sus_ini_mod_mvv_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Marijuana",as.character(.))~1,TRUE~0), .names="mar_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Alcohol",as.character(.))~1,TRUE~0), .names="oh_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine paste",as.character(.))~1,TRUE~0), .names="pb_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine hydrochloride",as.character(.))~1,TRUE~0), .names="coc_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Other",as.character(.))~1,TRUE~0), .names="otr_{col}"))%>%
dplyr::mutate(sus_ini_mod_mvv_mar = base::rowSums(dplyr::select(., starts_with("mar_"))))%>%
dplyr::mutate(sus_ini_mod_mvv_oh = base::rowSums(dplyr::select(., starts_with("oh_"))))%>%
dplyr::mutate(sus_ini_mod_mvv_pb = base::rowSums(dplyr::select(., starts_with("pb_"))))%>%
dplyr::mutate(sus_ini_mod_mvv_coc = base::rowSums(dplyr::select(., starts_with("coc_"))))%>%
dplyr::mutate(sus_ini_mod_mvv_otr = base::rowSums(dplyr::select(., starts_with("otr_")))) %>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_mar>0~1,TRUE~0)) %>%
dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_oh>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>%
dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_pb>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>%
dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_coc>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>%
dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_otr>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>%
dplyr::mutate(sus_ini_mod_mvv_to_imputation=dplyr::case_when(sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_otr>0~"Other",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_otr>0~"Other")) %>%
janitor::clean_names()
sus_ini_mod_mvv_imputed<-
dplyr::select(sus_ini_mod_mvv_imputed,amelia_fit_imputations_imp1_row,sus_ini_mod_mvv_to_imputation)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss2<-
CONS_C1_df_dup_SEP_2020_match_miss1 %>%
dplyr::left_join(sus_ini_mod_mvv_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(sus_ini_mod_mvv=factor(dplyr::case_when(is.na(sus_ini_mod_mvv)~as.character(sus_ini_mod_mvv_to_imputation),
TRUE~as.character(sus_ini_mod_mvv)))) %>%
dplyr::select(-sus_ini_mod_mvv_to_imputation) %>%
data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss2)-nrow(CONS_C1_df_dup_SEP_2020_match_miss1)>0){
warning("Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Primary Substance at Admission Usage Frequency
Another variable that is worth imputing is the Primary Substance at
Admission Usage Frequency (n= 568). In case of ties, we selected the
imputed values with the value with the most frequent substance use
pattern.
# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$freq_cons_sus_prin,
amelia_fit$imputations$imp2$freq_cons_sus_prin,
amelia_fit$imputations$imp3$freq_cons_sus_prin,
amelia_fit$imputations$imp4$freq_cons_sus_prin,
amelia_fit$imputations$imp5$freq_cons_sus_prin,
amelia_fit$imputations$imp6$freq_cons_sus_prin,
amelia_fit$imputations$imp7$freq_cons_sus_prin,
amelia_fit$imputations$imp8$freq_cons_sus_prin,
amelia_fit$imputations$imp9$freq_cons_sus_prin,
amelia_fit$imputations$imp10$freq_cons_sus_prin,
amelia_fit$imputations$imp11$freq_cons_sus_prin,
amelia_fit$imputations$imp12$freq_cons_sus_prin,
amelia_fit$imputations$imp13$freq_cons_sus_prin,
amelia_fit$imputations$imp14$freq_cons_sus_prin,
amelia_fit$imputations$imp15$freq_cons_sus_prin,
amelia_fit$imputations$imp16$freq_cons_sus_prin,
amelia_fit$imputations$imp17$freq_cons_sus_prin,
amelia_fit$imputations$imp18$freq_cons_sus_prin,
amelia_fit$imputations$imp19$freq_cons_sus_prin,
amelia_fit$imputations$imp20$freq_cons_sus_prin,
amelia_fit$imputations$imp21$freq_cons_sus_prin,
amelia_fit$imputations$imp22$freq_cons_sus_prin,
amelia_fit$imputations$imp23$freq_cons_sus_prin,
amelia_fit$imputations$imp24$freq_cons_sus_prin,
amelia_fit$imputations$imp25$freq_cons_sus_prin,
amelia_fit$imputations$imp26$freq_cons_sus_prin,
amelia_fit$imputations$imp27$freq_cons_sus_prin,
amelia_fit$imputations$imp28$freq_cons_sus_prin,
amelia_fit$imputations$imp29$freq_cons_sus_prin,
amelia_fit$imputations$imp30$freq_cons_sus_prin
)
freq_cons_sus_prin_imputed_after<-
freq_cons_sus_prin_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>%
dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
#hierarchy
dplyr::mutate(freq_cons_sus_prin_to_imputation=
dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use")) %>%
janitor::clean_names()
freq_cons_sus_prin_imputed_after<-
dplyr::select(freq_cons_sus_prin_imputed_after,amelia_fit_imputations_imp1_row,freq_cons_sus_prin_to_imputation)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss3<-
CONS_C1_df_dup_SEP_2020_match_miss2 %>%
dplyr::left_join(freq_cons_sus_prin_imputed_after, by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(freq_cons_sus_prin=factor(dplyr::case_when(is.na(freq_cons_sus_prin)~as.character(freq_cons_sus_prin_to_imputation), TRUE~as.character(freq_cons_sus_prin)))) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss3)-nrow(CONS_C1_df_dup_SEP_2020_match_miss2)>0){
warning("Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Educational Attainment
Another variable that is worth imputing is the Educational Attainment
(n= 437). We followed the rules stated in the Duplicates4
process (link).
We were particularly cautious to impute attainments that would follow a
progression from primary school to more than high school. For this
purpose, we first looked over the actual values per user, filling
intermediate gaps in educational attainment of users with intermediate
null values (a), we overcame the difficulty of the incorrect imputations
by logically selecting them if there were any.
# Ver distintos valores propuestos para sustancia de inciio
escolaridad_rec_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$hash_key,
amelia_fit$imputations$imp1$fech_ing_num,
amelia_fit$imputations$imp1$escolaridad_rec,
amelia_fit$imputations$imp2$escolaridad_rec,
amelia_fit$imputations$imp3$escolaridad_rec,
amelia_fit$imputations$imp4$escolaridad_rec,
amelia_fit$imputations$imp5$escolaridad_rec,
amelia_fit$imputations$imp6$escolaridad_rec,
amelia_fit$imputations$imp7$escolaridad_rec,
amelia_fit$imputations$imp8$escolaridad_rec,
amelia_fit$imputations$imp9$escolaridad_rec,
amelia_fit$imputations$imp10$escolaridad_rec,
amelia_fit$imputations$imp11$escolaridad_rec,
amelia_fit$imputations$imp12$escolaridad_rec,
amelia_fit$imputations$imp13$escolaridad_rec,
amelia_fit$imputations$imp14$escolaridad_rec,
amelia_fit$imputations$imp15$escolaridad_rec,
amelia_fit$imputations$imp16$escolaridad_rec,
amelia_fit$imputations$imp17$escolaridad_rec,
amelia_fit$imputations$imp18$escolaridad_rec,
amelia_fit$imputations$imp19$escolaridad_rec,
amelia_fit$imputations$imp20$escolaridad_rec,
amelia_fit$imputations$imp21$escolaridad_rec,
amelia_fit$imputations$imp22$escolaridad_rec,
amelia_fit$imputations$imp23$escolaridad_rec,
amelia_fit$imputations$imp24$escolaridad_rec,
amelia_fit$imputations$imp25$escolaridad_rec,
amelia_fit$imputations$imp26$escolaridad_rec,
amelia_fit$imputations$imp27$escolaridad_rec,
amelia_fit$imputations$imp28$escolaridad_rec,
amelia_fit$imputations$imp29$escolaridad_rec,
amelia_fit$imputations$imp30$escolaridad_rec)
escolaridad_rec_imputed2<-
escolaridad_rec_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("3-Completed primary school or less",as.character(.))~1,TRUE~0), .names="3_primary_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("2-Completed high school or less",as.character(.))~1,TRUE~0), .names="2_high_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("1-More than high school",as.character(.))~1,TRUE~0), .names="1_more_high_{col}")) %>%
dplyr::mutate(escolaridad_rec_3_primary = base::rowSums(dplyr::select(., contains("3_primary_")))) %>%
dplyr::mutate(escolaridad_rec_2_high = base::rowSums(dplyr::select(., contains("2_high_"))))%>%
dplyr::mutate(escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., contains("1_more_high_"))))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create an ordered index of the number of treatments by user
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#:#:#:#;#;#;
CONS_C1_df_dup_SEP_2020_match_rn<-
CONS_C1_df_dup_SEP_2020_match_miss %>% #base de datos original, sin imputaciones
dplyr::group_by(hash_key) %>%
dplyr::mutate(rn=row_number()) %>%
dplyr::ungroup() %>%
dplyr::select(rn)
#:#:#:#;#;#;
escolaridad_rec_imputed3<-
escolaridad_rec_imputed2 %>%
dplyr::left_join(cbind.data.frame(CONS_C1_df_dup_SEP_2020_match_miss$row, CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec,CONS_C1_df_dup_SEP_2020_match_rn$rn),by=c("amelia_fit.imputations.imp1.row"="CONS_C1_df_dup_SEP_2020_match_miss$row")) %>%
dplyr::rename("escolaridad_rec_original"="CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec") %>%
dplyr::mutate(escolaridad_rec_original=as.numeric(substr(escolaridad_rec_original, 1, 1))) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#ordenar por tratamientos por usuario
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::arrange(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`) %>%
dplyr::group_by(amelia_fit.imputations.imp1.hash_key) %>%
dplyr::mutate(siguiente_escolaridad_rec_original=lead(escolaridad_rec_original),
subsig_escolaridad_rec_original=lead(escolaridad_rec_original,n =2),
rn=max(`CONS_C1_df_dup_SEP_2020_match_rn$rn`),
n_na_esc_or=is.na(escolaridad_rec_original),
sum_n_na_esc_or=sum(n_na_esc_or,na.rm=T),
max_sum_n_na_esc_or=max(n_na_esc_or,na.rm=T)
) %>%
#dplyr::select(amelia_fit.imputations.imp1.hash_key,amelia_fit.imputations.imp30.rn,
# siguiente_escolaridad_rec_original,escolaridad_rec_original,amelia_fit.imputations.imp1.fech_ing_num)%>% View()
dplyr::ungroup()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PREPARACIÓN BASE DE DATOS PARA IMPUTACION Y CREACIÓN DE VARIABLES PARA CONDICIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed4 <-
escolaridad_rec_imputed3 %>%
dplyr::select(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`,escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high, escolaridad_rec_1_more_high) %>%
dplyr::rename("hash_key"="amelia_fit.imputations.imp1.hash_key") %>%
dplyr::rename("treat_no_for_usr"="CONS_C1_df_dup_SEP_2020_match_rn$rn") %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(treat_per_usr=max(treat_no_for_usr,na.rm=T)) %>%
dplyr::ungroup() %>%
tidyr::pivot_wider(names_from=treat_no_for_usr,
#names_glue = "ord_treat_esc_{.value}",
values_from=c(escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high,escolaridad_rec_1_more_high),values_fill = NA) %>%
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#Ver si existen inconsistencias en la escolaridad, pero no sólo inconsistencias inmediatas, sino con hasta 2 espacios entre tratamientos
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
dplyr::mutate(escolaridad_rec_tot_cond=dplyr::case_when(
(escolaridad_rec_original_10>escolaridad_rec_original_9)|(escolaridad_rec_original_10>escolaridad_rec_original_8)|(escolaridad_rec_original_10>escolaridad_rec_original_7)|
(escolaridad_rec_original_9>escolaridad_rec_original_8)|(escolaridad_rec_original_9>escolaridad_rec_original_7)|(escolaridad_rec_original_9>escolaridad_rec_original_6)|
(escolaridad_rec_original_8>escolaridad_rec_original_7)|(escolaridad_rec_original_8>escolaridad_rec_original_6)|(escolaridad_rec_original_8>escolaridad_rec_original_5)|
(escolaridad_rec_original_7>escolaridad_rec_original_6)|(escolaridad_rec_original_7>escolaridad_rec_original_5)|(escolaridad_rec_original_7>escolaridad_rec_original_4)|
(escolaridad_rec_original_6>escolaridad_rec_original_5)|(escolaridad_rec_original_6>escolaridad_rec_original_4)|(escolaridad_rec_original_6>escolaridad_rec_original_3)|
(escolaridad_rec_original_5>escolaridad_rec_original_4)|(escolaridad_rec_original_5>escolaridad_rec_original_3)|(escolaridad_rec_original_5>escolaridad_rec_original_2)|
(escolaridad_rec_original_4>escolaridad_rec_original_3)|(escolaridad_rec_original_4>escolaridad_rec_original_2)|(escolaridad_rec_original_4>escolaridad_rec_original_1)|
(escolaridad_rec_original_3>escolaridad_rec_original_2)|(escolaridad_rec_original_3>escolaridad_rec_original_1)|
(escolaridad_rec_original_2>escolaridad_rec_original_1)~1,TRUE~0)) %>%
#dplyr::filter(escolaridad_rec_tot_cond==1) %>% #View() #0 rows ¿y 374745c85601976177fe614a7370e475?
#dplyr::filter(treat_per_usr>1) %>%
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
# Ver si hay valores de escolaridad ausentes en una progresión de tratamientos
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
dplyr::mutate(sum_nas_esc=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
dplyr::mutate(escolaridad_rec_tot_nas_en_medio=dplyr::case_when(
(sum_nas_esc>10 & treat_per_usr==10)|
(sum_nas_esc>1 & treat_per_usr==9)|
(sum_nas_esc>2 & treat_per_usr==8)|
(sum_nas_esc>3 & treat_per_usr==7)|
(sum_nas_esc>4 & treat_per_usr==6)|
(sum_nas_esc>5 & treat_per_usr==5)|
(sum_nas_esc>6 & treat_per_usr==4)|
(sum_nas_esc>7 & treat_per_usr==3)|
(sum_nas_esc>8 & treat_per_usr==2)|
(sum_nas_esc>9 & treat_per_usr==1)~1,TRUE~0)) %>% #18b1f9646a2cd6bebd962637cff0a21a 5 casos
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#Generar la escolaridad al final
#:#:#:#:#:#:#:#:#
dplyr::mutate(last_esc=dplyr::case_when(treat_per_usr==10~escolaridad_rec_original_10,
treat_per_usr==9~escolaridad_rec_original_9,
treat_per_usr==8~escolaridad_rec_original_8,
treat_per_usr==7~escolaridad_rec_original_7,
treat_per_usr==6~escolaridad_rec_original_6,
treat_per_usr==5~escolaridad_rec_original_5,
treat_per_usr==4~escolaridad_rec_original_4,
treat_per_usr==3~escolaridad_rec_original_3,
treat_per_usr==2~escolaridad_rec_original_2,
treat_per_usr==1~escolaridad_rec_original_1)) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a0))si valor final vs. inicial son iguales, imputar todo lo que está en medio con el mismo valor
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(escolaridad_rec_original_9=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>9 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_9)) %>%
dplyr::mutate(escolaridad_rec_original_8=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>8 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_8)) %>%
dplyr::mutate(escolaridad_rec_original_7=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>7 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_7)) %>%
dplyr::mutate(escolaridad_rec_original_6=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>6 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_6)) %>%
dplyr::mutate(escolaridad_rec_original_5=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>5 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_5)) %>%
dplyr::mutate(escolaridad_rec_original_4=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>4 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_4)) %>%
dplyr::mutate(escolaridad_rec_original_3=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>3 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_3)) %>%
dplyr::mutate(escolaridad_rec_original_2=
dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>2 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
TRUE~escolaridad_rec_original_2)) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a1))cambiar valores vacíos intermedios /// fijarse en & escolaridad_rec_tot_cond==1
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#reemplazar el número intermedio por cada tratamiento para cada usuario
dplyr::mutate(escolaridad_rec_original_9=dplyr::case_when(escolaridad_rec_original_8==escolaridad_rec_original_10 & is.na(escolaridad_rec_original_9)&!is.na(escolaridad_rec_original_10)~escolaridad_rec_original_10,TRUE~escolaridad_rec_original_9)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_10)) %>% View()
dplyr::mutate(escolaridad_rec_original_8=dplyr::case_when(escolaridad_rec_original_7==escolaridad_rec_original_9 & is.na(escolaridad_rec_original_8)&!is.na(escolaridad_rec_original_9)~escolaridad_rec_original_9,TRUE~escolaridad_rec_original_8)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_9)) %>% View()
dplyr::mutate(escolaridad_rec_original_7=dplyr::case_when(escolaridad_rec_original_6==escolaridad_rec_original_8 & is.na(escolaridad_rec_original_7)&!is.na(escolaridad_rec_original_8)~escolaridad_rec_original_8 ,TRUE~escolaridad_rec_original_7)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_8)) %>% View()
dplyr::mutate(escolaridad_rec_original_6=dplyr::case_when(escolaridad_rec_original_5==escolaridad_rec_original_7& is.na(escolaridad_rec_original_6)&!is.na(escolaridad_rec_original_7)~escolaridad_rec_original_7,TRUE~escolaridad_rec_original_6)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_7)) %>% View()
dplyr::mutate(escolaridad_rec_original_5=dplyr::case_when(escolaridad_rec_original_4==escolaridad_rec_original_6 & is.na(escolaridad_rec_original_5)&!is.na(escolaridad_rec_original_6)~escolaridad_rec_original_6,TRUE~escolaridad_rec_original_5)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_6)) %>% View()
dplyr::mutate(escolaridad_rec_original_4=dplyr::case_when(escolaridad_rec_original_3==escolaridad_rec_original_5 & is.na(escolaridad_rec_original_4)&!is.na(escolaridad_rec_original_5)~escolaridad_rec_original_5,TRUE~escolaridad_rec_original_4)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_5)) %>% View()
dplyr::mutate(escolaridad_rec_original_3=dplyr::case_when(escolaridad_rec_original_2==escolaridad_rec_original_4 & is.na(escolaridad_rec_original_3)&!is.na(escolaridad_rec_original_4)~escolaridad_rec_original_4,TRUE~escolaridad_rec_original_3)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_4)) %>% View()
dplyr::mutate(escolaridad_rec_original_2=dplyr::case_when(escolaridad_rec_original_1==escolaridad_rec_original_3 & is.na(escolaridad_rec_original_2)&!is.na(escolaridad_rec_original_3)~escolaridad_rec_original_3,TRUE~escolaridad_rec_original_2)) %>%
# dplyr::filter(!is.na(escolaridad_rec_original_3)) %>% View()
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a2))si tiene información en la segunda pero no en la primera, y no es un valor intermedio como secundaria completa (ya que en ese caso puede adoptar más de un valor: más o igual a ese valor), imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==3~3,
escolaridad_rec_original_2==1~1,
TRUE~escolaridad_rec_original_1)) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a3))si hay más de 2 tratamientos por usuarios, y tiene información en la segunda pero no en la primera, y es un valor intermedio pero tiene un tercer tratamiento con el mismo valor, imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==2 & escolaridad_rec_original_3==2~3,TRUE~escolaridad_rec_original_1)) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#medidas para capturar inconsistencias a lo largo de todos los tratamientos de cada usuario
#escolaridad_rec_imputed4 %>% #escolaridad_rec_tot_cond
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==1~1,TRUE~0), .names="1_more_high_{col}")) %>%
dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==2~1,TRUE~0), .names="2_high_{col}")) %>%
dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==3~1,TRUE~0), .names="3_primary_{col}")) %>%
dplyr::mutate(suma_vals_escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., starts_with("1_more_high_")))) %>%
dplyr::mutate(suma_vals_escolaridad_rec_2_high = base::rowSums(dplyr::select(., starts_with("2_high_")))) %>%
dplyr::mutate(suma_vals_escolaridad_rec_3_primary = base::rowSums(dplyr::select(., starts_with("3_primary_"))))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#IMPUTACIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed5<-
escolaridad_rec_imputed4 %>%
#hacer una suma de más NA's de los que debería tener según la cantidad de tratamientos que tiene la persona
#:#:#:#:#:#:#:#:#:
dplyr::mutate(sum_nas_esc_post=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
dplyr::mutate(escolaridad_rec_tot_nas_en_medio_post=dplyr::case_when(
(sum_nas_esc_post>10 & treat_per_usr==10)|
(sum_nas_esc_post>1 & treat_per_usr==9)|
(sum_nas_esc_post>2 & treat_per_usr==8)|
(sum_nas_esc_post>3 & treat_per_usr==7)|
(sum_nas_esc_post>4 & treat_per_usr==6)|
(sum_nas_esc_post>5 & treat_per_usr==5)|
(sum_nas_esc_post>6 & treat_per_usr==4)|
(sum_nas_esc_post>7 & treat_per_usr==3)|
(sum_nas_esc_post>8 & treat_per_usr==2)|
(sum_nas_esc_post>9 & treat_per_usr==1)~1,TRUE~0)) %>%
#dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#d864967fa0b1c5bb1d4eb5f6a7c8c2c1
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b0))valor inicial y sólo un tratamiento, se imputa por el valor imputado más frecuente de las 30 bases de datos
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
is.na(escolaridad_rec_original_1) & treat_per_usr==1 &
(escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)&
(escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)~3,
is.na(escolaridad_rec_original_1) & treat_per_usr==1 &
(escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)&
(escolaridad_rec_2_high_1>escolaridad_rec_1_more_high_1)~2,
is.na(escolaridad_rec_original_1) & treat_per_usr==1 &
(escolaridad_rec_1_more_high_1>escolaridad_rec_3_primary_1)&
(escolaridad_rec_1_more_high_1>escolaridad_rec_2_high_1)~1,
TRUE~escolaridad_rec_original_1)) %>%
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b1))valor en el segundo tratamiento es intermedio, inicial se imputa, dependiendo si primaria es mayor que intermedio o no
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
###
#dplyr::filter(is.na(escolaridad_rec_original_1),!is.na(escolaridad_rec_original_2)) %>%
#dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1,escolaridad_rec_1_more_high_1) %>% View()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)~3,
is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1<escolaridad_rec_2_high_1)~2,TRUE~escolaridad_rec_original_1))%>%
#dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#610dd4dba4dbb62848691b6916828948
#90d581cd11064c41b82f8e4d6ff7b70b
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dplyr::mutate(escolaridad_rec_original_10= dplyr::case_when(
#
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==10 & is.na(escolaridad_rec_original_10) & escolaridad_rec_original_9==1~1,
treat_per_usr==10 & is.na(escolaridad_rec_original_10) & escolaridad_rec_original_9==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==10 & is.na(escolaridad_rec_original_10) & escolaridad_rec_original_9==2 &
(escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10)~1,
treat_per_usr==10 & is.na(escolaridad_rec_original_10) & escolaridad_rec_original_9==2 &
(escolaridad_rec_1_more_high_10<escolaridad_rec_2_high_10)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==10 & is.na(escolaridad_rec_original_10) & escolaridad_rec_original_9==3 &
(escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10) & (escolaridad_rec_1_more_high_10>escolaridad_rec_3_primary_10)~1,
treat_per_usr==10 & is.na(escolaridad_rec_original_10) & escolaridad_rec_original_9==3 &
(escolaridad_rec_2_high_10 >escolaridad_rec_1_more_high_10) & (escolaridad_rec_2_high_10>escolaridad_rec_3_primary_10)~2,
treat_per_usr==10 & is.na(escolaridad_rec_original_10) & escolaridad_rec_original_9==3 &
(escolaridad_rec_3_primary_10 >escolaridad_rec_2_high_10) & (escolaridad_rec_3_primary_10>escolaridad_rec_1_more_high_10)~2,TRUE~escolaridad_rec_original_10)) %>%
# dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#
dplyr::mutate(escolaridad_rec_original_9= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==9 & is.na(escolaridad_rec_original_9) & escolaridad_rec_original_8==1~1,
treat_per_usr==9 & is.na(escolaridad_rec_original_9) & escolaridad_rec_original_8==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==9 & is.na(escolaridad_rec_original_9) & escolaridad_rec_original_8==2 &
(escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9)~1,
treat_per_usr==9 & is.na(escolaridad_rec_original_9) & escolaridad_rec_original_8==2 &
(escolaridad_rec_1_more_high_9<escolaridad_rec_2_high_9)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==9 & is.na(escolaridad_rec_original_9) & escolaridad_rec_original_8==3 &
(escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9) & (escolaridad_rec_1_more_high_9>escolaridad_rec_3_primary_9)~1,
treat_per_usr==9 & is.na(escolaridad_rec_original_9) & escolaridad_rec_original_8==3 &
(escolaridad_rec_2_high_9 >escolaridad_rec_1_more_high_9) & (escolaridad_rec_2_high_9>escolaridad_rec_3_primary_9)~2,
treat_per_usr==9 & is.na(escolaridad_rec_original_9) & escolaridad_rec_original_8==3 &
(escolaridad_rec_3_primary_9 >escolaridad_rec_2_high_9) & (escolaridad_rec_3_primary_9>escolaridad_rec_1_more_high_9)~2,TRUE~escolaridad_rec_original_9)) %>%
#
dplyr::mutate(escolaridad_rec_original_8= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==8 & is.na(escolaridad_rec_original_8) & escolaridad_rec_original_7==1~1,
treat_per_usr==8 & is.na(escolaridad_rec_original_8) & escolaridad_rec_original_7==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==8 & is.na(escolaridad_rec_original_8) & escolaridad_rec_original_7==2 &
(escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8)~1,
treat_per_usr==8 & is.na(escolaridad_rec_original_8) & escolaridad_rec_original_7==2 &
(escolaridad_rec_1_more_high_8<escolaridad_rec_2_high_8)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==8 & is.na(escolaridad_rec_original_8) & escolaridad_rec_original_7==3 &
(escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8) & (escolaridad_rec_1_more_high_8>escolaridad_rec_3_primary_8)~1,
treat_per_usr==8 & is.na(escolaridad_rec_original_8) & escolaridad_rec_original_7==3 &
(escolaridad_rec_2_high_8 >escolaridad_rec_1_more_high_8) & (escolaridad_rec_2_high_8>escolaridad_rec_3_primary_8)~2,
treat_per_usr==8 & is.na(escolaridad_rec_original_8) & escolaridad_rec_original_7==3 &
(escolaridad_rec_3_primary_8 >escolaridad_rec_2_high_8) & (escolaridad_rec_3_primary_8>escolaridad_rec_1_more_high_8)~2,TRUE~escolaridad_rec_original_8)) %>%
#
dplyr::mutate(escolaridad_rec_original_7= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==7 & is.na(escolaridad_rec_original_7) & escolaridad_rec_original_6==1~1,
treat_per_usr==7 & is.na(escolaridad_rec_original_7) & escolaridad_rec_original_6==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==7 & is.na(escolaridad_rec_original_7) & escolaridad_rec_original_6==2 &
(escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7)~1,
treat_per_usr==7 & is.na(escolaridad_rec_original_7) & escolaridad_rec_original_6==2 &
(escolaridad_rec_1_more_high_7<escolaridad_rec_2_high_7)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==7 & is.na(escolaridad_rec_original_7) & escolaridad_rec_original_6==3 &
(escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7) & (escolaridad_rec_1_more_high_7>escolaridad_rec_3_primary_7)~1,
treat_per_usr==7 & is.na(escolaridad_rec_original_7) & escolaridad_rec_original_6==3 &
(escolaridad_rec_2_high_7 >escolaridad_rec_1_more_high_7) & (escolaridad_rec_2_high_7>escolaridad_rec_3_primary_7)~2,
treat_per_usr==7 & is.na(escolaridad_rec_original_7) & escolaridad_rec_original_6==3 &
(escolaridad_rec_3_primary_7 >escolaridad_rec_2_high_7) & (escolaridad_rec_3_primary_7>escolaridad_rec_1_more_high_7)~2,TRUE~escolaridad_rec_original_7)) %>%
#
dplyr::mutate(escolaridad_rec_original_6= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==6 & is.na(escolaridad_rec_original_6) & escolaridad_rec_original_5==1~1,
treat_per_usr==6 & is.na(escolaridad_rec_original_6) & escolaridad_rec_original_5==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==6 & is.na(escolaridad_rec_original_6) & escolaridad_rec_original_5==2 &
(escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6)~1,
treat_per_usr==6 & is.na(escolaridad_rec_original_6) & escolaridad_rec_original_5==2 &
(escolaridad_rec_1_more_high_6<escolaridad_rec_2_high_6)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==6 & is.na(escolaridad_rec_original_6) & escolaridad_rec_original_5==3 &
(escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6) & (escolaridad_rec_1_more_high_6>escolaridad_rec_3_primary_6)~1,
treat_per_usr==6 & is.na(escolaridad_rec_original_6) & escolaridad_rec_original_5==3 &
(escolaridad_rec_2_high_6 >escolaridad_rec_1_more_high_6) & (escolaridad_rec_2_high_6>escolaridad_rec_3_primary_6)~2,
treat_per_usr==6 & is.na(escolaridad_rec_original_6) & escolaridad_rec_original_5==3 &
(escolaridad_rec_3_primary_6 >escolaridad_rec_2_high_6) & (escolaridad_rec_3_primary_6>escolaridad_rec_1_more_high_6)~2,TRUE~escolaridad_rec_original_6)) %>%
#
dplyr::mutate(escolaridad_rec_original_5= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==5 & is.na(escolaridad_rec_original_5) & escolaridad_rec_original_4==1~1,
treat_per_usr==5 & is.na(escolaridad_rec_original_5) & escolaridad_rec_original_4==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==5 & is.na(escolaridad_rec_original_5) & escolaridad_rec_original_4==2 &
(escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5)~1,
treat_per_usr==5 & is.na(escolaridad_rec_original_5) & escolaridad_rec_original_4==2 &
(escolaridad_rec_1_more_high_5<escolaridad_rec_2_high_5)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==5 & is.na(escolaridad_rec_original_5) & escolaridad_rec_original_4==3 &
(escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5) & (escolaridad_rec_1_more_high_5>escolaridad_rec_3_primary_5)~1,
treat_per_usr==5 & is.na(escolaridad_rec_original_5) & escolaridad_rec_original_4==3 &
(escolaridad_rec_2_high_5 >escolaridad_rec_1_more_high_5) & (escolaridad_rec_2_high_5>escolaridad_rec_3_primary_5)~2,
treat_per_usr==5 & is.na(escolaridad_rec_original_5) & escolaridad_rec_original_4==3 &
(escolaridad_rec_3_primary_5 >escolaridad_rec_2_high_5) & (escolaridad_rec_3_primary_5>escolaridad_rec_1_more_high_5)~2,TRUE~escolaridad_rec_original_5)) %>%
#
dplyr::mutate(escolaridad_rec_original_4= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==4 & is.na(escolaridad_rec_original_4) & escolaridad_rec_original_3==1~1,
treat_per_usr==4 & is.na(escolaridad_rec_original_4) & escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==4 & is.na(escolaridad_rec_original_4) & escolaridad_rec_original_3==2 &
(escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4)~1,
treat_per_usr==4 & is.na(escolaridad_rec_original_4) & escolaridad_rec_original_3==2 &
(escolaridad_rec_1_more_high_4<escolaridad_rec_2_high_4)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==4 & is.na(escolaridad_rec_original_4) & escolaridad_rec_original_3==3 &
(escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4) & (escolaridad_rec_1_more_high_4>escolaridad_rec_3_primary_4)~1,
treat_per_usr==4 & is.na(escolaridad_rec_original_4) & escolaridad_rec_original_3==3 &
(escolaridad_rec_2_high_4 >escolaridad_rec_1_more_high_4) & (escolaridad_rec_2_high_4>escolaridad_rec_3_primary_4)~2,
treat_per_usr==4 & is.na(escolaridad_rec_original_4) & escolaridad_rec_original_3==3 &
(escolaridad_rec_3_primary_4 >escolaridad_rec_2_high_4) & (escolaridad_rec_3_primary_4>escolaridad_rec_1_more_high_4)~2,TRUE~escolaridad_rec_original_4)) %>%
#
dplyr::mutate(escolaridad_rec_original_3= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
treat_per_usr==3 & is.na(escolaridad_rec_original_3) & escolaridad_rec_original_3==1~1,
treat_per_usr==3 & is.na(escolaridad_rec_original_3) & escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)
treat_per_usr==3 & is.na(escolaridad_rec_original_3) & escolaridad_rec_original_3==2 &
(escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3)~1,
treat_per_usr==3 & is.na(escolaridad_rec_original_3) & escolaridad_rec_original_3==2 &
(escolaridad_rec_1_more_high_3<escolaridad_rec_2_high_3)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)
treat_per_usr==3 & is.na(escolaridad_rec_original_3) & escolaridad_rec_original_2==3 &
(escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3) & (escolaridad_rec_1_more_high_3>escolaridad_rec_3_primary_3)~1,
treat_per_usr==3 & is.na(escolaridad_rec_original_3) & escolaridad_rec_original_2==3 &
(escolaridad_rec_2_high_3 >escolaridad_rec_1_more_high_3) & (escolaridad_rec_2_high_3>escolaridad_rec_3_primary_3)~2,
treat_per_usr==3 & is.na(escolaridad_rec_original_3) & escolaridad_rec_original_2==3 &
(escolaridad_rec_3_primary_3 >escolaridad_rec_2_high_3) & (escolaridad_rec_3_primary_3>escolaridad_rec_1_more_high_3)~2,TRUE~escolaridad_rec_original_3))
#:#:#:#:
# dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#:#:#:#:
#comprobar si en verdad calza:
#%>%dplyr::filter(hash_key=="ef4325cda7ddd92f6218bb910c3e0895") %>% dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,treat_per_usr,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1)
#610dd4dba4dbb62848691b6916828948
#90d581cd11064c41b82f8e4d6ff7b70b
#escolaridad_rec_imputed5 %>%
# dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_3,escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_4)
#98d6644d995ea2c8777a683160728004
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed6<-
escolaridad_rec_imputed5 %>%
#dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_3)
dplyr::select(hash_key,starts_with("escolaridad_rec_original_")) %>%
tidyr::pivot_longer(cols = starts_with("escolaridad_rec_original_"),
names_to = "rn",
names_prefix = "escolaridad_rec_original_") %>%
dplyr::filter(!is.na(value)) %>%
dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>%
dplyr::select(hash_rn,value)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss4<-
CONS_C1_df_dup_SEP_2020_match_miss3 %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(rn=row_number()) %>%
dplyr::ungroup() %>%
dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>%
dplyr::left_join(escolaridad_rec_imputed6, by=c("hash_rn")) %>%
dplyr::mutate(escolaridad_rec=dplyr::case_when(value==1~"1-More than high school",value==2~"2-Completed high school or less",value==3~"3-Completed primary school or less")) %>%
#
dplyr::arrange(hash_key,rn) %>%
#dplyr::mutate(escolaridad_rec=dplyr::case_when(is.na(escolaridad_rec)~value,TRUE~as.character(escolaridad_rec))) %>%
dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =F,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
dplyr::select(-value,-hash_rn) %>%
data.table()
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
paste("Check inconsistencies with posterior educational attainments (0= No inconsistencies):",CONS_C1_df_dup_SEP_2020_match_miss4 %>%
dplyr::arrange(hash_key,rn) %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(escolaridad_rec_num=as.numeric(substr(escolaridad_rec, 1, 1)),
sig_escolaridad_rec_num=lead(escolaridad_rec_num),
ant_escolaridad_rec_num=lag(escolaridad_rec_num)) %>%
dplyr::ungroup() %>%
dplyr::filter(escolaridad_rec_num>ant_escolaridad_rec_num) %>%
dplyr::select(hash_key,rn,fech_ing_num, escolaridad_rec, escolaridad_rec_num, sig_escolaridad_rec_num,ant_escolaridad_rec_num) %>%
nrow())
## [1] "Check inconsistencies with posterior educational attainments (0= No inconsistencies): 0"
#4b27553c38e707a6fda01855b784cf66 5a25a197e73f83fea546ac73589928fa b715d04a584dbdd450fd6f2ea68291fc
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss4)-nrow(CONS_C1_df_dup_SEP_2020_match_miss3)>0){
warning("Some rows were added in the imputation")}
We concluded with a total of 270 missing values in educational
attainment (users=267), because the imputed values did not fulfill the
requirements of a progression of the educational attainment (eg., a user
could not respond to have completed secondary school, but then answer
that he/she had completed primary school only), for example, due to ties
in the imputed values or no imputed values.
Marital status
Additionally, we replaced missing values of the marital status
(n=198). Since different marital status were not particularly more
vulnerable between each other, we selected the most frequent imputed
value among the different imputed databases.
# Ver distintos valores propuestos para estado conyugal
estado_conyugal_2_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$estado_conyugal_2,
amelia_fit$imputations$imp2$estado_conyugal_2,
amelia_fit$imputations$imp3$estado_conyugal_2,
amelia_fit$imputations$imp4$estado_conyugal_2,
amelia_fit$imputations$imp5$estado_conyugal_2,
amelia_fit$imputations$imp6$estado_conyugal_2,
amelia_fit$imputations$imp7$estado_conyugal_2,
amelia_fit$imputations$imp8$estado_conyugal_2,
amelia_fit$imputations$imp9$estado_conyugal_2,
amelia_fit$imputations$imp10$estado_conyugal_2,
amelia_fit$imputations$imp11$estado_conyugal_2,
amelia_fit$imputations$imp12$estado_conyugal_2,
amelia_fit$imputations$imp13$estado_conyugal_2,
amelia_fit$imputations$imp14$estado_conyugal_2,
amelia_fit$imputations$imp15$estado_conyugal_2,
amelia_fit$imputations$imp16$estado_conyugal_2,
amelia_fit$imputations$imp17$estado_conyugal_2,
amelia_fit$imputations$imp18$estado_conyugal_2,
amelia_fit$imputations$imp19$estado_conyugal_2,
amelia_fit$imputations$imp20$estado_conyugal_2,
amelia_fit$imputations$imp21$estado_conyugal_2,
amelia_fit$imputations$imp22$estado_conyugal_2,
amelia_fit$imputations$imp23$estado_conyugal_2,
amelia_fit$imputations$imp24$estado_conyugal_2,
amelia_fit$imputations$imp25$estado_conyugal_2,
amelia_fit$imputations$imp26$estado_conyugal_2,
amelia_fit$imputations$imp27$estado_conyugal_2,
amelia_fit$imputations$imp28$estado_conyugal_2,
amelia_fit$imputations$imp29$estado_conyugal_2,
amelia_fit$imputations$imp30$estado_conyugal_2
)
estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Married/Shared living arrangements",as.character(.))~1,TRUE~0), .names="married_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Separated/Divorced",as.character(.))~1,TRUE~0), .names="sep_div_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Single",as.character(.))~1,TRUE~0), .names="singl_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Widower",as.character(.))~1,TRUE~0), .names="widow_{col}"))%>%
dplyr::mutate(estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))))%>%
dplyr::mutate(estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))))%>%
dplyr::mutate(estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))))%>%
dplyr::mutate(estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_"))))%>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_married>0~1,TRUE~0)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_sep_div>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_singl>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_wid>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
janitor::clean_names()
estado_conyugal_2_imputed_cat_est_cony<-
estado_conyugal_2_imputed %>%
tidyr::pivot_longer(c(estado_conyugal_2_married, estado_conyugal_2_sep_div, estado_conyugal_2_singl, estado_conyugal_2_wid), names_to = "cat_est_conyugal", values_to = "count") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(estado_conyugal_2_imputed_max=max(count,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::filter(estado_conyugal_2_imputed_max==count) %>%
dplyr::select(amelia_fit_imputations_imp1_row,cat_est_conyugal,count) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_row=n()) %>%
dplyr::ungroup() %>%
dplyr::mutate(cat_est_conyugal=dplyr::case_when(n_row>1~NA_character_,
TRUE~cat_est_conyugal)) %>%
dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>%
dplyr::left_join(estado_conyugal_2_imputed_cat_est_cony, by="amelia_fit_imputations_imp1_row") %>%
dplyr::mutate(cat_est_conyugal=dplyr::case_when(cat_est_conyugal=="estado_conyugal_2_married"~"Married/Shared living arrangements",cat_est_conyugal=="estado_conyugal_2_sep_div"~"Separated/Divorced",cat_est_conyugal=="estado_conyugal_2_singl"~"Single",cat_est_conyugal=="estado_conyugal_2_wid"~"Widower"
))%>%
janitor::clean_names()
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss5<-
CONS_C1_df_dup_SEP_2020_match_miss4 %>%
dplyr::left_join(dplyr::select(estado_conyugal_2_imputed,amelia_fit_imputations_imp1_row,cat_est_conyugal), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(cat_est_conyugal),TRUE~as.character(estado_conyugal_2)))) %>%
data.table()
no_calzaron_estado_cony<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist()
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss5)-nrow(CONS_C1_df_dup_SEP_2020_match_miss4)>0){
warning("Some rows were added in the imputation")}
We could not resolve Marital status in 13 cases due to ties in the
most frequent values.
Region & Type of Center (Public)
We looked over possible imputations for region of the center (n=28)
and type of the center (public or private) (n=28).
# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub
#no hay información. debemos imputar
no_mostrar=0
if (no_mostrar==1){
tipo_centro_nombre_region_nas_nombre_region<-
CONS_C1_df_dup_SEP_2020 %>%
#dplyr::filter(row %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[,"row"]))) %>%
dplyr::filter(is.na(nombre_region)) %>%
janitor::tabyl(tipo_centro, nombre_region)
}
nombre_region_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$nombre_region,
amelia_fit$imputations$imp2$nombre_region,
amelia_fit$imputations$imp3$nombre_region,
amelia_fit$imputations$imp4$nombre_region,
amelia_fit$imputations$imp5$nombre_region,
amelia_fit$imputations$imp6$nombre_region,
amelia_fit$imputations$imp7$nombre_region,
amelia_fit$imputations$imp8$nombre_region,
amelia_fit$imputations$imp9$nombre_region,
amelia_fit$imputations$imp10$nombre_region,
amelia_fit$imputations$imp11$nombre_region,
amelia_fit$imputations$imp12$nombre_region,
amelia_fit$imputations$imp13$nombre_region,
amelia_fit$imputations$imp14$nombre_region,
amelia_fit$imputations$imp15$nombre_region,
amelia_fit$imputations$imp16$nombre_region,
amelia_fit$imputations$imp17$nombre_region,
amelia_fit$imputations$imp18$nombre_region,
amelia_fit$imputations$imp19$nombre_region,
amelia_fit$imputations$imp20$nombre_region,
amelia_fit$imputations$imp21$nombre_region,
amelia_fit$imputations$imp22$nombre_region,
amelia_fit$imputations$imp23$nombre_region,
amelia_fit$imputations$imp24$nombre_region,
amelia_fit$imputations$imp25$nombre_region,
amelia_fit$imputations$imp26$nombre_region,
amelia_fit$imputations$imp27$nombre_region,
amelia_fit$imputations$imp28$nombre_region,
amelia_fit$imputations$imp29$nombre_region,
amelia_fit$imputations$imp30$nombre_region
)
nombre_region_imputed<-
nombre_region_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Antofagasta",as.character(.))~1,TRUE~0), .names="reg_02_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Araucan",as.character(.))~1,TRUE~0), .names="reg_09_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Arica",as.character(.))~1,TRUE~0), .names="reg_15_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Atacama",as.character(.))~1,TRUE~0), .names="reg_03_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Ays",as.character(.))~1,TRUE~0), .names="reg_11_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Biob",as.character(.))~1,TRUE~0), .names="reg_08_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Coquimbo",as.character(.))~1,TRUE~0), .names="reg_04_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los Lagos",as.character(.))~1,TRUE~0), .names="reg_10_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los R",as.character(.))~1,TRUE~0), .names="reg_14_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Magallanes",as.character(.))~1,TRUE~0), .names="reg_12_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Maule",as.character(.))~1,TRUE~0), .names="reg_07_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Metropolitana",as.character(.))~1,TRUE~0), .names="reg_13_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("uble",as.character(.))~1,TRUE~0), .names="reg_16_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Higgins",as.character(.))~1,TRUE~0), .names="reg_06_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Tarapac",as.character(.))~1,TRUE~0), .names="reg_01_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Valpara",as.character(.))~1,TRUE~0), .names="reg_05_{col}"))%>%
dplyr::mutate(nombre_region_02 = base::rowSums(dplyr::select(., starts_with("reg_02_"))))%>%
dplyr::mutate(nombre_region_09 = base::rowSums(dplyr::select(., starts_with("reg_09_"))))%>%
dplyr::mutate(nombre_region_15 = base::rowSums(dplyr::select(., starts_with("reg_15_"))))%>%
dplyr::mutate(nombre_region_03 = base::rowSums(dplyr::select(., starts_with("reg_03_"))))%>%
dplyr::mutate(nombre_region_11 = base::rowSums(dplyr::select(., starts_with("reg_11_"))))%>%
dplyr::mutate(nombre_region_08 = base::rowSums(dplyr::select(., starts_with("reg_08_"))))%>%
dplyr::mutate(nombre_region_04 = base::rowSums(dplyr::select(., starts_with("reg_04_"))))%>%
dplyr::mutate(nombre_region_10 = base::rowSums(dplyr::select(., starts_with("reg_10_"))))%>%
dplyr::mutate(nombre_region_14 = base::rowSums(dplyr::select(., starts_with("reg_14_"))))%>%
dplyr::mutate(nombre_region_12 = base::rowSums(dplyr::select(., starts_with("reg_12_"))))%>%
dplyr::mutate(nombre_region_07 = base::rowSums(dplyr::select(., starts_with("reg_07_"))))%>%
dplyr::mutate(nombre_region_13 = base::rowSums(dplyr::select(., starts_with("reg_13_"))))%>%
dplyr::mutate(nombre_region_16 = base::rowSums(dplyr::select(., starts_with("reg_16_"))))%>%
dplyr::mutate(nombre_region_06 = base::rowSums(dplyr::select(., starts_with("reg_06_"))))%>%
dplyr::mutate(nombre_region_01 = base::rowSums(dplyr::select(., starts_with("reg_01_"))))%>%
dplyr::mutate(nombre_region_05 = base::rowSums(dplyr::select(., starts_with("reg_05_"))))%>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_02>0~1,TRUE~0)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_09>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_15>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_03>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_11>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_08>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_04>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_10>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_14>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_12>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_07>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_13>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_16>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_06>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_01>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_05>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
janitor::clean_names()
nombre_region_imputed_cat_reg<-
nombre_region_imputed %>%
tidyr::pivot_longer(c(nombre_region_01, nombre_region_02, nombre_region_03, nombre_region_04, nombre_region_05, nombre_region_06, nombre_region_07, nombre_region_08, nombre_region_09, nombre_region_10, nombre_region_11, nombre_region_12, nombre_region_13, nombre_region_14, nombre_region_15), names_to = "cat_nombre_region", values_to = "count") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(nombre_region_imputed_max=max(count,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::filter(nombre_region_imputed_max==count) %>%
dplyr::select(amelia_fit_imputations_imp1_row,cat_nombre_region,count) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_row=n()) %>%
dplyr::ungroup() %>%
dplyr::mutate(cat_nombre_region=dplyr::case_when(n_row>1~NA_character_,
TRUE~cat_nombre_region)) %>%
dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
nombre_region_imputed<-
nombre_region_imputed %>%
dplyr::left_join(nombre_region_imputed_cat_reg, by="amelia_fit_imputations_imp1_row") %>%
dplyr::mutate(cat_nombre_region=dplyr::case_when(cat_nombre_region=="nombre_region_01"~"Tarapacá (01)",cat_nombre_region=="nombre_region_02"~"Antofagasta (02)",cat_nombre_region=="nombre_region_03"~"Atacama (03)",cat_nombre_region=="nombre_region_04"~"Coquimbo (04)",cat_nombre_region=="nombre_region_05"~"Valparaíso (05)",cat_nombre_region=="nombre_region_06"~"O'Higgins (06)",cat_nombre_region=="nombre_region_07"~"Maule (07)",cat_nombre_region=="nombre_region_08"~"Biobío (08)",cat_nombre_region=="nombre_region_09"~"Araucanía (09)",cat_nombre_region=="nombre_region_10"~"Los Lagos (10)",cat_nombre_region=="nombre_region_11"~"Aysén (11)",cat_nombre_region=="nombre_region_12"~"Magallanes (12)",cat_nombre_region=="nombre_region_13"~"Metropolitana (13)",
cat_nombre_region=="nombre_region_14"~"Los Ríos (14)",cat_nombre_region=="nombre_region_15"~"Arica (15)",cat_nombre_region=="nombre_region_16"~"Ñuble (16)",
))%>%
janitor::clean_names()
#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_
tipo_centro_pub_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tipo_centro_pub,
amelia_fit$imputations$imp2$tipo_centro_pub,
amelia_fit$imputations$imp3$tipo_centro_pub,
amelia_fit$imputations$imp4$tipo_centro_pub,
amelia_fit$imputations$imp5$tipo_centro_pub,
amelia_fit$imputations$imp6$tipo_centro_pub,
amelia_fit$imputations$imp7$tipo_centro_pub,
amelia_fit$imputations$imp8$tipo_centro_pub,
amelia_fit$imputations$imp9$tipo_centro_pub,
amelia_fit$imputations$imp10$tipo_centro_pub,
amelia_fit$imputations$imp11$tipo_centro_pub,
amelia_fit$imputations$imp12$tipo_centro_pub,
amelia_fit$imputations$imp13$tipo_centro_pub,
amelia_fit$imputations$imp14$tipo_centro_pub,
amelia_fit$imputations$imp15$tipo_centro_pub,
amelia_fit$imputations$imp16$tipo_centro_pub,
amelia_fit$imputations$imp17$tipo_centro_pub,
amelia_fit$imputations$imp18$tipo_centro_pub,
amelia_fit$imputations$imp19$tipo_centro_pub,
amelia_fit$imputations$imp20$tipo_centro_pub,
amelia_fit$imputations$imp21$tipo_centro_pub,
amelia_fit$imputations$imp22$tipo_centro_pub,
amelia_fit$imputations$imp23$tipo_centro_pub,
amelia_fit$imputations$imp24$tipo_centro_pub,
amelia_fit$imputations$imp25$tipo_centro_pub,
amelia_fit$imputations$imp26$tipo_centro_pub,
amelia_fit$imputations$imp27$tipo_centro_pub,
amelia_fit$imputations$imp28$tipo_centro_pub,
amelia_fit$imputations$imp29$tipo_centro_pub,
amelia_fit$imputations$imp30$tipo_centro_pub
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::filter(value==TRUE) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(tipo_centro_pub_to_imputation=ifelse(n()>15,1,0))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss6<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>%
dplyr::left_join(dplyr::select(nombre_region_imputed,amelia_fit_imputations_imp1_row,cat_nombre_region), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(nombre_region=factor(dplyr::case_when(is.na(nombre_region)~as.character(cat_nombre_region),TRUE~as.character(nombre_region)))) %>%
dplyr::left_join(dplyr::select(tipo_centro_pub_imputed,amelia_fit_imputations_imp1_row,tipo_centro_pub_to_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tipo_centro_pub=factor(dplyr::case_when(is.na(tipo_centro_pub)~as.logical(tipo_centro_pub_to_imputation),TRUE~as.logical(tipo_centro_pub)))) %>%
dplyr::select(-c(cat_est_conyugal,cat_nombre_region,tipo_centro_pub_to_imputation,tipo_centro_pub_to_imputation)) %>%
data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss6)-nrow(CONS_C1_df_dup_SEP_2020_match_miss5)>0){
warning("Some rows were added in the imputation")}
It was impossible to impute region of the center in 6 cases due to
ties in the different imputed values. In the case of public or private
center, all values were imputed
Drug Dependence Diagnosis
We looked over possible imputations to the diagnosis of drug
consumption (n=1).
# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub
dg_trs_cons_sus_or_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$dg_trs_cons_sus_or,
amelia_fit$imputations$imp2$dg_trs_cons_sus_or,
amelia_fit$imputations$imp3$dg_trs_cons_sus_or,
amelia_fit$imputations$imp4$dg_trs_cons_sus_or,
amelia_fit$imputations$imp5$dg_trs_cons_sus_or,
amelia_fit$imputations$imp6$dg_trs_cons_sus_or,
amelia_fit$imputations$imp7$dg_trs_cons_sus_or,
amelia_fit$imputations$imp8$dg_trs_cons_sus_or,
amelia_fit$imputations$imp9$dg_trs_cons_sus_or,
amelia_fit$imputations$imp10$dg_trs_cons_sus_or,
amelia_fit$imputations$imp11$dg_trs_cons_sus_or,
amelia_fit$imputations$imp12$dg_trs_cons_sus_or,
amelia_fit$imputations$imp13$dg_trs_cons_sus_or,
amelia_fit$imputations$imp14$dg_trs_cons_sus_or,
amelia_fit$imputations$imp15$dg_trs_cons_sus_or,
amelia_fit$imputations$imp16$dg_trs_cons_sus_or,
amelia_fit$imputations$imp17$dg_trs_cons_sus_or,
amelia_fit$imputations$imp18$dg_trs_cons_sus_or,
amelia_fit$imputations$imp19$dg_trs_cons_sus_or,
amelia_fit$imputations$imp20$dg_trs_cons_sus_or,
amelia_fit$imputations$imp21$dg_trs_cons_sus_or,
amelia_fit$imputations$imp22$dg_trs_cons_sus_or,
amelia_fit$imputations$imp23$dg_trs_cons_sus_or,
amelia_fit$imputations$imp24$dg_trs_cons_sus_or,
amelia_fit$imputations$imp25$dg_trs_cons_sus_or,
amelia_fit$imputations$imp26$dg_trs_cons_sus_or,
amelia_fit$imputations$imp27$dg_trs_cons_sus_or,
amelia_fit$imputations$imp28$dg_trs_cons_sus_or,
amelia_fit$imputations$imp29$dg_trs_cons_sus_or,
amelia_fit$imputations$imp30$dg_trs_cons_sus_or
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::filter(value==TRUE) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(dg_trs_cons_imputation=ifelse(n()>15,1,0))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss7<-
CONS_C1_df_dup_SEP_2020_match_miss6 %>%
dplyr::left_join(dplyr::select(dg_trs_cons_sus_or_imputed,amelia_fit_imputations_imp1_row,dg_trs_cons_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(dg_trs_cons_sus_or=factor(dplyr::case_when(is.na(dg_trs_cons_sus_or)~as.logical(dg_trs_cons_imputation),TRUE~as.logical(dg_trs_cons_sus_or)))) %>%
dplyr::select(-dg_trs_cons_imputation) %>%
data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss7)-nrow(CONS_C1_df_dup_SEP_2020_match_miss6)>0){
warning("Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Cause of Discharge
We looked over possible imputations to the truly missing values,
discarding missing values due to censorship (n=20).
motivo_de_egreso_a_imputar<-
CONS_C1_df_dup_SEP_2020_match_miss %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_imp)) %>% dplyr::filter(!is.na(fech_egres_imp))%>%dplyr::select(row)
motivodeegreso_mod_imp_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$motivodeegreso_mod_imp,
amelia_fit$imputations$imp2$motivodeegreso_mod_imp,
amelia_fit$imputations$imp3$motivodeegreso_mod_imp,
amelia_fit$imputations$imp4$motivodeegreso_mod_imp,
amelia_fit$imputations$imp5$motivodeegreso_mod_imp,
amelia_fit$imputations$imp6$motivodeegreso_mod_imp,
amelia_fit$imputations$imp7$motivodeegreso_mod_imp,
amelia_fit$imputations$imp8$motivodeegreso_mod_imp,
amelia_fit$imputations$imp9$motivodeegreso_mod_imp,
amelia_fit$imputations$imp10$motivodeegreso_mod_imp,
amelia_fit$imputations$imp11$motivodeegreso_mod_imp,
amelia_fit$imputations$imp12$motivodeegreso_mod_imp,
amelia_fit$imputations$imp13$motivodeegreso_mod_imp,
amelia_fit$imputations$imp14$motivodeegreso_mod_imp,
amelia_fit$imputations$imp15$motivodeegreso_mod_imp,
amelia_fit$imputations$imp16$motivodeegreso_mod_imp,
amelia_fit$imputations$imp17$motivodeegreso_mod_imp,
amelia_fit$imputations$imp18$motivodeegreso_mod_imp,
amelia_fit$imputations$imp19$motivodeegreso_mod_imp,
amelia_fit$imputations$imp20$motivodeegreso_mod_imp,
amelia_fit$imputations$imp21$motivodeegreso_mod_imp,
amelia_fit$imputations$imp22$motivodeegreso_mod_imp,
amelia_fit$imputations$imp23$motivodeegreso_mod_imp,
amelia_fit$imputations$imp24$motivodeegreso_mod_imp,
amelia_fit$imputations$imp25$motivodeegreso_mod_imp,
amelia_fit$imputations$imp26$motivodeegreso_mod_imp,
amelia_fit$imputations$imp27$motivodeegreso_mod_imp,
amelia_fit$imputations$imp28$motivodeegreso_mod_imp,
amelia_fit$imputations$imp29$motivodeegreso_mod_imp,
amelia_fit$imputations$imp30$motivodeegreso_mod_imp
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::filter(amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)) %>%
#FILTRAR CASOS QUE SON ILÓGICOS: MUERTES CON TRATAMIENTOS POSTERIORES (1)
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,dup, duplicates_filtered,evaluacindelprocesoteraputico,fech_ing_next_treat),by=c("amelia_fit_imputations_imp1_row"="row")) %>%
dplyr::mutate(value_death=dplyr::case_when(value=="Death"& !is.na(fech_ing_next_treat)~1,TRUE~0)) %>%
dplyr::filter(value_death!=1) %>%
#:#:#:#:#:
dplyr::count(amelia_fit_imputations_imp1_row,value) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::slice_min(n, n = 1) %>%
dplyr::summarise(adm_dis=sum(value == "Administrative discharge",na.rm=T),
death=sum(value == "Death",na.rm=T),
referral=sum(value == "Referral to another treatment",na.rm=T),
ter_dis=sum(value == "Therapeutic discharge",na.rm=T),
dropout=sum(value =="Drop-out",na.rm=T)) %>%
rowwise() %>%
dplyr::mutate(ties=sum(c_across(adm_dis:dropout)),ties=ifelse(ties>1,1,0)) %>%
#dplyr::filter(ties==1) %>%
dplyr::ungroup() %>%
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,fech_egres_num,dup, duplicates_filtered,evaluacindelprocesoteraputico,tipo_centro_derivacion),by=c("amelia_fit_imputations_imp1_row"="row")) %>%
dplyr::mutate(motivodeegreso_mod_imp_imputation= dplyr::case_when(
ties==0 & adm_dis==1 & fech_egres_imp<"2019-11-13"~"Administrative discharge",
#its an absorbing state. should not have posterior treatments
ties==0 & death==1 & fech_egres_imp<"2019-11-13" & dup==duplicates_filtered~"Death",
ties==0 & referral==1 & fech_egres_imp<"2019-11-13"~"Referral to another treatment",
ties==0 & ter_dis==1 & fech_egres_imp<"2019-11-13"~"Therapeutic discharge",
ties==0 & dropout==1 & fech_egres_imp<"2019-11-13"~"Drop-out",
#si no hay fecha de egreso, está en la fecha de censura, sólo puede ser tratamiento en curso
fech_egres_imp>="2019-11-13"~NA_character_,
TRUE~NA_character_)) %>%
#si tiene evaluacindelprocesoteraputico, es porque no es un tratamiento en curso
dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp")
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
CONS_C1_df_dup_SEP_2020_match_miss8<-
CONS_C1_df_dup_SEP_2020_match_miss7 %>%
dplyr::left_join(motivodeegreso_mod_imp_imputed[,c("amelia_fit_imputations_imp1_row","motivodeegreso_mod_imp_original","fech_egres_imp","fech_egres_num","motivodeegreso_mod_imp_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::select(row,hash_key,motivodeegreso_mod_imp_original, motivodeegreso_mod_imp_imputation,motivodeegreso_mod_imp,fech_egres_num,fech_egres_imp)
dplyr::mutate(motivodeegreso_mod_imp=factor(dplyr::case_when(is.na(motivodeegreso_mod_imp)~motivodeegreso_mod_imp_imputation,
motivodeegreso_mod_imp_original=="Ongoing treatment"~NA_character_, TRUE~as.character(motivodeegreso_mod_imp)))) %>%
dplyr::select(-motivodeegreso_mod_imp_imputation,-fech_egres_imp,-fech_egres_num,-motivodeegreso_mod_imp_original) %>%
#dplyr::rename_all( list(~paste0(., ".left"))) %>%
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp) %>%
dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp"),by="row") %>%
data.table()
# CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp_original)
#
if(
CONS_C1_df_dup_SEP_2020_match_miss8 %>% dplyr::filter(motivodeegreso_mod_imp_original!="Ongoing treatment",is.na(motivodeegreso_mod_imp)) %>% nrow()>0){"There are missing values on the cause of discharge"}
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss8)-nrow(CONS_C1_df_dup_SEP_2020_match_miss7)>0){
warning("Some rows were added in the imputation")}
A total of 2 cases were not imputed due to ties in the imputed
values.
Evaluation of the Therapeutic Process
Another variable that is worth imputing is the Evaluation of the
Therapeutic Process at Discharge (n= 7,378). In case of ties, we
selected the imputed values with the value with the minimum evaluation.
It must be considered that most of the null values could be explained by
censoring or by not completing the treatment within the period of the
study (n= 7,361).
# Ver distintos valores propuestos para sustancia de inciio
evaluacindelprocesoteraputico_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp2$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp3$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp4$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp5$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp6$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp7$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp8$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp9$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp10$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp11$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp12$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp13$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp14$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp15$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp16$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp17$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp18$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp19$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp20$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp21$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp22$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp23$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp24$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp25$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp26$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp27$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp28$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp29$evaluacindelprocesoteraputico,
amelia_fit$imputations$imp30$evaluacindelprocesoteraputico
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(high_ach_1=sum(value == "1-High Achievement",na.rm=T),
med_ach_2=sum(value == "2-Medium Achievement",na.rm=T),
min_ach_3=sum(value =="3-Minimum Achievement",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(evaluacindelprocesoteraputico_imputation= dplyr::case_when(
(high_ach_1 >med_ach_2) & (med_ach_2 >min_ach_3)~"1-High Achievement",
(med_ach_2>high_ach_1) & (med_ach_2 >min_ach_3)~"2-Medium Achievement",
(min_ach_3>med_ach_2) & (min_ach_3 >high_ach_1)~"3-Minimum Achievement"))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_match_miss9<-
CONS_C1_df_dup_SEP_2020_match_miss8 %>%
dplyr::left_join(evaluacindelprocesoteraputico_imputed[,c("amelia_fit_imputations_imp1_row","evaluacindelprocesoteraputico_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(evaluacindelprocesoteraputico=factor(dplyr::case_when(is.na(evaluacindelprocesoteraputico) & motivodeegreso_mod_imp %in% c("Drop-out","Administrative discharge","Therapeutic discharge","Referral to another treatment")~evaluacindelprocesoteraputico_imputation,
is.na(motivodeegreso_mod_imp)~NA_character_,
TRUE~as.character(evaluacindelprocesoteraputico)))) %>%
dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::select(-evaluacindelprocesoteraputico_imputation) %>%
data.table()
CONS_C1_df_dup_SEP_2020_match_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico) %>%
knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
caption = paste0("Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress"),
col.names = c("Cause of Discharge","1-High Achievement", "2- Medium Achievement","3- Minimum Achievement","Null Values"),
align =rep('c', 101)) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
kableExtra::scroll_box(width = "100%", height = "375px")
Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress
Cause of Discharge
|
1-High Achievement
|
2- Medium Achievement
|
3- Minimum Achievement
|
Null Values
|
Administrative discharge
|
865
|
4,428
|
4,490
|
0
|
Death
|
0
|
0
|
1
|
0
|
Drop-out
|
1,767
|
16,838
|
37,301
|
0
|
Referral to another treatment
|
1,299
|
5,834
|
4,704
|
0
|
Therapeutic discharge
|
17,120
|
6,136
|
1,120
|
0
|
NA
|
0
|
0
|
0
|
7,853
|
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss9)-nrow(CONS_C1_df_dup_SEP_2020_match_miss8)>0){
warning("Some rows were added in the imputation")}
As a result of the imputations, some values were not possible to
impute (n=7853).
As seen in the table above, ongoing treatments did not have an
evaluation process, which is logically valid, since their treatment
competition was not captured.
Treatment Setting (Residential)
We looked over possible imputations to the treatment setting
(n=97).
# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub
tipo_de_plan_res_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tipo_de_plan_res,
amelia_fit$imputations$imp2$tipo_de_plan_res,
amelia_fit$imputations$imp3$tipo_de_plan_res,
amelia_fit$imputations$imp4$tipo_de_plan_res,
amelia_fit$imputations$imp5$tipo_de_plan_res,
amelia_fit$imputations$imp6$tipo_de_plan_res,
amelia_fit$imputations$imp7$tipo_de_plan_res,
amelia_fit$imputations$imp8$tipo_de_plan_res,
amelia_fit$imputations$imp9$tipo_de_plan_res,
amelia_fit$imputations$imp10$tipo_de_plan_res,
amelia_fit$imputations$imp11$tipo_de_plan_res,
amelia_fit$imputations$imp12$tipo_de_plan_res,
amelia_fit$imputations$imp13$tipo_de_plan_res,
amelia_fit$imputations$imp14$tipo_de_plan_res,
amelia_fit$imputations$imp15$tipo_de_plan_res,
amelia_fit$imputations$imp16$tipo_de_plan_res,
amelia_fit$imputations$imp17$tipo_de_plan_res,
amelia_fit$imputations$imp18$tipo_de_plan_res,
amelia_fit$imputations$imp19$tipo_de_plan_res,
amelia_fit$imputations$imp20$tipo_de_plan_res,
amelia_fit$imputations$imp21$tipo_de_plan_res,
amelia_fit$imputations$imp22$tipo_de_plan_res,
amelia_fit$imputations$imp23$tipo_de_plan_res,
amelia_fit$imputations$imp24$tipo_de_plan_res,
amelia_fit$imputations$imp25$tipo_de_plan_res,
amelia_fit$imputations$imp26$tipo_de_plan_res,
amelia_fit$imputations$imp27$tipo_de_plan_res,
amelia_fit$imputations$imp28$tipo_de_plan_res,
amelia_fit$imputations$imp29$tipo_de_plan_res,
amelia_fit$imputations$imp30$tipo_de_plan_res
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(n_res=sum(value=="1",na.rm=T),n_amb=sum(value=="0",na.rm=T))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss10<-
CONS_C1_df_dup_SEP_2020_match_miss9 %>%
dplyr::left_join(dplyr::select(tipo_de_plan_res_imputed,amelia_fit_imputations_imp1_row,n_res,n_amb), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tipo_de_plan_res=factor(dplyr::case_when(is.na(tipo_de_plan_res)& (n_res>n_amb)~"1",is.na(tipo_de_plan_res)& (n_res<n_amb)~"0",TRUE~as.character(tipo_de_plan_res)))) %>%
dplyr::select(-n_res,-n_amb) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss10)-nrow(CONS_C1_df_dup_SEP_2020_match_miss9)>0){
warning("Some rows were added in the imputation")}
As a result of the process of imputation, some values were not
possible to impute (n=5).
Tenure status of households
Another variable that is worth imputing is the Tenure status of
households (n= 6,319). In case of ties, we selected the imputed values
with the value with the minimum involvement. In case of ties, we kept
what we thought was the most vulnerable value (discarding “Owner” or
“Renting” values).
tenencia_de_la_vivienda_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp2$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp3$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp4$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp5$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp6$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp7$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp8$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp9$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp10$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp11$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp12$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp13$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp14$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp15$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp16$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp17$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp18$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp19$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp20$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp21$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp22$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp23$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp24$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp25$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp26$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp27$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp28$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp29$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp30$tenencia_de_la_vivienda_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row, value) %>%
tally() %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup()
tenencia_de_la_vivienda_mod_imputed_dup<-
tenencia_de_la_vivienda_mod_imputed %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(num=n()) %>%
dplyr::filter(num>1) %>%
dplyr::ungroup() %>%
#1) owner, discard if it is in the maximum
dplyr::mutate(n=dplyr::case_when(value=="Owner/Transferred dwellings/Pays Dividends"~0,T~as.numeric(n))) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
#2) Renting vs. stays temporarily with a relative, keep the second
dplyr::mutate(n=dplyr::case_when(value=="Renting"~0,T~as.numeric(n))) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_dup=n())
missing_values_to_impute_tenure_hshld<-
tenencia_de_la_vivienda_mod_imputed %>%
dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_dup, by=c("amelia_fit_imputations_imp1_row", "value")) %>%
#si es vacío, y no está en la base, es valor 0 (es difícil que)
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(sum= suppressWarnings(max(num, na.rm=T))) %>%
dplyr::ungroup() %>%
#descarto los que presentaron más de un valor para una misma fila y aquellos que no fueron seleccionados
dplyr::mutate(descartar=dplyr::case_when(sum>1 & is.na(n.y)~1,T~0)) %>%
dplyr::filter(descartar==0) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_dup_row=n()) %>%
dplyr::ungroup() %>%
dplyr::filter(n_dup_row>1) %>%
dplyr::distinct(amelia_fit_imputations_imp1_row)
tenencia_de_la_vivienda_mod_imputed_final<-
tenencia_de_la_vivienda_mod_imputed %>%
dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_dup, by=c("amelia_fit_imputations_imp1_row", "value")) %>%
#si es vacío, y no está en la base, es valor 0 (es difícil que)
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(sum= suppressWarnings(max(num, na.rm=T))) %>%
dplyr::ungroup() %>%
#descarto los que presentaron más de un valor para una misma fila y aquellos que no fueron seleccionados
dplyr::mutate(descartar=dplyr::case_when(sum>1 & is.na(n.y)~1,T~0)) %>%
dplyr::filter(descartar==0) %>%
dplyr::filter(!amelia_fit_imputations_imp1_row %in% as.character(unlist(missing_values_to_impute_tenure_hshld)))
ifelse(nrow(tenencia_de_la_vivienda_mod_imputed_final)/length(unique(CONS_C1_df_dup_SEP_2020_match_miss10$row))>1,
"There are still more than one value in the imputation","")
## [1] ""
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_match_miss11<-
CONS_C1_df_dup_SEP_2020_match_miss10 %>%
dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_final[,c("amelia_fit_imputations_imp1_row","value")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tenencia_de_la_vivienda_mod=factor(dplyr::case_when(is.na(tenencia_de_la_vivienda_mod) ~value,
TRUE~as.character(tenencia_de_la_vivienda_mod)))) %>%
dplyr::select(-value) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss11)-nrow(CONS_C1_df_dup_SEP_2020_match_miss10)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, some values were not possible to
impute (n=4).
Primary or main substance
Then we imputed the primary/main substance at admission (n= 1).
# Ver distintos valores propuestos para sustancia de inciio
sus_principal_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$sus_principal_mod,
amelia_fit$imputations$imp2$sus_principal_mod,
amelia_fit$imputations$imp3$sus_principal_mod,
amelia_fit$imputations$imp4$sus_principal_mod,
amelia_fit$imputations$imp5$sus_principal_mod,
amelia_fit$imputations$imp6$sus_principal_mod,
amelia_fit$imputations$imp7$sus_principal_mod,
amelia_fit$imputations$imp8$sus_principal_mod,
amelia_fit$imputations$imp9$sus_principal_mod,
amelia_fit$imputations$imp10$sus_principal_mod,
amelia_fit$imputations$imp11$sus_principal_mod,
amelia_fit$imputations$imp12$sus_principal_mod,
amelia_fit$imputations$imp13$sus_principal_mod,
amelia_fit$imputations$imp14$sus_principal_mod,
amelia_fit$imputations$imp15$sus_principal_mod,
amelia_fit$imputations$imp16$sus_principal_mod,
amelia_fit$imputations$imp17$sus_principal_mod,
amelia_fit$imputations$imp18$sus_principal_mod,
amelia_fit$imputations$imp19$sus_principal_mod,
amelia_fit$imputations$imp20$sus_principal_mod,
amelia_fit$imputations$imp21$sus_principal_mod,
amelia_fit$imputations$imp22$sus_principal_mod,
amelia_fit$imputations$imp23$sus_principal_mod,
amelia_fit$imputations$imp24$sus_principal_mod,
amelia_fit$imputations$imp25$sus_principal_mod,
amelia_fit$imputations$imp26$sus_principal_mod,
amelia_fit$imputations$imp27$sus_principal_mod,
amelia_fit$imputations$imp28$sus_principal_mod,
amelia_fit$imputations$imp29$sus_principal_mod,
amelia_fit$imputations$imp30$sus_principal_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(sus_prin_mar=sum(value == "Marijuana",na.rm=T),
sus_prin_oh=sum(value == "Alcohol",na.rm=T),
sus_prin_pb=sum(value == "Cocaine paste",na.rm=T),
sus_prin_coc=sum(value =="Cocaine hydrochloride",na.rm=T),
sus_prin_other=sum(value =="Other",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(ties= base::rowSums(dplyr::select(.,starts_with("sus_prin_"))>0)) %>%
dplyr::mutate(sus_principal_mod_imp= dplyr::case_when(
(sus_prin_mar> sus_prin_oh)& (sus_prin_mar> sus_prin_pb)& (sus_prin_mar> sus_prin_coc)& (sus_prin_mar> sus_prin_other)~"Marijuana",
(sus_prin_oh> sus_prin_mar)& (sus_prin_oh> sus_prin_pb)& (sus_prin_oh> sus_prin_coc)& (sus_prin_oh> sus_prin_other)~"Alcohol",
(sus_prin_pb> sus_prin_mar)& (sus_prin_pb> sus_prin_oh)& (sus_prin_pb> sus_prin_coc)& (sus_prin_pb> sus_prin_other)~"Cocaine paste",
(sus_prin_coc> sus_prin_mar)& (sus_prin_coc> sus_prin_oh)& (sus_prin_coc> sus_prin_pb)& (sus_prin_coc> sus_prin_other)~"Cocaine hydrochloride",
(sus_prin_other> sus_prin_mar)& (sus_prin_other> sus_prin_oh)& (sus_prin_other> sus_prin_pb)& (sus_prin_other> sus_prin_coc)~"Cocaine hydrochloride"
))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss12<-
CONS_C1_df_dup_SEP_2020_match_miss11 %>%
dplyr::left_join(sus_principal_mod_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(sus_principal_mod=factor(dplyr::case_when(is.na(sus_principal_mod)~as.character(sus_principal_mod_imp),
TRUE~as.character(sus_principal_mod)))) %>%
dplyr::select(-c(sus_prin_mar, sus_prin_oh, sus_prin_pb, sus_prin_coc, sus_prin_other, ties, sus_principal_mod_imp)) %>%
data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss12)-nrow(CONS_C1_df_dup_SEP_2020_match_miss11)>0){
warning("Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Sample Characteristics
We checked the characteristics of the sample depending on type of
treatment (Residential or Ambulatory/Outpatients).
# -sus_ini_mod_mvv,-estado_conyugal_2,-escolaridad_rec,-freq_cons_sus_prin,-nombre_region,-tipo_centro_pub,-evaluacindelprocesoteraputico,-motivodeegreso_mod_imp,-dg_trs_cons_sus_or,-tipo_de_plan_res,-edad_ini_cons,-via_adm_sus_prin_act
#$109,756
#añado los imputados
CONS_C1_df_dup_SEP_2020_match_miss_after_imp<-
CONS_C1_df_dup_SEP_2020_match_miss%>%
dplyr::select(-unlist(dplyr::filter(data.table::as.data.table(t(missing.values),keep.rownames = T),V1>0)[,rn])) %>% #
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020_match_miss12,
row,
unlist(dplyr::filter(data.table::as.data.table(t(missing.values),keep.rownames = T),V1>0)[,rn])),by="row") %>%
dplyr::arrange(tipo_de_plan_res,hash_key,row) %>%
#elimino esta variable porque es accesoria
# dplyr::select(-edad_ini_sus_prin) %>%
#para transformar el motivo de egreso
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_num,dias_treat_imp_sin_na),by="row") %>%
#dplyr::filter(fech_egres_num==18213,!is.na(motivodeegreso_mod_imp)) %>%
dplyr::mutate(motivodeegreso_mod_imp=dplyr::case_when(dias_treat_imp_sin_na>=90 & motivodeegreso_mod_imp=="Drop-out"~ "Late Drop-out",
dias_treat_imp_sin_na<90 & motivodeegreso_mod_imp=="Drop-out"~ "Early Drop-out",
fech_egres_num==18213 & is.na(motivodeegreso_mod_imp)~"Ongoing treatment",
TRUE~as.character(motivodeegreso_mod_imp)
)) %>% #janitor::tabyl(motivodeegreso_mod_imp)
#Temporary variable to cover missing cases but not consider ongoing treatments
dplyr::mutate(evaluacindelprocesoteraputico2=dplyr::case_when(fech_egres_num==18213 & is.na(evaluacindelprocesoteraputico)~"Ongoing treatment",
TRUE~as.character(evaluacindelprocesoteraputico)
)) %>%
dplyr::mutate(sum_miss = base::rowSums(is.na(dplyr::select(.,c("sus_ini_mod_mvv","sus_principal_mod","estado_conyugal_2","escolaridad_rec","freq_cons_sus_prin","nombre_region","tipo_centro_pub","motivodeegreso_mod_imp","dg_trs_cons_sus_or","tipo_de_plan_res","edad_ini_cons","sexo_2","edad_al_ing","fech_ing_num","tenencia_de_la_vivienda_mod"))))) %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(sum_miss=sum(sum_miss)) %>%
dplyr::ungroup()
CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados <-
CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>%
dplyr::filter(sum_miss>0)
CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados <-
CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>%
dplyr::filter(sum_miss==0) %>%
dplyr::select(-sum_miss) %>%
#dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","condicion_ocupacional_corr")], by="row") %>%
dplyr::select(-evaluacindelprocesoteraputico2)
Considering that some missing values were not able to be imputed (due
to ties in the candidate values for imputation or inconsistent values
for imputations) (385, users=307), we ended the process with a total of
109,371 complete cases (users=84,741).
kableone <- function(x, caption=NULL, col.names=NA, smd=T, test=T, varLabels=T, noSpaces=T, printToggle=T, dropEqual=F, ...) {
capture.output(x <- print(x, smd=T, test=test, varLabels=varLabels,noSpaces=noSpaces, printToggle=printToggle, dropEqual=dropEqual, ...))
knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption= caption, col.names= col.names)
}
match.on.sel<- match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico")]
match.on.sel<-c("sus_principal_mod","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","dg_trs_cons_sus_or", "tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","tenencia_de_la_vivienda_mod","condicion_ocupacional_corr")
catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","tipo_centro_pub","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","dg_trs_cons_sus_or","nombre_region","tipo_de_plan_res","sexo_2","condicion_ocupacional_corr")
#length(unique(CONS_C1_df_dup_SEP_2020_match$fech_ing_num))
#:#:#:#:#: DISMINUIR LA HETEROGENEIDAD DE LA FECHA DE INGRESO
# FORMAS DE CONSTREÑIR LA VARIABLE:
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-cut(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,100)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-CONS_C1_df_dup_SEP_2020_match_fech_ing_num
#CONS_C1_df_dup_SEP_2020_match_fech_ing_num<-CONS_C1_df_dup_SEP_2020_match$fech_ing_num
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,0)))
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)))
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#:#:#:#:#:
CONS_C1_df_dup_SEP_2020_match_not_miss2 <-
CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>%
dplyr::filter(dup==1) %>%
dplyr::select(-dias_treat_imp_sin_na,-fech_egres_num)
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_principal_mod,"label")<-"Primary Substance at Admission"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_ini_mod_mvv,"label")<-"First Substance Used"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$edad_ini_cons,"label")<-"Substance Use Onset Age"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$freq_cons_sus_prin,"label")<-"Primary Substance at Admission Usage Frequency"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$nombre_region,"label")<-"Regional Location of Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_cie_10_rec,"label")<-"Psychiatric Comorbidity"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_trs_cons_sus_or,"label")<-"Drug Dependence Diagnosis"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$abandono_temprano_rec,"label")<-"Early Dropout (Against Staff Advice)"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,"label")<-"Residential"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_centro_pub,"label")<-"Public Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$condicion_ocupacional_corr,"label")<-"Employment Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$origen_ingreso_mod,"label")<-"Treatment Admission Motive"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tenencia_de_la_vivienda_mod,"label")<-"Tenure status of household"
pre_tab1<-Sys.time()
tab1<-
CreateTableOne(vars = match.on.sel, strata = "tipo_de_plan_res",
data = CONS_C1_df_dup_SEP_2020_match_not_miss2, factorVars = catVars, smd=T)
post_tab1<-Sys.time()
diff_time_tab1=post_tab1-pre_tab1
kableone(tab1,
caption = paste0("Table 5. Covariate Balance in the Variables of Interest"),
col.names= c("Ambulatory","Residential", "p-values","test","SMD"),
nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num"),#"\\hline",
smd=T, test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
#()
row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 10) %>%
#footnote(general = "Here is a general comments of the table. ",
# number = c("Footnote 1; ", "Footnote 2; "),
# alphabet = c("Footnote A; ", "Footnote B; "),
# symbol = c("Footnote Symbol 1; ", "Footnote Symbol 2")
# )%>%
kableExtra::kable_classic() %>%
scroll_box(width = "100%", height = "400px")
Table 5. Covariate Balance in the Variables of Interest
|
Ambulatory
|
Residential
|
p-values
|
test
|
SMD
|
n
|
72053
|
12688
|
|
|
|
Primary Substance at Admission (%)
|
|
|
<0.001
|
|
0.619
|
Alcohol
|
26286 (36.5)
|
2418 (19.1)
|
|
|
|
Cocaine hydrochloride
|
14473 (20.1)
|
1658 (13.1)
|
|
|
|
Cocaine paste
|
24556 (34.1)
|
8018 (63.2)
|
|
|
|
Marijuana
|
5362 (7.4)
|
392 (3.1)
|
|
|
|
Other
|
1376 (1.9)
|
202 (1.6)
|
|
|
|
First Substance Used (%)
|
|
|
<0.001
|
|
0.370
|
Alcohol
|
41391 (57.4)
|
5066 (39.9)
|
|
|
|
Cocaine hydrochloride
|
2948 (4.1)
|
522 (4.1)
|
|
|
|
Cocaine paste
|
7647 (10.6)
|
2227 (17.6)
|
|
|
|
Marijuana
|
18403 (25.5)
|
4555 (35.9)
|
|
|
|
Other
|
1664 (2.3)
|
318 (2.5)
|
|
|
|
Marital Status (%)
|
|
|
<0.001
|
|
0.309
|
Married/Shared living arrangements
|
26151 (36.3)
|
2910 (22.9)
|
|
|
|
Separated/Divorced
|
7709 (10.7)
|
1317 (10.4)
|
|
|
|
Single
|
37330 (51.8)
|
8330 (65.7)
|
|
|
|
Widower
|
863 (1.2)
|
131 (1.0)
|
|
|
|
Educational Attainment (%)
|
|
|
<0.001
|
|
0.124
|
3-Completed primary school or less
|
21851 (30.3)
|
4568 (36.0)
|
|
|
|
2-Completed high school or less
|
37191 (51.6)
|
6137 (48.4)
|
|
|
|
1-More than high school
|
13011 (18.1)
|
1983 (15.6)
|
|
|
|
Substance Use Onset Age (median [IQR])
|
15.00 [14.00, 18.00]
|
15.00 [13.00, 17.00]
|
<0.001
|
nonnorm
|
0.090
|
Primary Substance at Admission Usage Frequency (%)
|
|
|
<0.001
|
|
0.767
|
1 day a week or more
|
5323 (7.4)
|
271 (2.1)
|
|
|
|
2 to 3 days a week
|
22312 (31.0)
|
1328 (10.5)
|
|
|
|
4 to 6 days a week
|
12212 (16.9)
|
1650 (13.0)
|
|
|
|
Daily
|
28255 (39.2)
|
9223 (72.7)
|
|
|
|
Did not use
|
1095 (1.5)
|
84 (0.7)
|
|
|
|
Less than 1 day a week
|
2856 (4.0)
|
132 (1.0)
|
|
|
|
Treatment Admission Motive (%)
|
|
|
<0.001
|
|
0.509
|
Spontaneous
|
33627 (46.7)
|
4274 (33.7)
|
|
|
|
Assisted Referral
|
4932 (6.8)
|
3004 (23.7)
|
|
|
|
Other
|
3749 (5.2)
|
736 (5.8)
|
|
|
|
Justice Sector
|
7137 (9.9)
|
810 (6.4)
|
|
|
|
Health Sector
|
22608 (31.4)
|
3864 (30.5)
|
|
|
|
Psychiatric Comorbidity (%)
|
|
|
<0.001
|
|
0.317
|
Without psychiatric comorbidity
|
28994 (40.2)
|
3245 (25.6)
|
|
|
|
Diagnosis unknown (under study)
|
13260 (18.4)
|
2766 (21.8)
|
|
|
|
With psychiatric comorbidity
|
29799 (41.4)
|
6677 (52.6)
|
|
|
|
Regional Location of Center (%)
|
|
|
<0.001
|
|
0.388
|
Antofagasta (02)
|
2290 (3.2)
|
697 (5.5)
|
|
|
|
Araucanía (09)
|
2221 (3.1)
|
161 (1.3)
|
|
|
|
Arica (15)
|
1312 (1.8)
|
728 (5.7)
|
|
|
|
Atacama (03)
|
1829 (2.5)
|
262 (2.1)
|
|
|
|
Aysén (11)
|
798 (1.1)
|
40 (0.3)
|
|
|
|
Biobío (08)
|
5089 (7.1)
|
703 (5.5)
|
|
|
|
Coquimbo (04)
|
2798 (3.9)
|
270 (2.1)
|
|
|
|
Los Lagos (10)
|
2644 (3.7)
|
373 (2.9)
|
|
|
|
Los Ríos (14)
|
1110 (1.5)
|
185 (1.5)
|
|
|
|
Magallanes (12)
|
928 (1.3)
|
31 (0.2)
|
|
|
|
Maule (07)
|
4202 (5.8)
|
642 (5.1)
|
|
|
|
Metropolitana (13)
|
35959 (49.9)
|
6251 (49.3)
|
|
|
|
Ñuble (16)
|
539 (0.7)
|
20 (0.2)
|
|
|
|
O’Higgins (06)
|
3636 (5.0)
|
567 (4.5)
|
|
|
|
Tarapacá (01)
|
1348 (1.9)
|
593 (4.7)
|
|
|
|
Valparaíso (05)
|
5350 (7.4)
|
1165 (9.2)
|
|
|
|
Drug Dependence Diagnosis = TRUE (%)
|
49981 (69.4)
|
11643 (91.8)
|
<0.001
|
|
0.590
|
Public Center = TRUE (%)
|
57099 (79.2)
|
3615 (28.5)
|
<0.001
|
|
1.183
|
Sexo Usuario/Sex of User = Women (%)
|
17392 (24.1)
|
3935 (31.0)
|
<0.001
|
|
0.154
|
edad_al_ing (median [IQR])
|
34.41 [27.53, 43.45]
|
32.60 [26.31, 40.82]
|
<0.001
|
nonnorm
|
0.187
|
Fecha de Ingreso a Tratamiento (Numérico)(c)/Date of Admission to
Treatment (Numeric)(c) (median [IQR])
|
16578.00 [15730.00, 17359.00]
|
16153.00 [15342.00, 17023.00]
|
<0.001
|
nonnorm
|
0.292
|
Tenure status of household (%)
|
|
|
<0.001
|
|
0.186
|
Illegal Settlement
|
591 (0.8)
|
308 (2.4)
|
|
|
|
Others
|
1906 (2.6)
|
410 (3.2)
|
|
|
|
Owner/Transferred dwellings/Pays Dividends
|
26710 (37.1)
|
4786 (37.7)
|
|
|
|
Renting
|
12896 (17.9)
|
1642 (12.9)
|
|
|
|
Stays temporarily with a relative
|
29950 (41.6)
|
5542 (43.7)
|
|
|
|
Employment Status (%)
|
|
|
<0.001
|
|
1.026
|
Employed
|
39513 (54.8)
|
1769 (13.9)
|
|
|
|
Inactive
|
7667 (10.6)
|
1190 (9.4)
|
|
|
|
Looking for a job for the first time
|
172 (0.2)
|
21 (0.2)
|
|
|
|
No activity
|
2663 (3.7)
|
1819 (14.3)
|
|
|
|
Not seeking for work
|
493 (0.7)
|
335 (2.6)
|
|
|
|
Unemployed
|
21545 (29.9)
|
7554 (59.5)
|
|
|
|
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne
## Construct a table
#standardized mean differences of greater than 0.1
We checked the similarity in the samples using other measures, such
as the variance ratio of the samples and Kolmogorov-Smirnov(KS)
statistics.
library(cobalt)
bal2<-bal.tab(CONS_C1_df_dup_SEP_2020_match_not_miss2[,match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico")]], treat = CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,
thresholds = c(m = .1, v = 2),
binary = "std",
continuous = "std",
stats = c("mean.diffs", "variance.ratios","ks.statistics"))
#"mean.diffs", "variance.ratios","ks.statistics","ovl.coefficient"
options(knitr.kable.NA = '')
bal2$Balance[,2]<-round(bal2$Balance[,2],2)
bal2$Balance[,4]<-round(bal2$Balance[,4],2)
bal2$Balance[,6]<-round(bal2$Balance[,6],2)
var_names<-
list("sus_principal_mod_Alcohol"= "Primary Subs. Adm.-Alcohol",
"sus_principal_mod_Cocaine hydrochloride"= "Primary Subs. Adm.-Cocaine hydrochloride",
"sus_principal_mod_Cocaine paste"="Primary Subs. Adm.-Cocaine paste",
"sus_principal_mod_Marijuana"="Primary Subs. Adm.-Marijuana",
"sus_principal_mod_Other"="Primary Subs. Adm.-Other",
"origen_ingreso_mod_Spontaneous"="Admission Motive-Spontaneous",
"origen_ingreso_mod_Assisted Referral"= "Admission Motive-Assisted Referral",
"origen_ingreso_mod_Other"="Admission Motive-Other",
"origen_ingreso_mod_Justice Sector"= "Admission Motive-Justice Sector",
"origen_ingreso_mod_Health Sector"="Admission Motive-Health Sector",
"dg_cie_10_rec_Without psychiatric comorbidity"="ICD-10-Wo/Psych Comorbidity",
"dg_cie_10_rec_Diagnosis unknown (under study)"="ICD-10-Dg. Unknown/under study",
"dg_cie_10_rec_With psychiatric comorbidity"="ICD-10-W/Psych Comorbidity",
"sexo_2_Women"="Sex-Women",
"edad_al_ing"="Admission Age",
"fech_ing_num"="Admission Date",
"duplicates_filtered"="Treatments (#)",
"more_one_treat"=">1 treatment",
"sus_ini_mod_mvv_Alcohol"= "First substance used-Alcohol",
"sus_ini_mod_mvv_Cocaine hydrochloride"= "First substance used-Cocaine hydrochloride",
"sus_ini_mod_mvv_Cocaine paste"="First substance used-Cocaine paste",
"sus_ini_mod_mvv_Marijuana"="First substance used-Marijuana",
"sus_ini_mod_mvv_Other"="First substance used-Other",
"estado_conyugal_2_Married/Shared living arrangements"="Marital Status-Married/Shared liv. arr.",
"condicion_ocupacional_corr_Employed"="Emp.Status-Employed",
"condicion_ocupacional_corr_Inactive"="Emp.Status-Inactive",
"condicion_ocupacional_corr_Looking for a job for the first time"="Emp.Status-Looking 1st job",
"condicion_ocupacional_corr_No activity"="Emp.Status- No activity",
"condicion_ocupacional_corr_Not seeking for work"="Emp.Status- Not seeking work",
"condicion_ocupacional_corr_Unemployed"="Emp.Status- Unemployed",
"estado_conyugal_2_Separated/Divorced"="Marital Status-Separated/Divorced",
"estado_conyugal_2_Single"= "Marital Status-Single",
"estado_conyugal_2_Widower"="Marital Status-Widower",
"escolaridad_rec_3-Completed primary school or less"="Educational Attainment-PS or less",
"escolaridad_rec_2-Completed high school or less"="Educational Attainment-HS or less",
"escolaridad_rec_1-More than high school"="Educational Attainment-More than HS",
"freq_cons_sus_prin_1 day a week or more"="Subs Use Freq-1d/wk or more",
"freq_cons_sus_prin_2 to 3 days a week"="Subs Use Freq-2-3d/wk",
"freq_cons_sus_prin_4 to 6 days a week"="Subs Use Freq-4-6d/wk",
"freq_cons_sus_prin_Daily"="Subs Use Freq-Daily",
"freq_cons_sus_prin_Did not use"="Subs Use Freq-Did not use",
"freq_cons_sus_prin_Less than 1 day a week"="Subs Use Freq-Less 1d/wk",
"nombre_region_Antofagasta (02)"="Region-Antofagasta(02)",
"nombre_region_Araucanía (09)"="Region-Araucanía(09)",
"nombre_region_Arica (15)"="Region-Arica(15)",
"nombre_region_Atacama (03)"="Region-Atacama(03)",
"nombre_region_Aysén (11)"="Region-Aysén(11)",
"nombre_region_Biobío (08)"="Region- Biobío(08)",
"nombre_region_Coquimbo (04)"="Region-Coquimbo(04)",
"nombre_region_Los Lagos (10)"="Region-Los Lagos(10)",
"nombre_region_Los Ríos (14)"="Region-Los Ríos(14)",
"nombre_region_Magallanes (12)"="Region-Magallanes(12)",
"nombre_region_Maule (07)"="Region-Maule(07)",
"nombre_region_Metropolitana (13)"="Region-Metropolitana(13)",
"nombre_region_Ñuble (16)"="Region-Ñuble(16)",
"nombre_region_O'Higgins (06)"="Region-O'Higgins(06)",
"nombre_region_Tarapacá (01)"="Region-Tarapacá(01)",
"nombre_region_Valparaíso (05)"="Region-Valparaíso(05)",
"tipo_centro_pub"="Public Center",
"dg_trs_cons_sus_or"= "Drug Dependence diagnosis",
"edad_ini_cons"="Substance Use Onset Age",
"tenencia_de_la_vivienda_mod_Illegal Settlement"="Ten.Stat.Houshld-Ill stlmnt",
"tenencia_de_la_vivienda_mod_Others"="Ten.Stat.Houshld-Other",
"tenencia_de_la_vivienda_mod_Owner/Transferred dwellings/Pays Dividends"="Ten.Stat.Houshld-Owner",
"tenencia_de_la_vivienda_mod_Renting"="Ten.Stat.Houshld-Renting",
"tenencia_de_la_vivienda_mod_Stays temporarily with a relative"="Ten.Stat.Houshld-Temporary stlmnt",
"rn"="Treatment")
var.names<-data.table(data.frame(unlist(var_names)),keep.rownames = T) %>% janitor::clean_names()
balance_prev<-
data.table::data.table(bal2$Balance[,1:6],keep.rownames = T) %>%
dplyr::arrange(-abs(Diff.Un)) %>%
dplyr::left_join(var.names,by="rn") %>%
dplyr::select(unlist_var_names,everything()) %>%
dplyr::select(-rn)
balance_prev %>% #data.table::data.table(keep.rownames = F)
knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
caption = paste0("Table 4. Covariate Balance in the Variables of Interest"),
col.names = c("Variables","Nature of Variables", "Unadjusted SMDs","Threshold","Unadjusted Variance Ratios","Threshold","Unadjusted KS"),
align =rep('c', 101)) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10) %>%
kableExtra::add_footnote( c(paste("Note. ")),
notation = "none") %>%
kableExtra::kable_classic() %>%
kableExtra::scroll_box(width = "100%", height = "375px")
Table 4. Covariate Balance in the Variables of Interest
Variables
|
Nature of Variables
|
Unadjusted SMDs
|
Threshold
|
Unadjusted Variance Ratios
|
Threshold
|
Unadjusted KS
|
Public Center
|
Binary
|
-1.18
|
Not Balanced, >0.1
|
|
|
0.51
|
Emp.Status-Employed
|
Binary
|
-0.95
|
Not Balanced, >0.1
|
|
|
0.41
|
Subs Use Freq-Daily
|
Binary
|
0.72
|
Not Balanced, >0.1
|
|
|
0.33
|
Emp.Status- Unemployed
|
Binary
|
0.62
|
Not Balanced, >0.1
|
|
|
0.30
|
Primary Subs. Adm.-Cocaine paste
|
Binary
|
0.61
|
Not Balanced, >0.1
|
|
|
0.29
|
Drug Dependence diagnosis
|
Binary
|
0.59
|
Not Balanced, >0.1
|
|
|
0.22
|
Subs Use Freq-2-3d/wk
|
Binary
|
-0.52
|
Not Balanced, >0.1
|
|
|
0.20
|
Admission Motive-Assisted Referral
|
Binary
|
0.48
|
Not Balanced, >0.1
|
|
|
0.17
|
Primary Subs. Adm.-Alcohol
|
Binary
|
-0.40
|
Not Balanced, >0.1
|
|
|
0.17
|
Emp.Status- No activity
|
Binary
|
0.38
|
Not Balanced, >0.1
|
|
|
0.11
|
First substance used-Alcohol
|
Binary
|
-0.36
|
Not Balanced, >0.1
|
|
|
0.18
|
ICD-10-Wo/Psych Comorbidity
|
Binary
|
-0.32
|
Not Balanced, >0.1
|
|
|
0.15
|
Marital Status-Married/Shared liv. arr.
|
Binary
|
-0.30
|
Not Balanced, >0.1
|
|
|
0.13
|
Admission Date
|
Contin.
|
-0.29
|
Not Balanced, >0.1
|
1.00
|
Balanced, <2
|
0.14
|
Marital Status-Single
|
Binary
|
0.28
|
Not Balanced, >0.1
|
|
|
0.14
|
Admission Motive-Spontaneous
|
Binary
|
-0.27
|
Not Balanced, >0.1
|
|
|
0.13
|
Subs Use Freq-1d/wk or more
|
Binary
|
-0.25
|
Not Balanced, >0.1
|
|
|
0.05
|
First substance used-Marijuana
|
Binary
|
0.23
|
Not Balanced, >0.1
|
|
|
0.10
|
ICD-10-W/Psych Comorbidity
|
Binary
|
0.23
|
Not Balanced, >0.1
|
|
|
0.11
|
Region-Arica(15)
|
Binary
|
0.21
|
Not Balanced, >0.1
|
|
|
0.04
|
Primary Subs. Adm.-Marijuana
|
Binary
|
-0.20
|
Not Balanced, >0.1
|
|
|
0.04
|
First substance used-Cocaine paste
|
Binary
|
0.20
|
Not Balanced, >0.1
|
|
|
0.07
|
Primary Subs. Adm.-Cocaine hydrochloride
|
Binary
|
-0.19
|
Not Balanced, >0.1
|
|
|
0.07
|
Subs Use Freq-Less 1d/wk
|
Binary
|
-0.19
|
Not Balanced, >0.1
|
|
|
0.03
|
Admission Age
|
Contin.
|
-0.19
|
Not Balanced, >0.1
|
0.84
|
Balanced, <2
|
0.07
|
Region-Tarapacá(01)
|
Binary
|
0.16
|
Not Balanced, >0.1
|
|
|
0.03
|
Sex-Women
|
Binary
|
0.15
|
Not Balanced, >0.1
|
|
|
0.07
|
Emp.Status- Not seeking work
|
Binary
|
0.15
|
Not Balanced, >0.1
|
|
|
0.02
|
Ten.Stat.Houshld-Renting
|
Binary
|
-0.14
|
Not Balanced, >0.1
|
|
|
0.05
|
Admission Motive-Justice Sector
|
Binary
|
-0.13
|
Not Balanced, >0.1
|
|
|
0.04
|
Ten.Stat.Houshld-Ill stlmnt
|
Binary
|
0.13
|
Not Balanced, >0.1
|
|
|
0.02
|
Educational Attainment-PS or less
|
Binary
|
0.12
|
Not Balanced, >0.1
|
|
|
0.06
|
Region-Araucanía(09)
|
Binary
|
-0.12
|
Not Balanced, >0.1
|
|
|
0.02
|
Region-Magallanes(12)
|
Binary
|
-0.12
|
Not Balanced, >0.1
|
|
|
0.01
|
Subs Use Freq-4-6d/wk
|
Binary
|
-0.11
|
Not Balanced, >0.1
|
|
|
0.04
|
Region-Antofagasta(02)
|
Binary
|
0.11
|
Not Balanced, >0.1
|
|
|
0.02
|
Region-Coquimbo(04)
|
Binary
|
-0.10
|
Not Balanced, >0.1
|
|
|
0.02
|
Substance Use Onset Age
|
Contin.
|
-0.09
|
Balanced, <0.1
|
0.91
|
Balanced, <2
|
0.07
|
Region-Aysén(11)
|
Binary
|
-0.09
|
Balanced, <0.1
|
|
|
0.01
|
Region-Ñuble(16)
|
Binary
|
-0.09
|
Balanced, <0.1
|
|
|
0.01
|
Subs Use Freq-Did not use
|
Binary
|
-0.08
|
Balanced, <0.1
|
|
|
0.01
|
ICD-10-Dg. Unknown/under study
|
Binary
|
0.08
|
Balanced, <0.1
|
|
|
0.03
|
Educational Attainment-HS or less
|
Binary
|
-0.06
|
Balanced, <0.1
|
|
|
0.03
|
Educational Attainment-More than HS
|
Binary
|
-0.06
|
Balanced, <0.1
|
|
|
0.02
|
Region- Biobío(08)
|
Binary
|
-0.06
|
Balanced, <0.1
|
|
|
0.02
|
Region-Valparaíso(05)
|
Binary
|
0.06
|
Balanced, <0.1
|
|
|
0.02
|
Region-Los Lagos(10)
|
Binary
|
-0.04
|
Balanced, <0.1
|
|
|
0.01
|
Ten.Stat.Houshld-Temporary stlmnt
|
Binary
|
0.04
|
Balanced, <0.1
|
|
|
0.02
|
Emp.Status-Inactive
|
Binary
|
-0.04
|
Balanced, <0.1
|
|
|
0.01
|
Admission Motive-Other
|
Binary
|
0.03
|
Balanced, <0.1
|
|
|
0.01
|
Region-Atacama(03)
|
Binary
|
-0.03
|
Balanced, <0.1
|
|
|
0.00
|
Region-Maule(07)
|
Binary
|
-0.03
|
Balanced, <0.1
|
|
|
0.01
|
Region-O’Higgins(06)
|
Binary
|
-0.03
|
Balanced, <0.1
|
|
|
0.01
|
Ten.Stat.Houshld-Other
|
Binary
|
0.03
|
Balanced, <0.1
|
|
|
0.01
|
Primary Subs. Adm.-Other
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.00
|
Marital Status-Widower
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.00
|
Admission Motive-Health Sector
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.01
|
Emp.Status-Looking 1st job
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.00
|
First substance used-Other
|
Binary
|
0.01
|
Balanced, <0.1
|
|
|
0.00
|
Marital Status-Separated/Divorced
|
Binary
|
-0.01
|
Balanced, <0.1
|
|
|
0.00
|
Region-Los Ríos(14)
|
Binary
|
-0.01
|
Balanced, <0.1
|
|
|
0.00
|
Region-Metropolitana(13)
|
Binary
|
-0.01
|
Balanced, <0.1
|
|
|
0.01
|
Ten.Stat.Houshld-Owner
|
Binary
|
0.01
|
Balanced, <0.1
|
|
|
0.01
|
First substance used-Cocaine hydrochloride
|
Binary
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Note.
|
We generated a plot to focus on the unbalanced data.
love.plot(bal2, binary = "std",
thresholds = c(m = .2),#, m=.2
var.order = "unadjusted")+
theme_classic()+
ggtitle(NULL)+
labs(caption="Note. Vertical dashed line= Standardized Differences of .1")+
theme( plot.caption=element_text(hjust=0))+
theme(legend.position = "none")
#ggplot(idh_pre, aes(idh_pre$quintil_0,idh_pre$idh_0)) +
# geom_point(aes(colour = idh_pre$groups), size = 5) + labs(x="Quintiles", y="Percentage") +
# theme_bw() +
# scale_fill_manual(values=c("black", "gray60") ) +
# scale_colour_manual(name="Groups",values =c("Control"="black", "Exposed"="gray60")) +
# theme(legend.key = element_rect(colour = NA)) +
# theme(text = element_text(size=20)) +
# scale_y_continuous( limits = c(0,0.4), breaks = seq(.05,.4,by=.05))
#bal.tab(bal2, treat = "tipo_de_plan_res",var.name = "fech_ing_num", data = CONS_C1_df_dup_SEP_2020_match_not_miss2)
Match
The matched variables were defined for the treatments at baseline
(n=84,741).
library(designmatch)
#fine = list(covs = fine_covs)
#solver = list(name = name, t_max = t_max, approximate = 1, round_cplex = 0, trace_cplex = 0).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
# 1. Gurobi installation
#For an exact solution, we strongly recommend running designmatch either with CPLEX or Gurobi. Between these two solvers, the R interface of Gurobi is considerably easier to install. Here we provide general instructions for manually installing Gurobi and its R interface in Mac and Windows machines.
#1. Create a free academic license
# Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/creating_a_new_academic_li.html
#2. Install the software
# 2.1. In http://www.gurobi.com/index, go to Downloads > Gurobi Software
# 2.2. Choose your operating system and press download
#
#3. Retrieve and set up your Gurobi license
# 2.1. Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_and_setting_up_.html
# 2.2. Then follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_a_free_academic.html
#
#4. Test your license
# Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/testing_your_license.html
#
#5. Install the R interface of Gurobi
# Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/r_installing_the_r_package.html
# * In Windows, in R run the command install.packages("PATH\\gurobi_7.X-Y.zip", repos=NULL) where path leads to the file gurobi_7.X-Y.zip (for example PATH=C:\\gurobi702\\win64\\R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
# * In MAC, in R run the command install.packages('PATH/gurobi_7.X-Y.tgz', repos=NULL) where path leads to the file gurobi_7.X-Y.tgz (for example PATH=/Library/gurobi702/mac64/R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#
#6. Test the installation
# Load the library and run the examples therein
# * A possible error that you may get is the following: "Error: package ‘slam’ required by ‘gurobi’ could not be found". If that case, install.packages('slam') and try again.
# You should be all set!
CONS_SEP_match$tipo_de_plan_res<-ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
require(slam)
# Solver options
#default solver is glpk with approximate = 1
#For an exact solution, we strongly recommend using cplex or gurobi as they are much faster than the other solvers, but they do require a license (free for academics, but not for people outside universities)
t_max = 60*60*7
solver = "gurobi" #cplex, glpk, gurobi and symphony
solver = list(name = solver,
t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
approximate = 0,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program. #FEB2021: I dont want to violate some balancing constraints to some extent. Change to 0.
round_cplex = 0,
trace = 1#turns the optimizer output on
)
#Indicador de tratamiento
t_ind= ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)
#table(is.na(CONS_SEP_match$tipo_de_plan_res))
# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:
#######mom_covs is a matrix where each column is a covariate whose mean is to be balanced
#######mom_tols is a vector of tolerances for the maximum difference in means for the covariates in mom_covs
#######mom_targets is a vector of target moments (e.g., means) of a distribution to be approximated by matched sampling. is optional, but if #######mom_covs is specified then mom_tols needs to be specified too
#######The lengths of mom_tols and mom_target have to be equal to the number of columns of mom_covs
mom_covs = cbind(CONS_SEP_match$edad_al_ing,
CONS_SEP_match$fech_ing_num,
CONS_SEP_match$edad_ini_cons)
mom_tols = absstddif(mom_covs, t_ind, .2)# original, 0.05, ahora probaré con 0.7
mom = list(covs = mom_covs, tols = mom_tols, targets = NULL)
# Mean balance
covs = cbind(CONS_SEP_match$edad_al_ing,
CONS_SEP_match$fech_ing_num,
CONS_SEP_match$edad_ini_cons)
meantab(covs, t_ind)
## Mis Min Max Mean T Mean C Std Dif P-val
## [1,] 0 8.84 88.84 35.97 35.97 0 1
## [2,] 0 13621.00 18199.00 16445.50 16445.50 0 1
## [3,] 0 5.00 74.00 16.51 16.51 0 1
# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs = cbind(CONS_SEP_match$origen_ingreso_mod,
CONS_SEP_match$dg_cie_10_rec,
CONS_SEP_match$sexo_2,
CONS_SEP_match$sus_ini_mod_mvv,
CONS_SEP_match$tipo_centro_pub, #cuidado
CONS_SEP_match$estado_conyugal_2,
CONS_SEP_match$escolaridad_rec,
CONS_SEP_match$freq_cons_sus_prin,
CONS_SEP_match$nombre_region,
CONS_SEP_match$condicion_ocupacional_corr,
CONS_SEP_match$sus_principal_mod,
CONS_SEP_match$dg_trs_cons_sus_or,
CONS_SEP_match$tenencia_de_la_vivienda_mod
)
fine = list(covs = fine_covs)
# 11,448; No. of controls: 11,448"
# 11,452; No. of controls: 11,452"
# 11,459; No. of controls: 11,459" #when I changed tolerance from .0999 to .1999
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#MATCH
start.time <- Sys.time()
set.seed(2125)
out = cardmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIËN
mom = mom,# ya los definí list(covs = mom_covs, tols = mom_tols, targets = mom_targets),
fine = fine,
solver = solver)
## Building the matching problem...
## Gurobi optimizer is open...
## Finding the optimal matches...
## Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (win64)
## Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
## Optimize a model with 70 rows, 84741 columns and 1610079 nonzeros
## Model fingerprint: 0xe18add91
## Variable types: 0 continuous, 84741 integer (84741 binary)
## Coefficient statistics:
## Matrix range [1e+00, 2e+04]
## Objective range [1e+00, 1e+00]
## Bounds range [0e+00, 0e+00]
## RHS range [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve time: 2.33s
## Presolved: 70 rows, 84741 columns, 1609869 nonzeros
## Variable types: 0 continuous, 84741 integer (84741 binary)
##
## Root relaxation: objective 1.122606e+04, 451 iterations, 1.01 seconds
##
## Nodes | Current Node | Objective Bounds | Work
## Expl Unexpl | Obj Depth IntInf | Incumbent BestBd Gap | It/Node Time
##
## 0 0 11226.0565 0 43 -0.00000 11226.0565 - - 3s
## H 0 0 2321.0000000 11226.0565 384% - 8s
## 0 0 11226.0565 0 43 2321.00000 11226.0565 384% - 10s
## H 0 0 11226.000000 11226.0565 0.00% - 12s
## 0 0 11226.0565 0 43 11226.0000 11226.0565 0.00% - 12s
##
## Explored 1 nodes (458 simplex iterations) in 12.50 seconds
## Thread count was 12 (of 12 available processors)
##
## Solution count 3: 11226 2321 -0
##
## Optimal solution found (tolerance 1.00e-04)
## Best objective 1.122600000000e+04, best bound 1.122600000000e+04, gap 0.0000%
## Optimal matches found
#FEB2021= If I change to bmatch, error can't allocate vector size 3.4gb
end.time <- Sys.time()
time.taken <- end.time - start.time
# Fine balance (note here we are getting an approximate solution)
#for (i in 1:ncol(fine_covs)) {
# print(finetab(fine_covs[, i], t_id_1, c_id_1))
#}
# Indices of the treated units and matched controls
t_id_1 = out$t_id
c_id_1 = out$c_id
group = out$group_id
ids_matched<-cbind.data.frame(t_id_1, c_id_1,group)
paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 11,226; No. of controls: 11,226"
# Fine balance (note here we are getting an approximate solution)
finetab_match1<-data.frame()
for (i in 1:ncol(fine_covs)) {
#finetab_match1<- rbind.data.frame(
finetab(fine_covs[, i], t_id_1, c_id_1)
}
d_match = CONS_SEP_match[c(t_id_1, c_id_1), ]
paste0("Number of duplicated rows: ",d_match %>% dplyr::group_by(row) %>% dplyr::mutate(n_row=n()) %>% dplyr::ungroup() %>% dplyr::filter(n_row>1) %>% nrow())
## [1] "Number of duplicated rows: 0"
paste0("Percentage of the selected treatments: ",scales::percent(length(t_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==1) %>% nrow()))
## [1] "Percentage of the selected treatments: 88%"
paste0("Percentage of the selected controls: ",
scales::percent(length(c_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==0) %>% nrow()))
## [1] "Percentage of the selected controls: 16%"
#cuidado, el anterior me encontró más del mismo control para un tratado
#por eso ocuparé el de más abajo.
#EL DE A CONTINUACIÓN ES ERRÓNEO PORQUE ES POR POSICIÓN, NO POR COINCIDENCIA DEL NÚMERO CON LA FILA
#d_match_no_duplicates = CONS_SEP_match[which(CONS_SEP_match$row %in% c(t_id_1, c_id_1)), ]
Explore Results of the Matching
vars_ecdf<- c('edad_al_ing', 'edad_ini_cons', 'fech_ing_num')
headings <- c("Age at Admission", "Age of Onset of Drug Use", "Date of Admission")
for (i in 1:length(headings)) {
cat("### ",headings[i],"\n")
f<-vars_ecdf[i]
ecdfplot(as.data.frame(CONS_SEP_match)[,f], t_id_1, c_id_1, main_title = "", legend_position = "right")
cat('\n\n')
}
Date of Admission
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso3.jpg", height=14, width= 10, res= 96, units = "in")
ecdfplot(as.data.frame(CONS_SEP_match)[,f], t_id_1, c_id_1, main_title = "", xlab="", legend_position = "right")
dev.off()
}
cat("### ","Love plot","\n")
Love plot
X_mat<-cbind(
"Primary Substance at Admission"=CONS_SEP_match$sus_principal_mod,
"Admission Age"=CONS_SEP_match$edad_al_ing,
"Admission Date"=CONS_SEP_match$fech_ing_num,
"Substance Use Onset Age"=CONS_SEP_match$edad_ini_cons,
"Admission Motive"=CONS_SEP_match$origen_ingreso_mod,
"Psychiatric Comorbidity"=CONS_SEP_match$dg_cie_10_rec,
"Sex"=CONS_SEP_match$sexo_2,
"First substance used"=CONS_SEP_match$sus_ini_mod_mvv,
"Public Center"=CONS_SEP_match$tipo_centro_pub,
"Marital Status"=CONS_SEP_match$estado_conyugal_2,
"Educational Attainment"=CONS_SEP_match$escolaridad_rec,
"Primary Substance at Admission Usage Frequency"=CONS_SEP_match$freq_cons_sus_prin,
"Regional Location of the Center"=CONS_SEP_match$nombre_region,
"Employment Status"=CONS_SEP_match$condicion_ocupacional_corr,
"Drug Dependence Diagnosis"=CONS_SEP_match$dg_trs_cons_sus_or,
"Drug Dependence"=CONS_SEP_match$tenencia_de_la_vivienda_mod
)
dist_mat_match1<-meantab(X_mat, t_ind, t_id_1, c_id_1, exact = NULL, digits = 2)#standardized differences in means after matching for each covariate;
vline = 0.2
loveplot(X_mat=X_mat, t_id=t_id_1, c_id=c_id_1, v_line=vline, legend_position = "topright")
Balance
options(knitr.kable.NA = '')
covs0_unmatch <- subset(CONS_SEP_match, select = -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,n_hash,dup,motivodeegreso_mod_imp,via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,
covs0_match <- subset(d_match, select = -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,n_hash,dup,motivodeegreso_mod_imp,via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,
#_#_#_#_#_#_generar pareamientos prepost#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
library(cobalt)
bal1_nomatch<-bal.tab(covs0_unmatch, treat = CONS_SEP_match$tipo_de_plan_res,
thresholds = c(m = .1, v = 2),
binary = "std",
continuous = "std",
stats = c("mean.diffs", "variance.ratios","ks.statistics"))
bal1_nomatch$Balance[,2]<-round(bal1_nomatch$Balance[,2],2)
bal1_nomatch$Balance[,4]<-round(bal1_nomatch$Balance[,4],2)
bal1_nomatch$Balance[,6]<-round(bal1_nomatch$Balance[,6],2)
bal1_match<-bal.tab(covs0_match, treat = d_match$tipo_de_plan_res,
thresholds = c(m = .1, v = 2),
binary = "std",
continuous = "std",
stats = c("mean.diffs", "variance.ratios","ks.statistics"))
bal1_match$Balance[,2]<-round(bal1_match$Balance[,2],2)
bal1_match$Balance[,4]<-round(bal1_match$Balance[,4],2)
bal1_match$Balance[,6]<-round(bal1_match$Balance[,6],2)
var_names<-
list("sus_principal_mod_Alcohol"= "Primary Subs. Adm.-Alcohol",
"sus_principal_mod_Cocaine hydrochloride"= "Primary Subs. Adm.-Cocaine hydrochloride",
"sus_principal_mod_Cocaine paste"="Primary Subs. Adm.-Cocaine paste",
"sus_principal_mod_Marijuana"="Primary Subs. Adm.-Marijuana",
"sus_principal_mod_Other"="Primary Subs. Adm.-Other",
"origen_ingreso_mod_Spontaneous"="Admission Motive-Spontaneous",
"origen_ingreso_mod_Assisted Referral"= "Admission Motive-Assisted Referral",
"origen_ingreso_mod_Other"="Admission Motive-Other",
"origen_ingreso_mod_Justice Sector"= "Admission Motive-Justice Sector",
"origen_ingreso_mod_Health Sector"="Admission Motive-Health Sector",
"dg_cie_10_rec_Without psychiatric comorbidity"="ICD-10-Wo/Psych Comorbidity",
"dg_cie_10_rec_Diagnosis unknown (under study)"="ICD-10-Dg. Unknown/under study",
"dg_cie_10_rec_With psychiatric comorbidity"="ICD-10-W/Psych Comorbidity",
"sexo_2_Women"="Sex-Women",
"edad_al_ing"="Admission Age",
"fech_ing_num"="Admission Date",
"duplicates_filtered"="Treatments (#)",
"more_one_treat"=">1 treatment",
"sus_ini_mod_mvv_Alcohol"= "First substance used-Alcohol",
"sus_ini_mod_mvv_Cocaine hydrochloride"= "First substance used-Cocaine hydrochloride",
"sus_ini_mod_mvv_Cocaine paste"="First substance used-Cocaine paste",
"sus_ini_mod_mvv_Marijuana"="First substance used-Marijuana",
"sus_ini_mod_mvv_Other"="First substance used-Other",
"estado_conyugal_2_Married/Shared living arrangements"="Marital Status-Married/Shared liv. arr.",
"estado_conyugal_2_Separated/Divorced"="Marital Status-Separated/Divorced",
"estado_conyugal_2_Single"= "Marital Status-Single",
"estado_conyugal_2_Widower"="Marital Status-Widower",
"escolaridad_rec_3-Completed primary school or less"="Educational Attainment-PS or less",
"escolaridad_rec_2-Completed high school or less"="Educational Attainment-HS or less",
"escolaridad_rec_1-More than high school"="Educational Attainment-More than HS",
"freq_cons_sus_prin_1 day a week or more"="Subs Use Freq-1d/wk or more",
"freq_cons_sus_prin_2 to 3 days a week"="Subs Use Freq-2-3d/wk",
"freq_cons_sus_prin_4 to 6 days a week"="Subs Use Freq-4-6d/wk",
"freq_cons_sus_prin_Daily"="Subs Use Freq-Daily",
"freq_cons_sus_prin_Did not use"="Subs Use Freq-Did not use",
"freq_cons_sus_prin_Less than 1 day a week"="Subs Use Freq-Less 1d/wk",
"nombre_region_Antofagasta (02)"="Region-Antofagasta(02)",
"nombre_region_Araucanía (09)"="Region-Araucanía(09)",
"nombre_region_Arica (15)"="Region-Arica(15)",
"nombre_region_Atacama (03)"="Region-Atacama(03)",
"nombre_region_Aysén (11)"="Region-Aysén(11)",
"nombre_region_Biobío (08)"="Region- Biobío(08)",
"nombre_region_Coquimbo (04)"="Region-Coquimbo(04)",
"nombre_region_Los Lagos (10)"="Region-Los Lagos(10)",
"nombre_region_Los Ríos (14)"="Region-Los Ríos(14)",
"nombre_region_Magallanes (12)"="Region-Magallanes(12)",
"nombre_region_Maule (07)"="Region-Maule(07)",
"nombre_region_Metropolitana (13)"="Region-Metropolitana(13)",
"nombre_region_Ñuble (16)"="Region-Ñuble(16)",
"nombre_region_O'Higgins (06)"="Region-O'Higgins(06)",
"nombre_region_Tarapacá (01)"="Region-Tarapacá(01)",
"nombre_region_Valparaíso (05)"="Region-Valparaíso(05)",
"tipo_centro_pub"="Public Center",
"dg_trs_cons_sus_or"= "Drug Dependence diagnosis",
"edad_ini_cons"="Substance Use Onset Age",
"condicion_ocupacional_corr_Employed"="Emp.Status-Employed",
"condicion_ocupacional_corr_Inactive"="Emp.Status-Inactive",
"condicion_ocupacional_corr_Looking for a job for the first time"="Emp.Status-Looking 1st job",
"condicion_ocupacional_corr_No activity"="Emp.Status- No activity",
"condicion_ocupacional_corr_Not seeking for work"="Emp.Status- Not seeking work",
"condicion_ocupacional_corr_Unemployed"="Emp.Status- Unemployed",
"tenencia_de_la_vivienda_mod_Illegal Settlement"="Ten.Stat.Houshld-Ill stlmnt",
"tenencia_de_la_vivienda_mod_Others"="Ten.Stat.Houshld-Other",
"tenencia_de_la_vivienda_mod_Owner/Transferred dwellings/Pays Dividends"="Ten.Stat.Houshld-Owner",
"tenencia_de_la_vivienda_mod_Renting"="Ten.Stat.Houshld-Renting",
"tenencia_de_la_vivienda_mod_Stays temporarily with a relative"="Ten.Stat.Houshld-Temporary stlmnt",
"rn"="Treatment")
var.names<-data.table(data.frame(unlist(var_names)),keep.rownames = T) %>% janitor::clean_names()
#GENERACION DE COMPARACIONES ENTRE GRUPOS, UNA VEZ PAREADO Y DESPUÉS
pre_matched_matched<-
data.table::data.table(bal1_nomatch$Balance[,1:6],keep.rownames = T) %>%
dplyr::arrange(-abs(Diff.Un)) %>%
dplyr::left_join(data.table::data.table(bal1_match$Balance[,2:6],keep.rownames = T),by="rn") %>%
dplyr::left_join(var.names,by="rn") %>%
dplyr::select(unlist_var_names,everything()) %>%
dplyr::select(-rn)
pre_matched_matched%>%
knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
caption = paste0("Table 5a. Covariate Balance in the Variables of Interest"),
col.names = c("Variables","Nature of Variables", "SMDs","Threshold","Variance Ratios","Threshold","KS","SMDs","Threshold","Variance Ratios","Threshold","KS"),
align =c('l',rep('c', 101))) %>%
add_header_above(c(" "," ","Unadjusted" = 5, "Adjusted" = 5)) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
kableExtra::add_footnote( c(paste("Note.",paste0("Unadjusted (n=",dim(covs0_unmatch)[1] %>% format(big.mark=","),")"),";",paste0("Adjusted (n=",dim(covs0_match)[1] %>% format(big.mark=","),")"),";",paste0("Total pairs: ",length(c_id_1) %>% format(big.mark=",")))),
notation = "none") %>%
kableExtra::kable_classic() %>%
kableExtra::scroll_box(width = "100%", height = "375px")
Table 5a. Covariate Balance in the Variables of Interest
|
|
Unadjusted
|
Adjusted
|
Variables
|
Nature of Variables
|
SMDs
|
Threshold
|
Variance Ratios
|
Threshold
|
KS
|
SMDs
|
Threshold
|
Variance Ratios
|
Threshold
|
KS
|
Public Center
|
Binary
|
-1.18
|
Not Balanced, >0.1
|
|
|
0.51
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Emp.Status-Employed
|
Binary
|
-0.95
|
Not Balanced, >0.1
|
|
|
0.41
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Subs Use Freq-Daily
|
Binary
|
0.72
|
Not Balanced, >0.1
|
|
|
0.33
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Emp.Status- Unemployed
|
Binary
|
0.62
|
Not Balanced, >0.1
|
|
|
0.30
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Primary Subs. Adm.-Cocaine paste
|
Binary
|
0.61
|
Not Balanced, >0.1
|
|
|
0.29
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Drug Dependence diagnosis
|
Binary
|
0.59
|
Not Balanced, >0.1
|
|
|
0.22
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Subs Use Freq-2-3d/wk
|
Binary
|
-0.52
|
Not Balanced, >0.1
|
|
|
0.20
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Admission Motive-Assisted Referral
|
Binary
|
0.48
|
Not Balanced, >0.1
|
|
|
0.17
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Primary Subs. Adm.-Alcohol
|
Binary
|
-0.40
|
Not Balanced, >0.1
|
|
|
0.17
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Emp.Status- No activity
|
Binary
|
0.38
|
Not Balanced, >0.1
|
|
|
0.11
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
First substance used-Alcohol
|
Binary
|
-0.36
|
Not Balanced, >0.1
|
|
|
0.18
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
>1 treatment
|
Binary
|
0.33
|
Not Balanced, >0.1
|
|
|
0.14
|
0.22
|
Not Balanced, >0.1
|
|
|
0.10
|
ICD-10-Wo/Psych Comorbidity
|
Binary
|
-0.32
|
Not Balanced, >0.1
|
|
|
0.15
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Treatments (#)
|
Contin.
|
0.31
|
Not Balanced, >0.1
|
1.91
|
Balanced, <2
|
0.14
|
0.20
|
Not Balanced, >0.1
|
1.46
|
Balanced, <2
|
0.10
|
Marital Status-Married/Shared liv. arr.
|
Binary
|
-0.30
|
Not Balanced, >0.1
|
|
|
0.13
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Admission Date
|
Contin.
|
-0.29
|
Not Balanced, >0.1
|
1.00
|
Balanced, <2
|
0.14
|
-0.17
|
Not Balanced, >0.1
|
0.95
|
Balanced, <2
|
0.09
|
Marital Status-Single
|
Binary
|
0.28
|
Not Balanced, >0.1
|
|
|
0.14
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Admission Motive-Spontaneous
|
Binary
|
-0.27
|
Not Balanced, >0.1
|
|
|
0.13
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Subs Use Freq-1d/wk or more
|
Binary
|
-0.25
|
Not Balanced, >0.1
|
|
|
0.05
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
ICD-10-W/Psych Comorbidity
|
Binary
|
0.23
|
Not Balanced, >0.1
|
|
|
0.11
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
First substance used-Marijuana
|
Binary
|
0.23
|
Not Balanced, >0.1
|
|
|
0.10
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Arica(15)
|
Binary
|
0.21
|
Not Balanced, >0.1
|
|
|
0.04
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Primary Subs. Adm.-Marijuana
|
Binary
|
-0.20
|
Not Balanced, >0.1
|
|
|
0.04
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
First substance used-Cocaine paste
|
Binary
|
0.20
|
Not Balanced, >0.1
|
|
|
0.07
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Primary Subs. Adm.-Cocaine hydrochloride
|
Binary
|
-0.19
|
Not Balanced, >0.1
|
|
|
0.07
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Subs Use Freq-Less 1d/wk
|
Binary
|
-0.19
|
Not Balanced, >0.1
|
|
|
0.03
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Admission Age
|
Contin.
|
-0.19
|
Not Balanced, >0.1
|
0.84
|
Balanced, <2
|
0.07
|
0.04
|
Balanced, <0.1
|
1.01
|
Balanced, <2
|
0.03
|
Region-Tarapacá(01)
|
Binary
|
0.16
|
Not Balanced, >0.1
|
|
|
0.03
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Sex-Women
|
Binary
|
0.15
|
Not Balanced, >0.1
|
|
|
0.07
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Emp.Status- Not seeking work
|
Binary
|
0.15
|
Not Balanced, >0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Ten.Stat.Houshld-Renting
|
Binary
|
-0.14
|
Not Balanced, >0.1
|
|
|
0.05
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Admission Motive-Justice Sector
|
Binary
|
-0.13
|
Not Balanced, >0.1
|
|
|
0.04
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Ten.Stat.Houshld-Ill stlmnt
|
Binary
|
0.13
|
Not Balanced, >0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Educational Attainment-PS or less
|
Binary
|
0.12
|
Not Balanced, >0.1
|
|
|
0.06
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Araucanía(09)
|
Binary
|
-0.12
|
Not Balanced, >0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Magallanes(12)
|
Binary
|
-0.12
|
Not Balanced, >0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Subs Use Freq-4-6d/wk
|
Binary
|
-0.11
|
Not Balanced, >0.1
|
|
|
0.04
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Antofagasta(02)
|
Binary
|
0.11
|
Not Balanced, >0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Coquimbo(04)
|
Binary
|
-0.10
|
Not Balanced, >0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Substance Use Onset Age
|
Contin.
|
-0.09
|
Balanced, <0.1
|
0.91
|
Balanced, <2
|
0.07
|
0.00
|
Balanced, <0.1
|
1.00
|
Balanced, <2
|
0.01
|
Region-Aysén(11)
|
Binary
|
-0.09
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Ñuble(16)
|
Binary
|
-0.09
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
ICD-10-Dg. Unknown/under study
|
Binary
|
0.08
|
Balanced, <0.1
|
|
|
0.03
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Subs Use Freq-Did not use
|
Binary
|
-0.08
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Educational Attainment-HS or less
|
Binary
|
-0.06
|
Balanced, <0.1
|
|
|
0.03
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Educational Attainment-More than HS
|
Binary
|
-0.06
|
Balanced, <0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region- Biobío(08)
|
Binary
|
-0.06
|
Balanced, <0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Valparaíso(05)
|
Binary
|
0.06
|
Balanced, <0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Los Lagos(10)
|
Binary
|
-0.04
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Ten.Stat.Houshld-Temporary stlmnt
|
Binary
|
0.04
|
Balanced, <0.1
|
|
|
0.02
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Emp.Status-Inactive
|
Binary
|
-0.04
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Admission Motive-Other
|
Binary
|
0.03
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Atacama(03)
|
Binary
|
-0.03
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Maule(07)
|
Binary
|
-0.03
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-O’Higgins(06)
|
Binary
|
-0.03
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Ten.Stat.Houshld-Other
|
Binary
|
0.03
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Admission Motive-Health Sector
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Primary Subs. Adm.-Other
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Marital Status-Widower
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Emp.Status-Looking 1st job
|
Binary
|
-0.02
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
First substance used-Other
|
Binary
|
0.01
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Marital Status-Separated/Divorced
|
Binary
|
-0.01
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Los Ríos(14)
|
Binary
|
-0.01
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Region-Metropolitana(13)
|
Binary
|
-0.01
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Ten.Stat.Houshld-Owner
|
Binary
|
0.01
|
Balanced, <0.1
|
|
|
0.01
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
First substance used-Cocaine hydrochloride
|
Binary
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
0.00
|
Balanced, <0.1
|
|
|
0.00
|
Note. Unadjusted (n=84,741) ; Adjusted (n=22,452) ; Total
pairs: 11,226
|
attr(d_match$edad_al_ing,"label")<-"Admission Age"
attr(d_match$motivodeegreso_mod_imp,"label")<-"Cause of discharge"
tab2<-
CreateTableOne(vars = match.on.sel, strata = "tipo_de_plan_res",
data = d_match, factorVars = catVars, smd=T)
kableone <- function(x, caption=NULL, col.names=NA, smd=T, test=T, varLabels=T, noSpaces=T, printToggle=T, dropEqual=F, ...) {
capture.output(x <- print(x, smd=T, test=test, varLabels=varLabels,noSpaces=noSpaces, printToggle=printToggle, dropEqual=dropEqual, ...))
knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption= caption, col.names= col.names)
}
kableone(tab2,
caption = paste0("Table 5b. Covariate Balance in the Variables of Interest"),
col.names= c("Ambulatory","Residential", "p-value", "class","SMD"),
nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num")#"\\hline",
) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
#()
row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 10) %>%
kableExtra::kable_classic() %>%
scroll_box(width = "100%", height = "400px")
Table 5b. Covariate Balance in the Variables of Interest
|
Ambulatory
|
Residential
|
p-value
|
class
|
SMD
|
n
|
11226
|
11226
|
|
|
|
Primary Substance at Admission (%)
|
|
|
1.000
|
|
<0.001
|
Alcohol
|
2321 (20.7)
|
2321 (20.7)
|
|
|
|
Cocaine hydrochloride
|
1602 (14.3)
|
1602 (14.3)
|
|
|
|
Cocaine paste
|
6716 (59.8)
|
6716 (59.8)
|
|
|
|
Marijuana
|
391 (3.5)
|
391 (3.5)
|
|
|
|
Other
|
196 (1.7)
|
196 (1.7)
|
|
|
|
First Substance Used (%)
|
|
|
1.000
|
|
<0.001
|
Alcohol
|
4702 (41.9)
|
4702 (41.9)
|
|
|
|
Cocaine hydrochloride
|
475 (4.2)
|
475 (4.2)
|
|
|
|
Cocaine paste
|
1843 (16.4)
|
1843 (16.4)
|
|
|
|
Marijuana
|
3920 (34.9)
|
3920 (34.9)
|
|
|
|
Other
|
286 (2.5)
|
286 (2.5)
|
|
|
|
Marital Status (%)
|
|
|
1.000
|
|
<0.001
|
Married/Shared living arrangements
|
2710 (24.1)
|
2710 (24.1)
|
|
|
|
Separated/Divorced
|
1192 (10.6)
|
1192 (10.6)
|
|
|
|
Single
|
7208 (64.2)
|
7208 (64.2)
|
|
|
|
Widower
|
116 (1.0)
|
116 (1.0)
|
|
|
|
Educational Attainment (%)
|
|
|
1.000
|
|
<0.001
|
3-Completed primary school or less
|
3877 (34.5)
|
3877 (34.5)
|
|
|
|
2-Completed high school or less
|
5488 (48.9)
|
5488 (48.9)
|
|
|
|
1-More than high school
|
1861 (16.6)
|
1861 (16.6)
|
|
|
|
Substance Use Onset Age (median [IQR])
|
15.00 [13.00, 17.00]
|
15.00 [13.00, 17.00]
|
0.964
|
nonnorm
|
<0.001
|
Primary Substance at Admission Usage Frequency (%)
|
|
|
1.000
|
|
<0.001
|
1 day a week or more
|
271 (2.4)
|
271 (2.4)
|
|
|
|
2 to 3 days a week
|
1324 (11.8)
|
1324 (11.8)
|
|
|
|
4 to 6 days a week
|
1591 (14.2)
|
1591 (14.2)
|
|
|
|
Daily
|
7824 (69.7)
|
7824 (69.7)
|
|
|
|
Did not use
|
84 (0.7)
|
84 (0.7)
|
|
|
|
Less than 1 day a week
|
132 (1.2)
|
132 (1.2)
|
|
|
|
Treatment Admission Motive (%)
|
|
|
1.000
|
|
<0.001
|
Spontaneous
|
4083 (36.4)
|
4083 (36.4)
|
|
|
|
Assisted Referral
|
2035 (18.1)
|
2035 (18.1)
|
|
|
|
Other
|
719 (6.4)
|
719 (6.4)
|
|
|
|
Justice Sector
|
781 (7.0)
|
781 (7.0)
|
|
|
|
Health Sector
|
3608 (32.1)
|
3608 (32.1)
|
|
|
|
Psychiatric Comorbidity (%)
|
|
|
1.000
|
|
<0.001
|
Without psychiatric comorbidity
|
2983 (26.6)
|
2983 (26.6)
|
|
|
|
Diagnosis unknown (under study)
|
2437 (21.7)
|
2437 (21.7)
|
|
|
|
With psychiatric comorbidity
|
5806 (51.7)
|
5806 (51.7)
|
|
|
|
Regional Location of Center (%)
|
|
|
1.000
|
|
<0.001
|
Antofagasta (02)
|
676 (6.0)
|
676 (6.0)
|
|
|
|
Araucanía (09)
|
159 (1.4)
|
159 (1.4)
|
|
|
|
Arica (15)
|
609 (5.4)
|
609 (5.4)
|
|
|
|
Atacama (03)
|
262 (2.3)
|
262 (2.3)
|
|
|
|
Aysén (11)
|
40 (0.4)
|
40 (0.4)
|
|
|
|
Biobío (08)
|
529 (4.7)
|
529 (4.7)
|
|
|
|
Coquimbo (04)
|
270 (2.4)
|
270 (2.4)
|
|
|
|
Los Lagos (10)
|
363 (3.2)
|
363 (3.2)
|
|
|
|
Los Ríos (14)
|
184 (1.6)
|
184 (1.6)
|
|
|
|
Magallanes (12)
|
31 (0.3)
|
31 (0.3)
|
|
|
|
Maule (07)
|
494 (4.4)
|
494 (4.4)
|
|
|
|
Metropolitana (13)
|
5523 (49.2)
|
5523 (49.2)
|
|
|
|
Ñuble (16)
|
20 (0.2)
|
20 (0.2)
|
|
|
|
O’Higgins (06)
|
506 (4.5)
|
506 (4.5)
|
|
|
|
Tarapacá (01)
|
414 (3.7)
|
414 (3.7)
|
|
|
|
Valparaíso (05)
|
1146 (10.2)
|
1146 (10.2)
|
|
|
|
Drug Dependence Diagnosis = TRUE (%)
|
10203 (90.9)
|
10203 (90.9)
|
1.000
|
|
<0.001
|
Public Center = TRUE (%)
|
3615 (32.2)
|
3615 (32.2)
|
1.000
|
|
<0.001
|
Sexo Usuario/Sex of User = Women (%)
|
3475 (31.0)
|
3475 (31.0)
|
1.000
|
|
<0.001
|
Admission Age (median [IQR])
|
32.11 [26.07, 40.42]
|
32.87 [26.51, 41.14]
|
<0.001
|
nonnorm
|
0.042
|
Fecha de Ingreso a Tratamiento (Numérico)(c)/Date of Admission to
Treatment (Numeric)(c) (median [IQR])
|
16365.00 [15469.00, 17259.00]
|
16128.00 [15323.00, 16988.00]
|
<0.001
|
nonnorm
|
0.171
|
Tenure status of household (%)
|
|
|
1.000
|
|
<0.001
|
Illegal Settlement
|
195 (1.7)
|
195 (1.7)
|
|
|
|
Others
|
347 (3.1)
|
347 (3.1)
|
|
|
|
Owner/Transferred dwellings/Pays Dividends
|
4113 (36.6)
|
4113 (36.6)
|
|
|
|
Renting
|
1530 (13.6)
|
1530 (13.6)
|
|
|
|
Stays temporarily with a relative
|
5041 (44.9)
|
5041 (44.9)
|
|
|
|
Employment Status (%)
|
|
|
1.000
|
|
<0.001
|
Employed
|
1765 (15.7)
|
1765 (15.7)
|
|
|
|
Inactive
|
1164 (10.4)
|
1164 (10.4)
|
|
|
|
Looking for a job for the first time
|
21 (0.2)
|
21 (0.2)
|
|
|
|
No activity
|
1372 (12.2)
|
1372 (12.2)
|
|
|
|
Not seeking for work
|
269 (2.4)
|
269 (2.4)
|
|
|
|
Unemployed
|
6635 (59.1)
|
6635 (59.1)
|
|
|
|
#covs0_match_disc<-data.table::data.table(covs0_match_disc)
covs_dum_unmatch <- subset(CONS_SEP_match_dum,select= -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,dup,duplicates_filtered,more_one_treat,n_hash,motivodeegreso_mod_imp,via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,
covs_dum_match <- subset(CONS_SEP_match_dum[c(t_id_1, c_id_1)],select= -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,dup,n_hash,motivodeegreso_mod_imp,duplicates_filtered,more_one_treat, via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,
covs_dum_unmatch$tipo_de_plan_res<-ifelse(covs_dum_unmatch$tipo_de_plan_res==1,1,0)
covs_dum_match$tipo_de_plan_res<-ifelse(covs_dum_match$tipo_de_plan_res==1,1,0)
catVars<-
match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico","edad_ini_cons","fech_ing_num","edad_al_ing","via_adm_sus_prin_act")]
#############################################
# Standardized differences before matching
#############################################
smd_internal<-function(var,db){
#db<-as.data.frame(db)
smd_vec<-round(abs(mean(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==1),..var]),na.rm=F)- mean(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==0),..var]),na.rm=F))/
sqrt(((sd(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==1),..var]),na.rm=F))^2 + (sd(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==0),..var]),na.rm=F))^2 )/2),2)
#return(assign(paste0(var,"_smd"),smd_vec,envir=.GlobalEnv))
return(print(smd_vec))
}
smd_df<-data.frame()
for (i in 1:length(covs_dum_unmatch)){
smd_df<-rbind(smd_df,cbind.data.frame(names(covs_dum_unmatch)[i],
smd_internal(names(covs_dum_unmatch)[i],"covs_dum_unmatch")))
}
smd_df<-
smd_df %>%
dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>%
dplyr::filter(vars!="tipo_de_plan_res_TRUE") %>%
dplyr::arrange(desc(smd)) %>%
dplyr::filter(!grepl("tipo_de_plan_res",vars))
##################################################
# Standardized differences after matching
##################################################
smd_df2<-data.frame()
for (i in 1:length(names(covs_dum_match))){
smd_df2<-rbind(smd_df2,cbind.data.frame(names(covs_dum_match)[i],
smd_internal(names(covs_dum_match)[i],"covs_dum_match")))
}
smd_df2<-
smd_df2 %>%
dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>%
dplyr::filter(vars!="tipo_de_plan_res_TRUE")%>%
dplyr::filter(!grepl("tipo_de_plan_res",vars))
smd_df_prev_match_after_match<-
smd_df %>%
dplyr::left_join(smd_df2,by="vars") %>%
dplyr::rename("Before\nMatching"= !!names(.[2]),"After\nMatching"= !!names(.[3])) %>%
melt() %>%
dplyr::left_join(var.names,by=c("vars"="rn")) %>%
dplyr::select(unlist_var_names,everything()) %>%
dplyr::filter(!vars %in% c("sexo_2_Men","dg_trs_cons_sus_or_FALSE","tipo_centro_pub_FALSE")) %>%
dplyr::mutate(unlist_var_names=dplyr::case_when(vars=="dg_trs_cons_sus_or_TRUE"~"Drug Dependence",
vars=="tipo_centro_pub_TRUE"~"Public Center",
T~unlist_var_names))
#############################################
# Standardized differences before matching
#############################################
smd_internal2<-function(var,db){
#db<-as.data.frame(db)
smd_vec<-round(abs(mean(unlist(get(db)[which(get(db)$tipo_de_plan_res==1),..var]),na.rm=F)- mean(unlist(get(db)[which(get(db)$tipo_de_plan_res==0),..var]),na.rm=F))/
sqrt(((sd(unlist(get(db)[which(get(db)$tipo_de_plan_res==1),..var]),na.rm=F))^2 + (sd(unlist(get(db)[which(get(db)$tipo_de_plan_res==0),..var]),na.rm=F))^2 )/2),2)
#return(assign(paste0(var,"_smd"),smd_vec,envir=.GlobalEnv))
return(print(smd_vec))
}
smd_df21<-data.frame()
for (i in 1:length(covs_dum_unmatch)){
smd_df21<-rbind(smd_df21,cbind.data.frame(names(covs_dum_unmatch)[i],
smd_internal2(names(covs_dum_unmatch)[i],"covs_dum_unmatch")))
}
smd_df21<-
smd_df21 %>%
dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>%
dplyr::filter(vars!="tipo_de_plan_res_TRUE") %>%
dplyr::arrange(desc(smd)) %>%
dplyr::filter(!grepl("tipo_de_plan_res",vars))
##################################################
# Standardized differences after matching
##################################################
smd_df22<-data.frame()
for (i in 1:length(names(covs_dum_match))){
smd_df22<-rbind(smd_df22,cbind.data.frame(names(covs_dum_match)[i],
smd_internal2(names(covs_dum_match)[i],"covs_dum_match")))
}
smd_df22<-
smd_df22 %>%
dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>%
dplyr::filter(vars!="tipo_de_plan_res_TRUE")%>%
dplyr::filter(!grepl("tipo_de_plan_res",vars))
smd_df_prev_match_after_match2<-
smd_df21 %>%
dplyr::left_join(smd_df22,by="vars") %>%
dplyr::rename("Before\nMatching"= !!names(.[2]),"After\nMatching"= !!names(.[3])) %>%
melt() %>%
dplyr::left_join(var.names,by=c("vars"="rn")) %>%
dplyr::select(unlist_var_names,everything()) %>%
dplyr::filter(!vars %in% c("sexo_2_Men","dg_trs_cons_sus_or_FALSE","tipo_centro_pub_FALSE")) %>%
dplyr::mutate(unlist_var_names=dplyr::case_when(vars=="dg_trs_cons_sus_or_TRUE"~"Drug Dependence",
vars=="tipo_centro_pub_TRUE"~"Public Center",
T~unlist_var_names))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
wrap.it <- function(x, len)
{
sapply(x, function(y) paste(strwrap(y, len),
collapse = "\t"),
USE.NAMES = FALSE)
}
# Call this function with a list or vector
wrap.labels <- function(x, len)
{
if (is.list(x))
{
lapply(x, wrap.it, len)
} else {
wrap.it(x, len)
}
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Data set with stand. diff. before post matching
smd_df_prev_match_after_match<-
if(!is.nan(smd_df_prev_match_after_match$value)[15]){
smd_df_prev_match_after_match
} else{
smd_df_prev_match_after_match2
}
var_names_2022<-
list(
"sus_principal_mod_Alcohol"= "Primary Subs. at Adm.~-~Alcohol",
"sus_principal_mod_Cocaine hydrochloride"= "Primary Subs. at Adm.~-~Cocaine hydrochloride",
"sus_principal_mod_Cocaine paste"="Primary Subs. at Adm.~-~Cocaine paste",
"sus_principal_mod_Marijuana"="Primary Subs. at Adm.~-~Marijuana",
"sus_principal_mod_Other"="Primary Subs. at Adm.~-~Other",
"origen_ingreso_mod_Spontaneous"="Admission Motive~-~Spontaneous",
"origen_ingreso_mod_Assisted Referral"= "Admission Motive~-~Assisted referral",
"origen_ingreso_mod_Other"="Admission Motive~-~Other",
"origen_ingreso_mod_Justice Sector"= "Admission Motive~-~Justice sector",
"origen_ingreso_mod_Health Sector"="Admission Motive~-~Health sector",
"dg_cie_10_rec_Without psychiatric comorbidity"="Psychiatric comorbidity~-~Wo/Psych comorbidity",
"dg_cie_10_rec_Diagnosis unknown (under study)"="Psychiatric comorbidity~-~Unknown/under study",
"dg_cie_10_rec_With psychiatric comorbidity"="Psychiatric comorbidity~-~W/Psych comorbidity",
"sexo_2_Women"="Sex~-~Women",
"edad_al_ing"="Admission Age~-~ ",
"fech_ing_num"="Admission Date~-~ ",
"duplicates_filtered"="Treatments (#)",
"more_one_treat"=">1 treatment",
"sus_ini_mod_mvv_Alcohol"= "First substance used~-~Alcohol",
"sus_ini_mod_mvv_Cocaine hydrochloride"= "First substance used~-~Cocaine hydrochloride",
"sus_ini_mod_mvv_Cocaine paste"="First substance used~-~Cocaine paste",
"sus_ini_mod_mvv_Marijuana"="First substance used~-~Marijuana",
"sus_ini_mod_mvv_Other"="First substance used~-~Other",
"estado_conyugal_2_Married/Shared living arrangements"="Marital Status~-~Married/Shared liv. arr.",
"estado_conyugal_2_Separated/Divorced"="Marital Status~-~Separated/Divorced",
"estado_conyugal_2_Single"= "Marital Status~-~Single",
"estado_conyugal_2_Widower"="Marital Status~-~Widower",
"escolaridad_rec_3-Completed primary school or less"="Educational Attainment~-~Primary sch. or less",
"escolaridad_rec_2-Completed high school or less"="Educational Attainment~-~High sch. or less",
"escolaridad_rec_1-More than high school"="Educational Attainment~-~More than high sch.",
"freq_cons_sus_prin_1 day a week or more"="Subs. Use Freq.~-~1d/wk or more",
"freq_cons_sus_prin_2 to 3 days a week"="Subs. Use Freq.~-~2 to 3d/wk",
"freq_cons_sus_prin_4 to 6 days a week"="Subs. Use Freq.~-~4 to 6d/wk",
"freq_cons_sus_prin_Daily"="Subs. Use Freq.~-~Daily",
"freq_cons_sus_prin_Did not use"="Subs. Use Freq.~-~Did not use",
"freq_cons_sus_prin_Less than 1 day a week"="Subs. Use Freq.~-~Less 1d/wk",
"nombre_region_Antofagasta (02)"="Region~-~Antofagasta(02)",
"nombre_region_Araucanía (09)"="Region~-~Araucania(09)",
"nombre_region_Arica (15)"="Region~-~Arica(15)",
"nombre_region_Atacama (03)"="Region~-~Atacama(03)",
"nombre_region_Aysén (11)"="Region~-~Aysen(11)",
"nombre_region_Biobío (08)"="Region~-~Biobio(08)",
"nombre_region_Coquimbo (04)"="Region~-~Coquimbo(04)",
"nombre_region_Los Lagos (10)"="Region~-~Los Lagos(10)",
"nombre_region_Los Ríos (14)"="Region~-~Los Rios(14)",
"nombre_region_Magallanes (12)"="Region~-~Magallanes(12)",
"nombre_region_Maule (07)"="Region~-~Maule(07)",
"nombre_region_Metropolitana (13)"="Region~-~Metropolitana(13)",
"nombre_region_Ñuble (16)"="Region~-~Nuble(16)",
"nombre_region_O'Higgins (06)"="Region~-~O'Higgins(06)",
"nombre_region_Tarapacá (01)"="Region~-~Tarapaca(01)",
"nombre_region_Valparaíso (05)"="Region~-~Valparaiso(05)",
"tipo_centro_pub"="Type of Center~-~Public Center",
"dg_trs_cons_sus_or"= "Drug Dependence diagnosis~-~TRUE",
"edad_ini_cons"="Age of Onset of Drug Use~-~ ",
"condicion_ocupacional_corr_Employed"="Emp.Status~-~Employed",
"condicion_ocupacional_corr_Inactive"="Emp.Status~-~Inactive",
"condicion_ocupacional_corr_Looking for a job for the first time"="Emp.Status~-~Looking 1st job",
"condicion_ocupacional_corr_No activity"="Emp.Status~-~No activity",
"condicion_ocupacional_corr_Not seeking for work"="Emp.Status~-~Not seeking work",
"condicion_ocupacional_corr_Unemployed"="Emp.Status~-~Unemployed",
"tenencia_de_la_vivienda_mod_Illegal Settlement"="Tenure Status of Household~-~Illegal settlement",
"tenencia_de_la_vivienda_mod_Others"="Tenure Status of Household~-~Other",
"tenencia_de_la_vivienda_mod_Owner/Transferred dwellings/Pays Dividends"="Tenure Status of Household~-~Owner",
"tenencia_de_la_vivienda_mod_Renting"="Tenure Status of Household~-~Renting",
"tenencia_de_la_vivienda_mod_Stays temporarily with a relative"="Tenure Status of Household~-~Temporary settlement",
"rn"="Treatment")
# tipo_centro_pub_TRUE
# dg_trs_cons_sus_or_TRUE
loveplot_bal3 <-
# Generate plot
smd_df_prev_match_after_match %>%
dplyr::mutate(vars=dplyr::case_when(vars=="dg_trs_cons_sus_or_TRUE"~"dg_trs_cons_sus_or",vars=="tipo_centro_pub_TRUE"~"tipo_centro_pub",T~vars)) %>%
dplyr::left_join(tibble::rownames_to_column(data.frame(unlist(var_names_2022)),'vars'), by="vars") %>%
dplyr::rename("unlist_var_nms_22"="unlist.var_names_2022.") %>%
dplyr::mutate(unlist_var_nms_22= sub("Freq Drug Cons","Primary Substance Usage Freq.",unlist_var_nms_22)) %>%
dplyr::mutate(unlist_var_nms_22= sub("Motive Admission","Treatment Admission Motive",unlist_var_nms_22)) %>%
#dplyr::mutate(unlist_var_nms_22= sub("Cocaine hydrochloride","Snort cocaine",unlist_var_nms_22)) %>%
dplyr::mutate(unlist_var_nms_22= sub("Age of Onset of Drug Use","Substance Use Onset Age",unlist_var_nms_22))%>%
#2022
dplyr::mutate(unlist_var_nms_22= sub("Sex","Gender",unlist_var_nms_22)) %>%
dplyr::mutate(unlist_var_nms_22= sub("Occ.Status","Employment Status",unlist_var_nms_22)) %>%
dplyr::mutate(unlist_var_nms_22= sub("Age at Admission","Admission Age",unlist_var_nms_22)) %>%
tidyr::separate(unlist_var_nms_22,c("var","cat"), sep="~-~") %>%# View()
dplyr::mutate(unlist_var_nms_22=paste0(cat,"~-~",var)) %>%
# dplyr::mutate(unlist_var_nms_22= wrap.labels(unlist_var_nms_22, 25)) %>%
# dplyr::mutate(unlist_var_names=
# dplyr::case_when(unlist_var_names)) %>%
ggplot(aes(x = reorder(unlist_var_nms_22, value), y = value,
group = variable, color = variable)) +
geom_hline(yintercept = 0.2, color = "black", size = 0.2, linetype="dashed") +
coord_flip() +
theme_classic() +
theme(legend.key = element_blank()) +
labs(y = "Average absolute standardized differences in means",x = "",color="",shape="") +
scale_y_continuous(limits = c(0,max(smd_df_prev_match_after_match$value+.05)), breaks = seq(0,1.5,by=.1)) +
scale_color_manual(values=c("gray20","black")) +
geom_point(aes(shape=variable),size=4) +
scale_shape_manual(values=c(8,16)) +
theme(panel.spacing.x = unit(0,"line")) +
theme(text = element_text(size=13),
legend.background=element_blank(),
legend.key = element_rect(colour = "transparent", fill = "transparent"),
legend.position = c(max(smd_df_prev_match_after_match$value)*.65,.5))+
#theme(axis.text.x = ggtext::element_markdown())+
guides(y = ggh4x::guide_axis_nested(delim = "~-~", order=1))
loveplot_bal3
if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL.*","",path),"_mult_state_ags/loveplot_bal32_2022.jpg"), height=8, width= 8, res= 500, units = "in")
loveplot_bal3
dev.off()
pdf(paste0(gsub("SUD_CL.*","",path),"_mult_state_ags/loveplot_bal32_2022.pdf"), height=8, width= 8)
loveplot_bal3
dev.off()
}
We allowed to tolerate fech_ing_num (SMD=0.18), because the date of
admission not necessarily had to be strictly balanced, assuming that not
every user had to be admitted to treatment in exact dates.
Multistate
# dplyr::filter(motivodeegreso_mod_imp!="En curso")%>% #Sacar los tratamientos que estén en curso
tab1_lab<- paste0('Original C1 Dataset \n(n = ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab2_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab1_5_lab<- paste0('• Duplicated entries\\l • Overlapping treatments of users\\l • Intermediate events of treatment (continuous referrals)')
tab4_lab<- paste0('Imputed C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab3_5_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab6_lab<- paste0('C1 Matched Sample\nin Treatment Setting \n(n = ', formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>%
dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% nrow(), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>%
dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
lab_tab<- paste0(" Result of the matching on treatment setting\nNo. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
#https://stackoverflow.com/questions/46750364/diagrammer-and-graphviz
#https://mikeyharper.uk/flowcharts-in-r-using-diagrammer/
#http://blog.nguyenvq.com/blog/2012/05/29/better-decision-tree-graphics-for-rpart-via-party-and-partykit/
#http://blog.nguyenvq.com/blog/2014/01/17/skeleton-to-create-fast-automatic-tree-diagrams-using-r-and-graphviz/
#https://cran.r-project.org/web/packages/DiagrammeR/vignettes/graphviz-mermaid.html
#https://stackoverflow.com/questions/39133058/how-to-use-graphviz-graphs-in-diagrammer-for-r
#https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789802566/1/ch01lvl1sec21/creating-diagrams-via-the-diagrammer-package
#https://justlegal.be/2019/05/using-flowcharts-to-display-legal-procedures/
# paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
#
library(DiagrammeR) #⋉
grViz("digraph flowchart {
# node definitions with substituted label text
node [fontname = Times, shape = rectangle,fontsize = 9]
tab1 [label = '@@1']
tab2 [label = '@@2']
tab3 [label = '•Duplicated entries\\l•Intermediate events of treatment (continuous referrals)\\l',fontsize = 7]
tab4 [label = '@@4']
blank [label = '', width = 0.0001, height = 0.0001]
blank2 [label = '', width = 0.0001, height = 0.0001]
blank3 [label = '', width = 0.0001, height = 0.0001]
tab5 [label = '•Logically Inconsistent candidates for imputation\\l•Ties in candidates for imputation\\l',fontsize = 7]
tab6 [label= '@@6']
tab7 [label = '•Matching pairs based on balance of covariates at basline,\\l•Pairs 1:1\\l',fontsize = 7]
# edge definitions with the node IDs
tab1 -> blank [arrowhead = none,label=' Data wrangling and normalization process',fontsize = 8];
blank -> tab3
blank -> tab2
tab2 -> blank2 [arrowhead = none];
blank2 -> tab5
blank2 -> tab4 [label=' Result of the imputation of missing values',fontsize = 8];
tab4 -> blank3 [arrowhead= none];
blank3-> tab7
blank3 -> tab6 [label='@@7',fontsize = 8];
subgraph {
rank = same; tab3; blank;
}
subgraph {
rank = same; tab5; blank2;
}
subgraph {
rank = same; tab7; blank3;
}
}
[1]: tab1_lab
[2]: tab2_lab
[3]: tab1_5_lab
[4]: tab4_lab
[5]: ''
[6]: tab6_lab
[7]: lab_tab
")
# {rank=same; 'tab2'' -> tab3 [label='',fontsize = 11]}; #⋉
#CONS_C1_df_dup_SEP_2020_irrs_health
d_match_surv<-
CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>%
dplyr::filter(hash_key %in% unlist(CONS_C1_df_dup_SEP_2020_irrs_health[,"hash_key"])) %>%
dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","event","person_days","person_years","diff_bet_treat", "fech_ing_next_treat")],by="row") %>%
#browse hash_key duplicates_filtered2 cum_diff_bet_treat dup2_real diff_bet_treat if hash_key=="0737aacbb7efdd418f7a37ce3386ce5e"|hash_key=="07668f2d3e4f6beb7975e43ee96eac80"
dplyr::mutate(res_drop_out=dplyr::case_when(
tipo_de_plan_res==1 & abandono_temprano_rec==T ~1,
T~0)) %>%
dplyr::mutate(res_drop_out=factor(res_drop_out)) %>%
dplyr::mutate(status_censorship=dplyr::case_when(
motivodeegreso_mod_imp=="Ongoing treatment" ~1,
T~0)) %>%
dplyr::mutate(tr_completion=factor(dplyr::case_when(
motivodeegreso_mod_imp=="Therapeutic discharge" ~1,
motivodeegreso_mod_imp=="Ongoing treatment" ~0,
T~2),labels=c("Ongoing treatment", "Completion","Non-completion"))) %>%
dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>%
dplyr::mutate(min_achievement=factor(dplyr::case_when(
evaluacindelprocesoteraputico=="3-Minimum Achievement" ~1,
is.na(evaluacindelprocesoteraputico) ~0,
T~2),labels=c("Ongoing treatment", "Minimum achievement","High/Medium achievement"))) %>%
dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>%
dplyr::arrange(hash_key, fech_ing_num) %>%
dplyr::select(n_hash,hash_key,everything()) %>%
#ids_matched_filter
dplyr::left_join(ids_matched_rows, by=c("row")) %>%
dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>%
dplyr::select(-rn_id,-group) %>% #glimpse()
dplyr::rename("row_c"="row_2") %>%
dplyr::left_join(ids_matched_rows, by=c("row"="row_2")) %>%
dplyr::mutate(t_id_1=ifelse(!is.na(t_id_1.x),t_id_1.x,t_id_1.y)) %>%
dplyr::mutate(c_id_1=ifelse(!is.na(c_id_1.x),c_id_1.x,c_id_1.y)) %>%
dplyr::mutate(row_c=ifelse(!is.na(row_c),row_c,row.y)) %>%
dplyr::mutate(group_match=ifelse(!is.na(group),group,group_match)) %>%
#2021-05-04, cambié el nombre del agrupador y rellené las columnas matcheadas para los tratamientos posteriores no matcheados
dplyr::group_by(n_hash) %>%
tidyr::fill(group_match, .direction="updown") %>%
tidyr::fill(t_id_1, .direction="updown") %>%
tidyr::fill(c_id_1, .direction="updown") %>%
ungroup() %>%
dplyr::select(-t_id_1.x,-c_id_1.x,-t_id_1.y,-c_id_1.y,-group,-row.y,-rn_id) %>%
dplyr::rename("id"="n_hash")
#%>% #glimpse() ids_matched_rows ids_matched_filter
#dplyr::left_join(ids_matched, by=c("row"="t_id_1")) %>%
#dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>%
#dplyr::select(-c_id_1,-group) %>%
#dplyr::left_join(ids_matched, by=c("row"="c_id_1")) %>%
#dplyr::mutate(group_match=ifelse(is.na(group_match),group,group_match)) %>%
#dplyr::filter(!is.na(group_match))
#dplyr::select(-t_id_1,-group)
if (
d_match_surv %>%
dplyr::group_by(row) %>%
dplyr::mutate(n_row=n()) %>%
dplyr::ungroup() %>%
dplyr::filter(n_row>1) %>% nrow()>1){stop("Duplicated rows")}
attr(d_match_surv$res_drop_out,"label") <- "Early Drop-out & Residential Setting (=1)"
attr(d_match_surv$status_censorship,"label") <- "Ongoing treatment"
attr(d_match_surv$origen_ingreso_mod,"label") <- "Admission Motive"
attr(d_match_surv$edad_al_ing,"label") <-"Admission Age"
attr(d_match_surv$duplicates_filtered,"label") <- "Treatments by User (#)"
attr(d_match_surv$more_one_treat,"label") <- "More than one treatment"
attr(d_match_surv$sus_ini_mod_mvv,"label")<-"First Substance Used"
attr(d_match_surv$estado_conyugal_2,"label")<-"Marital Status"
attr(d_match_surv$escolaridad_rec,"label")<-"Educational Attainment"
attr(d_match_surv$edad_ini_cons,"label")<-"Substance Use Onset Age"
attr(d_match_surv$freq_cons_sus_prin,"label")<-"Primary Substance at Admission Usage Frequency"
attr(d_match_surv$nombre_region,"label")<-"Regional Location of Center"
attr(d_match_surv$dg_cie_10_rec,"label")<-"Psychiatric Comorbidity"
attr(d_match_surv$dg_trs_cons_sus_or,"label")<-"Drug Dependence diagnosis"
attr(d_match_surv$min_achievement,"label")<-"Minimum Achievement in the Therapeutic Process"
attr(d_match_surv$abandono_temprano_rec,"label")<-"Treatment Length (>90)"
attr(d_match_surv$dias_treat_imp_sin_na,"label")<-"Days of Treatment (missing dates of discharge were replaced with difference from 2019-11-13)"
attr(d_match_surv$tipo_de_plan_res,"label")<-"Residential Setting"
attr(d_match_surv$tipo_centro_pub,"label")<-"Public Center"
attr(d_match_surv$condicion_ocupacional_corr,"label")<-"Occupational Status"
attr(d_match_surv$event,"label")<-"Users with Posterior Treatments (=1)"
attr(d_match_surv$person_days,"label")<-"User's Days available in the system for the study"
attr(d_match_surv$person_years,"label")<-"User's Years available in the system for the study"
attr(d_match_surv$motivodeegreso_mod_imp,"label")<-"Cause of Discharge"
attr(d_match_surv$diff_bet_treat,"label")<-"Days of difference between the Next Treatment"
attr(d_match_surv$group_match,"label")<-"Match Paired"
attr(d_match_surv$tr_completion,"label")<-"Treatment Successful Completion"
attr(d_match_surv$fech_ing_next_treat,"label") <- 'Date of Admission to Posterior Treatment (numeric)'
library(compareGroups)
table_surv <- compareGroups::compareGroups(tipo_de_plan_res ~ origen_ingreso_mod+ dg_cie_10_rec+ sexo_2+ edad_al_ing+ abandono_temprano_rec+ duplicates_filtered+ more_one_treat+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ freq_cons_sus_prin+ nombre_region+ tipo_centro_pub+ min_achievement+ dg_trs_cons_sus_or+ edad_ini_cons+ condicion_ocupacional_corr+ dias_treat_imp_sin_na+ event+ person_days+ person_years+ diff_bet_treat+ tr_completion+ motivodeegreso_mod_imp,
method= c(origen_ingreso_mod=3,
dg_cie_10_rec=3,
sexo_2=3,
edad_al_ing=2,
abandono_temprano_rec=2,
edad_al_ing=2,
duplicates_filtered=3,
more_one_treat=3,
sus_ini_mod_mvv=3,
estado_conyugal_2=3,
escolaridad_rec=3,
freq_cons_sus_prin=3,
nombre_region=3,
tipo_centro_pub=3,
min_achievement=3,
dg_trs_cons_sus_or=3,
edad_ini_cons=2,
condicion_ocupacional_corr=3,
dias_treat_imp_sin_na=2,
event=3,
person_days=2,
person_years= 2,
diff_bet_treat= 2,
tr_completion= 3,
motivodeegreso_mod_imp=3
),
data = d_match_surv,
include.miss = T,
var.equal=T
)
pvals <- getResults(table_surv)
#p.adjust(pvals, method = "BH")
restab_surv <- createTable(table_surv,show.p.overall = T)
compareGroups::export2md(restab_surv, size=13, first.strip=T, hide.no="no", position="center",col.names=c("Variables","Ambulatory","Residential", "Sig."),
format="html",caption= "Table 6. Summary descriptives table")%>%
kableExtra::row_spec(1,bold=T) %>%
kableExtra::add_footnote(c("Note. Variables of C1 dataset had to be standardized before comparison;", "Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;", "Categorical variables are presented as number (%)"), notation = "none")%>%
kableExtra::kable_classic() %>%
kableExtra::scroll_box(width = "100%", height = "600px")
Table 6. Summary descriptives table
Variables
|
Ambulatory
|
Residential
|
Sig.
|
|
N=16895
|
N=14844
|
|
Admission Motive:
|
|
|
<0.001
|
Spontaneous
|
6932 (41.0%)
|
5501 (37.1%)
|
|
Assisted Referral
|
2906 (17.2%)
|
3064 (20.6%)
|
|
Other
|
951 (5.63%)
|
907 (6.11%)
|
|
Justice Sector
|
1199 (7.10%)
|
966 (6.51%)
|
|
Health Sector
|
4907 (29.0%)
|
4406 (29.7%)
|
|
Psychiatric Comorbidity:
|
|
|
<0.001
|
Without psychiatric comorbidity
|
4640 (27.5%)
|
3773 (25.4%)
|
|
Diagnosis unknown (under study)
|
3469 (20.5%)
|
3264 (22.0%)
|
|
With psychiatric comorbidity
|
8786 (52.0%)
|
7807 (52.6%)
|
|
Sexo Usuario/Sex of User:
|
|
|
0.038
|
Men
|
11203 (66.3%)
|
10007 (67.4%)
|
|
Women
|
5692 (33.7%)
|
4837 (32.6%)
|
|
Admission Age
|
32.9 [27.0;40.8]
|
33.1 [26.9;41.1]
|
0.620
|
Treatment Length (>90):
|
|
|
<0.001
|
FALSE
|
13799 (81.7%)
|
11849 (79.8%)
|
|
TRUE
|
3096 (18.3%)
|
2995 (20.2%)
|
|
Treatments by User (#):
|
|
|
0.009
|
1
|
8597 (50.9%)
|
7485 (50.4%)
|
|
2
|
4702 (27.8%)
|
4056 (27.3%)
|
|
3
|
2166 (12.8%)
|
1902 (12.8%)
|
|
4
|
911 (5.39%)
|
829 (5.58%)
|
|
5
|
314 (1.86%)
|
346 (2.33%)
|
|
6
|
151 (0.89%)
|
149 (1.00%)
|
|
7
|
38 (0.22%)
|
53 (0.36%)
|
|
8
|
16 (0.09%)
|
24 (0.16%)
|
|
More than one treatment:
|
|
|
0.419
|
0
|
8597 (50.9%)
|
7485 (50.4%)
|
|
1
|
8298 (49.1%)
|
7359 (49.6%)
|
|
First Substance Used:
|
|
|
<0.001
|
Alcohol
|
7356 (43.5%)
|
6117 (41.2%)
|
|
Cocaine hydrochloride
|
746 (4.42%)
|
632 (4.26%)
|
|
Cocaine paste
|
2457 (14.5%)
|
2319 (15.6%)
|
|
Marijuana
|
5924 (35.1%)
|
5407 (36.4%)
|
|
Other
|
412 (2.44%)
|
369 (2.49%)
|
|
Marital Status:
|
|
|
<0.001
|
Married/Shared living arrangements
|
4246 (25.1%)
|
3424 (23.1%)
|
|
Separated/Divorced
|
1864 (11.0%)
|
1550 (10.4%)
|
|
Single
|
10600 (62.7%)
|
9726 (65.5%)
|
|
Widower
|
185 (1.09%)
|
144 (0.97%)
|
|
Educational Attainment:
|
|
|
0.005
|
3-Completed primary school or less
|
5186 (30.7%)
|
4787 (32.2%)
|
|
2-Completed high school or less
|
8753 (51.8%)
|
7594 (51.2%)
|
|
1-More than high school
|
2956 (17.5%)
|
2463 (16.6%)
|
|
Primary Substance at Admission Usage Frequency:
|
|
|
<0.001
|
1 day a week or more
|
593 (3.51%)
|
323 (2.18%)
|
|
2 to 3 days a week
|
2504 (14.8%)
|
1578 (10.6%)
|
|
4 to 6 days a week
|
2444 (14.5%)
|
1947 (13.1%)
|
|
Daily
|
10639 (63.0%)
|
10736 (72.3%)
|
|
Did not use
|
335 (1.98%)
|
106 (0.71%)
|
|
Less than 1 day a week
|
380 (2.25%)
|
154 (1.04%)
|
|
Public Center:
|
|
|
<0.001
|
FALSE
|
9659 (57.2%)
|
10477 (70.6%)
|
|
TRUE
|
7236 (42.8%)
|
4367 (29.4%)
|
|
Minimum Achievement in the Therapeutic Process:
|
|
|
<0.001
|
Ongoing treatment
|
1135 (6.72%)
|
649 (4.37%)
|
|
Minimum achievement
|
8416 (49.8%)
|
6099 (41.1%)
|
|
High/Medium achievement
|
7344 (43.5%)
|
8096 (54.5%)
|
|
Drug Dependence diagnosis:
|
|
|
<0.001
|
FALSE
|
2061 (12.2%)
|
1296 (8.73%)
|
|
TRUE
|
14834 (87.8%)
|
13548 (91.3%)
|
|
Substance Use Onset Age
|
15.0 [14.0;17.0]
|
15.0 [13.0;17.0]
|
0.081
|
Occupational Status:
|
|
|
<0.001
|
Employed
|
3781 (22.4%)
|
2008 (13.5%)
|
|
Inactive
|
1872 (11.1%)
|
1530 (10.3%)
|
|
Looking for a job for the first time
|
33 (0.20%)
|
24 (0.16%)
|
|
No activity
|
1817 (10.8%)
|
2085 (14.0%)
|
|
Not seeking for work
|
343 (2.03%)
|
392 (2.64%)
|
|
Unemployed
|
9049 (53.6%)
|
8805 (59.3%)
|
|
Days of Treatment (missing dates of discharge were replaced with
difference from 2019-11-13)
|
151 [83.0;274]
|
151 [66.0;279]
|
<0.001
|
Users with Posterior Treatments (=1):
|
|
|
0.419
|
0
|
8597 (50.9%)
|
7485 (50.4%)
|
|
1
|
8298 (49.1%)
|
7359 (49.6%)
|
|
User’s Days available in the system for the study
|
416 [145;1185]
|
406 [153;1101]
|
0.026
|
User’s Years available in the system for the study
|
1.14 [0.40;3.24]
|
1.11 [0.42;3.01]
|
0.026
|
Days of difference between the Next Treatment
|
344 [137;776]
|
263 [72.0;695]
|
<0.001
|
Treatment Successful Completion:
|
|
|
<0.001
|
Ongoing treatment
|
1135 (6.72%)
|
649 (4.37%)
|
|
Completion
|
3036 (18.0%)
|
4289 (28.9%)
|
|
Non-completion
|
12724 (75.3%)
|
9906 (66.7%)
|
|
Cause of Discharge:
|
|
|
<0.001
|
Administrative discharge
|
1449 (8.58%)
|
1839 (12.4%)
|
|
Early Drop-out
|
3096 (18.3%)
|
2995 (20.2%)
|
|
Late Drop-out
|
6020 (35.6%)
|
2932 (19.8%)
|
|
Ongoing treatment
|
1135 (6.72%)
|
649 (4.37%)
|
|
Referral to another treatment
|
2159 (12.8%)
|
2140 (14.4%)
|
|
Therapeutic discharge
|
3036 (18.0%)
|
4289 (28.9%)
|
|
Note. Variables of C1 dataset had to be standardized before
comparison;
|
Continuous variables are presented as Medians and
Percentiles 25 and 75 were shown;
|
Categorical variables are presented as number (%)
|
After matching, we selected 31,739 treatments (users=22,452).
library(Epi)
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
states_trans<-c("Admission", "Readmission", "Readmission2", "Readmission3", "Readmission4")
trans_matrix <- matrix(c(
NA,1,NA,NA,NA,
NA,NA,2,NA,NA,
NA,NA,NA,3,NA,
NA,NA,NA,NA,4,
NA,NA,NA,NA,NA
), nrow=5, ncol=5,byrow=TRUE,dimnames=list(from=states_trans,to=states_trans))
Tot_reg<-
d_match_surv %>%
dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup) %>%
## Filter cases with 4 or more registries
#sum(prop.table(table(d_match_surv$dup))[1:3])
#dplyr::filter(dup<4) %>%
nrow()
Less4_reg<-
d_match_surv %>%
dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup) %>%
## Filter cases with 4 or more registries
#sum(prop.table(table(d_match_surv$dup))[1:3])
dplyr::filter(dup<5) %>%
nrow()
#All possible paths through the multi-state model can be found here:
Epi::boxes.Lexis(trans_matrix, wmult=1.3, hmult=1.3, cex=.8,
boxpos = list(y = rep(50,5),
x = (1:5)*(20)-10),
txt.arr=c(expression("1) " *lambda['12']),
expression("2) " *lambda['23']),
expression("3) " *lambda['34']),
expression("4) " *lambda['45'])
))
title(sub = paste0("No recurring states;\nAbsorbing state: Fourth Readmission (",scales::percent((Less4_reg/Tot_reg),accuracy = 0.1)," of the registries, considering that each registry\n had a time-to-readmission); Other causes of discharge were not events of interest")) ## internal titles

To the first and second states, we subtracted one day if it overlaps
with the date of discharge. For the third and the following states, we
added one day in case of overlapping dates due to continuous treatments.
We left 291 entries of 200 patients that had more than 4
readmissions.
library(Epi)
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
states_trans2<-c("Admission", "TD", "DWCA", "Readmission", "Readmissionb", "Readmission2", "Readmission2b", "Readmission3", "Readmission3b")
trans_matrix2 <- matrix(c(
NA,1,2,NA,NA,NA,NA,NA,NA,
NA,NA,NA,3,NA,NA,NA,NA,NA,
NA,NA,NA,NA,4,NA,NA,NA,NA,
NA,NA,NA,NA,NA,5,NA,NA,NA,
NA,NA,NA,NA,NA,NA,6,NA,NA,
NA,NA,NA,NA,NA,NA,NA,7,NA,
NA,NA,NA,NA,NA,NA,NA,NA,8,
NA,NA,NA,NA,NA,NA,NA,NA,NA,
NA,NA,NA,NA,NA,NA,NA,NA,NA
), nrow=9, ncol=9,byrow=TRUE,dimnames=list(from=states_trans2,to=states_trans2))
Less3_reg<-
d_match_surv %>%
dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup) %>%
## Filter cases with 3 or more registries
#sum(prop.table(table(d_match_surv$dup))[1:3])
dplyr::filter(dup<4) %>%
nrow()
#All possible paths through the multi-state model can be found here:
Epi::boxes.Lexis(trans_matrix2, wmult=1, hmult=1.5, cex=.8,
boxpos = list(y = c(50,rep(c(75,25),4))+5,
x = c(7, 25, 25, 47, 47, 70, 70, 92, 92)),
txt.arr=c(expression("1) " *lambda['12']),
expression("2) " *lambda['13']),
expression("3) " *lambda['24']),
expression("4) " *lambda['35']),
expression("5) " *lambda['46']),
expression("6) " *lambda['57']),
expression("7) " *lambda['68']),
expression("8) " *lambda['79']),""
))
title(sub = paste0("a= Treatment completion; b=Treatment non-completion;\n No recurring states;\nAbsorbing state: Third Readmission (",scales::percent((Less3_reg/Tot_reg),accuracy = 0.1)," of the registries, considering that each registry\n had a time-to-readmission); Other causes of discharge were not events of interest")) ## internal titles

For the nine states model, o the first and second states, we
subtracted one day if it overlaps with the date of discharge. For the
third and the following states, we added one day in case of overlapping
dates due to continuous treatments. We left 926 entries of 635 patients
that had more than 3 readmissions.
### diff_bet_treat is the variable that includes time-to-readmission
### AGS: Starts in 0, excepting left truncated cases
### variables should start with time_ & status_
### Transform to years once generated
### Looks that they all share the same objective time
### AGS: If there is a continuous state, interval censoring is not necessary
### 0's are censored status
library(mstate)
d_match_surv_msprep<-
d_match_surv %>%
dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>%
## Filter the fifth readmission of registries
dplyr::filter(dup<=5) %>%
dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>%
dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>%
dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>%
#dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>%
tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
#,"tipo_de_plan_res_baseline"
dplyr::arrange(id) %>%
dplyr::select(id, group_match,everything()) %>%
#display error if there are more than row per user
purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"),
~.) %>%
#22,916 x 20
#Check overlapped dates
purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat4= fech_ing_num_5-fech_ing_num_4)%>% dplyr::filter(diff_bet_treat4<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2),1,0,0),
Readmission2_status=if_else(!is.na(fech_ing_num_3),1,0,0),
Readmission3_status=if_else(!is.na(fech_ing_num_4),1,0,0),
Readmission4_status=if_else(!is.na(fech_ing_num_5),1,0,0)) %>%
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
dplyr::mutate(
Readmission_time= dplyr::case_when(
Readmission_status==1~as.numeric(fech_ing_num_2-fech_ing_num_1),
Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission2_time= dplyr::case_when(
Readmission2_status==1~as.numeric(fech_ing_num_3-fech_ing_num_1),
Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission3_time= dplyr::case_when(
Readmission3_status==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission4_time= dplyr::case_when(
Readmission4_status==1~as.numeric(fech_ing_num_5-fech_ing_num_1),
Readmission4_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
## THE USERS THAT DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
dplyr::select(
id, group_match,
tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
Readmission_time, Readmission_status, Readmission2_time, Readmission2_status,
Readmission3_time, Readmission3_status, Readmission4_time, Readmission4_status,
dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4, duplicates_filtered, fech_ing_num_1) %>%
as.data.frame()
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
### ANALYSES W COMPLETE DATA
hashs_w_complete_data<-
na.omit(cbind.data.frame(CONS_C1_df_dup_SEP_2020_match$hash_key,
CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,
CONS_C1_df_dup_SEP_2020_match$dg_cie_10_rec,
CONS_C1_df_dup_SEP_2020_match$sexo_2,
CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,
CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub, #cuidado
CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2,
CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,
CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,
CONS_C1_df_dup_SEP_2020_match$nombre_region,
CONS_C1_df_dup_SEP_2020_match$condicion_ocupacional_corr,
CONS_C1_df_dup_SEP_2020_match$sus_principal_mod,
CONS_C1_df_dup_SEP_2020_match$dg_trs_cons_sus_or,
CONS_C1_df_dup_SEP_2020_match$tenencia_de_la_vivienda_mod,
CONS_C1_df_dup_SEP_2020_match$edad_al_ing,
CONS_C1_df_dup_SEP_2020_match$fech_ing_num,
CONS_C1_df_dup_SEP_2020_match$edad_ini_cons
)) %>% dplyr::distinct(`CONS_C1_df_dup_SEP_2020_match$hash_key`)
d_match_surv_msprep_cc<-
d_match_surv %>%
dplyr::filter(hash_key %in% unlist(hashs_w_complete_data)) %>%
dplyr::group_by(group_match) %>%
dplyr::mutate(pairs=n_distinct(hash_key)) %>%
dplyr::ungroup() %>%
dplyr::filter(pairs==2) %>% #janitor::tabyl(pairs)
dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>%
## Filter the fifth readmission of registries
dplyr::filter(dup<=5) %>%
dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>%
dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>%
dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>%
#dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>%
tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
#,"tipo_de_plan_res_baseline"
dplyr::arrange(id) %>%
dplyr::select(id, group_match,everything()) %>%
#display error if there are more than row per user
purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"),
~.) %>%
#22,916 x 20
#Check overlapped dates
purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat4= fech_ing_num_5-fech_ing_num_4)%>% dplyr::filter(diff_bet_treat4<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2),1,0,0),
Readmission2_status=if_else(!is.na(fech_ing_num_3),1,0,0),
Readmission3_status=if_else(!is.na(fech_ing_num_4),1,0,0),
Readmission4_status=if_else(!is.na(fech_ing_num_5),1,0,0)) %>%
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
dplyr::mutate(
Readmission_time= dplyr::case_when(
Readmission_status==1~as.numeric(fech_ing_num_2-fech_ing_num_1),
Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission2_time= dplyr::case_when(
Readmission2_status==1~as.numeric(fech_ing_num_3-fech_ing_num_1),
Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission3_time= dplyr::case_when(
Readmission3_status==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission4_time= dplyr::case_when(
Readmission4_status==1~as.numeric(fech_ing_num_5-fech_ing_num_1),
Readmission4_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
## THE USERS THAT DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
dplyr::select(
id, group_match,
tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
Readmission_time, Readmission_status, Readmission2_time, Readmission2_status,
Readmission3_time, Readmission3_status, Readmission4_time, Readmission4_status,
dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4, duplicates_filtered,fech_ing_num_1) %>%
as.data.frame()
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
d_match_surv_msprep %>%
dplyr::filter(duplicates_filtered==max(duplicates_filtered)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8. Data in Wide, Five-states (example of patients with >= 5 admissions)",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 13)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::kable_classic() %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8. Data in Wide, Five-states (example of patients with >= 5
admissions)
id
|
group_match
|
tipo_de_plan_res_1
|
tipo_de_plan_res_2
|
tipo_de_plan_res_3
|
tipo_de_plan_res_4
|
Readmission_time
|
Readmission_status
|
Readmission2_time
|
Readmission2_status
|
Readmission3_time
|
Readmission3_status
|
Readmission4_time
|
Readmission4_status
|
dias_treat_imp_sin_na_1
|
dias_treat_imp_sin_na_2
|
dias_treat_imp_sin_na_3
|
dias_treat_imp_sin_na_4
|
TD_1
|
TD_2
|
TD_3
|
TD_4
|
DWCA_1
|
DWCA_2
|
DWCA_3
|
DWCA_4
|
duplicates_filtered
|
fech_ing_num_1
|
1,059
|
882
|
1
|
1
|
1
|
0
|
520
|
1
|
881
|
1
|
1,256
|
1
|
1,469
|
1
|
252
|
54
|
178
|
59
|
0
|
0
|
0
|
0
|
1
|
1
|
1
|
1
|
8
|
14,460
|
1,146
|
955
|
1
|
1
|
1
|
1
|
502
|
1
|
1,058
|
1
|
2,021
|
1
|
2,157
|
1
|
18
|
123
|
34
|
11
|
0
|
0
|
0
|
0
|
1
|
1
|
1
|
1
|
8
|
14,707
|
2,554
|
2,059
|
1
|
0
|
1
|
1
|
442
|
1
|
869
|
1
|
1,293
|
1
|
1,523
|
1
|
335
|
290
|
221
|
2
|
0
|
0
|
0
|
0
|
1
|
1
|
1
|
1
|
8
|
15,414
|
7,838
|
6,392
|
1
|
0
|
1
|
1
|
242
|
1
|
501
|
1
|
875
|
1
|
1,418
|
1
|
101
|
121
|
86
|
6
|
0
|
0
|
0
|
0
|
1
|
1
|
1
|
1
|
8
|
15,225
|
10,771
|
8,773
|
1
|
1
|
1
|
1
|
184
|
1
|
420
|
1
|
687
|
1
|
728
|
1
|
70
|
211
|
187
|
38
|
0
|
0
|
0
|
0
|
1
|
1
|
1
|
1
|
8
|
15,523
|
a Note= Proportions from the initial state
|
invisible("No se si debiera transformarlo a años. Tal vez a meses. Si lo transformo, me darán esas extrapolaciones bizarras del artículo anterior")
### diff_bet_treat is the variable that includes time-to-readmission
### AGS: Starts in 0, excepting left truncated cases
### variables should start with time_ & status_
### Transform to years once generated
### Looks that they all share the same objective time
### AGS: If there is a continous state, interval censoring is not necessary
### 0's are censored status
library(mstate)
d_match_surv_msprep_oct_2022<-
d_match_surv %>%
dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>%
## Filter the foruth readmission of registries
dplyr::filter(dup<=4) %>%
dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>%
dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>%
dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>%
#dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>%
tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
#,"tipo_de_plan_res_baseline"
dplyr::arrange(id) %>%
dplyr::select(id, group_match,everything()) %>%
#display error if there are more than row per user
purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"),
~.) %>%
#22,916 x 20 # 2022-oct= 22,452
#OCT_2022 discount a day for treatments to avoid overlapping
dplyr::mutate(dias_treat_imp_sin_na_1= dplyr::case_when(dias_treat_imp_sin_na_1>=2~ dias_treat_imp_sin_na_1-1,T~dias_treat_imp_sin_na_1)) %>%
dplyr::mutate(fech_ing_num_2= dplyr::case_when(dias_treat_imp_sin_na_1==1~ fech_ing_num_2+1,T~fech_ing_num_2)) %>%
#Check overlapped dates
purrr::when(dplyr::mutate(.,diff_bet_treat0= fech_ing_num_2-(dias_treat_imp_sin_na_1 +fech_ing_num_1))%>% dplyr::filter(diff_bet_treat0<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
#OCT 2022
dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2)&TD_1==1,1,0,0),
Readmission2_status=if_else(!is.na(fech_ing_num_3)&TD_1==1,1,0,0),
Readmission3_status=if_else(!is.na(fech_ing_num_4)&TD_1==1,1,0,0))%>%
dplyr::mutate(Readmissionb_status=if_else(!is.na(fech_ing_num_2)&DWCA_1==1,1,0,0),
Readmission2b_status=if_else(!is.na(fech_ing_num_3)&DWCA_1==1,1,0,0),
Readmission3b_status=if_else(!is.na(fech_ing_num_4)&DWCA_1==1,1,0,0))%>%
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
#OCT -2022, added a variable.
dplyr::mutate(
TD_time= dplyr::case_when(
TD_1==1~as.numeric(dias_treat_imp_sin_na_1),
TD_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
DWCA_time= dplyr::case_when(
DWCA_1==1~as.numeric(dias_treat_imp_sin_na_1),
DWCA_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#OCT -2022, modified readmission time
#First step: Readmission time and Readmissionb time
dplyr::mutate(
Readmission_time= dplyr::case_when(
Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmissionb_time= dplyr::case_when(
Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
Readmissionb_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#Second step: Readmission2 time and Readmission2b time
dplyr::mutate(
Readmission2_time= dplyr::case_when(
Readmission2_status==1 & Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission2b_time= dplyr::case_when(
Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
Readmission2b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#Third step: Readmission3 time and Readmission3b time
dplyr::mutate(
Readmission3_time= dplyr::case_when(
Readmission3_status==1 & Readmission2_status==1 & Readmission_status==1 & TD_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission3b_time= dplyr::case_when(
Readmission3b_status==1 & Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
Readmission3b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#OCT 2022
dplyr::rename("TD_status"="TD_1","DWCA_status"="DWCA_1") %>%
## THE USERS THAT DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
dplyr::mutate(TD_time= dplyr::case_when(TD_time==0 &TD_status==1~1,T~TD_time)) %>%
dplyr::mutate(DWCA_time= dplyr::case_when(DWCA_time==0 &DWCA_status==1~1,T~DWCA_time)) %>%
dplyr::select(
id, group_match,
tipo_de_plan_res_1, TD_time, TD_status, DWCA_time, DWCA_status,
Readmission_time, Readmission_status, Readmissionb_time, Readmissionb_status, Readmission2_time, Readmission2_status, Readmission2b_time, Readmission2b_status,
Readmission3_time, Readmission3_status, Readmission3b_time, Readmission3b_status,
dias_treat_imp_sin_na_1, duplicates_filtered, fech_ing_num_1) %>%
dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1) %>%
as.data.frame()
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
### ANALYSES W COMPLETE DATA
d_match_surv_msprep_cc_oct_2022<-
d_match_surv %>%
dplyr::filter(hash_key %in% unlist(hashs_w_complete_data)) %>%
dplyr::group_by(group_match) %>%
dplyr::mutate(pairs=n_distinct(hash_key)) %>%
dplyr::ungroup() %>%
dplyr::filter(pairs==2) %>% #janitor::tabyl(pairs)
dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>%
## Filter the fourth readmission of registries
dplyr::filter(dup<=4) %>%
dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>%
dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>%
dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>%
#dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>%
tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
#,"tipo_de_plan_res_baseline"
dplyr::arrange(id) %>%
dplyr::select(id, group_match,everything()) %>%
#display error if there are more than row per user
purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"),
~.) %>%
#22,916 x 20 # 2022-oct= 22,452
#OCT_2022 discount a day for treatments to avoid overlapping
dplyr::mutate(dias_treat_imp_sin_na_1= dplyr::case_when(dias_treat_imp_sin_na_1>=2~ dias_treat_imp_sin_na_1-1,T~dias_treat_imp_sin_na_1)) %>%
dplyr::mutate(fech_ing_num_2= dplyr::case_when(dias_treat_imp_sin_na_1==1~ fech_ing_num_2+1,T~fech_ing_num_2)) %>%
#Check overlapped dates
purrr::when(dplyr::mutate(.,diff_bet_treat0= fech_ing_num_2-(dias_treat_imp_sin_na_1 +fech_ing_num_1))%>% dplyr::filter(diff_bet_treat0<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"),
~.) %>%
#OCT 2022
dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2)&TD_1==1,1,0,0),
Readmission2_status=if_else(!is.na(fech_ing_num_3)&TD_1==1,1,0,0),
Readmission3_status=if_else(!is.na(fech_ing_num_4)&TD_1==1,1,0,0))%>%
dplyr::mutate(Readmissionb_status=if_else(!is.na(fech_ing_num_2)&DWCA_1==1,1,0,0),
Readmission2b_status=if_else(!is.na(fech_ing_num_3)&DWCA_1==1,1,0,0),
Readmission3b_status=if_else(!is.na(fech_ing_num_4)&DWCA_1==1,1,0,0))%>%
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
#OCT -2022, added a variable.
#OCT -2022, added a variable.
dplyr::mutate(
TD_time= dplyr::case_when(
TD_1==1~as.numeric(dias_treat_imp_sin_na_1),
TD_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
DWCA_time= dplyr::case_when(
DWCA_1==1~as.numeric(dias_treat_imp_sin_na_1),
DWCA_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#OCT -2022, modified readmission time
#First step: Readmission time and Readmissionb time
dplyr::mutate(
Readmission_time= dplyr::case_when(
Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmissionb_time= dplyr::case_when(
Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
Readmissionb_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#Second step: Readmission2 time and Readmission2b time
dplyr::mutate(
Readmission2_time= dplyr::case_when(
Readmission2_status==1 & Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission2b_time= dplyr::case_when(
Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
Readmission2b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#Third step: Readmission3 time and Readmission3b time
dplyr::mutate(
Readmission3_time= dplyr::case_when(
Readmission3_status==1 & Readmission2_status==1 & Readmission_status==1 & TD_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
dplyr::mutate(
Readmission3b_time= dplyr::case_when(
Readmission3b_status==1 & Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
Readmission3b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
#OCT 2022
dplyr::rename("TD_status"="TD_1","DWCA_status"="DWCA_1") %>%
## THE USERS THAT DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
dplyr::mutate(TD_time= dplyr::case_when(TD_time==0 &TD_status==1~1,T~TD_time)) %>%
dplyr::mutate(DWCA_time= dplyr::case_when(DWCA_time==0 &DWCA_status==1~1,T~DWCA_time)) %>%
dplyr::select(
id, group_match,
tipo_de_plan_res_1, TD_time, TD_status, DWCA_time, DWCA_status,
Readmission_time, Readmission_status, Readmissionb_time, Readmissionb_status, Readmission2_time, Readmission2_status, Readmission2b_time, Readmission2b_status,
Readmission3_time, Readmission3_status, Readmission3b_time, Readmission3b_status,
dias_treat_imp_sin_na_1, duplicates_filtered, fech_ing_num_1) %>%
dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1) %>%
as.data.frame()
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
d_match_surv_msprep_oct_2022 %>%
dplyr::filter(duplicates_filtered==max(duplicates_filtered)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8b. Data in Wide, Five-states (example of patients with >= 4 admissions)",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 13)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::kable_classic() %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8b. Data in Wide, Five-states (example of patients with >= 4
admissions)
id
|
group_match
|
tipo_de_plan_res_1
|
TD_time
|
TD_status
|
DWCA_time
|
DWCA_status
|
Readmission_time
|
Readmission_status
|
Readmissionb_time
|
Readmissionb_status
|
Readmission2_time
|
Readmission2_status
|
Readmission2b_time
|
Readmission2b_status
|
Readmission3_time
|
Readmission3_status
|
Readmission3b_time
|
Readmission3b_status
|
dias_treat_imp_sin_na_1
|
duplicates_filtered
|
fech_ing_num_1
|
cens_time
|
1,059
|
882
|
1
|
3,753
|
0
|
251
|
1
|
3,753
|
0
|
520
|
1
|
3,753
|
0
|
881
|
1
|
3,753
|
0
|
1,256
|
1
|
251
|
8
|
14,460
|
3,753
|
1,146
|
955
|
1
|
3,506
|
0
|
17
|
1
|
3,506
|
0
|
502
|
1
|
3,506
|
0
|
1,058
|
1
|
3,506
|
0
|
2,021
|
1
|
17
|
8
|
14,707
|
3,506
|
2,554
|
2,059
|
1
|
2,799
|
0
|
334
|
1
|
2,799
|
0
|
442
|
1
|
2,799
|
0
|
869
|
1
|
2,799
|
0
|
1,293
|
1
|
334
|
8
|
15,414
|
2,799
|
7,838
|
6,392
|
1
|
2,988
|
0
|
100
|
1
|
2,988
|
0
|
242
|
1
|
2,988
|
0
|
501
|
1
|
2,988
|
0
|
875
|
1
|
100
|
8
|
15,225
|
2,988
|
10,771
|
8,773
|
1
|
2,690
|
0
|
69
|
1
|
2,690
|
0
|
184
|
1
|
2,690
|
0
|
420
|
1
|
2,690
|
0
|
687
|
1
|
69
|
8
|
15,523
|
2,690
|
a Note= Proportions from the initial state
|
invisible("No se si debiera transformarlo a años. Tal vez a meses. Si lo transformo, me darán esas extrapolaciones bizarras del artículo anterior")
ms_d_match_surv <- mstate::msprep(time = c(NA, "Readmission_time", "Readmission2_time", "Readmission3_time", "Readmission4_time"),
status = c(NA, "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status"),
data = d_match_surv_msprep,
id = "id",
trans = trans_matrix,
keep = c("fech_ing_num_1", paste0("tipo_de_plan_res_",1:4), paste0("TD_",1:4),paste0("DWCA_",1:4),paste0("dias_treat_imp_sin_na_",1:4)))
ms_d_match_surv_cc <- mstate::msprep(time = c(NA, "Readmission_time", "Readmission2_time", "Readmission3_time", "Readmission4_time"),
status = c(NA, "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status"),
data = d_match_surv_msprep_cc,
id = "id",
trans = trans_matrix,
keep = c("fech_ing_num_1",paste0("tipo_de_plan_res_",1:4), paste0("TD_",1:4),paste0("DWCA_",1:4),paste0("dias_treat_imp_sin_na_",1:4)))
#From starting state 1, subject 66 74 19717 has smallest transition time with status=0
#Everyne has an infinite number in the transition. A good exmple is the user 19717. Only experienced a therapeutic discharge, but in the time from readmission it starts on 910 but ends in INf
#_#_#_#_#_#_#_#_#_
#2022 oct
ms_d_match_surv_oct_2022 <- mstate::msprep(time = c(NA, "TD_time", "DWCA_time", "Readmission_time", "Readmissionb_time", "Readmission2_time", "Readmission2b_time", "Readmission3_time", "Readmission3b_time"),
status = c(NA, "TD_status", "DWCA_status", "Readmission_status", "Readmissionb_status", "Readmission2_status", "Readmission2b_status", "Readmission3_status", "Readmission3b_status"),
data = d_match_surv_msprep_oct_2022,
id = "id",
trans = trans_matrix2,
keep = c("fech_ing_num_1", paste0("tipo_de_plan_res_",1)))
invisible("Para ver si hay casos en que la fecha de término del evento, y el inicio coinciden o es menor")
if(d_match_surv_msprep_oct_2022 %>%
dplyr::filter(id %in% unlist(
ms_d_match_surv_oct_2022%>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(id,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(id))) %>%
#dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>%
nrow()>0){stop("There are cases where Tstop-Tstart <=0")}
if(ms_d_match_surv %>%
dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}
if(ms_d_match_surv_oct_2022 %>%
dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}
ms_d_match_surv_cc_oct_2022 <- mstate::msprep(time = c(NA, "TD_time", "DWCA_time", "Readmission_time", "Readmissionb_time", "Readmission2_time", "Readmission2b_time", "Readmission3_time", "Readmission3b_time"),
status = c(NA, "TD_status", "DWCA_status", "Readmission_status", "Readmissionb_status", "Readmission2_status", "Readmission2b_status", "Readmission3_status", "Readmission3b_status"),
data = d_match_surv_msprep_cc_oct_2022,
id = "id",
trans = trans_matrix2,
keep = c("fech_ing_num_1",paste0("tipo_de_plan_res_",1)))
invisible("Para ver si hay casos en que la fecha de término del evento, y el inicio coinciden o es menor")
if(d_match_surv_msprep_cc_oct_2022 %>%
dplyr::filter(id %in% unlist(
ms_d_match_surv_cc_oct_2022%>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(id,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(id))) %>%
#dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>%
nrow()>0){stop("There are cases where Tstop-Tstart <=0")}
if(ms_d_match_surv_cc %>%
dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}
if(ms_d_match_surv_cc_oct_2022 %>%
dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}
rio::export(
d_match_surv_msprep %>%
dplyr::select(
id, group_match,Readmission_status, Readmission2_status, Readmission3_status, Readmission4_status,
Readmission_time, Readmission2_time, Readmission3_time, Readmission4_time,
tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1) %>%
dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1),
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22.dta"))
rio::export(
d_match_surv_msprep %>%
rename_with(~ c("group.match","Readmission.status", "Readmission2.status", "Readmission3.status", "Readmission4.status",
"Readmission.time", "Readmission2.time", "Readmission3.time", "Readmission4.time"), c("group_match", "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status",
"Readmission_time", "Readmission2_time", "Readmission3_time","Readmission4_time")) %>%
dplyr::select(
id, group.match,Readmission.status, Readmission2.status, Readmission3.status, Readmission4.status,
Readmission.time, Readmission2.time, Readmission3.time, Readmission4.time,
tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1
)%>%
dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1),
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22.csv"))
##cc
rio::export(
d_match_surv_msprep_cc %>%
dplyr::select(
id, group_match,Readmission_status, Readmission2_status, Readmission3_status, Readmission4_status,
Readmission_time, Readmission2_time, Readmission3_time, Readmission4_time,
tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1)%>%
dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1),
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22_cc.dta"))
rio::export(
d_match_surv_msprep_cc %>%
rename_with(~ c("group.match","Readmission.status", "Readmission2.status", "Readmission3.status", "Readmission4.status",
"Readmission.time", "Readmission2.time", "Readmission3.time", "Readmission4.time"), c("group_match", "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status",
"Readmission_time", "Readmission2_time", "Readmission3_time","Readmission4_time")) %>%
dplyr::select(
id, group.match,Readmission.status, Readmission2.status, Readmission3.status, Readmission4.status,
Readmission.time, Readmission2.time, Readmission3.time, Readmission4.time,
tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1
)%>%
dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1),
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22_cc.csv"))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
##OCT 2022
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
rio::export(
d_match_surv_msprep_oct_2022,
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22.dta"))
rio::export(
d_match_surv_msprep_oct_2022 %>%
select_all(~gsub("\\_", ".", tolower(.))),
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22.csv"))
rio::export(
d_match_surv_msprep_cc_oct_2022,
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22_cc.dta"))
rio::export(
d_match_surv_msprep_cc_oct_2022 %>%
select_all(~gsub("\\_", ".", tolower(.))), paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22_cc.csv"))
tab9_f<-
data.frame(mstate::events(ms_d_match_surv)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms_d_match_surv)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(comb=paste0(from,"_",to)) %>%
dplyr::filter(comb %in% c("Admission_Readmission", "Readmission_Readmission2","Readmission2_Readmission3","Readmission3_Readmission4","Readmission4_Readmission5")) %>%
dplyr::select(-comb) %>%
dplyr::mutate(Proportions=scales::percent(Proportions))
tab9_f %>%
dplyr::left_join(data.frame(mstate::events(ms_d_match_surv)$Frequencies) %>%
dplyr::filter(to=="total entering") %>% dplyr::select(from,Freq),by="from") %>%
dplyr::select(from, to, Frequencies, Freq, Proportions)%>%
dplyr::rename("Total"="Freq") %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9b. Empirical State Transition Matrix, Recurrent Events Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.") %>%
kableExtra::kable_classic() %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9b. Empirical State Transition Matrix, Recurrent Events Model
from
|
to
|
Frequencies
|
Total
|
Proportions
|
Admission
|
Readmission
|
6,370
|
22,452
|
28.37%
|
Readmission
|
Readmission2
|
1,991
|
6,370
|
31.26%
|
Readmission2
|
Readmission3
|
635
|
1,991
|
31.89%
|
Readmission3
|
Readmission4
|
200
|
635
|
31.50%
|
a Note. No event describes cases that remained in the state.
Percentage depicts the proportion of the state of origin.
|
tab9_fb<-
data.frame(mstate::events(ms_d_match_surv_oct_2022)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms_d_match_surv_oct_2022)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(comb=paste0(from,"_",to)) %>%
dplyr::filter(Frequencies>0) %>%
dplyr::select(-comb) %>%
dplyr::mutate(Proportions=scales::percent(Proportions))
tab9_fb %>%
dplyr::left_join(data.frame(mstate::events(ms_d_match_surv_oct_2022)$Frequencies) %>%
dplyr::filter(to=="total entering") %>% dplyr::select(from,Freq),by="from") %>%
dplyr::select(from, to, Frequencies, Freq, Proportions)%>%
dplyr::rename("Total"="Freq") %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9b. Empirical State Transition Matrix, Recurrent Events Model (9 states)",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.") %>%
kableExtra::kable_classic() %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9b. Empirical State Transition Matrix, Recurrent Events Model (9
states)
from
|
to
|
Frequencies
|
Total
|
Proportions
|
Admission
|
TD
|
5,356
|
22,452
|
23.855%
|
Admission
|
DWCA
|
13,067
|
22,452
|
58.200%
|
Admission
|
no event
|
4,029
|
22,452
|
17.945%
|
TD
|
Readmission
|
1,518
|
5,356
|
28.342%
|
TD
|
no event
|
3,838
|
5,356
|
71.658%
|
DWCA
|
Readmissionb
|
4,046
|
13,067
|
30.963%
|
DWCA
|
no event
|
9,021
|
13,067
|
69.037%
|
Readmission
|
Readmission2
|
447
|
1,518
|
29.447%
|
Readmission
|
no event
|
1,071
|
1,518
|
70.553%
|
Readmissionb
|
Readmission2b
|
1,305
|
4,046
|
32.254%
|
Readmissionb
|
no event
|
2,741
|
4,046
|
67.746%
|
Readmission2
|
Readmission3
|
127
|
447
|
28.412%
|
Readmission2
|
no event
|
320
|
447
|
71.588%
|
Readmission2b
|
Readmission3b
|
447
|
1,305
|
34.253%
|
Readmission2b
|
no event
|
858
|
1,305
|
65.747%
|
Readmission3
|
no event
|
127
|
127
|
100.000%
|
Readmission3b
|
no event
|
447
|
447
|
100.000%
|
a Note. No event describes cases that remained in the state.
Percentage depicts the proportion of the state of origin.
|
ms_d_match_surv_res<-
#El arrival y el número al lado del arrival repreenta el número de la transición
#mstate::expand.covs(ms_d_match_surv, "arrival", append = TRUE, longnames =F) %>%
ms_d_match_surv %>%
data.frame() %>%
dplyr::mutate(tipo_de_plan_res=dplyr::case_when(tipo_de_plan_res_1==1 & trans==1~1,
tipo_de_plan_res_2==1 & trans==2~1,
tipo_de_plan_res_3==1 & trans==3~1,
tipo_de_plan_res_4==1 & trans==4~1,
T~0)) %>%
dplyr::mutate(TD=dplyr::case_when(TD_1==1 & trans==1~1,
TD_2==1 & trans==2~1,
TD_3==1 & trans==3~1,
TD_4==1 & trans==4~1,
T~0)) %>%
dplyr::mutate(TD=dplyr::case_when(TD_1==1 & trans==1~1,
TD_2==1 & trans==2~1,
TD_3==1 & trans==3~1,
TD_4==1 & trans==4~1,
T~0)) %>%
dplyr::mutate(days_treated=dplyr::case_when(!is.na(dias_treat_imp_sin_na_1) & trans==1~dias_treat_imp_sin_na_1,
!is.na(dias_treat_imp_sin_na_2) & trans==2~dias_treat_imp_sin_na_2,
!is.na(dias_treat_imp_sin_na_3) & trans==3~dias_treat_imp_sin_na_3,
!is.na(dias_treat_imp_sin_na_4) & trans==4~dias_treat_imp_sin_na_4,
T~NA_real_))
ms_d_match_surv$tipo_de_plan_res<-ms_d_match_surv_res$tipo_de_plan_res
ms_d_match_surv$TD<-ms_d_match_surv_res$TD
ms_d_match_surv$days_treated<-ms_d_match_surv_res$days_treated
ms_d_match_surv_oct_2022$days_treated<-ms_d_match_surv_oct_2022$Tstart
Consideration of the Appropriateness of the proportional hazards
assumption
Continuous variables need to be categorized into groups. The plot
described is also known as the log(−log(survival)) plot, as the
cumulative hazard is equal to the negative logarithm of the survival
proportion. This approach requires a subjective assessment (Bradburn,
Clark, Love, et al., 2003).
#Bradburn, M., Clark, T., Love, S. et al. Survival Analysis Part III: Multivariate data analysis – choosing a model and assessing its adequacy and fit. Br J Cancer 89, 605–611 (2003). https://doi.org/10.1038/sj.bjc.6601120
plots<- data.frame(title=rep(
c("Admission to\nReadmission", "Readmission to\nSecond Readmission", "Second to\nThird Readmission", "Third to\nFourth Readmission" ),1),trans=rep(1:max(trans_matrix,na.rm=T),1))
## SIN COVARIABLES
layout(matrix(1:4, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-4, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}
if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process_APR_22.Rmd","",path),"_mult_state_ags/ph_test1.jpg"), height=7, width= 13, res= 320, units = "in")
layout(matrix(1:4, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-4, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}
dev.off()
}
plots2<- data.frame(title=rep(
c("Admission to\nTr. Completion", "Admission to\nTr. Non-completion", "Completion to \nReadmission", "Non-completion to \nReadmission", "Readmission to\nSecond Readmission (TC)", "Readmission to\nSecond Readmission (TNC)", "Second to\nThird Readmission (TC)", "Second to\nThird Readmission (TNC)" ),1),trans=rep(1:max(trans_matrix,na.rm=T),1))
## SIN COVARIABLES
layout(matrix(1:8, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}
if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process_APR_22.Rmd","",path),"/_mult_state_ags/ph_test1_9s.jpg"), height=7, width= 13, res= 320, units = "in")
layout(matrix(1:8, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}
dev.off()
}
layout(matrix(1:4, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
xlab="Days", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}
if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process1_APR_22.Rmd","",path),"/_mult_state_ags/ph_test2.jpg"), height=7, width= 13, res= 320, units = "in")
layout(matrix(1:4, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
xlab="Days", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}
dev.off()
}
layout(matrix(1:8, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
xlab="Days", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}
if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process_APR_22.Rmd","",path),"/_mult_state_ags/ph_test2_9s.jpg"), height=13, width= 7, res= 740, units = "in")
layout(matrix(1:8, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
xlab="Days", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}
dev.off()
}
cox_zph_trans<-data.frame()#rn="0",chisq=1,df=1,p=1
for(i in c(1:max(trans_matrix,na.rm=T))){
cox_zph_trans<-dplyr::bind_rows(cox_zph_trans,cbind.data.frame(trans=i,
data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+Tstart,
data=subset(ms_d_match_surv_res, trans==i),method = "breslow"))$table, keep.rownames = T))
)
}
cox_zph_trans$p<-round(cox_zph_trans$p,3)
cox_zph_trans$chisq<-round(cox_zph_trans$chisq,3)
cox_zph_trans_cont<-data.frame()
for(i in c(1:max(trans_matrix,na.rm=T))){
cox_zph_trans_cont<-dplyr::bind_rows(cox_zph_trans_cont,cbind.data.frame(trans=i,
data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+ factor(TD_1)+ factor(TD_2)+ factor(TD_3)+ factor(TD_4)+Tstart,data=subset(ms_d_match_surv_res, trans==i),method = "breslow"))$table, keep.rownames = T)))
}
cox_zph_trans_cont$p<-round(cox_zph_trans_cont$p,3)
cox_zph_trans_cont %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 10a. Schoenfeld's global test to test the proportional hazards assumption",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 12)%>%
kableExtra::kable_classic() %>%
kableExtra::group_rows("$1^{st}$ transition",1,6, escape=T) %>%
kableExtra::group_rows("$2^{nd}$ transition",7,13, escape=T) %>%
kableExtra::group_rows("$3^{rd}$ transition",14,20, escape=T) %>%
kableExtra::group_rows("$4^{th}$ transition",21,27, escape=T) %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 10a. Schoenfeld’s global test to test the proportional hazards
assumption
trans
|
rn
|
chisq
|
df
|
p
|
\(1^{st}\) transition
|
1
|
factor(tipo_de_plan_res_1)
|
0.0976935
|
1
|
0.755
|
1
|
factor(TD_1)
|
26.9107013
|
1
|
0.000
|
1
|
factor(TD_2)
|
0.0009311
|
1
|
0.976
|
1
|
factor(TD_3)
|
0.7297580
|
1
|
0.393
|
1
|
factor(TD_4)
|
0.2136621
|
1
|
0.644
|
1
|
GLOBAL
|
29.3191134
|
5
|
0.000
|
\(2^{nd}\) transition
|
2
|
factor(tipo_de_plan_res_1)
|
0.7068110
|
1
|
0.401
|
2
|
factor(TD_1)
|
2.7227294
|
1
|
0.099
|
2
|
factor(TD_2)
|
6.9103154
|
1
|
0.009
|
2
|
factor(TD_3)
|
0.6363786
|
1
|
0.425
|
2
|
factor(TD_4)
|
0.3301789
|
1
|
0.566
|
2
|
Tstart
|
20.1078288
|
1
|
0.000
|
2
|
GLOBAL
|
29.1031821
|
6
|
0.000
|
\(3^{rd}\) transition
|
3
|
factor(tipo_de_plan_res_1)
|
0.0009246
|
1
|
0.976
|
3
|
factor(TD_1)
|
0.2123946
|
1
|
0.645
|
3
|
factor(TD_2)
|
0.0027750
|
1
|
0.958
|
3
|
factor(TD_3)
|
2.5872079
|
1
|
0.108
|
3
|
factor(TD_4)
|
0.4839282
|
1
|
0.487
|
3
|
Tstart
|
4.0671924
|
1
|
0.044
|
3
|
GLOBAL
|
7.1687374
|
6
|
0.306
|
\(4^{th}\) transition
|
4
|
factor(tipo_de_plan_res_1)
|
0.0292211
|
1
|
0.864
|
4
|
factor(TD_1)
|
0.8459203
|
1
|
0.358
|
4
|
factor(TD_2)
|
0.0041408
|
1
|
0.949
|
4
|
factor(TD_3)
|
2.1742646
|
1
|
0.140
|
4
|
factor(TD_4)
|
12.0116147
|
1
|
0.001
|
4
|
Tstart
|
0.8503446
|
1
|
0.356
|
4
|
GLOBAL
|
13.9760848
|
6
|
0.030
|
As seen in both Figures above, the cumulative hazards does not follow
a proportional trend in the four transitions.
cox_zph_trans2<-data.frame()#rn="0",chisq=1,df=1,p=1
for(i in c(1:max(trans_matrix2,na.rm=T))){
cox_zph_trans2<-dplyr::bind_rows(cox_zph_trans2,cbind.data.frame(trans=i,data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+Tstart, data=subset(ms_d_match_surv_oct_2022, trans==i),method = "breslow"))$table, keep.rownames = T))
)
}
cox_zph_trans2$p<-round(cox_zph_trans2$p,3)
cox_zph_trans2$chisq<-round(cox_zph_trans2$chisq,3)
cox_zph_trans_cont2<-data.frame()
for(i in c(1:max(trans_matrix2,na.rm=T))){
cox_zph_trans_cont2<-dplyr::bind_rows(cox_zph_trans_cont2,cbind.data.frame(trans=i,
data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+Tstart,data=subset(ms_d_match_surv_oct_2022, trans==i),method = "breslow"))$table, keep.rownames = T)))
}
cox_zph_trans_cont2$p<-round(cox_zph_trans_cont2$p,3)
cox_zph_trans_cont2 %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 10b. Schoenfeld's global test to test the proportional hazards assumption",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 12)%>%
kableExtra::kable_classic() %>%
kableExtra::group_rows("$1^{st}$ transition",1,2, escape=T) %>%
kableExtra::group_rows("$2^{nd}$ transition",3,4, escape=T) %>%
kableExtra::group_rows("$3^{rd}$ transition",5,7, escape=T) %>%
kableExtra::group_rows("$4^{th}$ transition",8,10, escape=T) %>%
kableExtra::group_rows("$5^{th}$ transition",11,13, escape=T) %>%
kableExtra::group_rows("$6^{th}$ transition",14,16, escape=T) %>%
kableExtra::group_rows("$7^{th}$ transition",17,19, escape=T) %>%
kableExtra::group_rows("$8^{th}$ transition",20,22, escape=T) %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 10b. Schoenfeld’s global test to test the proportional hazards
assumption
trans
|
rn
|
chisq
|
df
|
p
|
\(1^{st}\) transition
|
1
|
factor(tipo_de_plan_res_1)
|
189.1263234
|
1
|
0.000
|
1
|
GLOBAL
|
189.1263234
|
1
|
0.000
|
\(2^{nd}\) transition
|
2
|
factor(tipo_de_plan_res_1)
|
294.5367633
|
1
|
0.000
|
2
|
GLOBAL
|
294.5367633
|
1
|
0.000
|
\(3^{rd}\) transition
|
3
|
factor(tipo_de_plan_res_1)
|
3.1697499
|
1
|
0.075
|
3
|
Tstart
|
33.5573442
|
1
|
0.000
|
3
|
GLOBAL
|
42.0270616
|
2
|
0.000
|
\(4^{th}\) transition
|
4
|
factor(tipo_de_plan_res_1)
|
33.4769197
|
1
|
0.000
|
4
|
Tstart
|
146.2360490
|
1
|
0.000
|
4
|
GLOBAL
|
196.0367419
|
2
|
0.000
|
\(5^{th}\) transition
|
5
|
factor(tipo_de_plan_res_1)
|
0.2108743
|
1
|
0.646
|
5
|
Tstart
|
1.0882952
|
1
|
0.297
|
5
|
GLOBAL
|
1.3876374
|
2
|
0.500
|
\(6^{th}\) transition
|
6
|
factor(tipo_de_plan_res_1)
|
0.0429944
|
1
|
0.836
|
6
|
Tstart
|
4.9996251
|
1
|
0.025
|
6
|
GLOBAL
|
5.0006658
|
2
|
0.082
|
\(7^{th}\) transition
|
7
|
factor(tipo_de_plan_res_1)
|
0.5451631
|
1
|
0.460
|
7
|
Tstart
|
0.2881151
|
1
|
0.591
|
7
|
GLOBAL
|
0.8216015
|
2
|
0.663
|
\(8^{th}\) transition
|
8
|
factor(tipo_de_plan_res_1)
|
1.4744209
|
1
|
0.225
|
8
|
Tstart
|
4.7583466
|
1
|
0.029
|
8
|
GLOBAL
|
5.9273888
|
2
|
0.052
|