DAG

library(DiagrammeR) 

gr1<-
DiagrammeR::grViz("
digraph causal {

# Nodes
  node [shape = plaintext]
  a [label = 'Observed\nConfounders\n(Z)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Residential\nPrograms\n(X)',fontsize=10]

# Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB;
  
  b -> c 
  b -> a 
  a -> c  

  d -> c [minlen=1]
  d -> a [minlen=1]
  
 # a -> S #[minlen=1]
 # Z -> S #[minlen=1]
  
#  a -> C #[minlen=3]
#  Z -> C #[minlen=3]
  { rank = same; b; a; c }
# { rank = same; S; C }
  { rankdir = LR; a; d }

# Graph
  graph [overlap = true]
}")
gr1

Figure 1. Directed Acyclic Graph

#  {rank=same ; A -> B -> C -> D};
#       {rank=same ;           F -> E[dir=back]};
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
gr2<-
DiagrammeR::grViz("
digraph causal {

  # Nodes
  node [shape = plaintext]
  a [label = 'Residential\nPrograms\n(X)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Observed\nConfounders\n(Z)',fontsize=10]

  # Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB
  a -> c [minlen=3]
  d -> a [minlen=3]
  d -> c [minlen=9]
  
  b -> a [minlen=1]
  b -> c
  
{ rank = same; c; d }
#{ rank = same; b; d }
  rankdir = TB
{ rank = same; d; c } #Ver si lo saco, creo que da problemas
  
  # Graph
  graph [overlap = true]
}")#LR

Balance

We selected treatments at baseline for each user, leaving 85,048 observations. Then, we distinguished between residential (n= 12,706) and ambulatory (n= 72,267) treatments. We imputed cases that did not have a defined treatment assigned (n=75).


We selected the following variables of interest:

  • “Primary Substance at Admission” (sus_principal_mod)
  • “First Substance Used” (sus_ini_mod_mvv)
  • “Marital Status” (estado_conyugal_2)
  • “Educational Attainment” (escolaridad_rec)
  • “Substance Use Onset Age” (edad_ini_cons)
  • “Primary Substance at Admission Usage Frequency” (freq_cons_sus_prin)
  • “Treatment Admission Motive” (origen_ingreso_mod)
  • “Psychiatric comorbidity” (dg_cie_10_rec)
  • “Drug Dependence Diagnosis” (dg_trs_cons_sus_or)
  • “Regional Location of Center” (nombre_region)
  • “Type of Center (Public)” (tipo_centro_pub)
  • “Sex” (sexo_2)
  • “Admission Age” (edad_al_ing)
  • “Admission Date” (fech_ing_num)
  • “Evaluation of the Therapeutic Process” (*) (evaluacindelprocesoteraputico)
  • “Early Dropout (Against Staff Advice)” (abandono_temprano_rec) (Y)
  • “Residential Setting” (tipo_de_plan_res) (Z)
  • “Tenure status of household” (tenencia_de_la_vivienda_mod) (*)
  • “Employment Status” (condicion_ocupacional_corr)


match.on_tot <- c("row", "hash_key","sus_principal_mod","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","abandono_temprano_rec","tipo_de_plan_res","duplicates_filtered","dg_trs_cons_sus_or","evaluacindelprocesoteraputico", "tenencia_de_la_vivienda_mod", "condicion_ocupacional_corr")
#dg_trs_cons_sus_or

CONS_C1_df_dup_SEP_2020_match<-
  CONS_C1_df_dup_SEP_2020 %>% 
  dplyr::filter(dup==1) %>% #, tipo_de_plan_2 %in% c("PG-PR","M-PR","PG-PAI","M-PAI","PG-PAB","M-PAB")
  dplyr::mutate(tipo_de_plan_res=dplyr::case_when(grepl("PR",as.character(tipo_de_plan_2))~1,
                                                  grepl("PAI",as.character(tipo_de_plan_2))~0,
                                                  grepl("PAB",as.character(tipo_de_plan_2))~0,
                                                  TRUE~NA_real_)) %>% 
  dplyr::mutate(tipo_de_plan_res=factor(tipo_de_plan_res)) %>% 
  dplyr::mutate(abandono_temprano_rec=factor(if_else(as.character(motivodeegreso_mod_imp)=="Early Drop-out",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(dg_trs_cons_sus_or=factor(if_else(as.character(dg_trs_cons_sus_or)=="Drug dependence",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(tipo_centro_pub=factor(if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(condicion_ocupacional_corr=factor(condicion_ocupacional_corr),cat_ocupacional_corr=factor(cat_ocupacional_corr)) %>% 
  dplyr::mutate(dg_trs_fis_rec=factor(dplyr::case_when(as.character(diagnostico_trs_fisico)=="En estudio"~"Diagnosis unknown (under study)",as.character(diagnostico_trs_fisico)=="Sin trastorno"~'Without physical comorbidity',cnt_diagnostico_trs_fisico>0 ~'With physical comorbidity',
                                             TRUE~NA_character_)))%>%
    dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered=T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%   
dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Did not use', 'Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico=dplyr::case_when(grepl("1",as.character(evaluacindelprocesoteraputico))~'1-High Achievement',grepl("2",as.character(evaluacindelprocesoteraputico))~'2-Medium Achievement',grepl("3",as.character(evaluacindelprocesoteraputico))~'3-Minimum Achievement', TRUE~as.character(evaluacindelprocesoteraputico))) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
    dplyr::mutate(tenencia_de_la_vivienda_mod=
                  factor(dplyr::case_when(tenencia_de_la_vivienda_mod=="Allegado"~"Stays temporarily with a relative",
                                 tenencia_de_la_vivienda_mod=="Arrienda"~"Renting",
                                 tenencia_de_la_vivienda_mod=="Cedida"~"Owner/Transferred dwellings/Pays Dividends",
                                 tenencia_de_la_vivienda_mod=="Ocupación Irregular"~"Illegal Settlement",
                                 tenencia_de_la_vivienda_mod=="Otros"~"Others",
                                 tenencia_de_la_vivienda_mod=="Paga dividendo"~"Owner/Transferred dwellings/Pays Dividends",
                                 tenencia_de_la_vivienda_mod=="Propia"~"Owner/Transferred dwellings/Pays Dividends",
                                 T~NA_character_))) %>% 
  dplyr::select_(.dots = match.on_tot) %>% 
  dplyr::mutate(more_one_treat=factor(ifelse(duplicates_filtered>1,1,0))) %>% 
  #APRIL 2022
  dplyr::mutate(freq_cons_sus_prin=dplyr::case_when(freq_cons_sus_prin=="1 day a week or less"~"1 day a week or more",T~as.character(freq_cons_sus_prin))) %>% 
  dplyr::mutate(freq_cons_sus_prin=ordered(freq_cons_sus_prin,levels=c("Did not use", "Less than 1 day a week", "1 day a week or more", "2 to 3 days a week","4 to 6 days a week", "Daily"))) %>%   
  data.table::data.table()
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
#CONS_C1_df_dup_SEP_2020_match %>% 
  #dplyr::group_by(dg_trs_fis) %>% dplyr::summarise(q1=quantile(dias_treat_imp_sin_na,.25),q2=quantile(dias_treat_imp_sin_na,.5),q3=quantile(dias_treat_imp_sin_na,.75)) ---> las distribuciones por días de tratamiento de las categorías de respuesta tienden a ser bastante similares, aunquequienes tienen una comorbiliad física definida tienen más tiempo en el estudio.
invisible("La diferencia en días de tratamiento entre las categorías de enfermedad psiquiátrica, indica que quienes se encuentran en estudio tienen muchos menos días en tratamiento que quienes no tienen una comorbilidad o quienes tienen una definida. No es lo mismo con el caso de la enfermedad física, en donde tienden a ser bastante similares")

invisible("Decidí no incluir diagnóstico de enferemedad física, porque hay algunas condiciones que son crónicas o que pueden serlo, y que no tengo cómo validarlas a lo largo del tratamiento")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

attr(CONS_C1_df_dup_SEP_2020_match$sus_principal_mod,"label")<-"Primary Substance at Admission"
attr(CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,"label")<-"First Substance Used"
attr(CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons,"label")<-"Substance Use Onset Age"
attr(CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,"label")<-"Primary Substance at Admission Usage Frequency"
attr(CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,"label")<-"Treatment Admission Motive"
attr(CONS_C1_df_dup_SEP_2020_match$dg_cie_10_rec,"label")<-"Psychiatric co-morbidity"
attr(CONS_C1_df_dup_SEP_2020_match$nombre_region,"label")<-"Regional Location of Center"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub,"label")<-"Type of Center (Public)"
attr(CONS_C1_df_dup_SEP_2020_match$sexo_2,"label")<-"Sex"
attr(CONS_C1_df_dup_SEP_2020_match$edad_al_ing,"label")<-"Admission Age"
attr(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,"label")<-"Admission Date"
attr(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,"label")<-"Early Dropout"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res,"label")<-"Residential Setting"
attr(CONS_C1_df_dup_SEP_2020_match$duplicates_filtered,"label")<-"No. of Treatments in the Database"
attr(CONS_C1_df_dup_SEP_2020_match$dg_trs_cons_sus_or,"label")<-"Drug Dependence Diagnosis"
attr(CONS_C1_df_dup_SEP_2020_match$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"
attr(CONS_C1_df_dup_SEP_2020_match$condicion_ocupacional_corr,"label")<-"Employment Status"
attr(CONS_C1_df_dup_SEP_2020_match$tenencia_de_la_vivienda_mod,"label")<-"Tenure status of household"


knitr::opts_chunk$set(warning=FALSE, message=FALSE)

table1_all <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_principal_mod+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ duplicates_filtered+ dg_trs_cons_sus_or+ evaluacindelprocesoteraputico+ condicion_ocupacional_corr+ tenencia_de_la_vivienda_mod, method= c(
                                            sus_principal_mod=3,
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            duplicates_filtered=3,
                                            condicion_ocupacional_corr=3,
                                            evaluacindelprocesoteraputico=3,
                                            tenencia_de_la_vivienda_mod=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T)
)
table1_more_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_principal_mod+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico+ condicion_ocupacional_corr+ tenencia_de_la_vivienda_mod, method= c(
                                            sus_principal_mod=3,
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            condicion_ocupacional_corr=3,
                                            evaluacindelprocesoteraputico=3,
                                            tenencia_de_la_vivienda_mod=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T,
                       subset= more_one_treat==1)
)
table1_only_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_principal_mod+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico+ tenencia_de_la_vivienda_mod, method= c(
                                            sus_principal_mod=3,
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            evaluacindelprocesoteraputico=3,
                                            tenencia_de_la_vivienda_mod=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T,
                       subset= more_one_treat==0)
)
 #Possible values are: 1 - for analysis as "normal-distributed"; 2 - forces analysis as "continuous non-normal"; 3 - forces analysis as "categorical"; and 4 - NA, which performs a Shapiro-Wilks test to decide between normal or non-normal. 

restab1_all <- createTable(table1_all, show.p.overall = T)
restab1_more_one <- createTable(table1_more_one, show.p.overall = T)
restab1_only_one <- createTable(table1_only_one, show.p.overall = T)

pvals1 <- getResults(table1_all)
#p.adjust(pvals, method = "BH")
 export2md(restab1_all, size=11, first.strip=T, hide.no="no", position="center",
           format="html",caption= "Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019",col.names=c("Variables","Residential", "Ambulatory", "p-value"))%>%
  kableExtra::add_footnote(c("Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;", "Categorical variables are presented as number (%)"), notation = "none")%>%
  kableExtra::scroll_box(width = "100%", height = "375px") %>% 
   kableExtra::kable_classic()
Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019
Variables Residential Ambulatory p-value
N=72267 N=12706
Primary Substance at Admission: .
Alcohol 26410 (36.5%) 2431 (19.1%)
Cocaine hydrochloride 14481 (20.0%) 1660 (13.1%)
Marijuana 5372 (7.43%) 394 (3.10%)
Other 1381 (1.91%) 201 (1.58%)
Cocaine paste 24622 (34.1%) 8020 (63.1%)
‘Missing’ 1 (0.00%) 0 (0.00%)
First Substance Used: 0.000
Alcohol 41507 (57.4%) 5080 (40.0%)
Cocaine hydrochloride 2682 (3.71%) 477 (3.75%)
Marijuana 18412 (25.5%) 4556 (35.9%)
Other 1669 (2.31%) 318 (2.50%)
Cocaine paste 2767 (3.83%) 1086 (8.55%)
‘Missing’ 5230 (7.24%) 1189 (9.36%)
Marital Status: <0.001
Married/Shared living arrangements 26185 (36.2%) 2910 (22.9%)
Separated/Divorced 7721 (10.7%) 1320 (10.4%)
Single 37343 (51.7%) 8328 (65.5%)
Widower 869 (1.20%) 133 (1.05%)
‘Missing’ 149 (0.21%) 15 (0.12%)
Educational Attainment: <0.001
3-Completed primary school or less 20062 (27.8%) 3862 (30.4%)
2-Completed high school or less 39565 (54.7%) 7044 (55.4%)
1-More than high school 12279 (17.0%) 1777 (14.0%)
‘Missing’ 361 (0.50%) 23 (0.18%)
Primary Substance at Admission Usage Frequency: 0.000
Did not use 1095 (1.52%) 85 (0.67%)
Less than 1 day a week 2862 (3.96%) 133 (1.05%)
1 day a week or more 5335 (7.38%) 272 (2.14%)
2 to 3 days a week 22372 (31.0%) 1329 (10.5%)
4 to 6 days a week 12258 (17.0%) 1654 (13.0%)
Daily 27938 (38.7%) 9219 (72.6%)
‘Missing’ 407 (0.56%) 14 (0.11%)
Treatment Admission Motive: 0.000
Spontaneous 33720 (46.7%) 4273 (33.6%)
Assisted Referral 4950 (6.85%) 3013 (23.7%)
Other 3766 (5.21%) 740 (5.82%)
Justice Sector 7159 (9.91%) 812 (6.39%)
Health Sector 22672 (31.4%) 3868 (30.4%)
Psychiatric co-morbidity: <0.001
Without psychiatric comorbidity 29070 (40.2%) 3245 (25.5%)
Diagnosis unknown (under study) 13310 (18.4%) 2771 (21.8%)
With psychiatric comorbidity 29887 (41.4%) 6690 (52.7%)
Type of Center (Public): 0.000
FALSE 14964 (20.7%) 9066 (71.4%)
TRUE 57300 (79.3%) 3623 (28.5%)
‘Missing’ 3 (0.00%) 17 (0.13%)
Sex: <0.001
Men 54806 (75.8%) 8761 (69.0%)
Women 17461 (24.2%) 3945 (31.0%)
Drug Dependence Diagnosis: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
‘Missing’ 1 (0.00%) 0 (0.00%)
Admission Age 34.4 [27.5;43.5] 32.6 [26.3;40.9] <0.001
Admission Date 16577 [15730;17359] 16154 [15342;17023] <0.001
Early Dropout: <0.001
FALSE 61074 (84.5%) 10201 (80.3%)
TRUE 11190 (15.5%) 2499 (19.7%)
‘Missing’ 3 (0.00%) 6 (0.05%)
No. of Treatments in the Database: .
1 58708 (81.2%) 8533 (67.2%)
2 10087 (14.0%) 2804 (22.1%)
3 2471 (3.42%) 927 (7.30%)
4 714 (0.99%) 295 (2.32%)
5 192 (0.27%) 94 (0.74%)
6 67 (0.09%) 36 (0.28%)
7 23 (0.03%) 11 (0.09%)
8 4 (0.01%) 6 (0.05%)
10 1 (0.00%) 0 (0.00%)
Drug Dependence Diagnosis: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
‘Missing’ 1 (0.00%) 0 (0.00%)
Evaluation of the Therapeutic Process: <0.001
1-High Achievement 14081 (19.5%) 2831 (22.3%)
2-Medium Achievement 21728 (30.1%) 4237 (33.3%)
3-Minimum Achievement 31549 (43.7%) 5302 (41.7%)
‘Missing’ 4909 (6.79%) 336 (2.64%)
Employment Status: .
Employed 39613 (54.8%) 1773 (14.0%)
Inactive 7715 (10.7%) 1191 (9.37%)
Looking for a job for the first time 172 (0.24%) 21 (0.17%)
No activity 2672 (3.70%) 1823 (14.3%)
Not seeking for work 495 (0.68%) 335 (2.64%)
Unemployed 21599 (29.9%) 7563 (59.5%)
‘Missing’ 1 (0.00%) 0 (0.00%)
Tenure status of household: <0.001
Illegal Settlement 596 (0.82%) 309 (2.43%)
Others 1946 (2.69%) 408 (3.21%)
Owner/Transferred dwellings/Pays Dividends 25537 (35.3%) 4378 (34.5%)
Renting 12917 (17.9%) 1640 (12.9%)
Stays temporarily with a relative 27786 (38.4%) 4786 (37.7%)
‘Missing’ 3485 (4.82%) 1185 (9.33%)
Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


Of the 85,048 users, we selected 85,048 that fulfilled the conditions stated above (100%).


#Additionally, we generated a correlation plot to get an overview of heterogeneous correlations between the different variables.

#Computes a heterogenous correlation matrix, consisting of Pearson product-moment correlations between numeric variables, polyserial correlations between numeric and ordinal variables, and polychoric correlations between 
tiempo_antes_hetcor<-Sys.time()
hetcor_mat<-hetcor(CONS_C1_df_dup_SEP_2020_match[,-c("hash_key","row","more_one_treat","duplicates_filtered")], ML = T, std.err =T, use="pairwise.complete.obs", bins=3, pd=TRUE)
tiempo_despues_hetcor<-Sys.time()
tiempo_hetcor<-tiempo_despues_hetcor-tiempo_antes_hetcor

for (i in 1:length(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")])){
    x<- match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")][i]
    attr(hetcor_mat$correlations,"dimnames")[[2]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
    attr(hetcor_mat$correlations,"dimnames")[[1]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
    attr(hetcor_mat$tests,"dimnames")[[2]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
    attr(hetcor_mat$tests,"dimnames")[[1]][i]<- attr(CONS_C1_df_dup_SEP_2020_match[[x]],"label")
}

hetcor_mat$tests[is.na(hetcor_mat$tests)]<-1

ggcorrplot<-
ggcorrplot::ggcorrplot(hetcor_mat$correlations,
           ggtheme = ggplot2::theme_void,
           insig = "blank",
           pch=1,
           pch.cex=3,
           tl.srt = 45, 
           #pch="ns",
            p.mat = hetcor_mat$tests, #  replacement has 144 rows, data has 169
            #type = "lower",
           colors = c("#6D9EC1", "white", "#E46726"), 
           tl.cex=8,
           lab=F)+
  #scale_x_discrete(labels = var_lbls_p345, drop = F) +
  #scale_y_discrete(labels = var_lbls_p345, drop = F) +
  theme(axis.text.x = element_blank())+
  #theme(axis.text.y = element_text(size=7.5,color ="black", hjust = 1))+
  theme(axis.text.y = element_blank())+
  theme(legend.position="bottom")

ggplotly(ggcorrplot, height = 800, width=800)%>% 
  layout(xaxis= list(showticklabels = FALSE)) %>% 
 layout(annotations = 
 list(x = .1, y = -0.031, text = "", 
      showarrow = F, xref='paper', yref='paper', 
      #xanchor='center', yanchor='auto', xshift=0, yshift=-0,
      font=list(size=11, color="darkblue"))
 )

Figure 2. Heterogeneous Correlation Matrix of Variables of Interest


Imputation


We generated a plot to see all the missing values in the sample.


#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
library(dplyr)
library(ggplot2)

missing.values<-
CONS_C1_df_dup_SEP_2020_match %>%
  rowwise %>%
  dplyr::mutate_at(.vars = vars(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]),
                   .funs = ~ifelse(is.na(.), 1, 0)) %>% 
  dplyr::ungroup() %>% 
  dplyr::summarise_at(vars(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]),~sum(.))

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
n_miss_baseline<-
CONS_C1_df_dup_SEP_2020_match %>%
    rowwise %>%
    dplyr::mutate_at(.vars = vars(match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]),
                     .funs = ~ifelse(is.na(.), 1, 0)) %>% 
      dplyr::ungroup() %>%
dplyr::mutate(sumVar=rowSums(dplyr::select(., match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered")]))) %>% 
  dplyr::filter(sumVar>0) %>% 
  nrow()


paste0("users with at least one missing value at baseline ",format(n_miss_baseline, big.mark=","))
## [1] "users with at least one missing value at baseline 16,466"
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

plot_miss<-
missing.values %>%
  data.table::melt() %>%  #condicion_ocupacional_corr
  dplyr::mutate(perc= value/sum(nrow(CONS_C1_df_dup_SEP_2020_match))) %>% 
  dplyr::mutate(label_text= paste0("Variable= ",variable,"<br>n= ",value,"<br>",scales::percent(round(perc,3)))) %>%
  dplyr::mutate(perc=perc*100) %>% 
  ggplot() +
  geom_bar(aes(x=factor(variable), y=perc,label= label_text), stat = 'identity') +
  theme_classic()+
#  scale_y_continuous(limits=c(0,1), labels=percent)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
  labs(x=NULL, y="% of Missing Values", caption=paste0("Nota. Porcentaje de perdidos del total (",sum(nrow(CONS_C1_df_dup_SEP_2020_match)),")"))

  ggplotly(plot_miss, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T), height = 600, width=800) %>%   layout(yaxis = list(tickformat='%',  range = c(0, 8)))

Figure 3. Bar plot of Porcentaje of Missing Values per Variables at Basline

  #</div>






From the figure above, we could see that the first substance used (sus_ini_mvv), the substance use onset age (edad_ini_cons), the evaluation of the therapeutic process (evaluacindelprocesoteraputico) counted for approximately 6% of the missing data. These values should be imputed. We first focused on the substance use onset age It is important to consider that the evaluation of the therapeutic process could be distorted due to censoring (many users did not finish their treatment, and did not have this evaluation in the study period).


#origen_ingreso #dg_global_nec_int_soc_or_1 "Diagnóstico global de necesidades de integración social" #evaluacindelprocesoteraputico "Evaluación del proceso terapéutico" #escolaridad_rec "macrozona"

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

match.on_tot2 <- c("row", "hash_key","sus_principal_mod","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","abandono_temprano_rec","tipo_de_plan_res","duplicates_filtered","edad_ini_sus_prin","via_adm_sus_prin_act","dg_trs_cons_sus_or","dup","evaluacindelprocesoteraputico","tenencia_de_la_vivienda_mod","condicion_ocupacional_corr","motivodeegreso_mod_imp")

  #HACER BASE ESPECIAL QUE CONTENGA UNA VARIABLE DE EDAD DE INICIO DE CONSUMO DE SUSTANCIA PRINCIPAL PARA EQUIPARAR
CONS_C1_df_dup_SEP_2020_match_miss<-
CONS_C1_df_dup_SEP_2020 %>% 
    #tuve que sacar casos que no tenían tipo de tratamiento definido. Vale la pena que en est etapa los seleccione????????????r
  #dplyr::filter(tipo_de_plan_2 %in% c("PG-PR","M-PR","PG-PAI","M-PAI","PG-PAB","M-PAB")) %>% 
  #:#:#:#:#:
    dplyr::filter(hash_key %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match$hash_key))) %>% 
    #dplyr::group_by(hash_key) %>% 
    #dplyr::mutate(rn=row_number()) %>% 
    #dplyr::ungroup() %>% 
  #:#:#:#:#:#:#:#:#:#:#:
  #GENERAR VARIABLES GENERADAS PARA LA MUESTRA PARA MATCH
  dplyr::mutate(tipo_de_plan_res=dplyr::case_when(grepl("PR",as.character(tipo_de_plan_2))~1,
                                                  grepl("PAI",as.character(tipo_de_plan_2))~0,
                                                  grepl("PAB",as.character(tipo_de_plan_2))~0,
                                                  TRUE~NA_real_)) %>% 
  dplyr::mutate(tipo_de_plan_res=factor(tipo_de_plan_res)) %>% 
  dplyr::mutate(abandono_temprano_rec=factor(if_else(as.character(motivodeegreso_mod_imp)=="Early Drop-out",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(dg_trs_cons_sus_or=factor(if_else(as.character(dg_trs_cons_sus_or)=="Drug dependence",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(tipo_centro_pub=factor(if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA))) %>% 
  #:#:#:#:#:#:#:#:#:#:#:
  # ORDINALIZAR LAS VARIABLES ORDINALES: escolaridad_rec freq_cons_sus_prin dg_cie_10_rec
  dplyr::mutate(dg_cie_10_rec=parse_factor(as.character(dg_cie_10_rec),levels=c('Without psychiatric comorbidity','Diagnosis unknown (under study)', 'With psychiatric comorbidity'), ordered =T,trim_ws=T,include_na =F)) %>%   #, locale=locale(encoding = "Latin1")
  dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%   
  dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Did not use', 'Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =F,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
  dplyr::mutate(dg_trs_fis_rec=factor(dplyr::case_when(as.character(diagnostico_trs_fisico)=="En estudio"~"Diagnosis unknown (under study)",as.character(diagnostico_trs_fisico)=="Sin trastorno"~'Without physical comorbidity',cnt_diagnostico_trs_fisico>0 ~'With physical comorbidity',TRUE~NA_character_)))%>%
  dplyr::mutate(dg_trs_fis_rec=parse_factor(as.character(dg_trs_fis_rec),levels=c('Without physical comorbidity','Diagnosis unknown (under study)', 'With physical comorbidity'), ordered =F,trim_ws=T,include_na =F)) %>% 
  #, locale=locale(encoding = "Latin1")
  dplyr::mutate(evaluacindelprocesoteraputico=dplyr::case_when(grepl("1",as.character(evaluacindelprocesoteraputico))~'1-High Achievement',grepl("2",as.character(evaluacindelprocesoteraputico))~'2-Medium Achievement',grepl("3",as.character(evaluacindelprocesoteraputico))~'3-Minimum Achievement', TRUE~as.character(evaluacindelprocesoteraputico))) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
      dplyr::mutate(tenencia_de_la_vivienda_mod=
                  factor(dplyr::case_when(tenencia_de_la_vivienda_mod=="Allegado"~"Stays temporarily with a relative",
                                 tenencia_de_la_vivienda_mod=="Arrienda"~"Renting",
                                 tenencia_de_la_vivienda_mod=="Cedida"~"Owner/Transferred dwellings/Pays Dividends",
                                 tenencia_de_la_vivienda_mod=="Ocupación Irregular"~"Illegal Settlement",
                                 tenencia_de_la_vivienda_mod=="Otros"~"Others",
                                 tenencia_de_la_vivienda_mod=="Paga dividendo"~"Owner/Transferred dwellings/Pays Dividends",
                                 tenencia_de_la_vivienda_mod=="Propia"~"Owner/Transferred dwellings/Pays Dividends",
                                 T~NA_character_))) %>% 
  #:#:#:#:#:#:#:#:#:#:#:
    dplyr::select_(.dots = match.on_tot2) %>% 
    dplyr::mutate(more_one_treat=factor(ifelse(duplicates_filtered>1,1,0))) %>% 
    #dplyr::mutate(motivodeegreso_mod_imp=as.character(motivodeegreso_mod_imp)) %>% 
    #dplyr::mutate(tr_completion=factor(dplyr::case_when(
    #motivodeegreso_mod_imp=="Therapeutic discharge" ~1,
    #motivodeegreso_mod_imp=="Ongoing treatment" ~0,
    #TRUE~2),labels=c("Ongoing treatment", "Treatment completion","Treatment non-completion"))) %>% 
    #dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>% 
    dplyr::mutate(motivodeegreso_mod_imp=factor(dplyr::case_when(
    grepl("Drop-out",motivodeegreso_mod_imp)~"Drop-out",
    motivodeegreso_mod_imp=="Ongoing treatment" ~NA_character_,
    TRUE~as.character(motivodeegreso_mod_imp)))) %>% 
    dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>% 
    data.table::data.table()

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:


set.seed(2125)
amelia_fit <- amelia(CONS_C1_df_dup_SEP_2020_match_miss[,-c("abandono_temprano_rec", "more_one_treat")], 
                     m=30, parallel = "multicore", #noms = "row",
                     idvars="row",#"hash_key","rn"
                     noms= c("estado_conyugal_2", "via_adm_sus_prin_act",  "origen_ingreso_mod",  "nombre_region", "sexo_2","dg_trs_cons_sus_or","sus_principal_mod","sus_ini_mod_mvv","dg_cie_10_rec", "tipo_centro_pub","tipo_de_plan_res","motivodeegreso_mod_imp","tenencia_de_la_vivienda_mod", "condicion_ocupacional_corr"),
                     ords= c("escolaridad_rec", "freq_cons_sus_prin","evaluacindelprocesoteraputico"),
                     cs = "hash_key",
                     ts = "dup",
                     incheck = TRUE)

#amelia_fit$imputations$imp1
#CONS_C1_df_dup_SEP_2020_match[!complete.cases(CONS_C1_df_dup_SEP_2020_match[,..match.on_tot])]


Admission Age

We started looking over the missing values in the age at admission (n= 16). Since there were not cases with more than one treatment that did not have an admission age, we did not have to impute taking into account serial dependencies in the dates of treatment.

compare.density(amelia_fit,var="edad_al_ing")
Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission

Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission


As seen in the Figure above, distributions seem to differ. However, considering the low amount of missing values in this variable, we proceeded with the imputation of the mean, despite the minor differences found. The imputed values must not be greater than the age of onset of drug use and may not be less than 16 years old. Values lower than this age may be considered less likely to receive treatment within adult population, so it is probable that it is incorrect that they would be in this database.


#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){w
  res <- { 
    setTimeLimit(nn_K*500)
    ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_al_ing")
  }
}

paste0("Users that had more than one treatment with no date of admission:",CONS_C1_df_dup_SEP_2020_match_miss %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(na_edad_ing=sum(is.na(edad_al_ing))) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(na_edad_ing>0) %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::summarise(n=n()) %>% dplyr::filter(n>1) %>% nrow())
## [1] "Users that had more than one treatment with no date of admission:0"
#Hay poca relación en las imputaciones.
#table(is.na(CONS_C1_df_dup_SEP_2020_match_not_miss$edad_al_ing),exclude=NULL)

edad_al_ing_imputed<-
  cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$edad_al_ing,
       amelia_fit$imputations$imp2$edad_al_ing,
       amelia_fit$imputations$imp3$edad_al_ing,
       amelia_fit$imputations$imp4$edad_al_ing,
       amelia_fit$imputations$imp5$edad_al_ing,
       amelia_fit$imputations$imp6$edad_al_ing,
       amelia_fit$imputations$imp7$edad_al_ing,
       amelia_fit$imputations$imp8$edad_al_ing,
       amelia_fit$imputations$imp9$edad_al_ing,
       amelia_fit$imputations$imp10$edad_al_ing,
       amelia_fit$imputations$imp11$edad_al_ing,
       amelia_fit$imputations$imp12$edad_al_ing,
       amelia_fit$imputations$imp13$edad_al_ing,
       amelia_fit$imputations$imp14$edad_al_ing,
       amelia_fit$imputations$imp15$edad_al_ing,
       amelia_fit$imputations$imp16$edad_al_ing,
       amelia_fit$imputations$imp17$edad_al_ing,
       amelia_fit$imputations$imp18$edad_al_ing,
       amelia_fit$imputations$imp19$edad_al_ing,
       amelia_fit$imputations$imp20$edad_al_ing,
       amelia_fit$imputations$imp21$edad_al_ing,
       amelia_fit$imputations$imp22$edad_al_ing,
       amelia_fit$imputations$imp23$edad_al_ing,
       amelia_fit$imputations$imp24$edad_al_ing,
       amelia_fit$imputations$imp25$edad_al_ing,
       amelia_fit$imputations$imp26$edad_al_ing,
       amelia_fit$imputations$imp27$edad_al_ing,
       amelia_fit$imputations$imp28$edad_al_ing,
       amelia_fit$imputations$imp29$edad_al_ing,
       amelia_fit$imputations$imp30$edad_al_ing
       ) 

#select the value of the age at admission that had no inconsistencies with the age of onset of drug use
edad_al_ing_imputed<-
edad_al_ing_imputed %>% 
    janitor::clean_names() %>% 
    dplyr::left_join(CONS_C1_df_dup_SEP_2020_match_miss[,c("row","edad_ini_cons")],by=c("amelia_fit_imputations_imp1_row"="row")) %>% 
    melt(id.vars = "amelia_fit_imputations_imp1_row") %>% 
    dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::mutate(edad_ini_cons=max(value[variable=="edad_ini_cons"],na.rm=T)) %>% 
    dplyr::ungroup() %>%
# la edad de ingreso a imputar debe ser mayor o igual a la edad de inicio de sustancias, más de 16 o más
    dplyr::filter(variable!="edad_ini_cons",value>=edad_ini_cons, value>=16) %>% 
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::summarise(avg_edad_al_ing_imp= mean(value,na.rm=T),min_edad_al_ing_imp=min(value,na.rm=T))

# Reemplazo los valores perdidos:
CONS_C1_df_dup_SEP_2020_match_miss0<-
CONS_C1_df_dup_SEP_2020_match_miss %>% 
  dplyr::left_join(edad_al_ing_imputed,by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  #si la edad al ingreso no existe, el valor promedio imutado es
  dplyr::mutate(edad_al_ing=dplyr::case_when(is.na(edad_al_ing)~as.numeric(avg_edad_al_ing_imp),TRUE~as.numeric(edad_al_ing))) %>% 
  dplyr::select(-avg_edad_al_ing_imp,-min_edad_al_ing_imp)

no_mostrar=0
if(no_mostrar==1){
  try(
  CONS_C1_df_dup_SEP_2020_match_miss0 %>% 
      dplyr::left_join(edad_al_ing_imputed,by=c("row"="v1")) %>% 
      dplyr::group_by(hash_key) %>% 
      dplyr::mutate(min_edad_ini_cons=min(edad_ini_cons,na.rm=T),min_edad_ini_cons=ifelse(is.infinite(min_edad_ini_cons), NA, min_edad_ini_cons)) %>% 
      dplyr::ungroup() %>% 
      dplyr::filter(is.na(edad_al_ing)) %>% 
      dplyr::select(hash_key, edad_al_ing, avg_edad_ing, Min_edad_ing,min_edad_ini_cons,edad_ini_sus_prin)
  )
}

no_mostrar=0
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_match_miss0 %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(na_edad_ing=sum(is.na(edad_al_ing))) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(na_edad_ing>0) %>% 
    dplyr::group_by(hash_key) %>% 
    summarise(n())
}  
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss12$edad_al_ing))
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss0)-nrow(CONS_C1_df_dup_SEP_2020_match_miss)>0){
  warning("Some rows were added in the imputation")}

After the imputation, there were no missing cases left.


Substance Use Onset Age

Another variable worth imputing is the Substance Use Onset Age (n= 6,592).


compare.density(amelia_fit,var="edad_ini_cons")
Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones

Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones


Based on the figure above, the substance use onset age was similar between the imputed values and the observed. However, we followed the rules stated in the Duplicates process (link). There were three logical conditions to fulfill in order to adequately replace these values in the database: the age of onset must not be greater than the substance use onset age in the primary substance at admission (1), the age of onset may not be greater than the age of admission to treatment (2), and the substance use onset age must be greater than 4 years. Then, we selected the minimum value of substance use onset age among the imputed values, because one user could not have more than one value.


#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){
  res <- { 
    setTimeLimit(nn_K*500)
    ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_ini_cons")
  }
}
#Hay poca relación en las imputaciones.

# Ver si alguno de los usuarios con valores perdidos tiene de todas formas datos en esta variable.
edad_ini_sus_prin_for_imp<-
CONS_C1_df_dup_SEP_2020 %>% 
    dplyr::filter(hash_key %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[which(!complete.cases(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons)),"hash_key"]))) %>%
    dplyr::filter(is.na(edad_ini_cons)) %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::summarise(min_edad_ini_sus_prin=min(edad_ini_sus_prin, na.rm=T),
                     min_edad_ini_sus_prin=ifelse(is.infinite(min_edad_ini_sus_prin),NA,min_edad_ini_sus_prin),edad_al_ing_min=min(edad_al_ing,na.rm=T))

cumplimiento_errores<-data.frame()
for (i in paste0("imp",1:30)){
  rn_cum_err<-data.frame(amelia_fit$imputations[i]) %>% 
    dplyr::rename(hash_key = 2) %>% 
    dplyr::rename(edad_ini_cons = 7) %>% 
    dplyr::mutate(hash_key=as.character(hash_key)) %>% 
    dplyr::left_join(edad_ini_sus_prin_for_imp, by="hash_key") %>% 
    dplyr::filter(edad_al_ing_min<edad_ini_cons|edad_ini_cons>min_edad_ini_sus_prin) %>% # edad de inicio de consumo no debe ser mayor a la menor edad a la ingreso de cada usuario, y la mínima edad de inicio de consumo de sustancia principal es menor a la edad de inicio de consumo
    nrow()
  rn_cum_err<-cbind(i,rn_cum_err)
 cumplimiento_errores<- rbind(cumplimiento_errores,rn_cum_err)
}
colnames(cumplimiento_errores)<- c("imp","no_errors_age_of_onset_drug_use")

message(paste0("Number of users that had more than one different age of onset of drug use before replacement: ",CONS_C1_df_dup_SEP_2020_match %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(n_dis=n_distinct(edad_ini_cons)) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(n_dis>1) %>% 
      nrow()))

n_miss_edad_ini_cons<-nrow(CONS_C1_df_dup_SEP_2020_match[which(!complete.cases(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons)),"hash_key"])
plot_imps<-
cumplimiento_errores %>%
  dplyr::mutate(no_errors_age_of_onset_drug_use=as.numeric(no_errors_age_of_onset_drug_use)) %>% 
  dplyr::mutate(imp=factor(imp, levels =paste0("imp",1:30))) %>% 
  dplyr::mutate(perc= no_errors_age_of_onset_drug_use/n_miss_edad_ini_cons) %>% 
  dplyr::mutate(label_text= paste0("Variable= ",imp,"<br>n= ",no_errors_age_of_onset_drug_use,"<br>",scales::percent(round(perc,2)))) %>%
  dplyr::mutate(perc=perc*100) %>% 
  ggplot() +
  geom_bar(aes(x=imp, y=perc,label= label_text), stat = 'identity') +
  theme_classic()+
  scale_y_continuous(limits=c(0,5))+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
  labs(x=NULL, y="% of Imputed Values With Logical Discrepancies")+
  theme(aspect.ratio = 2/1)
#plotly
  ggplotly(plot_imps, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T)) %>%   layout(yaxis = list(tickformat='%',  range = c(0, 5)), height = 400, width=533) 

Figure 7. Bar plot of Percentage of Incorrect Imputed Values per Imputation Sample

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
edad_ini_cons_imputed<-
  cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$edad_ini_cons,
       amelia_fit$imputations$imp2$edad_ini_cons,
       amelia_fit$imputations$imp3$edad_ini_cons,
       amelia_fit$imputations$imp4$edad_ini_cons,
       amelia_fit$imputations$imp5$edad_ini_cons,
       amelia_fit$imputations$imp6$edad_ini_cons,
       amelia_fit$imputations$imp7$edad_ini_cons,
       amelia_fit$imputations$imp8$edad_ini_cons,
       amelia_fit$imputations$imp9$edad_ini_cons,
       amelia_fit$imputations$imp10$edad_ini_cons,
       amelia_fit$imputations$imp11$edad_ini_cons,
       amelia_fit$imputations$imp12$edad_ini_cons,
       amelia_fit$imputations$imp13$edad_ini_cons,
       amelia_fit$imputations$imp14$edad_ini_cons,
       amelia_fit$imputations$imp15$edad_ini_cons,
       amelia_fit$imputations$imp16$edad_ini_cons,
       amelia_fit$imputations$imp17$edad_ini_cons,
       amelia_fit$imputations$imp18$edad_ini_cons,
       amelia_fit$imputations$imp19$edad_ini_cons,
       amelia_fit$imputations$imp20$edad_ini_cons,
       amelia_fit$imputations$imp21$edad_ini_cons,
       amelia_fit$imputations$imp22$edad_ini_cons,
       amelia_fit$imputations$imp23$edad_ini_cons,
       amelia_fit$imputations$imp24$edad_ini_cons,
       amelia_fit$imputations$imp25$edad_ini_cons,
       amelia_fit$imputations$imp26$edad_ini_cons,
       amelia_fit$imputations$imp27$edad_ini_cons,
       amelia_fit$imputations$imp28$edad_ini_cons,
       amelia_fit$imputations$imp29$edad_ini_cons,
       amelia_fit$imputations$imp30$edad_ini_cons
       ) 

min_edad_al_ing_edad_ini_sus_prin<-
CONS_C1_df_dup_SEP_2020_match_miss0 %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(min_edad_al_ing=min(edad_al_ing,na.rm=T),min_edad_ini_sus_prin=min(edad_ini_sus_prin,na.rm=T),min_edad_ini_sus_prin=ifelse(is.infinite(min_edad_ini_sus_prin),100,min_edad_ini_sus_prin)) %>% 
    dplyr::ungroup() %>% 
    dplyr::select(row, hash_key,min_edad_al_ing,min_edad_ini_sus_prin)
 #a- aquellos casos en que la edad de inicio de consumo está perdida, y el promedio imputado es menor o igual a la mínima edad de inicio de consumo de sustancia en la principal en la admisión, y sea menor a la primera fecha de ingreso a un programa de consumo de sustancias, se reemplazará por el promedio
  #b- aquellos casos en que la edad de inicio de consumo está perdida, el promedio imputado es mayor a la mínima edad de inicio de consumo o mayor a la mínima edad de inicio a tratamiento, y el mínimo imputado es menor o igual a la mínima edad de inicio de consumo de sustancia en la principal en la admisión, y sea menor a la primera fecha de ingreso a un programa de consumo de sustancias, se reemplazará por el valor mínimo
 #c- aquellos en que la edad de inicio de consumo está perdida y la edad de inicio de consumo en la sustancia principal también esté perdida,
 #d- aquellos en que la edad de inicio de consumo está perdida y la edad de inicio de consumo en la sustancia principal también esté perdida, 
edad_ini_cons_imputed_after<-
  cbind(data.table(edad_ini_cons_imputed[,1]),data.table(edad_ini_cons_imputed[,2:31]))%>%
  janitor::clean_names() %>% 
  melt(id.vars = "v1") %>% 
  dplyr::left_join(min_edad_al_ing_edad_ini_sus_prin,by=c("v1"="row")) %>% 
  dplyr::arrange(v1) %>% 
  dplyr::filter(value>=5,value<=min_edad_al_ing, value<=min_edad_ini_sus_prin) %>%  # get candidate values for imputation over 5 years, according to SENDAs guidelines; get substance use onset ages that are lower or equal than the minimum admission age of each patient, and lower or equal than the minimum primary substance at admission onset age  
  dplyr::group_by(hash_key) %>%  #changed in april 2022, from row to hash
  dplyr::summarise(AVG=round(mean(value,na.rm=T),0),Min=round(min(value,na.rm=T),0)) %>% 
  dplyr::ungroup()
#summary(edad_ini_cons_imputed$Min)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

paste("Cases with more than missing one age of onset:",CONS_C1_df_dup_SEP_2020_match_miss0 %>%dplyr::mutate(na_edad_ini_cons=dplyr::case_when(is.na(edad_ini_cons)~1,TRUE~0)) %>% dplyr::group_by(hash_key) %>% dplyr::mutate(sum_na_edad_ini_cons=sum(na_edad_ini_cons)) %>%
  dplyr::ungroup() %>% dplyr::filter(sum_na_edad_ini_cons>1) %>% nrow()
)
## [1] "Cases with more than missing one age of onset: 515"
# REPLACE MISSING VALUES:
#:#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss1<-
CONS_C1_df_dup_SEP_2020_match_miss0 %>% 
  dplyr::left_join(edad_ini_cons_imputed_after,by=c("hash_key"="hash_key")) %>%  #changed form row to hash (apr 2022)
  janitor::clean_names() %>% 
  dplyr::mutate(edad_ini_cons=dplyr::case_when(is.na(edad_ini_cons) ~as.numeric(avg),
                                               TRUE~as.numeric(edad_ini_cons))) #%>% 
#FROM APRIL 2022, THIS SEEMS NO LONGER NECESSARY
  # #me quedo con un promedio por usuario de aquellos valores imputados por usuario.
  # dplyr::group_by(hash_key) %>% 
  # dplyr::mutate(edad_ini_cons=round(mean(edad_ini_cons,na.rm=T),0)) %>% 
  # dplyr::ungroup() %>% 
  # dplyr::select(-avg,-min)

#is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & is.na(min_edad_al_ing)~as.numeric(avg),
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss1$edad_ini_cons))
message(paste0("Number of rows with values that did not fulfill the conditions: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
    dplyr::filter(is.na(edad_ini_cons)) %>% 
    dplyr::select(hash_key, edad_ini_cons, edad_al_ing,edad_ini_sus_prin) %>% nrow())
)
#Lo importante es tener en cuenta que las imputaciones se hicieron por filas; no, en cambio, ahora debemos reemplazar aquellos casos que tienen perdidos (no cumplieron con las condiciones) con el valor mínimo

message(paste0("Number of rows with values that did not fulfill the conditions after replacement with the minimum by users: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
    dplyr::filter(is.na(edad_ini_cons)) %>% 
    dplyr::select(hash_key, edad_ini_cons, edad_al_ing,edad_ini_sus_prin) %>% nrow())
)
message(paste0("Number of users that had different age of onset of drug use after replacement: ",CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(n_dis=n_distinct(edad_ini_cons)) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(n_dis>1) %>% 
      nrow())
)
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss1)-nrow(CONS_C1_df_dup_SEP_2020_match_miss0)>0){
  warning("Some rows were added in the imputation")}





There were 0 cases of imputed substance use onset ages that did not fulfill the conditions necessary to replace the missing values with the imputed ones.


First Substance Used

Then we selected the most vulnerable value among the candidates of the imputations of the starting substance (First, Cocaine paste, Cocaine hydrochloride or snort cocaine, Marijuana, Alcohol, and Other).


# Ver distintos valores propuestos para sustancia de inciio
sus_ini_mod_mvv_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$sus_ini_mod_mvv,
       amelia_fit$imputations$imp2$sus_ini_mod_mvv,
       amelia_fit$imputations$imp3$sus_ini_mod_mvv,
       amelia_fit$imputations$imp4$sus_ini_mod_mvv,
       amelia_fit$imputations$imp5$sus_ini_mod_mvv,
       amelia_fit$imputations$imp6$sus_ini_mod_mvv,
       amelia_fit$imputations$imp7$sus_ini_mod_mvv,
       amelia_fit$imputations$imp8$sus_ini_mod_mvv,
       amelia_fit$imputations$imp9$sus_ini_mod_mvv,
       amelia_fit$imputations$imp10$sus_ini_mod_mvv,
       amelia_fit$imputations$imp11$sus_ini_mod_mvv,
       amelia_fit$imputations$imp12$sus_ini_mod_mvv,
       amelia_fit$imputations$imp13$sus_ini_mod_mvv,
       amelia_fit$imputations$imp14$sus_ini_mod_mvv,
       amelia_fit$imputations$imp15$sus_ini_mod_mvv,
       amelia_fit$imputations$imp16$sus_ini_mod_mvv,
       amelia_fit$imputations$imp17$sus_ini_mod_mvv,
       amelia_fit$imputations$imp18$sus_ini_mod_mvv,
       amelia_fit$imputations$imp19$sus_ini_mod_mvv,
       amelia_fit$imputations$imp20$sus_ini_mod_mvv,
       amelia_fit$imputations$imp21$sus_ini_mod_mvv,
       amelia_fit$imputations$imp22$sus_ini_mod_mvv,
       amelia_fit$imputations$imp23$sus_ini_mod_mvv,
       amelia_fit$imputations$imp24$sus_ini_mod_mvv,
       amelia_fit$imputations$imp25$sus_ini_mod_mvv,
       amelia_fit$imputations$imp26$sus_ini_mod_mvv,
       amelia_fit$imputations$imp27$sus_ini_mod_mvv,
       amelia_fit$imputations$imp28$sus_ini_mod_mvv,
       amelia_fit$imputations$imp29$sus_ini_mod_mvv,
       amelia_fit$imputations$imp30$sus_ini_mod_mvv
       ) 

sus_ini_mod_mvv_imputed<-
sus_ini_mod_mvv_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Marijuana",as.character(.))~1,TRUE~0), .names="mar_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Alcohol",as.character(.))~1,TRUE~0), .names="oh_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine paste",as.character(.))~1,TRUE~0), .names="pb_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine hydrochloride",as.character(.))~1,TRUE~0), .names="coc_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Other",as.character(.))~1,TRUE~0), .names="otr_{col}"))%>%
        dplyr::mutate(sus_ini_mod_mvv_mar = base::rowSums(dplyr::select(., starts_with("mar_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_oh = base::rowSums(dplyr::select(., starts_with("oh_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_pb = base::rowSums(dplyr::select(., starts_with("pb_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_coc = base::rowSums(dplyr::select(., starts_with("coc_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_otr = base::rowSums(dplyr::select(., starts_with("otr_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_mar>0~1,TRUE~0)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_oh>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_pb>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_coc>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_otr>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_to_imputation=dplyr::case_when(sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_otr>0~"Other",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_otr>0~"Other")) %>% 
  janitor::clean_names()

sus_ini_mod_mvv_imputed<-
dplyr::select(sus_ini_mod_mvv_imputed,amelia_fit_imputations_imp1_row,sus_ini_mod_mvv_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss2<-
CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
   dplyr::left_join(sus_ini_mod_mvv_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(sus_ini_mod_mvv=factor(dplyr::case_when(is.na(sus_ini_mod_mvv)~as.character(sus_ini_mod_mvv_to_imputation),
                                 TRUE~as.character(sus_ini_mod_mvv)))) %>% 
  dplyr::select(-sus_ini_mod_mvv_to_imputation) %>% 
  data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss2)-nrow(CONS_C1_df_dup_SEP_2020_match_miss1)>0){
  warning("Some rows were added in the imputation")}

As a result of the imputations, there were no missing values.


Primary Substance at Admission Usage Frequency

Another variable that is worth imputing is the Primary Substance at Admission Usage Frequency (n= 568). In case of ties, we selected the imputed values with the value with the most frequent substance use pattern.


# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$freq_cons_sus_prin,
       amelia_fit$imputations$imp2$freq_cons_sus_prin,
       amelia_fit$imputations$imp3$freq_cons_sus_prin,
       amelia_fit$imputations$imp4$freq_cons_sus_prin,
       amelia_fit$imputations$imp5$freq_cons_sus_prin,
       amelia_fit$imputations$imp6$freq_cons_sus_prin,
       amelia_fit$imputations$imp7$freq_cons_sus_prin,
       amelia_fit$imputations$imp8$freq_cons_sus_prin,
       amelia_fit$imputations$imp9$freq_cons_sus_prin,
       amelia_fit$imputations$imp10$freq_cons_sus_prin,
       amelia_fit$imputations$imp11$freq_cons_sus_prin,
       amelia_fit$imputations$imp12$freq_cons_sus_prin,
       amelia_fit$imputations$imp13$freq_cons_sus_prin,
       amelia_fit$imputations$imp14$freq_cons_sus_prin,
       amelia_fit$imputations$imp15$freq_cons_sus_prin,
       amelia_fit$imputations$imp16$freq_cons_sus_prin,
       amelia_fit$imputations$imp17$freq_cons_sus_prin,
       amelia_fit$imputations$imp18$freq_cons_sus_prin,
       amelia_fit$imputations$imp19$freq_cons_sus_prin,
       amelia_fit$imputations$imp20$freq_cons_sus_prin,
       amelia_fit$imputations$imp21$freq_cons_sus_prin,
       amelia_fit$imputations$imp22$freq_cons_sus_prin,
       amelia_fit$imputations$imp23$freq_cons_sus_prin,
       amelia_fit$imputations$imp24$freq_cons_sus_prin,
       amelia_fit$imputations$imp25$freq_cons_sus_prin,
       amelia_fit$imputations$imp26$freq_cons_sus_prin,
       amelia_fit$imputations$imp27$freq_cons_sus_prin,
       amelia_fit$imputations$imp28$freq_cons_sus_prin,
       amelia_fit$imputations$imp29$freq_cons_sus_prin,
       amelia_fit$imputations$imp30$freq_cons_sus_prin
       ) 

freq_cons_sus_prin_imputed_after<-
freq_cons_sus_prin_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
    dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
  dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>% 
  dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  #hierarchy
  dplyr::mutate(freq_cons_sus_prin_to_imputation=
                  dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use")) %>% 
  janitor::clean_names()

freq_cons_sus_prin_imputed_after<-
dplyr::select(freq_cons_sus_prin_imputed_after,amelia_fit_imputations_imp1_row,freq_cons_sus_prin_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss3<-
CONS_C1_df_dup_SEP_2020_match_miss2 %>% 
   dplyr::left_join(freq_cons_sus_prin_imputed_after, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(freq_cons_sus_prin=factor(dplyr::case_when(is.na(freq_cons_sus_prin)~as.character(freq_cons_sus_prin_to_imputation), TRUE~as.character(freq_cons_sus_prin)))) %>% 
  data.table()

if(nrow(CONS_C1_df_dup_SEP_2020_match_miss3)-nrow(CONS_C1_df_dup_SEP_2020_match_miss2)>0){
  warning("Some rows were added in the imputation")}

As a result of the imputations, there were no missing values.


Educational Attainment

Another variable that is worth imputing is the Educational Attainment (n= 437). We followed the rules stated in the Duplicates4 process (link). We were particularly cautious to impute attainments that would follow a progression from primary school to more than high school. For this purpose, we first looked over the actual values per user, filling intermediate gaps in educational attainment of users with intermediate null values (a), we overcame the difficulty of the incorrect imputations by logically selecting them if there were any.


# Ver distintos valores propuestos para sustancia de inciio
escolaridad_rec_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
                  amelia_fit$imputations$imp1$hash_key,
                  amelia_fit$imputations$imp1$fech_ing_num,
                  amelia_fit$imputations$imp1$escolaridad_rec,
                  amelia_fit$imputations$imp2$escolaridad_rec,
                  amelia_fit$imputations$imp3$escolaridad_rec,
                  amelia_fit$imputations$imp4$escolaridad_rec,
                  amelia_fit$imputations$imp5$escolaridad_rec,
                  amelia_fit$imputations$imp6$escolaridad_rec,
                  amelia_fit$imputations$imp7$escolaridad_rec,
                  amelia_fit$imputations$imp8$escolaridad_rec,
                  amelia_fit$imputations$imp9$escolaridad_rec,
                  amelia_fit$imputations$imp10$escolaridad_rec,
                  amelia_fit$imputations$imp11$escolaridad_rec,
                  amelia_fit$imputations$imp12$escolaridad_rec,
                  amelia_fit$imputations$imp13$escolaridad_rec,
                  amelia_fit$imputations$imp14$escolaridad_rec,
                  amelia_fit$imputations$imp15$escolaridad_rec,
                  amelia_fit$imputations$imp16$escolaridad_rec,
                  amelia_fit$imputations$imp17$escolaridad_rec,
                  amelia_fit$imputations$imp18$escolaridad_rec,
                  amelia_fit$imputations$imp19$escolaridad_rec,
                  amelia_fit$imputations$imp20$escolaridad_rec,
                  amelia_fit$imputations$imp21$escolaridad_rec,
                  amelia_fit$imputations$imp22$escolaridad_rec,
                  amelia_fit$imputations$imp23$escolaridad_rec,
                  amelia_fit$imputations$imp24$escolaridad_rec,
                  amelia_fit$imputations$imp25$escolaridad_rec,
                  amelia_fit$imputations$imp26$escolaridad_rec,
                  amelia_fit$imputations$imp27$escolaridad_rec,
                  amelia_fit$imputations$imp28$escolaridad_rec,
                  amelia_fit$imputations$imp29$escolaridad_rec,
                  amelia_fit$imputations$imp30$escolaridad_rec) 

escolaridad_rec_imputed2<-
escolaridad_rec_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("3-Completed primary school or less",as.character(.))~1,TRUE~0), .names="3_primary_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("2-Completed high school or less",as.character(.))~1,TRUE~0), .names="2_high_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("1-More than high school",as.character(.))~1,TRUE~0), .names="1_more_high_{col}")) %>% 

  dplyr::mutate(escolaridad_rec_3_primary = base::rowSums(dplyr::select(., contains("3_primary_")))) %>% 
  dplyr::mutate(escolaridad_rec_2_high = base::rowSums(dplyr::select(., contains("2_high_"))))%>%
  dplyr::mutate(escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., contains("1_more_high_"))))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create an ordered index of the number of treatments by user
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#:#:#:#;#;#;
CONS_C1_df_dup_SEP_2020_match_rn<-
    CONS_C1_df_dup_SEP_2020_match_miss %>%  #base de datos original, sin imputaciones
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(rn=row_number()) %>% 
    dplyr::ungroup() %>% 
    dplyr::select(rn)
#:#:#:#;#;#;
escolaridad_rec_imputed3<-
escolaridad_rec_imputed2 %>%   
  dplyr::left_join(cbind.data.frame(CONS_C1_df_dup_SEP_2020_match_miss$row, CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec,CONS_C1_df_dup_SEP_2020_match_rn$rn),by=c("amelia_fit.imputations.imp1.row"="CONS_C1_df_dup_SEP_2020_match_miss$row")) %>%
  dplyr::rename("escolaridad_rec_original"="CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec") %>%
  dplyr::mutate(escolaridad_rec_original=as.numeric(substr(escolaridad_rec_original, 1, 1))) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  #ordenar por tratamientos por usuario
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::arrange(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`) %>% 
  dplyr::group_by(amelia_fit.imputations.imp1.hash_key) %>%  
  dplyr::mutate(siguiente_escolaridad_rec_original=lead(escolaridad_rec_original), 
                subsig_escolaridad_rec_original=lead(escolaridad_rec_original,n =2), 
                rn=max(`CONS_C1_df_dup_SEP_2020_match_rn$rn`),
                n_na_esc_or=is.na(escolaridad_rec_original),
                sum_n_na_esc_or=sum(n_na_esc_or,na.rm=T),
                max_sum_n_na_esc_or=max(n_na_esc_or,na.rm=T)
                ) %>% 
#dplyr::select(amelia_fit.imputations.imp1.hash_key,amelia_fit.imputations.imp30.rn,
#              siguiente_escolaridad_rec_original,escolaridad_rec_original,amelia_fit.imputations.imp1.fech_ing_num)%>% View()
  dplyr::ungroup()

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PREPARACIÓN  BASE DE DATOS PARA IMPUTACION Y CREACIÓN DE VARIABLES PARA CONDICIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed4 <-
escolaridad_rec_imputed3 %>% 
  dplyr::select(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`,escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high, escolaridad_rec_1_more_high) %>%
  dplyr::rename("hash_key"="amelia_fit.imputations.imp1.hash_key") %>% 
  dplyr::rename("treat_no_for_usr"="CONS_C1_df_dup_SEP_2020_match_rn$rn") %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(treat_per_usr=max(treat_no_for_usr,na.rm=T)) %>% 
  dplyr::ungroup() %>% 
  tidyr::pivot_wider(names_from=treat_no_for_usr,
                     #names_glue = "ord_treat_esc_{.value}",
                     values_from=c(escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high,escolaridad_rec_1_more_high),values_fill = NA) %>% 
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#Ver si existen inconsistencias en la escolaridad, pero no sólo inconsistencias inmediatas, sino con hasta 2 espacios entre tratamientos
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_tot_cond=dplyr::case_when(
    (escolaridad_rec_original_10>escolaridad_rec_original_9)|(escolaridad_rec_original_10>escolaridad_rec_original_8)|(escolaridad_rec_original_10>escolaridad_rec_original_7)|
      (escolaridad_rec_original_9>escolaridad_rec_original_8)|(escolaridad_rec_original_9>escolaridad_rec_original_7)|(escolaridad_rec_original_9>escolaridad_rec_original_6)|
      (escolaridad_rec_original_8>escolaridad_rec_original_7)|(escolaridad_rec_original_8>escolaridad_rec_original_6)|(escolaridad_rec_original_8>escolaridad_rec_original_5)|
      (escolaridad_rec_original_7>escolaridad_rec_original_6)|(escolaridad_rec_original_7>escolaridad_rec_original_5)|(escolaridad_rec_original_7>escolaridad_rec_original_4)|
      (escolaridad_rec_original_6>escolaridad_rec_original_5)|(escolaridad_rec_original_6>escolaridad_rec_original_4)|(escolaridad_rec_original_6>escolaridad_rec_original_3)|
      (escolaridad_rec_original_5>escolaridad_rec_original_4)|(escolaridad_rec_original_5>escolaridad_rec_original_3)|(escolaridad_rec_original_5>escolaridad_rec_original_2)|
      (escolaridad_rec_original_4>escolaridad_rec_original_3)|(escolaridad_rec_original_4>escolaridad_rec_original_2)|(escolaridad_rec_original_4>escolaridad_rec_original_1)|
      (escolaridad_rec_original_3>escolaridad_rec_original_2)|(escolaridad_rec_original_3>escolaridad_rec_original_1)|
      (escolaridad_rec_original_2>escolaridad_rec_original_1)~1,TRUE~0)) %>% 
  #dplyr::filter(escolaridad_rec_tot_cond==1) %>% #View() #0 rows ¿y 374745c85601976177fe614a7370e475?
  #dplyr::filter(treat_per_usr>1) %>% 
  #:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  # Ver si hay valores de escolaridad ausentes en una progresión de tratamientos
  #:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
  
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio=dplyr::case_when(
      (sum_nas_esc>10 & treat_per_usr==10)|
      (sum_nas_esc>1 & treat_per_usr==9)|
      (sum_nas_esc>2 & treat_per_usr==8)|
      (sum_nas_esc>3 & treat_per_usr==7)|
      (sum_nas_esc>4 & treat_per_usr==6)|
      (sum_nas_esc>5 & treat_per_usr==5)|
      (sum_nas_esc>6 & treat_per_usr==4)|
      (sum_nas_esc>7 & treat_per_usr==3)|
      (sum_nas_esc>8 & treat_per_usr==2)|
      (sum_nas_esc>9 & treat_per_usr==1)~1,TRUE~0)) %>% #18b1f9646a2cd6bebd962637cff0a21a 5 casos
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  #Generar la escolaridad al final
  #:#:#:#:#:#:#:#:#
  dplyr::mutate(last_esc=dplyr::case_when(treat_per_usr==10~escolaridad_rec_original_10,
                                          treat_per_usr==9~escolaridad_rec_original_9,
                                          treat_per_usr==8~escolaridad_rec_original_8,
                                          treat_per_usr==7~escolaridad_rec_original_7,
                                          treat_per_usr==6~escolaridad_rec_original_6,
                                          treat_per_usr==5~escolaridad_rec_original_5,
                                          treat_per_usr==4~escolaridad_rec_original_4,
                                          treat_per_usr==3~escolaridad_rec_original_3,
                                          treat_per_usr==2~escolaridad_rec_original_2,
                                          treat_per_usr==1~escolaridad_rec_original_1)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a0))si valor final vs. inicial son iguales, imputar todo lo que está en medio con el mismo valor
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_9=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>9 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_9)) %>% 
  dplyr::mutate(escolaridad_rec_original_8=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>8 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_8)) %>% 
  dplyr::mutate(escolaridad_rec_original_7=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>7 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_7)) %>% 
  dplyr::mutate(escolaridad_rec_original_6=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>6 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_6)) %>% 
  dplyr::mutate(escolaridad_rec_original_5=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>5 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_5)) %>% 
  dplyr::mutate(escolaridad_rec_original_4=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>4 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_4)) %>% 
  dplyr::mutate(escolaridad_rec_original_3=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>3 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_3)) %>% 
  dplyr::mutate(escolaridad_rec_original_2=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>2 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_2)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a1))cambiar valores vacíos intermedios  /// fijarse en  & escolaridad_rec_tot_cond==1
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#reemplazar el número intermedio por cada tratamiento para cada usuario
  dplyr::mutate(escolaridad_rec_original_9=dplyr::case_when(escolaridad_rec_original_8==escolaridad_rec_original_10 & is.na(escolaridad_rec_original_9)&!is.na(escolaridad_rec_original_10)~escolaridad_rec_original_10,TRUE~escolaridad_rec_original_9)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_10)) %>% View()
  dplyr::mutate(escolaridad_rec_original_8=dplyr::case_when(escolaridad_rec_original_7==escolaridad_rec_original_9 & is.na(escolaridad_rec_original_8)&!is.na(escolaridad_rec_original_9)~escolaridad_rec_original_9,TRUE~escolaridad_rec_original_8)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_9)) %>% View()
  dplyr::mutate(escolaridad_rec_original_7=dplyr::case_when(escolaridad_rec_original_6==escolaridad_rec_original_8 & is.na(escolaridad_rec_original_7)&!is.na(escolaridad_rec_original_8)~escolaridad_rec_original_8 ,TRUE~escolaridad_rec_original_7)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_8)) %>% View()
  dplyr::mutate(escolaridad_rec_original_6=dplyr::case_when(escolaridad_rec_original_5==escolaridad_rec_original_7& is.na(escolaridad_rec_original_6)&!is.na(escolaridad_rec_original_7)~escolaridad_rec_original_7,TRUE~escolaridad_rec_original_6)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_7)) %>% View()
  dplyr::mutate(escolaridad_rec_original_5=dplyr::case_when(escolaridad_rec_original_4==escolaridad_rec_original_6  & is.na(escolaridad_rec_original_5)&!is.na(escolaridad_rec_original_6)~escolaridad_rec_original_6,TRUE~escolaridad_rec_original_5)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_6)) %>% View()
  dplyr::mutate(escolaridad_rec_original_4=dplyr::case_when(escolaridad_rec_original_3==escolaridad_rec_original_5  & is.na(escolaridad_rec_original_4)&!is.na(escolaridad_rec_original_5)~escolaridad_rec_original_5,TRUE~escolaridad_rec_original_4)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_5)) %>% View()
  dplyr::mutate(escolaridad_rec_original_3=dplyr::case_when(escolaridad_rec_original_2==escolaridad_rec_original_4  & is.na(escolaridad_rec_original_3)&!is.na(escolaridad_rec_original_4)~escolaridad_rec_original_4,TRUE~escolaridad_rec_original_3)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_4)) %>% View()
  dplyr::mutate(escolaridad_rec_original_2=dplyr::case_when(escolaridad_rec_original_1==escolaridad_rec_original_3  & is.na(escolaridad_rec_original_2)&!is.na(escolaridad_rec_original_3)~escolaridad_rec_original_3,TRUE~escolaridad_rec_original_2)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_3)) %>% View()
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a2))si tiene información en la segunda pero no en la primera, y no es un valor intermedio como secundaria completa (ya que en ese caso puede adoptar más de un valor: más o igual a ese valor), imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==3~3,
                                                            escolaridad_rec_original_2==1~1,
                                                            TRUE~escolaridad_rec_original_1)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a3))si hay más de 2 tratamientos por usuarios, y tiene información en la segunda pero no en la primera, y es un valor intermedio pero tiene un tercer tratamiento con el mismo valor, imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
    dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==2 & escolaridad_rec_original_3==2~3,TRUE~escolaridad_rec_original_1))  %>% 

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#medidas para capturar inconsistencias a lo largo de todos los tratamientos de cada usuario
#escolaridad_rec_imputed4 %>% #escolaridad_rec_tot_cond
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==1~1,TRUE~0), .names="1_more_high_{col}")) %>% 
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==2~1,TRUE~0), .names="2_high_{col}")) %>% 
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==3~1,TRUE~0), .names="3_primary_{col}")) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., starts_with("1_more_high_")))) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_2_high = base::rowSums(dplyr::select(., starts_with("2_high_")))) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_3_primary = base::rowSums(dplyr::select(., starts_with("3_primary_"))))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#IMPUTACIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed5<-
escolaridad_rec_imputed4 %>% 
  #hacer una suma de más NA's de los que debería tener según la cantidad de tratamientos que tiene la persona
  #:#:#:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc_post=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio_post=dplyr::case_when(
      (sum_nas_esc_post>10 & treat_per_usr==10)|
      (sum_nas_esc_post>1 & treat_per_usr==9)|
      (sum_nas_esc_post>2 & treat_per_usr==8)|
      (sum_nas_esc_post>3 & treat_per_usr==7)|
      (sum_nas_esc_post>4 & treat_per_usr==6)|
      (sum_nas_esc_post>5 & treat_per_usr==5)|
      (sum_nas_esc_post>6 & treat_per_usr==4)|
      (sum_nas_esc_post>7 & treat_per_usr==3)|
      (sum_nas_esc_post>8 & treat_per_usr==2)|
      (sum_nas_esc_post>9 & treat_per_usr==1)~1,TRUE~0)) %>%
  #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #d864967fa0b1c5bb1d4eb5f6a7c8c2c1
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b0))valor inicial y sólo un tratamiento, se imputa por el valor imputado más frecuente de las 30 bases de datos
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)~3,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_1_more_high_1)~2,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_2_high_1)~1,
    TRUE~escolaridad_rec_original_1)) %>% 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b1))valor en el segundo tratamiento es intermedio, inicial se imputa, dependiendo si primaria es mayor que intermedio o no
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  ###
  #dplyr::filter(is.na(escolaridad_rec_original_1),!is.na(escolaridad_rec_original_2)) %>%
  #dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1,escolaridad_rec_1_more_high_1) %>% View()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#

  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)~3,
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1<escolaridad_rec_2_high_1)~2,TRUE~escolaridad_rec_original_1))%>%
    #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
  dplyr::mutate(escolaridad_rec_original_10= dplyr::case_when(
  #
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10<escolaridad_rec_2_high_10)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10) & (escolaridad_rec_1_more_high_10>escolaridad_rec_3_primary_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
        (escolaridad_rec_2_high_10 >escolaridad_rec_1_more_high_10) & (escolaridad_rec_2_high_10>escolaridad_rec_3_primary_10)~2,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_3_primary_10 >escolaridad_rec_2_high_10) & (escolaridad_rec_3_primary_10>escolaridad_rec_1_more_high_10)~2,TRUE~escolaridad_rec_original_10)) %>% 
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #
    dplyr::mutate(escolaridad_rec_original_9= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9<escolaridad_rec_2_high_9)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9) & (escolaridad_rec_1_more_high_9>escolaridad_rec_3_primary_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
        (escolaridad_rec_2_high_9 >escolaridad_rec_1_more_high_9) & (escolaridad_rec_2_high_9>escolaridad_rec_3_primary_9)~2,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_3_primary_9 >escolaridad_rec_2_high_9) & (escolaridad_rec_3_primary_9>escolaridad_rec_1_more_high_9)~2,TRUE~escolaridad_rec_original_9)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_8= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8<escolaridad_rec_2_high_8)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8) & (escolaridad_rec_1_more_high_8>escolaridad_rec_3_primary_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
        (escolaridad_rec_2_high_8 >escolaridad_rec_1_more_high_8) & (escolaridad_rec_2_high_8>escolaridad_rec_3_primary_8)~2,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_3_primary_8 >escolaridad_rec_2_high_8) & (escolaridad_rec_3_primary_8>escolaridad_rec_1_more_high_8)~2,TRUE~escolaridad_rec_original_8)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_7= dplyr::case_when(
          #si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7<escolaridad_rec_2_high_7)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7) & (escolaridad_rec_1_more_high_7>escolaridad_rec_3_primary_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
        (escolaridad_rec_2_high_7 >escolaridad_rec_1_more_high_7) & (escolaridad_rec_2_high_7>escolaridad_rec_3_primary_7)~2,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_3_primary_7 >escolaridad_rec_2_high_7) & (escolaridad_rec_3_primary_7>escolaridad_rec_1_more_high_7)~2,TRUE~escolaridad_rec_original_7)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_6= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6<escolaridad_rec_2_high_6)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6) & (escolaridad_rec_1_more_high_6>escolaridad_rec_3_primary_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
        (escolaridad_rec_2_high_6 >escolaridad_rec_1_more_high_6) & (escolaridad_rec_2_high_6>escolaridad_rec_3_primary_6)~2,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_3_primary_6 >escolaridad_rec_2_high_6) & (escolaridad_rec_3_primary_6>escolaridad_rec_1_more_high_6)~2,TRUE~escolaridad_rec_original_6)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_5= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5<escolaridad_rec_2_high_5)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5) & (escolaridad_rec_1_more_high_5>escolaridad_rec_3_primary_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
        (escolaridad_rec_2_high_5 >escolaridad_rec_1_more_high_5) & (escolaridad_rec_2_high_5>escolaridad_rec_3_primary_5)~2,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_3_primary_5 >escolaridad_rec_2_high_5) & (escolaridad_rec_3_primary_5>escolaridad_rec_1_more_high_5)~2,TRUE~escolaridad_rec_original_5)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_4= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4<escolaridad_rec_2_high_4)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4) & (escolaridad_rec_1_more_high_4>escolaridad_rec_3_primary_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
        (escolaridad_rec_2_high_4 >escolaridad_rec_1_more_high_4) & (escolaridad_rec_2_high_4>escolaridad_rec_3_primary_4)~2,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_3_primary_4 >escolaridad_rec_2_high_4) & (escolaridad_rec_3_primary_4>escolaridad_rec_1_more_high_4)~2,TRUE~escolaridad_rec_original_4)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_3= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3<escolaridad_rec_2_high_3)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3) & (escolaridad_rec_1_more_high_3>escolaridad_rec_3_primary_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
        (escolaridad_rec_2_high_3 >escolaridad_rec_1_more_high_3) & (escolaridad_rec_2_high_3>escolaridad_rec_3_primary_3)~2,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_3_primary_3 >escolaridad_rec_2_high_3) & (escolaridad_rec_3_primary_3>escolaridad_rec_1_more_high_3)~2,TRUE~escolaridad_rec_original_3))
#:#:#:#:
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
 #:#:#:#:
  #comprobar si en verdad calza:
  #%>%dplyr::filter(hash_key=="ef4325cda7ddd92f6218bb910c3e0895") %>% dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,treat_per_usr,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1)
  #610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#escolaridad_rec_imputed5 %>% 
#    dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_3,escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_4)

#98d6644d995ea2c8777a683160728004
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
escolaridad_rec_imputed6<-
escolaridad_rec_imputed5 %>% 
#dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_3)
  dplyr::select(hash_key,starts_with("escolaridad_rec_original_")) %>%
  tidyr::pivot_longer(cols = starts_with("escolaridad_rec_original_"),
   names_to = "rn",
   names_prefix = "escolaridad_rec_original_") %>% 
  dplyr::filter(!is.na(value)) %>% 
  dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>% 
  dplyr::select(hash_rn,value)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss4<-
CONS_C1_df_dup_SEP_2020_match_miss3 %>%
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(rn=row_number()) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>% 
  dplyr::left_join(escolaridad_rec_imputed6, by=c("hash_rn")) %>% 
  dplyr::mutate(escolaridad_rec=dplyr::case_when(value==1~"1-More than high school",value==2~"2-Completed high school or less",value==3~"3-Completed primary school or less")) %>% 
  #
  dplyr::arrange(hash_key,rn) %>% 
  #dplyr::mutate(escolaridad_rec=dplyr::case_when(is.na(escolaridad_rec)~value,TRUE~as.character(escolaridad_rec))) %>% 
  dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =F,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
  dplyr::select(-value,-hash_rn) %>% 
  data.table()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
paste("Check inconsistencies with posterior educational attainments (0= No inconsistencies):",CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
  dplyr::arrange(hash_key,rn) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(escolaridad_rec_num=as.numeric(substr(escolaridad_rec, 1, 1)),
                sig_escolaridad_rec_num=lead(escolaridad_rec_num),
                ant_escolaridad_rec_num=lag(escolaridad_rec_num)) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(escolaridad_rec_num>ant_escolaridad_rec_num) %>% 
  dplyr::select(hash_key,rn,fech_ing_num, escolaridad_rec, escolaridad_rec_num, sig_escolaridad_rec_num,ant_escolaridad_rec_num) %>% 
  nrow())
## [1] "Check inconsistencies with posterior educational attainments (0= No inconsistencies): 0"
#4b27553c38e707a6fda01855b784cf66 5a25a197e73f83fea546ac73589928fa b715d04a584dbdd450fd6f2ea68291fc
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss4)-nrow(CONS_C1_df_dup_SEP_2020_match_miss3)>0){
  warning("Some rows were added in the imputation")}


We concluded with a total of 270 missing values in educational attainment (users=267), because the imputed values did not fulfill the requirements of a progression of the educational attainment (eg., a user could not respond to have completed secondary school, but then answer that he/she had completed primary school only), for example, due to ties in the imputed values or no imputed values.


Marital status

Additionally, we replaced missing values of the marital status (n=198). Since different marital status were not particularly more vulnerable between each other, we selected the most frequent imputed value among the different imputed databases.


# Ver distintos valores propuestos para estado conyugal
estado_conyugal_2_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$estado_conyugal_2,
       amelia_fit$imputations$imp2$estado_conyugal_2,
       amelia_fit$imputations$imp3$estado_conyugal_2,
       amelia_fit$imputations$imp4$estado_conyugal_2,
       amelia_fit$imputations$imp5$estado_conyugal_2,
       amelia_fit$imputations$imp6$estado_conyugal_2,
       amelia_fit$imputations$imp7$estado_conyugal_2,
       amelia_fit$imputations$imp8$estado_conyugal_2,
       amelia_fit$imputations$imp9$estado_conyugal_2,
       amelia_fit$imputations$imp10$estado_conyugal_2,
       amelia_fit$imputations$imp11$estado_conyugal_2,
       amelia_fit$imputations$imp12$estado_conyugal_2,
       amelia_fit$imputations$imp13$estado_conyugal_2,
       amelia_fit$imputations$imp14$estado_conyugal_2,
       amelia_fit$imputations$imp15$estado_conyugal_2,
       amelia_fit$imputations$imp16$estado_conyugal_2,
       amelia_fit$imputations$imp17$estado_conyugal_2,
       amelia_fit$imputations$imp18$estado_conyugal_2,
       amelia_fit$imputations$imp19$estado_conyugal_2,
       amelia_fit$imputations$imp20$estado_conyugal_2,
       amelia_fit$imputations$imp21$estado_conyugal_2,
       amelia_fit$imputations$imp22$estado_conyugal_2,
       amelia_fit$imputations$imp23$estado_conyugal_2,
       amelia_fit$imputations$imp24$estado_conyugal_2,
       amelia_fit$imputations$imp25$estado_conyugal_2,
       amelia_fit$imputations$imp26$estado_conyugal_2,
       amelia_fit$imputations$imp27$estado_conyugal_2,
       amelia_fit$imputations$imp28$estado_conyugal_2,
       amelia_fit$imputations$imp29$estado_conyugal_2,
       amelia_fit$imputations$imp30$estado_conyugal_2
       ) 

estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Married/Shared living arrangements",as.character(.))~1,TRUE~0), .names="married_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Separated/Divorced",as.character(.))~1,TRUE~0), .names="sep_div_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Single",as.character(.))~1,TRUE~0), .names="singl_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Widower",as.character(.))~1,TRUE~0), .names="widow_{col}"))%>%
 
  dplyr::mutate(estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))))%>%
  dplyr::mutate(estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))))%>%
  dplyr::mutate(estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))))%>%
  dplyr::mutate(estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_married>0~1,TRUE~0)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_sep_div>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_singl>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_wid>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  janitor::clean_names()
  
estado_conyugal_2_imputed_cat_est_cony<-  
    estado_conyugal_2_imputed %>%
        tidyr::pivot_longer(c(estado_conyugal_2_married, estado_conyugal_2_sep_div, estado_conyugal_2_singl, estado_conyugal_2_wid), names_to = "cat_est_conyugal", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(estado_conyugal_2_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(estado_conyugal_2_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_est_conyugal,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_est_conyugal=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_est_conyugal)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
estado_conyugal_2_imputed<-
  estado_conyugal_2_imputed %>% 
    dplyr::left_join(estado_conyugal_2_imputed_cat_est_cony, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_est_conyugal=dplyr::case_when(cat_est_conyugal=="estado_conyugal_2_married"~"Married/Shared living arrangements",cat_est_conyugal=="estado_conyugal_2_sep_div"~"Separated/Divorced",cat_est_conyugal=="estado_conyugal_2_singl"~"Single",cat_est_conyugal=="estado_conyugal_2_wid"~"Widower"
    ))%>% 
  janitor::clean_names()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss5<-
CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
   dplyr::left_join(dplyr::select(estado_conyugal_2_imputed,amelia_fit_imputations_imp1_row,cat_est_conyugal), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(cat_est_conyugal),TRUE~as.character(estado_conyugal_2)))) %>% 
  data.table()

no_calzaron_estado_cony<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist()

if(nrow(CONS_C1_df_dup_SEP_2020_match_miss5)-nrow(CONS_C1_df_dup_SEP_2020_match_miss4)>0){
  warning("Some rows were added in the imputation")}


We could not resolve Marital status in 13 cases due to ties in the most frequent values.


Region & Type of Center (Public)

We looked over possible imputations for region of the center (n=28) and type of the center (public or private) (n=28).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

#no hay información. debemos imputar
no_mostrar=0
if (no_mostrar==1){
tipo_centro_nombre_region_nas_nombre_region<-
CONS_C1_df_dup_SEP_2020 %>% 
    #dplyr::filter(row %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[,"row"]))) %>% 
    dplyr::filter(is.na(nombre_region)) %>% 
    janitor::tabyl(tipo_centro, nombre_region) 
}

nombre_region_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$nombre_region,
       amelia_fit$imputations$imp2$nombre_region,
       amelia_fit$imputations$imp3$nombre_region,
       amelia_fit$imputations$imp4$nombre_region,
       amelia_fit$imputations$imp5$nombre_region,
       amelia_fit$imputations$imp6$nombre_region,
       amelia_fit$imputations$imp7$nombre_region,
       amelia_fit$imputations$imp8$nombre_region,
       amelia_fit$imputations$imp9$nombre_region,
       amelia_fit$imputations$imp10$nombre_region,
       amelia_fit$imputations$imp11$nombre_region,
       amelia_fit$imputations$imp12$nombre_region,
       amelia_fit$imputations$imp13$nombre_region,
       amelia_fit$imputations$imp14$nombre_region,
       amelia_fit$imputations$imp15$nombre_region,
       amelia_fit$imputations$imp16$nombre_region,
       amelia_fit$imputations$imp17$nombre_region,
       amelia_fit$imputations$imp18$nombre_region,
       amelia_fit$imputations$imp19$nombre_region,
       amelia_fit$imputations$imp20$nombre_region,
       amelia_fit$imputations$imp21$nombre_region,
       amelia_fit$imputations$imp22$nombre_region,
       amelia_fit$imputations$imp23$nombre_region,
       amelia_fit$imputations$imp24$nombre_region,
       amelia_fit$imputations$imp25$nombre_region,
       amelia_fit$imputations$imp26$nombre_region,
       amelia_fit$imputations$imp27$nombre_region,
       amelia_fit$imputations$imp28$nombre_region,
       amelia_fit$imputations$imp29$nombre_region,
       amelia_fit$imputations$imp30$nombre_region
       ) 
nombre_region_imputed<-
nombre_region_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Antofagasta",as.character(.))~1,TRUE~0), .names="reg_02_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Araucan",as.character(.))~1,TRUE~0), .names="reg_09_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Arica",as.character(.))~1,TRUE~0), .names="reg_15_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Atacama",as.character(.))~1,TRUE~0), .names="reg_03_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Ays",as.character(.))~1,TRUE~0), .names="reg_11_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Biob",as.character(.))~1,TRUE~0), .names="reg_08_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Coquimbo",as.character(.))~1,TRUE~0), .names="reg_04_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los Lagos",as.character(.))~1,TRUE~0), .names="reg_10_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los R",as.character(.))~1,TRUE~0), .names="reg_14_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Magallanes",as.character(.))~1,TRUE~0), .names="reg_12_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Maule",as.character(.))~1,TRUE~0), .names="reg_07_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Metropolitana",as.character(.))~1,TRUE~0), .names="reg_13_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("uble",as.character(.))~1,TRUE~0), .names="reg_16_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Higgins",as.character(.))~1,TRUE~0), .names="reg_06_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Tarapac",as.character(.))~1,TRUE~0), .names="reg_01_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Valpara",as.character(.))~1,TRUE~0), .names="reg_05_{col}"))%>%
  
 
  dplyr::mutate(nombre_region_02 = base::rowSums(dplyr::select(., starts_with("reg_02_"))))%>%
  dplyr::mutate(nombre_region_09 = base::rowSums(dplyr::select(., starts_with("reg_09_"))))%>%
  dplyr::mutate(nombre_region_15 = base::rowSums(dplyr::select(., starts_with("reg_15_"))))%>%
  dplyr::mutate(nombre_region_03 = base::rowSums(dplyr::select(., starts_with("reg_03_"))))%>%
  dplyr::mutate(nombre_region_11 = base::rowSums(dplyr::select(., starts_with("reg_11_"))))%>%
  dplyr::mutate(nombre_region_08 = base::rowSums(dplyr::select(., starts_with("reg_08_"))))%>%
  dplyr::mutate(nombre_region_04 = base::rowSums(dplyr::select(., starts_with("reg_04_"))))%>%
  dplyr::mutate(nombre_region_10 = base::rowSums(dplyr::select(., starts_with("reg_10_"))))%>%
  dplyr::mutate(nombre_region_14 = base::rowSums(dplyr::select(., starts_with("reg_14_"))))%>%
  dplyr::mutate(nombre_region_12 = base::rowSums(dplyr::select(., starts_with("reg_12_"))))%>%
  dplyr::mutate(nombre_region_07 = base::rowSums(dplyr::select(., starts_with("reg_07_"))))%>%
  dplyr::mutate(nombre_region_13 = base::rowSums(dplyr::select(., starts_with("reg_13_"))))%>%
  dplyr::mutate(nombre_region_16 = base::rowSums(dplyr::select(., starts_with("reg_16_"))))%>%
  dplyr::mutate(nombre_region_06 = base::rowSums(dplyr::select(., starts_with("reg_06_"))))%>%
  dplyr::mutate(nombre_region_01 = base::rowSums(dplyr::select(., starts_with("reg_01_"))))%>%
  dplyr::mutate(nombre_region_05 = base::rowSums(dplyr::select(., starts_with("reg_05_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_02>0~1,TRUE~0)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_09>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_15>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_03>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_11>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_08>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_04>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_10>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_14>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_12>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_07>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_13>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_16>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_06>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_01>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_05>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  janitor::clean_names()
  
nombre_region_imputed_cat_reg<-  
    nombre_region_imputed %>%
        tidyr::pivot_longer(c(nombre_region_01, nombre_region_02, nombre_region_03, nombre_region_04, nombre_region_05, nombre_region_06, nombre_region_07, nombre_region_08, nombre_region_09, nombre_region_10, nombre_region_11, nombre_region_12, nombre_region_13, nombre_region_14, nombre_region_15), names_to = "cat_nombre_region", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(nombre_region_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(nombre_region_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_nombre_region,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_nombre_region=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_nombre_region)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
nombre_region_imputed<-
  nombre_region_imputed %>% 
    dplyr::left_join(nombre_region_imputed_cat_reg, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_nombre_region=dplyr::case_when(cat_nombre_region=="nombre_region_01"~"Tarapacá (01)",cat_nombre_region=="nombre_region_02"~"Antofagasta (02)",cat_nombre_region=="nombre_region_03"~"Atacama (03)",cat_nombre_region=="nombre_region_04"~"Coquimbo (04)",cat_nombre_region=="nombre_region_05"~"Valparaíso (05)",cat_nombre_region=="nombre_region_06"~"O'Higgins (06)",cat_nombre_region=="nombre_region_07"~"Maule (07)",cat_nombre_region=="nombre_region_08"~"Biobío (08)",cat_nombre_region=="nombre_region_09"~"Araucanía (09)",cat_nombre_region=="nombre_region_10"~"Los Lagos (10)",cat_nombre_region=="nombre_region_11"~"Aysén (11)",cat_nombre_region=="nombre_region_12"~"Magallanes (12)",cat_nombre_region=="nombre_region_13"~"Metropolitana (13)",
                                                 cat_nombre_region=="nombre_region_14"~"Los Ríos (14)",cat_nombre_region=="nombre_region_15"~"Arica (15)",cat_nombre_region=="nombre_region_16"~"Ñuble (16)",
    ))%>% 
  janitor::clean_names()

#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_
tipo_centro_pub_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$tipo_centro_pub,
       amelia_fit$imputations$imp2$tipo_centro_pub,
       amelia_fit$imputations$imp3$tipo_centro_pub,
       amelia_fit$imputations$imp4$tipo_centro_pub,
       amelia_fit$imputations$imp5$tipo_centro_pub,
       amelia_fit$imputations$imp6$tipo_centro_pub,
       amelia_fit$imputations$imp7$tipo_centro_pub,
       amelia_fit$imputations$imp8$tipo_centro_pub,
       amelia_fit$imputations$imp9$tipo_centro_pub,
       amelia_fit$imputations$imp10$tipo_centro_pub,
       amelia_fit$imputations$imp11$tipo_centro_pub,
       amelia_fit$imputations$imp12$tipo_centro_pub,
       amelia_fit$imputations$imp13$tipo_centro_pub,
       amelia_fit$imputations$imp14$tipo_centro_pub,
       amelia_fit$imputations$imp15$tipo_centro_pub,
       amelia_fit$imputations$imp16$tipo_centro_pub,
       amelia_fit$imputations$imp17$tipo_centro_pub,
       amelia_fit$imputations$imp18$tipo_centro_pub,
       amelia_fit$imputations$imp19$tipo_centro_pub,
       amelia_fit$imputations$imp20$tipo_centro_pub,
       amelia_fit$imputations$imp21$tipo_centro_pub,
       amelia_fit$imputations$imp22$tipo_centro_pub,
       amelia_fit$imputations$imp23$tipo_centro_pub,
       amelia_fit$imputations$imp24$tipo_centro_pub,
       amelia_fit$imputations$imp25$tipo_centro_pub,
       amelia_fit$imputations$imp26$tipo_centro_pub,
       amelia_fit$imputations$imp27$tipo_centro_pub,
       amelia_fit$imputations$imp28$tipo_centro_pub,
       amelia_fit$imputations$imp29$tipo_centro_pub,
       amelia_fit$imputations$imp30$tipo_centro_pub
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::filter(value==TRUE) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(tipo_centro_pub_to_imputation=ifelse(n()>15,1,0))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss6<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% 
   dplyr::left_join(dplyr::select(nombre_region_imputed,amelia_fit_imputations_imp1_row,cat_nombre_region), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(nombre_region=factor(dplyr::case_when(is.na(nombre_region)~as.character(cat_nombre_region),TRUE~as.character(nombre_region)))) %>% 
  dplyr::left_join(dplyr::select(tipo_centro_pub_imputed,amelia_fit_imputations_imp1_row,tipo_centro_pub_to_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(tipo_centro_pub=factor(dplyr::case_when(is.na(tipo_centro_pub)~as.logical(tipo_centro_pub_to_imputation),TRUE~as.logical(tipo_centro_pub)))) %>%
  dplyr::select(-c(cat_est_conyugal,cat_nombre_region,tipo_centro_pub_to_imputation,tipo_centro_pub_to_imputation)) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))

if(nrow(CONS_C1_df_dup_SEP_2020_match_miss6)-nrow(CONS_C1_df_dup_SEP_2020_match_miss5)>0){
  warning("Some rows were added in the imputation")}


It was impossible to impute region of the center in 6 cases due to ties in the different imputed values. In the case of public or private center, all values were imputed


Drug Dependence Diagnosis

We looked over possible imputations to the diagnosis of drug consumption (n=1).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

dg_trs_cons_sus_or_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp2$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp3$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp4$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp5$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp6$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp7$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp8$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp9$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp10$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp11$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp12$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp13$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp14$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp15$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp16$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp17$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp18$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp19$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp20$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp21$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp22$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp23$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp24$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp25$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp26$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp27$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp28$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp29$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp30$dg_trs_cons_sus_or
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::filter(value==TRUE) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(dg_trs_cons_imputation=ifelse(n()>15,1,0))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss7<-
CONS_C1_df_dup_SEP_2020_match_miss6 %>% 
    dplyr::left_join(dplyr::select(dg_trs_cons_sus_or_imputed,amelia_fit_imputations_imp1_row,dg_trs_cons_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(dg_trs_cons_sus_or=factor(dplyr::case_when(is.na(dg_trs_cons_sus_or)~as.logical(dg_trs_cons_imputation),TRUE~as.logical(dg_trs_cons_sus_or)))) %>%
  dplyr::select(-dg_trs_cons_imputation) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))

if(nrow(CONS_C1_df_dup_SEP_2020_match_miss7)-nrow(CONS_C1_df_dup_SEP_2020_match_miss6)>0){
  warning("Some rows were added in the imputation")}

As a result of the imputations, there were no missing values.


Cause of Discharge

We looked over possible imputations to the truly missing values, discarding missing values due to censorship (n=20).

motivo_de_egreso_a_imputar<-
CONS_C1_df_dup_SEP_2020_match_miss %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_imp)) %>% dplyr::filter(!is.na(fech_egres_imp))%>%dplyr::select(row)

motivodeegreso_mod_imp_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp2$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp3$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp4$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp5$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp6$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp7$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp8$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp9$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp10$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp11$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp12$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp13$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp14$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp15$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp16$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp17$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp18$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp19$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp20$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp21$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp22$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp23$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp24$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp25$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp26$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp27$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp28$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp29$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp30$motivodeegreso_mod_imp
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)) %>% 
  #FILTRAR CASOS QUE SON ILÓGICOS: MUERTES CON TRATAMIENTOS POSTERIORES (1)
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,dup, duplicates_filtered,evaluacindelprocesoteraputico,fech_ing_next_treat),by=c("amelia_fit_imputations_imp1_row"="row")) %>% 
  dplyr::mutate(value_death=dplyr::case_when(value=="Death"& !is.na(fech_ing_next_treat)~1,TRUE~0)) %>% 
  dplyr::filter(value_death!=1) %>%  
  #:#:#:#:#:
  dplyr::count(amelia_fit_imputations_imp1_row,value) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::slice_min(n, n = 1) %>% 
  dplyr::summarise(adm_dis=sum(value == "Administrative discharge",na.rm=T),
                    death=sum(value == "Death",na.rm=T),
                    referral=sum(value == "Referral to another treatment",na.rm=T),
                    ter_dis=sum(value == "Therapeutic discharge",na.rm=T),
                    dropout=sum(value =="Drop-out",na.rm=T)) %>% 
  rowwise() %>% 
  dplyr::mutate(ties=sum(c_across(adm_dis:dropout)),ties=ifelse(ties>1,1,0)) %>% 
  #dplyr::filter(ties==1) %>% 
  dplyr::ungroup() %>% 
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,fech_egres_num,dup, duplicates_filtered,evaluacindelprocesoteraputico,tipo_centro_derivacion),by=c("amelia_fit_imputations_imp1_row"="row")) %>% 
  dplyr::mutate(motivodeegreso_mod_imp_imputation= dplyr::case_when(
    ties==0 & adm_dis==1 & fech_egres_imp<"2019-11-13"~"Administrative discharge",
    #its an absorbing state. should not have posterior treatments
    ties==0 & death==1 & fech_egres_imp<"2019-11-13" & dup==duplicates_filtered~"Death",
    ties==0 & referral==1 & fech_egres_imp<"2019-11-13"~"Referral to another treatment",
    ties==0 & ter_dis==1 & fech_egres_imp<"2019-11-13"~"Therapeutic discharge",
    ties==0 & dropout==1 & fech_egres_imp<"2019-11-13"~"Drop-out",
    #si no hay fecha de egreso, está en la fecha de censura, sólo puede ser tratamiento en curso
    fech_egres_imp>="2019-11-13"~NA_character_,
    TRUE~NA_character_)) %>% 
    #si tiene evaluacindelprocesoteraputico, es porque no es un tratamiento en curso
  dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp")

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
CONS_C1_df_dup_SEP_2020_match_miss8<-
CONS_C1_df_dup_SEP_2020_match_miss7 %>% 
   dplyr::left_join(motivodeegreso_mod_imp_imputed[,c("amelia_fit_imputations_imp1_row","motivodeegreso_mod_imp_original","fech_egres_imp","fech_egres_num","motivodeegreso_mod_imp_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
  #dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::select(row,hash_key,motivodeegreso_mod_imp_original, motivodeegreso_mod_imp_imputation,motivodeegreso_mod_imp,fech_egres_num,fech_egres_imp)
      dplyr::mutate(motivodeegreso_mod_imp=factor(dplyr::case_when(is.na(motivodeegreso_mod_imp)~motivodeegreso_mod_imp_imputation,
                                                                   motivodeegreso_mod_imp_original=="Ongoing treatment"~NA_character_, TRUE~as.character(motivodeegreso_mod_imp)))) %>% 
  dplyr::select(-motivodeegreso_mod_imp_imputation,-fech_egres_imp,-fech_egres_num,-motivodeegreso_mod_imp_original) %>% 
  #dplyr::rename_all( list(~paste0(., ".left"))) %>% 
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp) %>% 
                     dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp"),by="row") %>%
  data.table()

# CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp_original)

#
if(
CONS_C1_df_dup_SEP_2020_match_miss8 %>% dplyr::filter(motivodeegreso_mod_imp_original!="Ongoing treatment",is.na(motivodeegreso_mod_imp)) %>% nrow()>0){"There are missing values on the cause of discharge"}

if(nrow(CONS_C1_df_dup_SEP_2020_match_miss8)-nrow(CONS_C1_df_dup_SEP_2020_match_miss7)>0){
  warning("Some rows were added in the imputation")}


A total of 2 cases were not imputed due to ties in the imputed values.


Evaluation of the Therapeutic Process

Another variable that is worth imputing is the Evaluation of the Therapeutic Process at Discharge (n= 7,378). In case of ties, we selected the imputed values with the value with the minimum evaluation. It must be considered that most of the null values could be explained by censoring or by not completing the treatment within the period of the study (n= 7,361).


# Ver distintos valores propuestos para sustancia de inciio
evaluacindelprocesoteraputico_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp2$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp3$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp4$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp5$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp6$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp7$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp8$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp9$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp10$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp11$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp12$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp13$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp14$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp15$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp16$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp17$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp18$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp19$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp20$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp21$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp22$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp23$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp24$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp25$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp26$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp27$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp28$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp29$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp30$evaluacindelprocesoteraputico
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(high_ach_1=sum(value == "1-High Achievement",na.rm=T),
                   med_ach_2=sum(value == "2-Medium Achievement",na.rm=T),
                  min_ach_3=sum(value =="3-Minimum Achievement",na.rm=T)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(evaluacindelprocesoteraputico_imputation= dplyr::case_when(
      (high_ach_1 >med_ach_2) & (med_ach_2 >min_ach_3)~"1-High Achievement",
      (med_ach_2>high_ach_1) & (med_ach_2 >min_ach_3)~"2-Medium Achievement",
      (min_ach_3>med_ach_2) & (min_ach_3 >high_ach_1)~"3-Minimum Achievement"))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)

CONS_C1_df_dup_SEP_2020_match_miss9<-
CONS_C1_df_dup_SEP_2020_match_miss8 %>% 
   dplyr::left_join(evaluacindelprocesoteraputico_imputed[,c("amelia_fit_imputations_imp1_row","evaluacindelprocesoteraputico_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
    dplyr::mutate(evaluacindelprocesoteraputico=factor(dplyr::case_when(is.na(evaluacindelprocesoteraputico) & motivodeegreso_mod_imp %in% c("Drop-out","Administrative discharge","Therapeutic discharge","Referral to another treatment")~evaluacindelprocesoteraputico_imputation,
                                                                        is.na(motivodeegreso_mod_imp)~NA_character_,
                                                                        TRUE~as.character(evaluacindelprocesoteraputico)))) %>% 
     dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::select(-evaluacindelprocesoteraputico_imputation) %>% 
  data.table()

CONS_C1_df_dup_SEP_2020_match_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico) %>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress"),
               col.names = c("Cause of Discharge","1-High Achievement", "2- Medium Achievement","3- Minimum Achievement","Null Values"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress
Cause of Discharge 1-High Achievement 2- Medium Achievement 3- Minimum Achievement Null Values
Administrative discharge 865 4,428 4,490 0
Death 0 0 1 0
Drop-out 1,767 16,838 37,301 0
Referral to another treatment 1,299 5,834 4,704 0
Therapeutic discharge 17,120 6,136 1,120 0
NA 0 0 0 7,853
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss9)-nrow(CONS_C1_df_dup_SEP_2020_match_miss8)>0){
  warning("Some rows were added in the imputation")}


As a result of the imputations, some values were not possible to impute (n=7853).

As seen in the table above, ongoing treatments did not have an evaluation process, which is logically valid, since their treatment competition was not captured.


Treatment Setting (Residential)

We looked over possible imputations to the treatment setting (n=97).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

tipo_de_plan_res_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$tipo_de_plan_res,
       amelia_fit$imputations$imp2$tipo_de_plan_res,
       amelia_fit$imputations$imp3$tipo_de_plan_res,
       amelia_fit$imputations$imp4$tipo_de_plan_res,
       amelia_fit$imputations$imp5$tipo_de_plan_res,
       amelia_fit$imputations$imp6$tipo_de_plan_res,
       amelia_fit$imputations$imp7$tipo_de_plan_res,
       amelia_fit$imputations$imp8$tipo_de_plan_res,
       amelia_fit$imputations$imp9$tipo_de_plan_res,
       amelia_fit$imputations$imp10$tipo_de_plan_res,
       amelia_fit$imputations$imp11$tipo_de_plan_res,
       amelia_fit$imputations$imp12$tipo_de_plan_res,
       amelia_fit$imputations$imp13$tipo_de_plan_res,
       amelia_fit$imputations$imp14$tipo_de_plan_res,
       amelia_fit$imputations$imp15$tipo_de_plan_res,
       amelia_fit$imputations$imp16$tipo_de_plan_res,
       amelia_fit$imputations$imp17$tipo_de_plan_res,
       amelia_fit$imputations$imp18$tipo_de_plan_res,
       amelia_fit$imputations$imp19$tipo_de_plan_res,
       amelia_fit$imputations$imp20$tipo_de_plan_res,
       amelia_fit$imputations$imp21$tipo_de_plan_res,
       amelia_fit$imputations$imp22$tipo_de_plan_res,
       amelia_fit$imputations$imp23$tipo_de_plan_res,
       amelia_fit$imputations$imp24$tipo_de_plan_res,
       amelia_fit$imputations$imp25$tipo_de_plan_res,
       amelia_fit$imputations$imp26$tipo_de_plan_res,
       amelia_fit$imputations$imp27$tipo_de_plan_res,
       amelia_fit$imputations$imp28$tipo_de_plan_res,
       amelia_fit$imputations$imp29$tipo_de_plan_res,
       amelia_fit$imputations$imp30$tipo_de_plan_res
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(n_res=sum(value=="1",na.rm=T),n_amb=sum(value=="0",na.rm=T))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss10<-
CONS_C1_df_dup_SEP_2020_match_miss9 %>% 
    dplyr::left_join(dplyr::select(tipo_de_plan_res_imputed,amelia_fit_imputations_imp1_row,n_res,n_amb), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(tipo_de_plan_res=factor(dplyr::case_when(is.na(tipo_de_plan_res)& (n_res>n_amb)~"1",is.na(tipo_de_plan_res)& (n_res<n_amb)~"0",TRUE~as.character(tipo_de_plan_res)))) %>%
  dplyr::select(-n_res,-n_amb) %>% 
  data.table()

if(nrow(CONS_C1_df_dup_SEP_2020_match_miss10)-nrow(CONS_C1_df_dup_SEP_2020_match_miss9)>0){
  warning("Some rows were added in the imputation")}

As a result of the process of imputation, some values were not possible to impute (n=5).


Tenure status of households

Another variable that is worth imputing is the Tenure status of households (n= 6,319). In case of ties, we selected the imputed values with the value with the minimum involvement. In case of ties, we kept what we thought was the most vulnerable value (discarding “Owner” or “Renting” values).


tenencia_de_la_vivienda_mod_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
         amelia_fit$imputations$imp1$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp2$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp3$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp4$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp5$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp6$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp7$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp8$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp9$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp10$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp11$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp12$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp13$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp14$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp15$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp16$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp17$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp18$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp19$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp20$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp21$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp22$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp23$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp24$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp25$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp26$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp27$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp28$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp29$tenencia_de_la_vivienda_mod,
       amelia_fit$imputations$imp30$tenencia_de_la_vivienda_mod
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row, value) %>% 
  tally() %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::top_n(1,n) %>% 
  dplyr::ungroup()

tenencia_de_la_vivienda_mod_imputed_dup<-
  tenencia_de_la_vivienda_mod_imputed %>% 
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::mutate(num=n()) %>% 
    dplyr::filter(num>1) %>% 
    dplyr::ungroup() %>% 
  #1) owner, discard if it is in the maximum
    dplyr::mutate(n=dplyr::case_when(value=="Owner/Transferred dwellings/Pays Dividends"~0,T~as.numeric(n))) %>% 
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::top_n(1,n) %>% 
    dplyr::ungroup() %>% 
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  #2) Renting vs. stays temporarily with a relative, keep the second
    dplyr::mutate(n=dplyr::case_when(value=="Renting"~0,T~as.numeric(n))) %>% 
    dplyr::top_n(1,n) %>% 
    dplyr::ungroup() %>% 
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::mutate(n_dup=n()) 


missing_values_to_impute_tenure_hshld<-
  tenencia_de_la_vivienda_mod_imputed %>% 
    dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_dup, by=c("amelia_fit_imputations_imp1_row", "value")) %>% 
  #si es vacío, y no está en la base, es valor 0 (es difícil que)
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::mutate(sum= suppressWarnings(max(num, na.rm=T))) %>% 
    dplyr::ungroup() %>% 
  #descarto los que presentaron más de un valor para una misma fila y aquellos que no fueron seleccionados
    dplyr::mutate(descartar=dplyr::case_when(sum>1 & is.na(n.y)~1,T~0)) %>% 
    dplyr::filter(descartar==0) %>% 
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::mutate(n_dup_row=n()) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(n_dup_row>1) %>% 
    dplyr::distinct(amelia_fit_imputations_imp1_row)

tenencia_de_la_vivienda_mod_imputed_final<-
tenencia_de_la_vivienda_mod_imputed %>% 
    dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_dup, by=c("amelia_fit_imputations_imp1_row", "value")) %>% 
  #si es vacío, y no está en la base, es valor 0 (es difícil que)
    dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
    dplyr::mutate(sum= suppressWarnings(max(num, na.rm=T))) %>% 
    dplyr::ungroup() %>% 
  #descarto los que presentaron más de un valor para una misma fila y aquellos que no fueron seleccionados
    dplyr::mutate(descartar=dplyr::case_when(sum>1 & is.na(n.y)~1,T~0)) %>% 
    dplyr::filter(descartar==0) %>% 
    dplyr::filter(!amelia_fit_imputations_imp1_row %in% as.character(unlist(missing_values_to_impute_tenure_hshld)))


ifelse(nrow(tenencia_de_la_vivienda_mod_imputed_final)/length(unique(CONS_C1_df_dup_SEP_2020_match_miss10$row))>1,
       "There are still more than one value in the imputation","")
## [1] ""
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)

CONS_C1_df_dup_SEP_2020_match_miss11<-
CONS_C1_df_dup_SEP_2020_match_miss10 %>% 
   dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_final[,c("amelia_fit_imputations_imp1_row","value")], by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(tenencia_de_la_vivienda_mod=factor(dplyr::case_when(is.na(tenencia_de_la_vivienda_mod) ~value,
                                                                        TRUE~as.character(tenencia_de_la_vivienda_mod)))) %>% 
  dplyr::select(-value) %>% 
  data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss11)-nrow(CONS_C1_df_dup_SEP_2020_match_miss10)>0){
  warning("AGS: Some rows were added in the imputation")}

As a result of the imputations, some values were not possible to impute (n=4).


Primary or main substance

Then we imputed the primary/main substance at admission (n= 1).


# Ver distintos valores propuestos para sustancia de inciio
sus_principal_mod_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$sus_principal_mod,
       amelia_fit$imputations$imp2$sus_principal_mod,
       amelia_fit$imputations$imp3$sus_principal_mod,
       amelia_fit$imputations$imp4$sus_principal_mod,
       amelia_fit$imputations$imp5$sus_principal_mod,
       amelia_fit$imputations$imp6$sus_principal_mod,
       amelia_fit$imputations$imp7$sus_principal_mod,
       amelia_fit$imputations$imp8$sus_principal_mod,
       amelia_fit$imputations$imp9$sus_principal_mod,
       amelia_fit$imputations$imp10$sus_principal_mod,
       amelia_fit$imputations$imp11$sus_principal_mod,
       amelia_fit$imputations$imp12$sus_principal_mod,
       amelia_fit$imputations$imp13$sus_principal_mod,
       amelia_fit$imputations$imp14$sus_principal_mod,
       amelia_fit$imputations$imp15$sus_principal_mod,
       amelia_fit$imputations$imp16$sus_principal_mod,
       amelia_fit$imputations$imp17$sus_principal_mod,
       amelia_fit$imputations$imp18$sus_principal_mod,
       amelia_fit$imputations$imp19$sus_principal_mod,
       amelia_fit$imputations$imp20$sus_principal_mod,
       amelia_fit$imputations$imp21$sus_principal_mod,
       amelia_fit$imputations$imp22$sus_principal_mod,
       amelia_fit$imputations$imp23$sus_principal_mod,
       amelia_fit$imputations$imp24$sus_principal_mod,
       amelia_fit$imputations$imp25$sus_principal_mod,
       amelia_fit$imputations$imp26$sus_principal_mod,
       amelia_fit$imputations$imp27$sus_principal_mod,
       amelia_fit$imputations$imp28$sus_principal_mod,
       amelia_fit$imputations$imp29$sus_principal_mod,
       amelia_fit$imputations$imp30$sus_principal_mod
       )  %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(sus_prin_mar=sum(value == "Marijuana",na.rm=T),
                   sus_prin_oh=sum(value == "Alcohol",na.rm=T),
                   sus_prin_pb=sum(value == "Cocaine paste",na.rm=T),
                  sus_prin_coc=sum(value =="Cocaine hydrochloride",na.rm=T),
                  sus_prin_other=sum(value =="Other",na.rm=T)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(ties= base::rowSums(dplyr::select(.,starts_with("sus_prin_"))>0)) %>% 
  dplyr::mutate(sus_principal_mod_imp= dplyr::case_when(
  (sus_prin_mar> sus_prin_oh)& (sus_prin_mar> sus_prin_pb)& (sus_prin_mar> sus_prin_coc)& (sus_prin_mar> sus_prin_other)~"Marijuana",
  (sus_prin_oh> sus_prin_mar)& (sus_prin_oh> sus_prin_pb)& (sus_prin_oh> sus_prin_coc)& (sus_prin_oh> sus_prin_other)~"Alcohol",
  (sus_prin_pb> sus_prin_mar)& (sus_prin_pb> sus_prin_oh)& (sus_prin_pb> sus_prin_coc)& (sus_prin_pb> sus_prin_other)~"Cocaine paste",
  (sus_prin_coc> sus_prin_mar)& (sus_prin_coc> sus_prin_oh)& (sus_prin_coc> sus_prin_pb)& (sus_prin_coc> sus_prin_other)~"Cocaine hydrochloride",
  (sus_prin_other> sus_prin_mar)& (sus_prin_other> sus_prin_oh)& (sus_prin_other> sus_prin_pb)& (sus_prin_other> sus_prin_coc)~"Cocaine hydrochloride"
  )) 

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss12<-
CONS_C1_df_dup_SEP_2020_match_miss11 %>% 
   dplyr::left_join(sus_principal_mod_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(sus_principal_mod=factor(dplyr::case_when(is.na(sus_principal_mod)~as.character(sus_principal_mod_imp),
                                 TRUE~as.character(sus_principal_mod)))) %>% 
  dplyr::select(-c(sus_prin_mar, sus_prin_oh, sus_prin_pb, sus_prin_coc, sus_prin_other, ties, sus_principal_mod_imp)) %>% 
  data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
if(nrow(CONS_C1_df_dup_SEP_2020_match_miss12)-nrow(CONS_C1_df_dup_SEP_2020_match_miss11)>0){
  warning("Some rows were added in the imputation")}

As a result of the imputations, there were no missing values.


Sample Characteristics

We checked the characteristics of the sample depending on type of treatment (Residential or Ambulatory/Outpatients).


# -sus_ini_mod_mvv,-estado_conyugal_2,-escolaridad_rec,-freq_cons_sus_prin,-nombre_region,-tipo_centro_pub,-evaluacindelprocesoteraputico,-motivodeegreso_mod_imp,-dg_trs_cons_sus_or,-tipo_de_plan_res,-edad_ini_cons,-via_adm_sus_prin_act

#$109,756
#añado los imputados
CONS_C1_df_dup_SEP_2020_match_miss_after_imp<-
CONS_C1_df_dup_SEP_2020_match_miss%>% 
  dplyr::select(-unlist(dplyr::filter(data.table::as.data.table(t(missing.values),keep.rownames = T),V1>0)[,rn])) %>% #
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020_match_miss12,
                                 row,
                                 unlist(dplyr::filter(data.table::as.data.table(t(missing.values),keep.rownames = T),V1>0)[,rn])),by="row") %>% 
  dplyr::arrange(tipo_de_plan_res,hash_key,row) %>% 
  #elimino esta variable porque es accesoria
 # dplyr::select(-edad_ini_sus_prin) %>% 
  #para transformar el motivo de egreso
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_num,dias_treat_imp_sin_na),by="row") %>%
  #dplyr::filter(fech_egres_num==18213,!is.na(motivodeegreso_mod_imp)) %>% 
  dplyr::mutate(motivodeegreso_mod_imp=dplyr::case_when(dias_treat_imp_sin_na>=90 & motivodeegreso_mod_imp=="Drop-out"~ "Late Drop-out",
                                                        dias_treat_imp_sin_na<90 & motivodeegreso_mod_imp=="Drop-out"~ "Early Drop-out",
                                                        fech_egres_num==18213 & is.na(motivodeegreso_mod_imp)~"Ongoing treatment",
                                                        TRUE~as.character(motivodeegreso_mod_imp)
                                                        )) %>% #janitor::tabyl(motivodeegreso_mod_imp)
  #Temporary variable to cover missing cases but not consider ongoing treatments
  dplyr::mutate(evaluacindelprocesoteraputico2=dplyr::case_when(fech_egres_num==18213 & is.na(evaluacindelprocesoteraputico)~"Ongoing treatment",
                                                        TRUE~as.character(evaluacindelprocesoteraputico)
  )) %>% 
   dplyr::mutate(sum_miss = base::rowSums(is.na(dplyr::select(.,c("sus_ini_mod_mvv","sus_principal_mod","estado_conyugal_2","escolaridad_rec","freq_cons_sus_prin","nombre_region","tipo_centro_pub","motivodeegreso_mod_imp","dg_trs_cons_sus_or","tipo_de_plan_res","edad_ini_cons","sexo_2","edad_al_ing","fech_ing_num","tenencia_de_la_vivienda_mod"))))) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(sum_miss=sum(sum_miss)) %>% 
  dplyr::ungroup() 

CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>% 
  dplyr::filter(sum_miss>0)

CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>% 
  dplyr::filter(sum_miss==0) %>% 
  dplyr::select(-sum_miss) %>% 
  #dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","condicion_ocupacional_corr")], by="row") %>% 
  dplyr::select(-evaluacindelprocesoteraputico2)


Considering that some missing values were not able to be imputed (due to ties in the candidate values for imputation or inconsistent values for imputations) (385, users=307), we ended the process with a total of 109,371 complete cases (users=84,741).


kableone <- function(x, caption=NULL, col.names=NA, smd=T, test=T, varLabels=T, noSpaces=T, printToggle=T, dropEqual=F, ...) {
  capture.output(x <- print(x, smd=T, test=test, varLabels=varLabels,noSpaces=noSpaces, printToggle=printToggle, dropEqual=dropEqual, ...))
  
  knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption= caption, col.names= col.names)
}
match.on.sel<- match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico")]

match.on.sel<-c("sus_principal_mod","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","dg_trs_cons_sus_or", "tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","tenencia_de_la_vivienda_mod","condicion_ocupacional_corr")

catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","tipo_centro_pub","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","dg_trs_cons_sus_or","nombre_region","tipo_de_plan_res","sexo_2","condicion_ocupacional_corr")
#length(unique(CONS_C1_df_dup_SEP_2020_match$fech_ing_num))
#:#:#:#:#: DISMINUIR LA HETEROGENEIDAD DE LA FECHA DE INGRESO
# FORMAS DE CONSTREÑIR LA VARIABLE:
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-cut(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,100)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-CONS_C1_df_dup_SEP_2020_match_fech_ing_num
#CONS_C1_df_dup_SEP_2020_match_fech_ing_num<-CONS_C1_df_dup_SEP_2020_match$fech_ing_num
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,0)))
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)))

#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#:#:#:#:#: 

CONS_C1_df_dup_SEP_2020_match_not_miss2 <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(dup==1) %>% 
  dplyr::select(-dias_treat_imp_sin_na,-fech_egres_num)

attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_principal_mod,"label")<-"Primary Substance at Admission"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_ini_mod_mvv,"label")<-"First Substance Used" 
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$edad_ini_cons,"label")<-"Substance Use Onset Age"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$freq_cons_sus_prin,"label")<-"Primary Substance at Admission Usage Frequency"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$nombre_region,"label")<-"Regional Location of Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_cie_10_rec,"label")<-"Psychiatric Comorbidity"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_trs_cons_sus_or,"label")<-"Drug Dependence Diagnosis"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$abandono_temprano_rec,"label")<-"Early Dropout (Against Staff Advice)"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,"label")<-"Residential"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_centro_pub,"label")<-"Public Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$condicion_ocupacional_corr,"label")<-"Employment Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$origen_ingreso_mod,"label")<-"Treatment Admission Motive"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tenencia_de_la_vivienda_mod,"label")<-"Tenure status of household" 

pre_tab1<-Sys.time()
tab1<-
CreateTableOne(vars = match.on.sel, strata = "tipo_de_plan_res", 
                       data = CONS_C1_df_dup_SEP_2020_match_not_miss2, factorVars = catVars, smd=T)
post_tab1<-Sys.time()
diff_time_tab1=post_tab1-pre_tab1

kableone(tab1, 
         caption = paste0("Table 5. Covariate Balance in the Variables of Interest"),
         col.names= c("Ambulatory","Residential", "p-values","test","SMD"),
         nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num"),#"\\hline",
                       smd=T, test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
  #()
  row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 10) %>%
  #footnote(general = "Here is a general comments of the table. ",
  #        number = c("Footnote 1; ", "Footnote 2; "),
  #         alphabet = c("Footnote A; ", "Footnote B; "),
  #         symbol = c("Footnote Symbol 1; ", "Footnote Symbol 2")
  #         )%>%
  kableExtra::kable_classic() %>% 
  scroll_box(width = "100%", height = "400px") 
Table 5. Covariate Balance in the Variables of Interest
Ambulatory Residential p-values test SMD
n 72053 12688
Primary Substance at Admission (%) <0.001 0.619
Alcohol 26286 (36.5) 2418 (19.1)
Cocaine hydrochloride 14473 (20.1) 1658 (13.1)
Cocaine paste 24556 (34.1) 8018 (63.2)
Marijuana 5362 (7.4) 392 (3.1)
Other 1376 (1.9) 202 (1.6)
First Substance Used (%) <0.001 0.370
Alcohol 41391 (57.4) 5066 (39.9)
Cocaine hydrochloride 2948 (4.1) 522 (4.1)
Cocaine paste 7647 (10.6) 2227 (17.6)
Marijuana 18403 (25.5) 4555 (35.9)
Other 1664 (2.3) 318 (2.5)
Marital Status (%) <0.001 0.309
Married/Shared living arrangements 26151 (36.3) 2910 (22.9)
Separated/Divorced 7709 (10.7) 1317 (10.4)
Single 37330 (51.8) 8330 (65.7)
Widower 863 (1.2) 131 (1.0)
Educational Attainment (%) <0.001 0.124
3-Completed primary school or less 21851 (30.3) 4568 (36.0)
2-Completed high school or less 37191 (51.6) 6137 (48.4)
1-More than high school 13011 (18.1) 1983 (15.6)
Substance Use Onset Age (median [IQR]) 15.00 [14.00, 18.00] 15.00 [13.00, 17.00] <0.001 nonnorm 0.090
Primary Substance at Admission Usage Frequency (%) <0.001 0.767
1 day a week or more 5323 (7.4) 271 (2.1)
2 to 3 days a week 22312 (31.0) 1328 (10.5)
4 to 6 days a week 12212 (16.9) 1650 (13.0)
Daily 28255 (39.2) 9223 (72.7)
Did not use 1095 (1.5) 84 (0.7)
Less than 1 day a week 2856 (4.0) 132 (1.0)
Treatment Admission Motive (%) <0.001 0.509
Spontaneous 33627 (46.7) 4274 (33.7)
Assisted Referral 4932 (6.8) 3004 (23.7)
Other 3749 (5.2) 736 (5.8)
Justice Sector 7137 (9.9) 810 (6.4)
Health Sector 22608 (31.4) 3864 (30.5)
Psychiatric Comorbidity (%) <0.001 0.317
Without psychiatric comorbidity 28994 (40.2) 3245 (25.6)
Diagnosis unknown (under study) 13260 (18.4) 2766 (21.8)
With psychiatric comorbidity 29799 (41.4) 6677 (52.6)
Regional Location of Center (%) <0.001 0.388
Antofagasta (02) 2290 (3.2) 697 (5.5)
Araucanía (09) 2221 (3.1) 161 (1.3)
Arica (15) 1312 (1.8) 728 (5.7)
Atacama (03) 1829 (2.5) 262 (2.1)
Aysén (11) 798 (1.1) 40 (0.3)
Biobío (08) 5089 (7.1) 703 (5.5)
Coquimbo (04) 2798 (3.9) 270 (2.1)
Los Lagos (10) 2644 (3.7) 373 (2.9)
Los Ríos (14) 1110 (1.5) 185 (1.5)
Magallanes (12) 928 (1.3) 31 (0.2)
Maule (07) 4202 (5.8) 642 (5.1)
Metropolitana (13) 35959 (49.9) 6251 (49.3)
Ñuble (16) 539 (0.7) 20 (0.2)
O’Higgins (06) 3636 (5.0) 567 (4.5)
Tarapacá (01) 1348 (1.9) 593 (4.7)
Valparaíso (05) 5350 (7.4) 1165 (9.2)
Drug Dependence Diagnosis = TRUE (%) 49981 (69.4) 11643 (91.8) <0.001 0.590
Public Center = TRUE (%) 57099 (79.2) 3615 (28.5) <0.001 1.183
Sexo Usuario/Sex of User = Women (%) 17392 (24.1) 3935 (31.0) <0.001 0.154
edad_al_ing (median [IQR]) 34.41 [27.53, 43.45] 32.60 [26.31, 40.82] <0.001 nonnorm 0.187
Fecha de Ingreso a Tratamiento (Numérico)(c)/Date of Admission to Treatment (Numeric)(c) (median [IQR]) 16578.00 [15730.00, 17359.00] 16153.00 [15342.00, 17023.00] <0.001 nonnorm 0.292
Tenure status of household (%) <0.001 0.186
Illegal Settlement 591 (0.8) 308 (2.4)
Others 1906 (2.6) 410 (3.2)
Owner/Transferred dwellings/Pays Dividends 26710 (37.1) 4786 (37.7)
Renting 12896 (17.9) 1642 (12.9)
Stays temporarily with a relative 29950 (41.6) 5542 (43.7)
Employment Status (%) <0.001 1.026
Employed 39513 (54.8) 1769 (13.9)
Inactive 7667 (10.6) 1190 (9.4)
Looking for a job for the first time 172 (0.2) 21 (0.2)
No activity 2663 (3.7) 1819 (14.3)
Not seeking for work 493 (0.7) 335 (2.6)
Unemployed 21545 (29.9) 7554 (59.5)
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne

## Construct a table 
#standardized mean differences of greater than 0.1


We checked the similarity in the samples using other measures, such as the variance ratio of the samples and Kolmogorov-Smirnov(KS) statistics.


library(cobalt)

bal2<-bal.tab(CONS_C1_df_dup_SEP_2020_match_not_miss2[,match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico")]], treat = CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))
#"mean.diffs", "variance.ratios","ks.statistics","ovl.coefficient"

options(knitr.kable.NA = '')

bal2$Balance[,2]<-round(bal2$Balance[,2],2)
bal2$Balance[,4]<-round(bal2$Balance[,4],2)
bal2$Balance[,6]<-round(bal2$Balance[,6],2)

var_names<- 
    list("sus_principal_mod_Alcohol"= "Primary Subs. Adm.-Alcohol",
         "sus_principal_mod_Cocaine hydrochloride"= "Primary Subs. Adm.-Cocaine hydrochloride",
         "sus_principal_mod_Cocaine paste"="Primary Subs. Adm.-Cocaine paste",
         "sus_principal_mod_Marijuana"="Primary Subs. Adm.-Marijuana",
         "sus_principal_mod_Other"="Primary Subs. Adm.-Other",
         "origen_ingreso_mod_Spontaneous"="Admission Motive-Spontaneous",
         "origen_ingreso_mod_Assisted Referral"= "Admission Motive-Assisted Referral",
         "origen_ingreso_mod_Other"="Admission Motive-Other",
         "origen_ingreso_mod_Justice Sector"= "Admission Motive-Justice Sector",
         "origen_ingreso_mod_Health Sector"="Admission Motive-Health Sector",
         "dg_cie_10_rec_Without psychiatric comorbidity"="ICD-10-Wo/Psych Comorbidity",
         "dg_cie_10_rec_Diagnosis unknown (under study)"="ICD-10-Dg. Unknown/under study",
         "dg_cie_10_rec_With psychiatric comorbidity"="ICD-10-W/Psych Comorbidity",
         "sexo_2_Women"="Sex-Women",
         "edad_al_ing"="Admission Age",
         "fech_ing_num"="Admission Date",
         "duplicates_filtered"="Treatments (#)",
         "more_one_treat"=">1 treatment",
         "sus_ini_mod_mvv_Alcohol"= "First substance used-Alcohol",
         "sus_ini_mod_mvv_Cocaine hydrochloride"= "First substance used-Cocaine hydrochloride",
         "sus_ini_mod_mvv_Cocaine paste"="First substance used-Cocaine paste",
         "sus_ini_mod_mvv_Marijuana"="First substance used-Marijuana",
         "sus_ini_mod_mvv_Other"="First substance used-Other",
         "estado_conyugal_2_Married/Shared living arrangements"="Marital Status-Married/Shared liv. arr.",
         "condicion_ocupacional_corr_Employed"="Emp.Status-Employed",
         "condicion_ocupacional_corr_Inactive"="Emp.Status-Inactive",
         "condicion_ocupacional_corr_Looking for a job for the first time"="Emp.Status-Looking 1st job",
         "condicion_ocupacional_corr_No activity"="Emp.Status- No activity",
         "condicion_ocupacional_corr_Not seeking for work"="Emp.Status- Not seeking work",
         "condicion_ocupacional_corr_Unemployed"="Emp.Status- Unemployed",
         "estado_conyugal_2_Separated/Divorced"="Marital Status-Separated/Divorced",
         "estado_conyugal_2_Single"= "Marital Status-Single",
         "estado_conyugal_2_Widower"="Marital Status-Widower",
         "escolaridad_rec_3-Completed primary school or less"="Educational Attainment-PS or less",
         "escolaridad_rec_2-Completed high school or less"="Educational Attainment-HS or less",
         "escolaridad_rec_1-More than high school"="Educational Attainment-More than HS",
         "freq_cons_sus_prin_1 day a week or more"="Subs Use Freq-1d/wk or more",
         "freq_cons_sus_prin_2 to 3 days a week"="Subs Use Freq-2-3d/wk",
         "freq_cons_sus_prin_4 to 6 days a week"="Subs Use Freq-4-6d/wk",
         "freq_cons_sus_prin_Daily"="Subs Use Freq-Daily",
         "freq_cons_sus_prin_Did not use"="Subs Use Freq-Did not use",
         "freq_cons_sus_prin_Less than 1 day a week"="Subs Use Freq-Less 1d/wk",
         "nombre_region_Antofagasta (02)"="Region-Antofagasta(02)",
         "nombre_region_Araucanía (09)"="Region-Araucanía(09)",
         "nombre_region_Arica (15)"="Region-Arica(15)",
         "nombre_region_Atacama (03)"="Region-Atacama(03)",
         "nombre_region_Aysén (11)"="Region-Aysén(11)",
         "nombre_region_Biobío (08)"="Region- Biobío(08)",
         "nombre_region_Coquimbo (04)"="Region-Coquimbo(04)",
         "nombre_region_Los Lagos (10)"="Region-Los Lagos(10)",
         "nombre_region_Los Ríos (14)"="Region-Los Ríos(14)",
         "nombre_region_Magallanes (12)"="Region-Magallanes(12)",
         "nombre_region_Maule (07)"="Region-Maule(07)",
         "nombre_region_Metropolitana (13)"="Region-Metropolitana(13)",
         "nombre_region_Ñuble (16)"="Region-Ñuble(16)",
         "nombre_region_O'Higgins (06)"="Region-O'Higgins(06)",
         "nombre_region_Tarapacá (01)"="Region-Tarapacá(01)",
         "nombre_region_Valparaíso (05)"="Region-Valparaíso(05)",
         "tipo_centro_pub"="Public Center",
         "dg_trs_cons_sus_or"= "Drug Dependence diagnosis",
         "edad_ini_cons"="Substance Use Onset Age",
         "tenencia_de_la_vivienda_mod_Illegal Settlement"="Ten.Stat.Houshld-Ill stlmnt",
         "tenencia_de_la_vivienda_mod_Others"="Ten.Stat.Houshld-Other",
         "tenencia_de_la_vivienda_mod_Owner/Transferred dwellings/Pays Dividends"="Ten.Stat.Houshld-Owner",
         "tenencia_de_la_vivienda_mod_Renting"="Ten.Stat.Houshld-Renting",
         "tenencia_de_la_vivienda_mod_Stays temporarily with a relative"="Ten.Stat.Houshld-Temporary stlmnt",
         "rn"="Treatment")

var.names<-data.table(data.frame(unlist(var_names)),keep.rownames = T) %>% janitor::clean_names()

balance_prev<-
data.table::data.table(bal2$Balance[,1:6],keep.rownames = T) %>%
  dplyr::arrange(-abs(Diff.Un)) %>% 
  dplyr::left_join(var.names,by="rn") %>% 
  dplyr::select(unlist_var_names,everything()) %>% 
  dplyr::select(-rn) 

balance_prev %>% #data.table::data.table(keep.rownames = F)
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 4. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Nature of Variables", "Unadjusted SMDs","Threshold","Unadjusted Variance Ratios","Threshold","Unadjusted KS"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10) %>%
  kableExtra::add_footnote( c(paste("Note. ")), 
                            notation = "none") %>%
  kableExtra::kable_classic() %>% 
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 4. Covariate Balance in the Variables of Interest
Variables Nature of Variables Unadjusted SMDs Threshold Unadjusted Variance Ratios Threshold Unadjusted KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51
Emp.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41
Subs Use Freq-Daily Binary 0.72 Not Balanced, >0.1 0.33
Emp.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30
Primary Subs. Adm.-Cocaine paste Binary 0.61 Not Balanced, >0.1 0.29
Drug Dependence diagnosis Binary 0.59 Not Balanced, >0.1 0.22
Subs Use Freq-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.20
Admission Motive-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17
Primary Subs. Adm.-Alcohol Binary -0.40 Not Balanced, >0.1 0.17
Emp.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11
First substance used-Alcohol Binary -0.36 Not Balanced, >0.1 0.18
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13
Admission Date Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14
Admission Motive-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13
Subs Use Freq-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05
First substance used-Marijuana Binary 0.23 Not Balanced, >0.1 0.10
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04
Primary Subs. Adm.-Marijuana Binary -0.20 Not Balanced, >0.1 0.04
First substance used-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07
Primary Subs. Adm.-Cocaine hydrochloride Binary -0.19 Not Balanced, >0.1 0.07
Subs Use Freq-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03
Admission Age Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07
Region-Tarapacá(01) Binary 0.16 Not Balanced, >0.1 0.03
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07
Emp.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02
Ten.Stat.Houshld-Renting Binary -0.14 Not Balanced, >0.1 0.05
Admission Motive-Justice Sector Binary -0.13 Not Balanced, >0.1 0.04
Ten.Stat.Houshld-Ill stlmnt Binary 0.13 Not Balanced, >0.1 0.02
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06
Region-Araucanía(09) Binary -0.12 Not Balanced, >0.1 0.02
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01
Subs Use Freq-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02
Substance Use Onset Age Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07
Region-Aysén(11) Binary -0.09 Balanced, <0.1 0.01
Region-Ñuble(16) Binary -0.09 Balanced, <0.1 0.01
Subs Use Freq-Did not use Binary -0.08 Balanced, <0.1 0.01
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03
Educational Attainment-HS or less Binary -0.06 Balanced, <0.1 0.03
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02
Region- Biobío(08) Binary -0.06 Balanced, <0.1 0.02
Region-Valparaíso(05) Binary 0.06 Balanced, <0.1 0.02
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01
Ten.Stat.Houshld-Temporary stlmnt Binary 0.04 Balanced, <0.1 0.02
Emp.Status-Inactive Binary -0.04 Balanced, <0.1 0.01
Admission Motive-Other Binary 0.03 Balanced, <0.1 0.01
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.00
Region-Maule(07) Binary -0.03 Balanced, <0.1 0.01
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01
Ten.Stat.Houshld-Other Binary 0.03 Balanced, <0.1 0.01
Primary Subs. Adm.-Other Binary -0.02 Balanced, <0.1 0.00
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00
Admission Motive-Health Sector Binary -0.02 Balanced, <0.1 0.01
Emp.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00
First substance used-Other Binary 0.01 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00
Region-Los Ríos(14) Binary -0.01 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01
Ten.Stat.Houshld-Owner Binary 0.01 Balanced, <0.1 0.01
First substance used-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00
Note.


We generated a plot to focus on the unbalanced data.


love.plot(bal2, binary = "std", 
          thresholds = c(m = .2),#, m=.2
          var.order = "unadjusted")+
  theme_classic()+
  ggtitle(NULL)+
  labs(caption="Note. Vertical dashed line= Standardized Differences of .1")+
  theme( plot.caption=element_text(hjust=0))+
   theme(legend.position = "none")
Figure 8. Covariates Balance on Different Values

Figure 8. Covariates Balance on Different Values

#ggplot(idh_pre, aes(idh_pre$quintil_0,idh_pre$idh_0)) + 
#  geom_point(aes(colour = idh_pre$groups), size = 5) + labs(x="Quintiles", y="Percentage")  + 
#  theme_bw() +  
#  scale_fill_manual(values=c("black", "gray60") ) + 
#  scale_colour_manual(name="Groups",values =c("Control"="black", "Exposed"="gray60")) + 
#  theme(legend.key = element_rect(colour = NA)) + 
#  theme(text = element_text(size=20)) + 
#  scale_y_continuous( limits = c(0,0.4), breaks = seq(.05,.4,by=.05))

#bal.tab(bal2, treat = "tipo_de_plan_res",var.name = "fech_ing_num", data = CONS_C1_df_dup_SEP_2020_match_not_miss2)

Specification

First, we had to discretize categorical variables into logical parameters, and for continuous covariates, we divided them into 20 equal parts.


catVars<-
match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico","edad_ini_cons","fech_ing_num","edad_al_ing")]

columna_dummy <- function(df, columna) {
  df %>% 
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    mutate(valor = 1) %>% 
    spread(key = columna, value = valor, fill = 0)
}

quantiles = function(covar, n_q) {
    p_q = seq(0, 1, 1/n_q)
    val_q = quantile(covar, probs = p_q, na.rm = TRUE)
    covar_out = rep(NA, length(covar))
    for (i in 1:n_q) {
        if (i==1) {covar_out[covar<val_q[i+1]] = i}
        if (i>1 & i<n_q) {covar_out[covar>=val_q[i] & covar<val_q[i+1]] = i}
        if (i==n_q) {covar_out[covar>=val_q[i] & covar<=val_q[i+1]] = i}}
    covar_out
}

CONS_C1_df_dup_SEP_2020_match_not_miss3<-CONS_C1_df_dup_SEP_2020_match_not_miss2
for (i in c(1:length(catVars))){#catVars[-10] excluding treatment indicator
  cat<-as.character(catVars[i])#catVars[-10] excluding treatment indicator
  CONS_C1_df_dup_SEP_2020_match_not_miss3<-columna_dummy(CONS_C1_df_dup_SEP_2020_match_not_miss3,cat)
}
CONS_C1_df_dup_SEP_2020_match_not_miss3$tipo_de_plan_res_FALSE<-NULL
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num,20)
match.on.sel2<-names(CONS_C1_df_dup_SEP_2020_match_not_miss3)[-c(1,2,5)]
#"edad_ini_cons","edad_al_ing","fech_ing_num")

CONS_SEP_match = data.table::data.table(CONS_C1_df_dup_SEP_2020_match_not_miss2[order(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res, decreasing = TRUE), ])

CONS_SEP_match_dum = data.table::data.table(CONS_C1_df_dup_SEP_2020_match_not_miss3 %>% dplyr::arrange(factor(row, levels = CONS_SEP_match$row)))


Match

The matched variables were defined for the treatments at baseline (n=84,741).


library(designmatch)

#fine = list(covs = fine_covs)
#solver = list(name = name, t_max = t_max, approximate = 1, round_cplex = 0, trace_cplex = 0).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
# 1. Gurobi installation

#For an exact solution, we strongly recommend running designmatch either with CPLEX or Gurobi.  Between these two solvers, the R interface of Gurobi is considerably easier to install.  Here we provide general instructions for manually installing Gurobi and its R interface in Mac and Windows machines.

#1. Create a free academic license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/creating_a_new_academic_li.html

#2. Install the software
#   2.1. In http://www.gurobi.com/index, go to Downloads > Gurobi Software
#   2.2. Choose your operating system and press download
#
#3. Retrieve and set up your Gurobi license
#   2.1. Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_and_setting_up_.html
#   2.2. Then follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_a_free_academic.html
#
#4. Test your license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/testing_your_license.html
#
#5. Install the R interface of Gurobi   
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/r_installing_the_r_package.html
#   * In Windows, in R run the command install.packages("PATH\\gurobi_7.X-Y.zip", repos=NULL) where path leads to the file gurobi_7.X-Y.zip (for example PATH=C:\\gurobi702\\win64\\R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#   * In MAC, in R run the command install.packages('PATH/gurobi_7.X-Y.tgz', repos=NULL) where path leads to the file gurobi_7.X-Y.tgz (for example PATH=/Library/gurobi702/mac64/R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#       
#6. Test the installation 
#   Load the library and run the examples therein
#   * A possible error that you may get is the following: "Error: package ‘slam’ required by ‘gurobi’ could not be found". If that case, install.packages('slam') and try again.
#   You should be all set!
CONS_SEP_match$tipo_de_plan_res<-ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
require(slam)
# Solver options
#default solver is glpk with approximate = 1
#For an exact solution, we strongly recommend using cplex or gurobi as they are much faster than the other solvers, but they do require a license (free for academics, but not for people outside universities)
t_max = 60*60*7
solver = "gurobi" #cplex, glpk, gurobi and symphony
solver = list(name = solver, 
  t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
  approximate = 0,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program. #FEB2021: I dont want to violate some balancing constraints to some extent. Change to 0.
  round_cplex = 0, 
  trace = 1#turns the optimizer output on
  )

#Indicador de tratamiento
t_ind= ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)

#table(is.na(CONS_SEP_match$tipo_de_plan_res))

# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:
#######mom_covs is a matrix where each column is a covariate whose mean is to be balanced
#######mom_tols is a vector of tolerances for the maximum difference in means for the covariates in mom_covs
#######mom_targets is a vector of target moments (e.g., means) of a distribution to be approximated by matched sampling. is optional, but if #######mom_covs is specified then mom_tols needs to be specified too
#######The lengths of mom_tols and mom_target have to be equal to the number of columns of mom_covs
mom_covs = cbind(CONS_SEP_match$edad_al_ing,
                 CONS_SEP_match$fech_ing_num,
                 CONS_SEP_match$edad_ini_cons)
mom_tols = absstddif(mom_covs, t_ind, .2)# original, 0.05, ahora probaré con 0.7
mom = list(covs = mom_covs, tols = mom_tols, targets = NULL)

# Mean balance
covs = cbind(CONS_SEP_match$edad_al_ing,
                 CONS_SEP_match$fech_ing_num,
                 CONS_SEP_match$edad_ini_cons)
meantab(covs, t_ind)
##      Mis      Min      Max   Mean T   Mean C Std Dif P-val
## [1,]   0     8.84    88.84    35.97    35.97       0     1
## [2,]   0 13621.00 18199.00 16445.50 16445.50       0     1
## [3,]   0     5.00    74.00    16.51    16.51       0     1
# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs = cbind(CONS_SEP_match$origen_ingreso_mod,
                  CONS_SEP_match$dg_cie_10_rec,
                  CONS_SEP_match$sexo_2,
                  CONS_SEP_match$sus_ini_mod_mvv,
                  CONS_SEP_match$tipo_centro_pub, #cuidado
                  CONS_SEP_match$estado_conyugal_2, 
                  CONS_SEP_match$escolaridad_rec,
                  CONS_SEP_match$freq_cons_sus_prin,
                  CONS_SEP_match$nombre_region,
                  CONS_SEP_match$condicion_ocupacional_corr,
                  CONS_SEP_match$sus_principal_mod,
                  CONS_SEP_match$dg_trs_cons_sus_or,
                  CONS_SEP_match$tenencia_de_la_vivienda_mod
)
fine = list(covs = fine_covs)

# 11,448; No. of controls: 11,448"
# 11,452; No. of controls: 11,452"
# 11,459; No. of controls: 11,459" #when I changed tolerance from .0999 to .1999
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#MATCH
start.time <- Sys.time()
set.seed(2125)
out = cardmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIËN 
                mom = mom,# ya los definí list(covs = mom_covs, tols = mom_tols, targets = mom_targets), 
          fine = fine, 
          solver = solver)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
## Gurobi Optimizer version 9.1.2 build v9.1.2rc0 (win64)
## Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
## Optimize a model with 70 rows, 84741 columns and 1610079 nonzeros
## Model fingerprint: 0xe18add91
## Variable types: 0 continuous, 84741 integer (84741 binary)
## Coefficient statistics:
##   Matrix range     [1e+00, 2e+04]
##   Objective range  [1e+00, 1e+00]
##   Bounds range     [0e+00, 0e+00]
##   RHS range        [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve time: 2.33s
## Presolved: 70 rows, 84741 columns, 1609869 nonzeros
## Variable types: 0 continuous, 84741 integer (84741 binary)
## 
## Root relaxation: objective 1.122606e+04, 451 iterations, 1.01 seconds
## 
##     Nodes    |    Current Node    |     Objective Bounds      |     Work
##  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time
## 
##      0     0 11226.0565    0   43   -0.00000 11226.0565      -     -    3s
## H    0     0                    2321.0000000 11226.0565   384%     -    8s
##      0     0 11226.0565    0   43 2321.00000 11226.0565   384%     -   10s
## H    0     0                    11226.000000 11226.0565  0.00%     -   12s
##      0     0 11226.0565    0   43 11226.0000 11226.0565  0.00%     -   12s
## 
## Explored 1 nodes (458 simplex iterations) in 12.50 seconds
## Thread count was 12 (of 12 available processors)
## 
## Solution count 3: 11226 2321 -0 
## 
## Optimal solution found (tolerance 1.00e-04)
## Best objective 1.122600000000e+04, best bound 1.122600000000e+04, gap 0.0000%
##   Optimal matches found
#FEB2021= If I change to bmatch, error can't allocate vector size 3.4gb
end.time <- Sys.time()
time.taken <- end.time - start.time
# Fine balance (note here we are getting an approximate solution)
#for (i in 1:ncol(fine_covs)) {     
#   print(finetab(fine_covs[, i], t_id_1, c_id_1))
#}
# Indices of the treated units and matched controls
t_id_1 = out$t_id  
c_id_1 = out$c_id   
group = out$group_id    
ids_matched<-cbind.data.frame(t_id_1, c_id_1,group)

paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 11,226; No. of controls: 11,226"
# Fine balance (note here we are getting an approximate solution)
finetab_match1<-data.frame()
for (i in 1:ncol(fine_covs)) {      
    #finetab_match1<- rbind.data.frame(
  finetab(fine_covs[, i], t_id_1, c_id_1)
}

d_match = CONS_SEP_match[c(t_id_1, c_id_1), ]

paste0("Number of duplicated rows: ",d_match %>%  dplyr::group_by(row) %>%  dplyr::mutate(n_row=n()) %>% dplyr::ungroup() %>% dplyr::filter(n_row>1) %>% nrow())
## [1] "Number of duplicated rows: 0"
paste0("Percentage of the selected treatments: ",scales::percent(length(t_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==1) %>% nrow()))
## [1] "Percentage of the selected treatments: 88%"
paste0("Percentage of the selected controls: ",
       scales::percent(length(c_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==0) %>% nrow()))
## [1] "Percentage of the selected controls: 16%"
#cuidado, el anterior me encontró más del mismo control para un tratado
#por eso ocuparé el de más abajo.
#EL DE A CONTINUACIÓN ES ERRÓNEO PORQUE ES POR POSICIÓN, NO POR COINCIDENCIA DEL NÚMERO CON LA FILA
#d_match_no_duplicates = CONS_SEP_match[which(CONS_SEP_match$row %in% c(t_id_1, c_id_1)), ]


Explore Results of the Matching


vars_ecdf<- c('edad_al_ing', 'edad_ini_cons', 'fech_ing_num')
headings <- c("Age at Admission", "Age of Onset of Drug Use", "Date of Admission")
for (i in 1:length(headings)) {
  cat("### ",headings[i],"\n")
  f<-vars_ecdf[i]
  ecdfplot(as.data.frame(CONS_SEP_match)[,f], t_id_1, c_id_1, main_title = "", legend_position = "right")
  cat('\n\n')
}

Age at Admission

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Age of Onset of Drug Use

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Date of Admission

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso3.jpg", height=14, width= 10, res= 96, units = "in")
ecdfplot(as.data.frame(CONS_SEP_match)[,f], t_id_1, c_id_1, main_title = "", xlab="", legend_position = "right")
dev.off()
}


cat("### ","Love plot","\n")

Love plot

X_mat<-cbind(
            "Primary Substance at Admission"=CONS_SEP_match$sus_principal_mod,
            "Admission Age"=CONS_SEP_match$edad_al_ing, 
            "Admission Date"=CONS_SEP_match$fech_ing_num,
            "Substance Use Onset Age"=CONS_SEP_match$edad_ini_cons,
            "Admission Motive"=CONS_SEP_match$origen_ingreso_mod,
            "Psychiatric Comorbidity"=CONS_SEP_match$dg_cie_10_rec,
            "Sex"=CONS_SEP_match$sexo_2, 
            "First substance used"=CONS_SEP_match$sus_ini_mod_mvv,
            "Public Center"=CONS_SEP_match$tipo_centro_pub,
            "Marital Status"=CONS_SEP_match$estado_conyugal_2,
            "Educational Attainment"=CONS_SEP_match$escolaridad_rec,
            "Primary Substance at Admission Usage Frequency"=CONS_SEP_match$freq_cons_sus_prin,
            "Regional Location of the Center"=CONS_SEP_match$nombre_region,
            "Employment Status"=CONS_SEP_match$condicion_ocupacional_corr,
            "Drug Dependence Diagnosis"=CONS_SEP_match$dg_trs_cons_sus_or,
            "Drug Dependence"=CONS_SEP_match$tenencia_de_la_vivienda_mod
            )
dist_mat_match1<-meantab(X_mat, t_ind, t_id_1, c_id_1, exact = NULL, digits = 2)#standardized differences in means after matching for each covariate;
vline = 0.2
loveplot(X_mat=X_mat, t_id=t_id_1, c_id=c_id_1, v_line=vline, legend_position = "topright")
Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


Balance

options(knitr.kable.NA = '')
covs0_unmatch <- subset(CONS_SEP_match, select = -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,n_hash,dup,motivodeegreso_mod_imp,via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,
covs0_match <- subset(d_match, select = -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,n_hash,dup,motivodeegreso_mod_imp,via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,
#_#_#_#_#_#_generar pareamientos prepost#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
library(cobalt)
bal1_nomatch<-bal.tab(covs0_unmatch, treat = CONS_SEP_match$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal1_nomatch$Balance[,2]<-round(bal1_nomatch$Balance[,2],2)
bal1_nomatch$Balance[,4]<-round(bal1_nomatch$Balance[,4],2)
bal1_nomatch$Balance[,6]<-round(bal1_nomatch$Balance[,6],2)

bal1_match<-bal.tab(covs0_match, treat = d_match$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))

bal1_match$Balance[,2]<-round(bal1_match$Balance[,2],2)
bal1_match$Balance[,4]<-round(bal1_match$Balance[,4],2)
bal1_match$Balance[,6]<-round(bal1_match$Balance[,6],2)


var_names<- 
  list("sus_principal_mod_Alcohol"= "Primary Subs. Adm.-Alcohol",
      "sus_principal_mod_Cocaine hydrochloride"= "Primary Subs. Adm.-Cocaine hydrochloride",
      "sus_principal_mod_Cocaine paste"="Primary Subs. Adm.-Cocaine paste",
      "sus_principal_mod_Marijuana"="Primary Subs. Adm.-Marijuana",
      "sus_principal_mod_Other"="Primary Subs. Adm.-Other",
      "origen_ingreso_mod_Spontaneous"="Admission Motive-Spontaneous",
      "origen_ingreso_mod_Assisted Referral"= "Admission Motive-Assisted Referral",
      "origen_ingreso_mod_Other"="Admission Motive-Other",
      "origen_ingreso_mod_Justice Sector"= "Admission Motive-Justice Sector",
      "origen_ingreso_mod_Health Sector"="Admission Motive-Health Sector",
      "dg_cie_10_rec_Without psychiatric comorbidity"="ICD-10-Wo/Psych Comorbidity",
      "dg_cie_10_rec_Diagnosis unknown (under study)"="ICD-10-Dg. Unknown/under study",
      "dg_cie_10_rec_With psychiatric comorbidity"="ICD-10-W/Psych Comorbidity",
      "sexo_2_Women"="Sex-Women",
      "edad_al_ing"="Admission Age",
      "fech_ing_num"="Admission Date",
      "duplicates_filtered"="Treatments (#)",
      "more_one_treat"=">1 treatment",
      "sus_ini_mod_mvv_Alcohol"= "First substance used-Alcohol",
      "sus_ini_mod_mvv_Cocaine hydrochloride"= "First substance used-Cocaine hydrochloride",
      "sus_ini_mod_mvv_Cocaine paste"="First substance used-Cocaine paste",
      "sus_ini_mod_mvv_Marijuana"="First substance used-Marijuana",
      "sus_ini_mod_mvv_Other"="First substance used-Other",
      "estado_conyugal_2_Married/Shared living arrangements"="Marital Status-Married/Shared liv. arr.",
      "estado_conyugal_2_Separated/Divorced"="Marital Status-Separated/Divorced",
      "estado_conyugal_2_Single"= "Marital Status-Single",
      "estado_conyugal_2_Widower"="Marital Status-Widower",
      "escolaridad_rec_3-Completed primary school or less"="Educational Attainment-PS or less",
      "escolaridad_rec_2-Completed high school or less"="Educational Attainment-HS or less",
      "escolaridad_rec_1-More than high school"="Educational Attainment-More than HS", 
      "freq_cons_sus_prin_1 day a week or more"="Subs Use Freq-1d/wk or more",
      "freq_cons_sus_prin_2 to 3 days a week"="Subs Use Freq-2-3d/wk",
      "freq_cons_sus_prin_4 to 6 days a week"="Subs Use Freq-4-6d/wk",
      "freq_cons_sus_prin_Daily"="Subs Use Freq-Daily",
      "freq_cons_sus_prin_Did not use"="Subs Use Freq-Did not use",
      "freq_cons_sus_prin_Less than 1 day a week"="Subs Use Freq-Less 1d/wk",
      "nombre_region_Antofagasta (02)"="Region-Antofagasta(02)",
      "nombre_region_Araucanía (09)"="Region-Araucanía(09)",
      "nombre_region_Arica (15)"="Region-Arica(15)",
      "nombre_region_Atacama (03)"="Region-Atacama(03)",
      "nombre_region_Aysén (11)"="Region-Aysén(11)",
      "nombre_region_Biobío (08)"="Region- Biobío(08)",
      "nombre_region_Coquimbo (04)"="Region-Coquimbo(04)",
      "nombre_region_Los Lagos (10)"="Region-Los Lagos(10)",
      "nombre_region_Los Ríos (14)"="Region-Los Ríos(14)",
      "nombre_region_Magallanes (12)"="Region-Magallanes(12)",
      "nombre_region_Maule (07)"="Region-Maule(07)",
      "nombre_region_Metropolitana (13)"="Region-Metropolitana(13)",
      "nombre_region_Ñuble (16)"="Region-Ñuble(16)",
      "nombre_region_O'Higgins (06)"="Region-O'Higgins(06)",
      "nombre_region_Tarapacá (01)"="Region-Tarapacá(01)",
      "nombre_region_Valparaíso (05)"="Region-Valparaíso(05)",
      "tipo_centro_pub"="Public Center",
      "dg_trs_cons_sus_or"= "Drug Dependence diagnosis",
      "edad_ini_cons"="Substance Use Onset Age",
      "condicion_ocupacional_corr_Employed"="Emp.Status-Employed",
      "condicion_ocupacional_corr_Inactive"="Emp.Status-Inactive",
      "condicion_ocupacional_corr_Looking for a job for the first time"="Emp.Status-Looking 1st job",
      "condicion_ocupacional_corr_No activity"="Emp.Status- No activity",
      "condicion_ocupacional_corr_Not seeking for work"="Emp.Status- Not seeking work",
      "condicion_ocupacional_corr_Unemployed"="Emp.Status- Unemployed",
      "tenencia_de_la_vivienda_mod_Illegal Settlement"="Ten.Stat.Houshld-Ill stlmnt",
      "tenencia_de_la_vivienda_mod_Others"="Ten.Stat.Houshld-Other",
      "tenencia_de_la_vivienda_mod_Owner/Transferred dwellings/Pays Dividends"="Ten.Stat.Houshld-Owner",
      "tenencia_de_la_vivienda_mod_Renting"="Ten.Stat.Houshld-Renting",
      "tenencia_de_la_vivienda_mod_Stays temporarily with a relative"="Ten.Stat.Houshld-Temporary stlmnt",
      "rn"="Treatment")

var.names<-data.table(data.frame(unlist(var_names)),keep.rownames = T) %>% janitor::clean_names()

#GENERACION DE COMPARACIONES ENTRE GRUPOS, UNA VEZ PAREADO Y DESPUÉS
pre_matched_matched<-
data.table::data.table(bal1_nomatch$Balance[,1:6],keep.rownames = T) %>%
  dplyr::arrange(-abs(Diff.Un)) %>% 
  dplyr::left_join(data.table::data.table(bal1_match$Balance[,2:6],keep.rownames = T),by="rn") %>% 
  dplyr::left_join(var.names,by="rn") %>% 
  dplyr::select(unlist_var_names,everything()) %>% 
  dplyr::select(-rn) 

pre_matched_matched%>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 5a. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Nature of Variables", "SMDs","Threshold","Variance Ratios","Threshold","KS","SMDs","Threshold","Variance Ratios","Threshold","KS"),
               align =c('l',rep('c', 101))) %>%
  add_header_above(c(" "," ","Unadjusted" = 5, "Adjusted" = 5)) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::add_footnote( c(paste("Note.",paste0("Unadjusted (n=",dim(covs0_unmatch)[1] %>% format(big.mark=","),")"),";",paste0("Adjusted (n=",dim(covs0_match)[1] %>% format(big.mark=","),")"),";",paste0("Total pairs: ",length(c_id_1) %>% format(big.mark=",")))), 
                            notation = "none") %>%
  kableExtra::kable_classic() %>% 
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 5a. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51 0.00 Balanced, <0.1 0.00
Emp.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41 0.00 Balanced, <0.1 0.00
Subs Use Freq-Daily Binary 0.72 Not Balanced, >0.1 0.33 0.00 Balanced, <0.1 0.00
Emp.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30 0.00 Balanced, <0.1 0.00
Primary Subs. Adm.-Cocaine paste Binary 0.61 Not Balanced, >0.1 0.29 0.00 Balanced, <0.1 0.00
Drug Dependence diagnosis Binary 0.59 Not Balanced, >0.1 0.22 0.00 Balanced, <0.1 0.00
Subs Use Freq-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.20 0.00 Balanced, <0.1 0.00
Admission Motive-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
Primary Subs. Adm.-Alcohol Binary -0.40 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
Emp.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
First substance used-Alcohol Binary -0.36 Not Balanced, >0.1 0.18 0.00 Balanced, <0.1 0.00
>1 treatment Binary 0.33 Not Balanced, >0.1 0.14 0.22 Not Balanced, >0.1 0.10
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15 0.00 Balanced, <0.1 0.00
Treatments (#) Contin. 0.31 Not Balanced, >0.1 1.91 Balanced, <2 0.14 0.20 Not Balanced, >0.1 1.46 Balanced, <2 0.10
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Admission Date Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14 -0.17 Not Balanced, >0.1 0.95 Balanced, <2 0.09
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14 0.00 Balanced, <0.1 0.00
Admission Motive-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Subs Use Freq-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
First substance used-Marijuana Binary 0.23 Not Balanced, >0.1 0.10 0.00 Balanced, <0.1 0.00
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Primary Subs. Adm.-Marijuana Binary -0.20 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
First substance used-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Primary Subs. Adm.-Cocaine hydrochloride Binary -0.19 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Subs Use Freq-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Admission Age Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07 0.04 Balanced, <0.1 1.01 Balanced, <2 0.03
Region-Tarapacá(01) Binary 0.16 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Emp.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Ten.Stat.Houshld-Renting Binary -0.14 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
Admission Motive-Justice Sector Binary -0.13 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Ten.Stat.Houshld-Ill stlmnt Binary 0.13 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06 0.00 Balanced, <0.1 0.00
Region-Araucanía(09) Binary -0.12 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01 0.00 Balanced, <0.1 0.00
Subs Use Freq-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Substance Use Onset Age Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07 0.00 Balanced, <0.1 1.00 Balanced, <2 0.01
Region-Aysén(11) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Ñuble(16) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Subs Use Freq-Did not use Binary -0.08 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Educational Attainment-HS or less Binary -0.06 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region- Biobío(08) Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Valparaíso(05) Binary 0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Ten.Stat.Houshld-Temporary stlmnt Binary 0.04 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Emp.Status-Inactive Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Admission Motive-Other Binary 0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Maule(07) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Ten.Stat.Houshld-Other Binary 0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Admission Motive-Health Sector Binary -0.02 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Primary Subs. Adm.-Other Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Emp.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
First substance used-Other Binary 0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Los Ríos(14) Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Ten.Stat.Houshld-Owner Binary 0.01 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
First substance used-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Note. Unadjusted (n=84,741) ; Adjusted (n=22,452) ; Total pairs: 11,226


attr(d_match$edad_al_ing,"label")<-"Admission Age"
attr(d_match$motivodeegreso_mod_imp,"label")<-"Cause of discharge"

tab2<-
  CreateTableOne(vars = match.on.sel, strata = "tipo_de_plan_res", 
                 data = d_match, factorVars = catVars, smd=T)


kableone <- function(x, caption=NULL, col.names=NA, smd=T, test=T, varLabels=T, noSpaces=T, printToggle=T, dropEqual=F, ...) {
  capture.output(x <- print(x, smd=T, test=test, varLabels=varLabels,noSpaces=noSpaces, printToggle=printToggle, dropEqual=dropEqual, ...))
  
  knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption= caption, col.names= col.names)
}

kableone(tab2, 
         caption = paste0("Table 5b. Covariate Balance in the Variables of Interest"),
         col.names= c("Ambulatory","Residential", "p-value", "class","SMD"),
         nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num")#"\\hline",
         ) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
  #()
  row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 10) %>%
  kableExtra::kable_classic() %>% 
  scroll_box(width = "100%", height = "400px") 
Table 5b. Covariate Balance in the Variables of Interest
Ambulatory Residential p-value class SMD
n 11226 11226
Primary Substance at Admission (%) 1.000 <0.001
Alcohol 2321 (20.7) 2321 (20.7)
Cocaine hydrochloride 1602 (14.3) 1602 (14.3)
Cocaine paste 6716 (59.8) 6716 (59.8)
Marijuana 391 (3.5) 391 (3.5)
Other 196 (1.7) 196 (1.7)
First Substance Used (%) 1.000 <0.001
Alcohol 4702 (41.9) 4702 (41.9)
Cocaine hydrochloride 475 (4.2) 475 (4.2)
Cocaine paste 1843 (16.4) 1843 (16.4)
Marijuana 3920 (34.9) 3920 (34.9)
Other 286 (2.5) 286 (2.5)
Marital Status (%) 1.000 <0.001
Married/Shared living arrangements 2710 (24.1) 2710 (24.1)
Separated/Divorced 1192 (10.6) 1192 (10.6)
Single 7208 (64.2) 7208 (64.2)
Widower 116 (1.0) 116 (1.0)
Educational Attainment (%) 1.000 <0.001
3-Completed primary school or less 3877 (34.5) 3877 (34.5)
2-Completed high school or less 5488 (48.9) 5488 (48.9)
1-More than high school 1861 (16.6) 1861 (16.6)
Substance Use Onset Age (median [IQR]) 15.00 [13.00, 17.00] 15.00 [13.00, 17.00] 0.964 nonnorm <0.001
Primary Substance at Admission Usage Frequency (%) 1.000 <0.001
1 day a week or more 271 (2.4) 271 (2.4)
2 to 3 days a week 1324 (11.8) 1324 (11.8)
4 to 6 days a week 1591 (14.2) 1591 (14.2)
Daily 7824 (69.7) 7824 (69.7)
Did not use 84 (0.7) 84 (0.7)
Less than 1 day a week 132 (1.2) 132 (1.2)
Treatment Admission Motive (%) 1.000 <0.001
Spontaneous 4083 (36.4) 4083 (36.4)
Assisted Referral 2035 (18.1) 2035 (18.1)
Other 719 (6.4) 719 (6.4)
Justice Sector 781 (7.0) 781 (7.0)
Health Sector 3608 (32.1) 3608 (32.1)
Psychiatric Comorbidity (%) 1.000 <0.001
Without psychiatric comorbidity 2983 (26.6) 2983 (26.6)
Diagnosis unknown (under study) 2437 (21.7) 2437 (21.7)
With psychiatric comorbidity 5806 (51.7) 5806 (51.7)
Regional Location of Center (%) 1.000 <0.001
Antofagasta (02) 676 (6.0) 676 (6.0)
Araucanía (09) 159 (1.4) 159 (1.4)
Arica (15) 609 (5.4) 609 (5.4)
Atacama (03) 262 (2.3) 262 (2.3)
Aysén (11) 40 (0.4) 40 (0.4)
Biobío (08) 529 (4.7) 529 (4.7)
Coquimbo (04) 270 (2.4) 270 (2.4)
Los Lagos (10) 363 (3.2) 363 (3.2)
Los Ríos (14) 184 (1.6) 184 (1.6)
Magallanes (12) 31 (0.3) 31 (0.3)
Maule (07) 494 (4.4) 494 (4.4)
Metropolitana (13) 5523 (49.2) 5523 (49.2)
Ñuble (16) 20 (0.2) 20 (0.2)
O’Higgins (06) 506 (4.5) 506 (4.5)
Tarapacá (01) 414 (3.7) 414 (3.7)
Valparaíso (05) 1146 (10.2) 1146 (10.2)
Drug Dependence Diagnosis = TRUE (%) 10203 (90.9) 10203 (90.9) 1.000 <0.001
Public Center = TRUE (%) 3615 (32.2) 3615 (32.2) 1.000 <0.001
Sexo Usuario/Sex of User = Women (%) 3475 (31.0) 3475 (31.0) 1.000 <0.001
Admission Age (median [IQR]) 32.11 [26.07, 40.42] 32.87 [26.51, 41.14] <0.001 nonnorm 0.042
Fecha de Ingreso a Tratamiento (Numérico)(c)/Date of Admission to Treatment (Numeric)(c) (median [IQR]) 16365.00 [15469.00, 17259.00] 16128.00 [15323.00, 16988.00] <0.001 nonnorm 0.171
Tenure status of household (%) 1.000 <0.001
Illegal Settlement 195 (1.7) 195 (1.7)
Others 347 (3.1) 347 (3.1)
Owner/Transferred dwellings/Pays Dividends 4113 (36.6) 4113 (36.6)
Renting 1530 (13.6) 1530 (13.6)
Stays temporarily with a relative 5041 (44.9) 5041 (44.9)
Employment Status (%) 1.000 <0.001
Employed 1765 (15.7) 1765 (15.7)
Inactive 1164 (10.4) 1164 (10.4)
Looking for a job for the first time 21 (0.2) 21 (0.2)
No activity 1372 (12.2) 1372 (12.2)
Not seeking for work 269 (2.4) 269 (2.4)
Unemployed 6635 (59.1) 6635 (59.1)


#covs0_match_disc<-data.table::data.table(covs0_match_disc)
covs_dum_unmatch <- subset(CONS_SEP_match_dum,select= -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,dup,duplicates_filtered,more_one_treat,n_hash,motivodeegreso_mod_imp,via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,
covs_dum_match <- subset(CONS_SEP_match_dum[c(t_id_1, c_id_1)],select= -c(row, hash_key, evaluacindelprocesoteraputico, abandono_temprano_rec,dup,n_hash,motivodeegreso_mod_imp,duplicates_filtered,more_one_treat, via_adm_sus_prin_act,edad_ini_sus_prin))#subset=,

covs_dum_unmatch$tipo_de_plan_res<-ifelse(covs_dum_unmatch$tipo_de_plan_res==1,1,0)
covs_dum_match$tipo_de_plan_res<-ifelse(covs_dum_match$tipo_de_plan_res==1,1,0)

catVars<-
match.on_tot[!match.on_tot %in% c("row","hash_key","more_one_treat","duplicates_filtered","abandono_temprano_rec","tipo_de_plan_res","evaluacindelprocesoteraputico","edad_ini_cons","fech_ing_num","edad_al_ing","via_adm_sus_prin_act")]

#############################################
# Standardized differences before matching
#############################################
smd_internal<-function(var,db){
  #db<-as.data.frame(db)
  smd_vec<-round(abs(mean(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==1),..var]),na.rm=F)- mean(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==0),..var]),na.rm=F))/
          sqrt(((sd(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==1),..var]),na.rm=F))^2 + (sd(unlist(get(db)[which(get(db)$tipo_de_plan_res_1==0),..var]),na.rm=F))^2 )/2),2)
#return(assign(paste0(var,"_smd"),smd_vec,envir=.GlobalEnv))
  return(print(smd_vec))
}

smd_df<-data.frame()
for (i in 1:length(covs_dum_unmatch)){
  smd_df<-rbind(smd_df,cbind.data.frame(names(covs_dum_unmatch)[i],
                             smd_internal(names(covs_dum_unmatch)[i],"covs_dum_unmatch")))
}
smd_df<-
smd_df %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(vars!="tipo_de_plan_res_TRUE") %>% 
  dplyr::arrange(desc(smd)) %>% 
  dplyr::filter(!grepl("tipo_de_plan_res",vars))
##################################################
# Standardized differences after matching
##################################################

smd_df2<-data.frame()
for (i in 1:length(names(covs_dum_match))){
  smd_df2<-rbind(smd_df2,cbind.data.frame(names(covs_dum_match)[i],
                             smd_internal(names(covs_dum_match)[i],"covs_dum_match")))
}
smd_df2<-
  smd_df2 %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(vars!="tipo_de_plan_res_TRUE")%>% 
  dplyr::filter(!grepl("tipo_de_plan_res",vars))

smd_df_prev_match_after_match<-
  smd_df %>% 
  dplyr::left_join(smd_df2,by="vars") %>% 
  dplyr::rename("Before\nMatching"= !!names(.[2]),"After\nMatching"= !!names(.[3])) %>% 
  melt() %>% 
  dplyr::left_join(var.names,by=c("vars"="rn")) %>% 
                     dplyr::select(unlist_var_names,everything()) %>% 
  dplyr::filter(!vars %in% c("sexo_2_Men","dg_trs_cons_sus_or_FALSE","tipo_centro_pub_FALSE")) %>% 
  dplyr::mutate(unlist_var_names=dplyr::case_when(vars=="dg_trs_cons_sus_or_TRUE"~"Drug Dependence",
                                                  vars=="tipo_centro_pub_TRUE"~"Public Center",
                                                  T~unlist_var_names))

#############################################
# Standardized differences before matching
#############################################
smd_internal2<-function(var,db){
  #db<-as.data.frame(db)
  smd_vec<-round(abs(mean(unlist(get(db)[which(get(db)$tipo_de_plan_res==1),..var]),na.rm=F)- mean(unlist(get(db)[which(get(db)$tipo_de_plan_res==0),..var]),na.rm=F))/
          sqrt(((sd(unlist(get(db)[which(get(db)$tipo_de_plan_res==1),..var]),na.rm=F))^2 + (sd(unlist(get(db)[which(get(db)$tipo_de_plan_res==0),..var]),na.rm=F))^2 )/2),2)
#return(assign(paste0(var,"_smd"),smd_vec,envir=.GlobalEnv))
  return(print(smd_vec))
}

smd_df21<-data.frame()
for (i in 1:length(covs_dum_unmatch)){
  smd_df21<-rbind(smd_df21,cbind.data.frame(names(covs_dum_unmatch)[i],
                             smd_internal2(names(covs_dum_unmatch)[i],"covs_dum_unmatch")))
}
smd_df21<-
smd_df21 %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(vars!="tipo_de_plan_res_TRUE") %>% 
  dplyr::arrange(desc(smd)) %>% 
  dplyr::filter(!grepl("tipo_de_plan_res",vars))
##################################################
# Standardized differences after matching
##################################################

smd_df22<-data.frame()
for (i in 1:length(names(covs_dum_match))){
  smd_df22<-rbind(smd_df22,cbind.data.frame(names(covs_dum_match)[i],
                             smd_internal2(names(covs_dum_match)[i],"covs_dum_match")))
}
smd_df22<-
  smd_df22 %>% 
  dplyr::rename("vars"= !!names(.[1]),"smd"= !!names(.[2])) %>% 
  dplyr::filter(vars!="tipo_de_plan_res_TRUE")%>% 
  dplyr::filter(!grepl("tipo_de_plan_res",vars))

smd_df_prev_match_after_match2<-
  smd_df21 %>% 
  dplyr::left_join(smd_df22,by="vars") %>% 
  dplyr::rename("Before\nMatching"= !!names(.[2]),"After\nMatching"= !!names(.[3])) %>% 
  melt() %>% 
  dplyr::left_join(var.names,by=c("vars"="rn")) %>% 
                     dplyr::select(unlist_var_names,everything()) %>% 
  dplyr::filter(!vars %in% c("sexo_2_Men","dg_trs_cons_sus_or_FALSE","tipo_centro_pub_FALSE")) %>% 
  dplyr::mutate(unlist_var_names=dplyr::case_when(vars=="dg_trs_cons_sus_or_TRUE"~"Drug Dependence",
                                                  vars=="tipo_centro_pub_TRUE"~"Public Center",
                                                  T~unlist_var_names))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
wrap.it <- function(x, len)
{ 
  sapply(x, function(y) paste(strwrap(y, len), 
                              collapse = "\t"), 
         USE.NAMES = FALSE)
}


# Call this function with a list or vector
wrap.labels <- function(x, len)
{
  if (is.list(x))
  {
    lapply(x, wrap.it, len)
  } else {
    wrap.it(x, len)
  }
}
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# Data set with stand. diff. before post matching
smd_df_prev_match_after_match<-
  if(!is.nan(smd_df_prev_match_after_match$value)[15]){
    smd_df_prev_match_after_match
  } else{
    smd_df_prev_match_after_match2
  }

var_names_2022<- 
  list(
    "sus_principal_mod_Alcohol"= "Primary Subs. at Adm.~-~Alcohol",
    "sus_principal_mod_Cocaine hydrochloride"= "Primary Subs. at Adm.~-~Cocaine hydrochloride",
    "sus_principal_mod_Cocaine paste"="Primary Subs. at Adm.~-~Cocaine paste",
    "sus_principal_mod_Marijuana"="Primary Subs. at Adm.~-~Marijuana",
    "sus_principal_mod_Other"="Primary Subs. at Adm.~-~Other",
    "origen_ingreso_mod_Spontaneous"="Admission Motive~-~Spontaneous",
    "origen_ingreso_mod_Assisted Referral"= "Admission Motive~-~Assisted referral",
    "origen_ingreso_mod_Other"="Admission Motive~-~Other",
    "origen_ingreso_mod_Justice Sector"= "Admission Motive~-~Justice sector",
    "origen_ingreso_mod_Health Sector"="Admission Motive~-~Health sector",
    "dg_cie_10_rec_Without psychiatric comorbidity"="Psychiatric comorbidity~-~Wo/Psych comorbidity",
    "dg_cie_10_rec_Diagnosis unknown (under study)"="Psychiatric comorbidity~-~Unknown/under study",
    "dg_cie_10_rec_With psychiatric comorbidity"="Psychiatric comorbidity~-~W/Psych comorbidity",
    "sexo_2_Women"="Sex~-~Women",
    "edad_al_ing"="Admission Age~-~ ",
    "fech_ing_num"="Admission Date~-~ ",
    "duplicates_filtered"="Treatments (#)",
    "more_one_treat"=">1 treatment",
    "sus_ini_mod_mvv_Alcohol"= "First substance used~-~Alcohol",
    "sus_ini_mod_mvv_Cocaine hydrochloride"= "First substance used~-~Cocaine hydrochloride",
    "sus_ini_mod_mvv_Cocaine paste"="First substance used~-~Cocaine paste",
    "sus_ini_mod_mvv_Marijuana"="First substance used~-~Marijuana",
    "sus_ini_mod_mvv_Other"="First substance used~-~Other",
    "estado_conyugal_2_Married/Shared living arrangements"="Marital Status~-~Married/Shared liv. arr.",
    "estado_conyugal_2_Separated/Divorced"="Marital Status~-~Separated/Divorced",
    "estado_conyugal_2_Single"= "Marital Status~-~Single",
    "estado_conyugal_2_Widower"="Marital Status~-~Widower",
    "escolaridad_rec_3-Completed primary school or less"="Educational Attainment~-~Primary sch. or less",
    "escolaridad_rec_2-Completed high school or less"="Educational Attainment~-~High sch. or less",
    "escolaridad_rec_1-More than high school"="Educational Attainment~-~More than high sch.", 
    "freq_cons_sus_prin_1 day a week or more"="Subs. Use Freq.~-~1d/wk or more",
    "freq_cons_sus_prin_2 to 3 days a week"="Subs. Use Freq.~-~2 to 3d/wk",
    "freq_cons_sus_prin_4 to 6 days a week"="Subs. Use Freq.~-~4 to 6d/wk",
    "freq_cons_sus_prin_Daily"="Subs. Use Freq.~-~Daily",
    "freq_cons_sus_prin_Did not use"="Subs. Use Freq.~-~Did not use",
    "freq_cons_sus_prin_Less than 1 day a week"="Subs. Use Freq.~-~Less 1d/wk",
    "nombre_region_Antofagasta (02)"="Region~-~Antofagasta(02)",
    "nombre_region_Araucanía (09)"="Region~-~Araucania(09)",
    "nombre_region_Arica (15)"="Region~-~Arica(15)",
    "nombre_region_Atacama (03)"="Region~-~Atacama(03)",
    "nombre_region_Aysén (11)"="Region~-~Aysen(11)",
    "nombre_region_Biobío (08)"="Region~-~Biobio(08)",
    "nombre_region_Coquimbo (04)"="Region~-~Coquimbo(04)",
    "nombre_region_Los Lagos (10)"="Region~-~Los Lagos(10)",
    "nombre_region_Los Ríos (14)"="Region~-~Los Rios(14)",
    "nombre_region_Magallanes (12)"="Region~-~Magallanes(12)",
    "nombre_region_Maule (07)"="Region~-~Maule(07)",
    "nombre_region_Metropolitana (13)"="Region~-~Metropolitana(13)",
    "nombre_region_Ñuble (16)"="Region~-~Nuble(16)",
    "nombre_region_O'Higgins (06)"="Region~-~O'Higgins(06)",
    "nombre_region_Tarapacá (01)"="Region~-~Tarapaca(01)",
    "nombre_region_Valparaíso (05)"="Region~-~Valparaiso(05)",
    "tipo_centro_pub"="Type of Center~-~Public Center",
    "dg_trs_cons_sus_or"= "Drug Dependence diagnosis~-~TRUE",
    "edad_ini_cons"="Age of Onset of Drug Use~-~ ",
    "condicion_ocupacional_corr_Employed"="Emp.Status~-~Employed",
    "condicion_ocupacional_corr_Inactive"="Emp.Status~-~Inactive",
    "condicion_ocupacional_corr_Looking for a job for the first time"="Emp.Status~-~Looking 1st job",
    "condicion_ocupacional_corr_No activity"="Emp.Status~-~No activity",
    "condicion_ocupacional_corr_Not seeking for work"="Emp.Status~-~Not seeking work",
    "condicion_ocupacional_corr_Unemployed"="Emp.Status~-~Unemployed",
    "tenencia_de_la_vivienda_mod_Illegal Settlement"="Tenure Status of Household~-~Illegal settlement",
    "tenencia_de_la_vivienda_mod_Others"="Tenure Status of Household~-~Other",
    "tenencia_de_la_vivienda_mod_Owner/Transferred dwellings/Pays Dividends"="Tenure Status of Household~-~Owner",
    "tenencia_de_la_vivienda_mod_Renting"="Tenure Status of Household~-~Renting",
    "tenencia_de_la_vivienda_mod_Stays temporarily with a relative"="Tenure Status of Household~-~Temporary settlement",
    "rn"="Treatment")

# tipo_centro_pub_TRUE
# dg_trs_cons_sus_or_TRUE

loveplot_bal3 <-
# Generate plot
smd_df_prev_match_after_match %>%
  dplyr::mutate(vars=dplyr::case_when(vars=="dg_trs_cons_sus_or_TRUE"~"dg_trs_cons_sus_or",vars=="tipo_centro_pub_TRUE"~"tipo_centro_pub",T~vars)) %>% 
  dplyr::left_join(tibble::rownames_to_column(data.frame(unlist(var_names_2022)),'vars'), by="vars") %>% 
  dplyr::rename("unlist_var_nms_22"="unlist.var_names_2022.") %>% 
  dplyr::mutate(unlist_var_nms_22= sub("Freq Drug Cons","Primary Substance Usage Freq.",unlist_var_nms_22)) %>% 
  dplyr::mutate(unlist_var_nms_22= sub("Motive Admission","Treatment Admission Motive",unlist_var_nms_22)) %>%
  #dplyr::mutate(unlist_var_nms_22= sub("Cocaine hydrochloride","Snort cocaine",unlist_var_nms_22)) %>% 
  dplyr::mutate(unlist_var_nms_22= sub("Age of Onset of Drug Use","Substance Use Onset Age",unlist_var_nms_22))%>%
  #2022
  dplyr::mutate(unlist_var_nms_22= sub("Sex","Gender",unlist_var_nms_22)) %>%
  dplyr::mutate(unlist_var_nms_22= sub("Occ.Status","Employment Status",unlist_var_nms_22)) %>%
  dplyr::mutate(unlist_var_nms_22= sub("Age at Admission","Admission Age",unlist_var_nms_22)) %>%
  tidyr::separate(unlist_var_nms_22,c("var","cat"), sep="~-~") %>%# View()
  dplyr::mutate(unlist_var_nms_22=paste0(cat,"~-~",var)) %>% 
 # dplyr::mutate(unlist_var_nms_22= wrap.labels(unlist_var_nms_22, 25)) %>% 
  
  # dplyr::mutate(unlist_var_names= 
  #                 dplyr::case_when(unlist_var_names)) %>% 
  ggplot(aes(x = reorder(unlist_var_nms_22, value), y = value,
                                                    group = variable, color = variable)) +
  geom_hline(yintercept = 0.2, color = "black", size = 0.2, linetype="dashed") +
  coord_flip() +
  theme_classic() + 
  theme(legend.key = element_blank()) +
  labs(y = "Average absolute standardized differences in means",x = "",color="",shape="") +
  scale_y_continuous(limits = c(0,max(smd_df_prev_match_after_match$value+.05)), breaks = seq(0,1.5,by=.1)) +
  scale_color_manual(values=c("gray20","black")) + 
  geom_point(aes(shape=variable),size=4) +
  scale_shape_manual(values=c(8,16)) + 
  theme(panel.spacing.x = unit(0,"line")) +
  theme(text = element_text(size=13),
        legend.background=element_blank(),
        legend.key = element_rect(colour = "transparent", fill = "transparent"),
        legend.position = c(max(smd_df_prev_match_after_match$value)*.65,.5))+
        #theme(axis.text.x = ggtext::element_markdown())+
  guides(y = ggh4x::guide_axis_nested(delim = "~-~", order=1))

loveplot_bal3
Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

if(no_mostrar==1){
  
jpeg(paste0(gsub("SUD_CL.*","",path),"_mult_state_ags/loveplot_bal32_2022.jpg"), height=8, width= 8, res= 500, units = "in")
loveplot_bal3
dev.off()

pdf(paste0(gsub("SUD_CL.*","",path),"_mult_state_ags/loveplot_bal32_2022.pdf"), height=8, width= 8)
loveplot_bal3
dev.off()
}


We allowed to tolerate fech_ing_num (SMD=0.18), because the date of admission not necessarily had to be strictly balanced, assuming that not every user had to be admitted to treatment in exact dates.

Survival Setting

Bivariate

We selected the first treatments,


#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# CHECK  DUPLICATED ROWS
# d_match %>% 
    #dplyr::group_by(row) %>% dplyr::mutate(rn_row=row_number()) %>% janitor::tabyl(rn_row)
#22,914
#22,452 --> APR 2022

#
#d_match_surv %>% janitor::tabyl(duplicates_filtered,event)
#nrow(ids_matched)/2 =11,457
#nrow(ids_matched)/2 =11,226 --> APR 2022

#CONS_SEP_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

ids_matched_filter<-
ids_matched %>% 
    dplyr::group_by(t_id_1) %>% 
    dplyr::mutate(rn_id=row_number()) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(rn_id==1)

ids_matched_rows<-cbind.data.frame("row_t"=CONS_SEP_match[c(t_id_1),"row"],
                        t_id_1,
                        "row_c"=CONS_SEP_match[c(c_id_1),"row"],
                        c_id_1) %>% 
  janitor::clean_names() %>% 
  dplyr::left_join(subset(ids_matched_filter,select=-c_id_1),by="t_id_1")

CONS_C1_df_dup_SEP_2020_irrs_health<-  
d_match %>% 
  dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","dias_treat_imp_sin_na", "event", "person_days","fech_egres_num", "person_years","diff_bet_treat")],by="row") %>%
  dplyr::left_join(ids_matched_rows, by=c("row")) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>% 
  dplyr::select(-rn_id,-group) %>% #glimpse()
  dplyr::rename("row_c"="row_2") %>% 
  dplyr::left_join(ids_matched_rows, by=c("row"="row_2")) %>% 
  dplyr::mutate(t_id_1=ifelse(!is.na(t_id_1.x),t_id_1.x,t_id_1.y)) %>% 
  dplyr::mutate(c_id_1=ifelse(!is.na(c_id_1.x),c_id_1.x,c_id_1.y)) %>% 
  dplyr::mutate(row_c=ifelse(!is.na(row_c),row_c,row.y)) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,group_match)) %>% 

  dplyr::select(-t_id_1.x,-c_id_1.x,-t_id_1.y,-c_id_1.y,-group,-row.y,-rn_id) %>% #glimpse()
  
  dplyr::mutate(res_drop_out=dplyr::case_when(
  tipo_de_plan_res==1 & abandono_temprano_rec==TRUE ~1,
  TRUE~0)) %>% 
  dplyr::mutate(min_ach=dplyr::case_when(
  evaluacindelprocesoteraputico=="3-Minimum Achievement" ~1,
  TRUE~0)) %>% 
  dplyr::mutate(res_drop_out=factor(res_drop_out)) %>% 
    dplyr::mutate(min_ach=factor(min_ach)) %>% 
  dplyr::mutate(status_censorship=dplyr::case_when(
  motivodeegreso_mod_imp=="Ongoing treatmentt" ~1,
  TRUE~0)) %>% 

  dplyr::mutate(outcome_to_readmission= dplyr::case_when(
                        event==1~ (diff_bet_treat)/365.25,# & grepl("",comp_status)
                        event==0~ (as.numeric(as.Date("2019-11-13"))-fech_egres_num)/365.25)) %>% 
  dplyr::mutate(admission_to_readmission= dplyr::case_when(
                        event==1~ (diff_bet_treat+dias_treat_imp_sin_na)/365.25,# & grepl("",comp_status)
                        event==0~ (as.numeric(as.Date("2019-11-13"))-fech_ing_num)/365.25))
  
# CONS_C1_df_dup_SEP_2020_irrs_health%>% janitor::tabyl(cnt_diagnostico_trs_fisico_irr)
#label(CONS_C1_df_dup_SEP_2020_prev4_explore$dg_fis_anemia) <- "Physical Dg. Anemia"
#   cnt_mod_cie_10_or cnt_otros_probl_at_sm_or

#22,914
#d_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

#27 Y ALGO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

# HAY UN SEGUNDO TRATAMIENTO PARA 4,565 CASOS
#PARA VER SI HAY MAS DE UN CASO POR USUARIO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#irrs_min_ach & irrs_res_early- outcome to readmission  
irrs_early_drop<-irrs(x="abandono_temprano_rec", z="outcome_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_res_plan<-irrs(x="tipo_de_plan_res" ,z="admission_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_res_early<-irrs(x="res_drop_out" ,z="outcome_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_min_ach<-irrs(x="min_ach" , z="outcome_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")


The incidence rate of readmission was 0.97 (95% IC 0.91-1.03) in users that had at least an early dropout, compared to users that did not have a physical condition at baseline (p= 0.277).


#biostat3::survRate(Surv((outcome_to_readmission)/1000, readmission==1) ~ comp_status, data=CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_cmprsk)
#para compararlo

comp_status_readmission_setting <-
  irrs(x="as.numeric(tipo_de_plan_res)", 
                              y="as.numeric(event)",# 2021-04-06, 
                              z="admission_to_readmission", 
                              db="CONS_C1_df_dup_SEP_2020_irrs_health")

biostat3::survRate(Surv((admission_to_readmission), event==1) ~ tipo_de_plan_res, data=CONS_C1_df_dup_SEP_2020_irrs_health)
##                    tipo_de_plan_res    tstop event       rate      lower
## tipo_de_plan_res=0                0 46697.95  2629 0.05629798 0.05416631
## tipo_de_plan_res=1                1 46689.69  3741 0.08012476 0.07757754
##                         upper
## tipo_de_plan_res=0 0.05849203
## tipo_de_plan_res=1 0.08273431
#https://mail.google.com/mail/u/0/?tab=rm&ogbl#search/%22event%22+%22jos%C3%A9%22/FMfcgzGkXwLJKPwsCJJNSWTlgGlPQqKk

Excluding censored cases, the incidence rate of readmission was significantly higher (1.42, 95% IC 1.35-1.5) in users that had a Discharge without clinical advice, compared to users that had a Therapeutic discharge (p<0.001).


fit_abandono_temprano_rec<- survfit(Surv(outcome_to_readmission, event) ~abandono_temprano_rec, data=CONS_C1_df_dup_SEP_2020_irrs_health,
                               type      = "kaplan-meier",
                                error     = "greenwood",
                                conf.type = "log-log") 

fit_abandono_temprano_rec_na <- fit_abandono_temprano_rec %>% fortify %>% group_by(strata) %>% mutate(CumHaz = cumsum(n.event/n.risk))

#http://rstudio-pubs-static.s3.amazonaws.com/522481_5e55bec9c94044678e680a6d07e96a2e.html
#https://rstudio-pubs-static.s3.amazonaws.com/258589_cd197f86fb5548ac89d7bcffd4bc6afe.html
#http://pcool.dyndns.org:8080/statsbook/?page_id=513
#http://rstudio-pubs-static.s3.amazonaws.com/316989_83cbe556125645b698c9ff6cf88c4c1a.html
#https://cran.r-project.org/web/packages/survminer/readme/README.html
#https://docs.ufpr.br/~jlpadilha/CE077/Aulas/2.TecnicasNaoParametricas.pdf 
#http://www.columbia.edu/~sjm2186/EPIC_R/packages.pdf
ggsurvplot_fit_abandono_temprano_rec<-
  ggsurvplot(fit_abandono_temprano_rec, 
           fun = "cumhaz",
           conf.int = TRUE,
           legend.labs = c("Late Dropout", "Early Dropout"), 
           risk.table = "abs_pct",
           #ncensor.plot = TRUE,
           ggtheme = theme_classic2(base_size=10),
           risk.table.y.text.col = F,
           risk.table.col="black",
           font.tickslab = c(10),
           risk.table.height = .2,
           risk.table.fontsize = 2.5,
           #break.time.by = 365.25,
           pval = F,
           #ylim=c(0,10),
           legend = c(0.88, 0.15), 
           legend.title="Early Drop.",
           xlab= "Time (in years)", 
           #cumevents=T,
           surv.connect = T,
           censor= F,
          # xscale=  "d_y",
           palette = c("skyblue4","orangered4"))
ggsurvplot_fit_abandono_temprano_rec
Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Ealy Dropout of Treatment at Baseline

Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Ealy Dropout of Treatment at Baseline

 # scale_y_continuous(breaks = sort(c(seq(0, 100, 10), 56)))


The incidence rate of readmission was 1.42 (95% IC 1.35-1.5) in users that had a residential plan, compared to users that had an ambulatory plan at baseline (p<0.001).


fit_tipo_de_plan_res<- survfit(Surv(admission_to_readmission, event==1) ~tipo_de_plan_res, data=CONS_C1_df_dup_SEP_2020_irrs_health,
                               type      = "kaplan-meier",
                                error     = "greenwood",
                                conf.type = "log-log") 

fit_tipo_de_plan_res_na <- fit_tipo_de_plan_res %>% fortify %>% group_by(strata) %>% mutate(CumHaz = cumsum(n.event/n.risk))

#http://rstudio-pubs-static.s3.amazonaws.com/522481_5e55bec9c94044678e680a6d07e96a2e.html
#https://rstudio-pubs-static.s3.amazonaws.com/258589_cd197f86fb5548ac89d7bcffd4bc6afe.html
#http://pcool.dyndns.org:8080/statsbook/?page_id=513
#http://rstudio-pubs-static.s3.amazonaws.com/316989_83cbe556125645b698c9ff6cf88c4c1a.html
#https://cran.r-project.org/web/packages/survminer/readme/README.html
#https://docs.ufpr.br/~jlpadilha/CE077/Aulas/2.TecnicasNaoParametricas.pdf 
#http://www.columbia.edu/~sjm2186/EPIC_R/packages.pdf
ggsurvplot_fit_tipo_de_plan_res<-
  ggsurvplot(fit_tipo_de_plan_res, 
           fun = "cumhaz",
           conf.int = TRUE,
           legend.labs = c("Ambulatory", "Residential Plan"), 
           risk.table = "abs_pct",
           #ncensor.plot = TRUE,
           ggtheme = theme_classic2(base_size=10),
           risk.table.y.text.col = F,
           risk.table.col="black",
           font.tickslab = c(10),
           risk.table.height = .2,
           risk.table.fontsize = 2.5,
          # break.time.by = 365.25,
           pval = F,
           #ylim=c(0,10),
           legend = c(0.88, 0.15), 
           legend.title="Res. Plan",
           xlab= "Time (in years)", 
           #cumevents=T,
           surv.connect = T,
           censor= F,
           #xscale=  "d_y",
           palette = c("skyblue4","orangered4"))
ggsurvplot_fit_tipo_de_plan_res
Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Treatment Setting at Baseline

Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Treatment Setting at Baseline

 # scale_y_continuous(breaks = sort(c(seq(0, 100, 10), 56)))


The incidence rate of readmission was 1.21 (95% IC 1.12-1.3) in users that had a residential plan and an early dropout, compared to the rest of users at baseline (p<0.001).


fit_res_drop_out<- survfit(Surv(outcome_to_readmission, event==1) ~res_drop_out, data=CONS_C1_df_dup_SEP_2020_irrs_health,
                               type      = "kaplan-meier",
                                error     = "greenwood",
                                conf.type = "log-log") 

fit_res_drop_out_na <- fit_res_drop_out %>% fortify %>% group_by(strata) %>% mutate(CumHaz = cumsum(n.event/n.risk))
#min_ach
ggsurvplot_fit_res_drop_out<-
  ggsurvplot(fit_res_drop_out, 
           fun = "cumhaz",
           conf.int = TRUE,
           legend.labs = c("Other", "Residential Plan & Drop-out"), 
           risk.table = "abs_pct",
           #ncensor.plot = TRUE,
           ggtheme = theme_classic2(base_size=10),
           risk.table.y.text.col = F,
           risk.table.col="black",
           font.tickslab = c(10),
           risk.table.height = .2,
           risk.table.fontsize = 2.5,
         #  break.time.by = 365.25,
           pval = F,
           #ylim=c(0,10),
           legend = c(0.78, 0.15), 
           legend.title="Res. Plan",
           xlab= "Time (in years)", 
           #cumevents=T,
           surv.connect = T,
           censor= F,
          # xscale=  "d_y",
           palette = c("skyblue4","orangered4"))
ggsurvplot_fit_res_drop_out
Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout

Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout

 # scale_y_continuous(breaks = sort(c(seq(0, 100, 10), 56)))


The incidence rate of readmission was 1.14 (95% IC 1.08-1.19) in users that had a minimum achievement of the therapeutic goals, compared to the rest of users at baseline (p<0.001).


fit_min_ach<- survfit(Surv(outcome_to_readmission, event==1) ~min_ach, data=CONS_C1_df_dup_SEP_2020_irrs_health,
                                type      = "kaplan-meier",
                                error     = "greenwood",
                                conf.type = "log-log") 
fit_min_ach_na <- fit_min_ach %>% fortify %>% group_by(strata) %>% mutate(CumHaz = cumsum(n.event/n.risk))
#min_ach
#http://rstudio-pubs-static.s3.amazonaws.com/522481_5e55bec9c94044678e680a6d07e96a2e.html
#https://rstudio-pubs-static.s3.amazonaws.com/258589_cd197f86fb5548ac89d7bcffd4bc6afe.html
#http://pcool.dyndns.org:8080/statsbook/?page_id=513
#http://rstudio-pubs-static.s3.amazonaws.com/316989_83cbe556125645b698c9ff6cf88c4c1a.html
#https://cran.r-project.org/web/packages/survminer/readme/README.html
#https://docs.ufpr.br/~jlpadilha/CE077/Aulas/2.TecnicasNaoParametricas.pdf 
#http://www.columbia.edu/~sjm2186/EPIC_R/packages.pdf
ggsurvplot_fit_fit_min_ach<-
  ggsurvplot(fit_min_ach, 
           fun = "cumhaz",
           conf.int = TRUE,
           legend.labs = c("Other", "Minimum Achievement"), 
           risk.table = "abs_pct",
           #ncensor.plot = TRUE,
           ggtheme = theme_classic2(base_size=10),
           risk.table.y.text.col = F,
           risk.table.col="black",
           font.tickslab = c(10),
           risk.table.height = .2,
           risk.table.fontsize = 2.5,
           #break.time.by = 365.25,
           pval = F,
           #ylim=c(0,10),
           legend = c(0.88, 0.15), 
           legend.title="Res. Plan",
           xlab= "Time (in years)", 
           #cumevents=T,
           surv.connect = T,
           censor= F,
           #xscale=  "d_y",
           palette = c("skyblue4","orangered4"))
ggsurvplot_fit_fit_min_ach
Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person had a Minimum Achievement in Therapeutic Goals

Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person had a Minimum Achievement in Therapeutic Goals

 # scale_y_continuous(breaks = sort(c(seq(0, 100, 10), 56)))


Multistate


#  dplyr::filter(motivodeegreso_mod_imp!="En curso")%>% #Sacar los tratamientos que estén en curso 
tab1_lab<- paste0('Original C1 Dataset \n(n = ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab2_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab1_5_lab<- paste0('&#8226; Duplicated entries\\l &#8226; Overlapping treatments of users\\l &#8226; Intermediate events of treatment (continuous referrals)')
tab4_lab<- paste0('Imputed C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab3_5_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab6_lab<- paste0('C1 Matched Sample\nin Treatment Setting \n(n = ', formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% nrow(), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')

lab_tab<- paste0("  Result of the matching on treatment setting\nNo. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))

#https://stackoverflow.com/questions/46750364/diagrammer-and-graphviz
#https://mikeyharper.uk/flowcharts-in-r-using-diagrammer/
#http://blog.nguyenvq.com/blog/2012/05/29/better-decision-tree-graphics-for-rpart-via-party-and-partykit/
#http://blog.nguyenvq.com/blog/2014/01/17/skeleton-to-create-fast-automatic-tree-diagrams-using-r-and-graphviz/
#https://cran.r-project.org/web/packages/DiagrammeR/vignettes/graphviz-mermaid.html
#https://stackoverflow.com/questions/39133058/how-to-use-graphviz-graphs-in-diagrammer-for-r
#https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789802566/1/ch01lvl1sec21/creating-diagrams-via-the-diagrammer-package
#https://justlegal.be/2019/05/using-flowcharts-to-display-legal-procedures/
# paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
#
library(DiagrammeR) #⋉
grViz("digraph flowchart {
      # node definitions with substituted label text
      node [fontname = Times, shape = rectangle,fontsize = 9]        
      tab1 [label = '@@1']
      tab2 [label = '@@2']
      tab3 [label = '&#8226;Duplicated entries\\l&#8226;Intermediate events of treatment (continuous referrals)\\l',fontsize = 7]
      tab4 [label = '@@4']
      blank [label = '', width = 0.0001, height = 0.0001]
      blank2 [label = '', width = 0.0001, height = 0.0001]
      blank3 [label = '', width = 0.0001, height = 0.0001]
      tab5 [label = '&#8226;Logically Inconsistent candidates for imputation\\l&#8226;Ties in candidates for imputation\\l',fontsize = 7]
      tab6 [label= '@@6']
      tab7 [label = '&#8226;Matching pairs based on balance of covariates at basline,\\l&#8226;Pairs 1:1\\l',fontsize = 7]
      
      # edge definitions with the node IDs
      tab1 -> blank [arrowhead = none,label='  Data wrangling and normalization process',fontsize = 8];
      blank -> tab3
      blank -> tab2
      tab2 -> blank2 [arrowhead = none];
      blank2 -> tab5 
      blank2 -> tab4 [label='  Result of the imputation of missing values',fontsize = 8];
      tab4 -> blank3 [arrowhead= none];
      blank3-> tab7
      blank3 -> tab6 [label='@@7',fontsize = 8];
            subgraph {
              rank = same; tab3; blank;
            }
            subgraph {
              rank = same; tab5; blank2;
            }
            subgraph {
              rank = same; tab7; blank3;
            }
      }

      [1]:  tab1_lab
      [2]:  tab2_lab
      [3]:  tab1_5_lab
      [4]:  tab4_lab
      [5]:  ''
      [6]:  tab6_lab
      [7]:  lab_tab
      ")
#      {rank=same; 'tab2'' -> tab3 [label='',fontsize = 11]}; #⋉
#CONS_C1_df_dup_SEP_2020_irrs_health
d_match_surv<-
CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(hash_key %in% unlist(CONS_C1_df_dup_SEP_2020_irrs_health[,"hash_key"])) %>% 
  dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","event","person_days","person_years","diff_bet_treat", "fech_ing_next_treat")],by="row") %>% 
#browse hash_key duplicates_filtered2 cum_diff_bet_treat dup2_real diff_bet_treat if hash_key=="0737aacbb7efdd418f7a37ce3386ce5e"|hash_key=="07668f2d3e4f6beb7975e43ee96eac80"
  dplyr::mutate(res_drop_out=dplyr::case_when(
  tipo_de_plan_res==1 & abandono_temprano_rec==T ~1,
  T~0)) %>% 
  dplyr::mutate(res_drop_out=factor(res_drop_out)) %>% 
  dplyr::mutate(status_censorship=dplyr::case_when(
  motivodeegreso_mod_imp=="Ongoing treatment" ~1,
  T~0)) %>% 
  dplyr::mutate(tr_completion=factor(dplyr::case_when(
  motivodeegreso_mod_imp=="Therapeutic discharge" ~1,
  motivodeegreso_mod_imp=="Ongoing treatment" ~0,
  T~2),labels=c("Ongoing treatment", "Completion","Non-completion"))) %>% 
  dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>% 
  dplyr::mutate(min_achievement=factor(dplyr::case_when(
  evaluacindelprocesoteraputico=="3-Minimum Achievement" ~1,
  is.na(evaluacindelprocesoteraputico) ~0,
  T~2),labels=c("Ongoing treatment", "Minimum achievement","High/Medium achievement"))) %>% 
  dplyr::mutate(n_hash=as.numeric(factor(hash_key, levels=unique(hash_key)))) %>% 
  dplyr::arrange(hash_key, fech_ing_num) %>% 
  dplyr::select(n_hash,hash_key,everything()) %>% 
  
#ids_matched_filter
  dplyr::left_join(ids_matched_rows, by=c("row")) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>% 
  dplyr::select(-rn_id,-group) %>% #glimpse()
  dplyr::rename("row_c"="row_2") %>% 
  dplyr::left_join(ids_matched_rows, by=c("row"="row_2")) %>% 
  dplyr::mutate(t_id_1=ifelse(!is.na(t_id_1.x),t_id_1.x,t_id_1.y)) %>% 
  dplyr::mutate(c_id_1=ifelse(!is.na(c_id_1.x),c_id_1.x,c_id_1.y)) %>% 
  dplyr::mutate(row_c=ifelse(!is.na(row_c),row_c,row.y)) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,group_match)) %>% 

#2021-05-04, cambié el nombre del agrupador y rellené las columnas matcheadas para los tratamientos posteriores no matcheados
  dplyr::group_by(n_hash) %>% 
  tidyr::fill(group_match, .direction="updown") %>% 
  tidyr::fill(t_id_1, .direction="updown") %>%
  tidyr::fill(c_id_1, .direction="updown") %>% 
  ungroup() %>% 
  dplyr::select(-t_id_1.x,-c_id_1.x,-t_id_1.y,-c_id_1.y,-group,-row.y,-rn_id) %>% 
  dplyr::rename("id"="n_hash")

#%>% #glimpse() ids_matched_rows ids_matched_filter 
#dplyr::left_join(ids_matched, by=c("row"="t_id_1")) %>% 
#dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>% 
#dplyr::select(-c_id_1,-group) %>% 
#dplyr::left_join(ids_matched, by=c("row"="c_id_1")) %>% 
#dplyr::mutate(group_match=ifelse(is.na(group_match),group,group_match)) %>% 
#dplyr::filter(!is.na(group_match))
#dplyr::select(-t_id_1,-group)

if (
d_match_surv %>% 
    dplyr::group_by(row) %>% 
    dplyr::mutate(n_row=n()) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(n_row>1) %>% nrow()>1){stop("Duplicated rows")}


attr(d_match_surv$res_drop_out,"label") <- "Early Drop-out & Residential Setting (=1)"
attr(d_match_surv$status_censorship,"label") <- "Ongoing treatment"
attr(d_match_surv$origen_ingreso_mod,"label") <- "Admission Motive"
attr(d_match_surv$edad_al_ing,"label") <-"Admission Age"
attr(d_match_surv$duplicates_filtered,"label") <- "Treatments by User (#)"
attr(d_match_surv$more_one_treat,"label") <- "More than one treatment"
attr(d_match_surv$sus_ini_mod_mvv,"label")<-"First Substance Used"
attr(d_match_surv$estado_conyugal_2,"label")<-"Marital Status"
attr(d_match_surv$escolaridad_rec,"label")<-"Educational Attainment"
attr(d_match_surv$edad_ini_cons,"label")<-"Substance Use Onset Age"
attr(d_match_surv$freq_cons_sus_prin,"label")<-"Primary Substance at Admission Usage Frequency"
attr(d_match_surv$nombre_region,"label")<-"Regional Location of Center"
attr(d_match_surv$dg_cie_10_rec,"label")<-"Psychiatric Comorbidity"
attr(d_match_surv$dg_trs_cons_sus_or,"label")<-"Drug Dependence diagnosis"
attr(d_match_surv$min_achievement,"label")<-"Minimum Achievement in the Therapeutic Process"
attr(d_match_surv$abandono_temprano_rec,"label")<-"Treatment Length (>90)"
attr(d_match_surv$dias_treat_imp_sin_na,"label")<-"Days of Treatment (missing dates of discharge were replaced with difference from 2019-11-13)"
attr(d_match_surv$tipo_de_plan_res,"label")<-"Residential Setting"
attr(d_match_surv$tipo_centro_pub,"label")<-"Public Center"
attr(d_match_surv$condicion_ocupacional_corr,"label")<-"Occupational Status"
attr(d_match_surv$event,"label")<-"Users with Posterior Treatments (=1)"
attr(d_match_surv$person_days,"label")<-"User's Days available in the system for the study"
attr(d_match_surv$person_years,"label")<-"User's Years available in the system for the study"
attr(d_match_surv$motivodeegreso_mod_imp,"label")<-"Cause of Discharge"
attr(d_match_surv$diff_bet_treat,"label")<-"Days of difference between the Next Treatment"
attr(d_match_surv$group_match,"label")<-"Match Paired"
attr(d_match_surv$tr_completion,"label")<-"Treatment Successful Completion"
attr(d_match_surv$fech_ing_next_treat,"label") <- 'Date of Admission to Posterior Treatment (numeric)'


library(compareGroups)
table_surv <- compareGroups::compareGroups(tipo_de_plan_res ~ origen_ingreso_mod+ dg_cie_10_rec+ sexo_2+ edad_al_ing+ abandono_temprano_rec+ duplicates_filtered+ more_one_treat+ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ freq_cons_sus_prin+ nombre_region+ tipo_centro_pub+ min_achievement+ dg_trs_cons_sus_or+ edad_ini_cons+ condicion_ocupacional_corr+ dias_treat_imp_sin_na+ event+ person_days+ person_years+ diff_bet_treat+ tr_completion+ motivodeegreso_mod_imp,
                                       method= c(origen_ingreso_mod=3,
                                                 dg_cie_10_rec=3,
                                                 sexo_2=3,
                                                 edad_al_ing=2,
                                                 abandono_temprano_rec=2,
                                                 edad_al_ing=2,
                                                 duplicates_filtered=3,
                                                 more_one_treat=3,
                                                 sus_ini_mod_mvv=3,
                                                 estado_conyugal_2=3,
                                                 escolaridad_rec=3,
                                                 freq_cons_sus_prin=3,
                                                 nombre_region=3,
                                                 tipo_centro_pub=3,
                                                 min_achievement=3,
                                                 dg_trs_cons_sus_or=3,
                                                 edad_ini_cons=2,
                                                 condicion_ocupacional_corr=3,
                                                 dias_treat_imp_sin_na=2,
                                                 event=3,
                                                 person_days=2,
                                                 person_years= 2,
                                                 diff_bet_treat= 2, 
                                                 tr_completion= 3,
                                                 motivodeegreso_mod_imp=3
                                                 ),
                                       data = d_match_surv,
                                       include.miss = T,
                                       var.equal=T
)

pvals <- getResults(table_surv)
#p.adjust(pvals, method = "BH")
restab_surv <- createTable(table_surv,show.p.overall = T)
compareGroups::export2md(restab_surv, size=13, first.strip=T, hide.no="no", position="center",col.names=c("Variables","Ambulatory","Residential", "Sig."),
                         format="html",caption= "Table 6. Summary descriptives table")%>%
  kableExtra::row_spec(1,bold=T) %>% 
  kableExtra::add_footnote(c("Note. Variables of C1 dataset had to be standardized before comparison;", "Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;", "Categorical variables are presented as number (%)"), notation = "none")%>%
  kableExtra::kable_classic() %>% 
  kableExtra::scroll_box(width = "100%", height = "600px")
Table 6. Summary descriptives table
Variables Ambulatory Residential Sig.
N=16895 N=14844
Admission Motive: <0.001
Spontaneous 6932 (41.0%) 5501 (37.1%)
Assisted Referral 2906 (17.2%) 3064 (20.6%)
Other 951 (5.63%) 907 (6.11%)
Justice Sector 1199 (7.10%) 966 (6.51%)
Health Sector 4907 (29.0%) 4406 (29.7%)
Psychiatric Comorbidity: <0.001
Without psychiatric comorbidity 4640 (27.5%) 3773 (25.4%)
Diagnosis unknown (under study) 3469 (20.5%) 3264 (22.0%)
With psychiatric comorbidity 8786 (52.0%) 7807 (52.6%)
Sexo Usuario/Sex of User: 0.038
Men 11203 (66.3%) 10007 (67.4%)
Women 5692 (33.7%) 4837 (32.6%)
Admission Age 32.9 [27.0;40.8] 33.1 [26.9;41.1] 0.620
Treatment Length (>90): <0.001
FALSE 13799 (81.7%) 11849 (79.8%)
TRUE 3096 (18.3%) 2995 (20.2%)
Treatments by User (#): 0.009
1 8597 (50.9%) 7485 (50.4%)
2 4702 (27.8%) 4056 (27.3%)
3 2166 (12.8%) 1902 (12.8%)
4 911 (5.39%) 829 (5.58%)
5 314 (1.86%) 346 (2.33%)
6 151 (0.89%) 149 (1.00%)
7 38 (0.22%) 53 (0.36%)
8 16 (0.09%) 24 (0.16%)
More than one treatment: 0.419
0 8597 (50.9%) 7485 (50.4%)
1 8298 (49.1%) 7359 (49.6%)
First Substance Used: <0.001
Alcohol 7356 (43.5%) 6117 (41.2%)
Cocaine hydrochloride 746 (4.42%) 632 (4.26%)
Cocaine paste 2457 (14.5%) 2319 (15.6%)
Marijuana 5924 (35.1%) 5407 (36.4%)
Other 412 (2.44%) 369 (2.49%)
Marital Status: <0.001
Married/Shared living arrangements 4246 (25.1%) 3424 (23.1%)
Separated/Divorced 1864 (11.0%) 1550 (10.4%)
Single 10600 (62.7%) 9726 (65.5%)
Widower 185 (1.09%) 144 (0.97%)
Educational Attainment: 0.005
3-Completed primary school or less 5186 (30.7%) 4787 (32.2%)
2-Completed high school or less 8753 (51.8%) 7594 (51.2%)
1-More than high school 2956 (17.5%) 2463 (16.6%)
Primary Substance at Admission Usage Frequency: <0.001
1 day a week or more 593 (3.51%) 323 (2.18%)
2 to 3 days a week 2504 (14.8%) 1578 (10.6%)
4 to 6 days a week 2444 (14.5%) 1947 (13.1%)
Daily 10639 (63.0%) 10736 (72.3%)
Did not use 335 (1.98%) 106 (0.71%)
Less than 1 day a week 380 (2.25%) 154 (1.04%)
Public Center: <0.001
FALSE 9659 (57.2%) 10477 (70.6%)
TRUE 7236 (42.8%) 4367 (29.4%)
Minimum Achievement in the Therapeutic Process: <0.001
Ongoing treatment 1135 (6.72%) 649 (4.37%)
Minimum achievement 8416 (49.8%) 6099 (41.1%)
High/Medium achievement 7344 (43.5%) 8096 (54.5%)
Drug Dependence diagnosis: <0.001
FALSE 2061 (12.2%) 1296 (8.73%)
TRUE 14834 (87.8%) 13548 (91.3%)
Substance Use Onset Age 15.0 [14.0;17.0] 15.0 [13.0;17.0] 0.081
Occupational Status: <0.001
Employed 3781 (22.4%) 2008 (13.5%)
Inactive 1872 (11.1%) 1530 (10.3%)
Looking for a job for the first time 33 (0.20%) 24 (0.16%)
No activity 1817 (10.8%) 2085 (14.0%)
Not seeking for work 343 (2.03%) 392 (2.64%)
Unemployed 9049 (53.6%) 8805 (59.3%)
Days of Treatment (missing dates of discharge were replaced with difference from 2019-11-13) 151 [83.0;274] 151 [66.0;279] <0.001
Users with Posterior Treatments (=1): 0.419
0 8597 (50.9%) 7485 (50.4%)
1 8298 (49.1%) 7359 (49.6%)
User’s Days available in the system for the study 416 [145;1185] 406 [153;1101] 0.026
User’s Years available in the system for the study 1.14 [0.40;3.24] 1.11 [0.42;3.01] 0.026
Days of difference between the Next Treatment 344 [137;776] 263 [72.0;695] <0.001
Treatment Successful Completion: <0.001
Ongoing treatment 1135 (6.72%) 649 (4.37%)
Completion 3036 (18.0%) 4289 (28.9%)
Non-completion 12724 (75.3%) 9906 (66.7%)
Cause of Discharge: <0.001
Administrative discharge 1449 (8.58%) 1839 (12.4%)
Early Drop-out 3096 (18.3%) 2995 (20.2%)
Late Drop-out 6020 (35.6%) 2932 (19.8%)
Ongoing treatment 1135 (6.72%) 649 (4.37%)
Referral to another treatment 2159 (12.8%) 2140 (14.4%)
Therapeutic discharge 3036 (18.0%) 4289 (28.9%)
Note. Variables of C1 dataset had to be standardized before comparison;
Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


After matching, we selected 31,739 treatments (users=22,452).


library(Epi)
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
states_trans<-c("Admission",    "Readmission",  "Readmission2", "Readmission3", "Readmission4")

trans_matrix <- matrix(c(
NA,1,NA,NA,NA,
NA,NA,2,NA,NA,
NA,NA,NA,3,NA,
NA,NA,NA,NA,4,
NA,NA,NA,NA,NA
), nrow=5, ncol=5,byrow=TRUE,dimnames=list(from=states_trans,to=states_trans))


Tot_reg<-
d_match_surv %>% 
    dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup) %>%
    ## Filter cases with 4 or more registries
    #sum(prop.table(table(d_match_surv$dup))[1:3])
    #dplyr::filter(dup<4) %>% 
     nrow()

Less4_reg<-
d_match_surv %>% 
    dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup) %>%
    ## Filter cases with 4 or more registries
    #sum(prop.table(table(d_match_surv$dup))[1:3])
    dplyr::filter(dup<5) %>% 
     nrow()


#All possible paths through the multi-state model can be found here:
Epi::boxes.Lexis(trans_matrix, wmult=1.3, hmult=1.3, cex=.8,
             boxpos = list(y = rep(50,5),
                           x = (1:5)*(20)-10), 
            txt.arr=c(expression("1) " *lambda['12']), 
                      expression("2) " *lambda['23']),
                      expression("3) " *lambda['34']),
                      expression("4) " *lambda['45'])
                      ))
title(sub = paste0("No recurring states;\nAbsorbing state: Fourth Readmission (",scales::percent((Less4_reg/Tot_reg),accuracy = 0.1)," of the registries, considering that each registry\n had a time-to-readmission); Other causes of discharge were not events of interest")) ## internal titles


To the first and second states, we subtracted one day if it overlaps with the date of discharge. For the third and the following states, we added one day in case of overlapping dates due to continuous treatments. We left 291 entries of 200 patients that had more than 4 readmissions.

library(Epi)
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils

states_trans2<-c("Admission", "TD", "DWCA", "Readmission",  "Readmissionb", "Readmission2", "Readmission2b", "Readmission3", "Readmission3b")

trans_matrix2 <- matrix(c(
NA,1,2,NA,NA,NA,NA,NA,NA, 
NA,NA,NA,3,NA,NA,NA,NA,NA, 
NA,NA,NA,NA,4,NA,NA,NA,NA, 
NA,NA,NA,NA,NA,5,NA,NA,NA, 
NA,NA,NA,NA,NA,NA,6,NA,NA, 
NA,NA,NA,NA,NA,NA,NA,7,NA, 
NA,NA,NA,NA,NA,NA,NA,NA,8, 
NA,NA,NA,NA,NA,NA,NA,NA,NA, 
NA,NA,NA,NA,NA,NA,NA,NA,NA
), nrow=9, ncol=9,byrow=TRUE,dimnames=list(from=states_trans2,to=states_trans2))

Less3_reg<-
d_match_surv %>% 
    dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup) %>%
    ## Filter cases with 3 or more registries
    #sum(prop.table(table(d_match_surv$dup))[1:3])
    dplyr::filter(dup<4) %>% 
     nrow()

#All possible paths through the multi-state model can be found here:
Epi::boxes.Lexis(trans_matrix2, wmult=1, hmult=1.5, cex=.8,
             boxpos = list(y = c(50,rep(c(75,25),4))+5,
                           x = c(7, 25, 25, 47, 47, 70, 70, 92, 92)), 
            txt.arr=c(expression("1) " *lambda['12']), 
                      expression("2) " *lambda['13']),
                      expression("3) " *lambda['24']),
                      expression("4) " *lambda['35']),
                      expression("5) " *lambda['46']),
                      expression("6) " *lambda['57']),
                      expression("7) " *lambda['68']),
                      expression("8) " *lambda['79']),""
                      ))
title(sub = paste0("a= Treatment completion; b=Treatment non-completion;\n No recurring states;\nAbsorbing state: Third Readmission (",scales::percent((Less3_reg/Tot_reg),accuracy = 0.1)," of the registries, considering that each registry\n had a time-to-readmission); Other causes of discharge were not events of interest")) ## internal titles


For the nine states model, o the first and second states, we subtracted one day if it overlaps with the date of discharge. For the third and the following states, we added one day in case of overlapping dates due to continuous treatments. We left 926 entries of 635 patients that had more than 3 readmissions.


### diff_bet_treat is the variable that includes time-to-readmission
### AGS: Starts in 0, excepting left truncated cases
### variables should start with time_ & status_
### Transform to years once generated
### Looks that they all share the same objective time
### AGS: If there is a continuous state, interval censoring is not necessary 
### 0's are censored status

library(mstate)

d_match_surv_msprep<-
  d_match_surv %>% 
  dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>% 
  ## Filter the fifth readmission of registries
  dplyr::filter(dup<=5) %>% 
  dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>% 
  dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>% 
  dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>% 
  #dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>% 
  tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
  #,"tipo_de_plan_res_baseline"
  dplyr::arrange(id) %>%
  dplyr::select(id, group_match,everything()) %>% 
  #display error if there are more than row per user
  purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"), 
              ~.) %>% 
  #22,916 x 20
  #Check overlapped dates
  purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
    purrr::when(dplyr::mutate(.,diff_bet_treat4= fech_ing_num_5-fech_ing_num_4)%>% dplyr::filter(diff_bet_treat4<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
    dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2),1,0,0),
                  Readmission2_status=if_else(!is.na(fech_ing_num_3),1,0,0),
                  Readmission3_status=if_else(!is.na(fech_ing_num_4),1,0,0),
                  Readmission4_status=if_else(!is.na(fech_ing_num_5),1,0,0)) %>% 
  
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
  dplyr::mutate( 
  Readmission_time= dplyr::case_when(
        Readmission_status==1~as.numeric(fech_ing_num_2-fech_ing_num_1),
        Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission2_time= dplyr::case_when(
        Readmission2_status==1~as.numeric(fech_ing_num_3-fech_ing_num_1),
        Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission3_time= dplyr::case_when(
        Readmission3_status==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
        Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission4_time= dplyr::case_when(
        Readmission4_status==1~as.numeric(fech_ing_num_5-fech_ing_num_1),
        Readmission4_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
    
    ## THE USERS THAT  DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
 dplyr::select(
     id, group_match, 
     tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4, 
     Readmission_time, Readmission_status, Readmission2_time, Readmission2_status, 
     Readmission3_time, Readmission3_status, Readmission4_time, Readmission4_status, 
     dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
     TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4, duplicates_filtered, fech_ing_num_1) %>%  
  as.data.frame() 

#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
### ANALYSES W COMPLETE DATA

hashs_w_complete_data<-
na.omit(cbind.data.frame(CONS_C1_df_dup_SEP_2020_match$hash_key,
    CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,
      CONS_C1_df_dup_SEP_2020_match$dg_cie_10_rec,
      CONS_C1_df_dup_SEP_2020_match$sexo_2,
      CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,
      CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub, #cuidado
      CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2, 
      CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,
      CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,
      CONS_C1_df_dup_SEP_2020_match$nombre_region,
      CONS_C1_df_dup_SEP_2020_match$condicion_ocupacional_corr,
      CONS_C1_df_dup_SEP_2020_match$sus_principal_mod,
      CONS_C1_df_dup_SEP_2020_match$dg_trs_cons_sus_or,
      CONS_C1_df_dup_SEP_2020_match$tenencia_de_la_vivienda_mod,
    CONS_C1_df_dup_SEP_2020_match$edad_al_ing,
    CONS_C1_df_dup_SEP_2020_match$fech_ing_num,
    CONS_C1_df_dup_SEP_2020_match$edad_ini_cons
)) %>% dplyr::distinct(`CONS_C1_df_dup_SEP_2020_match$hash_key`)


d_match_surv_msprep_cc<-
  d_match_surv %>% 
  dplyr::filter(hash_key %in% unlist(hashs_w_complete_data)) %>% 
  dplyr::group_by(group_match) %>% 
  dplyr::mutate(pairs=n_distinct(hash_key)) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(pairs==2) %>% #janitor::tabyl(pairs)
  dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>% 
  ## Filter the fifth readmission of registries
  dplyr::filter(dup<=5) %>% 
  dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>% 
  dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>% 
  dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>% 
  #dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>% 
  tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
  #,"tipo_de_plan_res_baseline"
  dplyr::arrange(id) %>%
  dplyr::select(id, group_match,everything()) %>% 
  #display error if there are more than row per user
  purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"), 
              ~.) %>% 
  #22,916 x 20
  #Check overlapped dates
  purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
    purrr::when(dplyr::mutate(.,diff_bet_treat4= fech_ing_num_5-fech_ing_num_4)%>% dplyr::filter(diff_bet_treat4<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
    dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2),1,0,0),
                  Readmission2_status=if_else(!is.na(fech_ing_num_3),1,0,0),
                  Readmission3_status=if_else(!is.na(fech_ing_num_4),1,0,0),
                  Readmission4_status=if_else(!is.na(fech_ing_num_5),1,0,0)) %>% 
  
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
  dplyr::mutate( 
  Readmission_time= dplyr::case_when(
        Readmission_status==1~as.numeric(fech_ing_num_2-fech_ing_num_1),
        Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission2_time= dplyr::case_when(
        Readmission2_status==1~as.numeric(fech_ing_num_3-fech_ing_num_1),
        Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission3_time= dplyr::case_when(
        Readmission3_status==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
        Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission4_time= dplyr::case_when(
        Readmission4_status==1~as.numeric(fech_ing_num_5-fech_ing_num_1),
        Readmission4_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
    
    ## THE USERS THAT  DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
 dplyr::select(
     id, group_match, 
     tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4, 
     Readmission_time, Readmission_status, Readmission2_time, Readmission2_status, 
     Readmission3_time, Readmission3_status, Readmission4_time, Readmission4_status, 
     dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
     TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4, duplicates_filtered,fech_ing_num_1) %>%  
  as.data.frame() 


#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
d_match_surv_msprep %>% 
  dplyr::filter(duplicates_filtered==max(duplicates_filtered)) %>% 
      knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 8. Data in Wide, Five-states (example of patients with >= 5 admissions)",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 13)%>% 
  kableExtra::add_footnote("Note= Proportions from the initial state") %>% 
  kableExtra::kable_classic() %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 8. Data in Wide, Five-states (example of patients with >= 5 admissions)
id group_match tipo_de_plan_res_1 tipo_de_plan_res_2 tipo_de_plan_res_3 tipo_de_plan_res_4 Readmission_time Readmission_status Readmission2_time Readmission2_status Readmission3_time Readmission3_status Readmission4_time Readmission4_status dias_treat_imp_sin_na_1 dias_treat_imp_sin_na_2 dias_treat_imp_sin_na_3 dias_treat_imp_sin_na_4 TD_1 TD_2 TD_3 TD_4 DWCA_1 DWCA_2 DWCA_3 DWCA_4 duplicates_filtered fech_ing_num_1
1,059 882 1 1 1 0 520 1 881 1 1,256 1 1,469 1 252 54 178 59 0 0 0 0 1 1 1 1 8 14,460
1,146 955 1 1 1 1 502 1 1,058 1 2,021 1 2,157 1 18 123 34 11 0 0 0 0 1 1 1 1 8 14,707
2,554 2,059 1 0 1 1 442 1 869 1 1,293 1 1,523 1 335 290 221 2 0 0 0 0 1 1 1 1 8 15,414
7,838 6,392 1 0 1 1 242 1 501 1 875 1 1,418 1 101 121 86 6 0 0 0 0 1 1 1 1 8 15,225
10,771 8,773 1 1 1 1 184 1 420 1 687 1 728 1 70 211 187 38 0 0 0 0 1 1 1 1 8 15,523
a Note= Proportions from the initial state
invisible("No se si debiera transformarlo a años. Tal vez a meses. Si lo transformo, me darán esas extrapolaciones bizarras del artículo anterior")
### diff_bet_treat is the variable that includes time-to-readmission
### AGS: Starts in 0, excepting left truncated cases
### variables should start with time_ & status_
### Transform to years once generated
### Looks that they all share the same objective time
### AGS: If there is a continous state, interval censoring is not necessary 
### 0's are censored status

library(mstate)

d_match_surv_msprep_oct_2022<-
  d_match_surv %>% 
  dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>% 
  ## Filter the foruth readmission of registries
  dplyr::filter(dup<=4) %>% 
  dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>% 
  dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>% 
  dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>% 
  #dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>% 
  tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
  #,"tipo_de_plan_res_baseline"
  dplyr::arrange(id) %>%
  dplyr::select(id, group_match,everything()) %>% 
  #display error if there are more than row per user
  purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"), 
              ~.) %>% 
  #22,916 x 20 # 2022-oct= 22,452
  #OCT_2022 discount a day for treatments to avoid overlapping
  dplyr::mutate(dias_treat_imp_sin_na_1= dplyr::case_when(dias_treat_imp_sin_na_1>=2~ dias_treat_imp_sin_na_1-1,T~dias_treat_imp_sin_na_1)) %>% 
  dplyr::mutate(fech_ing_num_2= dplyr::case_when(dias_treat_imp_sin_na_1==1~ fech_ing_num_2+1,T~fech_ing_num_2)) %>% 
  #Check overlapped dates
    purrr::when(dplyr::mutate(.,diff_bet_treat0= fech_ing_num_2-(dias_treat_imp_sin_na_1 +fech_ing_num_1))%>% dplyr::filter(diff_bet_treat0<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  
  #OCT 2022
  dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2)&TD_1==1,1,0,0),
              Readmission2_status=if_else(!is.na(fech_ing_num_3)&TD_1==1,1,0,0),
              Readmission3_status=if_else(!is.na(fech_ing_num_4)&TD_1==1,1,0,0))%>% 
  dplyr::mutate(Readmissionb_status=if_else(!is.na(fech_ing_num_2)&DWCA_1==1,1,0,0),
              Readmission2b_status=if_else(!is.na(fech_ing_num_3)&DWCA_1==1,1,0,0),
              Readmission3b_status=if_else(!is.na(fech_ing_num_4)&DWCA_1==1,1,0,0))%>% 
  
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
  #OCT -2022, added a variable.
  dplyr::mutate( 
  TD_time= dplyr::case_when(
        TD_1==1~as.numeric(dias_treat_imp_sin_na_1),
        TD_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  DWCA_time= dplyr::case_when(
        DWCA_1==1~as.numeric(dias_treat_imp_sin_na_1),
        DWCA_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  #OCT -2022, modified readmission time
  #First step: Readmission time and Readmissionb time
  dplyr::mutate( 
  Readmission_time= dplyr::case_when(
        Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
        Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
    dplyr::mutate( 
  Readmissionb_time= dplyr::case_when(
        Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
        Readmissionb_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  #Second step: Readmission2 time and Readmission2b time
  dplyr::mutate( 
  Readmission2_time= dplyr::case_when(
        Readmission2_status==1 & Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
        Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission2b_time= dplyr::case_when(
        Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
        Readmission2b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
 #Third step: Readmission3 time and Readmission3b time
  dplyr::mutate( 
  Readmission3_time= dplyr::case_when(
        Readmission3_status==1 & Readmission2_status==1 & Readmission_status==1 & TD_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
        Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
    dplyr::mutate( 
  Readmission3b_time= dplyr::case_when(
        Readmission3b_status==1 & Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
        Readmission3b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%

  #OCT 2022    
    dplyr::rename("TD_status"="TD_1","DWCA_status"="DWCA_1") %>% 
    ## THE USERS THAT  DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
 dplyr::mutate(TD_time= dplyr::case_when(TD_time==0 &TD_status==1~1,T~TD_time)) %>% 
  dplyr::mutate(DWCA_time= dplyr::case_when(DWCA_time==0 &DWCA_status==1~1,T~DWCA_time)) %>% 
  
 dplyr::select(
     id, group_match, 
     tipo_de_plan_res_1, TD_time, TD_status, DWCA_time, DWCA_status,
     Readmission_time, Readmission_status, Readmissionb_time, Readmissionb_status, Readmission2_time, Readmission2_status, Readmission2b_time, Readmission2b_status, 
     Readmission3_time, Readmission3_status, Readmission3b_time, Readmission3b_status,
     dias_treat_imp_sin_na_1, duplicates_filtered, fech_ing_num_1) %>%  
  dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1) %>% 
  as.data.frame() 

#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
### ANALYSES W COMPLETE DATA
d_match_surv_msprep_cc_oct_2022<-
  d_match_surv %>% 
  dplyr::filter(hash_key %in% unlist(hashs_w_complete_data)) %>% 
  dplyr::group_by(group_match) %>% 
  dplyr::mutate(pairs=n_distinct(hash_key)) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(pairs==2) %>% #janitor::tabyl(pairs)
  dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup, dias_treat_imp_sin_na) %>% 
  ## Filter the fourth readmission of registries
  dplyr::filter(dup<=4) %>% 
  dplyr::mutate(tipo_de_plan_res=if_else(tipo_de_plan_res=="1",1,0,0)) %>% 
  dplyr::mutate(TD=if_else(motivodeegreso_mod_imp=="Therapeutic discharge",1,0,0)) %>% 
  dplyr::mutate(DWCA=if_else(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>% 
  #dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>% 
  tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","tipo_de_plan_res","TD","DWCA", "dias_treat_imp_sin_na")) %>% #"","motivodeegreso_mod_imp","min_achievement"
  #,"tipo_de_plan_res_baseline"
  dplyr::arrange(id) %>%
  dplyr::select(id, group_match,everything()) %>% 
  #display error if there are more than row per user
  purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one case by row"), 
              ~.) %>% 
  #22,916 x 20 # 2022-oct= 22,452
  #OCT_2022 discount a day for treatments to avoid overlapping
  dplyr::mutate(dias_treat_imp_sin_na_1= dplyr::case_when(dias_treat_imp_sin_na_1>=2~ dias_treat_imp_sin_na_1-1,T~dias_treat_imp_sin_na_1)) %>% 
  dplyr::mutate(fech_ing_num_2= dplyr::case_when(dias_treat_imp_sin_na_1==1~ fech_ing_num_2+1,T~fech_ing_num_2)) %>% 
  #Check overlapped dates
    purrr::when(dplyr::mutate(.,diff_bet_treat0= fech_ing_num_2-(dias_treat_imp_sin_na_1 +fech_ing_num_1))%>% dplyr::filter(diff_bet_treat0<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat1= fech_ing_num_2-fech_ing_num_1)%>% dplyr::filter(diff_bet_treat1<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat2= fech_ing_num_3-fech_ing_num_2)%>% dplyr::filter(diff_bet_treat2<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  purrr::when(dplyr::mutate(.,diff_bet_treat3= fech_ing_num_4-fech_ing_num_3)%>% dplyr::filter(diff_bet_treat3<=0)%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  #OCT 2022
  dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_num_2)&TD_1==1,1,0,0),
              Readmission2_status=if_else(!is.na(fech_ing_num_3)&TD_1==1,1,0,0),
              Readmission3_status=if_else(!is.na(fech_ing_num_4)&TD_1==1,1,0,0))%>% 
  dplyr::mutate(Readmissionb_status=if_else(!is.na(fech_ing_num_2)&DWCA_1==1,1,0,0),
              Readmission2b_status=if_else(!is.na(fech_ing_num_3)&DWCA_1==1,1,0,0),
              Readmission3b_status=if_else(!is.na(fech_ing_num_4)&DWCA_1==1,1,0,0))%>% 
  
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
  #OCT -2022, added a variable.
  #OCT -2022, added a variable.
  dplyr::mutate( 
  TD_time= dplyr::case_when(
        TD_1==1~as.numeric(dias_treat_imp_sin_na_1),
        TD_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  DWCA_time= dplyr::case_when(
        DWCA_1==1~as.numeric(dias_treat_imp_sin_na_1),
        DWCA_1==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  #OCT -2022, modified readmission time
  #First step: Readmission time and Readmissionb time
  dplyr::mutate( 
  Readmission_time= dplyr::case_when(
        Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
        Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
    dplyr::mutate( 
  Readmissionb_time= dplyr::case_when(
        Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_2-fech_ing_num_1),
        Readmissionb_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  #Second step: Readmission2 time and Readmission2b time
  dplyr::mutate( 
  Readmission2_time= dplyr::case_when(
        Readmission2_status==1 & Readmission_status==1 & TD_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
        Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
  Readmission2b_time= dplyr::case_when(
        Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~ as.numeric(fech_ing_num_3-fech_ing_num_1),
        Readmission2b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
 #Third step: Readmission3 time and Readmission3b time
  dplyr::mutate( 
  Readmission3_time= dplyr::case_when(
        Readmission3_status==1 & Readmission2_status==1 & Readmission_status==1 & TD_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
        Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%
    dplyr::mutate( 
  Readmission3b_time= dplyr::case_when(
        Readmission3b_status==1 & Readmission2b_status==1 & Readmissionb_status==1 & DWCA_1==1~as.numeric(fech_ing_num_4-fech_ing_num_1),
        Readmission3b_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>%

    #OCT 2022    
    dplyr::rename("TD_status"="TD_1","DWCA_status"="DWCA_1") %>% 
    ## THE USERS THAT  DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
 dplyr::mutate(TD_time= dplyr::case_when(TD_time==0 &TD_status==1~1,T~TD_time)) %>% 
  dplyr::mutate(DWCA_time= dplyr::case_when(DWCA_time==0 &DWCA_status==1~1,T~DWCA_time)) %>% 
  
 dplyr::select(
     id, group_match, 
     tipo_de_plan_res_1, TD_time, TD_status, DWCA_time, DWCA_status,
     Readmission_time, Readmission_status, Readmissionb_time, Readmissionb_status, Readmission2_time, Readmission2_status, Readmission2b_time, Readmission2b_status, 
     Readmission3_time, Readmission3_status, Readmission3b_time, Readmission3b_status,
     dias_treat_imp_sin_na_1, duplicates_filtered, fech_ing_num_1) %>%  
  dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1) %>% 
  as.data.frame() 


#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
d_match_surv_msprep_oct_2022 %>% 
  dplyr::filter(duplicates_filtered==max(duplicates_filtered)) %>% 
      knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 8b. Data in Wide, Five-states (example of patients with >= 4 admissions)",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 13)%>% 
  kableExtra::add_footnote("Note= Proportions from the initial state") %>% 
  kableExtra::kable_classic() %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 8b. Data in Wide, Five-states (example of patients with >= 4 admissions)
id group_match tipo_de_plan_res_1 TD_time TD_status DWCA_time DWCA_status Readmission_time Readmission_status Readmissionb_time Readmissionb_status Readmission2_time Readmission2_status Readmission2b_time Readmission2b_status Readmission3_time Readmission3_status Readmission3b_time Readmission3b_status dias_treat_imp_sin_na_1 duplicates_filtered fech_ing_num_1 cens_time
1,059 882 1 3,753 0 251 1 3,753 0 520 1 3,753 0 881 1 3,753 0 1,256 1 251 8 14,460 3,753
1,146 955 1 3,506 0 17 1 3,506 0 502 1 3,506 0 1,058 1 3,506 0 2,021 1 17 8 14,707 3,506
2,554 2,059 1 2,799 0 334 1 2,799 0 442 1 2,799 0 869 1 2,799 0 1,293 1 334 8 15,414 2,799
7,838 6,392 1 2,988 0 100 1 2,988 0 242 1 2,988 0 501 1 2,988 0 875 1 100 8 15,225 2,988
10,771 8,773 1 2,690 0 69 1 2,690 0 184 1 2,690 0 420 1 2,690 0 687 1 69 8 15,523 2,690
a Note= Proportions from the initial state
invisible("No se si debiera transformarlo a años. Tal vez a meses. Si lo transformo, me darán esas extrapolaciones bizarras del artículo anterior")


ms_d_match_surv <- mstate::msprep(time = c(NA, "Readmission_time", "Readmission2_time", "Readmission3_time", "Readmission4_time"), 
                  status = c(NA, "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status"), 
                                            data = d_match_surv_msprep,
                                            id = "id",
                                            trans = trans_matrix,
                                            keep =  c("fech_ing_num_1", paste0("tipo_de_plan_res_",1:4), paste0("TD_",1:4),paste0("DWCA_",1:4),paste0("dias_treat_imp_sin_na_",1:4)))




ms_d_match_surv_cc <- mstate::msprep(time = c(NA, "Readmission_time", "Readmission2_time", "Readmission3_time", "Readmission4_time"), 
                  status = c(NA, "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status"), 
                                            data = d_match_surv_msprep_cc,
                                            id = "id",
                                            trans = trans_matrix,
                                            keep =  c("fech_ing_num_1",paste0("tipo_de_plan_res_",1:4), paste0("TD_",1:4),paste0("DWCA_",1:4),paste0("dias_treat_imp_sin_na_",1:4)))
#From starting state 1, subject 66 74 19717 has smallest transition time with status=0
#Everyne has an infinite number in the transition. A good exmple is the user 19717. Only experienced a therapeutic discharge, but in the time from readmission it starts on 910 but ends in INf


#_#_#_#_#_#_#_#_#_
#2022 oct

ms_d_match_surv_oct_2022 <- mstate::msprep(time = c(NA, "TD_time", "DWCA_time", "Readmission_time", "Readmissionb_time", "Readmission2_time", "Readmission2b_time", "Readmission3_time", "Readmission3b_time"), 
                  status = c(NA, "TD_status", "DWCA_status", "Readmission_status", "Readmissionb_status", "Readmission2_status", "Readmission2b_status", "Readmission3_status", "Readmission3b_status"), 
                                            data = d_match_surv_msprep_oct_2022,
                                            id = "id",
                                            trans = trans_matrix2,
                                            keep =  c("fech_ing_num_1", paste0("tipo_de_plan_res_",1)))

invisible("Para ver si hay casos en que la fecha de término del evento, y el inicio coinciden o es menor")
if(d_match_surv_msprep_oct_2022 %>% 
    dplyr::filter(id %in% unlist(
        ms_d_match_surv_oct_2022%>% 
            dplyr::filter(Tstop<=Tstart) %>% 
            dplyr::select(id,from,to,trans,Tstart,Tstop,time,status) %>% 
            distinct(id))) %>%
    #dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>% 
    nrow()>0){stop("There are cases where Tstop-Tstart <=0")}

if(ms_d_match_surv %>% 
    dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}
if(ms_d_match_surv_oct_2022 %>% 
    dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}

ms_d_match_surv_cc_oct_2022 <- mstate::msprep(time = c(NA, "TD_time", "DWCA_time", "Readmission_time", "Readmissionb_time", "Readmission2_time", "Readmission2b_time", "Readmission3_time", "Readmission3b_time"), 
                  status = c(NA, "TD_status", "DWCA_status", "Readmission_status", "Readmissionb_status", "Readmission2_status", "Readmission2b_status", "Readmission3_status", "Readmission3b_status"), 
                                            data = d_match_surv_msprep_cc_oct_2022,
                                            id = "id",
                                            trans = trans_matrix2,
                                            keep =  c("fech_ing_num_1",paste0("tipo_de_plan_res_",1)))


invisible("Para ver si hay casos en que la fecha de término del evento, y el inicio coinciden o es menor")
if(d_match_surv_msprep_cc_oct_2022 %>% 
    dplyr::filter(id %in% unlist(
        ms_d_match_surv_cc_oct_2022%>% 
            dplyr::filter(Tstop<=Tstart) %>% 
            dplyr::select(id,from,to,trans,Tstart,Tstop,time,status) %>% 
            distinct(id))) %>%
    #dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>% 
    nrow()>0){stop("There are cases where Tstop-Tstart <=0")}

if(ms_d_match_surv_cc %>% 
    dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}
if(ms_d_match_surv_cc_oct_2022 %>% 
    dplyr::filter(is.infinite(time)) %>% nrow()>0){stop("Error: there are infinite cases")}
rio::export(
d_match_surv_msprep %>% 
      dplyr::select(
      id, group_match,Readmission_status, Readmission2_status, Readmission3_status, Readmission4_status,
      Readmission_time, Readmission2_time, Readmission3_time, Readmission4_time,
      tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
      dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
      TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1) %>% 
  dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1), 
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22.dta"))

rio::export(
d_match_surv_msprep %>% 
  rename_with(~ c("group.match","Readmission.status", "Readmission2.status", "Readmission3.status", "Readmission4.status",
      "Readmission.time", "Readmission2.time", "Readmission3.time", "Readmission4.time"), c("group_match", "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status",
      "Readmission_time", "Readmission2_time", "Readmission3_time","Readmission4_time")) %>% 
      dplyr::select(
      id, group.match,Readmission.status, Readmission2.status, Readmission3.status, Readmission4.status,
      Readmission.time, Readmission2.time, Readmission3.time, Readmission4.time,
      tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
      dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
      TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1
      )%>% 
  dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1), 
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22.csv"))
##cc
rio::export(
d_match_surv_msprep_cc %>% 
      dplyr::select(
      id, group_match,Readmission_status, Readmission2_status, Readmission3_status, Readmission4_status,
      Readmission_time, Readmission2_time, Readmission3_time, Readmission4_time,
      tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
      dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
      TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1)%>% 
  dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1), 
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22_cc.dta"))

rio::export(
d_match_surv_msprep_cc %>% 
  rename_with(~ c("group.match","Readmission.status", "Readmission2.status", "Readmission3.status", "Readmission4.status",
      "Readmission.time", "Readmission2.time", "Readmission3.time", "Readmission4.time"), c("group_match", "Readmission_status", "Readmission2_status", "Readmission3_status", "Readmission4_status",
      "Readmission_time", "Readmission2_time", "Readmission3_time","Readmission4_time")) %>% 
      dplyr::select(
      id, group.match,Readmission.status, Readmission2.status, Readmission3.status, Readmission4.status,
      Readmission.time, Readmission2.time, Readmission3.time, Readmission4.time,
      tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3, tipo_de_plan_res_4,
      dias_treat_imp_sin_na_1, dias_treat_imp_sin_na_2, dias_treat_imp_sin_na_3, dias_treat_imp_sin_na_4,
      TD_1, TD_2, TD_3, TD_4, DWCA_1, DWCA_2, DWCA_3, DWCA_4,fech_ing_num_1
      )%>% 
  dplyr::mutate(cens_time=as.numeric(as.Date("2019-11-13"))-fech_ing_num_1), 
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/five_st_msprep_apr22_cc.csv"))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
##OCT 2022
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
rio::export(
d_match_surv_msprep_oct_2022, 
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22.dta"))

rio::export(
d_match_surv_msprep_oct_2022 %>% 
  select_all(~gsub("\\_", ".", tolower(.))), 
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22.csv"))

rio::export(
d_match_surv_msprep_cc_oct_2022, 
paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22_cc.dta"))

rio::export(
d_match_surv_msprep_cc_oct_2022 %>%
  select_all(~gsub("\\_", ".", tolower(.))), paste0(sub("/SUD_CL","",getwd()),"/_mult_state_ags/nine_st_msprep_oct22_cc.csv"))


tab9_f<-
data.frame(mstate::events(ms_d_match_surv)$Frequencies) %>% 
    dplyr::filter(to!="total entering") %>% 
    left_join(data.frame(events(ms_d_match_surv)$Proportions), by=c("from", "to")) %>% 
    dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>% 
    dplyr::arrange(from, to) %>% 
    dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>% 
    dplyr::filter(diff==0) %>%
    dplyr::select(-diff) %>% 
    dplyr::mutate(comb=paste0(from,"_",to)) %>% 
    dplyr::filter(comb %in% c("Admission_Readmission", "Readmission_Readmission2","Readmission2_Readmission3","Readmission3_Readmission4","Readmission4_Readmission5")) %>% 
    dplyr::select(-comb) %>% 
    dplyr::mutate(Proportions=scales::percent(Proportions))

tab9_f %>% 
  dplyr::left_join(data.frame(mstate::events(ms_d_match_surv)$Frequencies) %>% 
    dplyr::filter(to=="total entering") %>% dplyr::select(from,Freq),by="from") %>% 
  dplyr::select(from, to, Frequencies, Freq, Proportions)%>% 
  dplyr::rename("Total"="Freq") %>% 
    knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 9b. Empirical State Transition Matrix, Recurrent Events Model",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>% 
  kableExtra::add_footnote("Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.") %>% 
  kableExtra::kable_classic() %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 9b. Empirical State Transition Matrix, Recurrent Events Model
from to Frequencies Total Proportions
Admission Readmission 6,370 22,452 28.37%
Readmission Readmission2 1,991 6,370 31.26%
Readmission2 Readmission3 635 1,991 31.89%
Readmission3 Readmission4 200 635 31.50%
a Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.


tab9_fb<-
data.frame(mstate::events(ms_d_match_surv_oct_2022)$Frequencies) %>% 
    dplyr::filter(to!="total entering") %>% 
    left_join(data.frame(events(ms_d_match_surv_oct_2022)$Proportions), by=c("from", "to")) %>% 
    dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>% 
    dplyr::arrange(from, to) %>% 
    dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>% 
    dplyr::filter(diff==0) %>%
    dplyr::select(-diff) %>% 
    dplyr::mutate(comb=paste0(from,"_",to)) %>% 
    dplyr::filter(Frequencies>0) %>% 
    dplyr::select(-comb) %>% 
    dplyr::mutate(Proportions=scales::percent(Proportions))

tab9_fb %>% 
  dplyr::left_join(data.frame(mstate::events(ms_d_match_surv_oct_2022)$Frequencies) %>% 
    dplyr::filter(to=="total entering") %>% dplyr::select(from,Freq),by="from") %>% 
  dplyr::select(from, to, Frequencies, Freq, Proportions)%>% 
  dplyr::rename("Total"="Freq") %>% 
    knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 9b. Empirical State Transition Matrix, Recurrent Events Model (9 states)",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>% 
  kableExtra::add_footnote("Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.") %>% 
  kableExtra::kable_classic() %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 9b. Empirical State Transition Matrix, Recurrent Events Model (9 states)
from to Frequencies Total Proportions
Admission TD 5,356 22,452 23.855%
Admission DWCA 13,067 22,452 58.200%
Admission no event 4,029 22,452 17.945%
TD Readmission 1,518 5,356 28.342%
TD no event 3,838 5,356 71.658%
DWCA Readmissionb 4,046 13,067 30.963%
DWCA no event 9,021 13,067 69.037%
Readmission Readmission2 447 1,518 29.447%
Readmission no event 1,071 1,518 70.553%
Readmissionb Readmission2b 1,305 4,046 32.254%
Readmissionb no event 2,741 4,046 67.746%
Readmission2 Readmission3 127 447 28.412%
Readmission2 no event 320 447 71.588%
Readmission2b Readmission3b 447 1,305 34.253%
Readmission2b no event 858 1,305 65.747%
Readmission3 no event 127 127 100.000%
Readmission3b no event 447 447 100.000%
a Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.


ms_d_match_surv_res<-
  #El arrival y el número al lado del arrival repreenta el número de la transición
  #mstate::expand.covs(ms_d_match_surv, "arrival", append = TRUE, longnames =F) %>% 
  ms_d_match_surv %>% 
  data.frame() %>%
  dplyr::mutate(tipo_de_plan_res=dplyr::case_when(tipo_de_plan_res_1==1 & trans==1~1,
                                                  tipo_de_plan_res_2==1 & trans==2~1,
                                                  tipo_de_plan_res_3==1 & trans==3~1,
                                                  tipo_de_plan_res_4==1 & trans==4~1,
                                                  T~0)) %>% 
    dplyr::mutate(TD=dplyr::case_when(TD_1==1 & trans==1~1,
                                                  TD_2==1 & trans==2~1,
                                                  TD_3==1 & trans==3~1,
                                                  TD_4==1 & trans==4~1,
                                                  T~0)) %>% 
    dplyr::mutate(TD=dplyr::case_when(TD_1==1 & trans==1~1,
                                                  TD_2==1 & trans==2~1,
                                                  TD_3==1 & trans==3~1,
                                                  TD_4==1 & trans==4~1,
                                                  T~0)) %>% 
    dplyr::mutate(days_treated=dplyr::case_when(!is.na(dias_treat_imp_sin_na_1) & trans==1~dias_treat_imp_sin_na_1,
                                                !is.na(dias_treat_imp_sin_na_2) & trans==2~dias_treat_imp_sin_na_2,
                                                !is.na(dias_treat_imp_sin_na_3) & trans==3~dias_treat_imp_sin_na_3,
                                                !is.na(dias_treat_imp_sin_na_4) & trans==4~dias_treat_imp_sin_na_4,
                                                  T~NA_real_)) 

ms_d_match_surv$tipo_de_plan_res<-ms_d_match_surv_res$tipo_de_plan_res
ms_d_match_surv$TD<-ms_d_match_surv_res$TD
ms_d_match_surv$days_treated<-ms_d_match_surv_res$days_treated
ms_d_match_surv_oct_2022$days_treated<-ms_d_match_surv_oct_2022$Tstart


Consideration of the Appropriateness of the proportional hazards assumption

Continuous variables need to be categorized into groups. The plot described is also known as the log(−log(survival)) plot, as the cumulative hazard is equal to the negative logarithm of the survival proportion. This approach requires a subjective assessment (Bradburn, Clark, Love, et al., 2003).

#Bradburn, M., Clark, T., Love, S. et al. Survival Analysis Part III: Multivariate data analysis – choosing a model and assessing its adequacy and fit. Br J Cancer 89, 605–611 (2003). https://doi.org/10.1038/sj.bjc.6601120
plots<- data.frame(title=rep(
  c("Admission to\nReadmission", "Readmission to\nSecond Readmission", "Second to\nThird Readmission", "Third to\nFourth Readmission" ),1),trans=rep(1:max(trans_matrix,na.rm=T),1))


## SIN COVARIABLES
layout(matrix(1:4, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time), 
     log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
     xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
     las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time), 
      log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-4, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}
Figure 18a. LOG CUMULATIVE HAZARD VS LOG TIME PLOT (w/o covars)

Figure 18a. LOG CUMULATIVE HAZARD VS LOG TIME PLOT (w/o covars)

if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process_APR_22.Rmd","",path),"_mult_state_ags/ph_test1.jpg"), height=7, width= 13, res= 320, units = "in")
  
layout(matrix(1:4, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time), 
     log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
     xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
     las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time), 
      log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-4, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}

dev.off()
}
plots2<- data.frame(title=rep(
  c("Admission to\nTr. Completion", "Admission to\nTr. Non-completion", "Completion to \nReadmission", "Non-completion to \nReadmission", "Readmission to\nSecond Readmission (TC)", "Readmission to\nSecond Readmission (TNC)", "Second to\nThird Readmission (TC)", "Second to\nThird Readmission (TNC)" ),1),trans=rep(1:max(trans_matrix,na.rm=T),1))

## SIN COVARIABLES
  
layout(matrix(1:8, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time), 
     log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
     xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
     las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time), 
      log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}
Figure 18b. LOG CUMULATIVE HAZARD VS LOG TIME PLOT (w/o covars) (9 states)

Figure 18b. LOG CUMULATIVE HAZARD VS LOG TIME PLOT (w/o covars) (9 states)

if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process_APR_22.Rmd","",path),"/_mult_state_ags/ph_test1_9s.jpg"), height=7, width= 13, res= 320, units = "in")
  
layout(matrix(1:8, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time), 
     log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
     xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
     las=1,cex.lab=.8, cex.axis=.8)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time), 
      log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(6,-2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}

dev.off()
}
layout(matrix(1:4, nc = 2, byrow = F))

for(i in c(1:max(trans_matrix,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time, 
     -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
     xlab="Days", ylab="", xaxs="i",yaxs="i", 
     las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time, 
      -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}
Figure 18c. CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) (w/o covars)

Figure 18c. CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) (w/o covars)

if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process1_APR_22.Rmd","",path),"/_mult_state_ags/ph_test2.jpg"), height=7, width= 13, res= 320, units = "in")
  
layout(matrix(1:4, nc = 2, byrow = F))

for(i in c(1:max(trans_matrix,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time, 
     -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
     xlab="Days", ylab="", xaxs="i",yaxs="i", 
     las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time, 
      -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots[i,"title"]), cex.main=1.2)
}

dev.off()
}
layout(matrix(1:8, nc = 2, byrow = F))
for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time, 
     -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
     xlab="Days", ylab="", xaxs="i",yaxs="i", 
     las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time, 
      -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}
Figure 18d. CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) (w/o covars) (9 states)

Figure 18d. CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) (w/o covars) (9 states)

if(no_mostrar==1){
jpeg(paste0(gsub("SUD_CL/Matching_Process_APR_22.Rmd","",path),"/_mult_state_ags/ph_test2_9s.jpg"), height=13, width= 7, res= 740, units = "in")
  
layout(matrix(1:8, nc = 2, byrow = F))

for(i in c(1:max(trans_matrix2,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$time, 
     -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
     xlab="Days", ylab="", xaxs="i",yaxs="i", 
     las=1,cex.lab=.8, cex.axis=.8, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$time, 
      -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv_oct_2022, trans==plots2[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(2000,.2, c("AMBULATORY", "RESIDENTIAL"), bty="n", lty=c(2,1), cex=.8)
title(main=paste0(plots2[i,"title"]), cex.main=1.2)
}

dev.off()
}
cox_zph_trans<-data.frame()#rn="0",chisq=1,df=1,p=1
for(i in c(1:max(trans_matrix,na.rm=T))){

cox_zph_trans<-dplyr::bind_rows(cox_zph_trans,cbind.data.frame(trans=i,
                                data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+Tstart,
              data=subset(ms_d_match_surv_res, trans==i),method = "breslow"))$table, keep.rownames = T))
)
}

cox_zph_trans$p<-round(cox_zph_trans$p,3)
cox_zph_trans$chisq<-round(cox_zph_trans$chisq,3)

cox_zph_trans_cont<-data.frame()
for(i in c(1:max(trans_matrix,na.rm=T))){
cox_zph_trans_cont<-dplyr::bind_rows(cox_zph_trans_cont,cbind.data.frame(trans=i,
  data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+ factor(TD_1)+ factor(TD_2)+ factor(TD_3)+ factor(TD_4)+Tstart,data=subset(ms_d_match_surv_res, trans==i),method = "breslow"))$table, keep.rownames = T)))
}
cox_zph_trans_cont$p<-round(cox_zph_trans_cont$p,3)


cox_zph_trans_cont %>% 
    knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 10a. Schoenfeld's global test to test the proportional hazards assumption",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 12)%>% 
  kableExtra::kable_classic() %>% 
  kableExtra::group_rows("$1^{st}$ transition",1,6, escape=T) %>% 
  kableExtra::group_rows("$2^{nd}$ transition",7,13, escape=T) %>% 
  kableExtra::group_rows("$3^{rd}$ transition",14,20, escape=T) %>% 
  kableExtra::group_rows("$4^{th}$ transition",21,27, escape=T) %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 10a. Schoenfeld’s global test to test the proportional hazards assumption
trans rn chisq df p
\(1^{st}\) transition
1 factor(tipo_de_plan_res_1) 0.0976935 1 0.755
1 factor(TD_1) 26.9107013 1 0.000
1 factor(TD_2) 0.0009311 1 0.976
1 factor(TD_3) 0.7297580 1 0.393
1 factor(TD_4) 0.2136621 1 0.644
1 GLOBAL 29.3191134 5 0.000
\(2^{nd}\) transition
2 factor(tipo_de_plan_res_1) 0.7068110 1 0.401
2 factor(TD_1) 2.7227294 1 0.099
2 factor(TD_2) 6.9103154 1 0.009
2 factor(TD_3) 0.6363786 1 0.425
2 factor(TD_4) 0.3301789 1 0.566
2 Tstart 20.1078288 1 0.000
2 GLOBAL 29.1031821 6 0.000
\(3^{rd}\) transition
3 factor(tipo_de_plan_res_1) 0.0009246 1 0.976
3 factor(TD_1) 0.2123946 1 0.645
3 factor(TD_2) 0.0027750 1 0.958
3 factor(TD_3) 2.5872079 1 0.108
3 factor(TD_4) 0.4839282 1 0.487
3 Tstart 4.0671924 1 0.044
3 GLOBAL 7.1687374 6 0.306
\(4^{th}\) transition
4 factor(tipo_de_plan_res_1) 0.0292211 1 0.864
4 factor(TD_1) 0.8459203 1 0.358
4 factor(TD_2) 0.0041408 1 0.949
4 factor(TD_3) 2.1742646 1 0.140
4 factor(TD_4) 12.0116147 1 0.001
4 Tstart 0.8503446 1 0.356
4 GLOBAL 13.9760848 6 0.030

As seen in both Figures above, the cumulative hazards does not follow a proportional trend in the four transitions.

cox_zph_trans2<-data.frame()#rn="0",chisq=1,df=1,p=1
for(i in c(1:max(trans_matrix2,na.rm=T))){

cox_zph_trans2<-dplyr::bind_rows(cox_zph_trans2,cbind.data.frame(trans=i,data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+Tstart, data=subset(ms_d_match_surv_oct_2022, trans==i),method = "breslow"))$table, keep.rownames = T))
)
}

cox_zph_trans2$p<-round(cox_zph_trans2$p,3)
cox_zph_trans2$chisq<-round(cox_zph_trans2$chisq,3)

cox_zph_trans_cont2<-data.frame()
for(i in c(1:max(trans_matrix2,na.rm=T))){
cox_zph_trans_cont2<-dplyr::bind_rows(cox_zph_trans_cont2,cbind.data.frame(trans=i,
  data.table::data.table(cox.zph(coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+Tstart,data=subset(ms_d_match_surv_oct_2022, trans==i),method = "breslow"))$table, keep.rownames = T)))
}
cox_zph_trans_cont2$p<-round(cox_zph_trans_cont2$p,3)


cox_zph_trans_cont2 %>% 
    knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 10b. Schoenfeld's global test to test the proportional hazards assumption",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 12)%>% 
  kableExtra::kable_classic() %>% 
  kableExtra::group_rows("$1^{st}$ transition",1,2, escape=T) %>% 
  kableExtra::group_rows("$2^{nd}$ transition",3,4, escape=T) %>% 
  kableExtra::group_rows("$3^{rd}$ transition",5,7, escape=T) %>% 
  kableExtra::group_rows("$4^{th}$ transition",8,10, escape=T) %>%
  kableExtra::group_rows("$5^{th}$ transition",11,13, escape=T) %>% 
  kableExtra::group_rows("$6^{th}$ transition",14,16, escape=T) %>% 
  kableExtra::group_rows("$7^{th}$ transition",17,19, escape=T) %>%
  kableExtra::group_rows("$8^{th}$ transition",20,22, escape=T) %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 10b. Schoenfeld’s global test to test the proportional hazards assumption
trans rn chisq df p
\(1^{st}\) transition
1 factor(tipo_de_plan_res_1) 189.1263234 1 0.000
1 GLOBAL 189.1263234 1 0.000
\(2^{nd}\) transition
2 factor(tipo_de_plan_res_1) 294.5367633 1 0.000
2 GLOBAL 294.5367633 1 0.000
\(3^{rd}\) transition
3 factor(tipo_de_plan_res_1) 3.1697499 1 0.075
3 Tstart 33.5573442 1 0.000
3 GLOBAL 42.0270616 2 0.000
\(4^{th}\) transition
4 factor(tipo_de_plan_res_1) 33.4769197 1 0.000
4 Tstart 146.2360490 1 0.000
4 GLOBAL 196.0367419 2 0.000
\(5^{th}\) transition
5 factor(tipo_de_plan_res_1) 0.2108743 1 0.646
5 Tstart 1.0882952 1 0.297
5 GLOBAL 1.3876374 2 0.500
\(6^{th}\) transition
6 factor(tipo_de_plan_res_1) 0.0429944 1 0.836
6 Tstart 4.9996251 1 0.025
6 GLOBAL 5.0006658 2 0.082
\(7^{th}\) transition
7 factor(tipo_de_plan_res_1) 0.5451631 1 0.460
7 Tstart 0.2881151 1 0.591
7 GLOBAL 0.8216015 2 0.663
\(8^{th}\) transition
8 factor(tipo_de_plan_res_1) 1.4744209 1 0.225
8 Tstart 4.7583466 1 0.029
8 GLOBAL 5.9273888 2 0.052


Session Info

Sys.getenv("R_LIBS_USER")
## [1] "C:/Users/CISS Fondecyt/OneDrive/Documentos/R/win-library/4.0"
rstudioapi::getSourceEditorContext()
## Document Context: 
## - id:        '99E6795C'
## - path:      'C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2019 (github)/SUD_CL/Matching_Process1_APR_22.Rmd'
## - contents:  <5072 rows>
## Document Selection:
## - [4569, 1] -- [4569, 25]: 'ms_d_match_surv_oct_2022 <...>'
if (grepl("CISS Fondecyt",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state_1_apr22.RData")
  } else if (grepl("andre",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/andre/Desktop/SUD_CL/mult_state_1_apr22.RData")
  } else if (grepl("E:",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("E:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state_1_apr22.RData")
  } else if (grepl("G:",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state_1_apr22.RData")
  } else {
    save.image("~/mult_state_1_apr22.RData")
    path.expand("~/mult_state_1_apr22.RData")
  }

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] mstate_0.3.1            Epi_2.44                DiagrammeR_1.0.6.1.9000
##  [4] gurobi_9.1-2            ggh4x_0.2.1             promises_1.2.0.1       
##  [7] Amelia_1.8.0            Rcpp_1.0.7              polycor_0.7-10         
## [10] compareGroups_4.5.1     etm_1.1.1               flexsurv_2.1           
## [13] eha_2.9.0               cobalt_4.3.1            sensitivityfull_1.5.6  
## [16] sensitivity2x2xk_1.01   MatchIt_4.2.0           tableone_0.13.0        
## [19] stargazer_5.2.2         reshape2_1.4.4          exactRankTests_0.8-32  
## [22] gridExtra_2.3           foreign_0.8-81          glpkAPI_1.3.2          
## [25] designmatch_0.3.1       Rglpk_0.6-4             slam_0.1-48            
## [28] MASS_7.3-51.6           survMisc_0.5.5          ggfortify_0.4.12       
## [31] rateratio.test_1.0-2    survminer_0.4.9         ggpubr_0.4.0           
## [34] epiR_2.0.33             forcats_0.5.1           purrr_0.3.4            
## [37] readr_2.0.1             tibble_3.0.3            tidyverse_1.3.1        
## [40] treemapify_2.5.5        ggiraph_0.7.10          chilemapas_0.2         
## [43] sf_1.0-2                finalfit_1.0.3          lsmeans_2.30-0         
## [46] emmeans_1.6.3           choroplethrAdmin1_1.1.1 choroplethrMaps_1.0.1  
## [49] RColorBrewer_1.1-2      panelr_0.7.5            lme4_1.1-27.1          
## [52] Matrix_1.2-18           dplyr_1.0.7             data.table_1.14.2      
## [55] codebook_0.9.2          devtools_2.4.2          usethis_2.0.1          
## [58] sqldf_0.4-11            RSQLite_2.2.8           gsubfn_0.7             
## [61] proto_1.0.0             broom_0.7.9             zoo_1.8-9              
## [64] rbokeh_0.5.2            janitor_2.1.0           plotly_4.9.4.1         
## [67] kableExtra_1.3.4        Hmisc_4.5-0             Formula_1.2-4          
## [70] survival_3.1-12         lattice_0.20-41         ggplot2_3.3.5          
## [73] stringr_1.4.0           stringi_1.4.6           tidyr_1.1.3            
## [76] knitr_1.33              matrixStats_0.60.0      boot_1.3-28            
## 
## loaded via a namespace (and not attached):
##   [1] svglite_2.0.0          class_7.3-19           ps_1.6.0              
##   [4] rprojroot_2.0.2        crayon_1.4.1           V8_3.4.2              
##   [7] nlme_3.1-148           backports_1.1.7        ggcorrplot_0.1.3      
##  [10] reprex_2.0.1           rlang_0.4.11           readxl_1.3.1          
##  [13] nloptr_1.2.2.2         callr_3.7.0            flextable_0.6.7       
##  [16] xgboost_1.4.1.1        data.tree_1.0.0        cmprsk_2.2-10         
##  [19] bit64_4.0.5            glue_1.4.1             parallel_4.0.2        
##  [22] processx_3.5.2         shinyAce_0.4.1         classInt_0.4-3        
##  [25] randomizr_0.20.0       tcltk_4.0.2            haven_2.4.3           
##  [28] tidyselect_1.1.1       km.ci_0.5-2            rio_0.5.27            
##  [31] chron_2.3-56           xtable_1.8-4           magrittr_2.0.1        
##  [34] evaluate_0.14          gdtools_0.2.3          cli_3.0.1             
##  [37] rstudioapi_0.13        bslib_0.2.5.1          radiant.model_1.4.0   
##  [40] rpart_4.1-15           jtools_2.1.3           maps_3.3.0            
##  [43] shiny_1.6.0            gistr_0.9.0            xfun_0.25             
##  [46] pkgbuild_1.2.0         cluster_2.1.2          ggfittext_0.9.1       
##  [49] ggrepel_0.8.2          png_0.1-7              withr_2.4.2           
##  [52] ranger_0.13.1          plyr_1.8.6             cellranger_1.1.0      
##  [55] e1071_1.7-8            survey_4.1-1           coda_0.19-4           
##  [58] pillar_1.7.0           cachem_1.0.6           multcomp_1.4-17       
##  [61] fs_1.5.0               vctrs_0.3.8            ellipsis_0.3.2        
##  [64] generics_0.1.0         tools_4.0.2            munsell_0.5.0         
##  [67] proxy_0.4-26           fastmap_1.1.0          compiler_4.0.2        
##  [70] pkgload_1.2.1          abind_1.4-5            httpuv_1.6.2          
##  [73] sessioninfo_1.1.1      biostat3_0.1.5         visNetwork_2.0.9      
##  [76] utf8_1.1.4             later_1.3.0            NeuralNetTools_1.5.2  
##  [79] jsonlite_1.7.2         scales_1.1.1           carData_3.0-4         
##  [82] estimability_1.3       lazyeval_0.2.2         radiant.data_1.3.12   
##  [85] car_3.0-11             latticeExtra_0.6-29    checkmate_2.0.0       
##  [88] rmarkdown_2.11         openxlsx_4.2.4         sandwich_3.0-1        
##  [91] webshot_0.5.2          pander_0.6.3           import_1.2.0          
##  [94] numDeriv_2016.8-1.1    yaml_2.2.1             systemfonts_1.0.2     
##  [97] htmltools_0.5.1.1      memoise_2.0.0          quadprog_1.5-8        
## [100] viridisLite_0.4.0      jsonvalidate_1.1.0     digest_0.6.25         
## [103] assertthat_0.2.1       mime_0.11              BiasedUrn_1.07        
## [106] KMsurv_0.1-5           units_0.7-2            remotes_2.4.0         
## [109] blob_1.2.2             labeling_0.4.2         deSolve_1.28          
## [112] splines_4.0.2          pdp_0.7.0              hms_1.1.0             
## [115] rmapshaper_0.4.5       modelr_0.1.8           colorspace_1.4-1      
## [118] base64enc_0.1-3        mnormt_2.0.2           tmvnsim_1.0-2         
## [121] nnet_7.3-16            sass_0.4.0             mvtnorm_1.1-2         
## [124] fansi_0.4.1            tzdb_0.1.2             truncnorm_1.0-8       
## [127] R6_2.5.1               grid_4.0.2             crul_1.1.0            
## [130] lifecycle_1.0.1        labelled_2.8.0         zip_2.2.0             
## [133] writexl_1.3            curl_4.3               geojsonlint_0.4.0     
## [136] ggsignif_0.6.3         pryr_0.1.5             minqa_1.2.4           
## [139] testthat_3.0.4         jquerylib_0.1.4        snakecase_0.11.0      
## [142] desc_1.3.0             TH.data_1.0-10         htmlwidgets_1.5.3.9000
## [145] officer_0.3.19         markdown_1.1           crosstalk_1.1.1       
## [148] mgcv_1.8-31            rvest_1.0.1            htmlTable_2.2.1       
## [151] patchwork_1.1.1        codetools_0.2-16       lubridate_1.7.10      
## [154] muhaz_1.2.6.4          prettyunits_1.1.1      psych_2.1.6           
## [157] dbplyr_2.1.1           gtable_0.3.0           DBI_1.1.1             
## [160] highr_0.9              httr_1.4.2             KernSmooth_2.23-17    
## [163] farver_2.0.3           uuid_0.1-4             hexbin_1.28.2         
## [166] mice_3.13.0            xml2_1.3.2             shinyFiles_0.9.0      
## [169] bit_4.0.4              jpeg_0.1-9             pkgconfig_2.0.3       
## [172] rstatix_0.7.0          mitools_2.4            HardyWeinberg_1.7.2   
## [175] Rsolnp_1.16            httpcode_0.3.0
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
      "function(settings, json) {",
      "$(this.api().tables().body()).css({'font-size': '80%'});",
      "}")))