Transition matrix
The model schematic illustrates treatment progression and the possible transitions between the states and the model structure. The first represents a semi-competing risks model, and the second represent a competing risk structure.
library(igraph)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
## 3 ESTADOS SIMPLES ##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
links<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 2),"Therapeutic\nDischarge"),
to = c(rep(c("Therapeutic\nDischarge"),1), rep("Readmission", 2)),
value = c("1","2","3"))
links2<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 3),"Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),
to = c(rep(c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),1), rep("Readmission", 3)),
value = c("1","2","3","4","5"))
#dev.off()
#https://www.r-graph-gallery.com/248-igraph-plotting-parameters.html
#https://rstudio-pubs-static.s3.amazonaws.com/341807_7b9d1a4e787146d492115f90ad75cd2d.html
par(mfrow=c(1, 2))
#for (i in c(2560:2660)){
set.seed(2630) #i #2660 #2646 #2642 #2630 #2650
co <- layout.fruchterman.reingold(graph_from_data_frame(links, directed=TRUE))
plot(graph_from_data_frame(links, directed=TRUE),
asp = 0,
layout= co,
edge.label=links$value,
edge.label.cex=3,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
#main=paste0(i),
outputorder="edgesfirst",
vertex.label.dist=0,
vertex.cex = 3)
#}
title("a) Three-states Model (Simplest)", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest;\nModified version of an illness-death model") ## internal titles
set.seed(10990)#i #10990 10921 10898 10835
co2 <- layout.fruchterman.reingold(graph_from_data_frame(links2, directed=TRUE))
plot(graph_from_data_frame(links2, directed=TRUE),
asp = 0,
layout= co2,
edge.label=links2$value,
edge.label.cex=2.5,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
outputorder="edgesfirst",
#main=paste0(i),
vertex.label.dist=0,
vertex.cex = 3)
title("b) Four-states Model", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest") ## internal titles

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
## Probando con paquetes estadísticos
if(no_mostrar==1){
library(igraph)
Nodes <- c("Admitted to\nGP","Admitted to\nWE","Therapeutic\nDischarge","Discharge w/o\nClinical Advice","Readmission") #states possible in MSM
Edges <- list("Admitted to\nGP"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Admitted to\nWE"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Therapeutic\nDischarge"=list(edges=c("Readmission")),
"Discharge /wo\nClinical Advice"=list(edges=c("Readmission")),
"Readmission"=list(edges=NULL)) #transitions from each state
RCLTtree <- new("graphNEL",nodes=Nodes,edgeL=Edges,edgemode="directed")
plot(RCLTtree)
#https://www.rdocumentation.org/packages/msSurv/versions/1.2-2/topics/msSurv
msSurv(LTRCdata, RCLTtree, cens.type="ind", LT=FALSE, bs=FALSE, B=200)
}
library(Epi)
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
mat_3_states <- trans.illdeath(names=c("Admission","Therapeutic\nDischarge","Readmission"))
#All possible paths through the multi-state model can be found here:
#paths(mat_3_states)
box_ms_3s<-
boxes.Lexis(mat_3_states, wmult=1.25, hmult=1.25, cex=1.2,
boxpos = list(x = c(20, 70, 70), y = c(80, 80, 20)),
txt.arr=c(expression(" 1) " *lambda['01']),
expression(" 2) " *lambda['02']),
expression(" 3) " *lambda['12'])))
title("a) Three-states Model (Simplest)", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest;\nModified version of an illness-death model") ## internal titles

#Los números son determinados por posición en cada columna (o eje Y).
#Si uno quiere definir para la otra fila, salta al siguiente vector:
mat_4_states<- transMat(list(c(2,3,4), 4, 4, c()),
names = c("Admission", "Therapeutic\nDischarge", "Discharge Without\nMedical Advice", "Readmission"))
box_ms_4s<-
boxes.Lexis(mat_4_states, wmult=1.25, hmult=1.5, boxpos = list(x = c(20, 20, 70, 70), y = c(80, 20, 80, 20)), cex=1.2,
txt.arr=c(
expression("1)" *lambda['01']),
expression("2)" *lambda['02']),
expression("3)" *lambda['03']),
expression("4)" *lambda['13']),
expression("5)" *lambda['23'])))
title("b) Four-states Model", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest") ## internal titles

Any observation where an event occurs which is not the event of interest for a specific transition is treated as a censored at the end of the study (2019, November 13) observation (only referrals and also had inexact dates of discharge), that is, in the same way as a patient that was lost to follow-up. If there is a readmission posterior to loss-to follow-up cases, these cases we obtained the length in days between being readmitted posterior and the time of admission, knowing that any intermediate date of discharge was interval-censored (the exact time in which users discharged is unknown, and its treatment outcomes are unknown, so there is no exact time-to-readmission). Covariates are fixed at baseline. Some transitions were shown to be simultaneous (n= 132). Small adjustment such that transitions were sequential rather than simultaneous by adding one day to the absorbing event, and subtracting a day if the transition corresponded to an intermediate state.
The stacked format of the data allows to calculate hazards very simply by each transition defined earlier (trans
).
#Data should be in a data frame with column names "id", "stop", "st.stage", and "stage" where "id" is the individual's identification number, "stop" is the transition time from state j to j', "st.stage" is the state the individual is transitioning from (i.e., j), and "stage" is the state the individual is transitioning to (i.e., j') and equals 0 if right censored.
## 80% of sample is LT, rest has start time of 0
### AGS: Parten en 0, salvo que estén truncados a la izquierda.
### Parece que todos comparten un mismo tiempo ojetivo.
### AGS: Cuando hay un estado seguido no es necesario interval censoring, se dn en tun tiempo continuo
### El 0 es censura
library(mstate)
#### MSPREP= 3 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
# 2021-03-24: Eliminate cases with readmissions posterior to interval-censored discharges
#dplyr::mutate(cens=ifelse(motivodeegreso_mod_imp=="Referral to another treatment"& !is.na(fech_ing_next_treat),1,0)) %>% dplyr::filter(cens==0) %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. Si no no se ha observado
# para efectos de este modelo simple, experimentar el alta terapéutica es el objetivo, por lo que el resto será censura
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo. Le añado un día
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
dplyr::mutate(date_ther_disch= dias_treat_imp_sin_na, #fech_ing_num+
## 2021-03-24, The posterior treatment has to include the days in the previous treatment
date_post_treat= dplyr::case_when(ther_disch==1 & readmission==1~ dias_treat_imp_sin_na+ diff_bet_treat,
#if the first time is censored,
ther_disch==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, date_ther_disch, ther_disch, date_post_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec,tipo_de_plan_res) %>%
as.data.frame()
#### MSPREP= 4 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
# 2021-03-24: Eliminate cases with readmissions posterior to interval-censored discharges
#dplyr::mutate(cens=ifelse(motivodeegreso_mod_imp=="Referral to another treatment"& !is.na(fech_ing_next_treat),1,0)) %>% dplyr::filter(cens==0) %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# For problems with 0 intervals between a new treatment and other between the admission and discharge, I subtract 1 day of admission. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. If not, it assumes not observed
dplyr::mutate(disch_wo_clin_adv=ifelse(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0)) %>%
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state of readmission
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# For problems with 0 intervals between a new treatment and other between the admission and discharge, I add 1 day of admission to the next treatment. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED
dplyr::mutate(days_to_ther_disch= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
dplyr::mutate(days_to_disch_wo_clin_adv= dplyr::case_when(disch_wo_clin_adv==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
dplyr::mutate(date_ther_disch= days_to_ther_disch, #fech_ing_num+
date_disch_wo_clin_adv= days_to_disch_wo_clin_adv,
## 2021-03-24, The posterior treatment has to include the days in the previous treatment
date_post_treat= dplyr::case_when(ther_disch==1 & readmission==1~ days_to_ther_disch+ diff_bet_treat,
disch_wo_clin_adv==1 & readmission==1~ days_to_disch_wo_clin_adv+ diff_bet_treat,
#if the first time is censored,
disch_wo_clin_adv==0 & ther_disch==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
ther_disch==1 & disch_wo_clin_adv==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, date_ther_disch, ther_disch, date_disch_wo_clin_adv, disch_wo_clin_adv, date_post_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec,tipo_de_plan_res) %>%
as.data.frame()
tail(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8a. Data in Wide, 3-states",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8a. Data in Wide, 3-states
|
row
|
fech_ing_num
|
date_ther_disch
|
ther_disch
|
date_post_treat
|
readmission
|
tipo_de_programa_2
|
estado_conyugal_2
|
edad_al_ing_grupos
|
escolaridad_rec
|
sus_principal_mod
|
freq_cons_sus_prin
|
compromiso_biopsicosocial
|
tenencia_de_la_vivienda_mod
|
num_otras_sus_mod
|
numero_de_hijos_mod_rec
|
tipo_de_plan_res
|
21373
|
18,172
|
15,187
|
3,026
|
0
|
1,761
|
1
|
General population
|
Single
|
30-39
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Stays temporarily with a relative
|
One additional substance
|
Yes
|
Outpatient
|
21374
|
41,467
|
15,873
|
2,340
|
0
|
275
|
1
|
Women specific
|
Married/Shared living arrangements
|
18-29
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Illegal Settlement
|
More than one additional substance
|
Yes
|
Residential
|
21375
|
16,343
|
15,114
|
3,099
|
0
|
2,244
|
1
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Alcohol
|
2 to 3 days a week
|
3-Severe
|
Renting
|
More than one additional substance
|
Yes
|
Residential
|
21376
|
139,357
|
17,688
|
525
|
0
|
525
|
0
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
More than one additional substance
|
No
|
Residential
|
21377
|
24,900
|
15,345
|
349
|
1
|
2,868
|
0
|
Women specific
|
Single
|
18-29
|
1-More than high school
|
Alcohol
|
Daily
|
2-Moderate
|
Owner/Transferred dwellings/Pays Dividends
|
One additional substance
|
Yes
|
Outpatient
|
21378
|
41,118
|
15,874
|
2,339
|
0
|
2,339
|
0
|
Women specific
|
Married/Shared living arrangements
|
30-39
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
No additional substance
|
Yes
|
Residential
|
a Note= Proportions from the initial state
|
tail(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8b. Data in Wide, 4-states",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8b. Data in Wide, 4-states
|
row
|
fech_ing_num
|
date_ther_disch
|
ther_disch
|
date_disch_wo_clin_adv
|
disch_wo_clin_adv
|
date_post_treat
|
readmission
|
tipo_de_programa_2
|
estado_conyugal_2
|
edad_al_ing_grupos
|
escolaridad_rec
|
sus_principal_mod
|
freq_cons_sus_prin
|
compromiso_biopsicosocial
|
tenencia_de_la_vivienda_mod
|
num_otras_sus_mod
|
numero_de_hijos_mod_rec
|
tipo_de_plan_res
|
21373
|
18,172
|
15,187
|
3,026
|
0
|
304
|
1
|
1,761
|
1
|
General population
|
Single
|
30-39
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Stays temporarily with a relative
|
One additional substance
|
Yes
|
Outpatient
|
21374
|
41,467
|
15,873
|
2,340
|
0
|
2,340
|
0
|
275
|
1
|
Women specific
|
Married/Shared living arrangements
|
18-29
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Illegal Settlement
|
More than one additional substance
|
Yes
|
Residential
|
21375
|
16,343
|
15,114
|
3,099
|
0
|
115
|
1
|
2,244
|
1
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Alcohol
|
2 to 3 days a week
|
3-Severe
|
Renting
|
More than one additional substance
|
Yes
|
Residential
|
21376
|
139,357
|
17,688
|
525
|
0
|
56
|
1
|
525
|
0
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
More than one additional substance
|
No
|
Residential
|
21377
|
24,900
|
15,345
|
349
|
1
|
2,868
|
0
|
2,868
|
0
|
Women specific
|
Single
|
18-29
|
1-More than high school
|
Alcohol
|
Daily
|
2-Moderate
|
Owner/Transferred dwellings/Pays Dividends
|
One additional substance
|
Yes
|
Outpatient
|
21378
|
41,118
|
15,874
|
2,339
|
0
|
38
|
1
|
2,339
|
0
|
Women specific
|
Married/Shared living arrangements
|
30-39
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
No additional substance
|
Yes
|
Residential
|
a Note= Proportions from the initial state
|
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
mat_3_states <- trans.illdeath(names=c("Admission","Therapeutic\nDischarge","Readmission"))
#All possible paths through the multi-state model can be found here:
#paths(mat_3_states)
#Los números son determinados por posición en cada columna (o eje Y).
#Si uno quiere definir para la otra fila, salta al siguiente vector:
mat_4_states<- transMat(list(c(2,3,4), 4, 4, c()),
names = c("Admission", "Therapeutic\nDischarge", "Discharge Without\nMedical Advice", "Readmission"))
#illness-death model without recovery
ms_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "date_ther_disch", "date_post_treat"),
status = c(NA, "ther_disch", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep,
id = "row",
trans = mat_3_states,
keep = c("tipo_de_programa_2","estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod", "freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","tipo_de_plan_res"))
#model of 5 states without recovery.
ms2_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "date_ther_disch",
"date_disch_wo_clin_adv", "date_post_treat"),
status = c(NA, "ther_disch", "disch_wo_clin_adv", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2,
id = "row",
trans = mat_4_states,
keep = c("tipo_de_programa_2","estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod", "freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","tipo_de_plan_res"))
#Starting from state 1, simultaneous transitions possible for subjects 36666 36586 56465 136847 37595 60609 51706 76376 117544 140210 at times 126 472 32 36 1 203 45 14 5 71; smallest receiving state chosen
invisible(c("This problem responds to differences between treatments 0. Should be resolved in the initial stages"))
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2 %>%
dplyr::filter(row %in% unlist(
ms2_CONS_C1_SEP_2020_women_imputed %>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(row,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(row)
)) %>%
#dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>%
View()
}
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
dplyr::filter(row %in% unlist(
ms2_CONS_C1_SEP_2020_women_imputed %>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(row,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(row)
)) %>%
dplyr::select(row, motivodeegreso_mod_imp, contains("fech"))
}
invisible(c("Investigate warning: done"))
#37595 Administrative discharge 2013-03-18 2013-03-19 2013-03-19
#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#SCALE INTERVALS TO YEARS
ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
path<-rstudioapi::getSourceEditorContext()$path
if (grepl("CISS Fondecyt",path)==T){
dta_path<-paste0("C:/Users/CISS Fondecyt/OneDrive/Escritorio/SUD_CL/")
} else if (grepl("andre",path)==T){
dta_path<-paste0('C:/Users/andre/Desktop/SUD_CL/')
} else if (grepl("E:",path)==T){
dta_path<-paste0("E:/Mi unidad/Alvacast/SISTRAT 2019 (github)/_WO vs MG/")
} else {
dta_path<-paste0("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/_WO vs MG/")
}
rio::export(
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep %>%
dplyr::rename("id"="row", "ther_disch_time"="date_ther_disch","ther_disch_status"="ther_disch",
"readmission_time"="date_post_treat","readmission_status"="readmission"),
paste0(dta_path,"three_st_msprep.dta"))
rio::export(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2%>%
dplyr::rename("id"="row", "ther_disch_time"="date_ther_disch","ther_disch_status"="ther_disch",
"disch_wo_clin_adv_time"="date_disch_wo_clin_adv",
"disch_wo_clin_adv_status"="disch_wo_clin_adv",
"readmission_time"="date_post_treat","readmission_status"="readmission"),
paste0(dta_path,"four_st_msprep.dta"))
Then we present the transition with the frequencies and proportions.
data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9a. Empirical State Transition Matrix, 3 States Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9a. Empirical State Transition Matrix, 3 States Model
from
|
to
|
Frequencies
|
Proportions
|
Admission
|
Therapeutic Discharge
|
5,114
|
23.92%
|
Admission
|
Readmission
|
4,348
|
20.34%
|
Admission
|
no event
|
11,916
|
55.74%
|
Therapeutic Discharge
|
Admission
|
0
|
0.00%
|
Therapeutic Discharge
|
Readmission
|
1,063
|
20.79%
|
Therapeutic Discharge
|
no event
|
4,051
|
79.21%
|
Readmission
|
Admission
|
0
|
0.00%
|
Readmission
|
Therapeutic Discharge
|
0
|
0.00%
|
Readmission
|
no event
|
5,411
|
100.00%
|
a Note= Proportions from the initial state
|
data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9b. Empirical State Transition Matrix, 4 States Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state")%>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9b. Empirical State Transition Matrix, 4 States Model
from
|
to
|
Frequencies
|
Proportions
|
Admission
|
Therapeutic Discharge
|
5,114
|
23.9%
|
Admission
|
Discharge Without Medical Advice
|
11,995
|
56.1%
|
Admission
|
Readmission
|
753
|
3.5%
|
Admission
|
no event
|
3,516
|
16.4%
|
Therapeutic Discharge
|
Admission
|
0
|
0.0%
|
Therapeutic Discharge
|
Discharge Without Medical Advice
|
0
|
0.0%
|
Therapeutic Discharge
|
Readmission
|
1,063
|
20.8%
|
Therapeutic Discharge
|
no event
|
4,051
|
79.2%
|
Discharge Without Medical Advice
|
Admission
|
0
|
0.0%
|
Discharge Without Medical Advice
|
Therapeutic Discharge
|
0
|
0.0%
|
Discharge Without Medical Advice
|
Readmission
|
3,595
|
30.0%
|
Discharge Without Medical Advice
|
no event
|
8,400
|
70.0%
|
Readmission
|
Admission
|
0
|
0.0%
|
Readmission
|
Therapeutic Discharge
|
0
|
0.0%
|
Readmission
|
Discharge Without Medical Advice
|
0
|
0.0%
|
Readmission
|
no event
|
5,411
|
100.0%
|
a Note= Proportions from the initial state
|
Decision whether to use Markov or Semi-Markov
For the three-states model, we only looked at transition 3 as only transition where different times entering the state. Interested in knowing the risk of readmission in completion of a treatment w/therapeutic discharge (status) from different treatments and different time to progression.
In the four-states model, we looked at transition 4 and 5.
#state arrival extended (semi-)Markov to mean that the i → j transition hazard depends on thetime of arrival at state i.
#Build a Cox proportional hazard model including treatment and time in previous state as covariates
CoxMarkov<-coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_programa_2)+Tstart,
data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==3),method = "breslow")
## Warning in coxph(Surv(Tstart, Tstop, status) ~ factor(tipo_de_programa_2) + : a
## variable appears on both the left and right sides of the formula
HR<-round(exp(coef(CoxMarkov)),2)
CI<-round(exp(confint(CoxMarkov)),2)
P<-round(coef(summary(CoxMarkov))[,5],4)
CoxMarkov12<-coxph(Surv(time,status)~factor(tipo_de_programa_2)+Tstart,
data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==3),method = "breslow")
HR12<-round(exp(coef(CoxMarkov12)),2)
CI12<-round(exp(confint(CoxMarkov12)),2)
P12<-round(coef(summary(CoxMarkov12))[,5],4)
#In Surv(Tstart, Tstop, status) : Stop time must be > start time, NA created
CoxMarkov2<-coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_programa_2)+Tstart,
data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans %in% c(4,5)),method = "breslow")
## Warning in coxph(Surv(Tstart, Tstop, status) ~ factor(tipo_de_programa_2) + : a
## variable appears on both the left and right sides of the formula
HR2<-round(exp(coef(CoxMarkov2)),2)
CI2<-round(exp(confint(CoxMarkov2)),2)
P2<-round(coef(summary(CoxMarkov2))[,5],4)
tab_cox_markov<- as.data.frame(cbind(HR2,CI2,P2))
CoxMarkov22<-coxph(Surv(time,status)~factor(tipo_de_programa_2)+Tstart,
data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans %in% c(4,5)),method = "breslow")
HR22<-round(exp(coef(CoxMarkov22)),2)
CI22<-round(exp(confint(CoxMarkov22)),2)
P22<-round(coef(summary(CoxMarkov22))[,5],4)
tab_cox_markov2<- as.data.frame(cbind(HR22,CI22,P22))
#Stop time must be > start time, NA created #INVESTIGATE - RESOLVED IN 2021-03-24
rbindlist(list(data.table(cbind(HR,CI,P),keep.rownames=T),
data.table(cbind(HR12,CI12,P12),keep.rownames=T),
data.table(cbind(HR2,CI2,P2),keep.rownames=T),
data.table(cbind(HR22,CI22,P22),keep.rownames=T))) %>%
dplyr::rename("Terms"="rn") %>%
dplyr::mutate(Terms=dplyr::case_when(grepl("tipo_de_", Terms)~"Type of Program",
grepl("Tstart",Terms)~"Time in previous state(in years)")) %>%
dplyr::mutate(P=ifelse(as.numeric(P)<.001,"<.001",as.character(round(P,3)))) %>%
dplyr::rename("Sig."="P") %>%
dplyr::mutate(`95% CIs`=paste0(`2.5 %`,", ",`97.5 %`)) %>%
dplyr::select(-`2.5 %`,-`97.5 %`) %>%
dplyr::select(Terms, HR, `95% CIs`, Sig.) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 10. PH Model incluiding time in previous state & Type of Program as a covariate",
align= c("c",rep('c', 5)))%>%
kableExtra::pack_rows("Three-states", 1, 2) %>%
kableExtra::pack_rows("Three-states- Renewal time", 3, 4) %>%
kableExtra::pack_rows("Four-states", 5, 6) %>%
kableExtra::pack_rows("Four-states- Renewal time", 7, 8) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::scroll_box(width = "100%", height = "350px")
## Column 2 ['HR12'] of item 2 is missing in item 1. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names. use.names='check' (default from v1.12.2) emits this message and proceeds as if use.names=FALSE for backwards compatibility. See news item 5 in v1.12.2 for options to control this message.
Table 10. PH Model incluiding time in previous state & Type of Program as a covariate
Terms
|
HR
|
95% CIs
|
Sig.
|
Three-states
|
Type of Program
|
1.85
|
1.64, 2.09
|
<.001
|
Time in previous state(in years)
|
1.55
|
1.37, 1.76
|
<.001
|
Three-states- Renewal time
|
Type of Program
|
1.83
|
1.63, 2.07
|
<.001
|
Time in previous state(in years)
|
0.89
|
0.8, 1
|
0.042
|
Four-states
|
Type of Program
|
1.48
|
1.39, 1.56
|
<.001
|
Time in previous state(in years)
|
1.56
|
1.47, 1.66
|
<.001
|
Four-states- Renewal time
|
Type of Program
|
1.50
|
1.42, 1.59
|
<.001
|
Time in previous state(in years)
|
0.90
|
0.85, 0.95
|
<.001
|
#a variable appears on both the left and right sides of the formula
#this warning should be normal, since we are dealing with time to arrival at a determined state.
columna_dummy <- function(df, columna) {
df %>%
mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>%
mutate(valor = 1) %>%
spread(key = columna, value = valor, fill = 0)
}
catVars<-c("edad_al_ing_grupos","escolaridad_rec","sus_principal_mod","freq_cons_sus_prin","compromiso_biopsicosocial",
"tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","tipo_de_programa_2","tipo_de_plan_res")
ms_CONS_C1_SEP_2020_women_imputed_mod<-ms_CONS_C1_SEP_2020_women_imputed
for (i in c(1:length(catVars))){#catVars[-10] excluding treatment indicator
cat<-as.character(catVars[i])#catVars[-10] excluding treatment indicator
ms_CONS_C1_SEP_2020_women_imputed_mod<-columna_dummy(ms_CONS_C1_SEP_2020_women_imputed_mod,cat)
}
colnames(ms_CONS_C1_SEP_2020_women_imputed_mod)[8:length(ms_CONS_C1_SEP_2020_women_imputed_mod)]<-
colnames(janitor::clean_names(ms_CONS_C1_SEP_2020_women_imputed_mod))[8:length(ms_CONS_C1_SEP_2020_women_imputed_mod)]
attr(ms_CONS_C1_SEP_2020_women_imputed_mod,"trans")<-mat_3_states
#data: dataset in etm format: "entry", "exit", "from", "to", "id". Should also contain the relevant covariates: no factors allowed
#Multi-state data in msdata format. Should also contain (dummy codings of) the relevant covariates; no factors allowed
# ms_CONS_C1_SEP_2020_women_imputed$id<-ms_CONS_C1_SEP_2020_women_imputed$row
ms_CONS_C1_SEP_2020_women_imputed_mod$id<-ms_CONS_C1_SEP_2020_women_imputed_mod$row
ms_CONS_C1_SEP_2020_women_imputed_mod$row<-NULL
formula_char<-
"edad_al_ing_grupos_18_29+ edad_al_ing_grupos_30_39+
edad_al_ing_grupos_40_49+ edad_al_ing_grupos_50+ escolaridad_rec_1_more_than_high_school+
escolaridad_rec_2_completed_high_school_or_less+ escolaridad_rec_3_completed_primary_school_or_less+
sus_principal_mod_alcohol+ sus_principal_mod_cocaine_hydrochloride+ sus_principal_mod_cocaine_paste+
sus_principal_mod_marijuana+ sus_principal_mod_other+ freq_cons_sus_prin_1_day_a_week_or_more+
freq_cons_sus_prin_2_to_3_days_a_week+ freq_cons_sus_prin_4_to_6_days_a_week+
freq_cons_sus_prin_daily+ freq_cons_sus_prin_less_than_1_day_a_week+
compromiso_biopsicosocial_1_mild+ compromiso_biopsicosocial_2_moderate+
compromiso_biopsicosocial_3_severe+ tenencia_de_la_vivienda_mod_illegal_settlement+
tenencia_de_la_vivienda_mod_others+ tenencia_de_la_vivienda_mod_owner_transferred_dwellings_pays_dividends+
tenencia_de_la_vivienda_mod_renting+ tenencia_de_la_vivienda_mod_stays_temporarily_with_a_relative+
num_otras_sus_mod_more_than_one_additional_substance+ num_otras_sus_mod_no_additional_substance+
num_otras_sus_mod_one_additional_substance+ numero_de_hijos_mod_rec_no+
numero_de_hijos_mod_rec_yes+ tipo_de_programa_2_1+
tipo_de_programa_2_0+ tipo_de_plan_res_outpatient+
tipo_de_plan_res_residential
"
MT <- MarkovTest(ms_CONS_C1_SEP_2020_women_imputed_mod,
formula= formula_char,
transition = 2,
grid = 1,#seq(0, 11, by = 1/12),
B = 25)
#Tried with transition 2 and 3
#Error in rep(mmm, length.out = l1) :
# attempt to replicate an object of type 'symbol'
#Además: There were 50 or more warnings (use warnings() to see the first 50)
data<-ms_CONS_C1_SEP_2020_women_imputed_mod #no puede ir arrival
id<-"id"
transition<-3
grid<-90 #3 months
grid<-1096 #3 years
dist<-"poisson"
trans=ifelse(is.null(attr(data, "trans")),get("mat_3_states"),attr(data, "trans"))
fn = list(function(x) mean(abs(x), na.rm = TRUE))
fn2 = list(function(x) mean(x, na.rm = TRUE))
formula<-formula_char
B=25
MarkovTest <- function(data, id, formula = NULL, transition, grid,
trans=NULL,
B = 1000,
fn = list(function(x) mean(abs(x), na.rm = TRUE)),
fn2 = list(function(x) mean(x, na.rm = TRUE)),
min_time = 0,
other_weights = NULL,
dist = c("poisson", "normal")) {
dist <- match.arg(dist)
if (missing(id)) id <- "id"
# Remove "empty" lines in the data
wh <- which(data$Tstop <= data$Tstart)
if (length(wh)>0)
{
warning(length(wh), " lines with Tstart <= Tstop, have been removed before applying tests!")
data <- data[-wh, ]
}
# Convert data to etm data
# Make sure to retain all covariates (possibly way to many) in msdata (needed in formula perhaps)
mtch <- match(c("id", "from", "to", "trans", "Tstart", "Tstop", "status"), names(data))
covcols <- 1:ncol(data)
covcols <- covcols[!covcols %in% mtch]
ncovs <- length(covcols)
trans <- get("mat_3_states")
etmdata <- msdata2etm(data, id)
if (ncovs > 0) etmdata <- msdata2etm(data, id, names(data)[covcols])
trans2 <- to.trans2(trans)
tfrom <- trans2$from[trans2$transno == transition]
tto <- trans2$to[trans2$transno == transition]
# Determine qualifying set
qualset <- c(tfrom, which(trans2Q(trans)[, tfrom] > 0))
qualset <- sort(unique(qualset)) # for circular models, tfrom is included twice
# Functions
if (!is.list(fn))
fn <- list(fn) # coerce to be list if a single function is provided
if (is.list(fn) & is.function(fn[[1]])) {
# coerce to be a list of lists, by repeating the same list each time
tempfn <- list()
for (i in 1:length(qualset)) tempfn[[i]] <- fn
fn <- tempfn
}
if (!is.list(fn2))
fn2 <- list(fn2) # coerce to be list if a single function is provided
# Establish the relevant patients who ever enter tfrom
relpat <- sort(unique(etmdata$id[etmdata$from == tfrom]))
rdata <- etmdata[etmdata$from == tfrom, ] # only need time periods in the relevant state...
rdata$status <- 1 * (rdata$to == tto)
if (!is.null(formula)) {
form <- as.formula(paste("Surv(entry, exit, status) ~ ", formula,
sep = ""))
progfit <- coxph(form, data = rdata)
if (length(progfit$coefficients) > 0) {
#Sacado por andrés
Zmat <- as.matrix(rdata[, match(names(progfit$coefficients),
names(rdata))])
#Zmat <- as.matrix(rdata[, 7:44])
Ncov <- dim(Zmat)[2]
} else {
Ncov <- 0
}
if (!is.null(progfit$offset)) {
offset <- progfit$offset
} else {
offset <- rep(0, dim(rdata)[1])
}
} else {
Ncov <- 0
offset <- rep(0, dim(rdata)[1])
progfit <- NULL
}
# Minimal data, change names
progdat <- rdata[, match(c("id", "entry", "exit", "status"), names(rdata))]
names(progdat) <- c("id", "T0", "T1", "D")
nobs_grid <- sapply(grid, function(x) sum(progdat$D[progdat$T1 > x]))
# Have the extra dimension of indexes
index_gM <- array(0, c(length(relpat), length(grid), length(qualset)))
for (indx in 1:length(qualset)) {
qualstate <- qualset[indx]
index_g <- sapply(grid, function(y) sapply(relpat, function(x)
which(etmdata$entry < y & etmdata$exit >= y & etmdata$id == x)))
index_g <- array(1 * (etmdata$from[sapply(index_g, function(y)
ifelse(length(y) > 0, y,
dim(etmdata)[1] + 1))] == qualstate), c(length(relpat), length(grid)))
index_g[is.na(index_g)] <- 0
index_gM[, , indx] <- index_g
}
# Need a separate Z3mat for each group as well...
Z3mat <- index_gM[match(progdat$id, relpat), , , drop = FALSE]
N1 <- dim(progdat)[1]
if (Ncov > 0) {
LP <- c(Zmat %*% progfit$coefficients) + offset
} else {
LP <- rep(0, N1) + offset
}
S0 <- sapply(1:N1, function(x) sum(exp(LP) * (progdat$T0 < progdat$T1[x] &
progdat$T1 >= progdat$T1[x])))
incr <- progdat$D / S0
cumhaz <- approxfun(c(0, sort(unique(progdat$T1)), Inf),
c(0, cumsum(tapply(incr, progdat$T1, sum)), sum(incr)),
method = "constant")
resid_mat <- sapply(grid, function(x) progdat$D * (progdat$T1 > x) - exp(LP) *
(cumhaz(pmax(x, progdat$T1)) - cumhaz(pmax(x, progdat$T0))))
# Have a separate trace for each qualifying state...
obs_trace <- array(0, c(length(grid), length(qualset)))
for (indx in 1:(length(qualset))) {
obs_trace[, indx] <- sapply(1:length(grid), function(k)
sum(resid_mat[, k] * Z3mat[, k, indx] * (progdat$T1 > grid[k])))
}
nqstate <- length(qualset)
if (Ncov > 0)
Ifish <- progfit$var
N1 <- dim(progdat)[1]
if (Ncov > 0)
Zbar0 <- array(0, c(N1, Ncov))
Zbar <- array(0, c(N1, length(grid), nqstate))
for (i in 1:N1) {
x <- i
if (Ncov > 0) {
for (j in 1:Ncov) {
Zbar0[i, j] <- sum(Zmat[, j] * exp(LP) *
(progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x])) /
sum(exp(LP) * (progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x]))
}
}
for (j in 1:length(grid)) {
for (k in 1:nqstate)
Zbar[i, j, k] <- sum(Z3mat[, j, k] * exp(LP) *
(progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x])) /
sum(exp(LP) * (progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x]))
}
}
NAe <- incr
if (Ncov > 0) {
Hmat <- array(0, c(length(grid), Ncov, nqstate))
for (j in 1:Ncov) {
# for (k in 1:nqstate) Hmat[,j,k] <- sapply(1:length(grid),function(y)
# sum(sapply(1:N1,function(x) sum(exp(LP) *Zmat[,j]* (Z3mat[x,y,k] -
# Zbar[x,y,k]) * NAe[x] * (progdat$T0[x] > grid[y] & progdat$T1[x] <=
# progdat$T1)))))
for (k in 1:nqstate) Hmat[, j, k] <- sapply(1:length(grid), function(y)
sum(sapply(1:N1, function(x)
sum(exp(LP[x]) * ((Zmat[x, j] - Zbar0[, j]) *
(Z3mat[x, y, k] - Zbar[, y, k])) * NAe *
(progdat$T1[x] > grid[y]) * (progdat$T1 > progdat$T0[x] & progdat$T1 <= progdat$T1[x])))))
}
}
if (Ncov > 0) {
multiplier <- array(0, dim(Hmat))
for (k in 1:nqstate) multiplier[, , k] <- Hmat[, , k] %*% Ifish
est_cov <- array(0, c(length(grid), nqstate, nqstate))
for (indx1 in 1:nqstate) {
for (indx2 in (indx1):nqstate) {
est_var <- sapply(1:length(grid), function(k)
sum(sapply(1:N1, function(v)
sum(((Z3mat[v, k, indx1] - Zbar[, k, indx1]) *
(progdat$T1 > grid[k]) - c(multiplier[k, , indx1, drop = FALSE] %*%
t(Zmat[v, ] - Zbar0))) *
((Z3mat[v, k, indx2] - Zbar[, k, indx1]) * (progdat$T1 > grid[k]) -
c(multiplier[k, , indx2, drop = FALSE] %*% t(Zmat[v, ] - Zbar0))) *
exp(LP[v]) * (progdat$T0[v] < progdat$T1 & progdat$T1[v] >= progdat$T1) * NAe))))
est_cov[, indx1, indx2] <- est_cov[, indx2, indx1] <- est_var
}
}
} else {
est_cov <- array(0, c(length(grid), nqstate, nqstate))
for (indx1 in 1:nqstate) {
for (indx2 in (indx1):nqstate) {
est_var <- sapply(1:length(grid), function(k)
sum(sapply(1:N1, function(v)
sum((Z3mat[v, k, indx1] - Zbar[, k, indx1]) * (Z3mat[v, k, indx2] - Zbar[, k, indx2]) *
exp(LP[v]) * (progdat$T1 > grid[k] & progdat$T0[v] < progdat$T1 & progdat$T1[v] >= progdat$T1) * NAe))))
est_cov[, indx1, indx2] <- est_cov[, indx2, indx1] <- est_var
}
}
}
# First obtain the individually normalized traces...
est_var <- obs_norm_trace <- array(0, c(length(grid), nqstate))
for (k in 1:nqstate) {
est_var[, k] <- est_cov[cbind(1:length(grid), k, k)]
# This should be the same as before...
obs_norm_trace[, k] <- obs_trace[, k] / sqrt(est_var[, k] + 1 * (est_var[, k] == 0))
}
# Find singular matrices
obs_chisq_trace <- rep(0, length(grid))
for (k in 1:length(grid)) {
sol <- tryCatch(solve(est_cov[k, -1, -1]), error = function(e)
return(diag(0, nqstate - 1)))
obs_chisq_trace[k] <- (obs_trace[k, -1]) %*% sol %*%
(obs_trace[k, -1]) # do something about singular matrices...
}
##############
n_wb_trace <- wb_trace0 <- wb_trace <- array(0, c(B, length(grid), nqstate))
nch_wb_trace <- array(0, c(B, length(grid)))
for (wb in 1:B) {
if (dist == "poisson") {
G <- rpois(dim(progdat)[1], 1) - 1
} else if (dist == "normal") {
G <- rnorm(dim(progdat)[1], 0, 1)
} else stop("argument dist should be poisson or normal")
trace0 <- array(0, c(length(grid), nqstate))
for (k in 1:nqstate) {
trace0[, k] <- apply(sapply(1:length(grid), function(x)
progdat$D * (Z3mat[, x, k] - Zbar[, x, k]) * (progdat$T1 > grid[x]) * G), 2, sum)
if (Ncov > 0) {
Imul <- sapply(1:Ncov, function(x)
sum(progdat$D * (Zmat[, x] - Zbar0[, x]) * G))
trace1 <- (Hmat[, , k] %*% Ifish %*% Imul)[, 1]
} else {
trace1 <- 0
}
wb_trace[wb, , k] <- trace0[, k] - trace1
n_wb_trace[wb, , k] <- wb_trace[wb, , k]/sqrt(est_var[, k] +
1 * (est_var[, k] == 0))
for (w in 1:length(grid)) {
sol <- tryCatch(solve(est_cov[w, -1, -1]), error = function(e)
return(diag(0, nqstate - 1)))
nch_wb_trace[wb, w] <- (wb_trace[wb, w, -1]) %*% sol %*%
(wb_trace[wb, w, -1]) # do something about singular matrices...
}
}
}
# Need to have one of these per nqstate
NS <- length(fn[[1]])
orig_stat <- array(sapply(1:nqstate, function(y)
sapply(fn[[y]], function(g) g(obs_norm_trace[, y]))), c(NS, nqstate))
orig_ch_stat <- sapply(fn2, function(g) g(obs_chisq_trace))
p_stat_wb <- array(0, c(NS, nqstate))
wb_stat <- array(0, c(B, NS, nqstate))
for (k in 1:nqstate) {
wb_stat[, , k] <- array(t(apply(n_wb_trace[, , k, drop = FALSE],
1, function(x)
sapply(fn[[k]], function(g) g(x)))), c(B, NS))
p_stat_wb[, k] <- sapply(1:NS, function(x) mean(wb_stat[, x, k] > orig_stat[x, k]))
}
est_quant <- array(0, c(2, length(grid), nqstate))
for (k in 1:nqstate)
est_quant[, , k] <- apply(n_wb_trace[, , k, drop = FALSE], 2,
quantile, c(0.025, 0.975), na.rm = TRUE)
NS2 <- length(fn2)
p_ch_stat_wb <- rep(0, NS2)
wb_ch_stat <- array(t(apply(nch_wb_trace, 1, function(x)
sapply(fn2, function(g) g(x)))), c(B, NS2))
p_ch_stat_wb <- sapply(1:NS2, function(x) mean(wb_ch_stat[, x] > orig_ch_stat[x]))
# Is a question whether should use Nsub as number of subjects or number
# of spells within the state
MTres <- list(orig_stat = orig_stat, orig_ch_stat = orig_ch_stat, p_stat_wb = p_stat_wb,
p_ch_stat_wb = p_ch_stat_wb, b_stat_wb = wb_stat, zbar = obs_norm_trace,
nobs_grid = nobs_grid, Nsub = length(relpat), est_quant = est_quant,
obs_chisq_trace = obs_chisq_trace, nch_wb_trace = nch_wb_trace,
n_wb_trace = n_wb_trace, est_cov = est_cov, transition = transition,
from = tfrom, to = tto, B = B, dist = dist,
qualset = qualset, coxfit = progfit, fn = fn, fn2 = fn2)
class(MTres) <- c("MarkovTest")
return(MTres)
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#' @export
optimal_weights_multiple <- function(data, id, grid, transition, min_time = 0)
{
# Convert data to etm data
trans <- attr(data, "trans")
etmdata <- msdata2etm(data, id)
trans2 <- to.trans2(trans)
from <- trans2$from[trans2$transno == transition]
to <- trans2$to[trans2$transno == transition]
numbers <- sapply(grid, function(x)
table(factor(etmdata$from)[(etmdata$entry <= x & etmdata$exit > x)]))
subevent <- sapply(grid, function(x)
sum(etmdata$from == from & etmdata$to == to & etmdata$exit > x))
tnumbers <- apply(numbers, 2, sum)
weights <- sapply(1:dim(numbers)[1], function(x)
subevent * numbers[x, ] * (tnumbers - numbers[x, ])/tnumbers^2)
weights[is.nan(weights)] <- 0
weight <- apply(weights, 1, max)
weight * diff(c(min_time, grid))
}
#' @export
optimal_weights_matrix <- function(data, id, grid, transition, min_time = 0,
other_weights = NULL)
{
# Convert data to etm data
trans <- attr(data, "trans")
etmdata <- msdata2etm(data, id)
trans2 <- to.trans2(trans)
from <- trans2$from[trans2$transno == transition]
to <- trans2$to[trans2$transno == transition]
numbers <- sapply(grid, function(x)
table(factor(etmdata$from)[(etmdata$entry <= x & etmdata$exit > x)]))
subevent <- sapply(grid, function(x)
sum(etmdata$from == from & etmdata$to == to & etmdata$exit > x))
tnumbers <- apply(numbers, 2, sum)
weights <- sapply(1:dim(numbers)[1], function(x)
sqrt(subevent * numbers[x, ] * (tnumbers - numbers[x, ]))/tnumbers)
weights[is.nan(weights)] <- 0
fn_list <- list()
for (i in 1:dim(numbers)[1]) {
# Take into account the distance between grids
val <- weights[, i] * diff(c(min_time, grid))
fn_list[[i]] <- list(fn = function(x)
weighted.mean(abs(x), w = val, na.rm = TRUE))
if (!is.null(other_weights)) {
nother <- length(other_weights)
fn_list[[i]][2:(nother + 1)] <- other_weights
}
}
# Store the weights as an attribute
attr(fn_list, "weights") <- weights
fn_list
}
The model considered the transition from intermediate states to our absorbing state (being readmitted) is explained by the time spent in the previous health state. This covariate (time in the previous state) was shown to be statistically significant (p<.001); results indicated a longer duration spent in the first treatment is associated with increased risk of readmission. Therefore, a semi-Markov (called a Markov renewal model) or clock reset approach should be undertaken for both models. They bear the advantage that information from the process history can be included as transition-specific explanatory covariates.
#Since prtime only makes sense for transition 3 (PR → RelDeath), we need the transition-specific covariate
#of prtime for transition 3, which is prtime.3. The corresponding model is termed the ”state
#arrival extended Markov PH” model in the tutorial, and appears on the right of Table III.
ms_CONS_C1_SEP_2020_women_imputed$arrival<-ms_CONS_C1_SEP_2020_women_imputed$Tstart
ms2_CONS_C1_SEP_2020_women_imputed$arrival<-ms2_CONS_C1_SEP_2020_women_imputed$Tstart
ms_CONS_C1_SEP_2020_women_imputed_exp <- expand.covs(ms_CONS_C1_SEP_2020_women_imputed, "arrival", append = TRUE, longnames = FALSE)
ms2_CONS_C1_SEP_2020_women_imputed_exp <- expand.covs(ms2_CONS_C1_SEP_2020_women_imputed, "arrival", append = TRUE, longnames = FALSE)
Assessment of Fit of Transitions
We need to derive appropriate functional forms and define respective survival functions. One reason to favor patametric models is that they can be easier to generalize. Seven candidate distributions were considered to model transitions from Adm→Ther.Disch., Adm.→Readm. and Ther.Disch.→Readm., including Weibull (assume a monotonically increasing or decreasing hazard), Log-logistic (non-monotonic hazards), Gompertz (assume a monotonically increasing or decreasing hazard), Log-normal (non-monotonic hazards), Exponential (constant hazard), Gamma & Generalized gamma (more flexible).
The following plots fitted survival curves from each model (coloured lines), with the Kaplan-Meier estimate, in red, obtained from an example of Jackson available here, added to the contributions of Wathers & Cutler available here.
options(warn=-1)
n_iter<-10000
tiempo_antes_fits<-Sys.time()
#Weathers, Brandon and Cutler, Richard Dr., "Comparision of Survival Curves Between Cox Proportional
#Hazards, Random Forests, and Conditional Inference Forests in Survival Analysis" (2017). All Graduate
#Plan B and other Reports. 927.
#https://digitalcommons.usu.edu/gradreports/927
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">
#https://devinincerti.com/2019/01/01/sim-mstate.html
n_trans <- max(mat_3_states, na.rm = TRUE)
fits_wei <- vector(mode = "list", length = n_trans)
fits_weiph <- vector(mode = "list", length = n_trans)
fits_llogis <- vector(mode = "list", length = n_trans)
fits_gomp <- vector(mode = "list", length = n_trans)
fits_logn <- vector(mode = "list", length = n_trans)
fits_exp <- vector(mode = "list", length = n_trans)
fits_gam <- vector(mode = "list", length = n_trans)
fits_ggam <- vector(mode = "list", length = n_trans)
fits_genf <- vector(mode = "list", length = n_trans)
fits_c_wei <- vector(mode = "list", length = n_trans)
fits_c_weiph <- vector(mode = "list", length = n_trans)
fits_c_llogis <- vector(mode = "list", length = n_trans)
fits_c_gomp <- vector(mode = "list", length = n_trans)
fits_c_logn <- vector(mode = "list", length = n_trans)
fits_c_exp <- vector(mode = "list", length = n_trans)
fits_c_gam <- vector(mode = "list", length = n_trans)
fits_c_ggam <- vector(mode = "list", length = n_trans)
fits_c_genf <- vector(mode = "list", length = n_trans)
km.lc<-list()
fits_cox<-list()
fits_c_cox<-list()
#"gengamma" Generalized gamma (stable parameterisation)
#"gengamma.orig" Generalized gamma (original parameterisation)
#"genf" Generalized F (stable parameterisation)
#"genf.orig" Generalized F (original parameterisation)
#"weibull" Weibull
#"gamma" Gamma
#"exp" Exponential
#"lnorm" Log-normal
#"gompertz" Gompertz
library(flexsurv)
#Specify the formula
fitform <- Surv(time, status) ~ 1
for (i in 1:n_trans){
fits_wei[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:n_trans){
fits_weiph[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:n_trans){
fits_llogis[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:n_trans){
fits_gam[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:n_trans){
fits_ggam[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:n_trans){
fits_gomp[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:n_trans){
fits_logn[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:n_trans){
fits_exp[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:n_trans){
fits_genf[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
#_#_#_#_#_#_#_#_#_#_#_#_#_
#covariates
#Specify the formula
fitform2 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res
fitform2_t3 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res+ arrival
for (i in 1:(n_trans-1)){
fits_c_wei[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:(n_trans-1)){
fits_c_weiph[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:(n_trans-1)){
fits_c_llogis[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:(n_trans-1)){
fits_c_gam[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:(n_trans-1)){
fits_c_ggam[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:(n_trans-1)){
fits_c_gomp[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:(n_trans-1)){
fits_c_logn[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:(n_trans-1)){
fits_c_exp[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:(n_trans-1)){
fits_c_genf[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
fits_c_wei[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "weibull")
fits_c_weiph[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "weibullph")
fits_c_llogis[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "llogis")
fits_c_gam[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "gamma")
fits_c_ggam[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "gengamma")
fits_c_gomp[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "gompertz")
fits_c_logn[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "lnorm")
fits_c_exp[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "exp")
fits_c_genf[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "genf")
for (i in 1:n_trans){
km.lc[[i]] <- survfit(formula= fitform, data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i))
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - tiempo_antes_fits
## [1] "Time in process: "
## Time difference of 20.90386 mins
transition_label<- c(`1`="Transition 1: Admission to Ther.Discharge",`2`="Transition 2: Admission to Readmission",`3`="Transition 3: Ther.Discharge to Readmission")
startTime <- Sys.time()
layout(matrix(1:3, nc = 1, byrow = F))
for (i in 1:n_trans){
plot(km.lc[[i]], col="red", conf.int=F, xlim=c(0,12));
lines(fits_wei[[i]], col="coral4", ci=F);
lines(fits_weiph[[i]], col="navyblue", ci=F);
lines(fits_gomp[[i]], col="lightpink", ci=F);
lines(fits_llogis[[i]], col="gray25", ci=F);#A0A36D
lines(fits_gam[[i]], col="darkorchid4", ci=F);
lines(fits_ggam[[i]], col="#496A72", ci=F);
lines(fits_logn[[i]], col="gray70", ci=F);
lines(fits_exp[[i]],col="#A0A36D", ci=F);
lines(fits_exp[[i]],col="cadetblue", ci=F)
legend("bottomleft", legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"), col =
c("red","coral4","navyblue","lightpink","gray25",#"#A0A36D","#886894",
"darkorchid4","#496A72","gray70","#A0A36D", "cadetblue"),
title = "Distributions", cex = .95, bty = "n", lty=1,lwd=3)# lty = 1:2,
title(main=transition_label[[i]])
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - startTime
## [1] "Time in process: "
## Time difference of 0.6183422 secs
#23 min aprox.
#For more complicated models, users should specify what covariate values they want summaries for, rather than relying on the default
#</div>
options(warn=0)
options(warn=-1)
n_iter<-10000
tiempo_antes_fits<-Sys.time()
#Weathers, Brandon and Cutler, Richard Dr., "Comparision of Survival Curves Between Cox Proportional
#Hazards, Random Forests, and Conditional Inference Forests in Survival Analysis" (2017). All Graduate
#Plan B and other Reports. 927.
#https://digitalcommons.usu.edu/gradreports/927
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">
#https://devinincerti.com/2019/01/01/sim-mstate.html
n_trans2 <- max(mat_4_states, na.rm = TRUE)
fits_wei2 <- vector(mode = "list", length = n_trans2)
fits_weiph2 <- vector(mode = "list", length = n_trans2)
fits_llogis2 <- vector(mode = "list", length = n_trans2)
fits_gomp2 <- vector(mode = "list", length = n_trans2)
fits_logn2 <- vector(mode = "list", length = n_trans2)
fits_exp2 <- vector(mode = "list", length = n_trans2)
fits_gam2 <- vector(mode = "list", length = n_trans2)
fits_ggam2 <- vector(mode = "list", length = n_trans2)
fits_genf2 <- vector(mode = "list", length = n_trans2)
fits_c_wei2 <- vector(mode = "list", length = n_trans2)
fits_c_weiph2 <- vector(mode = "list", length = n_trans2)
fits_c_llogis2 <- vector(mode = "list", length = n_trans2)
fits_c_gomp2 <- vector(mode = "list", length = n_trans2)
fits_c_logn2 <- vector(mode = "list", length = n_trans2)
fits_c_exp2 <- vector(mode = "list", length = n_trans2)
fits_c_gam2 <- vector(mode = "list", length = n_trans2)
fits_c_ggam2 <- vector(mode = "list", length = n_trans2)
fits_c_genf2 <- vector(mode = "list", length = n_trans2)
km.lc2<-list()
fits_cox2<-list()
fits_c_cox2<-list()
#"gengamma" Generalized gamma (stable parameterisation)
#"gengamma.orig" Generalized gamma (original parameterisation)
#"genf" Generalized F (stable parameterisation)
#"genf.orig" Generalized F (original parameterisation)
#"weibull" Weibull
#"gamma" Gamma
#"exp" Exponential
#"lnorm" Log-normal
#"gompertz" Gompertz
library(flexsurv)
#Specify the formula
fitform_4s <- Surv(time, status) ~ 1
for (i in 1:n_trans2){
fits_wei2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:n_trans2){
fits_weiph2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:n_trans2){
fits_llogis2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:n_trans2){
fits_gam2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:n_trans2){
fits_ggam2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:n_trans2){
fits_gomp2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:n_trans2){
fits_logn2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:n_trans2){
fits_exp2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:n_trans2){
fits_genf2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
#_#_#_#_#_#_#_#_#_#_#_#_#_
#covariates
#Specify the formula
fitform2 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res
fitform2_t3 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res+ arrival
for (i in 1:(n_trans2-2)){
fits_c_wei2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:(n_trans2-2)){
fits_c_weiph2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:(n_trans2-2)){
fits_c_llogis2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:(n_trans2-2)){
fits_c_gam2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:(n_trans2-3)){
fits_c_ggam2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
#https://stackoverflow.com/questions/47100140/estimating-survival-with-a-generalized-gamma-function-in-flexsurv-fails
#optim uses a finite-difference approximation for the gradient as stated in help("optim")
fits_c_ggam2[[3]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == 3),
dist = "gengamma", inits=c(.0001, mean(subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == 3)[,"time"],na.rm=T)))
#A function of the uncensored survival times t, which returns a vector of reasonable initial values for maximum likelihood estimation of each parameter. For example, function(t){ c(1, mean(t)) } will always initialize the first of two parameters at 1, and the second (a scale parameter, for instance) at the mean of t.
for (i in 1:(n_trans2-2)){
fits_c_gomp2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:(n_trans2-2)){
fits_c_logn2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:(n_trans2-2)){
fits_c_exp2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:(n_trans2-2)){
fits_c_genf2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
##4th state
fits_c_wei2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "weibull")
fits_c_weiph2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "weibullph")
fits_c_llogis2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "llogis")
fits_c_gam2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "gamma")
fits_c_ggam2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "gengamma")
fits_c_gomp2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "gompertz")
fits_c_logn2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "lnorm")
fits_c_exp2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "exp")
fits_c_genf2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "genf")
#5th state
fits_c_wei2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "weibull")
fits_c_weiph2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "weibullph")
fits_c_llogis2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "llogis")
fits_c_gam2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "gamma")
fits_c_ggam2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "gengamma")
fits_c_gomp2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "gompertz")
fits_c_logn2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "lnorm")
fits_c_exp2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "exp")
fits_c_genf2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "genf")
for (i in 1:n_trans2){
km.lc2[[i]] <- survfit(formula= fitform_4s, data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i))
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - tiempo_antes_fits
## [1] "Time in process: "
## Time difference of 31.604 mins
transition_label_4s<- c(`1`="Transition 1: Admission to Ther.Discharge",`2`="Transition 2: Admission to Discharge w/o Clinical Advice",`3`="Transition 3: Admission to Readmission",`4`="Transition 4: Ther.Discharge to Readmission", `5`="Transition 5: Discharge w/o Clinical Advice to Readmission")
startTime <- Sys.time()
layout(matrix(1:6, nc = 2, byrow = F))
for (i in 1:n_trans2){
plot(km.lc2[[i]], col="red", conf.int=F, xlim=c(0,12));
lines(fits_wei2[[i]], col="coral4", ci=F);
lines(fits_weiph2[[i]], col="navyblue", ci=F);
lines(fits_gomp2[[i]], col="lightpink", ci=F);
lines(fits_llogis2[[i]], col="gray25", ci=F);##A0A36D
lines(fits_gam2[[i]], col="darkorchid4", ci=F);##886894
lines(fits_ggam2[[i]], col="#496A72", ci=F);
lines(fits_logn2[[i]], col="gray70", ci=F);
lines(fits_exp2[[i]],col="#A0A36D", ci=F);
lines(fits_genf2[[i]],col="cadetblue", ci=F)
legend("bottomleft", legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"), col =
c("red","coral4","navyblue","lightpink","gray25",#"#B89673",##886894
"darkorchid4","#496A72","gray70","#A0A36D","cadetblue"),
title = "Distributions", cex = .95, bty = "n", lty=1.2,lwd=3)# lty = 1:2,
title(main=transition_label_4s[[i]])
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - startTime
## [1] "Time in process: "
## Time difference of 1.047194 secs
#23 min aprox.
#For more complicated models, users should specify what covariate values they want summaries for, rather than relying on the default
#</div>
options(warn=0)
Additionally, we compared the hazards of these distributions with non-parametric techniques, using a smoothed hazard function from right-censored data using kernel-based methods (as Incerti shows in his blog). The confidence interval around the predicted hazard function is estimated using bootstrapping methods of 10,000 iterations. Confidence intervals are obtained by sampling randomly from the asymptotic normal distribution of the maximum likelihood estimates and then taking quantiles (see, e.g. Mandel, 2013).
#Micha Mandel (2013) Simulation-Based Confidence Intervals for Functions With Complicated Derivatives, The American Statistician, 67:2, 76-81, DOI: 10.1080/00031305.2013.783880
# https://rpubs.com/martina_morris/testhazard
tiempo_antes_fits2<-Sys.time()
newtime2 = seq(from=1/24, to= 15, by=1/12)
#```{r fit,eval=T, echo=T, paged.print=TRUE, fig.height=13, fig.width=10, fig.align="center", results = 'asis'}
kernel_haz_est<-list()
kernel_haz<-list()
for (i in 1:n_trans){
library("muhaz")
kernel_haz_est[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"status"])
kernel_haz[[i]] <- data.table(time = kernel_haz_est[[i]]$est.grid,
est = kernel_haz_est[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only<-
cbind(trans=rep(1:n_trans,each=nrow(kernel_haz[[i]])),
rbindlist(kernel_haz))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create list of survreg for different transitions
#<- flexsurvreg_list(fit1, fit2, fit3)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dists_wo_covs <- cbind.data.frame(covs=c(rep("fits_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei", "weiph", "gomp", "llogis", "gam","ggam", "logn", "exp", "genf"),1))
dists_w_covs <- cbind.data.frame(covs=c(rep("fits_c_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei", "weiph", "gomp", "llogis", "gam","ggam", "logn", "exp", "genf"),1))
#hr.exp <- round(exp(coef(get(paste0("fits_",dists[x,"model"]))[[y]])["groupGood"]),3)
#WO COVS
fitted_flexsurvreg2a<-data.frame()
fit_flexsurvreg2a<-data.frame()
for (y in 1:n_trans){
for (x in 1:nrow(dists_wo_covs)){
cat(paste0("#### Flexible Survival Model (w/o covs): ",
dists_wo_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv<-paste0(dists_wo_covs[x,"covs"],dists_wo_covs[x,"model"])
#FITTED LINES
#Lines
est_by_time<-
data.table::data.table(summary(get(mod_flexsurv)[[y]], ci = F, t= newtime2, B=10000,type = "hazard", tidy=T))
#dataframe
fitted_flexsurvreg2a<-rbind.data.frame(fitted_flexsurvreg2a,
cbind.data.frame(dist= rep(dists_wo_covs[x,"formal"],),
trans=rep(y,),
est_by_time))
#
fit_flexsurvreg2a<-rbind(fit_flexsurvreg2a,
cbind(dist= dists_wo_covs[x,"formal"], trans=y,
covs="w/o covs",
AIC= get(mod_flexsurv)[[y]]$AIC,
Llik= get(mod_flexsurv)[[y]]$loglik,
npars= get(mod_flexsurv)[[y]]$npars,
pars= get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik,
BIC= get(mod_flexsurv)[[y]]$loglik+ log(get(mod_flexsurv)[[y]]$N)* (get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 3
##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only1_binned<-
haz_int_only %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
#180 rows vs. 303 in haz int only
fitted_flexsurvreg2a_binned<-
fitted_flexsurvreg2a[,c("dist","trans","time","est")] %>%
dplyr::filter(time<=max(haz_int_only$time)) %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans, dist) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2a_binned_mix<-
fitted_flexsurvreg2a_binned %>%
dplyr::left_join(haz_int_only1_binned, by=c("trans", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse_a<-
cbind.data.frame(
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),3),
trans=rep(c(1:3),each=9))
rmse_comp_fits_2a<- data.frame()
for(i in 1:nrow(db_for_apply_rmse_a)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2a_binned_mix[complete.cases(fitted_flexsurvreg2a_binned_mix),],
dist==db_for_apply_rmse_a[i,"dist"] &
trans==db_for_apply_rmse_a[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2a_binned_mix[complete.cases(fitted_flexsurvreg2a_binned_mix),],
dist==db_for_apply_rmse_a[i,"dist"] &
trans==db_for_apply_rmse_a[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2a<- rbind(rmse_comp_fits_2a,cbind(dist=db_for_apply_rmse_a[i,"dist"],
trans=db_for_apply_rmse_a[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2a<-
rmse_comp_fits_2a %>%
dplyr::mutate(rmse=round(as.numeric(rmse),4)) %>%
dplyr::arrange(trans,rmse)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
haz <- rbind(haz_int_only, fitted_flexsurvreg2a)
#brown burlywood3 blueviolet blue4 cadetblue4
haz_plot_int_only<-
haz %>%
dplyr::mutate(dist=factor(dist,levels=c("Kernel density",dists_wo_covs$formal))) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("black","#f54b96","#00e9b1","#69b763",
"#166000","#b27ff9","#fa863b","#013eab","#a7aa48","#b34b40","darkred"))+
#c("black",brewer.pal(n = 9, name = 'Paired')))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label))+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
scale_x_continuous(breaks = seq(1, 15, 2))+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank()) +
labs(y="Hazard",x="Time (years)")
fit_plot_int_only<-
fit_flexsurvreg2a %>%
melt(id.vars=c("dist","trans","covs","npars","pars")) %>%
dplyr::filter(variable!="Llik") %>%
dplyr::mutate(dist=factor(dist, levels= dists_wo_covs$formal),
value=as.numeric(value)) %>%
ggplot(aes(dist, value, fill=variable))+
geom_bar(stat= "identity")+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_fill_manual(name="Indices", values = c("gray30","gray70"))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label))+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
axis.text.x= element_text(angle=45, vjust=0.5),
strip.background = element_blank(),
strip.text.x = element_blank(),
axis.title.x=element_blank())+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank())+
labs(y="Value")
haz_plot_int_only
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso3.jpg", height=14, width= 10, res= 96, units = "in")
haz_plot_int_only
dev.off()
}
We looked over estimations and confidence intervals (made by resamples of 10,000 iterations), compared to the mentioned smooth estimate of the hazard function for censored data. The main difference is these models contain covariates of interest for the study. To extrapolate confidence intervals, we defined a different model for the third transition, including the time of arrival to the intermediate state.
options(warn=-1)
kernel_haz_est2a<-list()
kernel_haz_est2b<-list()
kernel_haz2a<-list()
kernel_haz2b<-list()
for (i in 1:n_trans){
library("muhaz")
kernel_haz_est2a[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"status"])
kernel_haz2a[[i]] <- data.table(time = kernel_haz_est2a[[i]]$est.grid,
est = kernel_haz_est2a[[i]]$haz.est,
dist = "Kernel density")
kernel_haz_est2b[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"status"])
kernel_haz2b[[i]] <- data.table(time = kernel_haz_est2b[[i]]$est.grid,
est = kernel_haz_est2b[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only2<-
rbind(cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2a[[i]])),
tipo_programa=rep(1,nrow(kernel_haz2a[[i]])),
rbindlist(kernel_haz2a)),
cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2b[[i]])),
tipo_programa=rep(0,nrow(kernel_haz2b[[i]])),
rbindlist(kernel_haz2b)))
# Database to contrast adjustments
newdat2 <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)))
newdat2_t3 <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)),
arrival=0)
## W COVS
fitted_flexsurvreg2b<-data.frame()
fit_flexsurvreg2b<-data.frame()
for (y in 1:n_trans){
for (x in 1:nrow(dists_w_covs)){
cat(paste0("#### Flexible Survival Model (w/ covs): ",
dists_w_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv<-paste0(dists_w_covs[x,"covs"],dists_w_covs[x,"model"])
#FITTED LINES
#Lines
if(y==n_trans){
est_by_time<- data.table::data.table(summary(get(mod_flexsurv)[[y]],newdata=newdat2_t3,t=newtime2,B=10000,type="hazard",tidy=T))} else{
est_by_time<- data.table::data.table(cbind.data.frame(summary(get(mod_flexsurv)[[y]],newdata=newdat2,t=newtime2,B=10000,type="hazard",tidy=T),arrival=rep(0,)))
}
#"survival" for survival probabilities.
#"cumhaz" for cumulative hazards.
#"hazard" for hazards.
#"rmst" for restricted mean survival.
#"mean" for mean survival.
#"median" for median survival (alternative to type="quantile" with quantiles=0.5).
#"quantile" for quantiles of the survival time distribution.
#"link" for the fitted value of the location parameter (i.e. the "linear predictor")
#dataframe
fitted_flexsurvreg2b<-rbind.data.frame(fitted_flexsurvreg2b,
cbind.data.frame(dist= rep(dists_w_covs[x,"formal"],),
hr= round(exp(coef(get(mod_flexsurv)[[y]])["factor(tipo_de_programa_2)1"]),3),
trans=rep(y,),
est_by_time))
#
fit_flexsurvreg2b<-rbind(fit_flexsurvreg2b,
cbind(dist= dists_w_covs[x,"formal"],
trans=y,
covs="w/ covs",
AIC= get(mod_flexsurv)[[y]]$AIC,
Llik= get(mod_flexsurv)[[y]]$loglik,
npars= get(mod_flexsurv)[[y]]$npars,
pars= get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik,
BIC= get(mod_flexsurv)[[y]]$loglik+ log(get(mod_flexsurv)[[y]]$N)* (get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 3
##
tipo_programa_label<- c(`0`="General Population",`1`="Women specific")
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
if(no_mostrar==1){
fitted_flexsurvreg2b %>%
dplyr::group_by(trans) %>%
summarise(mean(ucl,na.rm=T))
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only2_binned<-
haz_int_only2 %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, tipo_programa) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, tipo_programa, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::rename("tipo_de_programa_2"="tipo_programa") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2))%>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'tipo_programa', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2b_binned<-
fitted_flexsurvreg2b[,c("dist","trans","time","est","factor(tipo_de_programa_2)")] %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::filter(time<=max(haz_int_only2$time)) %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, dist, tipo_de_programa_2) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, tipo_de_programa_2, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist', 'tipo_de_programa_2' (override with `.groups` argument)
fitted_flexsurvreg2b_binned_mix<-
fitted_flexsurvreg2b_binned %>%
dplyr::left_join(haz_int_only2_binned, by=c("trans","tipo_de_programa_2", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse<-
cbind.data.frame(tipo_prog=rep(c("0","1"),each=9,3),
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),2*3),
trans=rep(c(1:3),each=9*2))
rmse_comp_fits_2b<- data.frame()
for(i in 1:nrow(db_for_apply_rmse)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2b_binned_mix[complete.cases(fitted_flexsurvreg2b_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse[i,"tipo_prog"] &
dist==db_for_apply_rmse[i,"dist"] &
trans==db_for_apply_rmse[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2b_binned_mix[complete.cases(fitted_flexsurvreg2b_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse[i,"tipo_prog"] &
dist==db_for_apply_rmse[i,"dist"] &
trans==db_for_apply_rmse[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2b<- rbind(rmse_comp_fits_2b,cbind(dist=db_for_apply_rmse[i,"dist"],
tipo_prog=db_for_apply_rmse[i,"tipo_prog"],
trans=db_for_apply_rmse[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2b<-
rmse_comp_fits_2b %>%
tidyr::pivot_wider(names_from = tipo_prog, values_from = rmse) %>%
dplyr::rename("gp"="0","we"="1") %>%
dplyr::mutate(gp=as.numeric(gp),we=as.numeric(we)) %>%
dplyr::mutate(mean_rmse=rowSums(.[3:4])/2) %>%
dplyr::arrange(trans,dist, mean_rmse)%>%
dplyr::mutate(mean_rmse=round(mean_rmse,4))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#http://colorschemedesigner.com/csd-3.5/#
#https://www.r-graph-gallery.com/42-colors-names.html
plot_fitted_flexsurvreg2b<-
fitted_flexsurvreg2b[,c("dist","trans","time","est","lcl","ucl","factor(tipo_de_programa_2)")] %>%
rbind(cbind(dplyr::rename(haz_int_only2, "factor(tipo_de_programa_2)"="tipo_programa"),
"lcl"=haz_int_only2$est,"ucl"=haz_int_only2$est)) %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2),
dist=factor(dist, levels=c("Kernel density",dists_w_covs$formal)),
trans=factor(trans)) %>%
#dplyr::group_by(tipo_de_programa_2,dist,time,trans) %>% summarise(n=n()) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
geom_ribbon(aes(x = time, ymin = lcl, ymax = ucl, fill = dist), alpha = 0.2)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","navyblue")) +
scale_fill_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","navyblue")) +
facet_wrap(tipo_de_programa_2~trans,labeller = labeller(trans = transition_label, tipo_de_programa_2=tipo_programa_label))+
sjPlot::theme_sjplot2()+
#ylim(0,.3)+
scale_x_continuous(breaks = seq(0, 15, 2))+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
labs(y="Hazard",x="Time (years)",caption="Note. Kernel density, stratified by type of program")
plot_fitted_flexsurvreg2b
## http://www.statistica.it/gianluca/teaching/r-hta-workshop/2019/Bullement.pdf
tiempo_despues_fits2<-Sys.time()
paste0("Time in process: ");tiempo_despues_fits2-tiempo_antes_fits2
## [1] "Time in process: "
## Time difference of 2.391648 mins
#13 minutos aprox. en DELL
options(warn=0)
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso4.jpg", height=10, width= 10, res= 96, units = "in")
plot_fitted_flexsurvreg2b
dev.off()
}
4 states
#Micha Mandel (2013) Simulation-Based Confidence Intervals for Functions With Complicated Derivatives, The American Statistician, 67:2, 76-81, DOI: 10.1080/00031305.2013.783880
# https://rpubs.com/martina_morris/testhazard
tiempo_antes_fits2<-Sys.time()
newtime2 = seq(from=1/24, to= 15, by=1/12)
kernel_haz_est_4s<-list()
kernel_haz_4s<-list()
for (i in 1:n_trans2){
library("muhaz")
kernel_haz_est_4s[[i]] <- muhaz(ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"time"],
ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"status"])
kernel_haz_4s[[i]] <- data.table(time = kernel_haz_est_4s[[i]]$est.grid,
est = kernel_haz_est_4s[[i]]$haz.est,
dist = "Kernel density")
}
#, n.est.grid=length(unique(ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"time"]))/300
haz_int_only_4s<-
cbind(trans=rep(1:n_trans2,each=nrow(kernel_haz_4s[[i]])),
rbindlist(kernel_haz_4s))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create list of survreg for different transitions
#<- flexsurvreg_list(fit1, fit2, fit3)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dists_wo_covs_4s <- cbind.data.frame(covs=c(rep("fits_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei2", "weiph2", "gomp2", "llogis2", "gam2","ggam2", "logn2", "exp2", "genf2"),1))
dists_w_covs_4s <- cbind.data.frame(covs=c(rep("fits_c_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei2", "weiph2", "gomp2", "llogis2", "gam2","ggam2", "logn2", "exp2", "genf2"),1))
#WO COVS
fitted_flexsurvreg2a_4s<-data.frame()
fit_flexsurvreg2a_4s<-data.frame()
for (y in 1:n_trans2){
for (x in 1:nrow(dists_wo_covs_4s)){
cat(paste0("#### Flexible Survival Model (w/o covs): ",
dists_wo_covs_4s[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv2<-paste0(dists_wo_covs_4s[x,"covs"],dists_wo_covs_4s[x,"model"])
#FITTED LINES
#Lines
est_by_time2<-
data.table::data.table(summary(get(mod_flexsurv2)[[y]], ci = F, t= newtime2, B=10000,type = "hazard", tidy=T))
#dataframe
fitted_flexsurvreg2a_4s<-rbind.data.frame(fitted_flexsurvreg2a_4s,
cbind.data.frame(dist= rep(dists_wo_covs_4s[x,"formal"],),
trans=rep(y,),
est_by_time2))
#
fit_flexsurvreg2a_4s<-rbind(fit_flexsurvreg2a_4s,
cbind(dist= dists_wo_covs_4s[x,"formal"], trans=y,
covs="w/o covs",
AIC= get(mod_flexsurv2)[[y]]$AIC,
Llik= get(mod_flexsurv2)[[y]]$loglik,
npars= get(mod_flexsurv2)[[y]]$npars,
pars= get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik,
BIC= get(mod_flexsurv2)[[y]]$loglik+ log(get(mod_flexsurv2)[[y]]$N)* (get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 3
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 4
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 4
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 4
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 4
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 4
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 4
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 4
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 4
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 4
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 5
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 5
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 5
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 5
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 5
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 5
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 5
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 5
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 5
##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only1_4s_binned<-
haz_int_only_4s %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only_4s$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
#180 rows vs. 303 in haz int only
fitted_flexsurvreg2a_4s_binned<-
fitted_flexsurvreg2a_4s[,c("dist","trans","time","est")] %>%
dplyr::filter(time<=max(haz_int_only_4s$time)) %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans, dist) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only_4s$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2a_4s_binned_mix<-
fitted_flexsurvreg2a_4s_binned %>%
dplyr::left_join(haz_int_only1_4s_binned, by=c("trans", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse_c<-
cbind.data.frame(
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),5),
trans=rep(c(1:5),each=9))
rmse_comp_fits_2c<- data.frame()
for(i in 1:nrow(db_for_apply_rmse_c)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2a_4s_binned_mix[complete.cases(fitted_flexsurvreg2a_4s_binned_mix),],
dist==db_for_apply_rmse_c[i,"dist"] &
trans==db_for_apply_rmse_c[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2a_4s_binned_mix[complete.cases(fitted_flexsurvreg2a_4s_binned_mix),],
dist==db_for_apply_rmse_c[i,"dist"] &
trans==db_for_apply_rmse_c[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2c<- rbind(rmse_comp_fits_2c,cbind(dist=db_for_apply_rmse_c[i,"dist"],
trans=db_for_apply_rmse_c[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2c<-
rmse_comp_fits_2c %>%
dplyr::mutate(rmse=round(as.numeric(rmse),4)) %>%
dplyr::arrange(trans,rmse)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
haz_4s <- rbind(haz_int_only_4s, fitted_flexsurvreg2a_4s)
#brown burlywood3 blueviolet blue4 cadetblue4
haz_plot_4s_int_only<-
haz_4s %>%
dplyr::mutate(dist=factor(dist,levels=c("Kernel density",dists_wo_covs_4s$formal))) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("black","#f54b96","#00e9b1","#69b763",
"#166000","#b27ff9","#fa863b","#013eab","#a7aa48","#b34b40","darkred"))+
#c("black",brewer.pal(n = 9, name = 'Paired')))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(.~trans,labeller = labeller(trans = transition_label_4s), ncol=1, scales = "free_y")+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
scale_x_continuous(breaks = seq(1, 15, 2))+
#ylim(0,1.25)+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank()) +
labs(y="Hazard",x="Time (years)")
fit_plot_int_4s_only<-
fit_flexsurvreg2a_4s %>%
melt(id.vars=c("dist","trans","covs","npars","pars")) %>%
dplyr::filter(variable!="Llik") %>%
dplyr::mutate(dist=factor(dist, levels= dists_wo_covs_4s$formal),
value=as.numeric(value)) %>%
ggplot(aes(dist, value, fill=variable))+
geom_bar(stat= "identity")+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_fill_manual(name="Indices", values = c("gray30","gray70"))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label_4s), ncol=1)+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
axis.text.x= element_text(angle=45, vjust=0.5),
strip.background = element_blank(),
strip.text.x = element_blank(),
axis.title.x=element_blank())+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank())+
labs(y="Value")
#grid.arrange(haz_plot_4s_int_only,fit_plot_int_4s_only, heights=c(2,1.5))
haz_plot_4s_int_only
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso9.jpg", height=17, width= 10, res= 96, units = "in")
haz_plot_4s_int_only
#grid.arrange(haz_plot_4s_int_only,fit_plot_int_4s_only, heights=c(2,1.5))
dev.off()
}
We looked over estimations and confidence intervals (made by resamples of 10,000 iterations), compared to the mentioned smooth estimate of the hazard function for censored data. The main difference is these models contain covariates of interest for the study. To extrapolate confidence intervals, we defined a different model for the fourth and fifth transition.
options(warn=-1)
tiempo_antes_fits22<- Sys.time()
kernel_haz_est2a_4s<-list()
kernel_haz_est2b_4s<-list()
kernel_haz2a_4s<-list()
kernel_haz2b_4s<-list()
for (i in 1:n_trans2){
library("muhaz")
kernel_haz_est2a_4s[[i]] <- muhaz(ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"time"],
ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"status"])
kernel_haz2a_4s[[i]] <- data.table(time = kernel_haz_est2a_4s[[i]]$est.grid,
est = kernel_haz_est2a_4s[[i]]$haz.est,
dist = "Kernel density")
kernel_haz_est2b_4s[[i]] <- muhaz(ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"time"],
ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"status"])
kernel_haz2b_4s[[i]] <- data.table(time = kernel_haz_est2b_4s[[i]]$est.grid,
est = kernel_haz_est2b_4s[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only2_4s<-
rbind(cbind(trans=rep(1:n_trans2,each=nrow(kernel_haz2a_4s[[i]])),
tipo_programa=rep(1,nrow(kernel_haz2a_4s[[i]])),
rbindlist(kernel_haz2a_4s)),
cbind(trans=rep(1:n_trans2,each=nrow(kernel_haz2b_4s[[i]])),
tipo_programa=rep(0,nrow(kernel_haz2b_4s[[i]])),
rbindlist(kernel_haz2b_4s)))
# Database to contrast adjustments
newdat2_4s <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)))
newdat2_t45 <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)),
arrival=0)
## W COVS
fitted_flexsurvreg2b_4s<-data.frame()
fit_flexsurvreg2b_4s<-data.frame()
for (y in 1:n_trans2){
for (x in 1:nrow(dists_w_covs_4s)){
cat(paste0("#### Flexible Survival Model (w/ covs): ",
dists_w_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv2<-paste0(dists_w_covs_4s[x,"covs"],dists_w_covs_4s[x,"model"])
#FITTED LINES
#Lines
if(y%in% c(4,5)){
est_by_time2<- data.table::data.table(summary(get(mod_flexsurv2)[[y]],newdata=newdat2_t45,t=newtime2,B=10000,type="hazard",tidy=T))} else{
est_by_time2<- data.table::data.table(cbind.data.frame(summary(get(mod_flexsurv2)[[y]],newdata=newdat2_4s,t=newtime2,B=10000,type="hazard",tidy=T),arrival=rep(0,)))
}
#"survival" for survival probabilities.
#"cumhaz" for cumulative hazards.
#"hazard" for hazards.
#"rmst" for restricted mean survival.
#"mean" for mean survival.
#"median" for median survival (alternative to type="quantile" with quantiles=0.5).
#"quantile" for quantiles of the survival time distribution.
#"link" for the fitted value of the location parameter (i.e. the "linear predictor")
#dataframe
fitted_flexsurvreg2b_4s<-rbind.data.frame(fitted_flexsurvreg2b_4s,
cbind.data.frame(dist= rep(dists_w_covs_4s[x,"formal"],),
hr= round(exp(coef(get(mod_flexsurv2)[[y]])["factor(tipo_de_programa_2)1"]),3),
trans=rep(y,),
est_by_time2))
#
fit_flexsurvreg2b_4s<-rbind(fit_flexsurvreg2b_4s,
cbind(dist= dists_w_covs_4s[x,"formal"],
trans=y,
covs="w/ covs",
AIC= get(mod_flexsurv2)[[y]]$AIC,
Llik= get(mod_flexsurv2)[[y]]$loglik,
npars= get(mod_flexsurv2)[[y]]$npars,
pars= get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik,
BIC= get(mod_flexsurv2)[[y]]$loglik+ log(get(mod_flexsurv2)[[y]]$N)* (get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 3
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 4
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 4
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 4
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 4
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 4
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 4
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 4
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 4
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 4
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 5
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 5
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 5
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 5
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 5
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 5
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 5
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 5
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 5
##
#### Flexible Survival Model (w/ covs): Exponential; transition: 2
tipo_programa_label<- c(`0`="General Population",`1`="Women specific")
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
if(no_mostrar==1){
fitted_flexsurvreg2b_4s %>%
dplyr::group_by(trans) %>%
summarise(mean(ucl,na.rm=T))
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only2_binned<-
haz_int_only2_4s %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, tipo_programa) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2_4s$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, tipo_programa, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::rename("tipo_de_programa_2"="tipo_programa") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2))%>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'tipo_programa', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2b_4s_binned<-
fitted_flexsurvreg2b_4s[,c("dist","trans","time","est","factor(tipo_de_programa_2)")] %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::filter(time<=max(haz_int_only2_4s$time)) %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, dist, tipo_de_programa_2) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2_4s$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, tipo_de_programa_2, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist', 'tipo_de_programa_2' (override with `.groups` argument)
fitted_flexsurvreg2b_4s_binned_mix<-
fitted_flexsurvreg2b_4s_binned %>%
dplyr::left_join(haz_int_only2_binned, by=c("trans","tipo_de_programa_2", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse_d<-
cbind.data.frame(tipo_prog=rep(c("0","1"),each=9,5),
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),2*5),
trans=rep(c(1:5),each=9*2))
rmse_comp_fits_2b_4s<- data.frame()
for(i in 1:nrow(db_for_apply_rmse_d)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2b_4s_binned_mix[complete.cases(fitted_flexsurvreg2b_4s_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse_d[i,"tipo_prog"] &
dist==db_for_apply_rmse_d[i,"dist"] &
trans==db_for_apply_rmse_d[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2b_4s_binned_mix[complete.cases(fitted_flexsurvreg2b_4s_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse_d[i,"tipo_prog"] &
dist==db_for_apply_rmse_d[i,"dist"] &
trans==db_for_apply_rmse_d[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2b_4s<- rbind(rmse_comp_fits_2b_4s,cbind(dist=db_for_apply_rmse_d[i,"dist"],
tipo_prog=db_for_apply_rmse_d[i,"tipo_prog"],
trans=db_for_apply_rmse_d[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2b_4s<-
rmse_comp_fits_2b_4s %>%
tidyr::pivot_wider(names_from = tipo_prog, values_from = rmse) %>%
dplyr::rename("gp"="0","we"="1") %>%
dplyr::mutate(gp=as.numeric(gp),we=as.numeric(we)) %>%
dplyr::mutate(mean_rmse=rowSums(.[3:4])/2) %>%
dplyr::arrange(trans,dist, mean_rmse)%>%
dplyr::mutate(mean_rmse=round(mean_rmse,4))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#http://colorschemedesigner.com/csd-3.5/#
#https://www.r-graph-gallery.com/42-colors-names.html
plot_fitted_flexsurvreg2b_4s<-
fitted_flexsurvreg2b_4s[,c("dist","trans","time","est","lcl","ucl","factor(tipo_de_programa_2)")] %>%
rbind(cbind(dplyr::rename(haz_int_only2_4s, "factor(tipo_de_programa_2)"="tipo_programa"),
"lcl"=haz_int_only2_4s$est,"ucl"=haz_int_only2_4s$est)) %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2),
dist=factor(dist, levels=c("Kernel density",dists_w_covs$formal)),
trans=factor(trans)) %>%
#dplyr::group_by(tipo_de_programa_2,dist,time,trans) %>% summarise(n=n()) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
geom_ribbon(aes(x = time, ymin = lcl, ymax = ucl, fill = dist), alpha = 0.2)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","greenyellow")) +
scale_fill_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","greenyellow")) +
facet_wrap(tipo_de_programa_2~trans,labeller = labeller(trans = transition_label_4s, tipo_de_programa_2=tipo_programa_label),ncol=2, dir="v", scales="free_y")+
sjPlot::theme_sjplot2()+
#ylim(0,.3)+
scale_x_continuous(breaks = seq(0, 15, 2))+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
labs(y="Hazard",x="Time (years)",caption="Note. Kernel density, stratified by type of program")
plot_fitted_flexsurvreg2b_4s
## http://www.statistica.it/gianluca/teaching/r-hta-workshop/2019/Bullement.pdf
tiempo_despues_fits22<-Sys.time()
paste0("Time in process: ");tiempo_despues_fits22-tiempo_antes_fits22
## [1] "Time in process: "
## Time difference of 3.624504 mins
#13 minutos aprox. en DELL
options(warn=0)
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso13.jpg", height=17, width= 10, res= 96, units = "in")
plot_fitted_flexsurvreg2b_4s
dev.off()
}