Cumulative Transition Hazards of Joint Models

Cumulative baseline hazard can be estimated non-parametrically through a Breslow estimator. Must take note that we are assuming a semi-markov process in these transitions. The cumulative hazards obtained through the times specified by 180 points.

# Semi-parametric models
#Since there are tied event times, we need to specify ties="breslow" in order to obtain the Aalen-Johansen estimator of
fitform2 <- Surv(time, status) ~  factor(tipo_de_plan_res_1)
n_trans <- max(trans_matrix, na.rm = T)
n_trans2 <- max(trans_matrix, na.rm = T)
n_trans2_9s <- max(trans_matrix2, na.rm = T)

Time for this code chunk to run: 0 minutes

As seen in the formula above, we included the transition-specific covariables regarding the time that arrived to the state into the models.

#m2_1_ggam m2_2_ggam m2_3_ggam m2_4_logn
#m2_1_rp5 m2_2_rp4 m2_3_rp2 m2_4_rp3
#defined a series of Royston-Parmar models with a function of restricted cubic splines, in which the knots (#df -1) are defined in each percentile of the distribution. 
fits_c_logn2 <- vector(mode = "list", length = n_trans2)
fits_c_ggam2 <- vector(mode = "list", length = n_trans2)
fits_c_ggam_orig2 <- vector(mode = "list", length = n_trans2)
fits_c_rp042 <- vector(mode = "list", length = n_trans2)
fits_c_rp032 <- vector(mode = "list", length = n_trans2)
fits_c_rp012 <- vector(mode = "list", length = n_trans2)
fits_c_rp022 <- vector(mode = "list", length = n_trans2)


ms_d_match_surv$arrival<-ms_d_match_surv$Tstart
#APR 2022, had to center the variable to avoid convergence issues
ms_d_match_surv_exp<-ms_d_match_surv

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
no_attempts <- 30

form<-
  function(i=1){
  f<-dplyr::case_when(i==4~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]],i==3~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]],i==2~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]],T~paste0("Surv(time, status) ~ ",fitform2,"")[[3]])
  return(f)
}

for (i in 1:3){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
        attempt <- attempt + 1
    try(
      r <- flexsurvreg(formula=as.formula(form(i)),
                             data = subset(dplyr::mutate(ms_d_match_surv_exp,arrival=scale(arrival, scale=F)), trans == i),
                             dist = "gengamma.orig")
        )
    }
    fits_c_ggam_orig2[[i]] <- r
}  

for (i in 1:3){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
      r <- flexsurvreg(formula=as.formula(form(i)),
                                   data = subset(dplyr::mutate(ms_d_match_surv_exp,arrival=scale(arrival, scale=F)), trans == i),
                                   dist = "gengamma")
      )
    }
    fits_c_ggam2[[i]] <- r
}

for (i in 4){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
      r <- flexsurvreg(formula=as.formula(form(i)),
                                   data = subset(dplyr::mutate(ms_d_match_surv_exp,arrival=scale(arrival, scale=F)), trans == i),
                                   dist = "lnorm")
      )
    }
    fits_c_logn2[[i]] <- r
}  

for (i in 1){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
      r <- flexsurvspline(formula=as.formula(form(i)),
                            k=4,
                               data = subset(ms_d_match_surv_exp, trans == i))
      )
    }
    fits_c_rp042[[i]] <- r
}
for (i in 2){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
      r <- flexsurvspline(formula=as.formula(form(i)),
                            k=3,
                               data = subset(ms_d_match_surv_exp, trans == i))
      )
    }
    fits_c_rp032[[i]] <- r
}

for (i in 3){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
      r <- flexsurvspline(formula=as.formula(form(i)),
                            k=1,
                               data = subset(ms_d_match_surv_exp, trans == i))
      )
    }
    fits_c_rp012[[i]] <- r
}

for (i in 4){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
      r <- flexsurvspline(formula=as.formula(form(i)),
                            k=2,
                               data = subset(ms_d_match_surv_exp, trans == i))
      )
    }
    fits_c_rp022[[i]] <- r
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

Time for this code chunk to run: 0 minutes

# m2_1_gom m2_2_gom m2_3_logn m2_4_ggam m2_5_logn m2_6_ggam m2_7_ggam m2_8_ggam 
#m2_1_rp9 m2_2_rp10 m2_3_rp10 m2_4_rp7 m2_5_rp3 m2_6_rp4 m2_7_rp4 m2_8_rp2

fits_c_gomp3 <- vector(mode = "list", length = n_trans2_9s)
fits_c_logn3 <- vector(mode = "list", length = n_trans2_9s)
fits_c_ggam3 <- vector(mode = "list", length = n_trans2_9s)
fits_c_ggam_orig3 <- vector(mode = "list", length = n_trans2_9s)
fits_c_rp083 <- vector(mode = "list", length = n_trans2_9s)
fits_c_rp093 <- vector(mode = "list", length = n_trans2_9s)
fits_c_rp063 <- vector(mode = "list", length = n_trans2_9s)
fits_c_rp023 <- vector(mode = "list", length = n_trans2_9s)
fits_c_rp033 <- vector(mode = "list", length = n_trans2_9s)
fits_c_rp013 <- vector(mode = "list", length = n_trans2_9s)


ms_d_match_surv_oct_2022$arrival<-ms_d_match_surv_oct_2022$Tstart
#APR 2022, had to center the variable to avoid convergence issues
ms_d_match_surv_oct_2022_exp<-ms_d_match_surv_oct_2022

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
no_attempts <- 30

form_9s<-
  function(i=1){
  f<-dplyr::case_when(i==9~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]], i==8~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]], i==7~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]], i==6~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]], i==5~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]], i==4~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]],i==3~paste0("Surv(time, status) ~ ",fitform2," + arrival")[[3]],i==2~paste0("Surv(time, status) ~ ",fitform2,"")[[3]],T~paste0("Surv(time, status) ~ ",fitform2,"")[[3]])
  return(f)
}


#It looks like the hazard is decreasing for a large portion of the follow-up, so the best fitting shape parameter is negative, and positive shape parameters give zero likelihood. I managed to get it to fit using
#https://treeage.zendesk.com/hc/en-us/articles/360002682794-Gompertz-distribution-with-negative-shape-parameter-
#Some implementations of the Gompertz restrict a to be strictly positive, which ensures that the
#probability of survival decreases to zero as x increases to infinity. The more flexible implementation
#given here is consistent with streg in Stata

#Gompertz distribution with unrestricted shape parameter,

for (i in 1:2){
  options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
      try(
        r <- flexsurvreg(formula= as.formula(form_9s(i)),
                                   data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i), inits=c(-1, 1/mean(subset(ms_d_match_surv_oct_2022_exp, trans == i)[["time"]])),
                        # control=list(reltol=1e-16,fnscale = 16e2),
                                   dist = "gompertz")
      )
    }
    fits_c_gomp3[[i]] <- r
}
options(warn=0)


for (i in c(3,5)){

    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
        r <- flexsurvreg(formula= as.formula(form_9s(i)),
            data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i),
                                   dist = "lnorm")
      )
    }
    fits_c_logn3[[i]] <- r
}  


for (i in c(4,6,7,8)){
    options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
        attempt <- attempt + 1
    try(
        r <- flexsurvreg(formula= as.formula(form_9s(i)),
             data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i), 
                         control=list(reltol=1e-16,fnscale = 16e2, trace=100),
                             dist = "gengamma.orig", )
        )
    }
    fits_c_ggam_orig3[[i]] <- r
}

## initial  value 26.486558 
## iter  10 value 22.852706
## iter  20 value 22.832333
## iter  30 value 22.820315
## iter  40 value 22.811104
## iter  50 value 22.808577
## iter  60 value 22.806071
## iter  70 value 22.803390
## iter  80 value 22.800994
## iter  90 value 22.799291
## iter 100 value 22.798912
## final  value 22.798912 
## stopped after 100 iterations
## initial  value 8.284789 
## iter  10 value 7.469907
## final  value 7.465570 
## converged
## initial  value 0.823551 
## iter  10 value 0.727668
## final  value 0.724608 
## converged
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite. 
## initial  value 2.735506 
## iter  10 value 2.504116
## final  value 2.501762 
## converged
## Error in flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,  : 
##   (convertido del aviso) Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite.

options(warn=0)

for (i in c(4,6,7,8)){
    options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
        r <- flexsurvreg(formula= as.formula(form_9s(i)),
            data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i),
            control=list(reltol=1e-16,fnscale = 16e2, trace=100),
                                   dist = "gengamma")
      )
    }
    fits_c_ggam3[[i]] <- r
}

## initial  value 26.128908 
## iter  10 value 22.801817
## iter  20 value 22.771902
## iter  30 value 22.771502
## iter  40 value 22.771501
## final  value 22.771501 
## converged
## initial  value 8.246908 
## iter  10 value 7.408580
## final  value 7.408111 
## converged
## initial  value 0.813966 
## final  value -20807.645072 
## converged
## initial  value 2.715623 
## iter  10 value 2.478881
## iter  20 value 2.476830
## final  value 2.476796 
## converged

options(warn=0)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
invisible("RP")
#method: SANN, "Nelder-Mead", "CG", L-BFGS-B y BFGS Brent
#file:///C:/Users/CISSFO~1/AppData/Local/Temp/MicrosoftEdgeDownloads/0a14410e-d817-4ad6-a810-99e535568c58/RumenickPereiraDaSilva_DISSERT.pdf

fits_c_rp073[[1]] <- flexsurvspline(Surv(time, status)~ factor(tipo_de_plan_res_1),
    k=7,#k is equivalent to df-1 in the notation of stpm for Stata
    data = subset(ms_d_match_surv_oct_2022_exp, trans == 1), 
    # knots= 
    #inits=c(-1, 1/mean(subset(ms_d_match_surv_oct_2022_exp, trans == i)[["time"]])),
    #control=list(reltol=1e-16, fnscale=16e4, maxit = 4e3, trace=3e2, abstol=1e-16), 
    #en STATA, -41654.94 log-likelihood
    #control= list(trace=3e2, maxit = 4e3, reltol=1e-16, abstol=1e-16), #lower = -Inf, upper = 41654
    hessian=T#,
    #method = "SANN",
    #control = list(maxit = 2e4, temp = 20, trace=1e1) # para SANN
    # knots = log(c(4,12))
    )

## Warning in flexsurvreg(formula = Surv(time, status) ~
## factor(tipo_de_plan_res_1), : Optimisation has probably not converged to the
## maximum likelihood - Hessian is not positive definite.

## Error in fits_c_rp073[[1]] <- flexsurvspline(Surv(time, status) ~ factor(tipo_de_plan_res_1), : objeto 'fits_c_rp073' no encontrado

fits_c_rp083[[2]] <- flexsurvspline(Surv(time, status)~ factor(tipo_de_plan_res_1),
    k=8,#k is equivalent to df-1 in the notation of stpm for Stata
    data = subset(ms_d_match_surv_oct_2022_exp, trans == 2), 
    # knots= 
    #inits=c(-1, 1/mean(subset(ms_d_match_surv_oct_2022_exp, trans == i)[["time"]])),
    #control=list(reltol=1e-16,fnscale=-16e4, maxit = 4e3, trace=3e2), 
    #en STATA, -41654.94 log-likelihood
    #control= list(trace=3e2, maxit = 4e3, reltol=1e-16, abstol=1e-16), #lower = -Inf, upper = 41654
    hessian=T#,
    #method = "SANN",
    #control = list(maxit = 2e4, temp = 20, trace=1e1) # para SANN
    # knots = log(c(4,12))
    )

## Warning in flexsurvreg(formula = Surv(time, status) ~
## factor(tipo_de_plan_res_1), : Optimisation has probably not converged to the
## maximum likelihood - Hessian is not positive definite.

for (i in c(3)){
    options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
        r <- flexsurvspline(formula= Surv(time, status) ~ factor(tipo_de_plan_res_1) + arrival,
                            k=9,
            data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i),
            control=list(reltol=1e-16, trace=3e2, maxit = 1e3, reltol=1e-16, abstol=1e-16) #fnscale = 1600
        ))
    }
    fits_c_rp093[[i]] <- r
}

## initial  value 20328.716621 
## iter  10 value 14391.711902
## iter  20 value 13877.869092
## iter  30 value 13877.862912
## iter  40 value 13877.862912
## final  value 13877.862912 
## converged

options(warn = 0)

for (i in 4){
    options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
        r <- flexsurvspline(formula= as.formula(form_9s(i)),
                            k=5,
            data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i),
            control=list(reltol=1e-16,fnscale = 16e2, trace=3e2)    
        ))
    }
    fits_c_rp063[[i]] <- r
}

## initial  value 33.233092 
## iter  10 value 22.841030
## iter  20 value 22.778953
## iter  30 value 22.778953
## final  value 22.778953 
## converged

options(warn = 0)

for (i in 5){
    options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
        r <- flexsurvspline(formula= as.formula(form_9s(i)),
                            k=1,
            data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i),
            control=list(reltol=1e-16,fnscale = 16e2, trace=3e2)    
        ))
    }
    fits_c_rp023[[i]] <- r
}

## initial  value 3.525742 
## iter  10 value 2.561575
## iter  20 value 2.560053
## iter  30 value 2.558692
## iter  40 value 2.557811
## iter  50 value 2.556941
## iter  60 value 2.556154
## iter  70 value 2.554859
## iter  80 value 2.554448
## iter  90 value 2.553994
## iter 100 value 2.553564
## final  value 2.553564 
## stopped after 100 iterations

options(warn = 0)

for (i in c(6,7)){
    options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
        r <- flexsurvspline(formula= as.formula(form_9s(i)),
                            k=2,
            data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i),
            control=list(reltol=1e-16,fnscale = 16e2, trace=3e2)    
        ))
    }
    fits_c_rp033[[i]] <- r
}

## initial  value 10.139025 
## iter  10 value 7.393558
## iter  20 value 7.393254
## iter  30 value 7.389455
## iter  40 value 7.389245
## iter  50 value 7.389221
## iter  60 value 7.389200
## iter  70 value 7.389189
## iter  80 value 7.389185
## iter  90 value 7.389182
## iter 100 value 7.389180
## final  value 7.389180 
## stopped after 100 iterations
## initial  value 0.972015 
## iter  10 value 0.714250
## iter  20 value 0.713039
## iter  30 value 0.713024
## iter  40 value 0.713024
## iter  50 value 0.713024
## iter  60 value 0.713024
## iter  70 value 0.713024
## iter  80 value 0.713024
## iter  90 value 0.713024
## iter 100 value 0.713024
## final  value 0.713024 
## stopped after 100 iterations

options(warn = 0)

for (i in 8){
    options(warn = 2)
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
    try(
        r <- flexsurvspline(formula= as.formula(form_9s(i)),
                            k=0,
            data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp,arrival=scale(arrival, scale=F)), trans == i),
            control=list(reltol=1e-16,fnscale = 16e2, trace=3e2)    
        ))
    }
    fits_c_rp013[[i]] <- r
}

## initial  value 3.084199 
## iter  10 value 2.503741
## iter  20 value 2.497120
## iter  30 value 2.496260
## iter  40 value 2.496257
## iter  50 value 2.496254
## iter  60 value 2.496254
## iter  70 value 2.496254
## iter  80 value 2.496254
## final  value 2.496254 
## converged

options(warn = 0)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

Time for this code chunk to run: 0 minutes

The scale of arival is centered is set in 269.7 days for the 5 states model, and 145.6 for the 9 states model.

#https://wilmarigl.de/wp-content/uploads/2018/01/tutorial_hr_parsurvmodels.pdf

#> ### First fix a patient with reference values for the covariates
#> ### and 0 for all the time-dependent covariates; this will give the
#> ### baseline hazards

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_    
# Database to contrast adjustments
newdat_a <- data.table::data.table(tipo_de_plan_res_1= factor(c(rep(1,1*n_trans2))),
  arrival=rep(0,),
  strata= rep(1:n_trans2,1)
                       )
newdat_b <- data.table::data.table(tipo_de_plan_res_1= factor(c(rep(0,1*n_trans2))),
  arrival=rep(0,),
  strata= rep(1:n_trans2,1)
                       )

newdat_c <- data.table::data.table(tipo_de_plan_res_1= factor(c(rep(1,1*n_trans2_9s))),
  arrival=rep(0,),
  strata= rep(1:n_trans2_9s,1)
                       )
newdat_d <- data.table::data.table(tipo_de_plan_res_1= factor(c(rep(0,1*n_trans2_9s))),
  arrival=rep(0,),
  strata= rep(1:n_trans2_9s,1)
                       )

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#APR 2022- SCALING
newdat_a_sc<-mutate(newdat_a,arrival=scale(1096- attr(scale(ms_d_match_surv_exp$arrival,scale=F),"scaled:center"),scale=F))
newdat_b_sc<-mutate(newdat_b,arrival=scale(1096- attr(scale(ms_d_match_surv_exp$arrival,scale=F),"scaled:center"),scale=F))

newdat_c_sc<-mutate(newdat_c,arrival=scale(731- attr(scale(ms_d_match_surv_oct_2022_exp$arrival,scale=F),"scaled:center"),scale=F))
newdat_d_sc<-mutate(newdat_d,arrival=scale(731- attr(scale(ms_d_match_surv_oct_2022_exp$arrival,scale=F),"scaled:center"),scale=F))

 newdat_a$arrival<-ifelse(newdat_a$arrival==0,1096,newdat_a$arrival)
 newdat_b$arrival<-ifelse(newdat_b$arrival==0,1096,newdat_b$arrival)
 newdat_c$arrival<-ifelse(newdat_c$arrival==0,1096,newdat_c$arrival)
 newdat_d$arrival<-ifelse(newdat_d$arrival==0,1096,newdat_d$arrival)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

fitted_flexsurvreg2<-data.frame()
fit_flexsurvreg2<-data.frame()

#7 distributions
#4 transitions
dists_w_covs_5s_w_arrival<-cbind.data.frame(covs=rep("fits_c_",(7*n_trans)),
              formal=rep(c("Generalized gamma", "Generalized gamma (original)", "Lognormal", paste0("RP0",1:4)),1*n_trans2),
              dist=rep(c("gengamma","gengamma.orig", "lnorm", rep("no dist",4)),n_trans2),
              model=rep(c("ggam2", "ggam_orig2", "logn2", paste0("rp0",1:4,2)),1*n_trans2),
              trans=rep(1:n_trans2, each=7))

dists_w_covs_5s_w_arrival<-dists_w_covs_5s_w_arrival %>% 
  dplyr::filter(model %in% c(paste0("rp0",1:4,"2"),"ggam2","logn2")) %>% 
  dplyr::filter(dplyr::case_when(model=="logn2" & trans==4~T,
                                 model=="ggam2" & trans %in% 1:3~T,
                                 model=="rp022" & trans==4~T,
                                 model=="rp042" & trans==1~T,
                                 model=="rp032" & trans==2~T,
                                 model=="rp012" & trans==3~T,
                                 T~F)) %>% 
  dplyr::mutate(get_nd_a=dplyr::case_when(!grepl("ggam|logn",model)~"newdat_a",T~"newdat_a_sc")) %>% 
  dplyr::mutate(get_nd_b=dplyr::case_when(!grepl("ggam|logn",model)~"newdat_b",T~"newdat_b_sc"))

Time for this code chunk to run: 0 minutes

# m2_1_gom m2_2_gom m2_3_logn m2_4_ggam m2_5_logn m2_6_ggam m2_7_ggam m2_8_ggam 
#m2_1_rp9 m2_2_rp10 m2_3_rp10 m2_4_rp7 m2_5_rp3 m2_6_rp4 m2_7_rp4 m2_8_rp2
invisible("Every model is scaled in the arrival")

fitted_flexsurvreg3<-data.frame()
fit_flexsurvreg3<-data.frame()

Time for this code chunk to run: 0 minutes

# Semi-parametric models
#Since there are tied event times, we need to specify ties="breslow" in order to obtain the Aalen-Johansen estimator of
#the transition probability

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
formula3_mstate<-
as.formula(paste0("Surv(time, status) ~", paste0(fitform2," + arrival + strata(trans)")[[3]]))

cox_fits <- survival::coxph(formula3_mstate, 
                            data = ms_d_match_surv_exp, method = "breslow") 

cox_fits_9s <- survival::coxph(formula3_mstate, 
                            data = ms_d_match_surv_oct_2022_exp, method = "breslow") 


paste0("Test the Proportional Hazards Assumption of a Cox Regression:")

## [1] "Test the Proportional Hazards Assumption of a Cox Regression:"

cox.zph(cox_fits)

##                            chisq df       p
## factor(tipo_de_plan_res_1)  30.2  1 3.9e-08
## arrival                     28.3  1 1.0e-07
## GLOBAL                      56.9  2 4.4e-13

paste0(c("Not-proportional hazards"))

## [1] "Not-proportional hazards"

invisible(c("dists_w_covs_5s_w_arrival ver para ver la distribución"))
#1° TRANS, un modelo con splines cúblicos restringidos a 4 nudos (RP05) y la distribución F generalizado (después G-gamma en STATA) obtuvieron un mejor ajuste. En R, el único que Gen-F es peor que RP
#2° TRANS, un modelo con splines cúblicos restringidos a 3 nudos (RP04) y la distribución F generalizado (después G-gamma en STATA) obtuvieron un mejor ajuste.
#3° TRANS, un modelo con splines cúblicos restringidos a 2 nudos (RP03) y la distribución F generalizado (Lognormal en STATA) obtuvieron un mejor ajuste.
#4° TRANS, un modelo con splines cúblicos restringidos a 2 nudos (RP03) y la distribución F generalizado (Lognormal en STATA) obtuvieron un mejor ajuste.
#EN STATA, todos los RPs son mejores que los paramétricos. En R, sólo la transición 3 y 4 tienen menor error en  Gen-F en el max time

invisible("At October 2022")
##m2_1_ggam m2_2_ggam m2_3_ggam m2_4_logn
#m2_1_rp5 m2_2_rp4 m2_3_rp2 m2_4_rp3


sel_param_fits_a <- vector(length = n_trans, mode = "list") 
    sel_param_fits_a[[1]]<- fits_c_ggam2[[1]] #fits_c_genf2[[1]] 
    sel_param_fits_a[[2]]<- fits_c_ggam2[[2]]
    sel_param_fits_a[[3]]<- fits_c_ggam2[[3]] 
    sel_param_fits_a[[4]]<- fits_c_logn2[[4]] 

sel_param_fits_b <- vector(length = n_trans, mode = "list") 
    sel_param_fits_b[[1]]<- fits_c_rp042[[1]] 
    sel_param_fits_b[[2]]<- fits_c_rp032[[2]]
    sel_param_fits_b[[3]]<- fits_c_rp012[[3]]
    sel_param_fits_b[[4]]<- fits_c_rp022[[4]]


# m2_1_gom m2_2_gom m2_3_logn m2_4_ggam m2_5_logn m2_6_ggam m2_7_ggam m2_8_ggam 
#m2_1_rp9 m2_2_rp10 m2_3_rp10 m2_4_rp7 m2_5_rp3 m2_6_rp4 m2_7_rp4 m2_8_rp2

sel_param_fits_c <- vector(length = n_trans2_9s, mode = "list") 
    sel_param_fits_c[[1]]<- fits_c_gomp3[[1]] #fits_c_genf2[[1]] 
    sel_param_fits_c[[2]]<- fits_c_gomp3[[2]]
    sel_param_fits_c[[3]]<- fits_c_logn3[[3]] 
    sel_param_fits_c[[4]]<- fits_c_ggam3[[4]] 
    sel_param_fits_c[[5]]<- fits_c_logn3[[5]] 
    sel_param_fits_c[[6]]<- fits_c_ggam3[[6]] 
    sel_param_fits_c[[7]]<- fits_c_ggam3[[7]] 
    sel_param_fits_c[[8]]<- fits_c_ggam3[[8]] 

sel_param_fits_d <- vector(length = n_trans2_9s, mode = "list") 
    sel_param_fits_d[[1]]<- fits_c_rp083[[1]] 
    sel_param_fits_d[[2]]<- fits_c_rp093[[2]]
    sel_param_fits_d[[3]]<- fits_c_rp093[[3]]
    sel_param_fits_d[[4]]<- fits_c_rp063[[4]]   
    sel_param_fits_d[[5]]<- fits_c_rp023[[5]] 
    sel_param_fits_d[[6]]<- fits_c_rp033[[6]]
    sel_param_fits_d[[7]]<- fits_c_rp033[[7]]
    sel_param_fits_d[[8]]<- fits_c_rp013[[8]]

Time for this code chunk to run: 0 minutes

#https://stats.stackexchange.com/questions/533495/can-one-compare-hazard-ratio-and-average-hazard-ratio?noredirect=1&lq=1
#2_1_gom m2_2_gom m2_3_logn m2_4_ggam m2_5_logn m2_6_ggam m2_7_ggam m2_8_ggam 

calcAHRgeo <- function(x, h0, h1, ft, wt){
integrand <- function(x){ log(h1(x)/h0(x)) * wt(x) * ft(x) }
logahr <- integrate(integrand, lower=0, upper=x)$value
ahr <- exp(logahr)
return(ahr)
}


#https://wilmarigl.de/wp-content/uploads/2018/01/tutorial_hr_parsurvmodels.pdf
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
invisible("Log-normal")#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

logn_aft_to_ahr<-
function(x, trans){
  fit.lnm<-get(x)[[trans]]
    # Probability Density Functions
    f0 <- function(x,
    meanlog = coef(fit.lnm)["meanlog"],
    sdlog = exp(coef(fit.lnm)["sdlog"])){
    dlnorm(x=x, meanlog=meanlog, sdlog=sdlog)
    }
    f1 <- function(x,
    meanlog = coef(fit.lnm)["meanlog"]+coef(fit.lnm)["factor(tipo_de_plan_res_1)1"],
    sdlog = exp(coef(fit.lnm)["sdlog"])){
    dlnorm(x=x, meanlog=meanlog, sdlog=sdlog)
    }
    ft <- function(x){ (f0(x) + f1(x))}
    # Survival functions
    S0 <- function(x,
    meanlog = coef(fit.lnm)["meanlog"],
    sdlog = exp(coef(fit.lnm)["sdlog"])){
    plnorm(q=x, meanlog=meanlog, sdlog=sdlog, lower=FALSE)
    }
    S1 <- function(x,
    meanlog = coef(fit.lnm)["meanlog"]+coef(fit.lnm)["factor(tipo_de_plan_res_1)1"],
    sdlog = exp(coef(fit.lnm)["sdlog"])){
    plnorm(q=x, meanlog=meanlog, sdlog=sdlog, lower=FALSE)
    }
    St <- function(x){ ( S0(x) * f0(x) + S1(x) * f1(x) ) / ( f0(x) + f1(x) ) }
    ## St <- function(x){ (S0(x) + S1(x))/2} ## assuming same number of events
    # Hazard functions
    h0 <- function(x, f=f0, S=S0){f(x)/S(x)}
    h1 <- function(x, f=f1, S=S1){f(x)/S(x)}
    newList <- list("90 days" = calcAHRgeo(x=90, h0=h0, h1=h1, ft=ft, wt=St), 
                    "1 year" = calcAHRgeo(x=365.25, h0=h0, h1=h1, ft=ft, wt=St),
                    "3 years" = calcAHRgeo(x=1096, h0=h0, h1=h1, ft=ft, wt=St),
                    "5 years" = calcAHRgeo(x=1826, h0=h0, h1=h1, ft=ft, wt=St)
                    )
    return(newList)
}

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
invisible("Generalized gamma")#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# Probability Density Functions
ggam_aft_to_ahr<-
function(x, trans){
  fit.ggm<-get(x)[[trans]]
  print(fit.ggm)
    f0 <- function(x,
    mu = coef(fit.ggm)["mu"],
    sigma =exp(coef(fit.ggm)["sigma"]),
    Q = coef(fit.ggm)["Q"]){
    dgengamma(x=x, mu=mu, sigma=sigma, Q=Q)
    }
    f1 <- function(x,
    mu = coef(fit.ggm)["mu"]+coef(fit.ggm)["factor(tipo_de_plan_res_1)1"],
    sigma =exp(coef(fit.ggm)["sigma"]),
    Q = coef(fit.ggm)["Q"]){
    dgengamma(x=x, mu=mu, sigma=sigma, Q=Q)
    }
    ft <- function(x){ (f0(x) + f1(x))}
    # Cumulative Density Functions
    S0 <- function(x,
    mu = coef(fit.ggm)["mu"],
    sigma =exp(coef(fit.ggm)["sigma"]),
    Q = coef(fit.ggm)["Q"]){
    pgengamma(q=x, mu=mu, sigma=sigma, Q=Q, lower=FALSE)
    }
    S1 <- function(x,
    mu = coef(fit.ggm)["mu"]+coef(fit.ggm)["factor(tipo_de_plan_res_1)1"],
    sigma =exp(coef(fit.ggm)["sigma"]),
    Q = coef(fit.ggm)["Q"]){
    pgengamma(q=x, mu=mu, sigma=sigma, Q=Q, lower=FALSE)
    }
    St <- function(x){ ( S0(x) * f0(x) + S1(x) * f1(x) ) / ( f0(x) + f1(x) ) }
    ## St <- function(x){ (S0(x) + S1(x))/2} ## assuming same number of events
    ## Hazard functions
    h0 <- function(x,
    mu = coef(fit.ggm)["mu"],
    sigma =exp(coef(fit.ggm)["sigma"]),
    Q = coef(fit.ggm)["Q"]){
    hgengamma(x=x, mu=mu, sigma=sigma, Q=Q)
    }
    h1 <- function(x,
    mu = coef(fit.ggm)["mu"]+coef(fit.ggm)["factor(tipo_de_plan_res_1)1"],
    sigma =exp(coef(fit.ggm)["sigma"]),
    Q = coef(fit.ggm)["Q"]){
    hgengamma(x=x, mu=mu, sigma=sigma, Q=Q)
    }
    
    newList <- list("90 days" = calcAHRgeo(x=90, h0=h0, h1=h1, ft=ft, wt=St), 
                    "1 year" = calcAHRgeo(x=365.25, h0=h0, h1=h1, ft=ft, wt=St),
                    "3 years" = calcAHRgeo(x=1096, h0=h0, h1=h1, ft=ft, wt=St),
                    "5 years" = calcAHRgeo(x=1826, h0=h0, h1=h1, ft=ft, wt=St)
                    )
    return(newList)
}

print("Average hazard ratio")

## [1] "Average hazard ratio"

cat("Model of 5 states, first transition")

## Model of 5 states, first transition

ggam_aft_to_ahr("fits_c_ggam2",1)

## Call:
## flexsurvreg(formula = as.formula(form(i)), data = subset(dplyr::mutate(ms_d_match_surv_exp, 
##     arrival = scale(arrival, scale = F)), trans == i), dist = "gengamma")
## 
## Estimates: 
##                              data mean  est      L95%     U95%     se     
## mu                                NA     8.1644   8.0647   8.2641   0.0509
## sigma                             NA     2.4307   2.3796   2.4828   0.0263
## Q                                 NA    -1.0995  -1.2133  -0.9857   0.0581
## factor(tipo_de_plan_res_1)1   0.5000    -0.5727  -0.6405  -0.5050   0.0346
##                              exp(est)  L95%     U95%   
## mu                                NA        NA       NA
## sigma                             NA        NA       NA
## Q                                 NA        NA       NA
## factor(tipo_de_plan_res_1)1   0.5640    0.5270   0.6035
## 
## N = 22452,  Events: 6370,  Censored: 16082
## Total time at risk: 34109835
## Log-likelihood = -60116.56, df = 4
## AIC = 120241.1

## $`90 days`
## [1] 1.034761
## 
## $`1 year`
## [1] 1.121543
## 
## $`3 years`
## [1] 1.190868
## 
## $`5 years`
## [1] 1.214286

cat("Model of 9 states, transitions 3 4 5 6 7 8")

## Model of 9 states, transitions 3 4 5 6 7 8

logn_aft_to_ahr("fits_c_logn3",3)

## $`90 days`
## [1] 1.107305
## 
## $`1 year`
## [1] 1.197835
## 
## $`3 years`
## [1] 1.285328
## 
## $`5 years`
## [1] 1.327338

logn_aft_to_ahr("fits_c_logn3",5)

## $`90 days`
## [1] 1.010998
## 
## $`1 year`
## [1] 1.03859
## 
## $`3 years`
## [1] 1.07001
## 
## $`5 years`
## [1] 1.083333

ggam_aft_to_ahr( "fits_c_ggam3", 4)

## Call:
## flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp, 
##     arrival = scale(arrival, scale = F)), trans == i), dist = "gengamma", 
##     control = list(reltol = 1e-16, fnscale = 1600, trace = 100))
## 
## Estimates: 
##                              data mean  est        L95%       U95%     
## mu                                  NA   9.425990   9.271101   9.580879
## sigma                               NA   3.423292   3.242992   3.613615
## Q                                   NA  -0.174012  -0.326673  -0.021351
## factor(tipo_de_plan_res_1)1   0.448764  -1.191746  -1.337199  -1.046293
## arrival                       4.843927  -0.001043  -0.001544  -0.000543
##                              se         exp(est)   L95%       U95%     
## mu                            0.079026         NA         NA         NA
## sigma                         0.094502         NA         NA         NA
## Q                             0.077889         NA         NA         NA
## factor(tipo_de_plan_res_1)1   0.074212   0.303691   0.262580   0.351237
## arrival                       0.000255   0.998957   0.998458   0.999457
## 
## N = 13067,  Events: 4046,  Censored: 9021
## Total time at risk: 18299896
## Log-likelihood = -36434.4, df = 5
## AIC = 72878.8

## $`90 days`
## [1] 1.131721
## 
## $`1 year`
## [1] 1.220509
## 
## $`3 years`
## [1] 1.295448
## 
## $`5 years`
## [1] 1.328334

ggam_aft_to_ahr( "fits_c_ggam3", 6)

## Call:
## flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp, 
##     arrival = scale(arrival, scale = F)), trans == i), dist = "gengamma", 
##     control = list(reltol = 1e-16, fnscale = 1600, trace = 100))
## 
## Estimates: 
##                              data mean  est        L95%       U95%     
## mu                                  NA   7.50e+00   7.32e+00   7.69e+00
## sigma                               NA   2.18e+00   2.06e+00   2.30e+00
## Q                                   NA  -7.23e-01  -9.69e-01  -4.77e-01
## factor(tipo_de_plan_res_1)1   5.43e-01   1.45e-02  -1.33e-01   1.62e-01
## arrival                       5.55e+02   2.48e-04   1.47e-04   3.49e-04
##                              se         exp(est)   L95%       U95%     
## mu                            9.65e-02         NA         NA         NA
## sigma                         6.19e-02         NA         NA         NA
## Q                             1.25e-01         NA         NA         NA
## factor(tipo_de_plan_res_1)1   7.55e-02   1.01e+00   8.75e-01   1.18e+00
## arrival                       5.14e-05   1.00e+00   1.00e+00   1.00e+00
## 
## N = 4046,  Events: 1305,  Censored: 2741
## Total time at risk: 4698233
## Log-likelihood = -11852.98, df = 5
## AIC = 23715.95

## $`90 days`
## [1] 0.9987035
## 
## $`1 year`
## [1] 0.9962212
## 
## $`3 years`
## [1] 0.9944871
## 
## $`5 years`
## [1] 0.9939478

try(ggam_aft_to_ahr( "fits_c_ggam3", 7)) #error

## Call:
## flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp, 
##     arrival = scale(arrival, scale = F)), trans == i), dist = "gengamma", 
##     control = list(reltol = 1e-16, fnscale = 1600, trace = 100))
## 
## Estimates: 
##                              data mean  est        L95%       U95%     
## mu                                  NA   6.44e+00   5.72e+00   7.16e+00
## sigma                               NA   1.02e+00   6.71e-01   1.56e+00
## Q                                   NA   1.89e-10  -8.40e-05   8.40e-05
## factor(tipo_de_plan_res_1)1   7.67e-01   2.82e-06  -6.10e-01   6.10e-01
## arrival                       1.32e+03   2.05e-03   1.60e-03   2.50e-03
##                              se         exp(est)   L95%       U95%     
## mu                            3.66e-01         NA         NA         NA
## sigma                         2.20e-01         NA         NA         NA
## Q                             4.28e-05         NA         NA         NA
## factor(tipo_de_plan_res_1)1   3.11e-01   1.00e+00   5.44e-01   1.84e+00
## arrival                       2.31e-04   1.00e+00   1.00e+00   1.00e+00
## 
## N = 447,  Events: 127,  Censored: 320
## Total time at risk: 424351
## Log-likelihood = 33292232, df = 5
## AIC = -66584454
## 
## Error in integrate(integrand, lower = 0, upper = x) : 
##   maximum number of subdivisions reached

ggam_aft_to_ahr( "fits_c_ggam3", 8)

## Call:
## flexsurvreg(formula = as.formula(form_9s(i)), data = subset(dplyr::mutate(ms_d_match_surv_oct_2022_exp, 
##     arrival = scale(arrival, scale = F)), trans == i), dist = "gengamma", 
##     control = list(reltol = 1e-16, fnscale = 1600, trace = 100))
## 
## Estimates: 
##                              data mean  est        L95%       U95%     
## mu                                  NA   7.30e+00   7.05e+00   7.56e+00
## sigma                               NA   1.80e+00   1.61e+00   2.02e+00
## Q                                   NA  -4.23e-01  -9.26e-01   8.01e-02
## factor(tipo_de_plan_res_1)1   5.61e-01  -2.60e-01  -4.99e-01  -2.16e-02
## arrival                       1.06e+03   3.19e-04   6.95e-05   5.69e-04
##                              se         exp(est)   L95%       U95%     
## mu                            1.30e-01         NA         NA         NA
## sigma                         1.06e-01         NA         NA         NA
## Q                             2.57e-01         NA         NA         NA
## factor(tipo_de_plan_res_1)1   1.22e-01   7.71e-01   6.07e-01   9.79e-01
## arrival                       1.27e-04   1.00e+00   1.00e+00   1.00e+00
## 
## N = 1305,  Events: 447,  Censored: 858
## Total time at risk: 1259238
## Log-likelihood = -3962.874, df = 5
## AIC = 7935.748

## $`90 days`
## [1] 1.029869
## 
## $`1 year`
## [1] 1.094424
## 
## $`3 years`
## [1] 1.141215
## 
## $`5 years`
## [1] 1.154815

#aHR

# cat("The average HR (residential:outpatient) for patients with a follow-up period of",
#     "3 months",
#     "\nthe average hazard ratio is ",
#     round(ahr.maxt3m, 2), ".")

Time for this code chunk to run: 0 minutes

#https://stats.stackexchange.com/questions/533495/can-one-compare-hazard-ratio-and-average-hazard-ratio?noredirect=1&lq=1

#The OC is the probability P(T1 < T0) that a randomly chosen survival time T1 from group G1 is smaller than a randomly
#chosen survival time T0 from group G0. 

# The concordance probability represents the pairwise probability of lower patient risk given longer survival time. The c-index and the concordance probability estimate have been used to estimate the concordance probability when patient-specific risk scores are continuous. 
# average HRs which them selves approximate the odds of concordance, defined for two treatment groups A and B as OC = P(TA < TB)/P(TB < TA), where TA and TB denote survival times from two subjects randomly selected from these groups. Odds of concordance can be conveniently transformed into concordance probabilities c = P(TA < TB) = OC/(OC + 1), which are intuitive effect size measures
# probabilidad de que grupo B[ambulatorio] tenga >T que A[residencial]
# /probabilidad de que grupo A[residencial] tenga >T que B[ambulatorio]
# AHR (average hazard ratio) setting, the weight for all cases at risk at an event time t is set to the product of the estimated survival probability and the inverse of the estimated probability of censoring at that time.
# AHR: La ponderación por cada tiempo evento t es= p(supervivencia)* 1/p(censura|t)

Time for this code chunk to run: 0 minutes

Session Info

Sys.getenv("R_LIBS_USER")

## [1] "C:/Users/CISS Fondecyt/OneDrive/Documentos/R/win-library/4.0"

rstudioapi::getSourceEditorContext()

## Document Context: 
## - id:        'B97877EB'
## - path:      'C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2019 (github)/SUD_CL/Matching_Process5_SEP_22.Rmd'
## - contents:  <1575 rows>
## Document Selection:
## - [77, 31] -- [77, 31]: ''

if (grepl("CISS Fondecyt",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state_3_apr22.RData")
  } else if (grepl("andre",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("C:/Users/andre/Desktop/SUD_CL/mult_state_3_apr22.RData")
  } else if (grepl("E:",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("E:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state_3_apr22.RData")
  } else if (grepl("G:",rstudioapi::getSourceEditorContext()$path)==T){
    save.image("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state_3_apr22.RData")
  } else {
    save.image("~/mult_state_3_apr22.RData")
    path.expand("~/mult_state_3_apr22.RData")
  }

sessionInfo()

## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] Metrics_0.1.4           muhaz_1.2.6.4           flexsurv_2.2           
##  [4] mstate_0.3.1            DiagrammeR_1.0.6.1.9000 Amelia_1.8.0           
##  [7] Rcpp_1.0.7              igraph_1.2.6            eha_2.9.0              
## [10] cobalt_4.3.1            MatchIt_4.2.0           tableone_0.13.0        
## [13] stargazer_5.2.2         reshape2_1.4.4          gridExtra_2.3          
## [16] foreign_0.8-81          survMisc_0.5.5          ggfortify_0.4.12       
## [19] survminer_0.4.9         ggpubr_0.4.0            epiR_2.0.33            
## [22] forcats_0.5.1           purrr_0.3.4             readr_2.0.1            
## [25] tibble_3.0.3            tidyverse_1.3.1         dplyr_1.0.7            
## [28] treemapify_2.5.5        sf_1.0-2                ggiraph_0.7.10         
## [31] finalfit_1.0.3          lsmeans_2.30-0          emmeans_1.6.3          
## [34] RColorBrewer_1.1-2      panelr_0.7.5            lme4_1.1-27.1          
## [37] Matrix_1.2-18           data.table_1.14.2       codebook_0.9.2         
## [40] devtools_2.4.2          usethis_2.0.1           sqldf_0.4-11           
## [43] RSQLite_2.2.8           gsubfn_0.7              proto_1.0.0            
## [46] broom_0.7.9             zoo_1.8-9               rbokeh_0.5.2           
## [49] janitor_2.1.0           plotly_4.9.4.1          kableExtra_1.3.4       
## [52] Hmisc_4.5-0             Formula_1.2-4           survival_3.1-12        
## [55] lattice_0.20-41         ggplot2_3.3.5           stringr_1.4.0          
## [58] stringi_1.4.6           tidyr_1.1.3             knitr_1.33             
## [61] matrixStats_0.60.0      boot_1.3-28            
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.1.4             tidyselect_1.1.1       htmlwidgets_1.5.3.9000
##   [4] grid_4.0.2             jtools_2.1.3           munsell_0.5.0         
##   [7] codetools_0.2-16       units_0.7-2            statmod_1.4.37        
##  [10] chron_2.3-56           withr_2.5.0            colorspace_1.4-1      
##  [13] uuid_0.1-4             rstudioapi_0.13        ggsignif_0.6.3        
##  [16] KMsurv_0.1-5           bit64_4.0.5            rprojroot_2.0.2       
##  [19] coda_0.19-4            vctrs_0.3.8            generics_0.1.3        
##  [22] TH.data_1.0-10         xfun_0.25              R6_2.5.1              
##  [25] cachem_1.0.6           assertthat_0.2.1       scales_1.1.1          
##  [28] multcomp_1.4-17        nnet_7.3-16            gtable_0.3.1          
##  [31] processx_3.5.2         sandwich_3.0-1         rlang_0.4.11          
##  [34] systemfonts_1.0.2      splines_4.0.2          rstatix_0.7.0         
##  [37] lazyeval_0.2.2         hexbin_1.28.2          checkmate_2.0.0       
##  [40] yaml_2.2.1             abind_1.4-5            modelr_0.1.8          
##  [43] backports_1.1.7        tools_4.0.2            tcltk_4.0.2           
##  [46] ellipsis_0.3.2         jquerylib_0.1.4        proxy_0.4-26          
##  [49] sessioninfo_1.1.1      plyr_1.8.6             visNetwork_2.0.9      
##  [52] base64enc_0.1-3        BiasedUrn_1.07         classInt_0.4-3        
##  [55] ps_1.6.0               prettyunits_1.1.1      rpart_4.1-15          
##  [58] cowplot_1.1.1          deSolve_1.28           haven_2.4.3           
##  [61] cluster_2.1.2          survey_4.1-1           fs_1.5.0              
##  [64] crul_1.1.0             magrittr_2.0.1         openxlsx_4.2.4        
##  [67] reprex_2.0.1           mvtnorm_1.1-2          pkgload_1.2.1         
##  [70] hms_1.1.0              evaluate_0.14          xtable_1.8-4          
##  [73] rio_0.5.27             jpeg_0.1-9             readxl_1.3.1          
##  [76] testthat_3.0.4         compiler_4.0.2         mice_3.13.0           
##  [79] maps_3.3.0             KernSmooth_2.23-17     crayon_1.4.1          
##  [82] minqa_1.2.4            htmltools_0.5.1.1      tzdb_0.1.2            
##  [85] lubridate_1.7.10       DBI_1.1.1              dbplyr_2.1.1          
##  [88] MASS_7.3-51.6          car_3.0-11             mitools_2.4           
##  [91] cli_3.0.1              pryr_0.1.5             quadprog_1.5-8        
##  [94] pkgconfig_2.0.3        km.ci_0.5-2            numDeriv_2016.8-1.1   
##  [97] xml2_1.3.2             svglite_2.0.0          bslib_0.2.5.1         
## [100] ggcorrplot_0.1.3       webshot_0.5.2          estimability_1.3      
## [103] rvest_1.0.1            snakecase_0.11.0       callr_3.7.0           
## [106] digest_0.6.25          httpcode_0.3.0         rmarkdown_2.11        
## [109] cellranger_1.1.0       htmlTable_2.2.1        curl_4.3              
## [112] nloptr_1.2.2.2         lifecycle_1.0.1        nlme_3.1-148          
## [115] jsonlite_1.7.2         carData_3.0-4          desc_1.3.0            
## [118] viridisLite_0.4.1      fansi_0.4.1            labelled_2.8.0        
## [121] pillar_1.7.0           fastmap_1.1.0          httr_1.4.2            
## [124] pkgbuild_1.2.0         glue_1.4.1             remotes_2.4.0         
## [127] zip_2.2.0              png_0.1-7              pander_0.6.3          
## [130] bit_4.0.4              class_7.3-19           sass_0.4.0            
## [133] gistr_0.9.0            blob_1.2.2             ggfittext_0.9.1       
## [136] latticeExtra_0.6-29    memoise_2.0.0          e1071_1.7-8

sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
      "function(settings, json) {",
      "$(this.api().tables().body()).css({'font-size': '80%'});",
      "}")))

Time for this code chunk to run: 0 minutes

Ambulatory or residential? a multi-state analysis of treatments for substance use disorders (Step 3)

October 28, 2022

Cumulative Transition Hazards of Joint Models

Session Info