Calibration, Clinical Utility and Specificity of Clinical Decision Support Tools in Inflammatory Bowel Disease

Author

Dahham Alsoud

Published

July 1, 2024

Modified

July 1, 2024

The aim of this analysis notebook is to reproduce the reported analysis from the paper Calibration, Clinical Utility and Specificity of Clinical Decision Support Tools in Inflammatory Bowel Disease.

Requirements:

To run the analysis, you will need:

  • Raw data files: These should be placed inside the input folder within the home directory of your project. The required files are:
    • Pheno_CD_VDZ.xlsx: Excel sheet containing metadata on CD patients starting VDZ.
    • Pheno_UC_VDZ.xlsx: Excel sheet containing metadata on UC patients starting VDZ.
    • Pheno_CD_UST.xlsx: Excel sheet containing metadata on CD patients starting UST.
    • VDZ_UST_troughs.RData: R workspace containing two dataframes with trough levels of VDZ and UST.

These data files can be obtained upon reasonable request to the corresponding authors, Séverine Vermeire, via severine.vermeire@uzleuven.be and Bram Verstockt, via bram.verstockt@uzleuven.be.

  • Complementary files deposited in the GitHub repository for this analysis:
    • renv.lock, .Rprofile, renv/settings.json and renv/activate.R: These files are necessary to reproduce the same versions of the packages used in the original analysis.
    • helpers.R: This script should be sourced at the beginning of the analysis to produce some customized functions.
    • variable-selection-instability.R: This script reproduces the bootstrap analysis exploring the stability of variable selection in clinical prediction models.

1. Install and load needed packages/functions

if (!require(pacman)) install.packages("pacman")


pacman::p_load(rio, here, magrittr, janitor, tidyverse, skimr, phdcocktail, gtsummary, survival, 
               huxtable, openxlsx, ggpubr, powerjoin, rms, dcurves, epiR, pROC, glue, predRupdate)


source("helpers.R")

2. Import phenodata and pre-process input

vdz_uc_starters <- import(here("input", list.files("input", pattern = "uc_vdz", ignore.case = TRUE)))
vdz_cd_starters <- import(here("input", list.files("input", pattern = "cd_vdz", ignore.case = TRUE)))
ust_cd_starters <- import(here("input", list.files("input", pattern = "cd_ust", ignore.case = TRUE)))

Fix excel dates

important_date_vrs <- c("start_date", "visit_date", "diagnosis_date", "birth_date", "last_followup_date", "stop_date", 
                        "stop_date_itt", "dose_optimisation_date", "surgery_after_date", "date_lab", "endoscopy_date")

vdz_uc_starters <- convert_date_columns(vdz_uc_starters, important_date_vrs)
vdz_cd_starters <- convert_date_columns(vdz_cd_starters, important_date_vrs)
ust_cd_starters %<>% mutate(across(where(is.POSIXct), ymd))

Add dates of outcomes assessment columns

vdz_uc_starters <- add_assessment_dates(vdz_uc_starters)
vdz_cd_starters <- add_assessment_dates(vdz_cd_starters)
ust_cd_starters <- add_assessment_dates(ust_cd_starters)

Copy outcome data to the baseline row

vdz_uc_starters %<>%
  group_by(patient_id) %>%
  fill(c(clinical_response_pmayo:cs_dependent_outcome, clinical_assessment_date, endoscopic_assessment_date), .direction = "up") %>%
  ungroup()

vdz_cd_starters %<>%
  group_by(patient_id) %>%
  fill(c(clinical_response:cs_dependent_outcome, clinical_assessment_date, endoscopic_assessment_date), .direction = "up") %>%
  ungroup()

ust_cd_starters %<>%
  group_by(patient_id) %>%
  fill(c(clinical_response:cs_dependent_outcome, clinical_assessment_date, endoscopic_assessment_date), .direction = "up") %>%
  ungroup()

Filter eligible patients

vdz_uc_eligible <- vdz_uc_starters %>% filter(excluded == 0) 
vdz_cd_eligible <- vdz_cd_starters %>% filter(excluded == 0) 
ust_cd_eligible <- ust_cd_starters %>% filter(excluded == 0)

Filter included patients

vdz_uc_included <- vdz_uc_eligible %>% filter(baseline_disease_activity_type != "clinical")
vdz_uc_included %<>% filter(pga > 1)
vdz_uc_included %<>% filter(clinical_assessment == 1)
vdz_uc_included %<>% filter(mes > 1, !is.na(disease_duration_yrs), !is.na(previous_antitnf), !is.na(albumin_g_L)) 

vdz_cd_included <- vdz_cd_eligible %>% filter(baseline_disease_activity_type != "clinical")
vdz_cd_included %<>% filter(pga > 1)
vdz_cd_included %<>% filter(clinical_assessment == 1)
vdz_cd_included %<>% filter(!is.na(intestinal_resections_before), !is.na(fistulizing_disease_prior), 
                            !is.na(previous_antitnf), !is.na(albumin_g_L), !is.na(crp_mg_L)) 

ust_cd_included <- ust_cd_eligible %>% filter(baseline_disease_activity_type != "clinical") 
ust_cd_included %<>% filter(pga > 1) 
ust_cd_included %<>% filter(clinical_assessment == 1) 
ust_cd_included %<>% filter(!is.na(cdst_category)) 

3. Get baseline characteristics (Table 1)

Identify needed columns and define new data frames

tblone_vrs <- c("gender", "age_yrs", "disease_duration_yrs", "smoking", "bmi_kg_m2", "albumin_g_L","crp_mg_L", "calprotectin_ug_g", 
                "concomitant_cs", "concomitant_imm", "concomitant_5asa", "previous_advancedrx_amount", "previous_antitnf", 
                "intestinal_resections_before", "age_at_diagnosis_yrs", 
                "disease_location", "upper_gi_disease", "disease_behaviour","peri_anal_disease", "ses_cd",
                "disease_extent", "mes",
                "fistulizing_disease_prior", "fistulizing_disease_current")

vdz_cd_tblone_df <- vdz_cd_included %>% select(any_of(tblone_vrs))
vdz_uc_tblone_df <- vdz_uc_included %>% select(any_of(tblone_vrs))
ust_cd_tblone_df <- ust_cd_included %>% select(any_of(tblone_vrs))

Manipulate columns

continuous_vrs <- c("age_yrs", "disease_duration_yrs", "bmi_kg_m2", "albumin_g_L","crp_mg_L", "calprotectin_ug_g", "ses_cd")

vdz_cd_tblone_df %<>% mutate(across(all_of(continuous_vrs), ~ round(as.numeric(.), 1)))
ust_cd_tblone_df %<>% mutate(across(all_of(continuous_vrs), ~ round(as.numeric(.), 1)))
vdz_uc_tblone_df %<>% mutate(across(any_of(continuous_vrs), ~ round(as.numeric(.), 1)))

vdz_cd_tblone_df %<>% mutate(disease_behaviour = if_else(str_detect(disease_behaviour, "B3"), "B3", disease_behaviour))
ust_cd_tblone_df %<>% mutate(disease_behaviour = if_else(str_detect(disease_behaviour, "B3"), "B3", disease_behaviour))

ust_cd_tblone_df %<>% mutate(female_gender = if_else(gender == "F", 1, 0), .after = gender) %>% select(-gender) 
vdz_cd_tblone_df %<>% mutate(female_gender = if_else(gender == "F", 1, 0), .after = gender) %>% select(-gender)
vdz_uc_tblone_df %<>% mutate(female_gender = if_else(gender == "F", 1, 0), .after = gender) %>% select(-gender)

ust_cd_tblone_df %<>% mutate(prior_smoker = if_else(smoking == 0, 0, 1), .after = smoking) %>% select(-smoking) 
vdz_cd_tblone_df %<>% mutate(prior_smoker = if_else(smoking == 0, 0, 1), .after = smoking) %>% select(-smoking)
vdz_uc_tblone_df %<>% mutate(prior_smoker = if_else(smoking == 0, 0, 1), .after = smoking) %>% select(-smoking)


ust_cd_tblone_df %<>% get_age_montreal() %>% select(-age_at_diagnosis_yrs) 
vdz_cd_tblone_df %<>% get_age_montreal() %>% select(-age_at_diagnosis_yrs)
vdz_uc_tblone_df %<>% get_age_montreal() %>% select(-age_at_diagnosis_yrs)


ust_cd_tblone_df %<>% recode_steroids()
vdz_cd_tblone_df %<>% recode_steroids()
vdz_uc_tblone_df %<>% recode_steroids()

ust_cd_tblone_df %<>% mutate(concomitant_imm = if_else(concomitant_imm == "0", 0, 1),
                             concomitant_5asa = if_else(concomitant_5asa == "0", 0, 1))
vdz_cd_tblone_df %<>% mutate(concomitant_imm = if_else(concomitant_imm == "0", 0, 1),
                             concomitant_5asa = if_else(concomitant_5asa == "0", 0, 1))
vdz_uc_tblone_df %<>% mutate(concomitant_imm = if_else(concomitant_imm == "0", 0, 1),
                             concomitant_5asa = if_else(concomitant_5asa == "0", 0, 1))


ust_cd_tblone_df %<>% recode_previous_advancedrx()
vdz_cd_tblone_df %<>% recode_previous_advancedrx()
vdz_uc_tblone_df %<>% recode_previous_advancedrx()


categorical_vrs <- c("concomitant_cs", "previous_advancedrx_amount", "disease_location", "disease_behaviour", "disease_extent", "mes", "age_montreal")

dichotomous_vrs <- c("female_gender", "prior_smoker", "concomitant_imm", "concomitant_5asa", "previous_antitnf", "upper_gi_disease", "peri_anal_disease",
                     "intestinal_resections_before", "fistulizing_disease_prior", "fistulizing_disease_current")

Recode variables to publication shape

ibd_data_dict$variable_label[ibd_data_dict$variable == "ses_cd"] <- "Simple endoscopic score for CD" 

ibd_data_dict %<>%
  add_row(variable = "prior_smoker", variable_label = "Prior smoking history", value = NA, value_label = NA) %>% 
  add_row(variable = "intestinal_resections_before", variable_label = "Prior bowel surgery", value = NA, value_label = NA) %>% 
  add_row(variable = "previous_advancedrx_amount", variable_label = "Number of previous advanced therapies", value = NA, value_label = NA) %>% 
  add_row(variable = "albumin_g_L", variable_label = "Serum albumin (g/L)", value = NA, value_label = NA) %>% 
  add_row(variable = "crp_mg_L", variable_label = "C-reactive protein (mg/L)", value = NA, value_label = NA) %>% 
  add_row(variable = "previous_antitnf", variable_label = "Prior anti-TNF exposure", value = NA, value_label = NA) %>% 
  add_row(variable = "fistulizing_disease_prior", variable_label = "Prior fistulizing disease", value = NA, value_label = NA) %>% 
  add_row(variable = "fistulizing_disease_current", variable_label = "Baseline actively draining fistula", value = NA, value_label = NA) 

categoricals_to_recode <- c("concomitant_cs", "disease_location", "disease_behaviour", "disease_extent", "age_montreal")

ust_cd_tblone_df$previous_advancedrx_amount <- factor(ust_cd_tblone_df$previous_advancedrx_amount, levels = c("0", "1", "2", "≥ 3"), ordered = TRUE)
vdz_cd_tblone_df$previous_advancedrx_amount <- factor(vdz_cd_tblone_df$previous_advancedrx_amount, levels = c("0", "1", "2", "≥ 3"), ordered = TRUE)
vdz_uc_tblone_df$previous_advancedrx_amount <- factor(vdz_uc_tblone_df$previous_advancedrx_amount, levels = c("0", "1", "2", "≥ 3"), ordered = TRUE)
vdz_uc_tblone_df$mes <- factor(vdz_uc_tblone_df$mes, levels = c("2", "3"), ordered = TRUE)

ust_cd_tblone_df <- recode_vrs(ust_cd_tblone_df, data_dictionary = ibd_data_dict, vrs = categoricals_to_recode, factor = TRUE)
vdz_cd_tblone_df <- recode_vrs(vdz_cd_tblone_df, data_dictionary = ibd_data_dict, vrs = categoricals_to_recode, factor = TRUE)
vdz_uc_tblone_df <- recode_vrs(vdz_uc_tblone_df, data_dictionary = ibd_data_dict, vrs = categoricals_to_recode, factor = TRUE)

Make the tables

theme_gtsummary_journal(journal = "jama")
theme_gtsummary_compact()

VDZ-CD

vdz_cd_tblone <- vdz_cd_tblone_df %>% 
  tbl_summary(type = list(any_of(continuous_vrs) ~ "continuous",
                          any_of(categorical_vrs) ~ "categorical",
                          any_of(dichotomous_vrs) ~ "dichotomous"),
              missing_text = "missing, n",
              digits = list(all_categorical() ~ c(0, 1))) %>%
  bold_labels() %>%
  italicize_levels()

labels_to_levels <- c("upper_gi_disease", "peri_anal_disease")

tableone_body <- vdz_cd_tblone[["table_body"]]

tableone_body$row_type[tableone_body$variable %in% labels_to_levels] <- "level"

tableone_body$stat_label[tableone_body$variable %in% labels_to_levels] <- NA

tableone_body <- tableone_body[tableone_body$label != "None",]

vdz_cd_tblone[["table_body"]]  <- tableone_body

VDZ-UC

vdz_uc_tblone <- vdz_uc_tblone_df %>% 
  tbl_summary(type = list(any_of(continuous_vrs) ~ "continuous",
                          any_of(categorical_vrs) ~ "categorical",
                          any_of(dichotomous_vrs) ~ "dichotomous"),
              missing_text = "missing, n",
              digits = list(all_categorical() ~ c(0, 1))) %>%
  bold_labels() %>%
  italicize_levels()

tableone_body <- vdz_uc_tblone[["table_body"]]

tableone_body <- tableone_body[tableone_body$label != "None",]

vdz_uc_tblone[["table_body"]]  <- tableone_body

UST-CD

ust_cd_tblone <- ust_cd_tblone_df %>% 
  tbl_summary(type = list(any_of(continuous_vrs) ~ "continuous",
                          any_of(categorical_vrs) ~ "categorical",
                          any_of(dichotomous_vrs) ~ "dichotomous"),
              missing_text = "missing, n",
              digits = list(all_categorical() ~ c(0, 1))) %>%
  bold_labels() %>%
  italicize_levels()

tableone_body <- ust_cd_tblone[["table_body"]]

tableone_body$row_type[tableone_body$variable %in% labels_to_levels] <- "level"

tableone_body$stat_label[tableone_body$variable %in% labels_to_levels] <- NA

tableone_body <- tableone_body[tableone_body$label != "None",]

ust_cd_tblone[["table_body"]]  <- tableone_body

Merge the tables

tbl_merge(
    tbls = list(vdz_cd_tblone, vdz_uc_tblone, ust_cd_tblone),
    tab_spanner = c("**VDZ-CD**", "**VDZ-UC**", "**UST-CD**")
  )
Characteristic VDZ-CD VDZ-UC UST-CD
N = 280 N = 218 N = 194
Female, n (%) 170 (60.7) 118 (54.1) 121 (62.4)
Age (year), Median (IQR) 41 (29 – 55) 41 (29 – 53) 41 (30 – 56)
Disease duration (year), Median (IQR) 11 (4 – 22) 8 (2 – 14) 13 (6 – 24)
Prior smoking history, n (%) 134 (47.9) 66 (30.3) 91 (46.9)
Body mass index (kg/m²), Median (IQR) 23.8 (21.3 – 27.1) 23.8 (20.7 – 27.3) 23.4 (20.6 – 26.9)
Serum albumin (g/L), Median (IQR) 41.9 (39.2 – 44.1) 42.0 (39.4 – 44.7) 41.2 (38.9 – 43.3)
C-reactive protein (mg/L), Median (IQR) 5 (2 – 14) 3 (1 – 8) 8 (3 – 19)
Faecal calprotectin (ug/g), Median (IQR) 649 (271 – 1,800) 1,673 (665 – 1,800) 985 (405 – 1,800)
    missing, n 159 125 50
Concomitant corticosteroids, n (%)


    Topical 37 (13.2) 77 (35.3) 12 (6.2)
    Systemic 34 (12.1) 35 (16.1) 13 (6.7)
Concomitant immunomodulators, n (%) 14 (5.0) 7 (3.2) 0 (0.0)
Concomitant 5-aminosalicylates, n (%) 4 (1.4) 112 (51.4) 0 (0.0)
Number of previous advanced therapies, n (%)


    0 72 (25.7) 82 (37.6) 39 (20.1)
    1 81 (28.9) 78 (35.8) 38 (19.6)
    2 104 (37.1) 50 (22.9) 54 (27.8)
    ≥ 3 23 (8.2) 8 (3.7) 63 (32.5)
Prior anti-TNF exposure, n (%) 204 (72.9) 136 (62.4) 151 (77.8)
Prior bowel surgery, n (%) 131 (46.8) 0 (0.0) 96 (49.5)
Age at diagnosis (year), n (%)


    < 17 years 42 (15.0) 21 (9.6) 27 (13.9)
    17-40 years 190 (67.9) 132 (60.6) 139 (71.6)
    > 40 years 48 (17.1) 65 (29.8) 28 (14.4)
Disease location, n (%)


    Ileal 81 (28.9)
60 (30.9)
    Colonic 41 (14.6)
22 (11.3)
    Ileocolonic 158 (56.4)
112 (57.7)
    Upper GI modifier 19 (6.8)
18 (9.3)
Disease behaviour, n (%)


    Inflammatory 109 (38.9)
78 (40.2)
    Fibrostenotic 110 (39.3)
73 (37.6)
    Penetrating 61 (21.8)
43 (22.2)
    Perianal modifier 86 (30.7)
69 (35.6)
Simple endoscopic score for CD, Median (IQR) 7.5 (5.0 – 15.0)
10 (6 – 16)
    missing, n 184
40
Prior fistulizing disease, n (%) 95 (33.9)
70 (36.1)
Baseline actively draining fistula, n (%) 14 (5.0)
9 (4.6)
Disease extent


    Proctitis
18 (8.3)
    Left-sided colitis
115 (52.8)
    Extensive colitis
85 (39.0)
Mayo endoscopic subscore


    2
98 (45.0)
    3
120 (55.0)

4. Calculate CDST points and assign response categories

VDZ-UC

Ensure that we have all CDST variables in numeric shape ready for calculations

vdz_uc_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(vdz_uc_included$no_previous_antitnf)
[1] TRUE
is.numeric(vdz_uc_included$albumin_g_L)
[1] TRUE
vdz_uc_included %<>% mutate(moderate_endo_activity = if_else(mes == 2, 1, 0), .after = mes)

is.numeric(vdz_uc_included$moderate_endo_activity)
[1] TRUE
vdz_uc_included %<>% mutate(dxduration_above_two = if_else(disease_duration_yrs >= 2, 1, 0), .after = disease_duration_yrs)

is.numeric(vdz_uc_included$dxduration_above_two)
[1] TRUE

Calculate CDST points

vdz_uc_included %<>% mutate(cdst_points = 3*dxduration_above_two + 3*no_previous_antitnf + 2*moderate_endo_activity + 0.65*albumin_g_L)

Assign CDST categories

vdz_uc_included %<>% mutate(cdst_category = case_when(cdst_points <= 26 ~ "Low",
                                                      cdst_points > 26 & cdst_points <= 32 ~ "Intermediate",
                                                      cdst_points > 32 ~ "High") 
)

vdz_uc_included %>% count(cdst_category)
cdst_categoryn
High95
Intermediate105
Low18
vdz_uc_included$cdst_category <- factor(vdz_uc_included$cdst_category, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

VDZ-CD

Ensure that we have all CDST variables in numeric shape ready for calculations

vdz_cd_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(vdz_cd_included$no_previous_antitnf)
[1] TRUE
vdz_cd_included %<>% mutate(no_surgery_before = if_else(intestinal_resections_before == 1, 0, 1), .after = intestinal_resections_before)

is.numeric(vdz_cd_included$no_surgery_before)
[1] TRUE
vdz_cd_included %<>% mutate(no_fistulizing_before = if_else(fistulizing_disease_prior == 1, 0, 1), .after = fistulizing_disease_prior)

is.numeric(vdz_cd_included$no_fistulizing_before)
[1] TRUE
is.numeric(vdz_cd_included$albumin_g_L)
[1] TRUE
is.numeric(vdz_cd_included$crp_mg_L)
[1] TRUE

Calculate CDST points

vdz_cd_included %<>% mutate(cdst_points = 2*no_surgery_before + 3*no_previous_antitnf + 2*no_fistulizing_before + 0.4*albumin_g_L - case_when(crp_mg_L >= 3 & crp_mg_L <= 10 ~ 0.5,
                                                                                                                                              crp_mg_L > 10 ~ 3,
                                                                                                                                              .default = 0))

Assign CDST categories

vdz_cd_included %<>% mutate(cdst_category = case_when(cdst_points <= 13 ~ "Low",
                                                      cdst_points > 13 & cdst_points <= 19 ~ "Intermediate",
                                                      cdst_points > 19 ~ "High") 
)

vdz_cd_included %>% count(cdst_category)
cdst_categoryn
High121
Intermediate146
Low13
vdz_cd_included$cdst_category <- factor(vdz_cd_included$cdst_category, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

UST-CD

ust_cd_included %>% count(cdst_category)
cdst_categoryn
High80
Intermediate95
Low19
ust_cd_included$cdst_category <- factor(ust_cd_included$cdst_category, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

5. Calculate discrimination for effectiveness outcomes

VDZ-CD

Clinical response

vdz_cd_included %<>% mutate(clinical_assessment_before_optimisation = clinical_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(clinical_assessment_before_optimisation = TRUE))

vdz_cd_included %<>% mutate(clinical_response_cdst = clinical_response == 1 & clinical_assessment_before_optimisation)

vdz_cd_included %>% count(cdst_category, clinical_response_cdst)
cdst_categoryclinical_response_cdstn
LowFALSE7
LowTRUE6
IntermediateFALSE64
IntermediateTRUE82
HighFALSE31
HighTRUE90
options(scipen = 999)

chisq_vdz_cd_clinical_response <- chisq.test(vdz_cd_included$cdst_category, vdz_cd_included$clinical_response_cdst)

vdz_cd_cdst_tabulation_clinical_response <- vdz_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage = scales::percent(achieved/total))

Clinical remission

vdz_cd_included %<>% mutate(clinical_remission_cdst = clinical_remission == 1 & clinical_assessment_before_optimisation)

vdz_cd_included %>% count(cdst_category, clinical_remission_cdst)
cdst_categoryclinical_remission_cdstn
LowFALSE13
IntermediateFALSE126
IntermediateTRUE20
HighFALSE96
HighTRUE25
chisq_vdz_cd_clinical_remission <- chisq.test(vdz_cd_included$cdst_category, vdz_cd_included$clinical_remission_cdst)

vdz_cd_cdst_tabulation_clinical_remission <- vdz_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic response

vdz_cd_included_endoscopic <- vdz_cd_included %>%
  filter(endoscopic_assessment == 1)

vdz_cd_included_endoscopic %<>% mutate(endoscopic_assessment_before_optimisation = endoscopic_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(endoscopic_assessment_before_optimisation = TRUE))

vdz_cd_included_endoscopic %<>% mutate(endoscopic_response_cdst = endoscopic_response == 1 & endoscopic_assessment_before_optimisation)


vdz_cd_included_endoscopic %>% count(cdst_category, endoscopic_response_cdst)
cdst_categoryendoscopic_response_cdstn
LowFALSE8
LowTRUE2
IntermediateFALSE51
IntermediateTRUE53
HighFALSE33
HighTRUE66
chisq_vdz_cd_endoscopic_response <- chisq.test(vdz_cd_included_endoscopic$cdst_category, vdz_cd_included_endoscopic$endoscopic_response_cdst)

vdz_cd_cdst_tabulation_endoscopic_response <- vdz_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic remission

vdz_cd_included_endoscopic %<>% mutate(endoscopic_remission_cdst = endoscopic_remission == 1 & endoscopic_assessment_before_optimisation)

vdz_cd_included_endoscopic %>% count(cdst_category, endoscopic_remission_cdst)
cdst_categoryendoscopic_remission_cdstn
LowFALSE9
LowTRUE1
IntermediateFALSE84
IntermediateTRUE20
HighFALSE57
HighTRUE42
chisq_vdz_cd_endoscopic_remission <- chisq.test(vdz_cd_included_endoscopic$cdst_category, vdz_cd_included_endoscopic$endoscopic_remission_cdst)

vdz_cd_cdst_tabulation_endoscopic_remission <- vdz_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
           percentage = scales::percent(achieved/total))

VDZ-UC

Clinical response

vdz_uc_included %<>% mutate(clinical_assessment_before_optimisation = clinical_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(clinical_assessment_before_optimisation = TRUE))

vdz_uc_included %<>% mutate(clinical_response_cdst = clinical_response_pga == 1 & clinical_assessment_before_optimisation)

vdz_uc_included %>% count(cdst_category, clinical_response_cdst)
cdst_categoryclinical_response_cdstn
LowFALSE9
LowTRUE9
IntermediateFALSE47
IntermediateTRUE58
HighFALSE20
HighTRUE75
chisq_vdz_uc_clinical_response <- chisq.test(vdz_uc_included$cdst_category, vdz_uc_included$clinical_response_cdst)

vdz_uc_cdst_tabulation_clinical_response <- vdz_uc_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage = scales::percent(achieved/total))

Clinical remission

vdz_uc_included %<>% mutate(clinical_remission_cdst = clinical_remission_pga == 1 & clinical_assessment_before_optimisation)

vdz_uc_included %>% count(cdst_category, clinical_remission_cdst)
cdst_categoryclinical_remission_cdstn
LowFALSE15
LowTRUE3
IntermediateFALSE90
IntermediateTRUE15
HighFALSE62
HighTRUE33
chisq_vdz_uc_clinical_remission <- chisq.test(vdz_uc_included$cdst_category, vdz_uc_included$clinical_remission_cdst)

vdz_uc_cdst_tabulation_clinical_remission <- vdz_uc_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic response

vdz_uc_included_endoscopic <- vdz_uc_included %>%
  filter(endoscopic_assessment == 1)

vdz_uc_included_endoscopic %<>% mutate(endoscopic_assessment_before_optimisation = endoscopic_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(endoscopic_assessment_before_optimisation = TRUE))

vdz_uc_included_endoscopic %<>% mutate(endoscopic_response_cdst = endoscopic_response == 1 & endoscopic_assessment_before_optimisation)

vdz_uc_included_endoscopic %>% count(cdst_category, endoscopic_response_cdst)
cdst_categoryendoscopic_response_cdstn
LowFALSE10
LowTRUE8
IntermediateFALSE63
IntermediateTRUE42
HighFALSE26
HighTRUE68
chisq_vdz_uc_endoscopic_response <- chisq.test(vdz_uc_included_endoscopic$cdst_category, vdz_uc_included_endoscopic$endoscopic_response_cdst)

vdz_uc_cdst_tabulation_endoscopic_response <- vdz_uc_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic remission

vdz_uc_included_endoscopic %<>% mutate(endoscopic_remission_cdst = endoscopic_remission == 1 & endoscopic_assessment_before_optimisation)

vdz_uc_included_endoscopic %>% count(cdst_category, endoscopic_remission_cdst)
cdst_categoryendoscopic_remission_cdstn
LowFALSE11
LowTRUE7
IntermediateFALSE79
IntermediateTRUE26
HighFALSE53
HighTRUE41
chisq_vdz_uc_endoscopic_remission <- chisq.test(vdz_uc_included_endoscopic$cdst_category, vdz_uc_included_endoscopic$endoscopic_remission_cdst)

vdz_uc_cdst_tabulation_endoscopic_remission <- vdz_uc_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
            percentage = scales::percent(achieved/total))

UST-CD

Clinical response

ust_cd_included %<>% mutate(clinical_assessment_before_optimisation = clinical_assessment_date <= first_dose_optimisation_date) %>%
  replace_na(list(clinical_assessment_before_optimisation = TRUE))

ust_cd_included %<>% mutate(clinical_response_cdst = clinical_response == 1 & clinical_assessment_before_optimisation)

ust_cd_included %>% count(cdst_category, clinical_response_cdst)
cdst_categoryclinical_response_cdstn
LowFALSE4
LowTRUE15
IntermediateFALSE45
IntermediateTRUE50
HighFALSE31
HighTRUE49
chisq_ust_cd_clinical_response <- chisq.test(ust_cd_included$cdst_category, ust_cd_included$clinical_response_cdst)

ust_cd_cdst_tabulation_clinical_response <- ust_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage = scales::percent(achieved/total))

Clinical remission

ust_cd_included %<>% mutate(clinical_remission_cdst = clinical_remission == 1 & clinical_assessment_before_optimisation)

ust_cd_included %>% count(cdst_category, clinical_remission_cdst)
cdst_categoryclinical_remission_cdstn
LowFALSE16
LowTRUE3
IntermediateFALSE86
IntermediateTRUE9
HighFALSE61
HighTRUE19
chisq_ust_cd_clinical_remission <- chisq.test(ust_cd_included$cdst_category, ust_cd_included$clinical_remission_cdst)

ust_cd_cdst_tabulation_clinical_remission <- ust_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic response

ust_cd_included_endoscopic <- ust_cd_included %>%
  filter(endoscopic_assessment == 1)

ust_cd_included_endoscopic %<>% mutate(endoscopic_assessment_before_optimisation = endoscopic_assessment_date <= first_dose_optimisation_date) %>%
  replace_na(list(endoscopic_assessment_before_optimisation = TRUE))

ust_cd_included_endoscopic %<>% mutate(endoscopic_response_cdst = endoscopic_response == 1 & endoscopic_assessment_before_optimisation)


ust_cd_included_endoscopic %>% count(cdst_category, endoscopic_response_cdst)
cdst_categoryendoscopic_response_cdstn
LowFALSE6
LowTRUE11
IntermediateFALSE39
IntermediateTRUE42
HighFALSE33
HighTRUE35
chisq_ust_cd_endoscopic_response <- chisq.test(ust_cd_included_endoscopic$cdst_category, ust_cd_included_endoscopic$endoscopic_response_cdst)

ust_cd_cdst_tabulation_endoscopic_response <- ust_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic remission

ust_cd_included_endoscopic %<>% mutate(endoscopic_remission_cdst = endoscopic_remission == 1 & endoscopic_assessment_before_optimisation)

ust_cd_included_endoscopic %>% count(cdst_category, endoscopic_remission_cdst)
cdst_categoryendoscopic_remission_cdstn
LowFALSE15
LowTRUE2
IntermediateFALSE74
IntermediateTRUE7
HighFALSE54
HighTRUE14
chisq_ust_cd_endoscopic_remission <- chisq.test(ust_cd_included_endoscopic$cdst_category, ust_cd_included_endoscopic$endoscopic_remission_cdst)

ust_cd_cdst_tabulation_endoscopic_remission <- ust_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
            percentage = scales::percent(achieved/total))

6. Visualize discrimination for effectiveness outcomes

VDZ-CD

vdz_cd_clinical_response_gg <- ggplot(data = vdz_cd_included, aes(x = cdst_category, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_clinical_response$percentage, "\n", "(", vdz_cd_cdst_tabulation_clinical_response$achieved, "/", vdz_cd_cdst_tabulation_clinical_response$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_clinical_response$achieved + 7,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_clinical_response$p.value), x = 2, y = 158.5, size = 4, fontface = "bold")

vdz_cd_clinical_remission_gg <- ggplot(data = vdz_cd_included, aes(x = cdst_category, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_clinical_remission$percentage, "\n", "(", vdz_cd_cdst_tabulation_clinical_remission$achieved, "/", vdz_cd_cdst_tabulation_clinical_remission$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_clinical_remission$achieved + 7,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_clinical_remission$p.value), x = 2, y = 158.5, size = 4, fontface = "bold")

vdz_cd_endoscopic_response_gg <- ggplot(data = vdz_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_endoscopic_response$percentage, "\n", "(", vdz_cd_cdst_tabulation_endoscopic_response$achieved, "/", vdz_cd_cdst_tabulation_endoscopic_response$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_endoscopic_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_endoscopic_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_cd_endoscopic_remission_gg <- ggplot(data = vdz_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_endoscopic_remission$percentage, "\n", "(", vdz_cd_cdst_tabulation_endoscopic_remission$achieved, "/", vdz_cd_cdst_tabulation_endoscopic_remission$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_endoscopic_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_endoscopic_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

Combine figures in 4 panels

vdz_cd_bars_combined <- ggarrange(vdz_cd_clinical_response_gg, vdz_cd_clinical_remission_gg, vdz_cd_endoscopic_response_gg, vdz_cd_endoscopic_remission_gg, nrow = 2, ncol = 2)

annotate_figure(vdz_cd_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

VDZ-UC

vdz_uc_clinical_response_gg <- ggplot(data = vdz_uc_included, aes(x = cdst_category, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_clinical_response$percentage, "\n", "(", vdz_uc_cdst_tabulation_clinical_response$achieved, "/", vdz_uc_cdst_tabulation_clinical_response$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_clinical_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_clinical_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_uc_clinical_remission_gg <- ggplot(data = vdz_uc_included, aes(x = cdst_category, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_clinical_remission$percentage, "\n", "(", vdz_uc_cdst_tabulation_clinical_remission$achieved, "/", vdz_uc_cdst_tabulation_clinical_remission$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_clinical_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_clinical_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_uc_endoscopic_response_gg <- ggplot(data = vdz_uc_included_endoscopic, aes(x = cdst_category, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_endoscopic_response$percentage, "\n", "(", vdz_uc_cdst_tabulation_endoscopic_response$achieved, "/", vdz_uc_cdst_tabulation_endoscopic_response$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_endoscopic_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_endoscopic_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_uc_endoscopic_remission_gg <- ggplot(data = vdz_uc_included_endoscopic, aes(x = cdst_category, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_endoscopic_remission$percentage, "\n", "(", vdz_uc_cdst_tabulation_endoscopic_remission$achieved, "/", vdz_uc_cdst_tabulation_endoscopic_remission$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_endoscopic_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_endoscopic_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

Combine figures in 4 panels

vdz_uc_bars_combined <- ggarrange(vdz_uc_clinical_response_gg, vdz_uc_clinical_remission_gg, vdz_uc_endoscopic_response_gg, vdz_uc_endoscopic_remission_gg, nrow = 2, ncol = 2)

annotate_figure(vdz_uc_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

UST-CD

ust_cd_clinical_response_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_response$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_response$achieved, "/", ust_cd_cdst_tabulation_clinical_response$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

ust_cd_clinical_remission_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_remission$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_remission$achieved, "/", ust_cd_cdst_tabulation_clinical_remission$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

ust_cd_endoscopic_response_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_response$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_response$achieved, "/", ust_cd_cdst_tabulation_endoscopic_response$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_response$achieved + 4,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_response$p.value), x = 2, y = 98.5, size = 4, fontface = "bold")

ust_cd_endoscopic_remission_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_remission$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_remission$achieved, "/", ust_cd_cdst_tabulation_endoscopic_remission$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_remission$achieved + 4,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_remission$p.value), x = 2, y = 98.5, size = 4, fontface = "bold")

Combine them in 4 panels

ust_cd_bars_combined <- ggarrange(ust_cd_clinical_response_gg, ust_cd_clinical_remission_gg, ust_cd_endoscopic_response_gg, ust_cd_endoscopic_remission_gg, nrow = 2, ncol = 2)

annotate_figure(ust_cd_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

7. Calculate other discrimination metrics for effectiveness outcomes

VDZ-CD

vdz_cd_included %<>% mutate(cdst_category_high = cdst_category == "High")

vector_vdz_cd_discrimination <- vdz_cd_included %>%
  count(cdst_category_high, clinical_remission_cdst) %>%
  arrange(cdst_category_high, clinical_remission_cdst)

vector_vdz_cd_discrimination <- rev(vector_vdz_cd_discrimination$n)

epi_test_vdz_cd <- epi.tests(vector_vdz_cd_discrimination, method = "exact", digits = 2, 
                             conf.level = 0.95)

roc_vdz_cd <- roc(vdz_cd_included$clinical_remission_cdst, vdz_cd_included$cdst_points)

VDZ-UC

vdz_uc_included %<>% mutate(cdst_category_high = cdst_category == "High")

vector_vdz_uc_discrimination <- vdz_uc_included %>%
  count(cdst_category_high, clinical_remission_cdst) %>%
  arrange(cdst_category_high, clinical_remission_cdst)

vector_vdz_uc_discrimination <- rev(vector_vdz_uc_discrimination$n)

epi_test_vdz_uc <- epi.tests(vector_vdz_uc_discrimination, method = "exact", digits = 2, 
                             conf.level = 0.95)

roc_vdz_uc <- roc(vdz_uc_included$clinical_remission_cdst, vdz_uc_included$cdst_points)

UST-CD

ust_cd_included %<>% mutate(cdst_category_high = cdst_category == "High")

vector_ust_cd_discrimination <- ust_cd_included %>%
  count(cdst_category_high, clinical_remission_cdst) %>%
  arrange(cdst_category_high, clinical_remission_cdst)

vector_ust_cd_discrimination <- rev(vector_ust_cd_discrimination$n)

epi_test_ust_cd <- epi.tests(vector_ust_cd_discrimination, method = "exact", digits = 2, 
                             conf.level = 0.95)

Combine metrics

metrics_combined <- bind_rows("vdz-cd" = epi_test_vdz_cd$detail[epi_test_vdz_cd$detail[["statistic"]] %in% c("se", "sp", "lr.pos", "lr.neg"),],
                              "vdz-uc" = epi_test_vdz_uc$detail[epi_test_vdz_uc$detail[["statistic"]] %in% c("se", "sp", "lr.pos", "lr.neg"),],
                              "ust-cd" = epi_test_vdz_cd$detail[epi_test_vdz_cd$detail[["statistic"]] %in% c("se", "sp", "lr.pos", "lr.neg"),],
                              .id = "tool")

metrics_combined %<>%
  mutate(across(c(est, lower, upper), ~ if_else(statistic %in% c("se", "sp"), . * 100, .))) %>%
  mutate(across(c(est, lower, upper), 
                ~ if_else(statistic %in% c("se", "sp"), round(., 1), round(., 2))))  

metrics_combined %<>%
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  mutate(value = str_glue("{est} ({lower}-{upper})")) %>%
  select(-c(est, lower, upper))

metrics_combined %<>%
  pivot_wider(names_from = statistic, values_from = value)


roc_vdz_cd_ci <- glue("{round(ci(roc_vdz_cd)[2] * 100, 1)} ({round(ci(roc_vdz_cd)[1] * 100, 1)}-{round(ci(roc_vdz_cd)[3] * 100, 1)})")
roc_vdz_uc_ci <- glue("{round(ci(roc_vdz_uc)[2] * 100, 1)} ({round(ci(roc_vdz_uc)[1] * 100, 1)}-{round(ci(roc_vdz_uc)[3] * 100, 1)})")

metrics_combined <- cbind(metrics_combined, auc = c(roc_vdz_cd_ci, roc_vdz_uc_ci, "*"))

metrics_combined$tool <- c("VDZ in CD patients", "VDZ in UC patients", "UST in CD patients")

colnames(metrics_combined) <- c(
  "CDST",
  "Sensitivity (%) (95% CI)",
  "Specificity (%) (95% CI)",
  "PLR (95% CI)",
  "NLR (95% CI)",
  "AUC (%) (95% CI)"
)

metrics_combined %>%
  gt::gt() %>%
  gt::tab_style(
    style = gt::cell_text(align = "left"),
    locations = gt::cells_body(columns = c(CDST))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(align = "center"),
    locations = gt::cells_body(columns = c(`Sensitivity (%) (95% CI)`, `Specificity (%) (95% CI)`, `PLR (95% CI)`, `NLR (95% CI)`, `AUC (%) (95% CI)`))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(everything())
  )
CDST Sensitivity (%) (95% CI) Specificity (%) (95% CI) PLR (95% CI) NLR (95% CI) AUC (%) (95% CI)
VDZ in CD patients 55.6 (40-70.4) 59.1 (52.6-65.5) 1.36 (1-1.84) 0.75 (0.53-1.06) 61.3 (53.2-69.4)
VDZ in UC patients 64.7 (50.1-77.6) 62.9 (55.1-70.2) 1.74 (1.31-2.31) 0.56 (0.38-0.83) 65 (56.3-73.7)
UST in CD patients 55.6 (40-70.4) 59.1 (52.6-65.5) 1.36 (1-1.84) 0.75 (0.53-1.06) *

8. Get rates of therapy outcomes

outcomes_not_endoscopic <- c("clinical_response", "clinical_response_pga", "clinical_response_cdst", "clinical_remission", 
                             "clinical_remission_pga", "clinical_remission_cdst", "persistence", "persistence_without_escalation")

outcomes_endoscopic <- c("endoscopic_response", "endoscopic_response_cdst", "endoscopic_remission", "endoscopic_remission_cdst")



vdz_cd_outcomes_not_endoscopic <- outcome_pct(vdz_cd_included, outcomes_not_endoscopic)

vdz_cd_outcomes_endoscopic <- outcome_pct(vdz_cd_included_endoscopic, outcomes_endoscopic)

vdz_cd_outcomes <- bind_rows(vdz_cd_outcomes_endoscopic, vdz_cd_outcomes_not_endoscopic)



vdz_uc_outcomes_not_endoscopic <- outcome_pct(vdz_uc_included, outcomes_not_endoscopic)

vdz_uc_outcomes_endoscopic <- outcome_pct(vdz_uc_included_endoscopic, outcomes_endoscopic)

vdz_uc_outcomes <- bind_rows(vdz_uc_outcomes_endoscopic, vdz_uc_outcomes_not_endoscopic)



ust_cd_outcomes_not_endoscopic <- outcome_pct(ust_cd_included, outcomes_not_endoscopic)

ust_cd_outcomes_endoscopic <- outcome_pct(ust_cd_included_endoscopic, outcomes_endoscopic)

ust_cd_outcomes <- bind_rows(ust_cd_outcomes_endoscopic, ust_cd_outcomes_not_endoscopic)


combined_outcomes <- vdz_cd_outcomes %>% select(outcome, vdz_cd = report) %>%
  bind_cols(vdz_uc_outcomes %>% select(vdz_uc = report),
            ust_cd_outcomes %>% select(ust_cd = report)) %>%
  mutate(outcome = str_replace_all(outcome, "_", " "),        
         outcome = str_to_sentence(outcome),                     
         outcome = str_replace(outcome, "cdst", "without escalation"))




combined_outcomes %>%
  gt::gt() %>%
  gt::cols_label(
    outcome = "Outcome",
    vdz_cd = "VDZ-CD",
    vdz_uc = "VDZ-UC",
    ust_cd = "UST-CD"
  ) %>%
  gt::tab_style(
    style = gt::cell_text(align = "left"),
    locations = gt::cells_body(columns = outcome)
  ) %>%
  gt::tab_style(
    style = gt::cell_text(align = "center"),
    locations = gt::cells_body(columns = c(vdz_cd, vdz_uc, ust_cd))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(everything())
  )
Outcome VDZ-CD VDZ-UC UST-CD
Endoscopic response 123/213 (57.7%) 118/217 (54.4%) 89/166 (53.6%)
Endoscopic response without escalation 121/213 (56.8%) 118/217 (54.4%) 88/166 (53%)
Endoscopic remission 64/213 (30%) 74/217 (34.1%) 24/166 (14.5%)
Endoscopic remission without escalation 63/213 (29.6%) 74/217 (34.1%) 23/166 (13.9%)
Clinical response 185/280 (66.1%) 142/218 (65.1%) 116/194 (59.8%)
Clinical response without escalation 178/280 (63.6%) 142/218 (65.1%) 114/194 (58.8%)
Clinical remission 45/280 (16.1%) 51/218 (23.4%) 31/194 (16%)
Clinical remission without escalation 45/280 (16.1%) 51/218 (23.4%) 31/194 (16%)

9. Calculate & visualize discrimination for trough levels

Load trough data and get it in shape for analysis

load("input/VDZ_UST_troughs.RData")

UST_troughs_induction <- UST_troughs %>% filter(interval == 16)
VDZ_troughs_induction <- VDZ_troughs %>% filter(between(interval, 13, 15))

cdst_vdz_uc_troughs <- power_inner_join(vdz_uc_included %>% select_keys_and(cdst_category),
                                        VDZ_troughs_induction %>% select_keys_and(trough),
                                        by = "patient_id")
cdst_vdz_uc_troughs %>% count(cdst_category)
cdst_categoryn
Low6
Intermediate54
High45
cdst_vdz_uc_troughs %<>% group_by(cdst_category) %>% mutate(median = round(median(trough),1), total = n()) %>% ungroup()


cdst_vdz_cd_troughs <- power_inner_join(vdz_cd_included %>% select_keys_and(cdst_category),
                                        VDZ_troughs_induction %>% select_keys_and(trough),
                                        by = "patient_id")
cdst_vdz_cd_troughs %>% count(cdst_category)
cdst_categoryn
Low11
Intermediate80
High55
cdst_vdz_cd_troughs %<>% group_by(cdst_category) %>% mutate(median = round(median(trough),1), total = n()) %>% ungroup()


cdst_ust_cd_troughs <- power_inner_join(ust_cd_included %>% select_keys_and(cdst_category),
                                        VDZ_troughs_induction %>% select_keys_and(trough),
                                        by = "patient_id")
cdst_ust_cd_troughs %>% count(cdst_category)
cdst_categoryn
Low5
Intermediate24
High18
cdst_ust_cd_troughs %<>% group_by(cdst_category) %>% mutate(median = round(median(trough),1), total = n()) %>% ungroup()

vdz_cd_troughs_medians <- cdst_vdz_cd_troughs %>%
  distinct(cdst_category, median, total)

vdz_uc_troughs_medians <- cdst_vdz_uc_troughs %>%
  distinct(cdst_category, median, total)

ust_cd_troughs_medians <- cdst_ust_cd_troughs %>%
  distinct(cdst_category, median, total)

Test for significance

kruskal_vdz_cd_troughs <- kruskal.test(trough ~ cdst_category, data = cdst_vdz_cd_troughs)
kruskal_vdz_uc_troughs <- kruskal.test(trough ~ cdst_category, data = cdst_vdz_uc_troughs)
kruskal_ust_cd_troughs <- kruskal.test(trough ~ cdst_category, data = cdst_ust_cd_troughs)

Make box plots per cdst category

vdz_cd_troughs_gg <- ggplot(data = cdst_vdz_cd_troughs, aes(x = cdst_category, y = trough)) +
  geom_boxplot(width = 0.35, outlier.shape = NA, coef = FALSE)+
  geom_jitter(size = 0.4) +
  geom_text(data = vdz_cd_troughs_medians, aes(label = str_c("median = ", median, " \n(n", "=", total, ")"), y = -0.5), size = 3, fontface = "bold") +
  coord_cartesian(expand = 1.5, ylim = c(0, 70)) +
  scale_y_continuous(breaks = seq(0, 70, 10)) +
  theme(axis.ticks.x = element_blank(),
        panel.background = element_blank(),
        axis.line.y = element_line(),
        axis.title = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "None") + 
  annotate(geom = "text", label = format_p_value(kruskal_vdz_cd_troughs$p.value), x = 2, y = 68, size = 4, fontface = "bold")

vdz_uc_troughs_gg <- ggplot(data = cdst_vdz_uc_troughs, aes(x = cdst_category, y = trough)) +
  geom_boxplot(width = 0.35, outlier.shape = NA, coef = FALSE)+
  geom_jitter(size = 0.4) +
  geom_text(data = vdz_uc_troughs_medians, aes(label = str_c("median = ", median, " \n(n", "=", total, ")"), y = -0.5), size = 3, fontface = "bold") +
  coord_cartesian(expand = 1.5, ylim = c(0, 41)) +
  scale_y_continuous(breaks = seq(0, 41, 10)) +
  theme(axis.ticks.x = element_blank(),
        panel.background = element_blank(),
        axis.line.y = element_line(),
        axis.title = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "None") +
  annotate(geom = "text", label = format_p_value(kruskal_vdz_uc_troughs$p.value), x = 2, y = 40, size = 4, fontface = "bold")

ust_cd_troughs_gg <- ggplot(data = cdst_ust_cd_troughs, aes(x = cdst_category, y = trough)) +
  geom_boxplot(width = 0.35, outlier.shape = NA, coef = FALSE)+
  geom_jitter(size = 0.4) +
  geom_text(data = ust_cd_troughs_medians, aes(label = str_c("median = ", median, " \n(n", "=", total, ")"), y = -0.5), size = 3, fontface = "bold") +
  coord_cartesian(expand = 1.5, ylim = c(0, 50)) +
  scale_y_continuous(breaks = seq(0, 50, 10)) +
  theme(axis.ticks.x = element_blank(),
        panel.background = element_blank(),
        axis.line.y = element_line(),
        axis.title = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "None") +
  annotate(geom = "text", label = format_p_value(kruskal_ust_cd_troughs$p.value), x = 2, y = 48.8, size = 4, fontface = "bold")

Combine the plots

troughs_combined <- ggarrange(vdz_cd_troughs_gg, vdz_uc_troughs_gg, ust_cd_troughs_gg, labels = c("A", "B", "C"), nrow = 1, ncol = 3,
                              label.x = c(-0.05, -0.05, -0.05))


annotate_figure(troughs_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1),
                left = text_grob("Post-induction trough levels (ug/mL)", face = "bold", vjust = 0.4, rot = 90))

10. Calculate & visualize discrimination for dose optimization

VDZ-CD

vdz_cd_included %<>% mutate(dose_optimisation = recode(dose_optimisation,
                                                       "0" = FALSE,
                                                       "1" = TRUE))

vdz_cd_included %>% count(cdst_category, dose_optimisation)
cdst_categorydose_optimisationn
LowFALSE7
LowTRUE6
IntermediateFALSE88
IntermediateTRUE58
HighFALSE82
HighTRUE39
chisq_vdz_cd_optimisation <- chisq.test(vdz_cd_included$cdst_category, vdz_cd_included$dose_optimisation)

vdz_cd_cdst_tabulation_optimisation <- vdz_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            optimized = sum(dose_optimisation),
            percentage =scales::percent(optimized/total))

vdz_cd_optimisation_gg <- ggplot(data = vdz_cd_included, aes(x = cdst_category, fill = dose_optimisation)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Crohn’s disease") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_optimisation$percentage, "\n", "(", vdz_cd_cdst_tabulation_optimisation$optimized, "/", vdz_cd_cdst_tabulation_optimisation$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_optimisation$optimized + 4.5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_optimisation$p.value), x = 2, y = 158, size = 4, fontface = "bold")

VDZ-UC

vdz_uc_included %<>% mutate(dose_optimisation = recode(dose_optimisation,
                                                       "0" = FALSE,
                                                       "1" = TRUE))

vdz_uc_included %>% count(cdst_category, dose_optimisation)
cdst_categorydose_optimisationn
LowFALSE9
LowTRUE9
IntermediateFALSE62
IntermediateTRUE43
HighFALSE66
HighTRUE29
chisq_vdz_uc_optimisation <- chisq.test(vdz_uc_included$cdst_category, vdz_uc_included$dose_optimisation)

vdz_uc_cdst_tabulation_optimisation <- vdz_uc_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            optimized = sum(dose_optimisation),
            percentage =scales::percent(optimized/total))

vdz_uc_optimisation_gg <- ggplot(data = vdz_uc_included, aes(x = cdst_category, fill = dose_optimisation)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Ulcerative Colitis") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_optimisation$percentage, "\n", "(", vdz_uc_cdst_tabulation_optimisation$optimized, "/", vdz_uc_cdst_tabulation_optimisation$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_optimisation$optimized + 3.3,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_optimisation$p.value), x = 2, y = 118.4, size = 4, fontface = "bold")

Combine plots in 2 panels

vdz_uc_optimisation_combined <- ggarrange(vdz_cd_optimisation_gg, vdz_uc_optimisation_gg, nrow = 1, ncol = 2)


annotate_figure(vdz_uc_optimisation_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

11. Assess specificity of the VDZ-CD tool in UST-CD cohort

Prepare VDZ-CD tool variables

ust_cd_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(ust_cd_included$no_previous_antitnf)
[1] TRUE
ust_cd_included %<>% mutate(no_surgery_before = if_else(intestinal_resections_before == 1, 0, 1), .after = intestinal_resections_before)

is.numeric(ust_cd_included$no_surgery_before)
[1] TRUE
ust_cd_included %<>% mutate(no_fistulizing_before = if_else(fistulizing_disease_prior == 1, 0, 1), .after = fistulizing_disease_prior)

is.numeric(ust_cd_included$no_fistulizing_before)
[1] TRUE
is.numeric(ust_cd_included$albumin_g_L)
[1] TRUE
is.numeric(ust_cd_included$crp_mg_L)
[1] TRUE

Calculate CDST points for the vdz tool

ust_cd_included %<>% mutate(cdst_points_vdz = 2*no_surgery_before + 3*no_previous_antitnf + 2*no_fistulizing_before + 0.4*albumin_g_L - case_when(crp_mg_L >= 3 & crp_mg_L <= 10 ~ 0.5,
                                                                                                                                                  crp_mg_L > 10 ~ 3,
                                                                                                                                                  .default = 0))

Assign CDST categories for the vdz tool

ust_cd_included %<>% mutate(cdst_category_vdz = case_when(cdst_points_vdz <= 13 ~ "Low",
                                                          cdst_points_vdz > 13 & cdst_points_vdz <= 19 ~ "Intermediate",
                                                          cdst_points_vdz > 19 ~ "High") 
)

ust_cd_included %>% count(cdst_category_vdz)
cdst_category_vdzn
High67
Intermediate114
Low13
ust_cd_included$cdst_category_vdz <- factor(ust_cd_included$cdst_category_vdz, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

Get agreement percentage between vdz and ust tools

ust_cd_included %>%
  summarise(agreement = round(sum(cdst_category == cdst_category_vdz)/n()*100, 1))
agreement
73.7
ust_cd_included %>%
  filter(clinical_remission_cdst == 1) %>%
  summarise(agreement = round(sum(cdst_category == cdst_category_vdz)/n()*100, 1))
agreement
77.4
ust_cd_included %>%
  filter(clinical_response_cdst == 0) %>%
  summarise(agreement = round(sum(cdst_category == cdst_category_vdz)/n()*100, 1))
agreement
73.8

Calculate discrimination percentages for: Clinica response

chisq_ust_cd_clinical_response_vdz <- chisq.test(ust_cd_included$cdst_category_vdz, ust_cd_included$clinical_response_cdst)

ust_cd_cdst_tabulation_clinical_response_vdz <- ust_cd_included %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage =scales::percent(achieved/total))

Clinica remission

chisq_ust_cd_clinical_remission_vdz <- chisq.test(ust_cd_included$cdst_category_vdz, ust_cd_included$clinical_remission_cdst)

ust_cd_cdst_tabulation_clinical_remission_vdz <- ust_cd_included %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage =scales::percent(achieved/total))

Endoscopic response

ust_cd_included_endoscopic <- power_left_join(ust_cd_included_endoscopic,
                                              ust_cd_included %>% select_keys_and(cdst_category_vdz),
                                              by = "patient_id")

chisq_ust_cd_endoscopic_response_vdz <- chisq.test(ust_cd_included_endoscopic$cdst_category_vdz, ust_cd_included_endoscopic$endoscopic_response_cdst)

ust_cd_cdst_tabulation_endoscopic_response_vdz <- ust_cd_included_endoscopic %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage =scales::percent(achieved/total))

Endoscopic remission

chisq_ust_cd_endoscopic_remission_vdz <- chisq.test(ust_cd_included_endoscopic$cdst_category_vdz, ust_cd_included_endoscopic$endoscopic_remission_cdst)

ust_cd_cdst_tabulation_endoscopic_remission_vdz <- ust_cd_included_endoscopic %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
            percentage =scales::percent(achieved/total))

Visualize discrimination bars

ust_cd_clinical_response_vdz_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category_vdz, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 140), breaks = seq(0, 140, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_response_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_response_vdz$achieved, "/", ust_cd_cdst_tabulation_clinical_response_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_response_vdz$achieved + 6,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_response_vdz$p.value), x = 2, y = 138.5, size = 4, fontface = "bold")

ust_cd_clinical_remission_vdz_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category_vdz, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 140), breaks = seq(0, 140, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_remission_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_remission_vdz$achieved, "/", ust_cd_cdst_tabulation_clinical_remission_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_remission_vdz$achieved + 6,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_remission_vdz$p.value), x = 2, y = 138.5, size = 4, fontface = "bold")

ust_cd_endoscopic_response_vdz_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category_vdz, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_response_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_response_vdz$achieved, "/", ust_cd_cdst_tabulation_endoscopic_response_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_response_vdz$achieved + 5.3,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_response_vdz$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

ust_cd_endoscopic_remission_vdz_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category_vdz, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_remission_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_remission_vdz$achieved, "/", ust_cd_cdst_tabulation_endoscopic_remission_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_remission_vdz$achieved + 5.3,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_remission_vdz$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

Combine plots in 4 panels

ust_cd_vdz_bars_combined <- ggarrange(ust_cd_clinical_response_vdz_gg, ust_cd_clinical_remission_vdz_gg, ust_cd_endoscopic_response_vdz_gg, ust_cd_endoscopic_remission_vdz_gg, nrow = 2, ncol = 2)


annotate_figure(ust_cd_vdz_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

12. Evaluate CDST calibration

Calculate logits and convert them to probabilities

vdz_cd_included %<>%
  mutate(logit = -3.0722 +  0.2305*no_surgery_before + 0.3483*no_previous_antitnf + 0.1979*no_fistulizing_before + 0.0436*albumin_g_L - 0.0098*crp_mg_L,
         prob = logit2prob(logit))

vdz_uc_included %<>%
  mutate(logit = -3.7038 + 0.2622*dxduration_above_two + 0.2820*no_previous_antitnf + 0.1847*moderate_endo_activity + 0.0647*albumin_g_L,
         prob = logit2prob(logit))

Define a clinically-relevant outcome: persistence & persistence without escalation

vdz_cd_included %<>% mutate(persistence_without_escalation = stop_current_drug_itt == 0 & dose_optimisation == 0)
vdz_uc_included %<>% mutate(persistence_without_escalation = stop_current_drug_itt == 0 & dose_optimisation == 0)
ust_cd_included %<>% mutate(persistence_without_escalation = stop_current_drug_itt == 0 & dose_optimisation == 0)

vdz_cd_included %<>% mutate(persistence = stop_current_drug_itt == 0)
vdz_uc_included %<>% mutate(persistence = stop_current_drug_itt == 0)
ust_cd_included %<>% mutate(persistence = stop_current_drug_itt == 0)

Visualize calibration plots

par(mfrow=c(3,2))

rms::val.prob(vdz_cd_included$prob, vdz_cd_included$clinical_remission_cdst, statloc = F, logistic.cal = F, g = 10)
            Dxy         C (ROC)              R2               D        D:Chi-sq 
  0.17862884161   0.58931442080  -0.08314444580  -0.05113826943 -13.31871543951 
            D:p               U        U:Chi-sq             U:p               Q 
             NA   0.05932023319  18.60966529419   0.00009098348  -0.11045850262 
          Brier       Intercept           Slope            Emax             E90 
  0.14476423642  -0.65070440020   1.01127074257   0.16792792579   0.16166755327 
           Eavg             S:z             S:p 
  0.10881941218  -3.82536680788   0.00013057752 
mtext('A', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_uc_included$prob, vdz_uc_included$clinical_remission_cdst, statloc = F, logistic.cal = F, g = 10)
          Dxy       C (ROC)            R2             D      D:Chi-sq 
 0.3035106258  0.6517553129 -0.0616547022 -0.0446573598 -8.7353044358 
          D:p             U      U:Chi-sq           U:p             Q 
           NA  0.0567842995 14.3789772935  0.0007544748 -0.1014416593 
        Brier     Intercept         Slope          Emax           E90 
 0.1867562420 -0.5028711530  1.1430083801  0.4119663381  0.1703225522 
         Eavg           S:z           S:p 
 0.1209726878 -3.4821176169  0.0004974651 
mtext('B', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_cd_included$prob, vdz_cd_included$persistence_without_escalation, statloc = F, logistic.cal = F, g = 10)
         Dxy      C (ROC)           R2            D     D:Chi-sq          D:p 
 0.357506793  0.678753397  0.068796680  0.047490031 14.297208726           NA 
           U     U:Chi-sq          U:p            Q        Brier    Intercept 
 0.042172491 13.808297370  0.001003613  0.005317541  0.215184305  1.270721589 
       Slope         Emax          E90         Eavg          S:z          S:p 
 1.928436064  0.319500179  0.189713594  0.090107399  2.020589697  0.043322256 
mtext('C', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_uc_included$prob, vdz_uc_included$persistence_without_escalation, statloc = F, logistic.cal = F, g = 10)
        Dxy     C (ROC)          R2           D    D:Chi-sq         D:p 
 0.31491549  0.65745774  0.07109483  0.04712249 11.27270173          NA 
          U    U:Chi-sq         U:p           Q       Brier   Intercept 
 0.01502521  5.27549670  0.07152213  0.03209727  0.20177544  0.30622051 
      Slope        Emax         E90        Eavg         S:z         S:p 
 1.89496508  0.18284498  0.10107597  0.06405022 -1.99595232  0.04593911 
mtext('D', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_cd_included$prob, vdz_cd_included$persistence, statloc = F, logistic.cal = F, g = 10)
                       Dxy                    C (ROC) 
  0.3067973055725658482196   0.6533986527862829518654 
                        R2                          D 
 -0.2791177371817529229148  -0.1936330041949115565725 
                  D:Chi-sq                        D:p 
-53.2172411745752356182493                         NA 
                         U                   U:Chi-sq 
  0.2710559208947198595041  77.8956578505215588847932 
                       U:p                          Q 
  0.0000000000000000000000  -0.4646889250896314438322 
                     Brier                  Intercept 
  0.2920824410165137607898   1.7383988222184387772984 
                     Slope                       Emax 
  1.6477130652951839095977   0.3993827373260716573355 
                       E90                       Eavg 
  0.3285369249726436402526   0.2405269877686071477996 
                       S:z                        S:p 
  8.4043262035508128349193   0.0000000000000000430325 
mtext('E', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_uc_included$prob, vdz_uc_included$persistence, statloc = F, logistic.cal = F, g = 10)
        Dxy     C (ROC)          R2           D    D:Chi-sq         D:p 
 0.26418485  0.63209243  0.02392832  0.01342047  3.92566262          NA 
          U    U:Chi-sq         U:p           Q       Brier   Intercept 
 0.02648111  7.77288151  0.02051825 -0.01306064  0.24170389  0.61371546 
      Slope        Emax         E90        Eavg         S:z         S:p 
 1.40477097  0.16086479  0.14688310  0.08653667  1.95501037  0.05058185 
mtext('F', adj = 0, line = 1, font = 2)

13. Decision curve analysis

Plot the curves

For clinical remission

vdz_cd_dca_plot_remission <- dca(clinical_remission_cdst ~ prob,
                       data = vdz_cd_included,
                       thresholds = seq(0, 0.5, 0.01),
                       label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.5, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.7, 0.1))

vdz_uc_dca_plot_remission <- dca(clinical_remission_cdst ~ prob,
                       data = vdz_uc_included,
                       thresholds = seq(0, 0.55, 0.01),
                       label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.55, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.6, 0.1))

For persistence without escalation

vdz_cd_dca_plot_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                 data = vdz_cd_included,
                                 thresholds = seq(0, 0.5, 0.01),
                                 label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.5, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.7, 0.1))

vdz_uc_dca_plot_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                 data = vdz_uc_included,
                                 thresholds = seq(0, 0.55, 0.01),
                                 label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.55, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.6, 0.1))

For persistence

vdz_cd_dca_plot_persistence <- dca(persistence ~ prob,
                                                      data = vdz_cd_included,
                                                      thresholds = seq(0, 0.5, 0.01),
                                                      label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.5, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.7, 0.1))

vdz_uc_dca_plot_persistence <- dca(persistence ~ prob,
                                                      data = vdz_uc_included,
                                                      thresholds = seq(0, 0.55, 0.01),
                                                      label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.55, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.6, 0.1))

Combine the plots

ggarrange(vdz_cd_dca_plot_remission, vdz_uc_dca_plot_remission, 
          vdz_cd_dca_plot_persistence_without_escalation, vdz_uc_dca_plot_persistence_without_escalation,
          vdz_cd_dca_plot_persistence, vdz_uc_dca_plot_persistence,
          labels = c("A", "B", "C", "D", "E", "F"), label.y = c(1.012, 1.012, 1.012, 1.012, 1.012, 1.012), nrow = 3, ncol = 2,
          common.legend = TRUE, legend="bottom")

Make net benefit tables

For clinical remission

vdz_cd_dca_tbl_remission <- dca(clinical_remission_cdst ~ prob,
                                data = vdz_cd_included,
                                thresholds = seq(0.1, 0.5, 0.1),
                                label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit) 

vdz_uc_dca_tbl_remission <- dca(clinical_remission_cdst ~ prob,
                                data = vdz_uc_included,
                                thresholds = seq(0.1, 0.5, 0.1),
                                label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit)


combined_dca_tbl_remission <- bind_cols(
  vdz_cd_dca_tbl_remission %>%
    rename_with(~ paste0("cd_", .)),
  vdz_uc_dca_tbl_remission %>%
    rename_with(~ paste0("uc_", .))
)

combined_dca_tbl_remission %>%
  gt::gt() %>%
  gt::cols_label(
    cd_label = "Strategy",
    cd_threshold = "Decision Threshold",
    cd_net_benefit = "Net Benefit",
    uc_label = "Strategy",
    uc_threshold = "Decision Threshold",
    uc_net_benefit = "Net Benefit"
  ) %>%
  gt::fmt_percent(
    columns = c(cd_threshold, uc_threshold),
    decimals = 0
  ) %>%
  gt::fmt(
    columns = c(cd_net_benefit, uc_net_benefit),
    fns = function(x) ifelse(x == 0, "0", formatC(x, format = "f", digits = 3))
  ) %>%
  gt::tab_spanner(
    label = "Crohn's Disease",
    columns = c(cd_label, cd_threshold, cd_net_benefit)
  ) %>%
  gt::tab_spanner(
    label = "Ulcerative Colitis",
    columns = c(uc_label, uc_threshold, uc_net_benefit)
  ) %>%
  gt::cols_align("left", columns = everything()) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_spanners(spanners = c("Crohn's Disease", "Ulcerative Colitis"))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(columns = everything())
  )
Crohn's Disease Ulcerative Colitis
Strategy Decision Threshold Net Benefit Strategy Decision Threshold Net Benefit
Treat All 10% 0.067 Treat All 10% 0.149
Treat All 20% -0.049 Treat All 20% 0.042
Treat All 30% -0.199 Treat All 30% -0.094
Treat All 40% -0.399 Treat All 40% -0.277
Treat All 50% -0.679 Treat All 50% -0.532
Treat None 10% 0 Treat None 10% 0
Treat None 20% 0 Treat None 20% 0
Treat None 30% 0 Treat None 30% 0
Treat None 40% 0 Treat None 40% 0
Treat None 50% 0 Treat None 50% 0
Clinical Decision Support Tool 10% 0.069 Clinical Decision Support Tool 10% 0.144
Clinical Decision Support Tool 20% -0.019 Clinical Decision Support Tool 20% 0.039
Clinical Decision Support Tool 30% -0.034 Clinical Decision Support Tool 30% -0.042
Clinical Decision Support Tool 40% -0.005 Clinical Decision Support Tool 40% -0.028
Clinical Decision Support Tool 50% 0 Clinical Decision Support Tool 50% -0.009

For persistence without escalation

vdz_cd_dca_tbl_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                                     data = vdz_cd_included,
                                                     thresholds = seq(0.1, 0.5, 0.1),
                                                     label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit) 

vdz_uc_dca_tbl_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                                     data = vdz_uc_included,
                                                     thresholds = seq(0.1, 0.5, 0.1),
                                                     label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit)


combined_dca_tbl_persistence_without_escalation <- bind_cols(
  vdz_cd_dca_tbl_persistence_without_escalation %>%
    rename_with(~ paste0("cd_", .)),
  vdz_uc_dca_tbl_persistence_without_escalation %>%
    rename_with(~ paste0("uc_", .))
)

combined_dca_tbl_persistence_without_escalation %>%
  gt::gt() %>%
  gt::cols_label(
    cd_label = "Strategy",
    cd_threshold = "Decision Threshold",
    cd_net_benefit = "Net Benefit",
    uc_label = "Strategy",
    uc_threshold = "Decision Threshold",
    uc_net_benefit = "Net Benefit"
  ) %>%
  gt::fmt_percent(
    columns = c(cd_threshold, uc_threshold),
    decimals = 0
  ) %>%
  gt::fmt(
    columns = c(cd_net_benefit, uc_net_benefit),
    fns = function(x) ifelse(x == 0, "0", formatC(x, format = "f", digits = 3))
  ) %>%
  gt::tab_spanner(
    label = "Crohn's Disease",
    columns = c(cd_label, cd_threshold, cd_net_benefit)
  ) %>%
  gt::tab_spanner(
    label = "Ulcerative Colitis",
    columns = c(uc_label, uc_threshold, uc_net_benefit)
  ) %>%
  gt::cols_align("left", columns = everything()) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_spanners(spanners = c("Crohn's Disease", "Ulcerative Colitis"))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(columns = everything())
  )
Crohn's Disease Ulcerative Colitis
Strategy Decision Threshold Net Benefit Strategy Decision Threshold Net Benefit
Treat All 10% 0.270 Treat All 10% 0.230
Treat All 20% 0.179 Treat All 20% 0.134
Treat All 30% 0.061 Treat All 30% 0.010
Treat All 40% -0.095 Treat All 40% -0.154
Treat All 50% -0.314 Treat All 50% -0.385
Treat None 10% 0 Treat None 10% 0
Treat None 20% 0 Treat None 20% 0
Treat None 30% 0 Treat None 30% 0
Treat None 40% 0 Treat None 40% 0
Treat None 50% 0 Treat None 50% 0
Clinical Decision Support Tool 10% 0.272 Clinical Decision Support Tool 10% 0.231
Clinical Decision Support Tool 20% 0.204 Clinical Decision Support Tool 20% 0.142
Clinical Decision Support Tool 30% 0.088 Clinical Decision Support Tool 30% 0.050
Clinical Decision Support Tool 40% 0.019 Clinical Decision Support Tool 40% 0.018
Clinical Decision Support Tool 50% 0 Clinical Decision Support Tool 50% 0.009

For persistence

vdz_cd_dca_tbl_persistence <- dca(persistence ~ prob,
                                  data = vdz_cd_included,
                                  thresholds = seq(0.1, 0.5, 0.1),
                                  label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit) 

vdz_uc_dca_tbl_persistence <- dca(persistence ~ prob,
                                  data = vdz_uc_included,
                                  thresholds = seq(0.1, 0.5, 0.1),
                                  label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit)


combined_dca_tbl_persistence <- bind_cols(
  vdz_cd_dca_tbl_persistence %>%
    rename_with(~ paste0("cd_", .)),
  vdz_uc_dca_tbl_persistence %>%
    rename_with(~ paste0("uc_", .))
)

combined_dca_tbl_persistence %>%
  gt::gt() %>%
  gt::cols_label(
    cd_label = "Strategy",
    cd_threshold = "Decision Threshold",
    cd_net_benefit = "Net Benefit",
    uc_label = "Strategy",
    uc_threshold = "Decision Threshold",
    uc_net_benefit = "Net Benefit"
  ) %>%
  gt::fmt_percent(
    columns = c(cd_threshold, uc_threshold),
    decimals = 0
  ) %>%
  gt::fmt(
    columns = c(cd_net_benefit, uc_net_benefit),
    fns = function(x) ifelse(x == 0, "0", formatC(x, format = "f", digits = 3))
  ) %>%
  gt::tab_spanner(
    label = "Crohn's Disease",
    columns = c(cd_label, cd_threshold, cd_net_benefit)
  ) %>%
  gt::tab_spanner(
    label = "Ulcerative Colitis",
    columns = c(uc_label, uc_threshold, uc_net_benefit)
  ) %>%
  gt::cols_align("left", columns = everything()) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_spanners(spanners = c("Crohn's Disease", "Ulcerative Colitis"))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(columns = everything())
  )
Crohn's Disease Ulcerative Colitis
Strategy Decision Threshold Net Benefit Strategy Decision Threshold Net Benefit
Treat All 10% 0.452 Treat All 10% 0.373
Treat All 20% 0.384 Treat All 20% 0.295
Treat All 30% 0.296 Treat All 30% 0.194
Treat All 40% 0.179 Treat All 40% 0.060
Treat All 50% 0.014 Treat All 50% -0.128
Treat None 10% 0 Treat None 10% 0
Treat None 20% 0 Treat None 20% 0
Treat None 30% 0 Treat None 30% 0
Treat None 40% 0 Treat None 40% 0
Treat None 50% 0 Treat None 50% 0
Clinical Decision Support Tool 10% 0.454 Clinical Decision Support Tool 10% 0.374
Clinical Decision Support Tool 20% 0.374 Clinical Decision Support Tool 20% 0.303
Clinical Decision Support Tool 30% 0.144 Clinical Decision Support Tool 30% 0.194
Clinical Decision Support Tool 40% 0.031 Clinical Decision Support Tool 40% 0.057
Clinical Decision Support Tool 50% 0 Clinical Decision Support Tool 50% 0.009

14. Assess the relationship between CDST variables and outcome

Calculate probabilities

VDZ-CD

vdz_cd_modelvars <- c("no_previous_antitnf", "no_surgery_before", "no_fistulizing_before", "albumin_g_L", "crp_mg_L")

vdz_cd_coefs_df <- vdz_cd_included %>%
  select(c(all_of(vdz_cd_modelvars), "clinical_remission_cdst"))

vdz_cd_mvassociations_mdl <- glm(formula = paste("clinical_remission_cdst ~", paste(vdz_cd_modelvars, collapse = "+")), data = vdz_cd_coefs_df, family = "binomial")

vdz_cd_mvassociations_df <- vdz_cd_coefs_df %>%
  mutate(probs = predict(vdz_cd_mvassociations_mdl, vdz_cd_coefs_df, type = "response"))

vdz_cd_mvassociations_df %<>%
  mutate(logit = predict(vdz_cd_mvassociations_mdl, vdz_cd_coefs_df))

VDZ-UC

vdz_uc_modelvars <- c("no_previous_antitnf", "dxduration_above_two", "moderate_endo_activity", "albumin_g_L")

vdz_uc_coefs_df <- vdz_uc_included %>%
  select(c(all_of(vdz_uc_modelvars), "clinical_remission_cdst"))

vdz_uc_mvassociations_mdl <- glm(formula = paste("clinical_remission_cdst ~", paste(vdz_uc_modelvars, collapse = "+")), data = vdz_uc_coefs_df, family = "binomial")

vdz_uc_mvassociations_df <- vdz_uc_coefs_df %>%
  mutate(probs = predict(vdz_uc_mvassociations_mdl, vdz_uc_coefs_df, type = "response"))

vdz_uc_mvassociations_df %<>%
  mutate(logit = predict(vdz_uc_mvassociations_mdl, vdz_uc_coefs_df))

UST-CD

ust_cd_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(ust_cd_included$no_previous_antitnf)
[1] TRUE
ust_cd_included %<>% mutate(no_surgery_before = if_else(intestinal_resections_before == 1, 0, 1), .after = intestinal_resections_before)

is.numeric(ust_cd_included$no_surgery_before)
[1] TRUE
ust_cd_included %<>% mutate(no_prior_smoking = if_else(smoking %in% c(1,2), 0, 1), .after = smoking)

is.numeric(ust_cd_included$no_prior_smoking)
[1] TRUE
ust_cd_included %<>% mutate(no_fistulizing_current = if_else(fistulizing_disease_current == 1, 0, 1), .after = fistulizing_disease_current)

is.numeric(ust_cd_included$no_fistulizing_current)
[1] TRUE
is.numeric(ust_cd_included$albumin_g_L)
[1] TRUE
ust_cd_modelvars <- c("no_previous_antitnf", "no_surgery_before", "no_prior_smoking", "no_fistulizing_current", "albumin_g_L")
ust_cd_coefs_df <- ust_cd_included %>%
  select(c(all_of(ust_cd_modelvars), "clinical_remission_cdst"))

ust_cd_mvassociations_mdl <- glm(formula = paste("clinical_remission_cdst ~", paste(ust_cd_modelvars, collapse = "+")), data = ust_cd_coefs_df, family = "binomial")

ust_cd_mvassociations_df <- ust_cd_coefs_df %>%
  mutate(probs = predict(ust_cd_mvassociations_mdl, ust_cd_coefs_df, type = "response"))

Visualize associations

VDZ-CD

vdz_cd_mvassociations_df$no_previous_antitnf <- fct_recode(as.character(vdz_cd_mvassociations_df$no_previous_antitnf), "Yes" = "0", "No" = "1")
vdz_cd_mvassociations_df$no_surgery_before <- fct_recode(as.character(vdz_cd_mvassociations_df$no_surgery_before), "Yes" = "0", "No" = "1")
vdz_cd_mvassociations_df$no_fistulizing_before <- fct_recode(as.character(vdz_cd_mvassociations_df$no_fistulizing_before), "Yes" = "0", "No" = "1")

vdz_cd_mv_antitnf <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = no_previous_antitnf, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior anti-TNF exposure") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_surgery <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = no_surgery_before, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior bowel surgery") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_fistulizing <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = no_fistulizing_before, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior fistulizing disease") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_albumin <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = albumin_g_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline albumin (g/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_crp <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = crp_mg_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline C-reactive protein (mg/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank()) +
  coord_cartesian(xlim = c(0, 150))

vdz_cd_mv_combined <- ggarrange(vdz_cd_mv_antitnf, vdz_cd_mv_surgery, vdz_cd_mv_fistulizing, vdz_cd_mv_albumin, vdz_cd_mv_crp)

annotate_figure(vdz_cd_mv_combined, left = text_grob("Probability of clinical remission", face = "bold", rot = 90))

VDZ-UC

vdz_uc_mvassociations_df$no_previous_antitnf <- fct_recode(as.character(vdz_uc_mvassociations_df$no_previous_antitnf), "Yes" = "0", "No" = "1")
vdz_uc_mvassociations_df$dxduration_above_two <- fct_recode(as.character(vdz_uc_mvassociations_df$dxduration_above_two), "< 2" = "0", "≥ 2" = "1")
vdz_uc_mvassociations_df$moderate_endo_activity <- fct_recode(as.character(vdz_uc_mvassociations_df$moderate_endo_activity), "3" = "0", "2" = "1")

vdz_uc_mv_antitnf <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = no_previous_antitnf, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior anti-TNF exposure") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_duration <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = dxduration_above_two, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Disease duration (year)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_endo <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = moderate_endo_activity, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Mayo endoscopic subscore") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_albumin <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = albumin_g_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline albumin (g/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_combined <- ggarrange(vdz_uc_mv_antitnf, vdz_uc_mv_duration, vdz_uc_mv_endo, vdz_uc_mv_albumin)

annotate_figure(vdz_uc_mv_combined, left = text_grob("Probability of clinical remission", face = "bold", rot = 90))

UST-CD

ust_cd_mvassociations_df$no_previous_antitnf <- fct_recode(as.character(ust_cd_mvassociations_df$no_previous_antitnf), "Yes" = "0", "No" = "1")
ust_cd_mvassociations_df$no_surgery_before <- fct_recode(as.character(ust_cd_mvassociations_df$no_surgery_before), "Yes" = "0", "No" = "1")
ust_cd_mvassociations_df$no_prior_smoking <- fct_recode(as.character(ust_cd_mvassociations_df$no_prior_smoking), "Yes" = "0", "No" = "1")
ust_cd_mvassociations_df$no_fistulizing_current <- fct_recode(as.character(ust_cd_mvassociations_df$no_fistulizing_current), "Yes" = "0", "No" = "1")

ust_cd_mv_antitnf <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_previous_antitnf, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior anti-TNF exposure") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


ust_cd_mv_surgery <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_surgery_before, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior bowel surgery") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


ust_cd_mv_smoking <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_prior_smoking, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior smoking history") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

ust_cd_mv_fistulizing <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_fistulizing_current, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Baseline actively draining fistula") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


ust_cd_mv_albumin <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = albumin_g_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline albumin (g/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

ust_cd_mv_combined <- ggarrange(ust_cd_mv_antitnf, ust_cd_mv_surgery, ust_cd_mv_smoking, ust_cd_mv_fistulizing, ust_cd_mv_albumin)

annotate_figure(ust_cd_mv_combined, left = text_grob("Probability of clinical remission", face = "bold", rot = 90))

15. Check linearity assumption for numeric variables

linearity_albumin <- ggplot(vdz_cd_mvassociations_df, aes(logit, albumin_g_L))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess", col = "black") + 
  ylab("Baseline albumin (g/L)") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(face = "bold")) +
  coord_cartesian(expand = 0)

linearity_crp <- ggplot(vdz_cd_mvassociations_df, aes(logit, crp_mg_L))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess", col = "black") + 
  ylab("Baseline C-reactive protein (mg/L)") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(face = "bold")) +
  coord_cartesian(expand = 0)

linearity_albumin_uc <- ggplot(vdz_uc_mvassociations_df, aes(logit, albumin_g_L))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess", col = "black") + 
  ylab("Baseline albumin (g/L)") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(face = "bold")) +
  coord_cartesian(expand = 0)

linearity_combined <- ggarrange(linearity_albumin, linearity_crp, linearity_albumin_uc, labels = c("A", "B", "C"), nrow = 1)

annotate_figure(linearity_combined, bottom = text_grob("Log odds of clinical remission", face = "bold"))