Calibration, Clinical Utility and Specificity of Clinical Decision Support Tools in Inflammatory Bowel Disease

Author

Dahham Alsoud

Published

July 1, 2024

Modified

July 1, 2024

The aim of this analysis notebook is to reproduce the reported analysis from the paper Calibration, Clinical Utility and Specificity of Clinical Decision Support Tools in Inflammatory Bowel Disease.

Requirements:

To run the analysis, you will need:

Raw data files: These should be placed inside the input folder within the home directory of your project. The required files are:
- Pheno_CD_VDZ.xlsx: Excel sheet containing metadata on CD patients starting VDZ.
- Pheno_UC_VDZ.xlsx: Excel sheet containing metadata on UC patients starting VDZ.
- Pheno_CD_UST.xlsx: Excel sheet containing metadata on CD patients starting UST.
- VDZ_UST_troughs.RData: R workspace containing two dataframes with trough levels of VDZ and UST.

These data files can be obtained upon reasonable request to the corresponding authors, Séverine Vermeire, via severine.vermeire@uzleuven.be and Bram Verstockt, via bram.verstockt@uzleuven.be.

Complementary files deposited in the GitHub repository for this analysis:
- renv.lock, .Rprofile, renv/settings.json and renv/activate.R: These files are necessary to reproduce the same versions of the packages used in the original analysis.
- helpers.R: This script should be sourced at the beginning of the analysis to produce some customized functions.
- variable-selection-instability.R: This script reproduces the bootstrap analysis exploring the stability of variable selection in clinical prediction models.

1. Install and load needed packages/functions

if (!require(pacman)) install.packages("pacman")


pacman::p_load(rio, here, magrittr, janitor, tidyverse, skimr, phdcocktail, gtsummary, survival, 
               huxtable, openxlsx, ggpubr, powerjoin, rms, dcurves, epiR, pROC, glue, predRupdate)


source("helpers.R")

2. Import phenodata and pre-process input

vdz_uc_starters <- import(here("input", list.files("input", pattern = "uc_vdz", ignore.case = TRUE)))
vdz_cd_starters <- import(here("input", list.files("input", pattern = "cd_vdz", ignore.case = TRUE)))
ust_cd_starters <- import(here("input", list.files("input", pattern = "cd_ust", ignore.case = TRUE)))

Fix excel dates

important_date_vrs <- c("start_date", "visit_date", "diagnosis_date", "birth_date", "last_followup_date", "stop_date", 
                        "stop_date_itt", "dose_optimisation_date", "surgery_after_date", "date_lab", "endoscopy_date")

vdz_uc_starters <- convert_date_columns(vdz_uc_starters, important_date_vrs)
vdz_cd_starters <- convert_date_columns(vdz_cd_starters, important_date_vrs)
ust_cd_starters %<>% mutate(across(where(is.POSIXct), ymd))

Add dates of outcomes assessment columns

vdz_uc_starters <- add_assessment_dates(vdz_uc_starters)
vdz_cd_starters <- add_assessment_dates(vdz_cd_starters)
ust_cd_starters <- add_assessment_dates(ust_cd_starters)

Copy outcome data to the baseline row

vdz_uc_starters %<>%
  group_by(patient_id) %>%
  fill(c(clinical_response_pmayo:cs_dependent_outcome, clinical_assessment_date, endoscopic_assessment_date), .direction = "up") %>%
  ungroup()

vdz_cd_starters %<>%
  group_by(patient_id) %>%
  fill(c(clinical_response:cs_dependent_outcome, clinical_assessment_date, endoscopic_assessment_date), .direction = "up") %>%
  ungroup()

ust_cd_starters %<>%
  group_by(patient_id) %>%
  fill(c(clinical_response:cs_dependent_outcome, clinical_assessment_date, endoscopic_assessment_date), .direction = "up") %>%
  ungroup()

Filter eligible patients

vdz_uc_eligible <- vdz_uc_starters %>% filter(excluded == 0) 
vdz_cd_eligible <- vdz_cd_starters %>% filter(excluded == 0) 
ust_cd_eligible <- ust_cd_starters %>% filter(excluded == 0)

Filter included patients

vdz_uc_included <- vdz_uc_eligible %>% filter(baseline_disease_activity_type != "clinical")
vdz_uc_included %<>% filter(pga > 1)
vdz_uc_included %<>% filter(clinical_assessment == 1)
vdz_uc_included %<>% filter(mes > 1, !is.na(disease_duration_yrs), !is.na(previous_antitnf), !is.na(albumin_g_L)) 

vdz_cd_included <- vdz_cd_eligible %>% filter(baseline_disease_activity_type != "clinical")
vdz_cd_included %<>% filter(pga > 1)
vdz_cd_included %<>% filter(clinical_assessment == 1)
vdz_cd_included %<>% filter(!is.na(intestinal_resections_before), !is.na(fistulizing_disease_prior), 
                            !is.na(previous_antitnf), !is.na(albumin_g_L), !is.na(crp_mg_L)) 

ust_cd_included <- ust_cd_eligible %>% filter(baseline_disease_activity_type != "clinical") 
ust_cd_included %<>% filter(pga > 1) 
ust_cd_included %<>% filter(clinical_assessment == 1) 
ust_cd_included %<>% filter(!is.na(cdst_category))

3. Get baseline characteristics (Table 1)

Identify needed columns and define new data frames

tblone_vrs <- c("gender", "age_yrs", "disease_duration_yrs", "smoking", "bmi_kg_m2", "albumin_g_L","crp_mg_L", "calprotectin_ug_g", 
                "concomitant_cs", "concomitant_imm", "concomitant_5asa", "previous_advancedrx_amount", "previous_antitnf", 
                "intestinal_resections_before", "age_at_diagnosis_yrs", 
                "disease_location", "upper_gi_disease", "disease_behaviour","peri_anal_disease", "ses_cd",
                "disease_extent", "mes",
                "fistulizing_disease_prior", "fistulizing_disease_current")

vdz_cd_tblone_df <- vdz_cd_included %>% select(any_of(tblone_vrs))
vdz_uc_tblone_df <- vdz_uc_included %>% select(any_of(tblone_vrs))
ust_cd_tblone_df <- ust_cd_included %>% select(any_of(tblone_vrs))

Manipulate columns

continuous_vrs <- c("age_yrs", "disease_duration_yrs", "bmi_kg_m2", "albumin_g_L","crp_mg_L", "calprotectin_ug_g", "ses_cd")

vdz_cd_tblone_df %<>% mutate(across(all_of(continuous_vrs), ~ round(as.numeric(.), 1)))
ust_cd_tblone_df %<>% mutate(across(all_of(continuous_vrs), ~ round(as.numeric(.), 1)))
vdz_uc_tblone_df %<>% mutate(across(any_of(continuous_vrs), ~ round(as.numeric(.), 1)))

vdz_cd_tblone_df %<>% mutate(disease_behaviour = if_else(str_detect(disease_behaviour, "B3"), "B3", disease_behaviour))
ust_cd_tblone_df %<>% mutate(disease_behaviour = if_else(str_detect(disease_behaviour, "B3"), "B3", disease_behaviour))

ust_cd_tblone_df %<>% mutate(female_gender = if_else(gender == "F", 1, 0), .after = gender) %>% select(-gender) 
vdz_cd_tblone_df %<>% mutate(female_gender = if_else(gender == "F", 1, 0), .after = gender) %>% select(-gender)
vdz_uc_tblone_df %<>% mutate(female_gender = if_else(gender == "F", 1, 0), .after = gender) %>% select(-gender)

ust_cd_tblone_df %<>% mutate(prior_smoker = if_else(smoking == 0, 0, 1), .after = smoking) %>% select(-smoking) 
vdz_cd_tblone_df %<>% mutate(prior_smoker = if_else(smoking == 0, 0, 1), .after = smoking) %>% select(-smoking)
vdz_uc_tblone_df %<>% mutate(prior_smoker = if_else(smoking == 0, 0, 1), .after = smoking) %>% select(-smoking)


ust_cd_tblone_df %<>% get_age_montreal() %>% select(-age_at_diagnosis_yrs) 
vdz_cd_tblone_df %<>% get_age_montreal() %>% select(-age_at_diagnosis_yrs)
vdz_uc_tblone_df %<>% get_age_montreal() %>% select(-age_at_diagnosis_yrs)


ust_cd_tblone_df %<>% recode_steroids()
vdz_cd_tblone_df %<>% recode_steroids()
vdz_uc_tblone_df %<>% recode_steroids()

ust_cd_tblone_df %<>% mutate(concomitant_imm = if_else(concomitant_imm == "0", 0, 1),
                             concomitant_5asa = if_else(concomitant_5asa == "0", 0, 1))
vdz_cd_tblone_df %<>% mutate(concomitant_imm = if_else(concomitant_imm == "0", 0, 1),
                             concomitant_5asa = if_else(concomitant_5asa == "0", 0, 1))
vdz_uc_tblone_df %<>% mutate(concomitant_imm = if_else(concomitant_imm == "0", 0, 1),
                             concomitant_5asa = if_else(concomitant_5asa == "0", 0, 1))


ust_cd_tblone_df %<>% recode_previous_advancedrx()
vdz_cd_tblone_df %<>% recode_previous_advancedrx()
vdz_uc_tblone_df %<>% recode_previous_advancedrx()


categorical_vrs <- c("concomitant_cs", "previous_advancedrx_amount", "disease_location", "disease_behaviour", "disease_extent", "mes", "age_montreal")

dichotomous_vrs <- c("female_gender", "prior_smoker", "concomitant_imm", "concomitant_5asa", "previous_antitnf", "upper_gi_disease", "peri_anal_disease",
                     "intestinal_resections_before", "fistulizing_disease_prior", "fistulizing_disease_current")

Recode variables to publication shape

ibd_data_dict$variable_label[ibd_data_dict$variable == "ses_cd"] <- "Simple endoscopic score for CD" 

ibd_data_dict %<>%
  add_row(variable = "prior_smoker", variable_label = "Prior smoking history", value = NA, value_label = NA) %>% 
  add_row(variable = "intestinal_resections_before", variable_label = "Prior bowel surgery", value = NA, value_label = NA) %>% 
  add_row(variable = "previous_advancedrx_amount", variable_label = "Number of previous advanced therapies", value = NA, value_label = NA) %>% 
  add_row(variable = "albumin_g_L", variable_label = "Serum albumin (g/L)", value = NA, value_label = NA) %>% 
  add_row(variable = "crp_mg_L", variable_label = "C-reactive protein (mg/L)", value = NA, value_label = NA) %>% 
  add_row(variable = "previous_antitnf", variable_label = "Prior anti-TNF exposure", value = NA, value_label = NA) %>% 
  add_row(variable = "fistulizing_disease_prior", variable_label = "Prior fistulizing disease", value = NA, value_label = NA) %>% 
  add_row(variable = "fistulizing_disease_current", variable_label = "Baseline actively draining fistula", value = NA, value_label = NA) 

categoricals_to_recode <- c("concomitant_cs", "disease_location", "disease_behaviour", "disease_extent", "age_montreal")

ust_cd_tblone_df$previous_advancedrx_amount <- factor(ust_cd_tblone_df$previous_advancedrx_amount, levels = c("0", "1", "2", "≥ 3"), ordered = TRUE)
vdz_cd_tblone_df$previous_advancedrx_amount <- factor(vdz_cd_tblone_df$previous_advancedrx_amount, levels = c("0", "1", "2", "≥ 3"), ordered = TRUE)
vdz_uc_tblone_df$previous_advancedrx_amount <- factor(vdz_uc_tblone_df$previous_advancedrx_amount, levels = c("0", "1", "2", "≥ 3"), ordered = TRUE)
vdz_uc_tblone_df$mes <- factor(vdz_uc_tblone_df$mes, levels = c("2", "3"), ordered = TRUE)

ust_cd_tblone_df <- recode_vrs(ust_cd_tblone_df, data_dictionary = ibd_data_dict, vrs = categoricals_to_recode, factor = TRUE)
vdz_cd_tblone_df <- recode_vrs(vdz_cd_tblone_df, data_dictionary = ibd_data_dict, vrs = categoricals_to_recode, factor = TRUE)
vdz_uc_tblone_df <- recode_vrs(vdz_uc_tblone_df, data_dictionary = ibd_data_dict, vrs = categoricals_to_recode, factor = TRUE)

Make the tables

theme_gtsummary_journal(journal = "jama")
theme_gtsummary_compact()

VDZ-CD

vdz_cd_tblone <- vdz_cd_tblone_df %>% 
  tbl_summary(type = list(any_of(continuous_vrs) ~ "continuous",
                          any_of(categorical_vrs) ~ "categorical",
                          any_of(dichotomous_vrs) ~ "dichotomous"),
              missing_text = "missing, n",
              digits = list(all_categorical() ~ c(0, 1))) %>%
  bold_labels() %>%
  italicize_levels()

labels_to_levels <- c("upper_gi_disease", "peri_anal_disease")

tableone_body <- vdz_cd_tblone[["table_body"]]

tableone_body$row_type[tableone_body$variable %in% labels_to_levels] <- "level"

tableone_body$stat_label[tableone_body$variable %in% labels_to_levels] <- NA

tableone_body <- tableone_body[tableone_body$label != "None",]

vdz_cd_tblone[["table_body"]]  <- tableone_body

VDZ-UC

vdz_uc_tblone <- vdz_uc_tblone_df %>% 
  tbl_summary(type = list(any_of(continuous_vrs) ~ "continuous",
                          any_of(categorical_vrs) ~ "categorical",
                          any_of(dichotomous_vrs) ~ "dichotomous"),
              missing_text = "missing, n",
              digits = list(all_categorical() ~ c(0, 1))) %>%
  bold_labels() %>%
  italicize_levels()

tableone_body <- vdz_uc_tblone[["table_body"]]

tableone_body <- tableone_body[tableone_body$label != "None",]

vdz_uc_tblone[["table_body"]]  <- tableone_body

UST-CD

ust_cd_tblone <- ust_cd_tblone_df %>% 
  tbl_summary(type = list(any_of(continuous_vrs) ~ "continuous",
                          any_of(categorical_vrs) ~ "categorical",
                          any_of(dichotomous_vrs) ~ "dichotomous"),
              missing_text = "missing, n",
              digits = list(all_categorical() ~ c(0, 1))) %>%
  bold_labels() %>%
  italicize_levels()

tableone_body <- ust_cd_tblone[["table_body"]]

tableone_body$row_type[tableone_body$variable %in% labels_to_levels] <- "level"

tableone_body$stat_label[tableone_body$variable %in% labels_to_levels] <- NA

tableone_body <- tableone_body[tableone_body$label != "None",]

ust_cd_tblone[["table_body"]]  <- tableone_body

Merge the tables

tbl_merge(
    tbls = list(vdz_cd_tblone, vdz_uc_tblone, ust_cd_tblone),
    tab_spanner = c("**VDZ-CD**", "**VDZ-UC**", "**UST-CD**")
  )

Characteristic	VDZ-CD	VDZ-UC	UST-CD
Characteristic	N = 280	N = 218	N = 194
Female, n (%)	170 (60.7)	118 (54.1)	121 (62.4)
Age (year), Median (IQR)	41 (29 – 55)	41 (29 – 53)	41 (30 – 56)
Disease duration (year), Median (IQR)	11 (4 – 22)	8 (2 – 14)	13 (6 – 24)
Prior smoking history, n (%)	134 (47.9)	66 (30.3)	91 (46.9)
Body mass index (kg/m²), Median (IQR)	23.8 (21.3 – 27.1)	23.8 (20.7 – 27.3)	23.4 (20.6 – 26.9)
Serum albumin (g/L), Median (IQR)	41.9 (39.2 – 44.1)	42.0 (39.4 – 44.7)	41.2 (38.9 – 43.3)
C-reactive protein (mg/L), Median (IQR)	5 (2 – 14)	3 (1 – 8)	8 (3 – 19)
Faecal calprotectin (ug/g), Median (IQR)	649 (271 – 1,800)	1,673 (665 – 1,800)	985 (405 – 1,800)
missing, n	159	125	50
Concomitant corticosteroids, n (%)
Topical	37 (13.2)	77 (35.3)	12 (6.2)
Systemic	34 (12.1)	35 (16.1)	13 (6.7)
Concomitant immunomodulators, n (%)	14 (5.0)	7 (3.2)	0 (0.0)
Concomitant 5-aminosalicylates, n (%)	4 (1.4)	112 (51.4)	0 (0.0)
Number of previous advanced therapies, n (%)
0	72 (25.7)	82 (37.6)	39 (20.1)
1	81 (28.9)	78 (35.8)	38 (19.6)
2	104 (37.1)	50 (22.9)	54 (27.8)
≥ 3	23 (8.2)	8 (3.7)	63 (32.5)
Prior anti-TNF exposure, n (%)	204 (72.9)	136 (62.4)	151 (77.8)
Prior bowel surgery, n (%)	131 (46.8)	0 (0.0)	96 (49.5)
Age at diagnosis (year), n (%)
< 17 years	42 (15.0)	21 (9.6)	27 (13.9)
17-40 years	190 (67.9)	132 (60.6)	139 (71.6)
> 40 years	48 (17.1)	65 (29.8)	28 (14.4)
Disease location, n (%)
Ileal	81 (28.9)		60 (30.9)
Colonic	41 (14.6)		22 (11.3)
Ileocolonic	158 (56.4)		112 (57.7)
Upper GI modifier	19 (6.8)		18 (9.3)
Disease behaviour, n (%)
Inflammatory	109 (38.9)		78 (40.2)
Fibrostenotic	110 (39.3)		73 (37.6)
Penetrating	61 (21.8)		43 (22.2)
Perianal modifier	86 (30.7)		69 (35.6)
Simple endoscopic score for CD, Median (IQR)	7.5 (5.0 – 15.0)		10 (6 – 16)
missing, n	184		40
Prior fistulizing disease, n (%)	95 (33.9)		70 (36.1)
Baseline actively draining fistula, n (%)	14 (5.0)		9 (4.6)
Disease extent
Proctitis		18 (8.3)
Left-sided colitis		115 (52.8)
Extensive colitis		85 (39.0)
Mayo endoscopic subscore
2		98 (45.0)
3		120 (55.0)

4. Calculate CDST points and assign response categories

VDZ-UC

Ensure that we have all CDST variables in numeric shape ready for calculations

vdz_uc_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(vdz_uc_included$no_previous_antitnf)

[1] TRUE

is.numeric(vdz_uc_included$albumin_g_L)

[1] TRUE

vdz_uc_included %<>% mutate(moderate_endo_activity = if_else(mes == 2, 1, 0), .after = mes)

is.numeric(vdz_uc_included$moderate_endo_activity)

[1] TRUE

vdz_uc_included %<>% mutate(dxduration_above_two = if_else(disease_duration_yrs >= 2, 1, 0), .after = disease_duration_yrs)

is.numeric(vdz_uc_included$dxduration_above_two)

[1] TRUE

Calculate CDST points

vdz_uc_included %<>% mutate(cdst_points = 3*dxduration_above_two + 3*no_previous_antitnf + 2*moderate_endo_activity + 0.65*albumin_g_L)

Assign CDST categories

vdz_uc_included %<>% mutate(cdst_category = case_when(cdst_points <= 26 ~ "Low",
                                                      cdst_points > 26 & cdst_points <= 32 ~ "Intermediate",
                                                      cdst_points > 32 ~ "High") 
)

vdz_uc_included %>% count(cdst_category)

cdst_category	n
High	95
Intermediate	105
Low	18

vdz_uc_included$cdst_category <- factor(vdz_uc_included$cdst_category, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

VDZ-CD

Ensure that we have all CDST variables in numeric shape ready for calculations

vdz_cd_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(vdz_cd_included$no_previous_antitnf)

[1] TRUE

vdz_cd_included %<>% mutate(no_surgery_before = if_else(intestinal_resections_before == 1, 0, 1), .after = intestinal_resections_before)

is.numeric(vdz_cd_included$no_surgery_before)

[1] TRUE

vdz_cd_included %<>% mutate(no_fistulizing_before = if_else(fistulizing_disease_prior == 1, 0, 1), .after = fistulizing_disease_prior)

is.numeric(vdz_cd_included$no_fistulizing_before)

[1] TRUE

is.numeric(vdz_cd_included$albumin_g_L)

[1] TRUE

is.numeric(vdz_cd_included$crp_mg_L)

[1] TRUE

Calculate CDST points

vdz_cd_included %<>% mutate(cdst_points = 2*no_surgery_before + 3*no_previous_antitnf + 2*no_fistulizing_before + 0.4*albumin_g_L - case_when(crp_mg_L >= 3 & crp_mg_L <= 10 ~ 0.5,
                                                                                                                                              crp_mg_L > 10 ~ 3,
                                                                                                                                              .default = 0))

Assign CDST categories

vdz_cd_included %<>% mutate(cdst_category = case_when(cdst_points <= 13 ~ "Low",
                                                      cdst_points > 13 & cdst_points <= 19 ~ "Intermediate",
                                                      cdst_points > 19 ~ "High") 
)

vdz_cd_included %>% count(cdst_category)

cdst_category	n
High	121
Intermediate	146
Low	13

vdz_cd_included$cdst_category <- factor(vdz_cd_included$cdst_category, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

UST-CD

ust_cd_included %>% count(cdst_category)

cdst_category	n
High	80
Intermediate	95
Low	19

ust_cd_included$cdst_category <- factor(ust_cd_included$cdst_category, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

5. Calculate discrimination for effectiveness outcomes

VDZ-CD

Clinical response

vdz_cd_included %<>% mutate(clinical_assessment_before_optimisation = clinical_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(clinical_assessment_before_optimisation = TRUE))

vdz_cd_included %<>% mutate(clinical_response_cdst = clinical_response == 1 & clinical_assessment_before_optimisation)

vdz_cd_included %>% count(cdst_category, clinical_response_cdst)

cdst_category	clinical_response_cdst	n
Low	FALSE	7
Low	TRUE	6
Intermediate	FALSE	64
Intermediate	TRUE	82
High	FALSE	31
High	TRUE	90

options(scipen = 999)

chisq_vdz_cd_clinical_response <- chisq.test(vdz_cd_included$cdst_category, vdz_cd_included$clinical_response_cdst)

vdz_cd_cdst_tabulation_clinical_response <- vdz_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage = scales::percent(achieved/total))

Clinical remission

vdz_cd_included %<>% mutate(clinical_remission_cdst = clinical_remission == 1 & clinical_assessment_before_optimisation)

vdz_cd_included %>% count(cdst_category, clinical_remission_cdst)

cdst_category	clinical_remission_cdst	n
Low	FALSE	13
Intermediate	FALSE	126
Intermediate	TRUE	20
High	FALSE	96
High	TRUE	25

chisq_vdz_cd_clinical_remission <- chisq.test(vdz_cd_included$cdst_category, vdz_cd_included$clinical_remission_cdst)

vdz_cd_cdst_tabulation_clinical_remission <- vdz_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic response

vdz_cd_included_endoscopic <- vdz_cd_included %>%
  filter(endoscopic_assessment == 1)

vdz_cd_included_endoscopic %<>% mutate(endoscopic_assessment_before_optimisation = endoscopic_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(endoscopic_assessment_before_optimisation = TRUE))

vdz_cd_included_endoscopic %<>% mutate(endoscopic_response_cdst = endoscopic_response == 1 & endoscopic_assessment_before_optimisation)


vdz_cd_included_endoscopic %>% count(cdst_category, endoscopic_response_cdst)

cdst_category	endoscopic_response_cdst	n
Low	FALSE	8
Low	TRUE	2
Intermediate	FALSE	51
Intermediate	TRUE	53
High	FALSE	33
High	TRUE	66

chisq_vdz_cd_endoscopic_response <- chisq.test(vdz_cd_included_endoscopic$cdst_category, vdz_cd_included_endoscopic$endoscopic_response_cdst)

vdz_cd_cdst_tabulation_endoscopic_response <- vdz_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic remission

vdz_cd_included_endoscopic %<>% mutate(endoscopic_remission_cdst = endoscopic_remission == 1 & endoscopic_assessment_before_optimisation)

vdz_cd_included_endoscopic %>% count(cdst_category, endoscopic_remission_cdst)

cdst_category	endoscopic_remission_cdst	n
Low	FALSE	9
Low	TRUE	1
Intermediate	FALSE	84
Intermediate	TRUE	20
High	FALSE	57
High	TRUE	42

chisq_vdz_cd_endoscopic_remission <- chisq.test(vdz_cd_included_endoscopic$cdst_category, vdz_cd_included_endoscopic$endoscopic_remission_cdst)

vdz_cd_cdst_tabulation_endoscopic_remission <- vdz_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
           percentage = scales::percent(achieved/total))

VDZ-UC

Clinical response

vdz_uc_included %<>% mutate(clinical_assessment_before_optimisation = clinical_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(clinical_assessment_before_optimisation = TRUE))

vdz_uc_included %<>% mutate(clinical_response_cdst = clinical_response_pga == 1 & clinical_assessment_before_optimisation)

vdz_uc_included %>% count(cdst_category, clinical_response_cdst)

cdst_category	clinical_response_cdst	n
Low	FALSE	9
Low	TRUE	9
Intermediate	FALSE	47
Intermediate	TRUE	58
High	FALSE	20
High	TRUE	75

chisq_vdz_uc_clinical_response <- chisq.test(vdz_uc_included$cdst_category, vdz_uc_included$clinical_response_cdst)

vdz_uc_cdst_tabulation_clinical_response <- vdz_uc_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage = scales::percent(achieved/total))

Clinical remission

vdz_uc_included %<>% mutate(clinical_remission_cdst = clinical_remission_pga == 1 & clinical_assessment_before_optimisation)

vdz_uc_included %>% count(cdst_category, clinical_remission_cdst)

cdst_category	clinical_remission_cdst	n
Low	FALSE	15
Low	TRUE	3
Intermediate	FALSE	90
Intermediate	TRUE	15
High	FALSE	62
High	TRUE	33

chisq_vdz_uc_clinical_remission <- chisq.test(vdz_uc_included$cdst_category, vdz_uc_included$clinical_remission_cdst)

vdz_uc_cdst_tabulation_clinical_remission <- vdz_uc_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic response

vdz_uc_included_endoscopic <- vdz_uc_included %>%
  filter(endoscopic_assessment == 1)

vdz_uc_included_endoscopic %<>% mutate(endoscopic_assessment_before_optimisation = endoscopic_assessment_date <= dose_optimisation_date) %>%
  replace_na(list(endoscopic_assessment_before_optimisation = TRUE))

vdz_uc_included_endoscopic %<>% mutate(endoscopic_response_cdst = endoscopic_response == 1 & endoscopic_assessment_before_optimisation)

vdz_uc_included_endoscopic %>% count(cdst_category, endoscopic_response_cdst)

cdst_category	endoscopic_response_cdst	n
Low	FALSE	10
Low	TRUE	8
Intermediate	FALSE	63
Intermediate	TRUE	42
High	FALSE	26
High	TRUE	68

chisq_vdz_uc_endoscopic_response <- chisq.test(vdz_uc_included_endoscopic$cdst_category, vdz_uc_included_endoscopic$endoscopic_response_cdst)

vdz_uc_cdst_tabulation_endoscopic_response <- vdz_uc_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic remission

vdz_uc_included_endoscopic %<>% mutate(endoscopic_remission_cdst = endoscopic_remission == 1 & endoscopic_assessment_before_optimisation)

vdz_uc_included_endoscopic %>% count(cdst_category, endoscopic_remission_cdst)

cdst_category	endoscopic_remission_cdst	n
Low	FALSE	11
Low	TRUE	7
Intermediate	FALSE	79
Intermediate	TRUE	26
High	FALSE	53
High	TRUE	41

chisq_vdz_uc_endoscopic_remission <- chisq.test(vdz_uc_included_endoscopic$cdst_category, vdz_uc_included_endoscopic$endoscopic_remission_cdst)

vdz_uc_cdst_tabulation_endoscopic_remission <- vdz_uc_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
            percentage = scales::percent(achieved/total))

UST-CD

Clinical response

ust_cd_included %<>% mutate(clinical_assessment_before_optimisation = clinical_assessment_date <= first_dose_optimisation_date) %>%
  replace_na(list(clinical_assessment_before_optimisation = TRUE))

ust_cd_included %<>% mutate(clinical_response_cdst = clinical_response == 1 & clinical_assessment_before_optimisation)

ust_cd_included %>% count(cdst_category, clinical_response_cdst)

cdst_category	clinical_response_cdst	n
Low	FALSE	4
Low	TRUE	15
Intermediate	FALSE	45
Intermediate	TRUE	50
High	FALSE	31
High	TRUE	49

chisq_ust_cd_clinical_response <- chisq.test(ust_cd_included$cdst_category, ust_cd_included$clinical_response_cdst)

ust_cd_cdst_tabulation_clinical_response <- ust_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage = scales::percent(achieved/total))

Clinical remission

ust_cd_included %<>% mutate(clinical_remission_cdst = clinical_remission == 1 & clinical_assessment_before_optimisation)

ust_cd_included %>% count(cdst_category, clinical_remission_cdst)

cdst_category	clinical_remission_cdst	n
Low	FALSE	16
Low	TRUE	3
Intermediate	FALSE	86
Intermediate	TRUE	9
High	FALSE	61
High	TRUE	19

chisq_ust_cd_clinical_remission <- chisq.test(ust_cd_included$cdst_category, ust_cd_included$clinical_remission_cdst)

ust_cd_cdst_tabulation_clinical_remission <- ust_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic response

ust_cd_included_endoscopic <- ust_cd_included %>%
  filter(endoscopic_assessment == 1)

ust_cd_included_endoscopic %<>% mutate(endoscopic_assessment_before_optimisation = endoscopic_assessment_date <= first_dose_optimisation_date) %>%
  replace_na(list(endoscopic_assessment_before_optimisation = TRUE))

ust_cd_included_endoscopic %<>% mutate(endoscopic_response_cdst = endoscopic_response == 1 & endoscopic_assessment_before_optimisation)


ust_cd_included_endoscopic %>% count(cdst_category, endoscopic_response_cdst)

cdst_category	endoscopic_response_cdst	n
Low	FALSE	6
Low	TRUE	11
Intermediate	FALSE	39
Intermediate	TRUE	42
High	FALSE	33
High	TRUE	35

chisq_ust_cd_endoscopic_response <- chisq.test(ust_cd_included_endoscopic$cdst_category, ust_cd_included_endoscopic$endoscopic_response_cdst)

ust_cd_cdst_tabulation_endoscopic_response <- ust_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage = scales::percent(achieved/total))

Endoscopic remission

ust_cd_included_endoscopic %<>% mutate(endoscopic_remission_cdst = endoscopic_remission == 1 & endoscopic_assessment_before_optimisation)

ust_cd_included_endoscopic %>% count(cdst_category, endoscopic_remission_cdst)

cdst_category	endoscopic_remission_cdst	n
Low	FALSE	15
Low	TRUE	2
Intermediate	FALSE	74
Intermediate	TRUE	7
High	FALSE	54
High	TRUE	14

chisq_ust_cd_endoscopic_remission <- chisq.test(ust_cd_included_endoscopic$cdst_category, ust_cd_included_endoscopic$endoscopic_remission_cdst)

ust_cd_cdst_tabulation_endoscopic_remission <- ust_cd_included_endoscopic %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
            percentage = scales::percent(achieved/total))

6. Visualize discrimination for effectiveness outcomes

VDZ-CD

vdz_cd_clinical_response_gg <- ggplot(data = vdz_cd_included, aes(x = cdst_category, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_clinical_response$percentage, "\n", "(", vdz_cd_cdst_tabulation_clinical_response$achieved, "/", vdz_cd_cdst_tabulation_clinical_response$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_clinical_response$achieved + 7,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_clinical_response$p.value), x = 2, y = 158.5, size = 4, fontface = "bold")

vdz_cd_clinical_remission_gg <- ggplot(data = vdz_cd_included, aes(x = cdst_category, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_clinical_remission$percentage, "\n", "(", vdz_cd_cdst_tabulation_clinical_remission$achieved, "/", vdz_cd_cdst_tabulation_clinical_remission$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_clinical_remission$achieved + 7,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_clinical_remission$p.value), x = 2, y = 158.5, size = 4, fontface = "bold")

vdz_cd_endoscopic_response_gg <- ggplot(data = vdz_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_endoscopic_response$percentage, "\n", "(", vdz_cd_cdst_tabulation_endoscopic_response$achieved, "/", vdz_cd_cdst_tabulation_endoscopic_response$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_endoscopic_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_endoscopic_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_cd_endoscopic_remission_gg <- ggplot(data = vdz_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_endoscopic_remission$percentage, "\n", "(", vdz_cd_cdst_tabulation_endoscopic_remission$achieved, "/", vdz_cd_cdst_tabulation_endoscopic_remission$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_endoscopic_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_endoscopic_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

Combine figures in 4 panels

vdz_cd_bars_combined <- ggarrange(vdz_cd_clinical_response_gg, vdz_cd_clinical_remission_gg, vdz_cd_endoscopic_response_gg, vdz_cd_endoscopic_remission_gg, nrow = 2, ncol = 2)

annotate_figure(vdz_cd_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

VDZ-UC

vdz_uc_clinical_response_gg <- ggplot(data = vdz_uc_included, aes(x = cdst_category, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_clinical_response$percentage, "\n", "(", vdz_uc_cdst_tabulation_clinical_response$achieved, "/", vdz_uc_cdst_tabulation_clinical_response$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_clinical_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_clinical_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_uc_clinical_remission_gg <- ggplot(data = vdz_uc_included, aes(x = cdst_category, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_clinical_remission$percentage, "\n", "(", vdz_uc_cdst_tabulation_clinical_remission$achieved, "/", vdz_uc_cdst_tabulation_clinical_remission$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_clinical_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_clinical_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_uc_endoscopic_response_gg <- ggplot(data = vdz_uc_included_endoscopic, aes(x = cdst_category, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_endoscopic_response$percentage, "\n", "(", vdz_uc_cdst_tabulation_endoscopic_response$achieved, "/", vdz_uc_cdst_tabulation_endoscopic_response$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_endoscopic_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_endoscopic_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

vdz_uc_endoscopic_remission_gg <- ggplot(data = vdz_uc_included_endoscopic, aes(x = cdst_category, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_endoscopic_remission$percentage, "\n", "(", vdz_uc_cdst_tabulation_endoscopic_remission$achieved, "/", vdz_uc_cdst_tabulation_endoscopic_remission$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_endoscopic_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_endoscopic_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

Combine figures in 4 panels

vdz_uc_bars_combined <- ggarrange(vdz_uc_clinical_response_gg, vdz_uc_clinical_remission_gg, vdz_uc_endoscopic_response_gg, vdz_uc_endoscopic_remission_gg, nrow = 2, ncol = 2)

annotate_figure(vdz_uc_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

UST-CD

ust_cd_clinical_response_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_response$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_response$achieved, "/", ust_cd_cdst_tabulation_clinical_response$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_response$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_response$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

ust_cd_clinical_remission_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_remission$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_remission$achieved, "/", ust_cd_cdst_tabulation_clinical_remission$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_remission$achieved + 5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_remission$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

ust_cd_endoscopic_response_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_response$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_response$achieved, "/", ust_cd_cdst_tabulation_endoscopic_response$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_response$achieved + 4,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_response$p.value), x = 2, y = 98.5, size = 4, fontface = "bold")

ust_cd_endoscopic_remission_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.text = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_remission$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_remission$achieved, "/", ust_cd_cdst_tabulation_endoscopic_remission$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_remission$achieved + 4,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_remission$p.value), x = 2, y = 98.5, size = 4, fontface = "bold")

Combine them in 4 panels

ust_cd_bars_combined <- ggarrange(ust_cd_clinical_response_gg, ust_cd_clinical_remission_gg, ust_cd_endoscopic_response_gg, ust_cd_endoscopic_remission_gg, nrow = 2, ncol = 2)

annotate_figure(ust_cd_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

7. Calculate other discrimination metrics for effectiveness outcomes

VDZ-CD

vdz_cd_included %<>% mutate(cdst_category_high = cdst_category == "High")

vector_vdz_cd_discrimination <- vdz_cd_included %>%
  count(cdst_category_high, clinical_remission_cdst) %>%
  arrange(cdst_category_high, clinical_remission_cdst)

vector_vdz_cd_discrimination <- rev(vector_vdz_cd_discrimination$n)

epi_test_vdz_cd <- epi.tests(vector_vdz_cd_discrimination, method = "exact", digits = 2, 
                             conf.level = 0.95)

roc_vdz_cd <- roc(vdz_cd_included$clinical_remission_cdst, vdz_cd_included$cdst_points)

VDZ-UC

vdz_uc_included %<>% mutate(cdst_category_high = cdst_category == "High")

vector_vdz_uc_discrimination <- vdz_uc_included %>%
  count(cdst_category_high, clinical_remission_cdst) %>%
  arrange(cdst_category_high, clinical_remission_cdst)

vector_vdz_uc_discrimination <- rev(vector_vdz_uc_discrimination$n)

epi_test_vdz_uc <- epi.tests(vector_vdz_uc_discrimination, method = "exact", digits = 2, 
                             conf.level = 0.95)

roc_vdz_uc <- roc(vdz_uc_included$clinical_remission_cdst, vdz_uc_included$cdst_points)

UST-CD

ust_cd_included %<>% mutate(cdst_category_high = cdst_category == "High")

vector_ust_cd_discrimination <- ust_cd_included %>%
  count(cdst_category_high, clinical_remission_cdst) %>%
  arrange(cdst_category_high, clinical_remission_cdst)

vector_ust_cd_discrimination <- rev(vector_ust_cd_discrimination$n)

epi_test_ust_cd <- epi.tests(vector_ust_cd_discrimination, method = "exact", digits = 2, 
                             conf.level = 0.95)

Combine metrics

metrics_combined <- bind_rows("vdz-cd" = epi_test_vdz_cd$detail[epi_test_vdz_cd$detail[["statistic"]] %in% c("se", "sp", "lr.pos", "lr.neg"),],
                              "vdz-uc" = epi_test_vdz_uc$detail[epi_test_vdz_uc$detail[["statistic"]] %in% c("se", "sp", "lr.pos", "lr.neg"),],
                              "ust-cd" = epi_test_vdz_cd$detail[epi_test_vdz_cd$detail[["statistic"]] %in% c("se", "sp", "lr.pos", "lr.neg"),],
                              .id = "tool")

metrics_combined %<>%
  mutate(across(c(est, lower, upper), ~ if_else(statistic %in% c("se", "sp"), . * 100, .))) %>%
  mutate(across(c(est, lower, upper), 
                ~ if_else(statistic %in% c("se", "sp"), round(., 1), round(., 2))))  

metrics_combined %<>%
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  mutate(value = str_glue("{est} ({lower}-{upper})")) %>%
  select(-c(est, lower, upper))

metrics_combined %<>%
  pivot_wider(names_from = statistic, values_from = value)


roc_vdz_cd_ci <- glue("{round(ci(roc_vdz_cd)[2] * 100, 1)} ({round(ci(roc_vdz_cd)[1] * 100, 1)}-{round(ci(roc_vdz_cd)[3] * 100, 1)})")
roc_vdz_uc_ci <- glue("{round(ci(roc_vdz_uc)[2] * 100, 1)} ({round(ci(roc_vdz_uc)[1] * 100, 1)}-{round(ci(roc_vdz_uc)[3] * 100, 1)})")

metrics_combined <- cbind(metrics_combined, auc = c(roc_vdz_cd_ci, roc_vdz_uc_ci, "*"))

metrics_combined$tool <- c("VDZ in CD patients", "VDZ in UC patients", "UST in CD patients")

colnames(metrics_combined) <- c(
  "CDST",
  "Sensitivity (%) (95% CI)",
  "Specificity (%) (95% CI)",
  "PLR (95% CI)",
  "NLR (95% CI)",
  "AUC (%) (95% CI)"
)

metrics_combined %>%
  gt::gt() %>%
  gt::tab_style(
    style = gt::cell_text(align = "left"),
    locations = gt::cells_body(columns = c(CDST))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(align = "center"),
    locations = gt::cells_body(columns = c(`Sensitivity (%) (95% CI)`, `Specificity (%) (95% CI)`, `PLR (95% CI)`, `NLR (95% CI)`, `AUC (%) (95% CI)`))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(everything())
  )

CDST	Sensitivity (%) (95% CI)	Specificity (%) (95% CI)	PLR (95% CI)	NLR (95% CI)	AUC (%) (95% CI)
VDZ in CD patients	55.6 (40-70.4)	59.1 (52.6-65.5)	1.36 (1-1.84)	0.75 (0.53-1.06)	61.3 (53.2-69.4)
VDZ in UC patients	64.7 (50.1-77.6)	62.9 (55.1-70.2)	1.74 (1.31-2.31)	0.56 (0.38-0.83)	65 (56.3-73.7)
UST in CD patients	55.6 (40-70.4)	59.1 (52.6-65.5)	1.36 (1-1.84)	0.75 (0.53-1.06)	*

8. Get rates of therapy outcomes

outcomes_not_endoscopic <- c("clinical_response", "clinical_response_pga", "clinical_response_cdst", "clinical_remission", 
                             "clinical_remission_pga", "clinical_remission_cdst", "persistence", "persistence_without_escalation")

outcomes_endoscopic <- c("endoscopic_response", "endoscopic_response_cdst", "endoscopic_remission", "endoscopic_remission_cdst")



vdz_cd_outcomes_not_endoscopic <- outcome_pct(vdz_cd_included, outcomes_not_endoscopic)

vdz_cd_outcomes_endoscopic <- outcome_pct(vdz_cd_included_endoscopic, outcomes_endoscopic)

vdz_cd_outcomes <- bind_rows(vdz_cd_outcomes_endoscopic, vdz_cd_outcomes_not_endoscopic)



vdz_uc_outcomes_not_endoscopic <- outcome_pct(vdz_uc_included, outcomes_not_endoscopic)

vdz_uc_outcomes_endoscopic <- outcome_pct(vdz_uc_included_endoscopic, outcomes_endoscopic)

vdz_uc_outcomes <- bind_rows(vdz_uc_outcomes_endoscopic, vdz_uc_outcomes_not_endoscopic)



ust_cd_outcomes_not_endoscopic <- outcome_pct(ust_cd_included, outcomes_not_endoscopic)

ust_cd_outcomes_endoscopic <- outcome_pct(ust_cd_included_endoscopic, outcomes_endoscopic)

ust_cd_outcomes <- bind_rows(ust_cd_outcomes_endoscopic, ust_cd_outcomes_not_endoscopic)


combined_outcomes <- vdz_cd_outcomes %>% select(outcome, vdz_cd = report) %>%
  bind_cols(vdz_uc_outcomes %>% select(vdz_uc = report),
            ust_cd_outcomes %>% select(ust_cd = report)) %>%
  mutate(outcome = str_replace_all(outcome, "_", " "),        
         outcome = str_to_sentence(outcome),                     
         outcome = str_replace(outcome, "cdst", "without escalation"))




combined_outcomes %>%
  gt::gt() %>%
  gt::cols_label(
    outcome = "Outcome",
    vdz_cd = "VDZ-CD",
    vdz_uc = "VDZ-UC",
    ust_cd = "UST-CD"
  ) %>%
  gt::tab_style(
    style = gt::cell_text(align = "left"),
    locations = gt::cells_body(columns = outcome)
  ) %>%
  gt::tab_style(
    style = gt::cell_text(align = "center"),
    locations = gt::cells_body(columns = c(vdz_cd, vdz_uc, ust_cd))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(everything())
  )

Outcome	VDZ-CD	VDZ-UC	UST-CD
Endoscopic response	123/213 (57.7%)	118/217 (54.4%)	89/166 (53.6%)
Endoscopic response without escalation	121/213 (56.8%)	118/217 (54.4%)	88/166 (53%)
Endoscopic remission	64/213 (30%)	74/217 (34.1%)	24/166 (14.5%)
Endoscopic remission without escalation	63/213 (29.6%)	74/217 (34.1%)	23/166 (13.9%)
Clinical response	185/280 (66.1%)	142/218 (65.1%)	116/194 (59.8%)
Clinical response without escalation	178/280 (63.6%)	142/218 (65.1%)	114/194 (58.8%)
Clinical remission	45/280 (16.1%)	51/218 (23.4%)	31/194 (16%)
Clinical remission without escalation	45/280 (16.1%)	51/218 (23.4%)	31/194 (16%)

9. Calculate & visualize discrimination for trough levels

Load trough data and get it in shape for analysis

load("input/VDZ_UST_troughs.RData")

UST_troughs_induction <- UST_troughs %>% filter(interval == 16)
VDZ_troughs_induction <- VDZ_troughs %>% filter(between(interval, 13, 15))

cdst_vdz_uc_troughs <- power_inner_join(vdz_uc_included %>% select_keys_and(cdst_category),
                                        VDZ_troughs_induction %>% select_keys_and(trough),
                                        by = "patient_id")
cdst_vdz_uc_troughs %>% count(cdst_category)

cdst_category	n
Low	6
Intermediate	54
High	45

cdst_vdz_uc_troughs %<>% group_by(cdst_category) %>% mutate(median = round(median(trough),1), total = n()) %>% ungroup()


cdst_vdz_cd_troughs <- power_inner_join(vdz_cd_included %>% select_keys_and(cdst_category),
                                        VDZ_troughs_induction %>% select_keys_and(trough),
                                        by = "patient_id")
cdst_vdz_cd_troughs %>% count(cdst_category)

cdst_category	n
Low	11
Intermediate	80
High	55

cdst_vdz_cd_troughs %<>% group_by(cdst_category) %>% mutate(median = round(median(trough),1), total = n()) %>% ungroup()


cdst_ust_cd_troughs <- power_inner_join(ust_cd_included %>% select_keys_and(cdst_category),
                                        VDZ_troughs_induction %>% select_keys_and(trough),
                                        by = "patient_id")
cdst_ust_cd_troughs %>% count(cdst_category)

cdst_category	n
Low	5
Intermediate	24
High	18

cdst_ust_cd_troughs %<>% group_by(cdst_category) %>% mutate(median = round(median(trough),1), total = n()) %>% ungroup()

vdz_cd_troughs_medians <- cdst_vdz_cd_troughs %>%
  distinct(cdst_category, median, total)

vdz_uc_troughs_medians <- cdst_vdz_uc_troughs %>%
  distinct(cdst_category, median, total)

ust_cd_troughs_medians <- cdst_ust_cd_troughs %>%
  distinct(cdst_category, median, total)

Test for significance

kruskal_vdz_cd_troughs <- kruskal.test(trough ~ cdst_category, data = cdst_vdz_cd_troughs)
kruskal_vdz_uc_troughs <- kruskal.test(trough ~ cdst_category, data = cdst_vdz_uc_troughs)
kruskal_ust_cd_troughs <- kruskal.test(trough ~ cdst_category, data = cdst_ust_cd_troughs)

Make box plots per cdst category

vdz_cd_troughs_gg <- ggplot(data = cdst_vdz_cd_troughs, aes(x = cdst_category, y = trough)) +
  geom_boxplot(width = 0.35, outlier.shape = NA, coef = FALSE)+
  geom_jitter(size = 0.4) +
  geom_text(data = vdz_cd_troughs_medians, aes(label = str_c("median = ", median, " \n(n", "=", total, ")"), y = -0.5), size = 3, fontface = "bold") +
  coord_cartesian(expand = 1.5, ylim = c(0, 70)) +
  scale_y_continuous(breaks = seq(0, 70, 10)) +
  theme(axis.ticks.x = element_blank(),
        panel.background = element_blank(),
        axis.line.y = element_line(),
        axis.title = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "None") + 
  annotate(geom = "text", label = format_p_value(kruskal_vdz_cd_troughs$p.value), x = 2, y = 68, size = 4, fontface = "bold")

vdz_uc_troughs_gg <- ggplot(data = cdst_vdz_uc_troughs, aes(x = cdst_category, y = trough)) +
  geom_boxplot(width = 0.35, outlier.shape = NA, coef = FALSE)+
  geom_jitter(size = 0.4) +
  geom_text(data = vdz_uc_troughs_medians, aes(label = str_c("median = ", median, " \n(n", "=", total, ")"), y = -0.5), size = 3, fontface = "bold") +
  coord_cartesian(expand = 1.5, ylim = c(0, 41)) +
  scale_y_continuous(breaks = seq(0, 41, 10)) +
  theme(axis.ticks.x = element_blank(),
        panel.background = element_blank(),
        axis.line.y = element_line(),
        axis.title = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "None") +
  annotate(geom = "text", label = format_p_value(kruskal_vdz_uc_troughs$p.value), x = 2, y = 40, size = 4, fontface = "bold")

ust_cd_troughs_gg <- ggplot(data = cdst_ust_cd_troughs, aes(x = cdst_category, y = trough)) +
  geom_boxplot(width = 0.35, outlier.shape = NA, coef = FALSE)+
  geom_jitter(size = 0.4) +
  geom_text(data = ust_cd_troughs_medians, aes(label = str_c("median = ", median, " \n(n", "=", total, ")"), y = -0.5), size = 3, fontface = "bold") +
  coord_cartesian(expand = 1.5, ylim = c(0, 50)) +
  scale_y_continuous(breaks = seq(0, 50, 10)) +
  theme(axis.ticks.x = element_blank(),
        panel.background = element_blank(),
        axis.line.y = element_line(),
        axis.title = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "None") +
  annotate(geom = "text", label = format_p_value(kruskal_ust_cd_troughs$p.value), x = 2, y = 48.8, size = 4, fontface = "bold")

Combine the plots

troughs_combined <- ggarrange(vdz_cd_troughs_gg, vdz_uc_troughs_gg, ust_cd_troughs_gg, labels = c("A", "B", "C"), nrow = 1, ncol = 3,
                              label.x = c(-0.05, -0.05, -0.05))


annotate_figure(troughs_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1),
                left = text_grob("Post-induction trough levels (ug/mL)", face = "bold", vjust = 0.4, rot = 90))

10. Calculate & visualize discrimination for dose optimization

VDZ-CD

vdz_cd_included %<>% mutate(dose_optimisation = recode(dose_optimisation,
                                                       "0" = FALSE,
                                                       "1" = TRUE))

vdz_cd_included %>% count(cdst_category, dose_optimisation)

cdst_category	dose_optimisation	n
Low	FALSE	7
Low	TRUE	6
Intermediate	FALSE	88
Intermediate	TRUE	58
High	FALSE	82
High	TRUE	39

chisq_vdz_cd_optimisation <- chisq.test(vdz_cd_included$cdst_category, vdz_cd_included$dose_optimisation)

vdz_cd_cdst_tabulation_optimisation <- vdz_cd_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            optimized = sum(dose_optimisation),
            percentage =scales::percent(optimized/total))

vdz_cd_optimisation_gg <- ggplot(data = vdz_cd_included, aes(x = cdst_category, fill = dose_optimisation)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 160), breaks = seq(0, 160, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Crohn’s disease") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_cd_cdst_tabulation_optimisation$percentage, "\n", "(", vdz_cd_cdst_tabulation_optimisation$optimized, "/", vdz_cd_cdst_tabulation_optimisation$total, ")"),
           x = 1:3, y = vdz_cd_cdst_tabulation_optimisation$optimized + 4.5,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_cd_optimisation$p.value), x = 2, y = 158, size = 4, fontface = "bold")

VDZ-UC

vdz_uc_included %<>% mutate(dose_optimisation = recode(dose_optimisation,
                                                       "0" = FALSE,
                                                       "1" = TRUE))

vdz_uc_included %>% count(cdst_category, dose_optimisation)

cdst_category	dose_optimisation	n
Low	FALSE	9
Low	TRUE	9
Intermediate	FALSE	62
Intermediate	TRUE	43
High	FALSE	66
High	TRUE	29

chisq_vdz_uc_optimisation <- chisq.test(vdz_uc_included$cdst_category, vdz_uc_included$dose_optimisation)

vdz_uc_cdst_tabulation_optimisation <- vdz_uc_included %>%
  group_by(cdst_category) %>%
  summarise(total = n(),
            optimized = sum(dose_optimisation),
            percentage =scales::percent(optimized/total))

vdz_uc_optimisation_gg <- ggplot(data = vdz_uc_included, aes(x = cdst_category, fill = dose_optimisation)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Ulcerative Colitis") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13)) +
  annotate(geom = "text", label = paste0(vdz_uc_cdst_tabulation_optimisation$percentage, "\n", "(", vdz_uc_cdst_tabulation_optimisation$optimized, "/", vdz_uc_cdst_tabulation_optimisation$total, ")"),
           x = 1:3, y = vdz_uc_cdst_tabulation_optimisation$optimized + 3.3,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_vdz_uc_optimisation$p.value), x = 2, y = 118.4, size = 4, fontface = "bold")

Combine plots in 2 panels

vdz_uc_optimisation_combined <- ggarrange(vdz_cd_optimisation_gg, vdz_uc_optimisation_gg, nrow = 1, ncol = 2)


annotate_figure(vdz_uc_optimisation_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

11. Assess specificity of the VDZ-CD tool in UST-CD cohort

Prepare VDZ-CD tool variables

ust_cd_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(ust_cd_included$no_previous_antitnf)

[1] TRUE

ust_cd_included %<>% mutate(no_surgery_before = if_else(intestinal_resections_before == 1, 0, 1), .after = intestinal_resections_before)

is.numeric(ust_cd_included$no_surgery_before)

[1] TRUE

ust_cd_included %<>% mutate(no_fistulizing_before = if_else(fistulizing_disease_prior == 1, 0, 1), .after = fistulizing_disease_prior)

is.numeric(ust_cd_included$no_fistulizing_before)

[1] TRUE

is.numeric(ust_cd_included$albumin_g_L)

[1] TRUE

is.numeric(ust_cd_included$crp_mg_L)

[1] TRUE

Calculate CDST points for the vdz tool

ust_cd_included %<>% mutate(cdst_points_vdz = 2*no_surgery_before + 3*no_previous_antitnf + 2*no_fistulizing_before + 0.4*albumin_g_L - case_when(crp_mg_L >= 3 & crp_mg_L <= 10 ~ 0.5,
                                                                                                                                                  crp_mg_L > 10 ~ 3,
                                                                                                                                                  .default = 0))

Assign CDST categories for the vdz tool

ust_cd_included %<>% mutate(cdst_category_vdz = case_when(cdst_points_vdz <= 13 ~ "Low",
                                                          cdst_points_vdz > 13 & cdst_points_vdz <= 19 ~ "Intermediate",
                                                          cdst_points_vdz > 19 ~ "High") 
)

ust_cd_included %>% count(cdst_category_vdz)

cdst_category_vdz	n
High	67
Intermediate	114
Low	13

ust_cd_included$cdst_category_vdz <- factor(ust_cd_included$cdst_category_vdz, levels = c("Low", "Intermediate", "High"), ordered = TRUE)

Get agreement percentage between vdz and ust tools

ust_cd_included %>%
  summarise(agreement = round(sum(cdst_category == cdst_category_vdz)/n()*100, 1))

agreement
73.7

ust_cd_included %>%
  filter(clinical_remission_cdst == 1) %>%
  summarise(agreement = round(sum(cdst_category == cdst_category_vdz)/n()*100, 1))

agreement
77.4

ust_cd_included %>%
  filter(clinical_response_cdst == 0) %>%
  summarise(agreement = round(sum(cdst_category == cdst_category_vdz)/n()*100, 1))

agreement
73.8

Calculate discrimination percentages for: Clinica response

chisq_ust_cd_clinical_response_vdz <- chisq.test(ust_cd_included$cdst_category_vdz, ust_cd_included$clinical_response_cdst)

ust_cd_cdst_tabulation_clinical_response_vdz <- ust_cd_included %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(clinical_response_cdst),
            percentage =scales::percent(achieved/total))

Clinica remission

chisq_ust_cd_clinical_remission_vdz <- chisq.test(ust_cd_included$cdst_category_vdz, ust_cd_included$clinical_remission_cdst)

ust_cd_cdst_tabulation_clinical_remission_vdz <- ust_cd_included %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(clinical_remission_cdst),
            percentage =scales::percent(achieved/total))

Endoscopic response

ust_cd_included_endoscopic <- power_left_join(ust_cd_included_endoscopic,
                                              ust_cd_included %>% select_keys_and(cdst_category_vdz),
                                              by = "patient_id")

chisq_ust_cd_endoscopic_response_vdz <- chisq.test(ust_cd_included_endoscopic$cdst_category_vdz, ust_cd_included_endoscopic$endoscopic_response_cdst)

ust_cd_cdst_tabulation_endoscopic_response_vdz <- ust_cd_included_endoscopic %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_response_cdst),
            percentage =scales::percent(achieved/total))

Endoscopic remission

chisq_ust_cd_endoscopic_remission_vdz <- chisq.test(ust_cd_included_endoscopic$cdst_category_vdz, ust_cd_included_endoscopic$endoscopic_remission_cdst)

ust_cd_cdst_tabulation_endoscopic_remission_vdz <- ust_cd_included_endoscopic %>%
  group_by(cdst_category_vdz) %>%
  summarise(total = n(),
            achieved = sum(endoscopic_remission_cdst),
            percentage =scales::percent(achieved/total))

Visualize discrimination bars

ust_cd_clinical_response_vdz_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category_vdz, fill = clinical_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 140), breaks = seq(0, 140, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Clinical response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_response_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_response_vdz$achieved, "/", ust_cd_cdst_tabulation_clinical_response_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_response_vdz$achieved + 6,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_response_vdz$p.value), x = 2, y = 138.5, size = 4, fontface = "bold")

ust_cd_clinical_remission_vdz_gg <- ggplot(data = ust_cd_included, aes(x = cdst_category_vdz, fill = clinical_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 140), breaks = seq(0, 140, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Clinical remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_clinical_remission_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_clinical_remission_vdz$achieved, "/", ust_cd_cdst_tabulation_clinical_remission_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_clinical_remission_vdz$achieved + 6,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_clinical_remission_vdz$p.value), x = 2, y = 138.5, size = 4, fontface = "bold")

ust_cd_endoscopic_response_vdz_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category_vdz, fill = endoscopic_response_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(y = "Number of patients", title = "Endoscopic response") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_response_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_response_vdz$achieved, "/", ust_cd_cdst_tabulation_endoscopic_response_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_response_vdz$achieved + 5.3,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_response_vdz$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

ust_cd_endoscopic_remission_vdz_gg <- ggplot(data = ust_cd_included_endoscopic, aes(x = cdst_category_vdz, fill = endoscopic_remission_cdst)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
  scale_fill_manual(values = c("#D55E00", "#999999"), labels = c("yes", "no"), breaks = c(TRUE, FALSE)) + 
  coord_cartesian(expand = 0) +
  labs(title = "Endoscopic remission") +
  theme(axis.ticks = element_blank(),
        panel.background = element_blank(),
        axis.title = element_text(face = "bold", size = 13),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text = element_text(face = "bold"),
        legend.position = "none") +
  annotate(geom = "text", label = paste0(ust_cd_cdst_tabulation_endoscopic_remission_vdz$percentage, "\n", "(", ust_cd_cdst_tabulation_endoscopic_remission_vdz$achieved, "/", ust_cd_cdst_tabulation_endoscopic_remission_vdz$total, ")"),
           x = 1:3, y = ust_cd_cdst_tabulation_endoscopic_remission_vdz$achieved + 5.3,
           size = 3, fontface = "bold") + 
  annotate(geom = "text", label = format_p_value(chisq_ust_cd_endoscopic_remission_vdz$p.value), x = 2, y = 118.5, size = 4, fontface = "bold")

Combine plots in 4 panels

ust_cd_vdz_bars_combined <- ggarrange(ust_cd_clinical_response_vdz_gg, ust_cd_clinical_remission_vdz_gg, ust_cd_endoscopic_response_vdz_gg, ust_cd_endoscopic_remission_vdz_gg, nrow = 2, ncol = 2)


annotate_figure(ust_cd_vdz_bars_combined, bottom = text_grob("Clinical Decision Support Tool's response probability groups", face = "bold", vjust = 0.1))

12. Evaluate CDST calibration

Calculate logits and convert them to probabilities

vdz_cd_included %<>%
  mutate(logit = -3.0722 +  0.2305*no_surgery_before + 0.3483*no_previous_antitnf + 0.1979*no_fistulizing_before + 0.0436*albumin_g_L - 0.0098*crp_mg_L,
         prob = logit2prob(logit))

vdz_uc_included %<>%
  mutate(logit = -3.7038 + 0.2622*dxduration_above_two + 0.2820*no_previous_antitnf + 0.1847*moderate_endo_activity + 0.0647*albumin_g_L,
         prob = logit2prob(logit))

Define a clinically-relevant outcome: persistence & persistence without escalation

vdz_cd_included %<>% mutate(persistence_without_escalation = stop_current_drug_itt == 0 & dose_optimisation == 0)
vdz_uc_included %<>% mutate(persistence_without_escalation = stop_current_drug_itt == 0 & dose_optimisation == 0)
ust_cd_included %<>% mutate(persistence_without_escalation = stop_current_drug_itt == 0 & dose_optimisation == 0)

vdz_cd_included %<>% mutate(persistence = stop_current_drug_itt == 0)
vdz_uc_included %<>% mutate(persistence = stop_current_drug_itt == 0)
ust_cd_included %<>% mutate(persistence = stop_current_drug_itt == 0)

Visualize calibration plots

par(mfrow=c(3,2))

rms::val.prob(vdz_cd_included$prob, vdz_cd_included$clinical_remission_cdst, statloc = F, logistic.cal = F, g = 10)

            Dxy         C (ROC)              R2               D        D:Chi-sq 
  0.17862884161   0.58931442080  -0.08314444580  -0.05113826943 -13.31871543951 
            D:p               U        U:Chi-sq             U:p               Q 
             NA   0.05932023319  18.60966529419   0.00009098348  -0.11045850262 
          Brier       Intercept           Slope            Emax             E90 
  0.14476423642  -0.65070440020   1.01127074257   0.16792792579   0.16166755327 
           Eavg             S:z             S:p 
  0.10881941218  -3.82536680788   0.00013057752

mtext('A', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_uc_included$prob, vdz_uc_included$clinical_remission_cdst, statloc = F, logistic.cal = F, g = 10)

          Dxy       C (ROC)            R2             D      D:Chi-sq 
 0.3035106258  0.6517553129 -0.0616547022 -0.0446573598 -8.7353044358 
          D:p             U      U:Chi-sq           U:p             Q 
           NA  0.0567842995 14.3789772935  0.0007544748 -0.1014416593 
        Brier     Intercept         Slope          Emax           E90 
 0.1867562420 -0.5028711530  1.1430083801  0.4119663381  0.1703225522 
         Eavg           S:z           S:p 
 0.1209726878 -3.4821176169  0.0004974651

mtext('B', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_cd_included$prob, vdz_cd_included$persistence_without_escalation, statloc = F, logistic.cal = F, g = 10)

         Dxy      C (ROC)           R2            D     D:Chi-sq          D:p 
 0.357506793  0.678753397  0.068796680  0.047490031 14.297208726           NA 
           U     U:Chi-sq          U:p            Q        Brier    Intercept 
 0.042172491 13.808297370  0.001003613  0.005317541  0.215184305  1.270721589 
       Slope         Emax          E90         Eavg          S:z          S:p 
 1.928436064  0.319500179  0.189713594  0.090107399  2.020589697  0.043322256

mtext('C', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_uc_included$prob, vdz_uc_included$persistence_without_escalation, statloc = F, logistic.cal = F, g = 10)

        Dxy     C (ROC)          R2           D    D:Chi-sq         D:p 
 0.31491549  0.65745774  0.07109483  0.04712249 11.27270173          NA 
          U    U:Chi-sq         U:p           Q       Brier   Intercept 
 0.01502521  5.27549670  0.07152213  0.03209727  0.20177544  0.30622051 
      Slope        Emax         E90        Eavg         S:z         S:p 
 1.89496508  0.18284498  0.10107597  0.06405022 -1.99595232  0.04593911

mtext('D', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_cd_included$prob, vdz_cd_included$persistence, statloc = F, logistic.cal = F, g = 10)

                       Dxy                    C (ROC) 
  0.3067973055725658482196   0.6533986527862829518654 
                        R2                          D 
 -0.2791177371817529229148  -0.1936330041949115565725 
                  D:Chi-sq                        D:p 
-53.2172411745752356182493                         NA 
                         U                   U:Chi-sq 
  0.2710559208947198595041  77.8956578505215588847932 
                       U:p                          Q 
  0.0000000000000000000000  -0.4646889250896314438322 
                     Brier                  Intercept 
  0.2920824410165137607898   1.7383988222184387772984 
                     Slope                       Emax 
  1.6477130652951839095977   0.3993827373260716573355 
                       E90                       Eavg 
  0.3285369249726436402526   0.2405269877686071477996 
                       S:z                        S:p 
  8.4043262035508128349193   0.0000000000000000430325

mtext('E', adj = 0, line = 1, font = 2)

rms::val.prob(vdz_uc_included$prob, vdz_uc_included$persistence, statloc = F, logistic.cal = F, g = 10)

        Dxy     C (ROC)          R2           D    D:Chi-sq         D:p 
 0.26418485  0.63209243  0.02392832  0.01342047  3.92566262          NA 
          U    U:Chi-sq         U:p           Q       Brier   Intercept 
 0.02648111  7.77288151  0.02051825 -0.01306064  0.24170389  0.61371546 
      Slope        Emax         E90        Eavg         S:z         S:p 
 1.40477097  0.16086479  0.14688310  0.08653667  1.95501037  0.05058185

mtext('F', adj = 0, line = 1, font = 2)

13. Decision curve analysis

Plot the curves

For clinical remission

vdz_cd_dca_plot_remission <- dca(clinical_remission_cdst ~ prob,
                       data = vdz_cd_included,
                       thresholds = seq(0, 0.5, 0.01),
                       label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.5, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.7, 0.1))

vdz_uc_dca_plot_remission <- dca(clinical_remission_cdst ~ prob,
                       data = vdz_uc_included,
                       thresholds = seq(0, 0.55, 0.01),
                       label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.55, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.6, 0.1))

For persistence without escalation

vdz_cd_dca_plot_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                 data = vdz_cd_included,
                                 thresholds = seq(0, 0.5, 0.01),
                                 label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.5, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.7, 0.1))

vdz_uc_dca_plot_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                 data = vdz_uc_included,
                                 thresholds = seq(0, 0.55, 0.01),
                                 label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.55, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.6, 0.1))

For persistence

vdz_cd_dca_plot_persistence <- dca(persistence ~ prob,
                                                      data = vdz_cd_included,
                                                      thresholds = seq(0, 0.5, 0.01),
                                                      label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.5, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.7, 0.1))

vdz_uc_dca_plot_persistence <- dca(persistence ~ prob,
                                                      data = vdz_uc_included,
                                                      thresholds = seq(0, 0.55, 0.01),
                                                      label = list(prob = "Clinical Decision Support Tool")) %>%
  plot(smooth = FALSE, style = "bw") + theme(panel.grid = element_blank()) +
  scale_x_continuous(breaks = seq(0, 0.55, 0.05), labels = scales::percent_format(accuracy = 1)) +
  scale_y_continuous(breaks = seq(0, 0.6, 0.1))

Combine the plots

ggarrange(vdz_cd_dca_plot_remission, vdz_uc_dca_plot_remission, 
          vdz_cd_dca_plot_persistence_without_escalation, vdz_uc_dca_plot_persistence_without_escalation,
          vdz_cd_dca_plot_persistence, vdz_uc_dca_plot_persistence,
          labels = c("A", "B", "C", "D", "E", "F"), label.y = c(1.012, 1.012, 1.012, 1.012, 1.012, 1.012), nrow = 3, ncol = 2,
          common.legend = TRUE, legend="bottom")

Make net benefit tables

For clinical remission

vdz_cd_dca_tbl_remission <- dca(clinical_remission_cdst ~ prob,
                                data = vdz_cd_included,
                                thresholds = seq(0.1, 0.5, 0.1),
                                label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit) 

vdz_uc_dca_tbl_remission <- dca(clinical_remission_cdst ~ prob,
                                data = vdz_uc_included,
                                thresholds = seq(0.1, 0.5, 0.1),
                                label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit)


combined_dca_tbl_remission <- bind_cols(
  vdz_cd_dca_tbl_remission %>%
    rename_with(~ paste0("cd_", .)),
  vdz_uc_dca_tbl_remission %>%
    rename_with(~ paste0("uc_", .))
)

combined_dca_tbl_remission %>%
  gt::gt() %>%
  gt::cols_label(
    cd_label = "Strategy",
    cd_threshold = "Decision Threshold",
    cd_net_benefit = "Net Benefit",
    uc_label = "Strategy",
    uc_threshold = "Decision Threshold",
    uc_net_benefit = "Net Benefit"
  ) %>%
  gt::fmt_percent(
    columns = c(cd_threshold, uc_threshold),
    decimals = 0
  ) %>%
  gt::fmt(
    columns = c(cd_net_benefit, uc_net_benefit),
    fns = function(x) ifelse(x == 0, "0", formatC(x, format = "f", digits = 3))
  ) %>%
  gt::tab_spanner(
    label = "Crohn's Disease",
    columns = c(cd_label, cd_threshold, cd_net_benefit)
  ) %>%
  gt::tab_spanner(
    label = "Ulcerative Colitis",
    columns = c(uc_label, uc_threshold, uc_net_benefit)
  ) %>%
  gt::cols_align("left", columns = everything()) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_spanners(spanners = c("Crohn's Disease", "Ulcerative Colitis"))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(columns = everything())
  )

Crohn's Disease			Ulcerative Colitis
Strategy	Decision Threshold	Net Benefit	Strategy	Decision Threshold	Net Benefit
Treat All	10%	0.067	Treat All	10%	0.149
Treat All	20%	-0.049	Treat All	20%	0.042
Treat All	30%	-0.199	Treat All	30%	-0.094
Treat All	40%	-0.399	Treat All	40%	-0.277
Treat All	50%	-0.679	Treat All	50%	-0.532
Treat None	10%	0	Treat None	10%	0
Treat None	20%	0	Treat None	20%	0
Treat None	30%	0	Treat None	30%	0
Treat None	40%	0	Treat None	40%	0
Treat None	50%	0	Treat None	50%	0
Clinical Decision Support Tool	10%	0.069	Clinical Decision Support Tool	10%	0.144
Clinical Decision Support Tool	20%	-0.019	Clinical Decision Support Tool	20%	0.039
Clinical Decision Support Tool	30%	-0.034	Clinical Decision Support Tool	30%	-0.042
Clinical Decision Support Tool	40%	-0.005	Clinical Decision Support Tool	40%	-0.028
Clinical Decision Support Tool	50%	0	Clinical Decision Support Tool	50%	-0.009

For persistence without escalation

vdz_cd_dca_tbl_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                                     data = vdz_cd_included,
                                                     thresholds = seq(0.1, 0.5, 0.1),
                                                     label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit) 

vdz_uc_dca_tbl_persistence_without_escalation <- dca(persistence_without_escalation ~ prob,
                                                     data = vdz_uc_included,
                                                     thresholds = seq(0.1, 0.5, 0.1),
                                                     label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit)


combined_dca_tbl_persistence_without_escalation <- bind_cols(
  vdz_cd_dca_tbl_persistence_without_escalation %>%
    rename_with(~ paste0("cd_", .)),
  vdz_uc_dca_tbl_persistence_without_escalation %>%
    rename_with(~ paste0("uc_", .))
)

combined_dca_tbl_persistence_without_escalation %>%
  gt::gt() %>%
  gt::cols_label(
    cd_label = "Strategy",
    cd_threshold = "Decision Threshold",
    cd_net_benefit = "Net Benefit",
    uc_label = "Strategy",
    uc_threshold = "Decision Threshold",
    uc_net_benefit = "Net Benefit"
  ) %>%
  gt::fmt_percent(
    columns = c(cd_threshold, uc_threshold),
    decimals = 0
  ) %>%
  gt::fmt(
    columns = c(cd_net_benefit, uc_net_benefit),
    fns = function(x) ifelse(x == 0, "0", formatC(x, format = "f", digits = 3))
  ) %>%
  gt::tab_spanner(
    label = "Crohn's Disease",
    columns = c(cd_label, cd_threshold, cd_net_benefit)
  ) %>%
  gt::tab_spanner(
    label = "Ulcerative Colitis",
    columns = c(uc_label, uc_threshold, uc_net_benefit)
  ) %>%
  gt::cols_align("left", columns = everything()) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_spanners(spanners = c("Crohn's Disease", "Ulcerative Colitis"))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(columns = everything())
  )

Crohn's Disease			Ulcerative Colitis
Strategy	Decision Threshold	Net Benefit	Strategy	Decision Threshold	Net Benefit
Treat All	10%	0.270	Treat All	10%	0.230
Treat All	20%	0.179	Treat All	20%	0.134
Treat All	30%	0.061	Treat All	30%	0.010
Treat All	40%	-0.095	Treat All	40%	-0.154
Treat All	50%	-0.314	Treat All	50%	-0.385
Treat None	10%	0	Treat None	10%	0
Treat None	20%	0	Treat None	20%	0
Treat None	30%	0	Treat None	30%	0
Treat None	40%	0	Treat None	40%	0
Treat None	50%	0	Treat None	50%	0
Clinical Decision Support Tool	10%	0.272	Clinical Decision Support Tool	10%	0.231
Clinical Decision Support Tool	20%	0.204	Clinical Decision Support Tool	20%	0.142
Clinical Decision Support Tool	30%	0.088	Clinical Decision Support Tool	30%	0.050
Clinical Decision Support Tool	40%	0.019	Clinical Decision Support Tool	40%	0.018
Clinical Decision Support Tool	50%	0	Clinical Decision Support Tool	50%	0.009

For persistence

vdz_cd_dca_tbl_persistence <- dca(persistence ~ prob,
                                  data = vdz_cd_included,
                                  thresholds = seq(0.1, 0.5, 0.1),
                                  label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit) 

vdz_uc_dca_tbl_persistence <- dca(persistence ~ prob,
                                  data = vdz_uc_included,
                                  thresholds = seq(0.1, 0.5, 0.1),
                                  label = list(prob = "Clinical Decision Support Tool")) %>%
  as_tibble() %>%
  select(label, threshold, net_benefit)


combined_dca_tbl_persistence <- bind_cols(
  vdz_cd_dca_tbl_persistence %>%
    rename_with(~ paste0("cd_", .)),
  vdz_uc_dca_tbl_persistence %>%
    rename_with(~ paste0("uc_", .))
)

combined_dca_tbl_persistence %>%
  gt::gt() %>%
  gt::cols_label(
    cd_label = "Strategy",
    cd_threshold = "Decision Threshold",
    cd_net_benefit = "Net Benefit",
    uc_label = "Strategy",
    uc_threshold = "Decision Threshold",
    uc_net_benefit = "Net Benefit"
  ) %>%
  gt::fmt_percent(
    columns = c(cd_threshold, uc_threshold),
    decimals = 0
  ) %>%
  gt::fmt(
    columns = c(cd_net_benefit, uc_net_benefit),
    fns = function(x) ifelse(x == 0, "0", formatC(x, format = "f", digits = 3))
  ) %>%
  gt::tab_spanner(
    label = "Crohn's Disease",
    columns = c(cd_label, cd_threshold, cd_net_benefit)
  ) %>%
  gt::tab_spanner(
    label = "Ulcerative Colitis",
    columns = c(uc_label, uc_threshold, uc_net_benefit)
  ) %>%
  gt::cols_align("left", columns = everything()) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_spanners(spanners = c("Crohn's Disease", "Ulcerative Colitis"))
  ) %>%
  gt::tab_style(
    style = gt::cell_text(weight = "bold"),
    locations = gt::cells_column_labels(columns = everything())
  )

Crohn's Disease			Ulcerative Colitis
Strategy	Decision Threshold	Net Benefit	Strategy	Decision Threshold	Net Benefit
Treat All	10%	0.452	Treat All	10%	0.373
Treat All	20%	0.384	Treat All	20%	0.295
Treat All	30%	0.296	Treat All	30%	0.194
Treat All	40%	0.179	Treat All	40%	0.060
Treat All	50%	0.014	Treat All	50%	-0.128
Treat None	10%	0	Treat None	10%	0
Treat None	20%	0	Treat None	20%	0
Treat None	30%	0	Treat None	30%	0
Treat None	40%	0	Treat None	40%	0
Treat None	50%	0	Treat None	50%	0
Clinical Decision Support Tool	10%	0.454	Clinical Decision Support Tool	10%	0.374
Clinical Decision Support Tool	20%	0.374	Clinical Decision Support Tool	20%	0.303
Clinical Decision Support Tool	30%	0.144	Clinical Decision Support Tool	30%	0.194
Clinical Decision Support Tool	40%	0.031	Clinical Decision Support Tool	40%	0.057
Clinical Decision Support Tool	50%	0	Clinical Decision Support Tool	50%	0.009

14. Assess the relationship between CDST variables and outcome

Calculate probabilities

VDZ-CD

vdz_cd_modelvars <- c("no_previous_antitnf", "no_surgery_before", "no_fistulizing_before", "albumin_g_L", "crp_mg_L")

vdz_cd_coefs_df <- vdz_cd_included %>%
  select(c(all_of(vdz_cd_modelvars), "clinical_remission_cdst"))

vdz_cd_mvassociations_mdl <- glm(formula = paste("clinical_remission_cdst ~", paste(vdz_cd_modelvars, collapse = "+")), data = vdz_cd_coefs_df, family = "binomial")

vdz_cd_mvassociations_df <- vdz_cd_coefs_df %>%
  mutate(probs = predict(vdz_cd_mvassociations_mdl, vdz_cd_coefs_df, type = "response"))

vdz_cd_mvassociations_df %<>%
  mutate(logit = predict(vdz_cd_mvassociations_mdl, vdz_cd_coefs_df))

VDZ-UC

vdz_uc_modelvars <- c("no_previous_antitnf", "dxduration_above_two", "moderate_endo_activity", "albumin_g_L")

vdz_uc_coefs_df <- vdz_uc_included %>%
  select(c(all_of(vdz_uc_modelvars), "clinical_remission_cdst"))

vdz_uc_mvassociations_mdl <- glm(formula = paste("clinical_remission_cdst ~", paste(vdz_uc_modelvars, collapse = "+")), data = vdz_uc_coefs_df, family = "binomial")

vdz_uc_mvassociations_df <- vdz_uc_coefs_df %>%
  mutate(probs = predict(vdz_uc_mvassociations_mdl, vdz_uc_coefs_df, type = "response"))

vdz_uc_mvassociations_df %<>%
  mutate(logit = predict(vdz_uc_mvassociations_mdl, vdz_uc_coefs_df))

UST-CD

ust_cd_included %<>% mutate(no_previous_antitnf = if_else(previous_antitnf == 1, 0, 1), .after = previous_antitnf)

is.numeric(ust_cd_included$no_previous_antitnf)

[1] TRUE

ust_cd_included %<>% mutate(no_surgery_before = if_else(intestinal_resections_before == 1, 0, 1), .after = intestinal_resections_before)

is.numeric(ust_cd_included$no_surgery_before)

[1] TRUE

ust_cd_included %<>% mutate(no_prior_smoking = if_else(smoking %in% c(1,2), 0, 1), .after = smoking)

is.numeric(ust_cd_included$no_prior_smoking)

[1] TRUE

ust_cd_included %<>% mutate(no_fistulizing_current = if_else(fistulizing_disease_current == 1, 0, 1), .after = fistulizing_disease_current)

is.numeric(ust_cd_included$no_fistulizing_current)

[1] TRUE

is.numeric(ust_cd_included$albumin_g_L)

[1] TRUE

ust_cd_modelvars <- c("no_previous_antitnf", "no_surgery_before", "no_prior_smoking", "no_fistulizing_current", "albumin_g_L")
ust_cd_coefs_df <- ust_cd_included %>%
  select(c(all_of(ust_cd_modelvars), "clinical_remission_cdst"))

ust_cd_mvassociations_mdl <- glm(formula = paste("clinical_remission_cdst ~", paste(ust_cd_modelvars, collapse = "+")), data = ust_cd_coefs_df, family = "binomial")

ust_cd_mvassociations_df <- ust_cd_coefs_df %>%
  mutate(probs = predict(ust_cd_mvassociations_mdl, ust_cd_coefs_df, type = "response"))

Visualize associations

VDZ-CD

vdz_cd_mvassociations_df$no_previous_antitnf <- fct_recode(as.character(vdz_cd_mvassociations_df$no_previous_antitnf), "Yes" = "0", "No" = "1")
vdz_cd_mvassociations_df$no_surgery_before <- fct_recode(as.character(vdz_cd_mvassociations_df$no_surgery_before), "Yes" = "0", "No" = "1")
vdz_cd_mvassociations_df$no_fistulizing_before <- fct_recode(as.character(vdz_cd_mvassociations_df$no_fistulizing_before), "Yes" = "0", "No" = "1")

vdz_cd_mv_antitnf <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = no_previous_antitnf, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior anti-TNF exposure") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_surgery <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = no_surgery_before, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior bowel surgery") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_fistulizing <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = no_fistulizing_before, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior fistulizing disease") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_albumin <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = albumin_g_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline albumin (g/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


vdz_cd_mv_crp <- ggplot2::ggplot(data = vdz_cd_mvassociations_df, aes(x = crp_mg_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline C-reactive protein (mg/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank()) +
  coord_cartesian(xlim = c(0, 150))

vdz_cd_mv_combined <- ggarrange(vdz_cd_mv_antitnf, vdz_cd_mv_surgery, vdz_cd_mv_fistulizing, vdz_cd_mv_albumin, vdz_cd_mv_crp)

annotate_figure(vdz_cd_mv_combined, left = text_grob("Probability of clinical remission", face = "bold", rot = 90))

VDZ-UC

vdz_uc_mvassociations_df$no_previous_antitnf <- fct_recode(as.character(vdz_uc_mvassociations_df$no_previous_antitnf), "Yes" = "0", "No" = "1")
vdz_uc_mvassociations_df$dxduration_above_two <- fct_recode(as.character(vdz_uc_mvassociations_df$dxduration_above_two), "< 2" = "0", "≥ 2" = "1")
vdz_uc_mvassociations_df$moderate_endo_activity <- fct_recode(as.character(vdz_uc_mvassociations_df$moderate_endo_activity), "3" = "0", "2" = "1")

vdz_uc_mv_antitnf <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = no_previous_antitnf, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior anti-TNF exposure") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_duration <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = dxduration_above_two, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Disease duration (year)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_endo <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = moderate_endo_activity, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Mayo endoscopic subscore") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_albumin <- ggplot2::ggplot(data = vdz_uc_mvassociations_df, aes(x = albumin_g_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline albumin (g/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

vdz_uc_mv_combined <- ggarrange(vdz_uc_mv_antitnf, vdz_uc_mv_duration, vdz_uc_mv_endo, vdz_uc_mv_albumin)

annotate_figure(vdz_uc_mv_combined, left = text_grob("Probability of clinical remission", face = "bold", rot = 90))

UST-CD

ust_cd_mvassociations_df$no_previous_antitnf <- fct_recode(as.character(ust_cd_mvassociations_df$no_previous_antitnf), "Yes" = "0", "No" = "1")
ust_cd_mvassociations_df$no_surgery_before <- fct_recode(as.character(ust_cd_mvassociations_df$no_surgery_before), "Yes" = "0", "No" = "1")
ust_cd_mvassociations_df$no_prior_smoking <- fct_recode(as.character(ust_cd_mvassociations_df$no_prior_smoking), "Yes" = "0", "No" = "1")
ust_cd_mvassociations_df$no_fistulizing_current <- fct_recode(as.character(ust_cd_mvassociations_df$no_fistulizing_current), "Yes" = "0", "No" = "1")

ust_cd_mv_antitnf <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_previous_antitnf, y = probs)) + 
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior anti-TNF exposure") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


ust_cd_mv_surgery <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_surgery_before, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior bowel surgery") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


ust_cd_mv_smoking <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_prior_smoking, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Prior smoking history") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

ust_cd_mv_fistulizing <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = no_fistulizing_current, y = probs)) +
  geom_boxplot(outlier.shape = NA) +
  ylab("Probability of clinical remission") +
  xlab("Baseline actively draining fistula") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())


ust_cd_mv_albumin <- ggplot2::ggplot(data = ust_cd_mvassociations_df, aes(x = albumin_g_L, y = probs)) +
  geom_point() +
  ylab("Probability of clinical remission") +
  xlab("Baseline albumin (g/L)") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), labels = scales::percent_format(accuracy = 1)) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank())

ust_cd_mv_combined <- ggarrange(ust_cd_mv_antitnf, ust_cd_mv_surgery, ust_cd_mv_smoking, ust_cd_mv_fistulizing, ust_cd_mv_albumin)

annotate_figure(ust_cd_mv_combined, left = text_grob("Probability of clinical remission", face = "bold", rot = 90))

15. Check linearity assumption for numeric variables

linearity_albumin <- ggplot(vdz_cd_mvassociations_df, aes(logit, albumin_g_L))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess", col = "black") + 
  ylab("Baseline albumin (g/L)") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(face = "bold")) +
  coord_cartesian(expand = 0)

linearity_crp <- ggplot(vdz_cd_mvassociations_df, aes(logit, crp_mg_L))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess", col = "black") + 
  ylab("Baseline C-reactive protein (mg/L)") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(face = "bold")) +
  coord_cartesian(expand = 0)

linearity_albumin_uc <- ggplot(vdz_uc_mvassociations_df, aes(logit, albumin_g_L))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess", col = "black") + 
  ylab("Baseline albumin (g/L)") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_text(face = "bold")) +
  coord_cartesian(expand = 0)

linearity_combined <- ggarrange(linearity_albumin, linearity_crp, linearity_albumin_uc, labels = c("A", "B", "C"), nrow = 1)

annotate_figure(linearity_combined, bottom = text_grob("Log odds of clinical remission", face = "bold"))