---
title: '3-state microsimulation model'
subtitle: 'Includes sex specific probability of dying when healthy and state occupation: probability of dying when sick depends on the time of being sick'
author: "The DARTH workgroup"
output:
  html_document: default
  pdf_document: default
---

Developed by the Decision Analysis in R for Technologies in Health (DARTH) workgroup.
If you use this code as a basis for your own work, please cite our papers (see suggestions below) and in your acknowledgments for both the code and publications please add:
**The authors thank the Decision Analysis in R for Technologies in Health (DARTH) workgroup for providing the coding framework in R.**

\newpage

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, eval = TRUE)
options(scipen = 999)  # disable scientific notation
rm(list = ls())        # clear memory (removes all the variables from the workspace)
```

Change `eval` to `TRUE` if you want to knit this document.

# 01 Load packages

```{r}
if (!require('pacman')) install.packages('pacman'); library(pacman) 
# load (install if required) packages from CRAN
p_load("devtools", "dplyr", "scales", "ellipse", "ggplot2", "lazyeval", "igraph", "truncnorm", "ggraph", "reshape2", "knitr", "markdown", "stringr", "dampack", "matrixStats")
# load (install if required) packages from GitHub
#install_github("DARTH-git/darthtools", force = TRUE) # Uncomment if there is a newer version or if you use darthtools for the first time
p_load_gh("DARTH-git/darthtools")
```

# 02 Load functions

```{r}

#create microsimulation trace
plot_trace_microsim <- function (m_M, v_n_states = v_names_states) 
{
  n_states <- length(v_n_states)
  m_TR <- t(apply(m_M, 2, function(x) table(factor(x, levels = v_n_states, 
                                                   ordered = TRUE))))
  m_TR <- m_TR/nrow(m_M)
  colnames(m_TR) <- v_n_states
  rownames(m_TR) <- paste("Cycle", 0:(ncol(m_M) - 1), sep = " ")
  plot(0:(ncol(m_M) - 1), m_TR[, 1], type = "l", main = "Health state trace", 
       ylim = c(0, 1), ylab = "Proportion of cohort", xlab = "Cycle")
  for (n_states in 2:length(v_n_states)) {
    lines(0:(ncol(m_M) - 1), m_TR[, n_states], col = n_states)
  }
  legend("topright", v_n_states, col = 1:length(v_n_states), 
         lty = rep(1, length(v_n_states)), bty = "n", cex = 0.65)
}
```

# 03 Model input
## 03.1 Define model input parameters

```{r}
## General setup
set.seed(1)                                      # set the seed 
cycle_length    <- 1/12                          # cycle length equal to one year (use 1/12 for monthly)
n_cycles        <- 60 /cycle_length              # number of cycles
n_i             <- 10000                         # number of individuals

# the 3 health states of the model:
v_names_states  <- c("H",  # Healthy (H)
                     "S",  # Sick (S1)
                     "D")  # Dead (D)    

v_names_cycles  <- paste("cycle", 0:n_cycles)    # cycle names
n_states        <- length(v_names_states)        # number of health states 

### Discounting factors 
d_c <- 0.03 # annual discount rate for costs 
d_e <- 0.015 # annual discount rate for QALYs

### Strategies 
v_names_str     <- c("Standard of care",         # store the strategy names
                     "Treatment A", 
                     "Treatment B")  
n_str           <- length(v_names_str)           # number of strategies


### Transition intensity rates
r_HS_SoC  <- 0.05  # rate of becoming sick when healthy, under standard of care
r_HS_trtA <- 0.04  # rate of becoming sick when healthy, under treatment A
r_HS_trtB <- 0.02  # rate of becoming sick when healthy, under treatment B

### Converting rates to probabilities
# p = 1 - exp( -r * cycle_length)
p_HS_SoC  <- rate_to_prob(r = r_HS_SoC,   time = cycle_length)  # probability of becoming sick when healthy, under SoC
p_HS_trtA <- rate_to_prob(r = r_HS_trtA, time = cycle_length) # probability of becoming sick when healthy, under treatment A
p_HS_trtB <- rate_to_prob(r = r_HS_trtB, time = cycle_length) # probability of becoming sick when healthy, under treatment B


# Transition probabilities 
r_HD_female <- 0.0382  # annual rate of healthy -> dead when female
r_HD_male   <- 0.0463  # annual rate of healthy -> dead when male

p_HD_female <- rate_to_prob(r = r_HD_female, time = cycle_length)
p_HD_male   <- rate_to_prob(r = r_HD_male,   time = cycle_length)
df_p_HD     <- data.frame(Sex = c("Female", "Male"), p_HD = c(p_HD_female, p_HD_male)) # structure the sex specific mortality in a data frame - used later to match each individual with the corresponding sex specific mortality 
v_r_SD      <- c(0.1, 0.2, 0.3, 0.4, 0.5, rep(0.7, n_cycles*cycle_length - 5)) # annual rate of dying in sick state by cycle

v_p_SD      <- rate_to_prob(r = v_r_SD, time = cycle_length) 
v_p_SD      <- rep(x = v_p_SD, each = 1/cycle_length)
### State rewards
#### Costs 
c_H        <- 400   * cycle_length   # cost of one cycle in healthy state
c_S        <- 1000  * cycle_length   # cost of one cycle in sick state
c_D        <- 0     # cost of one cycle in dead state

c_trtA     <- 800   * cycle_length  # cost of treatment A (per cycle) in healthy state
c_trtB     <- 1500  * cycle_length  # cost of treatment B (per cycle) in healthy state

#### Utilities
u_H        <- 1     # utility when healthy 
u_S        <- 0.5   # utility when sick
u_D        <- 0     # utility when dead

```

## 03.2 Calculate internal model parameters

```{r}
### Cycle-specific discount weight for costs and effects 
v_dwc     <- 1 / ((1 + (d_c * cycle_length)) ^ (0:n_cycles))
v_dwe     <- 1 / ((1 + (d_e * cycle_length)) ^ (0:n_cycles))

# Within-cycle correction (WCC) - method  options Simpson's 1/3 rule, "half-cycle" or "none" 
v_wcc    <- darthtools::gen_wcc(n_cycles = n_cycles, 
                                method = "Simpson1/3") # vector of wcc

```

# 04 Sample individual level characteristics

## 04.1 Static characteristics

```{r}
# sample the sex of an individual based on a equal probability (50% female)
v_sex <- sample(x = c("Female", "Male"), prob = c(0.5, 0.5), size = n_i, replace = TRUE) 
```

## 04.2 Dynamic characteristics 

```{r}
# Specify the initial health state of the individuals 
# everyone begins in the healthy state (in this example)
v_M_init  <- rep("H", times = n_i)   
v_n_cycles_s_init <- rep(0, n_i)  # a vector with the time of being sick at the start of the model 
```

## 04.3 Create a dataframe with the individual characteristics 

```{r}
# create a data frame with each individual's 
# ID number, sex, initial time in sick state and initial state at time 0
df_X  <- data.frame(ID = 1:n_i, Sex = v_sex, n_cycles_s = v_n_cycles_s_init, M_init = v_M_init)
# NOTE: we use n_cycles_s for the number of times being sick, we start the data frame with the initial "history" of time being sick by saving v_Ts_init. However, during the simulation this value is updated and therefore called numer of times being sick.
head(df_X) # print the first rows of the data `frame
```

# 05 Define Simulation Functions

## 05.1 Probability function

The `Probs` function updates the transition probabilities of every cycle is shown below.

```{r}
Probs <- function(M_t, df_X, Trt = "SoC") { 
  # Arguments:
    # M_t:  health state occupied at cycle t (character variable)
    # df_X: data frame with individual characteristics data 
    # Trt:  treatment
  # Returns: 
    # transition probabilities for that cycle
  
  # Treatment specific transition probabilities
  if (Trt == "SoC") {
    p_HS <- p_HS_SoC
  } else if (Trt == "A") {
    p_HS <- p_HS_trtA 
  } else if (Trt == "B") {
    p_HS <- p_HS_trtB
  }
  
  # create matrix of state transition probabilities
  m_p_t           <- matrix(data = 0, nrow = n_i, ncol = n_states)  
  # give the state names to the rows
  colnames(m_p_t) <-  v_names_states                               
  
  # lookup baseline probability and rate of dying based on individual characteristics
  p_HD_all <- inner_join(x = df_X, y = df_p_HD, by = c("Sex"))
  p_HD     <- p_HD_all[M_t == "H", "p_HD"]
  
  # update m_p_t with the appropriate probabilities 
  # transition probabilities when Healthy 
  m_p_t[M_t == "H", "H"] <-  1 - p_HS - p_HD
  m_p_t[M_t == "H", "S"] <-      p_HS 
  m_p_t[M_t == "H", "D"] <-             p_HD              
  
  # transition probabilities when Sick 
  m_p_t[M_t == "S", "H"] <-  0
  m_p_t[M_t == "S", "S"] <-  1 - v_p_SD[df_X$n_cycles_s]
  m_p_t[M_t == "S", "D"] <-      v_p_SD[df_X$n_cycles_s]     
 
  # transition probabilities when Dead
  m_p_t[M_t == "D", "H"] <- 0
  m_p_t[M_t == "D", "S"] <- 0
  m_p_t[M_t == "D", "D"] <- 1  
  
  return(m_p_t)
}       
```

## 05.2 Cost function

The `Costs` function estimates the costs at every cycle.

```{r}
Costs <- function (M_t, Trt = "SoC") {
  # Arguments:
    # M_t: health state occupied at cycle t (character variable)
    # Trt:  treatment
  # Returns: 
    # costs accrued in this cycle

  
  # Treatment specific costs
  if (Trt == "SoC") {
    c_trt <- 0
  } else if (Trt == "A") {
    c_trt <- c_trtA
  } else if (Trt == "B") {
    c_trt <- c_trtB
  }
  
  c_t <- c()
  c_t[M_t == "H"] <- c_H + c_trt  # costs accrued by being healthy this cycle
  c_t[M_t == "S"] <- c_S          # costs accrued by being sick this cycle
  c_t[M_t == "D"] <- c_D          # costs at dead state
  
  return(c_t)  # return costs accrued this cycle
}
```

## 05.3 Health outcome function

The `Effs` function to update the utilities at every cycle.

```{r}
Effs <- function (M_t, cycle_length = 1) {
  # Arguments:
    # M_t: health state occupied at cycle t (character variable)
    # cl:  cycle length (default is 1)
  # Returns: 
    # QALYs accrued this cycle 
  
  u_t <- c() 
  u_t[M_t == "H"] <- u_H  # utility for being healthy this cycle
  u_t[M_t == "S"] <- u_S  # utility for being sick this cycle
  u_t[M_t == "D"] <- u_D  # utility for dead state
  
  QALYs <- u_t * cycle_length  # calculate the QALYs during cycle t
  return(QALYs)      # return the QALYs accrued this cycle
}
```

## 05.4 Microsimulation function

Below we develop the microsimulation function that allows the model to be run.

```{r}
MicroSim <- function(n_i, df_X, Trt = "SoC", seed = 1, cycle_length = 1, verbose ) {
  # Arguments:  
    # n_i: number of individuals
    # df_X: data frame with individual data 
    # Trt: treatment
    # seed: seed for the random number generator, default is 1
    # cycle_length: cycle lenght 
  # Returns:
    # results: data frame with total cost and QALYs
  
  set.seed(seed) # set a seed to be able to reproduce the same results
  
  # create three matrices called m_M, m_C and m_E
  # number of rows is equal to the n_i, the number of columns is equal to n_cycles 
  # (the initial state and all the n_cycles cycles)
  # m_M is used to store the health state information over time for every individual
  # m_C is used to store the costs information over time for every individual
  # m_E is used to store the effects information over time for every individual
  
  m_M <- m_C <- m_E <-  matrix(nrow = n_i, ncol = n_cycles + 1, 
                               dimnames = list(paste("ind"  , 1:n_i, sep = " "), 
                                               paste("cycle", 0:n_cycles, sep = " ")))  
 
  m_M[, 1] <- as.character(df_X$M_init) # initial health state at cycle 0 for individual i
  m_C[, 1] <- Costs(m_M[, 1])           # costs per individual during cycle 0
  m_E[, 1] <- Effs( m_M[, 1], cycle_length = cycle_length)   # QALYs per individual during cycle 0
  
  # open a loop for time running cycles 1 to n_cycles 
  for (t in 1:n_cycles) {
    print(t)
    
    if(t==34){browser()}
    
    # calculate the transition probabilities for the cycle based on health state t
    
    
    m_P <- Probs(m_M[, t], df_X, Trt = Trt)
    # check if transition probabilities are between 0 and 1
     check_transition_probability(m_P, verbose = verbose)
    # check if each of the rows of the transition probabilities matrix sum to one
     check_sum_of_transition_array(m_P, n_rows = n_i, n_cycles = n_cycles, verbose = verbose)
    
    # sample the next health state and store that state in matrix m_M
    m_M[, t + 1]  <- samplev(m_P, 1)    
    # calculate costs per individual during cycle t + 1
    m_C[, t + 1]  <- Costs(m_M[, t + 1], Trt = Trt)  
    # calculate QALYs per individual during cycle t + 1
    m_E[, t + 1]  <- Effs (m_M[, t + 1], cycle_length = cycle_length)  
    
    # update time since illness onset for t + 1 
    # NOTE: this code has a "reset of history" for time being sick
    # once someone is not "Sick" anymore, we reset n_cycles_s (set back to zero)
    # when you don't want a "reset" replace the last zero with teh current value
    df_X$n_cycles_s <- if_else(m_M[, t + 1] == "S", df_X$n_cycles_s + 1, 0) 
    
    # Display simulation progress
    if(t/(n_cycles/10) == round(t/(n_cycles/10), 0)) { # display progress every 10%
      cat('\r', paste(t/n_cycles * 100, "% done", sep = " "))
    }
    
  } # close the loop for the time points 
  
  
  # Discounted total expected QALYs and Costs per strategy and apply cycle correction ####
  tc      <- m_C %*% (v_dwc * v_wcc)  # total (discounted and cycle corrected) cost per individual
  te      <- m_E %*% (v_dwe * v_wcc)  # total (discounted and cycle corrected) QALYs per individual 
  
  tc_hat  <- mean(tc)       # average (discounted and cycle corrected) cost 
  te_hat  <- mean(te)       # average (discounted and cycle corrected) QALY  
  # store the results from the simulation in a list
  results <- list(m_M = m_M, m_C = m_C, m_E = m_E, tc = tc , te = te, 
                  tc_hat = tc_hat, te_hat = te_hat)   
  
  return(results)  # return the results

} # end of the `MicroSim` function  
```

## 06 Run Microsimulation

```{r, eval = TRUE, echo = TRUE, warning = FALSE, message = FALSE, results = FALSE }
# 06 Run Microsimulation
# By specifying all the arguments in the `MicroSim()` the simulation can be started
# Run the simulation model

t2 <- Sys.time()
outcomes_SoC  <- MicroSim(n_i = n_i, df_X = df_X, seed = 1, cycle_length = cycle_length, Trt = "SoC",verbose = F)
Sys.time() - t2
outcomes_trtA <- MicroSim(n_i = n_i, df_X = df_X, seed = 1, cycle_length = cycle_length, Trt = "A", verbose = F)
outcomes_trtB <- MicroSim(n_i = n_i, df_X = df_X, seed = 1, cycle_length = cycle_length, Trt = "B", verbose = F)
```

# 07 Visualize results

```{r}
# Standard of Care
plot(density(outcomes_SoC$tc), main = paste("Total cost per person"),  xlab = "Cost ($)")
plot(density(outcomes_SoC$te), main = paste("Total QALYs per person"), xlab = "QALYs")
plot_trace_microsim(m_M = outcomes_SoC$m_M)      # health state trace
```

```{r}
# Treatment A
plot(density(outcomes_trtA$tc), main = paste("Total cost per person"),  xlab = "Cost ($)")
plot(density(outcomes_trtA$te), main = paste("Total QALYs per person"), xlab = "QALYs")
plot_trace_microsim(outcomes_trtA$m_M)     # health state trace
```

```{r}
# Treatment B
plot(density(outcomes_trtB$tc), main = paste("Total cost per person"),  xlab = "Cost ($)")
plot(density(outcomes_trtB$te), main = paste("Total QALYs per person"), xlab = "QALYs")
plot_trace_microsim(outcomes_trtB$m_M)     # health state trace
```

# 08 Cost-effectiveness analysis (CEA) 

```{r}
# store the mean costs of each strategy in a new variable C (vector of costs)
v_C <- c(outcomes_SoC$tc_hat, outcomes_trtA$tc_hat, outcomes_trtB$tc_hat)
# store the mean QALYs of each strategy in a new variable E (vector of effects)
v_E <- c(outcomes_SoC$te_hat, outcomes_trtA$te_hat, outcomes_trtB$te_hat)

# use dampack to calculate the ICER
df_cea <- calculate_icers(cost       = v_C,
                          effect     = v_E,
                          strategies = v_names_str)
df_cea
```

```{R}
## CEA table in proper format 
table_cea <- format_table_cea(df_cea) 
table_cea
```

```{r}
## CEA frontier 
plot(df_cea, label = "all", txtsize = 14) +
  expand_limits(x = max(table_cea$QALYs) + 0.1) +
  theme(legend.position = c(0.8, 0.3))
```


We kindly request you to add the following Acknowledgement paragraph to your further work where DARTH code formed the basis. We also like to remind you that you can add other sources of reference to this paragraph to acknowledge code you got from others. 

# Acknowlegdement
For this work we made use of the template developed by the Decision Analysis in R for Technologies in Health (DARTH) workgroup: <http://darthworkgroup.com>.

The notation of our code is based on the following provided framework and coding convention: Alarid-Escudero, F., Krijkamp, E., Pechlivanoglou, P. et al. A Need for Change! A Coding Framework for Improving Transparency in Decision Modeling. PharmacoEconomics 37, 1329–1339 (2019). <https://doi.org/10.1007/s40273-019-00837-x>.


Other work from DARTH can be found on the website: <http://darthworkgroup.com/publications/>

# Copyright for assignment work
Copyright 2017, THE HOSPITAL FOR SICK CHILDREN AND THE COLLABORATING INSTITUTIONS.All rights reserved in Canada, the United States and worldwide. Copyright, trademarks, trade names and any and all associated intellectual property are exclusively owned by THE HOSPITAL FOR Sick CHILDREN and the collaborating  institutions. These materials may be used, reproduced, modified, distributed and adapted with proper attribution.