forked from mrosenblum/AdaptiveDesignStreamlinedOptimizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ComputePerformanceMetrics.R
331 lines (292 loc) · 15.9 KB
/
ComputePerformanceMetrics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#######
# This code is added by Tianchen Qian, in order to calculate
# bias, variance, MSE, CI coverage, CI inflation factor of a given design,
# using the simulated test statistics.
#######
#######
# Major update log:
# 2017.07.18: added function prototype, implemented compute.performance().
# 2017.07.19: implemented compute.performance.under.single.scenario()
# to compute bias, variance, mse, ci.coverage.
# 2017.08.01: added ci.inflation.factor output in compute.performance.under.single.scenario().
# 2017.08.01: added ci.level argument in compute.performance() to allow user-specified confidence level.
# 2017.08.01: added n.treatment.excluding.control and n.population arguments in
# compute.performance() to allow for different number of treatments/populations.
# 2017.08.11: add argument restrict.enrollment in compute.performance() to allow
# only enrolling subpopulation 1 in the first stage.
# ASSUMING ONLY TWO STAGES (HARD CODED FOR NOW)
# 2017.08.17: add argument convert.to.hazard.ratio.scale and ni.margin in compute.performance()
# to convert the estimator of log hazard ratio (minus log non-inferiority-margin)
# to hazard ratio scale, when dealing with survival outcome. In this case,
# bias, variance, and mse will be computed on hazard ratio scale.
# (ci.coverage and ci.inflation.factor are still on log hazard ratio scale.)
# 2017.09.11: now also handles one stage trials
#######
#######
# Note:
# 1. I use the word "scenario" to denote a arm*population combination.
# So we are considering L * J scenarios: L arms * J populations.
# 2. The ordering in the test statistics is: "arm1.pop1", ..., "arm1.popJ", ..., "armL.pop1", ..., "armL.popJ".
# This is the same ordering as in 2populations2arms.R.
# 3. When estimating survival outcome, hazard ratio is understood as (hazard.rate.treatment/hazard.rate.control).
#######
# This function computes performance measures of simulated trials under a single scenario
# (for a single treatment arm and a single population).
# Input:
# test.statistics: a matrix of test statistics with dimension: n.sim * n.stages.
# cov.var.matrix: the (true) covariance matrix of the test statistics.
# stage.decision: a vector of length n.sim, indicating at which stage the trial stops for
# this single scenario.
# information.level: a vector of information levels at all K stages. That is, the inverse
# of variance of the estimators at all K stages.
# true.estimand.value: the true estimand value (the basis of bias calculation)
# ci.level: the confidence level of confidence interval (used in computing ci.coverage and ci.inflation.factor).
# Default ci.level = 0.95.
# restrict.enrollment: if TRUE, only enroll subpopulation 1 in the first stage. In this case
# subpopulation 2 may not be enrolled even at the second stage (in which case stage.decision = 0).
# convert.to.hazard.ratio.scale: if TRUE (only in the case of survival outcomes), convert estimators and
# estimand from log hazard ratio (minus log ni.margin) to hazard ratio scale.
# Bias, variance, and mse will be computed on hazard ratio scale.
# (ci.coverage and ci.inflation.factor are still on log hazard ratio scale.)
# ni.margin: the non-inferiority margin. Used to convert to hazard ratio scale for survival outcome with
# non-inferiority trials.
# Output: A list consisting of four elements:
# bias: a number.
# variance: a number.
# mse: a number.
# ci.coverage: a number.
# ci.inflation.factor: a number, the inflation factor for ci to have correct coverage.
compute.performance.under.single.scenario <- function(
test.statistics,
cov.var.matrix,
stage.decision,
information.level,
true.estimand.value,
ci.level = 0.95,
restrict.enrollment,
convert.to.hazard.ratio.scale = FALSE,
ni.margin = NULL
) {
# for single stage trials, convert test.statistics from vector to matrix
single.stage.trial <- is.vector(test.statistics)
if (single.stage.trial) {
test.statistics <- matrix(test.statistics, ncol = 1)
cov.var.matrix <- matrix(cov.var.matrix, ncol = 1)
}
# checks on dimensions
stopifnot(nrow(test.statistics) == length(stage.decision))
stopifnot(ncol(test.statistics) == nrow(cov.var.matrix))
stopifnot(ncol(test.statistics) == length(information.level))
stopifnot(length(true.estimand.value) == 1)
# check on valid argument values
stopifnot((ci.level > 0 & ci.level <= 1))
# check that stage.decision can be 0 only if restrict.enrollment = TRUE
stopifnot((!is.null(restrict.enrollment) && restrict.enrollment) | all(stage.decision > 0))
enrolled.sim.index <- which(stage.decision > 0)
test.statistics <- test.statistics[enrolled.sim.index, ]
stage.decision <- stage.decision[enrolled.sim.index]
# number of MC evaluations (excluding simulations where the corresponding subpopulation is never enrolled)
if (single.stage.trial) {
n.sim <- length(test.statistics)
} else {
n.sim <- nrow(test.statistics)
}
# diagonal matrix of estimator variances
if (single.stage.trial) {
D <- matrix(information.level^(-1), ncol = 1, nrow = 1)
} else {
D <- diag(information.level^(-1))
}
# obtain estimator at the end of each simulated trial
if (single.stage.trial) {
estimators <- matrix(test.statistics, ncol = 1)
} else {
estimators <- test.statistics
}
for (istage in 1:nrow(D)) {
# using for loop to avoid the impact of NA in stage 1 on stage 2, when restrict.enrollment = TRUE
estimators[, istage] <- estimators[, istage] * D[istage, istage]^(1/2)
}
estimators.at.end.of.trial <- rep(NA, n.sim)
for (i.sim in 1:n.sim) {
estimators.at.end.of.trial[i.sim] <- estimators[i.sim, stage.decision[i.sim]]
}
# extract known variance of the estimator at the end of each trial
variance.at.end.of.trial <- information.level[stage.decision]^(-1)
# compute ci.coverage
normal.quantile <- qnorm(1 - (1 - ci.level) / 2)
ci.left <- estimators.at.end.of.trial - normal.quantile * sqrt(variance.at.end.of.trial)
ci.right <- estimators.at.end.of.trial + normal.quantile * sqrt(variance.at.end.of.trial)
ci.coverage <- mean((true.estimand.value >= ci.left) & (true.estimand.value <= ci.right))
# compute ci.inflation.factor
ci.inflation.factor <- tryCatch(
{
binary.search(function(inflation.factor) {
ci.left.inflated <- estimators.at.end.of.trial - inflation.factor * normal.quantile * sqrt(variance.at.end.of.trial)
ci.right.inflated <- estimators.at.end.of.trial + inflation.factor * normal.quantile * sqrt(variance.at.end.of.trial)
ci.coverage.inflated <- mean((true.estimand.value >= ci.left.inflated) & (true.estimand.value <= ci.right.inflated))
return(ci.coverage.inflated - ci.level)
}, interval = c(0, 20))
},
error = function(cond) {
message("\nCatched error in binary.search() when computing ci.inflation.factor:")
message(cond)
message("\n")
return("Cannot find ci.inflation.factor using binary.search()")
})
# compute bias, variance, mse
if (convert.to.hazard.ratio.scale) {
if (is.null(ni.margin)) {
ni.margin <- 1
}
# convert estimator to hazard ratio scale (hazard.rate.treatment/hazard.rate.control)
estimators.at.end.of.trial <- exp(log(ni.margin) - estimators.at.end.of.trial)
true.estimand.value <- exp(log(ni.margin) - true.estimand.value)
}
bias <- mean(estimators.at.end.of.trial - true.estimand.value)
variance <- var(estimators.at.end.of.trial)
mse <- mean((estimators.at.end.of.trial - true.estimand.value)^2)
return(list(bias = bias,
variance = variance,
mse = mse,
ci.coverage = ci.coverage,
ci.inflation.factor = ci.inflation.factor))
}
# An example of simulating 10 trials, each with 2 stages.
# This example is to showcase that the function runs.
# Not a reasonable example in a real trial setting.
if (0) {
n.sim <- 100
test.statistics <- cbind(rnorm(n.sim), rnorm(n.sim))
cov.var.matrix <- diag(c(1, 1))
stage.decision <- sample(c(1,2), size = n.sim, replace = T)
information.level <- c(10, 20)
true.estimand.value <- 0
compute.performance.under.single.scenario(
test.statistics, cov.var.matrix, stage.decision, information.level, true.estimand.value)
compute.performance.under.single.scenario(
test.statistics, cov.var.matrix, stage.decision, information.level, true.estimand.value, ci.level = 0.80)
compute.performance.under.single.scenario(
test.statistics, cov.var.matrix, stage.decision, information.level, true.estimand.value, ci.level = -1)
}
# This function computes performance measures of simulated trials under all scenarios
# (for all treatment comparisons and all populations).
# Input:
# test.statistics: a matrix of test statistics with dimension: n.sim * (n.stages * n.arms * n.pops).
# Order of the test statistics within a stage is given by
# ("arm1.pop1", ..., "arm1.popJ", ..., "armL.pop1", ..., "armL.popJ").
# cov.var.matrix: the (true) covariance matrix of the test statistics.
# stage.decision: a matrix with dimension n.sim * (n.arms * n.pops), indicating at which stage the decision
# is made, in the order of ("arm1.pop1", ..., "arm1.popJ", ..., "armL.pop1", ..., "armL.popJ").
# information.level: a matrix with dimension: n.stages * (n.arms * n.pops). The k-th row is the
# information levels (inverse of estimator's variance) for stage k, with the order
# ("arm1.pop1", ..., "arm1.popJ", ..., "armL.pop1", ..., "armL.popJ").
# This is the output "information.vector" from construct.test.statistics.joint.distribution()
# in the file "Backend2Populations2Arms.R" on master branch.
# true.estimand.value: a vector of the true estimand value (the basis of bias calculation),
# in the order of ("arm1.pop1", ..., "arm1.popJ", ..., "armL.pop1", ..., "armL.popJ").
# ci.level: the confidence level of confidence interval (used in computing ci.coverage and ci.inflation.factor).
# Default = 0.95.
# n.treatment.excluding.control: number of comparator treatments (excluding control). Default = 2.
# n.population: number of subpopulations. Default = 2.
# restrict.enrollment: if TRUE, only enroll subpopulation 1 in the first stage. In this case
# subpopulation 2 may not be enrolled even at the second stage (in which case stage.decision = 0).
# convert.to.hazard.ratio.scale: if TRUE (only in the case of survival outcomes), convert estimators and
# estimand from log hazard ratio (minus log ni.margin) to hazard ratio scale.
# Bias, variance, and mse will be computed on hazard ratio scale.
# (ci.coverage and ci.inflation.factor are still on log hazard ratio scale.)
# ni.margin: the non-inferiority margin. Used to convert to hazard ratio scale for survival outcome with
# non-inferiority trials.
# Output: A list of lists (one list for each scenario), each list consisting of five elements:
# bias, variance, mse, ci.coverage, ci.inflation.factor.
compute.performance <- function(
test.statistics,
cov.var.matrix,
stage.decision,
information.level,
true.estimand.value,
ci.level = 0.95,
n.treatment.excluding.control = 2,
n.population = 2,
restrict.enrollment = FALSE,
convert.to.hazard.ratio.scale = FALSE,
ni.margin = NULL
) {
# Hard coded modification when restrict.enrollment = TRUE
# browser()
if (!is.null(restrict.enrollment) && restrict.enrollment) {
if (n.population != 2 | ncol(test.statistics) != 3) {
stop("restrict.enrollment = TRUE only allows for 2 subpopulation 2 stage trials.")
}
test.statistics <- cbind(test.statistics[, 1], rep(NA, nrow(test.statistics)), test.statistics[, 2:3])
cov.var.matrix <- cbind(cov.var.matrix[, 1], rep(NA, nrow(cov.var.matrix)), cov.var.matrix[, 2:3])
cov.var.matrix <- rbind(cov.var.matrix[1, ], rep(NA, ncol(cov.var.matrix)), cov.var.matrix[2:3, ])
}
# messages dealing with survival outcome
#if (convert.to.hazard.ratio.scale) {
# if (is.null(ni.margin)) {
# message("Superiority trial for survival outcome.\n",
# " Performance metrics reported are on hazard ratio scale.\n",
# " Estimators are internally converted from log(hazard ratio) to hazard ratio.\n",
# " ci.coverage and ci.inflation.factor are still computed on log(hazard ratio) scale.")
# } else {
# message("Non-inferiority trial for survival outcome.\n",
# " Performance metrics reported are on hazard ratio scale.\n",
# " Estimators are internally converted from log(hazard ratio) - log(ni.margin) to hazard ratio.\n",
# " ci.coverage and ci.inflation.factor are still computed on log(hazard ratio) scale.")
# }
#}
# number of scenarios (arm*population combinations)
n.arm.pop <- n.treatment.excluding.control * n.population
# number of stages
n.stages <- ncol(test.statistics) / n.arm.pop
# scenarios <- c("arm1.pop1", "arm1.pop2", "arm2.pop1", "arm2.pop2")
scenarios <- as.vector(matrix(outer(
paste0("arm", 1:n.treatment.excluding.control), paste0("pop", 1:n.population), paste, sep = "."
), nrow = 1, byrow = TRUE))
# a list to store the results under all scenarios
perf.metric.list <- list()
for (i.scn in 1:length(scenarios)) {
# an index to extract test statistics under a single scenario
index <- seq(from = i.scn, by = n.arm.pop, length = n.stages)
perf.metric <- compute.performance.under.single.scenario(
test.statistics[, index],
cov.var.matrix[index, index],
stage.decision[, i.scn],
information.level[, i.scn],
true.estimand.value[i.scn],
ci.level,
restrict.enrollment = restrict.enrollment,
convert.to.hazard.ratio.scale = convert.to.hazard.ratio.scale,
ni.margin = ni.margin
)
# concatenate the result under single scenario to the overall list
perf.metric.list <- c(perf.metric.list, list(perf.metric))
}
names(perf.metric.list) <- scenarios
return(perf.metric.list)
}
# An example of simulating 10 trials, each with 2 stages, 2 comparator amrs, 2 populations
# This example is to showcase that the function runs.
# Not a reasonable example in a real trial setting.
if (0) {
n.sim <- 100
test.statistics <- cbind(rnorm(n.sim), rnorm(n.sim),
rnorm(n.sim), rnorm(n.sim),
rnorm(n.sim), rnorm(n.sim),
rnorm(n.sim), rnorm(n.sim))
cov.var.matrix <- diag(rep(1, 8))
stage.decision <- cbind(sample(c(1,2), size = n.sim, replace = T),
sample(c(1,2), size = n.sim, replace = T),
sample(c(1,2), size = n.sim, replace = T),
sample(c(1,2), size = n.sim, replace = T))
information.level <- cbind(c(10, 20),
c(10, 20),
c(10, 20),
c(10, 20))
true.estimand.value <- c(0, 1, 0, 1)
compute.performance(
test.statistics, cov.var.matrix, stage.decision, information.level, true.estimand.value)
compute.performance(
test.statistics, cov.var.matrix, stage.decision, information.level, true.estimand.value, ci.level = 0.80)
}