Skip to content

Commit

Permalink
Merge pull request #141 from nt-williams/lmtp_survival
Browse files Browse the repository at this point in the history
Wrapper function for estimating the entire survival curve
  • Loading branch information
nt-williams authored Jun 29, 2024
2 parents 323d366 + 0c31ee3 commit c539f2f
Show file tree
Hide file tree
Showing 10 changed files with 338 additions and 2 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: lmtp
Title: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies
Version: 1.4.0
Version: 1.4.1
Authors@R:
c(person(given = "Nicholas",
family = "Williams",
Expand Down Expand Up @@ -37,7 +37,8 @@ Imports:
data.table (>= 1.13.0),
checkmate (>= 2.1.0),
SuperLearner,
schoolmath
schoolmath,
isotone
URL: https://beyondtheate.com/, https://github.com/nt-williams/lmtp
BugReports: https://github.com/nt-williams/lmtp/issues
Suggests:
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

S3method(print,lmtp)
S3method(print,lmtp_contrast)
S3method(print,lmtp_survival)
S3method(tidy,lmtp)
S3method(tidy,lmtp_survival)
export(create_node_list)
export(event_locf)
export(ipsi)
Expand All @@ -11,6 +13,7 @@ export(lmtp_control)
export(lmtp_ipw)
export(lmtp_sdr)
export(lmtp_sub)
export(lmtp_survival)
export(lmtp_tmle)
export(static_binary_off)
export(static_binary_on)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# lmtp 1.4.1

### New Features

- Added `lmtp_survival()` function for estimating the entire survival curve. Enforces monotonicity using isotonic regression (see issue \#140.

### Bug Fixes

### General

# lmtp 1.4.0

### New Features
Expand Down
129 changes: 129 additions & 0 deletions R/lmtp_survival.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#' LMTP Survival Curve Estimator
#'
#' Wrapper around \code{lmtp_tmle} and \code{lmtp_sdr} for survival outcomes to estimate the entire survival curve.
#' Estimates are reconstructed using isotonic regression to enforce monotonicity of the survival curve.
#' \bold{Confidence intervals correspond to marginal confidence intervals for the survival curve, not simultaneous intervals.}
#'
#' @param data \[\code{data.frame}\]\cr
#' A \code{data.frame} in wide format containing all necessary variables
#' for the estimation problem. Must not be a \code{data.table}.
#' @param trt \[\code{character}\] or \[\code{list}\]\cr
#' A vector containing the column names of treatment variables ordered by time.
#' Or, a list of vectors, the same length as the number of time points of observation.
#' Vectors should contain column names for the treatment variables at each time point. The list
#' should be ordered following the time ordering of the model.
#' @param outcomes \[\code{character}\]\cr
#' A vector containing the columns names of intermediate outcome variables and the final
#' outcome variable ordered by time. Only numeric values are allowed. Variables should be coded as 0 and 1.
#' @param baseline \[\code{character}\]\cr
#' An optional vector containing the column names of baseline covariates to be
#' included for adjustment at every time point.
#' @param time_vary \[\code{list}\]\cr
#' A list the same length as the number of time points of observation with
#' the column names for new time-varying covariates introduced at each time point. The list
#' should be ordered following the time ordering of the model.
#' @param cens \[\code{character}\]\cr
#' An optional vector of column names of censoring indicators the same
#' length as the number of time points of observation. If missingness in the outcome is
#' present or if time-to-event outcome, must be provided.
#' @param shift \[\code{closure}\]\cr
#' A two argument function that specifies how treatment variables should be shifted.
#' See examples for how to specify shift functions for continuous, binary, and categorical exposures.
#' @param shifted \[\code{data.frame}\]\cr
#' An optional data frame, the same as in \code{data}, but modified according
#' to the treatment policy of interest. If specified, \code{shift} is ignored.
#' @param estimator \[\code{character(1)}\]\cr
#' The estimator to use. Either \code{"lmtp_tmle"} or \code{"lmtp_sdr"}.
#' @param k \[\code{integer(1)}\]\cr
#' An integer specifying how previous time points should be
#' used for estimation at the given time point. Default is \code{Inf},
#' all time points.
#' @param mtp \[\code{logical(1)}\]\cr
#' Is the intervention of interest a modified treatment policy?
#' Default is \code{FALSE}. If treatment variables are continuous this should be \code{TRUE}.
#' @param id \[\code{character(1)}\]\cr
#' An optional column name containing cluster level identifiers.
#' @param learners_outcome \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation
#' of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM.
#' @param learners_trt \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation
#' of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM.
#' \bold{Only include candidate learners capable of binary classification}.
#' @param folds \[\code{integer(1)}\]\cr
#' The number of folds to be used for cross-fitting.
#' @param weights \[\code{numeric(nrow(data))}\]\cr
#' An optional vector containing sampling weights.
#' @param control \[\code{list()}\]\cr
#' Output of \code{lmtp_control()}.
#'
#' @return A list of class \code{lmtp_survival} containing \code{lmtp} objects for each time point.
#'
#' @example inst/examples/lmtp_survival-ex.R
#' @export
lmtp_survival <- function(data, trt, outcomes, baseline = NULL, time_vary = NULL,
cens = NULL, shift = NULL, shifted = NULL,
estimator = c("lmtp_tmle", "lmtp_sdr"),
k = Inf,
mtp = FALSE,
id = NULL,
learners_outcome = "SL.glm",
learners_trt = "SL.glm",
folds = 10, weights = NULL,
control = lmtp_control()) {

checkmate::assertCharacter(outcomes, min.len = 2, null.ok = FALSE, unique = TRUE, any.missing = FALSE)

estimator <- match.arg(estimator)
tau <- length(outcomes)
estimates <- vector("list", tau)

t <- 1
cli::cli_progress_step("Working on time {t}/{tau}...")
for (t in 1:tau) {
args <- list(
data = data,
trt = trt,
outcome = outcomes[1:t],
baseline = baseline,
time_vary = time_vary,
cens = cens[1:t],
shift = shift,
shifted = shifted,
k = k,
mtp = mtp,
outcome_type = ifelse(t == 1, "binomial", "survival"),
id = id,
learners_outcome = learners_outcome,
learners_trt = learners_trt,
folds = folds,
weights = weights,
control = control
)

estimates[[t]] <- future::future({
if (estimator == "lmtp_tmle") do.call(lmtp_tmle, args)
else do.call(lmtp_sdr, args)
},
seed = TRUE)
cli::cli_progress_update()
}

cli::cli_progress_done()
estimates <- future::value(estimates)
estimates <- fix_surv_time1(estimates)
estimates <- isotonic_projection(estimates)

class(estimates) <- "lmtp_survival"
estimates
}

isotonic_projection <- function(x, alpha = 0.05) {
cv <- abs(qnorm(p = alpha / 2))
estim <- tidy.lmtp_survival(x)
iso_fit <- isotone::gpava(1:length(x), estim$estimate)
for (i in seq_along(x)) {
x[[i]]$theta <- iso_fit$y[i]
x[[i]]$low <- x[[i]]$theta - (qnorm(0.975) * x[[i]]$standard_error)
x[[i]]$high <- x[[i]]$theta + (qnorm(0.975) * x[[i]]$standard_error)
}
x
}
5 changes: 5 additions & 0 deletions R/print.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,8 @@ print.lmtp_contrast <- function(x, ...) {
x$vals$p.value <- format.pval(x$vals$p.value, digits = 3, eps = 0.001)
print(format(x$vals, digits = 3))
}

#' @export
print.lmtp_survival <- function(x, ...) {
print(as.data.frame(tidy.lmtp_survival(x)))
}
14 changes: 14 additions & 0 deletions R/tidy.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,17 @@ tidy.lmtp <- function(x, ...) {
class(out) <- c("tbl_df", "tbl", "data.frame")
out
}

#' Tidy a(n) lmtp_survival object
#'
#' @param x A `lmtp_survival` object produced by a call to [lmtp::lmtp_survival()].
#' @param ... Unused, included for generic consistency only.
#'
#' @example inst/examples/lmtp_survival-ex.R
#'
#' @export
tidy.lmtp_survival <- function(x, ...) {
out <- do.call("rbind", lapply(x, tidy))
out$t <- 1:length(x)
out[, c(ncol(out), 1:ncol(out) - 1)]
}
10 changes: 10 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,13 @@ is_normalized <- function(x, tolerance = .Machine$double.eps^0.5) {
# Check if the mean is approximately 1 within the given tolerance
abs(mean(x) - 1) < tolerance
}

fix_surv_time1 <- function(x) {
x[[1]]$theta <- 1 - x[[1]]$theta
high <- x[[1]]$high
low <- x[[1]]$low
x[[1]]$high <- 1 - low
x[[1]]$low <- 1 - high
x[[1]]$eif <- 1 - x[[1]]$eif
x
}
14 changes: 14 additions & 0 deletions inst/examples/lmtp_survival-ex.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
\donttest{
# Time-to-event analysis with a binary time-invariant exposure. Interested in
# the effect of treatment being given to all observations on the cumulative
# incidence of the outcome.
A <- "trt"
Y <- paste0("Y.", 1:6)
C <- paste0("C.", 0:5)
W <- c("W1", "W2")

curve <- lmtp_survival(sim_point_surv, A, Y, W, cens = C, folds = 1,
shift = static_binary_on, estimator = "lmtp_tmle")

tidy(curve)
}
118 changes: 118 additions & 0 deletions man/lmtp_survival.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions man/tidy.lmtp_survival.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c539f2f

Please sign in to comment.