From 89017287bc1e6a488694bc798228af3f42b1bb4b Mon Sep 17 00:00:00 2001 From: nt-williams Date: Fri, 2 Jun 2023 11:31:34 -0600 Subject: [PATCH 01/16] Adding mlr3superlearner to dependencies --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 58ebf48..1544a77 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,7 @@ Imports: progressr, data.table (>= 1.13.0), checkmate (>= 2.1.0), - SuperLearner + mlr3superlearner URL: https://github.com/nt-williams/lmtp BugReports: https://github.com/nt-williams/lmtp/issues Suggests: From e622eac0eba874784d7938061bde36144d8dd150 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Fri, 2 Jun 2023 12:30:58 -0600 Subject: [PATCH 02/16] Estimators working with new ensemble --- R/density_ratios.R | 16 ++++++-------- R/gcomp.R | 18 +++++++--------- R/sdr.R | 53 ++++++++++++++++++---------------------------- R/sl.R | 25 +++++++++++----------- R/tmle.R | 22 +++++++++---------- R/utils.R | 3 +++ 6 files changed, 61 insertions(+), 76 deletions(-) diff --git a/R/density_ratios.R b/R/density_ratios.R index 2abbfb9..8b62bc1 100644 --- a/R/density_ratios.R +++ b/R/density_ratios.R @@ -37,14 +37,12 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne vars <- c(node_list[[t]], cens[[t]]) stacked <- stack_data(natural$train, shifted$train, trt, cens, t) - fit <- run_ensemble( - stacked[jrt & drt, ][["tmp_lmtp_stack_indicator"]], - stacked[jrt & drt, vars], - learners, - "binomial", - stacked[jrt & drt, ]$lmtp_id, - lrnr_folds - ) + fit <- run_ensemble(stacked[jrt & drt, c("lmtp_id", vars, "tmp_lmtp_stack_indicator")], + "tmp_lmtp_stack_indicator", + learners, + "binomial", + "lmtp_id", + lrnr_folds) if (full_fits) { fits[[t]] <- fit @@ -53,7 +51,7 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne } pred <- matrix(-999L, nrow = nrow(natural$valid), ncol = 1) - pred[jrv & drv, ] <- bound(SL_predict(fit, natural$valid[jrv & drv, vars]), .Machine$double.eps) + pred[jrv & drv, ] <- bound(SL_predict(fit, natural$valid[jrv & drv, ]), .Machine$double.eps) ratios <- density_ratios(pred, irv, drv, frv, mtp) densratios[, t] <- ratios diff --git a/R/gcomp.R b/R/gcomp.R index 53faabe..024ab09 100644 --- a/R/gcomp.R +++ b/R/gcomp.R @@ -46,14 +46,12 @@ estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, learners <- check_variation(natural$train[i & rt, ][[outcome]], learners) - fit <- run_ensemble( - natural$train[i & rt, ][[outcome]], - natural$train[i & rt, vars], - learners, - outcome_type, - id = natural$train[i & rt, ][["lmtp_id"]], - lrnr_folds - ) + fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, outcome)], + outcome, + learners, + outcome_type, + "lmtp_id", + lrnr_folds) if (full_fits) { fits[[t]] <- fit @@ -61,8 +59,8 @@ estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, fits[[t]] <- extract_sl_weights(fit) } - natural$train[jt & rt, pseudo] <- bound(SL_predict(fit, shifted$train[jt & rt, vars]), 1e-05) - m[jv & rv, t] <- bound(SL_predict(fit, shifted$valid[jv & rv, vars]), 1e-05) + natural$train[jt & rt, pseudo] <- bound(SL_predict(fit, shifted$train[jt & rt, ]), 1e-05) + m[jv & rv, t] <- bound(SL_predict(fit, shifted$valid[jv & rv, ]), 1e-05) natural$train[!rt, pseudo] <- 0 m[!rv, t] <- 0 diff --git a/R/sdr.R b/R/sdr.R index 3313a7b..c71f01d 100644 --- a/R/sdr.R +++ b/R/sdr.R @@ -46,20 +46,12 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, if (t == tau) { learners <- check_variation(natural$train[i & rt, ][[outcome]], learners) - fit <- run_ensemble( - natural$train[i & rt, ][[outcome]], - natural$train[i & rt, vars], - learners, - outcome_type, - id = natural$train[i & rt, ][["lmtp_id"]], - lrnr_folds - ) - - if (full_fits) { - fits[[t]] <- fit - } else { - fits[[t]] <- extract_sl_weights(fit) - } + fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, outcome)], + outcome, + learners, + outcome_type, + "lmtp_id", + lrnr_folds) } if (t < tau) { @@ -68,27 +60,24 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners <- check_variation(natural$train[i & rt, ][[pseudo]], learners) - fit <- run_ensemble( - natural$train[i & rt, ][[pseudo]], - natural$train[i & rt, vars], - learners, - "continuous", - id = natural$train[i & rt, ][["lmtp_id"]], - lrnr_folds - ) - - if (full_fits) { - fits[[t]] <- fit - } else { - fits[[t]] <- extract_sl_weights(fit) - } + fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, pseudo)], + pseudo, + learners, + "continuous", + "lmtp_id", + lrnr_folds) } + if (full_fits) { + fits[[t]] <- fit + } else { + fits[[t]] <- extract_sl_weights(fit) + } - m_natural_train[jt & rt, t] <- bound(SL_predict(fit, natural$train[jt & rt, vars]), 1e-05) - m_shifted_train[jt & rt, t] <- bound(SL_predict(fit, shifted$train[jt & rt, vars]), 1e-05) - m_natural_valid[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, vars]), 1e-05) - m_shifted_valid[jv & rv, t] <- bound(SL_predict(fit, shifted$valid[jv & rv, vars]), 1e-05) + m_natural_train[jt & rt, t] <- bound(SL_predict(fit, natural$train[jt & rt, ]), 1e-05) + m_shifted_train[jt & rt, t] <- bound(SL_predict(fit, shifted$train[jt & rt, ]), 1e-05) + m_natural_valid[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, ]), 1e-05) + m_shifted_valid[jv & rv, t] <- bound(SL_predict(fit, shifted$valid[jv & rv, ]), 1e-05) m_natural_train[!rt, t] <- 0 m_shifted_train[!rt, t] <- 0 diff --git a/R/sl.R b/R/sl.R index d297479..430d142 100644 --- a/R/sl.R +++ b/R/sl.R @@ -5,20 +5,19 @@ check_variation <- function(outcome, learners) { learners } -#' @importFrom nnls nnls -run_ensemble <- function(Y, X, learners, outcome_type, id, folds) { - family <- ifelse(outcome_type == "binomial", binomial(), gaussian()) - cv_control <- SuperLearner::SuperLearner.CV.control(V = folds) - fit <- SuperLearner::SuperLearner( - Y, X, family = family[[1]], SL.library = learners, - id = id, method = "method.NNLS", - env = environment(SuperLearner::SuperLearner), - cvControl = cv_control - ) +run_ensemble <- function(data, y, learners, outcome_type, id, folds) { + fit <- mlr3superlearner(data = data, + target = y, + library = learners, + outcome_type = outcome_type, + folds = folds, + group = id) - if (!sum(fit$coef != 0)) { + if (all(fit$weights$coef == 0)) { warning("SuperLearner fit failed. Trying main-effects GLM.", call. = FALSE) - fit <- glm(lmtp_tmp_outcome_vector ~ ., data = cbind(lmtp_tmp_outcome_vector = Y, X), family = family[[1]]) + tmp <- data[, setdiff(names(data), c(y, id))] + tmp$lmtp_tmp_outcome_vector <- data[[y]] + fit <- glm(lmtp_tmp_outcome_vector ~ ., data = tmp, family = ifelse(outcome_type == "continuous", "gaussian", "binomial")) } fit } @@ -27,5 +26,5 @@ SL_predict <- function(fit, newdata) { if (inherits(fit, "glm")) { return(as.vector(predict(fit, newdata, type = "response"))) } - predict(fit, newdata)$pred[, 1] + predict(fit, newdata)[, 1] } diff --git a/R/tmle.R b/R/tmle.R index 8493d7d..d410b5a 100644 --- a/R/tmle.R +++ b/R/tmle.R @@ -53,14 +53,12 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners <- check_variation(natural$train[i & rt, ][[outcome]], learners) - fit <- run_ensemble( - natural$train[i & rt, ][[outcome]], - natural$train[i & rt, vars], - learners, - outcome_type, - id = natural$train[i & rt,][["lmtp_id"]], - lrnr_folds - ) + fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, outcome)], + outcome, + learners, + outcome_type, + "lmtp_id", + lrnr_folds) if (full_fits) { fits[[t]] <- fit @@ -68,10 +66,10 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, fits[[t]] <- extract_sl_weights(fit) } - m_natural_train[jt & rt, t] <- bound(SL_predict(fit, natural$train[jt & rt, vars]), 1e-05) - m_shifted_train[jt & rt, t] <- bound(SL_predict(fit, shifted$train[jt & rt, vars]), 1e-05) - m_natural_valid[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, vars]), 1e-05) - m_shifted_valid[jv & rv, t] <- bound(SL_predict(fit, shifted$valid[jv & rv, vars]), 1e-05) + m_natural_train[jt & rt, t] <- bound(SL_predict(fit, natural$train[jt & rt, ]), 1e-05) + m_shifted_train[jt & rt, t] <- bound(SL_predict(fit, shifted$train[jt & rt, ]), 1e-05) + m_natural_valid[jv & rv, t] <- bound(SL_predict(fit, natural$valid[jv & rv, ]), 1e-05) + m_shifted_valid[jv & rv, t] <- bound(SL_predict(fit, shifted$valid[jv & rv, ]), 1e-05) wts <- { if (is.null(weights)) diff --git a/R/utils.R b/R/utils.R index e201622..fb43176 100644 --- a/R/utils.R +++ b/R/utils.R @@ -151,6 +151,9 @@ final_outcome <- function(outcomes) { } extract_sl_weights <- function(fit) { + if (inherits(fit, "mlr3superlearner")) { + return(cbind(Risk = fit$weights$cvRisk, Coef = fit$weights$coef)) + } fit$coef } From 3dd58e154ee5a3db9b42aded90291227ddde1dd8 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Fri, 2 Jun 2023 13:04:22 -0600 Subject: [PATCH 03/16] Updated documentation. --- DESCRIPTION | 8 ++++---- NAMESPACE | 1 - R/estimators.R | 34 ++++++++++++--------------------- README.Rmd | 30 ++--------------------------- README.md | 42 +++++++---------------------------------- inst/examples/ipw-ex.R | 6 +++--- inst/examples/sdr-ex.R | 6 +++--- inst/examples/sub-ex.R | 6 +++--- inst/examples/tmle-ex.R | 6 +++--- man/lmtp_ipw.Rd | 12 +++++------- man/lmtp_sdr.Rd | 17 +++++++---------- man/lmtp_sub.Rd | 11 +++++------ man/lmtp_tmle.Rd | 17 +++++++---------- 13 files changed, 61 insertions(+), 135 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1544a77..fbea1e5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: lmtp Title: Non-Parametric Causal Effects of Feasible Interventions Based on Modified Treatment Policies -Version: 1.3.1 +Version: 1.4.0.9000 Authors@R: c(person(given = "Nicholas", family = "Williams", @@ -19,7 +19,8 @@ Description: Non-parametric estimators for casual effects based on longitudinal irrespective of treatment variable type. For both continuous and binary outcomes, additive treatment effects can be calculated and relative risks and odds ratios may be calculated for binary outcomes. Depends: - R (>= 2.10) + R (>= 2.10), + mlr3superlearner License: AGPL-3 Encoding: UTF-8 LazyData: true @@ -35,8 +36,7 @@ Imports: future (>= 1.17.0), progressr, data.table (>= 1.13.0), - checkmate (>= 2.1.0), - mlr3superlearner + checkmate (>= 2.1.0) URL: https://github.com/nt-williams/lmtp BugReports: https://github.com/nt-williams/lmtp/issues Suggests: diff --git a/NAMESPACE b/NAMESPACE index ca2eef5..93b3b47 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,7 +18,6 @@ importFrom(data.table,.SD) importFrom(data.table,`:=`) importFrom(data.table,as.data.table) importFrom(generics,tidy) -importFrom(nnls,nnls) importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,coef) diff --git a/R/estimators.R b/R/estimators.R index 0651dd4..bd44e6c 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -46,11 +46,8 @@ #' An optional, ordered vector of the bounds for a continuous outcomes. If \code{NULL}, #' the bounds will be taken as the minimum and maximum of the observed data. #' Should be left as \code{NULL} if the outcome type is binary. -#' @param learners_outcome \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation -#' of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM. -#' @param learners_trt \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation -#' of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. -#' \bold{Only include candidate learners capable of binary classification}. +#' @param learners_outcome \[\code{character}\]\cr +#' @param learners_trt \[\code{character}\]\cr \bold{Only include candidate learners capable of binary classification}. #' @param folds \[\code{integer(1)}\]\cr #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr @@ -104,8 +101,8 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, mtp = FALSE, outcome_type = c("binomial", "continuous", "survival"), # intervention_type = c("static", "dynamic", "mtp"), id = NULL, bounds = NULL, - learners_outcome = "SL.glm", - learners_trt = "SL.glm", + learners_outcome = "glm", + learners_trt = "glm", folds = 10, weights = NULL, .bound = 1e-5, .trim = 0.999, .learners_outcome_folds = 10, .learners_trt_folds = 10, .return_full_fits = FALSE, ...) { @@ -236,11 +233,8 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' An optional, ordered vector of the bounds for a continuous outcomes. If \code{NULL}, #' the bounds will be taken as the minimum and maximum of the observed data. #' Should be left as \code{NULL} if the outcome type is binary. -#' @param learners_outcome \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation -#' of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM. -#' @param learners_trt \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation -#' of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. -#' \bold{Only include candidate learners capable of binary classification}. +#' @param learners_outcome \[\code{character}\]\cr +#' @param learners_trt \[\code{character}\]\cr \bold{Only include candidate learners capable of binary classification}. #' @param folds \[\code{integer(1)}\]\cr #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr @@ -295,8 +289,8 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, # intervention_type = c("static", "dynamic", "mtp"), outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners_outcome = "SL.glm", - learners_trt = "SL.glm", + learners_outcome = "glm", + learners_trt = "glm", folds = 10, weights = NULL, .bound = 1e-5, .trim = 0.999, .learners_outcome_folds = 10, .learners_trt_folds = 10, .return_full_fits = FALSE, ...) { @@ -424,8 +418,7 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' An optional, ordered vector of the bounds for a continuous outcomes. If \code{NULL}, #' the bounds will be taken as the minimum and maximum of the observed data. #' Should be left as \code{NULL} if the outcome type is binary. -#' @param learners \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation -#' of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM. +#' @param learners \[\code{character}\]\cr #' @param folds \[\code{integer(1)}\]\cr #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr @@ -457,7 +450,7 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, k = Inf, outcome_type = c("binomial", "continuous", "survival"), - id = NULL, bounds = NULL, learners = "SL.glm", + id = NULL, bounds = NULL, learners = "glm", folds = 10, weights = NULL, .bound = 1e-5, .learners_folds = 10, .return_full_fits = FALSE) { @@ -567,9 +560,7 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens #' Outcome variable type (i.e., continuous, binomial, survival). #' @param id \[\code{character(1)}\]\cr #' An optional column name containing cluster level identifiers. -#' @param learners \[\code{character}\]\cr A vector of \code{SuperLearner} algorithms for estimation -#' of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. -#' \bold{Only include candidate learners capable of binary classification}. +#' @param learners \[\code{character}\]\cr \bold{Only include candidate learners capable of binary classification}. #' @param folds \[\code{integer(1)}\]\cr #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr @@ -612,10 +603,9 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens #' @example inst/examples/ipw-ex.R lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, mtp = FALSE, - # intervention_type = c("static", "dynamic", "mtp"), k = Inf, id = NULL, outcome_type = c("binomial", "continuous", "survival"), - learners = "SL.glm", + learners = "glm", folds = 10, weights = NULL, .bound = 1e-5, .trim = 0.999, .learners_folds = 10, .return_full_fits = FALSE, ...) { diff --git a/README.Rmd b/README.Rmd index 3cc9f4b..f4584dd 100644 --- a/README.Rmd +++ b/README.Rmd @@ -30,7 +30,7 @@ Nick Williams and Ivan Diaz --- -**lmtp** is an R package that provides an estimation framework for the casual effects of feasible interventions based on point-treatment and longitudinal modified treatment policies as described in Diaz, Williams, Hoffman, and Schenck (2020). Two primary estimators are supported, a targeted maximum likelihood (TML) estimator and a sequentially doubly robust (SDR) estimator (a G-computation and an inverse probability of treatment weighting estimator are provided for the sake of being thorough but their use is recommended against in favor of the TML and SDR estimators). Both binary and continuous outcomes (both with censoring) are allowed. **lmtp** is built atop the [`SuperLearner`](https://CRAN.R-project.org/package=SuperLearner) package to utilize ensemble machine learning for estimation. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type providing decreased computation time when treatment is continuous. Dynamic treatment regimes are also supported. +**lmtp** is an R package that provides an estimation framework for the casual effects of feasible interventions based on point-treatment and longitudinal modified treatment policies as described in Diaz, Williams, Hoffman, and Schenck (2020). Two primary estimators are supported, a targeted maximum likelihood (TML) estimator and a sequentially doubly robust (SDR) estimator (a G-computation and an inverse probability of treatment weighting estimator are provided for the sake of being thorough but their use is recommended against in favor of the TML and SDR estimators). Both binary and continuous outcomes (both with censoring) are allowed. **lmtp** is built atop the [`mlr3superlearner`](https://github.com/nt-williams/mlr3superlearner) package to utilize ensemble machine learning for estimation. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type providing decreased computation time when treatment is continuous. Dynamic treatment regimes are also supported. A list of papers using **lmtp** is [here](https://gist.github.com/nt-williams/15068f5849a67ff4d2cb7f2dcf97b3de). @@ -38,34 +38,8 @@ For an in-depth look at the package's functionality, please consult the accompan ## Installation -**lmtp** can be installed from CRAN with: - -```r -install.packages("lmtp") -``` - -The stable, development version can be installed from GitHub with: - -```r -devtools::install_github("nt-williams/lmtp@devel") -``` - -The **sl3** compatible version can be installed from GitHub with: - -```r -devtools::install_github("nt-williams/lmtp@sl3") -``` - -A version allowing for different covariates sets for the treatment and outcome regressions: - -```r -devtools::install_github("nt-williams/lmtp@2covarSets") -``` - -A version allowing for different covariates sets for the treatment and outcome regressions and that uses **sl3**: - ```r -devtools::install_github("nt-williams/lmtp@2covarSets-sl3") +remotes::install_github("nt-williams/lmtp@mlr3superlearner") ``` ## What even is a modified treatment policy? diff --git a/README.md b/README.md index e9f9c1b..686b4cc 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ treatment weighting estimator are provided for the sake of being thorough but their use is recommended against in favor of the TML and SDR estimators). Both binary and continuous outcomes (both with censoring) are allowed. **lmtp** is built atop the -[`SuperLearner`](https://CRAN.R-project.org/package=SuperLearner) +[`mlr3superlearner`](https://github.com/nt-williams/mlr3superlearner) package to utilize ensemble machine learning for estimation. The treatment mechanism is estimated via a density ratio classification procedure irrespective of treatment variable type providing decreased @@ -49,36 +49,8 @@ accompanying vignette. ## Installation -**lmtp** can be installed from CRAN with: - -``` r -install.packages("lmtp") -``` - -The stable, development version can be installed from GitHub with: - -``` r -devtools::install_github("nt-williams/lmtp@devel") -``` - -The **sl3** compatible version can be installed from GitHub with: - -``` r -devtools::install_github("nt-williams/lmtp@sl3") -``` - -A version allowing for different covariates sets for the treatment and -outcome regressions: - -``` r -devtools::install_github("nt-williams/lmtp@2covarSets") -``` - -A version allowing for different covariates sets for the treatment and -outcome regressions and that uses **sl3**: - ``` r -devtools::install_github("nt-williams/lmtp@2covarSets-sl3") +remotes::install_github("nt-williams/lmtp@mlr3superlearner") ``` ## What even is a modified treatment policy? @@ -197,11 +169,11 @@ causal effects for binary, categorical, and continuous exposures in both the point treatment and longitudinal setting using traditional causal effects or modified treatment policies. -- [`txshift`](https://github.com/nhejazi/txshift) -- [`tmle3`](https://github.com/tlverse/tmle3) -- [`tmle3shift`](https://github.com/tlverse/tmle3shift) -- [`ltmle`](https://CRAN.R-project.org/package=ltmle) -- [`tmle`](https://CRAN.R-project.org/package=tmle) +- [`txshift`](https://github.com/nhejazi/txshift) +- [`tmle3`](https://github.com/tlverse/tmle3) +- [`tmle3shift`](https://github.com/tlverse/tmle3shift) +- [`ltmle`](https://CRAN.R-project.org/package=ltmle) +- [`tmle`](https://CRAN.R-project.org/package=tmle) ## Citation diff --git a/inst/examples/ipw-ex.R b/inst/examples/ipw-ex.R index cade356..a8701db 100644 --- a/inst/examples/ipw-ex.R +++ b/inst/examples/ipw-ex.R @@ -74,12 +74,12 @@ # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -92,7 +92,7 @@ out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_ipw(tmp, A, "Y", time_vary = L, shift = policy, diff --git a/inst/examples/sdr-ex.R b/inst/examples/sdr-ex.R index caaae42..a58d2bb 100644 --- a/inst/examples/sdr-ex.R +++ b/inst/examples/sdr-ex.R @@ -74,12 +74,12 @@ # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -92,7 +92,7 @@ out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_sdr(tmp, A, "Y", time_vary = L, shift = policy, diff --git a/inst/examples/sub-ex.R b/inst/examples/sub-ex.R index ca5f689..cb9fd29 100644 --- a/inst/examples/sub-ex.R +++ b/inst/examples/sub-ex.R @@ -72,12 +72,12 @@ # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -90,7 +90,7 @@ out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_sub(tmp, A, "Y", time_vary = L, shift = policy, k = 0, folds = 2) diff --git a/inst/examples/tmle-ex.R b/inst/examples/tmle-ex.R index 25d353f..c8e9cb9 100644 --- a/inst/examples/tmle-ex.R +++ b/inst/examples/tmle-ex.R @@ -73,12 +73,12 @@ # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -91,7 +91,7 @@ out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_tmle(tmp, A, "Y", time_vary = L, shift = policy, diff --git a/man/lmtp_ipw.Rd b/man/lmtp_ipw.Rd index 6d9156a..1ba54ba 100644 --- a/man/lmtp_ipw.Rd +++ b/man/lmtp_ipw.Rd @@ -17,7 +17,7 @@ lmtp_ipw( k = Inf, id = NULL, outcome_type = c("binomial", "continuous", "survival"), - learners = "SL.glm", + learners = "glm", folds = 10, weights = NULL, .bound = 1e-05, @@ -78,9 +78,7 @@ An optional column name containing cluster level identifiers.} \item{outcome_type}{[\code{character(1)}]\cr Outcome variable type (i.e., continuous, binomial, survival).} -\item{learners}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation -of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. -\bold{Only include candidate learners capable of binary classification}.} +\item{learners}{[\code{character}]\cr \bold{Only include candidate learners capable of binary classification}.} \item{folds}{[\code{integer(1)}]\cr The number of folds to be used for cross-fitting.} @@ -211,12 +209,12 @@ by some amount, use \code{mtp = TRUE}}. # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -229,7 +227,7 @@ by some amount, use \code{mtp = TRUE}}. out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_ipw(tmp, A, "Y", time_vary = L, shift = policy, diff --git a/man/lmtp_sdr.Rd b/man/lmtp_sdr.Rd index 3bf9217..3f64448 100644 --- a/man/lmtp_sdr.Rd +++ b/man/lmtp_sdr.Rd @@ -18,8 +18,8 @@ lmtp_sdr( outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners_outcome = "SL.glm", - learners_trt = "SL.glm", + learners_outcome = "glm", + learners_trt = "glm", folds = 10, weights = NULL, .bound = 1e-05, @@ -86,12 +86,9 @@ An optional, ordered vector of the bounds for a continuous outcomes. If \code{NU the bounds will be taken as the minimum and maximum of the observed data. Should be left as \code{NULL} if the outcome type is binary.} -\item{learners_outcome}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation -of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM.} +\item{learners_outcome}{[\code{character}]\cr} -\item{learners_trt}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation -of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. -\bold{Only include candidate learners capable of binary classification}.} +\item{learners_trt}{[\code{character}]\cr \bold{Only include candidate learners capable of binary classification}.} \item{folds}{[\code{integer(1)}]\cr The number of folds to be used for cross-fitting.} @@ -231,12 +228,12 @@ by some amount, use \code{mtp = TRUE}}. # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -249,7 +246,7 @@ by some amount, use \code{mtp = TRUE}}. out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_sdr(tmp, A, "Y", time_vary = L, shift = policy, diff --git a/man/lmtp_sub.Rd b/man/lmtp_sub.Rd index d9018b9..0e1f070 100644 --- a/man/lmtp_sub.Rd +++ b/man/lmtp_sub.Rd @@ -17,7 +17,7 @@ lmtp_sub( outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners = "SL.glm", + learners = "glm", folds = 10, weights = NULL, .bound = 1e-05, @@ -77,8 +77,7 @@ An optional, ordered vector of the bounds for a continuous outcomes. If \code{NU the bounds will be taken as the minimum and maximum of the observed data. Should be left as \code{NULL} if the outcome type is binary.} -\item{learners}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation -of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM.} +\item{learners}{[\code{character}]\cr} \item{folds}{[\code{integer(1)}]\cr The number of folds to be used for cross-fitting.} @@ -191,12 +190,12 @@ continuous, or time-to-event outcomes. Supports binary, categorical, and continu # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -209,7 +208,7 @@ continuous, or time-to-event outcomes. Supports binary, categorical, and continu out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_sub(tmp, A, "Y", time_vary = L, shift = policy, k = 0, folds = 2) diff --git a/man/lmtp_tmle.Rd b/man/lmtp_tmle.Rd index b00e377..6e75a0a 100644 --- a/man/lmtp_tmle.Rd +++ b/man/lmtp_tmle.Rd @@ -18,8 +18,8 @@ lmtp_tmle( outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners_outcome = "SL.glm", - learners_trt = "SL.glm", + learners_outcome = "glm", + learners_trt = "glm", folds = 10, weights = NULL, .bound = 1e-05, @@ -86,12 +86,9 @@ An optional, ordered vector of the bounds for a continuous outcomes. If \code{NU the bounds will be taken as the minimum and maximum of the observed data. Should be left as \code{NULL} if the outcome type is binary.} -\item{learners_outcome}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation -of the outcome regression. Default is \code{"SL.glm"}, a main effects GLM.} +\item{learners_outcome}{[\code{character}]\cr} -\item{learners_trt}{[\code{character}]\cr A vector of \code{SuperLearner} algorithms for estimation -of the exposure mechanism. Default is \code{"SL.glm"}, a main effects GLM. -\bold{Only include candidate learners capable of binary classification}.} +\item{learners_trt}{[\code{character}]\cr \bold{Only include candidate learners capable of binary classification}.} \item{folds}{[\code{integer(1)}]\cr The number of folds to be used for cross-fitting.} @@ -230,12 +227,12 @@ by some amount, use \code{mtp = TRUE}}. # Example 2.4 # Using the same data as examples 2.1, 2.2, and 2.3, but now treating the exposure - # as an ordered categorical variable. To account for the exposure being a + # as a categorical variable. To account for the exposure being a # factor we just need to modify the shift function (and the original data) # so as to respect this. tmp <- sim_t4 for (i in A) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5) } policy <- function(data, trt) { @@ -248,7 +245,7 @@ by some amount, use \code{mtp = TRUE}}. out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5) } lmtp_tmle(tmp, A, "Y", time_vary = L, shift = policy, From 3a36d8710944553a5913bc9d9fac60f0584e9ea7 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Wed, 5 Jul 2023 16:42:20 -0600 Subject: [PATCH 04/16] Creating lmtp_control function. --- R/lmtp_options.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 R/lmtp_options.R diff --git a/R/lmtp_options.R b/R/lmtp_options.R new file mode 100644 index 0000000..508a2f3 --- /dev/null +++ b/R/lmtp_options.R @@ -0,0 +1,18 @@ +lmtp_control <- function(...) { + change <- list(...) + control <- list(.bound = 1e-5, + .trim = 0.999, + .learners_trt_metalearner = "glm", + .learners_outcome_metalearner = "glm", + .learners_outcome_folds = 10, + .learners_trt_folds = 10, + .return_full_fits = FALSE) + + if (length(change) == 0) return(control) + + for (arg in names(change)) { + control[[arg]] <- change[[arg]] + } + + control +} From 572c89a45eab53ab3b892817e954be8b94884579 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Wed, 5 Jul 2023 21:39:59 -0600 Subject: [PATCH 05/16] Refactoring and adding lmtp_control --- NAMESPACE | 1 + R/density_ratios.R | 36 +++--- R/estimators.R | 261 ++++++++++++++++++-------------------------- R/gcomp.R | 30 ++--- R/lmtp_Task.R | 2 +- R/lmtp_options.R | 23 +++- R/sdr.R | 69 +++++++----- R/sl.R | 17 +-- R/tmle.R | 60 +++++----- R/utils.R | 2 +- man/lmtp_control.Rd | 36 ++++++ man/lmtp_ipw.Rd | 21 +--- man/lmtp_sdr.Rd | 25 +---- man/lmtp_sub.Rd | 15 +-- man/lmtp_tmle.Rd | 25 +---- 15 files changed, 290 insertions(+), 333 deletions(-) create mode 100644 man/lmtp_control.Rd diff --git a/NAMESPACE b/NAMESPACE index 93b3b47..d02c2d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ S3method(tidy,lmtp) export(create_node_list) export(event_locf) export(lmtp_contrast) +export(lmtp_control) export(lmtp_ipw) export(lmtp_sdr) export(lmtp_sub) diff --git a/R/density_ratios.R b/R/density_ratios.R index 8b62bc1..7492833 100644 --- a/R/density_ratios.R +++ b/R/density_ratios.R @@ -1,25 +1,26 @@ -cf_r <- function(Task, learners, mtp, lrnr_folds, trim, full_fits, pb) { - fopts <- options("lmtp.bound", "lmtp.trt.length") +cf_r <- function(task, learners, mtp, control, progress_bar) { out <- list() - - for (fold in seq_along(Task$folds)) { + for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ - options(fopts) - - estimate_r( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted, Task$folds, fold), - Task$trt, Task$cens, Task$risk, Task$tau, Task$node_list$trt, - learners, pb, mtp, lrnr_folds, full_fits - ) + estimate_r(get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted, task$folds, fold), + task$trt, + task$cens, + task$risk, + task$tau, + task$node_list$trt, + learners, + mtp, + control, + progress_bar) }, seed = TRUE) } - trim_ratios(recombine_ratios(future::value(out), Task$folds), trim) + trim_ratios(recombine_ratios(future::value(out), task$folds), control$.trim) } -estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, lrnr_folds, full_fits) { +estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, mtp, control, progress_bar) { densratios <- matrix(nrow = nrow(natural$valid), ncol = tau) fits <- list() @@ -42,9 +43,10 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne learners, "binomial", "lmtp_id", - lrnr_folds) + control$.learners_trt_metalearner, + control$.learners_trt_folds) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) @@ -56,7 +58,7 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne ratios <- density_ratios(pred, irv, drv, frv, mtp) densratios[, t] <- ratios - pb() + progress_bar() } list(ratios = densratios, fits = fits) diff --git a/R/estimators.R b/R/estimators.R index bd44e6c..2da7087 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -52,19 +52,8 @@ #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. -#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .trim \[\code{numeric(1)}\]\cr -#' Determines the amount the density ratios should be trimmed. -#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile -#' to the 0.999 percentile. A value of 1 indicates no trimming. -#' @param .learners_outcome_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_outcome}. -#' @param .learners_trt_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_trt}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. #' @param ... Extra arguments. Exists for backwards compatibility. #' #' @details @@ -103,9 +92,8 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, id = NULL, bounds = NULL, learners_outcome = "glm", learners_trt = "glm", - folds = 10, weights = NULL, .bound = 1e-5, .trim = 0.999, - .learners_outcome_folds = 10, .learners_trt_folds = 10, - .return_full_fits = FALSE, ...) { + folds = 10, weights = NULL, + control = lmtp_control(), ...) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -128,12 +116,11 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_outcome_folds, null.ok = TRUE) - checkmate::assertNumber(.learners_trt_folds, null.ok = TRUE) checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertNumber(.trim, upper = 1) - checkmate::assertLogical(.return_full_fits, len = 1) + checkmate::assertNumber(control$.learners_outcome_folds, null.ok = TRUE) + checkmate::assertNumber(control$.learners_trt_folds, null.ok = TRUE) + checkmate::assertNumber(control$.trim, upper = 1) + checkmate::assertLogical(control$.return_full_fits, len = 1) extras <- list(...) if ("intervention_type" %in% names(extras)) { @@ -142,28 +129,26 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, call. = FALSE) } - Task <- lmtp_Task$new( - data = data, - trt = trt, - outcome = outcome, - time_vary = time_vary, - baseline = baseline, - cens = cens, - k = k, - shift = shift, - shifted = shifted, - id = id, - outcome_type = match.arg(outcome_type), - V = folds, - weights = weights, - bounds = bounds, - bound = .bound - ) - - pb <- progressr::progressor(Task$tau*folds*2) - - ratios <- cf_r(Task, learners_trt, mtp, .learners_trt_folds, .trim, .return_full_fits, pb) - estims <- cf_tmle(Task, "tmp_lmtp_scaled_outcome", ratios$ratios, learners_outcome, .learners_outcome_folds, .return_full_fits, pb) + Task <- lmtp_Task$new(data = data, + trt = trt, + outcome = outcome, + time_vary = time_vary, + baseline = baseline, + cens = cens, + k = k, + shift = shift, + shifted = shifted, + id = id, + outcome_type = match.arg(outcome_type), + V = folds, + weights = weights, + bounds = bounds) + + progress_bar <- progressr::progressor(Task$tau*folds*2) + + ratios <- cf_r(Task, learners_trt, mtp, control, progress_bar) + estims <- cf_tmle(Task, "tmp_lmtp_scaled_outcome", ratios$ratios, + learners_outcome, control, progress_bar) theta_dr( list( @@ -239,19 +224,8 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. -#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .trim \[\code{numeric(1)}\]\cr -#' Determines the amount the density ratios should be trimmed. -#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile -#' to the 0.999 percentile. A value of 1 indicates no trimming. -#' @param .learners_outcome_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_outcome}. -#' @param .learners_trt_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners_trt}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. #' @param ... Extra arguments. Exists for backwards compatibility. #' #' @details @@ -291,9 +265,8 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, id = NULL, bounds = NULL, learners_outcome = "glm", learners_trt = "glm", - folds = 10, weights = NULL, .bound = 1e-5, .trim = 0.999, - .learners_outcome_folds = 10, .learners_trt_folds = 10, - .return_full_fits = FALSE, ...) { + folds = 10, weights = NULL, + control = lmtp_control(), ...) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -316,30 +289,26 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_outcome_folds, null.ok = TRUE) - checkmate::assertNumber(.learners_trt_folds, null.ok = TRUE) checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertNumber(.trim, upper = 1) - checkmate::assertLogical(.return_full_fits, len = 1) - - Task <- lmtp_Task$new( - data = data, - trt = trt, - outcome = outcome, - time_vary = time_vary, - baseline = baseline, - cens = cens, - k = k, - shift = shift, - shifted = shifted, - id = id, - outcome_type = match.arg(outcome_type), - V = folds, - weights = weights, - bounds = bounds, - bound = .bound - ) + checkmate::assertNumber(control$.learners_outcome_folds, null.ok = TRUE) + checkmate::assertNumber(control$.learners_trt_folds, null.ok = TRUE) + checkmate::assertNumber(control$.trim, upper = 1) + checkmate::assertLogical(control$.return_full_fits, len = 1) + + Task <- lmtp_Task$new(data = data, + trt = trt, + outcome = outcome, + time_vary = time_vary, + baseline = baseline, + cens = cens, + k = k, + shift = shift, + shifted = shifted, + id = id, + outcome_type = match.arg(outcome_type), + V = folds, + weights = weights, + bounds = bounds) extras <- list(...) if ("intervention_type" %in% names(extras)) { @@ -348,10 +317,11 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, call. = FALSE) } - pb <- progressr::progressor(Task$tau*folds*2) + progress_bar <- progressr::progressor(Task$tau*folds*2) - ratios <- cf_r(Task, learners_trt, mtp, .learners_trt_folds, .trim, .return_full_fits, pb) - estims <- cf_sdr(Task, "tmp_lmtp_scaled_outcome", ratios$ratios, learners_outcome, .learners_outcome_folds, .return_full_fits, pb) + ratios <- cf_r(Task, learners_trt, mtp, control, progress_bar) + estims <- cf_sdr(Task, "tmp_lmtp_scaled_outcome", ratios$ratios, + learners_outcome, control, progress_bar) theta_dr( list( @@ -423,13 +393,8 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. -#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .learners_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. #' #' @return A list of class \code{lmtp} containing the following components: #' @@ -451,8 +416,8 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens shift = NULL, shifted = NULL, k = Inf, outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, learners = "glm", - folds = 10, weights = NULL, .bound = 1e-5, .learners_folds = 10, - .return_full_fits = FALSE) { + folds = 10, weights = NULL, + control = lmtp_control()) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -475,32 +440,28 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_folds, null.ok = TRUE) checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertLogical(.return_full_fits, len = 1) - - Task <- lmtp_Task$new( - data = data, - trt = trt, - outcome = outcome, - time_vary = time_vary, - baseline = baseline, - cens = cens, - k = k, - shift = shift, - shifted = shifted, - id = id, - outcome_type = match.arg(outcome_type), - V = folds, - weights = weights, - bounds = bounds, - bound = .bound - ) - - pb <- progressr::progressor(Task$tau*folds) - - estims <- cf_sub(Task, "tmp_lmtp_scaled_outcome", learners, .learners_folds, .return_full_fits, pb) + checkmate::assertNumber(control$.learners_trt_folds, null.ok = TRUE) + checkmate::assertLogical(control$.return_full_fits, len = 1) + + Task <- lmtp_Task$new(data = data, + trt = trt, + outcome = outcome, + time_vary = time_vary, + baseline = baseline, + cens = cens, + k = k, + shift = shift, + shifted = shifted, + id = id, + outcome_type = match.arg(outcome_type), + V = folds, + weights = weights, + bounds = bounds) + + progress_bar <- progressr::progressor(Task$tau*folds) + + estims <- cf_sub(Task, "tmp_lmtp_scaled_outcome", learners, control, progress_bar) theta_sub( eta = list( @@ -565,17 +526,8 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr #' An optional vector containing sampling weights. -#' @param .bound \[\code{numeric(1)}\]\cr -#' Determines that maximum and minimum values (scaled) predictions -#' will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999. -#' @param .trim \[\code{numeric(1)}\]\cr -#' Determines the amount the density ratios should be trimmed. -#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile -#' to the 0.999 percentile. A value of 1 indicates no trimming. -#' @param .learners_folds \[\code{integer(1)}\]\cr -#' The number of cross-validation folds for \code{learners}. -#' @param .return_full_fits \[\code{logical(1)}\]\cr -#' Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights. +#' @param control \[\code{list()}\]\cr +#' Output of \code{lmtp_control()}. #' @param ... Extra arguments. Exists for backwards compatibility. #' #' @details @@ -607,8 +559,7 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens outcome_type = c("binomial", "continuous", "survival"), learners = "glm", folds = 10, weights = NULL, - .bound = 1e-5, .trim = 0.999, .learners_folds = 10, - .return_full_fits = FALSE, ...) { + control = lmtp_control(), ...) { assertNotDataTable(data) checkmate::assertCharacter(outcome, len = if (match.arg(outcome_type) != "survival") 1, @@ -630,31 +581,27 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens checkmate::assertNumeric(weights, len = nrow(data), finite = TRUE, any.missing = FALSE, null.ok = TRUE) checkmate::assertNumber(k, lower = 0, upper = Inf) checkmate::assertNumber(folds, lower = 1, upper = nrow(data) - 1) - checkmate::assertNumber(.learners_folds, null.ok = TRUE) checkmate::assertSubset(c(trt, outcome, baseline, unlist(time_vary), cens, id), names(data)) - checkmate::assertNumber(.bound) - checkmate::assertNumber(.trim, upper = 1) - checkmate::assertLogical(.return_full_fits, len = 1) - - Task <- lmtp_Task$new( - data = data, - trt = trt, - outcome = outcome, - time_vary = time_vary, - baseline = baseline, - cens = cens, - k = k, - shift = shift, - shifted = shifted, - id = id, - outcome_type = match.arg(outcome_type), - V = folds, - weights = weights, - bounds = NULL, - bound = .bound - ) - - pb <- progressr::progressor(Task$tau*folds) + checkmate::assertNumber(control$.learners_outcome_folds, null.ok = TRUE) + checkmate::assertNumber(control$.trim, upper = 1) + checkmate::assertLogical(control$.return_full_fits, len = 1) + + Task <- lmtp_Task$new(data = data, + trt = trt, + outcome = outcome, + time_vary = time_vary, + baseline = baseline, + cens = cens, + k = k, + shift = shift, + shifted = shifted, + id = id, + outcome_type = match.arg(outcome_type), + V = folds, + weights = weights, + bounds = NULL) + + progress_bar <- progressr::progressor(Task$tau*folds) extras <- list(...) if ("intervention_type" %in% names(extras)) { @@ -663,7 +610,7 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens call. = FALSE) } - ratios <- cf_r(Task, learners, mtp, .learners_folds, .trim, .return_full_fits, pb) + ratios <- cf_r(Task, learners, mtp, control, progress_bar) theta_ipw( eta = list( diff --git a/R/gcomp.R b/R/gcomp.R index 024ab09..719803a 100644 --- a/R/gcomp.R +++ b/R/gcomp.R @@ -1,16 +1,19 @@ -cf_sub <- function(Task, outcome, learners, lrnr_folds, full_fits, pb) { +cf_sub <- function(Task, outcome, learners, control, progress_bar) { out <- list() for (fold in seq_along(Task$folds)) { out[[fold]] <- future::future({ - estimate_sub( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted, Task$folds, fold), - outcome, - Task$node_list$outcome, Task$cens, - Task$risk, Task$tau, Task$outcome_type, - learners, lrnr_folds, pb, full_fits - ) + estimate_sub(get_folded_data(Task$natural, Task$folds, fold), + get_folded_data(Task$shifted, Task$folds, fold), + outcome, + Task$node_list$outcome, + Task$cens, + Task$risk, + Task$tau, + Task$outcome_type, + learners, + control, + progress_bar) }, seed = TRUE) } @@ -24,7 +27,7 @@ cf_sub <- function(Task, outcome, learners, lrnr_folds, full_fits, pb) { } estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, - tau, outcome_type, learners, lrnr_folds, pb, full_fits) { + tau, outcome_type, learners, control, progress_bar) { m <- matrix(nrow = nrow(natural$valid), ncol = tau) fits <- list() @@ -51,9 +54,10 @@ estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, learners, outcome_type, "lmtp_id", - lrnr_folds) + control$.learners_outcome_metalearner, + control$.learners_outcome_folds) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) @@ -65,7 +69,7 @@ estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, natural$train[!rt, pseudo] <- 0 m[!rv, t] <- 0 - pb() + progress_bar() } list(m = m, fits = fits) diff --git a/R/lmtp_Task.R b/R/lmtp_Task.R index 74795bd..edb07fe 100644 --- a/R/lmtp_Task.R +++ b/R/lmtp_Task.R @@ -16,7 +16,7 @@ lmtp_Task <- R6::R6Class( bounds = NULL, folds = NULL, weights = NULL, - initialize = function(data, trt, outcome, time_vary, baseline, cens, k, shift, shifted, id, outcome_type = NULL, V = 10, weights = NULL, bounds = NULL, bound = NULL) { + initialize = function(data, trt, outcome, time_vary, baseline, cens, k, shift, shifted, id, outcome_type = NULL, V = 10, weights = NULL, bounds = NULL) { self$tau <- determine_tau(outcome, trt) self$n <- nrow(data) self$trt <- trt diff --git a/R/lmtp_options.R b/R/lmtp_options.R index 508a2f3..23b83fb 100644 --- a/R/lmtp_options.R +++ b/R/lmtp_options.R @@ -1,7 +1,26 @@ +#' Set LMTP Estimation Parameters +#' +#' @param .trim \[\code{numeric(1)}\]\cr +#' Determines the amount the density ratios should be trimmed. +#' The default is 0.999, trimming the density ratios greater than the 0.999 percentile +#' to the 0.999 percentile. A value of 1 indicates no trimming. +#' @param .learners_trt_metalearner \[\code{character(1)}\]\cr +#' @param .learners_outcome_metalearner \[\code{character(1)}\]\cr +#' @param .learners_outcome_folds \[\code{integer(1)}\]\cr +#' The number of cross-validation folds for \code{learners_outcome}. +#' @param .learners_trt_folds \[\code{integer(1)}\]\cr +#' The number of cross-validation folds for \code{learners_trt}. +#' @param .return_full_fits \[\code{logical(1)}\]\cr +#' Return full \code{mlr3superlearner} fits? Default is \code{FALSE}. +#' +#' @return A list of parameters controlling the estimation procedure. +#' @export +#' +#' @examples +#' lmtp_control(.trim = 0.975) lmtp_control <- function(...) { change <- list(...) - control <- list(.bound = 1e-5, - .trim = 0.999, + control <- list(.trim = 0.999, .learners_trt_metalearner = "glm", .learners_outcome_metalearner = "glm", .learners_outcome_folds = 10, diff --git a/R/sdr.R b/R/sdr.R index c71f01d..e4c478b 100644 --- a/R/sdr.R +++ b/R/sdr.R @@ -1,35 +1,39 @@ -cf_sdr <- function(Task, outcome, ratios, learners, lrnr_folds, full_fits, pb) { +cf_sdr <- function(task, outcome, ratios, learners, control, progress_bar) { out <- list() - for (fold in seq_along(Task$folds)) { + for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ - estimate_sdr( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted, Task$folds, fold), - outcome, Task$node_list$outcome, - Task$cens, Task$risk, Task$tau, Task$outcome_type, - get_folded_data(ratios, Task$folds, fold)$train, - learners, lrnr_folds, pb, full_fits - ) + estimate_sdr(get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted, task$folds, fold), + outcome, task$node_list$outcome, + task$cens, + task$risk, + task$tau, + task$outcome_type, + get_folded_data(ratios, task$folds, fold)$train, + learners, + control, + progress_bar) }, seed = TRUE) } out <- future::value(out) - list( - natural = recombine_outcome(out, "natural", Task$folds), - shifted = recombine_outcome(out, "shifted", Task$folds), - fits = lapply(out, function(x) x[["fits"]]) - ) + list(natural = recombine_outcome(out, "natural", task$folds), + shifted = recombine_outcome(out, "shifted", task$folds), + fits = lapply(out, function(x) x[["fits"]])) } estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, - outcome_type, ratios, learners, lrnr_folds, pb, full_fits) { + outcome_type, ratios, learners, control, progress_bar) { + + m_natural_train <- m_shifted_train <- cbind(matrix(nrow = nrow(natural$train), + ncol = tau), + natural$train[[outcome]]) - m_natural_train <- m_shifted_train <- - cbind(matrix(nrow = nrow(natural$train), ncol = tau), natural$train[[outcome]]) - m_natural_valid <- m_shifted_valid <- - cbind(matrix(nrow = nrow(natural$valid), ncol = tau), natural$valid[[outcome]]) + m_natural_valid <- m_shifted_valid <- cbind(matrix(nrow = nrow(natural$valid), + ncol = tau), + natural$valid[[outcome]]) fits <- list() @@ -51,11 +55,17 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners, outcome_type, "lmtp_id", - lrnr_folds) + control$.learners_outcome_metalearner, + control$.learners_outcome_folds) } if (t < tau) { - densratio <- transform_sdr(ratio_sdr(ratios, t, tau), t, tau, m_shifted_train, m_natural_train) + densratio <- transform_sdr(ratio_sdr(ratios, t, tau), + t, + tau, + m_shifted_train, + m_natural_train) + natural$train[, pseudo] <- shifted$train[, pseudo] <- densratio learners <- check_variation(natural$train[i & rt, ][[pseudo]], learners) @@ -65,10 +75,11 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners, "continuous", "lmtp_id", - lrnr_folds) + control$.learners_outcome_metalearner, + control$.learners_outcome_folds) } - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) @@ -84,14 +95,12 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, m_natural_valid[!rv, t] <- 0 m_shifted_valid[!rv, t] <- 0 - pb() + progress_bar() } - list( - natural = m_natural_valid, - shifted = m_shifted_valid, - fits = fits - ) + list(natural = m_natural_valid, + shifted = m_shifted_valid, + fits = fits) } ratio_sdr <- function(ratio, tau, max_tau) { diff --git a/R/sl.R b/R/sl.R index 430d142..fbdc711 100644 --- a/R/sl.R +++ b/R/sl.R @@ -5,20 +5,21 @@ check_variation <- function(outcome, learners) { learners } -run_ensemble <- function(data, y, learners, outcome_type, id, folds) { +run_ensemble <- function(data, y, learners, outcome_type, id, metalearner, folds) { fit <- mlr3superlearner(data = data, target = y, library = learners, + metalearner = metalearner, outcome_type = outcome_type, folds = folds, group = id) - if (all(fit$weights$coef == 0)) { - warning("SuperLearner fit failed. Trying main-effects GLM.", call. = FALSE) - tmp <- data[, setdiff(names(data), c(y, id))] - tmp$lmtp_tmp_outcome_vector <- data[[y]] - fit <- glm(lmtp_tmp_outcome_vector ~ ., data = tmp, family = ifelse(outcome_type == "continuous", "gaussian", "binomial")) - } + # if (all(fit$weights$coef == 0)) { + # warning("SuperLearner fit failed. Trying main-effects GLM.", call. = FALSE) + # tmp <- data[, setdiff(names(data), c(y, id))] + # tmp$lmtp_tmp_outcome_vector <- data[[y]] + # fit <- glm(lmtp_tmp_outcome_vector ~ ., data = tmp, family = ifelse(outcome_type == "continuous", "gaussian", "binomial")) + # } fit } @@ -26,5 +27,5 @@ SL_predict <- function(fit, newdata) { if (inherits(fit, "glm")) { return(as.vector(predict(fit, newdata, type = "response"))) } - predict(fit, newdata)[, 1] + predict(fit, newdata) } diff --git a/R/tmle.R b/R/tmle.R index d410b5a..4b17953 100644 --- a/R/tmle.R +++ b/R/tmle.R @@ -1,37 +1,38 @@ -cf_tmle <- function(Task, outcome, ratios, learners, lrnr_folds, full_fits, pb) { +cf_tmle <- function(task, outcome, ratios, learners, control, progress_bar) { out <- list() - ratios <- matrix( - t(apply(ratios, 1, cumprod)), - nrow = nrow(ratios), - ncol = ncol(ratios) - ) + ratios <- matrix(t(apply(ratios, 1, cumprod)), + nrow = nrow(ratios), + ncol = ncol(ratios)) - for (fold in seq_along(Task$folds)) { + for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ - estimate_tmle( - get_folded_data(Task$natural, Task$folds, fold), - get_folded_data(Task$shifted, Task$folds, fold), - outcome, Task$node_list$outcome, Task$cens, Task$risk, - Task$tau, Task$outcome_type, - get_folded_data(ratios, Task$folds, fold)$train, - Task$weights[Task$folds[[fold]]$training_set], - learners, lrnr_folds, pb, full_fits - ) + estimate_tmle(get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted, task$folds, fold), + outcome, + task$node_list$outcome, + task$cens, + task$risk, + task$tau, + task$outcome_type, + get_folded_data(ratios, task$folds, fold)$train, + task$weights[task$folds[[fold]]$training_set], + learners, + control, + progress_bar) }, seed = TRUE) } out <- future::value(out) - list( - natural = recombine_outcome(out, "natural", Task$folds), - shifted = cbind(recombine_outcome(out, "shifted", Task$folds), Task$natural[["tmp_lmtp_scaled_outcome"]]), - fits = lapply(out, function(x) x[["fits"]]) - ) + list(natural = recombine_outcome(out, "natural", task$folds), + shifted = cbind(recombine_outcome(out, "shifted", task$folds), + task$natural[["tmp_lmtp_scaled_outcome"]]), + fits = lapply(out, function(x) x[["fits"]])) } -estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, outcome_type, ratios, weights, learners, lrnr_folds, pb, full_fits) { +estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, outcome_type, ratios, weights, learners, control, progress_bar) { m_natural_train <- m_shifted_train <- matrix(nrow = nrow(natural$train), ncol = tau) m_natural_valid <- m_shifted_valid <- matrix(nrow = nrow(natural$valid), ncol = tau) @@ -58,9 +59,10 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners, outcome_type, "lmtp_id", - lrnr_folds) + control$.learners_outcome_metalearner, + control$.learners_outcome_folds) - if (full_fits) { + if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) @@ -94,12 +96,10 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, m_natural_valid[!rv, t] <- 0 m_shifted_valid[!rv, t] <- 0 - pb() + progress_bar() } - list( - natural = m_natural_valid, - shifted = m_shifted_valid, - fits = fits - ) + list(natural = m_natural_valid, + shifted = m_shifted_valid, + fits = fits) } diff --git a/R/utils.R b/R/utils.R index fb43176..dec540c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -152,7 +152,7 @@ final_outcome <- function(outcomes) { extract_sl_weights <- function(fit) { if (inherits(fit, "mlr3superlearner")) { - return(cbind(Risk = fit$weights$cvRisk, Coef = fit$weights$coef)) + return(cbind(Risk = fit$risk)) } fit$coef } diff --git a/man/lmtp_control.Rd b/man/lmtp_control.Rd new file mode 100644 index 0000000..983e950 --- /dev/null +++ b/man/lmtp_control.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lmtp_options.R +\name{lmtp_control} +\alias{lmtp_control} +\title{Set LMTP Estimation Parameters} +\usage{ +lmtp_control(...) +} +\arguments{ +\item{.trim}{[\code{numeric(1)}]\cr +Determines the amount the density ratios should be trimmed. +The default is 0.999, trimming the density ratios greater than the 0.999 percentile +to the 0.999 percentile. A value of 1 indicates no trimming.} + +\item{.learners_trt_metalearner}{[\code{character(1)}]\cr} + +\item{.learners_outcome_metalearner}{[\code{character(1)}]\cr} + +\item{.learners_outcome_folds}{[\code{integer(1)}]\cr +The number of cross-validation folds for \code{learners_outcome}.} + +\item{.learners_trt_folds}{[\code{integer(1)}]\cr +The number of cross-validation folds for \code{learners_trt}.} + +\item{.return_full_fits}{[\code{logical(1)}]\cr +Return full \code{mlr3superlearner} fits? Default is \code{FALSE}.} +} +\value{ +A list of parameters controlling the estimation procedure. +} +\description{ +Set LMTP Estimation Parameters +} +\examples{ +lmtp_control(.trim = 0.975) +} diff --git a/man/lmtp_ipw.Rd b/man/lmtp_ipw.Rd index 1ba54ba..de90757 100644 --- a/man/lmtp_ipw.Rd +++ b/man/lmtp_ipw.Rd @@ -20,10 +20,7 @@ lmtp_ipw( learners = "glm", folds = 10, weights = NULL, - .bound = 1e-05, - .trim = 0.999, - .learners_folds = 10, - .return_full_fits = FALSE, + control = lmtp_control(), ... ) } @@ -86,20 +83,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.trim}{[\code{numeric(1)}]\cr -Determines the amount the density ratios should be trimmed. -The default is 0.999, trimming the density ratios greater than the 0.999 percentile -to the 0.999 percentile. A value of 1 indicates no trimming.} - -\item{.learners_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} \item{...}{Extra arguments. Exists for backwards compatibility.} } diff --git a/man/lmtp_sdr.Rd b/man/lmtp_sdr.Rd index 3f64448..9023398 100644 --- a/man/lmtp_sdr.Rd +++ b/man/lmtp_sdr.Rd @@ -22,11 +22,7 @@ lmtp_sdr( learners_trt = "glm", folds = 10, weights = NULL, - .bound = 1e-05, - .trim = 0.999, - .learners_outcome_folds = 10, - .learners_trt_folds = 10, - .return_full_fits = FALSE, + control = lmtp_control(), ... ) } @@ -96,23 +92,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.trim}{[\code{numeric(1)}]\cr -Determines the amount the density ratios should be trimmed. -The default is 0.999, trimming the density ratios greater than the 0.999 percentile -to the 0.999 percentile. A value of 1 indicates no trimming.} - -\item{.learners_outcome_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_outcome}.} - -\item{.learners_trt_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_trt}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} \item{...}{Extra arguments. Exists for backwards compatibility.} } diff --git a/man/lmtp_sub.Rd b/man/lmtp_sub.Rd index 0e1f070..088fb9b 100644 --- a/man/lmtp_sub.Rd +++ b/man/lmtp_sub.Rd @@ -20,9 +20,7 @@ lmtp_sub( learners = "glm", folds = 10, weights = NULL, - .bound = 1e-05, - .learners_folds = 10, - .return_full_fits = FALSE + control = lmtp_control() ) } \arguments{ @@ -85,15 +83,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.learners_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} } \value{ A list of class \code{lmtp} containing the following components: diff --git a/man/lmtp_tmle.Rd b/man/lmtp_tmle.Rd index 6e75a0a..834da5a 100644 --- a/man/lmtp_tmle.Rd +++ b/man/lmtp_tmle.Rd @@ -22,11 +22,7 @@ lmtp_tmle( learners_trt = "glm", folds = 10, weights = NULL, - .bound = 1e-05, - .trim = 0.999, - .learners_outcome_folds = 10, - .learners_trt_folds = 10, - .return_full_fits = FALSE, + control = lmtp_control(), ... ) } @@ -96,23 +92,8 @@ The number of folds to be used for cross-fitting.} \item{weights}{[\code{numeric(nrow(data))}]\cr An optional vector containing sampling weights.} -\item{.bound}{[\code{numeric(1)}]\cr -Determines that maximum and minimum values (scaled) predictions -will be bounded by. The default is 1e-5, bounding predictions by 1e-5 and 0.9999.} - -\item{.trim}{[\code{numeric(1)}]\cr -Determines the amount the density ratios should be trimmed. -The default is 0.999, trimming the density ratios greater than the 0.999 percentile -to the 0.999 percentile. A value of 1 indicates no trimming.} - -\item{.learners_outcome_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_outcome}.} - -\item{.learners_trt_folds}{[\code{integer(1)}]\cr -The number of cross-validation folds for \code{learners_trt}.} - -\item{.return_full_fits}{[\code{logical(1)}]\cr -Return full SuperLearner fits? Default is \code{FALSE}, return only SuperLearner weights.} +\item{control}{[\code{list()}]\cr +Output of \code{lmtp_control()}.} \item{...}{Extra arguments. Exists for backwards compatibility.} } From 64af01a91e597e6aefec7710d3441ed4ba27e5b5 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Fri, 14 Jul 2023 11:00:24 -0600 Subject: [PATCH 06/16] Bug fix in EIF calc. --- R/sdr.R | 18 +----------------- R/theta.R | 12 ++++++------ R/tmle.R | 3 ++- R/utils.R | 7 +++++++ 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/R/sdr.R b/R/sdr.R index e4c478b..a9ef82d 100644 --- a/R/sdr.R +++ b/R/sdr.R @@ -60,7 +60,7 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, } if (t < tau) { - densratio <- transform_sdr(ratio_sdr(ratios, t, tau), + densratio <- transform_sdr(compute_weights(ratios, t + 1, tau), t, tau, m_shifted_train, @@ -102,19 +102,3 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, shifted = m_shifted_valid, fits = fits) } - -ratio_sdr <- function(ratio, tau, max_tau) { - out <- t( - apply( - ratio[, (tau + 1):max_tau, drop = FALSE], - 1, - cumprod - ) - ) - - if (tau != max_tau - 1) { - return(out) - } - - t(out) -} diff --git a/R/theta.R b/R/theta.R index ebb85b5..71ede56 100644 --- a/R/theta.R +++ b/R/theta.R @@ -62,15 +62,15 @@ eif <- function(r, tau, shifted, natural) { natural[is.na(natural)] <- -999 shifted[is.na(shifted)] <- -999 m <- shifted[, 2:(tau + 1), drop = FALSE] - natural[, 1:tau, drop = FALSE] - rowSums(r * m, na.rm = TRUE) + shifted[, 1] + rowSums(compute_weights(r, 1, tau) * m, na.rm = TRUE) + shifted[, 1] + # rowSums(r * m, na.rm = TRUE) + shifted[, 1] } theta_dr <- function(eta, augmented = FALSE) { - inflnce <- eif( - r = eta$r, tau = eta$tau, - shifted = eta$m$shifted, - natural = eta$m$natural - ) + inflnce <- eif(r = eta$r, + tau = eta$tau, + shifted = eta$m$shifted, + natural = eta$m$natural) theta <- { if (augmented) diff --git a/R/tmle.R b/R/tmle.R index 4b17953..31883bb 100644 --- a/R/tmle.R +++ b/R/tmle.R @@ -77,7 +77,8 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, if (is.null(weights)) ratios[i & rt, t] else - ratios[i & rt, t] * weights[i & rt] + compute_weights(ratios[i & rt, ], 1, t) * weights[i & rt] + # ratios[i & rt, t] * weights[i & rt] } fit <- sw( diff --git a/R/utils.R b/R/utils.R index dec540c..12f7270 100644 --- a/R/utils.R +++ b/R/utils.R @@ -209,3 +209,10 @@ risk_indicators <- function(x) { x[1:(length(x) - 1)] } + +compute_weights <- function(r, t, tau) { + out <- apply(r[, t:tau, drop = FALSE], 1, cumprod) + if (is.null(ncol(out))) return(out) + if (ncol(out) > ncol(r)) return(t(out)) + out +} From ab5fe054e7bb982ca14de105d3b98485191ccfde Mon Sep 17 00:00:00 2001 From: nt-williams Date: Wed, 20 Sep 2023 13:40:52 -0700 Subject: [PATCH 07/16] Updating to new version of mlr3superlearner --- R/density_ratios.R | 1 - R/estimators.R | 18 ++++++++---------- R/gcomp.R | 8 ++++---- R/lmtp_Task.R | 5 +++-- R/lmtp_options.R | 16 ++++------------ R/sdr.R | 13 ++++++------- R/sl.R | 10 +--------- R/tmle.R | 12 ++++++------ man/lmtp_control.Rd | 4 ---- man/lmtp_ipw.Rd | 2 +- man/lmtp_sdr.Rd | 4 ++-- man/lmtp_sub.Rd | 2 +- man/lmtp_tmle.Rd | 4 ++-- tests/testthat/test-checks.R | 6 +++--- tests/testthat/test-point_treatment.R | 12 +++++++----- tests/testthat/test-survey.R | 16 ++++++++-------- tests/testthat/test-time_varying_treatment.R | 4 ++-- 17 files changed, 58 insertions(+), 79 deletions(-) diff --git a/R/density_ratios.R b/R/density_ratios.R index 7492833..ca1e84f 100644 --- a/R/density_ratios.R +++ b/R/density_ratios.R @@ -43,7 +43,6 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne learners, "binomial", "lmtp_id", - control$.learners_trt_metalearner, control$.learners_trt_folds) if (control$.return_full_fits) { diff --git a/R/estimators.R b/R/estimators.R index 2da7087..43b326a 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -88,10 +88,9 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, k = Inf, mtp = FALSE, outcome_type = c("binomial", "continuous", "survival"), - # intervention_type = c("static", "dynamic", "mtp"), id = NULL, bounds = NULL, - learners_outcome = "glm", - learners_trt = "glm", + learners_outcome = c("mean", "glm"), + learners_trt = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control(), ...) { @@ -157,7 +156,7 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, r = ratios$ratios, tau = Task$tau, folds = Task$folds, - id = Task$id, + id = Task$natural$lmtp_id, outcome_type = Task$outcome_type, bounds = Task$bounds, weights = Task$weights, @@ -260,11 +259,10 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, k = Inf, mtp = FALSE, - # intervention_type = c("static", "dynamic", "mtp"), outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners_outcome = "glm", - learners_trt = "glm", + learners_outcome = c("mean", "glm"), + learners_trt = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control(), ...) { @@ -330,7 +328,7 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, r = ratios$ratios, tau = Task$tau, folds = Task$folds, - id = Task$id, + id = Task$natural$lmtp_id, outcome_type = Task$outcome_type, bounds = Task$bounds, weights = Task$weights, @@ -415,7 +413,7 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL, shift = NULL, shifted = NULL, k = Inf, outcome_type = c("binomial", "continuous", "survival"), - id = NULL, bounds = NULL, learners = "glm", + id = NULL, bounds = NULL, learners = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control()) { @@ -557,7 +555,7 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens shift = NULL, shifted = NULL, mtp = FALSE, k = Inf, id = NULL, outcome_type = c("binomial", "continuous", "survival"), - learners = "glm", + learners = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control(), ...) { diff --git a/R/gcomp.R b/R/gcomp.R index 719803a..2b3149d 100644 --- a/R/gcomp.R +++ b/R/gcomp.R @@ -9,6 +9,7 @@ cf_sub <- function(Task, outcome, learners, control, progress_bar) { Task$node_list$outcome, Task$cens, Task$risk, + Task$id, Task$tau, Task$outcome_type, learners, @@ -26,7 +27,7 @@ cf_sub <- function(Task, outcome, learners, control, progress_bar) { ) } -estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, +estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, id, tau, outcome_type, learners, control, progress_bar) { m <- matrix(nrow = nrow(natural$valid), ncol = tau) @@ -49,12 +50,11 @@ estimate_sub <- function(natural, shifted, outcome, node_list, cens, risk, learners <- check_variation(natural$train[i & rt, ][[outcome]], learners) - fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, outcome)], + fit <- run_ensemble(natural$train[i & rt, c(id, vars, outcome)], outcome, learners, outcome_type, - "lmtp_id", - control$.learners_outcome_metalearner, + id, control$.learners_outcome_folds) if (control$.return_full_fits) { diff --git a/R/lmtp_Task.R b/R/lmtp_Task.R index edb07fe..1989776 100644 --- a/R/lmtp_Task.R +++ b/R/lmtp_Task.R @@ -16,7 +16,8 @@ lmtp_Task <- R6::R6Class( bounds = NULL, folds = NULL, weights = NULL, - initialize = function(data, trt, outcome, time_vary, baseline, cens, k, shift, shifted, id, outcome_type = NULL, V = 10, weights = NULL, bounds = NULL) { + initialize = function(data, trt, outcome, time_vary, baseline, cens, k, + shift, shifted, id, outcome_type = NULL, V = 10, weights = NULL, bounds = NULL) { self$tau <- determine_tau(outcome, trt) self$n <- nrow(data) self$trt <- trt @@ -27,7 +28,7 @@ lmtp_Task <- R6::R6Class( self$survival <- outcome_type == "survival" self$bounds <- y_bounds(data[[final_outcome(outcome)]], self$outcome_type, bounds) data$lmtp_id <- create_ids(data, id) - self$id <- data$lmtp_id + self$id <- id self$folds <- setup_cv(data, data$lmtp_id, V) shifted <- { diff --git a/R/lmtp_options.R b/R/lmtp_options.R index 23b83fb..d1240ba 100644 --- a/R/lmtp_options.R +++ b/R/lmtp_options.R @@ -4,8 +4,6 @@ #' Determines the amount the density ratios should be trimmed. #' The default is 0.999, trimming the density ratios greater than the 0.999 percentile #' to the 0.999 percentile. A value of 1 indicates no trimming. -#' @param .learners_trt_metalearner \[\code{character(1)}\]\cr -#' @param .learners_outcome_metalearner \[\code{character(1)}\]\cr #' @param .learners_outcome_folds \[\code{integer(1)}\]\cr #' The number of cross-validation folds for \code{learners_outcome}. #' @param .learners_trt_folds \[\code{integer(1)}\]\cr @@ -21,17 +19,11 @@ lmtp_control <- function(...) { change <- list(...) control <- list(.trim = 0.999, - .learners_trt_metalearner = "glm", - .learners_outcome_metalearner = "glm", - .learners_outcome_folds = 10, - .learners_trt_folds = 10, + .learners_outcome_folds = NULL, + .learners_trt_folds = NULL, .return_full_fits = FALSE) - if (length(change) == 0) return(control) - - for (arg in names(change)) { - control[[arg]] <- change[[arg]] - } - + change <- change[names(change) %in% names(control)] + control[names(change)] <- change control } diff --git a/R/sdr.R b/R/sdr.R index a9ef82d..21b434d 100644 --- a/R/sdr.R +++ b/R/sdr.R @@ -7,6 +7,7 @@ cf_sdr <- function(task, outcome, ratios, learners, control, progress_bar) { outcome, task$node_list$outcome, task$cens, task$risk, + task$id, task$tau, task$outcome_type, get_folded_data(ratios, task$folds, fold)$train, @@ -24,7 +25,7 @@ cf_sdr <- function(task, outcome, ratios, learners, control, progress_bar) { fits = lapply(out, function(x) x[["fits"]])) } -estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, +estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, id, tau, outcome_type, ratios, learners, control, progress_bar) { m_natural_train <- m_shifted_train <- cbind(matrix(nrow = nrow(natural$train), @@ -50,12 +51,11 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, if (t == tau) { learners <- check_variation(natural$train[i & rt, ][[outcome]], learners) - fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, outcome)], + fit <- run_ensemble(natural$train[i & rt, c(id, vars, outcome)], outcome, learners, outcome_type, - "lmtp_id", - control$.learners_outcome_metalearner, + id, control$.learners_outcome_folds) } @@ -70,12 +70,11 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners <- check_variation(natural$train[i & rt, ][[pseudo]], learners) - fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, pseudo)], + fit <- run_ensemble(natural$train[i & rt, c(id, vars, pseudo)], pseudo, learners, "continuous", - "lmtp_id", - control$.learners_outcome_metalearner, + id, control$.learners_outcome_folds) } diff --git a/R/sl.R b/R/sl.R index fbdc711..8f50af6 100644 --- a/R/sl.R +++ b/R/sl.R @@ -5,21 +5,13 @@ check_variation <- function(outcome, learners) { learners } -run_ensemble <- function(data, y, learners, outcome_type, id, metalearner, folds) { +run_ensemble <- function(data, y, learners, outcome_type, id, folds) { fit <- mlr3superlearner(data = data, target = y, library = learners, - metalearner = metalearner, outcome_type = outcome_type, folds = folds, group = id) - - # if (all(fit$weights$coef == 0)) { - # warning("SuperLearner fit failed. Trying main-effects GLM.", call. = FALSE) - # tmp <- data[, setdiff(names(data), c(y, id))] - # tmp$lmtp_tmp_outcome_vector <- data[[y]] - # fit <- glm(lmtp_tmp_outcome_vector ~ ., data = tmp, family = ifelse(outcome_type == "continuous", "gaussian", "binomial")) - # } fit } diff --git a/R/tmle.R b/R/tmle.R index 31883bb..1441377 100644 --- a/R/tmle.R +++ b/R/tmle.R @@ -13,6 +13,7 @@ cf_tmle <- function(task, outcome, ratios, learners, control, progress_bar) { task$node_list$outcome, task$cens, task$risk, + task$id, task$tau, task$outcome_type, get_folded_data(ratios, task$folds, fold)$train, @@ -32,7 +33,8 @@ cf_tmle <- function(task, outcome, ratios, learners, control, progress_bar) { fits = lapply(out, function(x) x[["fits"]])) } -estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, outcome_type, ratios, weights, learners, control, progress_bar) { +estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, id, + tau, outcome_type, ratios, weights, learners, control, progress_bar) { m_natural_train <- m_shifted_train <- matrix(nrow = nrow(natural$train), ncol = tau) m_natural_valid <- m_shifted_valid <- matrix(nrow = nrow(natural$valid), ncol = tau) @@ -54,12 +56,11 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, learners <- check_variation(natural$train[i & rt, ][[outcome]], learners) - fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, outcome)], + fit <- run_ensemble(natural$train[i & rt, c(id, vars, outcome)], outcome, learners, outcome_type, - "lmtp_id", - control$.learners_outcome_metalearner, + id, control$.learners_outcome_folds) if (control$.return_full_fits) { @@ -77,8 +78,7 @@ estimate_tmle <- function(natural, shifted, outcome, node_list, cens, risk, tau, if (is.null(weights)) ratios[i & rt, t] else - compute_weights(ratios[i & rt, ], 1, t) * weights[i & rt] - # ratios[i & rt, t] * weights[i & rt] + ratios[i & rt, t] * weights[i & rt] } fit <- sw( diff --git a/man/lmtp_control.Rd b/man/lmtp_control.Rd index 983e950..89e4451 100644 --- a/man/lmtp_control.Rd +++ b/man/lmtp_control.Rd @@ -12,10 +12,6 @@ Determines the amount the density ratios should be trimmed. The default is 0.999, trimming the density ratios greater than the 0.999 percentile to the 0.999 percentile. A value of 1 indicates no trimming.} -\item{.learners_trt_metalearner}{[\code{character(1)}]\cr} - -\item{.learners_outcome_metalearner}{[\code{character(1)}]\cr} - \item{.learners_outcome_folds}{[\code{integer(1)}]\cr The number of cross-validation folds for \code{learners_outcome}.} diff --git a/man/lmtp_ipw.Rd b/man/lmtp_ipw.Rd index de90757..bf44a4a 100644 --- a/man/lmtp_ipw.Rd +++ b/man/lmtp_ipw.Rd @@ -17,7 +17,7 @@ lmtp_ipw( k = Inf, id = NULL, outcome_type = c("binomial", "continuous", "survival"), - learners = "glm", + learners = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control(), diff --git a/man/lmtp_sdr.Rd b/man/lmtp_sdr.Rd index 9023398..95aab95 100644 --- a/man/lmtp_sdr.Rd +++ b/man/lmtp_sdr.Rd @@ -18,8 +18,8 @@ lmtp_sdr( outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners_outcome = "glm", - learners_trt = "glm", + learners_outcome = c("mean", "glm"), + learners_trt = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control(), diff --git a/man/lmtp_sub.Rd b/man/lmtp_sub.Rd index 088fb9b..8d9dc5a 100644 --- a/man/lmtp_sub.Rd +++ b/man/lmtp_sub.Rd @@ -17,7 +17,7 @@ lmtp_sub( outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners = "glm", + learners = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control() diff --git a/man/lmtp_tmle.Rd b/man/lmtp_tmle.Rd index 834da5a..e73b436 100644 --- a/man/lmtp_tmle.Rd +++ b/man/lmtp_tmle.Rd @@ -18,8 +18,8 @@ lmtp_tmle( outcome_type = c("binomial", "continuous", "survival"), id = NULL, bounds = NULL, - learners_outcome = "glm", - learners_trt = "glm", + learners_outcome = c("mean", "glm"), + learners_trt = c("mean", "glm"), folds = 10, weights = NULL, control = lmtp_control(), diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 31cce90..010660e 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -154,13 +154,13 @@ test_that("Contrast assertions", { L <- list(c("L1"), c("L2")) cens <- c("C1", "C2") - fit <- lmtp_sub(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1) + fit <- suppressWarnings(lmtp_sub(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1)) expect_error( lmtp_contrast(fit, ref = 0.1), "Assertion on 'fits' failed: Contrasts not implemented for substitution/IPW estimators." ) - fit <- lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1) + fit <- suppressWarnings(lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1)) expect_error( lmtp_contrast(fit, ref = c(0.1, 0.2)), "Assertion on 'ref' failed: Must either be a single numeric value or another lmtp object." @@ -171,7 +171,7 @@ test_that("Contrast assertions", { "Assertion on 'fits' failed: Objects must be of type 'lmtp'." ) - fit <- lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1, outcome_type = "continuous") + fit <- suppressWarnings(lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = cens, folds = 1, outcome_type = "continuous")) expect_error( lmtp_contrast(fit, ref = 0.1, type = "rr"), "Assertion on 'type' failed: 'rr' specified but one or more outcome types are not 'binomial' or 'survival'." diff --git a/tests/testthat/test-point_treatment.R b/tests/testthat/test-point_treatment.R index 638e103..4aa03c3 100644 --- a/tests/testthat/test-point_treatment.R +++ b/tests/testthat/test-point_treatment.R @@ -6,17 +6,19 @@ n <- 1e4 W1 <- rbinom(n, size = 1, prob = 0.5) W2 <- rbinom(n, size = 1, prob = 0.65) A <- rbinom(n, size = 1, prob = plogis(-0.4 + 0.2 * W2 + 0.15 * W1)) -Y.1 <-rbinom(n, size = 1, prob = plogis(-1 + 1 - 0.1 * W1 + 0.3 * W2)) +Y.1 <- rbinom(n, size = 1, prob = plogis(-1 + 1 - 0.1 * W1 + 0.3 * W2)) Y.0 <- rbinom(n, size = 1, prob = plogis(-1 + 0 - 0.1 * W1 + 0.3 * W2)) Y <- Y.1 * A + Y.0 * (1 - A) tmp <- data.frame(W1, W2, A, Y, Y.1, Y.0) truth <- mean(tmp$Y.1) -sub <- lmtp_sub(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) -ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) -tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) -sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) +suppressWarnings({ + sub <- lmtp_sub(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) + ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) + tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) + sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) +}) # tests test_that("point treatment fidelity", { diff --git a/tests/testthat/test-survey.R b/tests/testthat/test-survey.R index 4cef245..36079dd 100644 --- a/tests/testthat/test-survey.R +++ b/tests/testthat/test-survey.R @@ -18,17 +18,17 @@ S <- rbinom(n, 1, prob_S) tmp <- tmp[S == 1, ] wts <- 1 / prob_S[S == 1] -sub <- lmtp_sub(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, - weights = wts, folds = 2) +sub <- sw(lmtp_sub(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, + weights = wts, folds = 2)) -ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, - weights = wts, folds = 2) +ipw <- sw(lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, + weights = wts, folds = 2)) -tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, - weights = wts, folds = 2) +tmle <- sw(lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, + weights = wts, folds = 2)) -sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, - weights = wts, folds = 2) +sdr <- sw(lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, + weights = wts, folds = 2)) # tests diff --git a/tests/testthat/test-time_varying_treatment.R b/tests/testthat/test-time_varying_treatment.R index dc79793..4c9fbd0 100644 --- a/tests/testthat/test-time_varying_treatment.R +++ b/tests/testthat/test-time_varying_treatment.R @@ -5,7 +5,7 @@ a <- c("A_1", "A_2", "A_3", "A_4") time_varying <- list(c("L_1"), c("L_2"), c("L_3"), c("L_4")) for (i in a) { - tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = TRUE) + tmp[[i]] <- factor(tmp[[i]], levels = 0:5, ordered = FALSE) } d <- function(data, trt) { @@ -18,7 +18,7 @@ d <- function(data, trt) { out[[i]] <- as.numeric(as.character(a[i])) - 1 } } - factor(unlist(out), levels = 0:5, ordered = TRUE) + factor(unlist(out), levels = 0:5, ordered = FALSE) } truth <- 0.305 From 0218e887a6a99ef01cd734dcf1ad0b1ee451e1b4 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Wed, 20 Sep 2023 13:46:16 -0700 Subject: [PATCH 08/16] Update README --- README.md | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 5b7ce49..22c9934 100644 --- a/README.md +++ b/README.md @@ -51,24 +51,7 @@ Studies](https://muse.jhu.edu/article/883479). ## Installation ``` r -<<<<<<< HEAD remotes::install_github("nt-williams/lmtp@mlr3superlearner") -======= -install.packages("lmtp") -``` - -The stable, development version can be installed from GitHub with: - -``` r -devtools::install_github("nt-williams/lmtp@devel") -``` - -A version allowing for different covariates sets for the treatment, -censoring, and outcome regressions: - -``` r -devtools::install_github("nt-williams/lmtp@separate-variable-sets") ->>>>>>> master ``` ## What even is a modified treatment policy? @@ -114,6 +97,9 @@ regimes for binary, continuous, and survival outcomes. ``` r library(lmtp) +#> Loading required package: mlr3superlearner +#> Loading required package: mlr3learners +#> Loading required package: mlr3 # the data: 4 treatment nodes with time varying covariates and a binary outcome head(sim_t4) From d95a1ab9af9370929e958cccc38391f50e69a2a7 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Tue, 26 Sep 2023 09:47:03 -0700 Subject: [PATCH 09/16] Complete merge --- cran-comments.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100755 cran-comments.md diff --git a/cran-comments.md b/cran-comments.md new file mode 100755 index 0000000..5d2efc6 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,13 @@ +## Version 1.3.2 submission + +## Test environments + +- local R installation (Mac OS), R release +- local R installation (Mac OS), R old release +- Windows-latest (on GitHub actions), R release +- Windows (Win-builder), R devel +- Windows (Win-builder), R old release + +## R CMD check results + +0 errors \| 0 warnings \| 0 notes From 124f42faed0090e88e396580554a9fcf1d0e3b4e Mon Sep 17 00:00:00 2001 From: nt-williams Date: Mon, 6 Nov 2023 08:38:46 -0800 Subject: [PATCH 10/16] stratified folds for CV --- R/lmtp_Task.R | 2 +- R/utils.R | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/R/lmtp_Task.R b/R/lmtp_Task.R index 1989776..606d786 100644 --- a/R/lmtp_Task.R +++ b/R/lmtp_Task.R @@ -29,7 +29,7 @@ lmtp_Task <- R6::R6Class( self$bounds <- y_bounds(data[[final_outcome(outcome)]], self$outcome_type, bounds) data$lmtp_id <- create_ids(data, id) self$id <- id - self$folds <- setup_cv(data, data$lmtp_id, V) + self$folds <- setup_cv(data, V, data$lmtp_id, final_outcome(outcome), self$outcome_type) shifted <- { if (is.null(shifted) && !is.null(shift)) diff --git a/R/utils.R b/R/utils.R index 2c40155..a4653d9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,8 +6,15 @@ determine_tau <- function(outcome, trt) { length(outcome) } -setup_cv <- function(data, id, V = 10) { - out <- origami::make_folds(data, cluster_ids = id, V = V) +setup_cv <- function(data, V = 10, id, strata, outcome_type) { + if (length(unique(id)) == nrow(data) & outcome_type == "binomial") { + strata <- data[[strata]] + strata[is.na(strata)] <- 2 + out <- origami::make_folds(data, V = V, strata_ids = strata) + } else { + out <- origami::make_folds(data, cluster_ids = id, V = V) + } + if (V > 1) { return(out) } From 39e8916bd850942883b5f5de7ea3ec4a4b5c6161 Mon Sep 17 00:00:00 2001 From: nt-williams Date: Thu, 21 Mar 2024 12:50:37 -0700 Subject: [PATCH 11/16] Fix for issue #130 --- R/sdr.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/sdr.R b/R/sdr.R index e44c2d2..9eb3043 100644 --- a/R/sdr.R +++ b/R/sdr.R @@ -78,12 +78,15 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, id, t control$.learners_outcome_folds) } +<<<<<<< HEAD if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) } +======= +>>>>>>> 3d968bb (Fix for issue #130) trt_var <- names(shifted$train)[t] under_shift_train <- natural$train[jt & rt, vars] under_shift_train[[trt_var]] <- shifted$train[jt & rt, trt_var] From 7c145f6455487faa1396c4d3512308e0eca99eb0 Mon Sep 17 00:00:00 2001 From: Herb Susmann Date: Fri, 5 Apr 2024 13:50:49 -0400 Subject: [PATCH 12/16] Fixed merge conflict --- R/sdr.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/sdr.R b/R/sdr.R index 9eb3043..e44c2d2 100644 --- a/R/sdr.R +++ b/R/sdr.R @@ -78,15 +78,12 @@ estimate_sdr <- function(natural, shifted, outcome, node_list, cens, risk, id, t control$.learners_outcome_folds) } -<<<<<<< HEAD if (control$.return_full_fits) { fits[[t]] <- fit } else { fits[[t]] <- extract_sl_weights(fit) } -======= ->>>>>>> 3d968bb (Fix for issue #130) trt_var <- names(shifted$train)[t] under_shift_train <- natural$train[jt & rt, vars] under_shift_train[[trt_var]] <- shifted$train[jt & rt, trt_var] From a1b434f9dce4c7c5bb2a217cce62b85f30483f37 Mon Sep 17 00:00:00 2001 From: Herb Susmann Date: Mon, 22 Apr 2024 16:37:01 -0400 Subject: [PATCH 13/16] Added option for Riesz Representer estimation using empirical loss minimization --- R/estimators.R | 35 +++++++++++++++++----- R/riesz_representer.R | 69 +++++++++++++++++++++++++++++++++++++++++++ R/sl_riesz.R | 30 +++++++++++++++++++ R/theta.R | 9 +++++- R/tmle.R | 10 ++++--- 5 files changed, 140 insertions(+), 13 deletions(-) create mode 100644 R/riesz_representer.R create mode 100644 R/sl_riesz.R diff --git a/R/estimators.R b/R/estimators.R index 43b326a..fd1ff6f 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -48,6 +48,8 @@ #' Should be left as \code{NULL} if the outcome type is binary. #' @param learners_outcome \[\code{character}\]\cr #' @param learners_trt \[\code{character}\]\cr \bold{Only include candidate learners capable of binary classification}. +#' @param trt_method \[\code{character}\]\cr +#' Method for estimating treatment assignment mechanism (default or riesz) #' @param folds \[\code{integer(1)}\]\cr #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr @@ -91,6 +93,7 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, id = NULL, bounds = NULL, learners_outcome = c("mean", "glm"), learners_trt = c("mean", "glm"), + trt_method = "default", folds = 10, weights = NULL, control = lmtp_control(), ...) { @@ -145,8 +148,14 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, progress_bar <- progressr::progressor(Task$tau*folds*2) - ratios <- cf_r(Task, learners_trt, mtp, control, progress_bar) - estims <- cf_tmle(Task, "tmp_lmtp_scaled_outcome", ratios$ratios, + if(trt_method == "default") { + ratios <- cf_r(Task, learners_trt, mtp, control, progress_bar) + } + else { + ratios <- cf_rr(Task, learners_trt, mtp, control, progress_bar) + } + + estims <- cf_tmle(Task, "tmp_lmtp_scaled_outcome", trt_method == "riesz", ratios$ratios, learners_outcome, control, progress_bar) theta_dr( @@ -154,6 +163,7 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, estimator = "TMLE", m = list(natural = estims$natural, shifted = estims$shifted), r = ratios$ratios, + cumulated = trt_method == "riesz", tau = Task$tau, folds = Task$folds, id = Task$natural$lmtp_id, @@ -520,6 +530,8 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens #' @param id \[\code{character(1)}\]\cr #' An optional column name containing cluster level identifiers. #' @param learners \[\code{character}\]\cr \bold{Only include candidate learners capable of binary classification}. +#' @param trt_method \[\code{character}\]\cr +#' Method for estimating treatment assignment mechanism (default or riesz) #' @param folds \[\code{integer(1)}\]\cr #' The number of folds to be used for cross-fitting. #' @param weights \[\code{numeric(nrow(data))}\]\cr @@ -556,6 +568,7 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens k = Inf, id = NULL, outcome_type = c("binomial", "continuous", "survival"), learners = c("mean", "glm"), + trt_method = "default", folds = 10, weights = NULL, control = lmtp_control(), ...) { @@ -608,15 +621,21 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens call. = FALSE) } - ratios <- cf_r(Task, learners, mtp, control, progress_bar) - - theta_ipw( - eta = list( - r = matrix( + if(trt_method == "default") { + ratios <- cf_r(Task, learners, mtp, control, progress_bar) + ratios$ratios <- matrix( t(apply(ratios$ratios, 1, cumprod)), nrow = nrow(ratios$ratios), ncol = ncol(ratios$ratios) - ), + ) + } + else { + ratios <- cf_rr(Task, learners, mtp, control, progress_bar) + } + + theta_ipw( + eta = list( + r = ratios$ratios, y = if (Task$survival) { convert_to_surv(data[[final_outcome(outcome)]]) } else { diff --git a/R/riesz_representer.R b/R/riesz_representer.R new file mode 100644 index 0000000..d2fad95 --- /dev/null +++ b/R/riesz_representer.R @@ -0,0 +1,69 @@ +cf_rr <- function(task, learners, mtp, control, progress_bar) { + out <- list() + for (fold in seq_along(task$folds)) { + out[[fold]] <- future::future({ + estimate_rr(get_folded_data(task$natural, task$folds, fold), + get_folded_data(task$shifted, task$folds, fold), + task$trt, + task$cens, + task$risk, + task$tau, + task$node_list$trt, + learners, + mtp, + control, + progress_bar) + }, + seed = TRUE) + } + + recombine_ratios(future::value(out), task$folds) +} + + +estimate_rr <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, mtp, control, progress_bar) { + representers <- matrix(nrow = nrow(natural$valid), ncol = tau) + fits <- list() + + for (t in 1:tau) { + jrt <- censored(natural$train, cens, t)$j + drt <- at_risk(natural$train, risk, t) + irv <- censored(natural$valid, cens, t)$i + jrv <- censored(natural$valid, cens, t)$j + drv <- at_risk(natural$valid, risk, t) + + trt_t <- ifelse(length(trt) > 1, trt[t], trt) + + frv <- followed_rule(natural$valid[[trt_t]], shifted$valid[[trt_t]], mtp) + + vars <- c(node_list[[t]], cens[[t]]) + + conditional_indicator_train <- matrix(1, ncol = 1, nrow = nrow(natural$train)) + conditional_indicator_valid <- matrix(1, ncol = 1, nrow = nrow(natural$valid)) + fit <- run_riesz_ensemble( + learners, + natural$train[jrt & drt, vars, drop = FALSE], + shifted$train[jrt & drt, vars, drop = FALSE], + conditional_indicator_train[jrt & drt,,drop = FALSE], + natural$valid[jrv & drv, vars, drop = FALSE], + shifted$valid[jrv & drv, vars, drop = FALSE], + conditional_indicator_valid[jrv & drv, ,drop = FALSE], + folds = control$.learners_trt_folds + ) + + if (control$.return_full_fits) { + fits[[t]] <- fit + } else { + fits[[t]] <- extract_sl_weights(fit) + } + + pred <- matrix(-999L, nrow = nrow(natural$valid), ncol = 1) + pred[jrv & drv, ] <- fit$predictions + + representers[, t] <- pred + + progress_bar() + } + + list(ratios = representers, fits = fits) +} diff --git a/R/sl_riesz.R b/R/sl_riesz.R new file mode 100644 index 0000000..1060d43 --- /dev/null +++ b/R/sl_riesz.R @@ -0,0 +1,30 @@ +riesz_superlearner_weights <- function(learners, task_valid) { + risks <- lapply(learners, function(x) { + x$loss(task_valid) + }) + + weights <- numeric(length(learners)) + weights[which.min(risks)] <- 1 + list(weights = weights, risk = risks) +} + +#' @importFrom SuperRiesz super_riesz +run_riesz_ensemble <- function(learners, natural_train, shifted_train, conditional_train, + natural_valid, shifted_valid, conditional_valid, folds) { + + sl <- SuperRiesz::super_riesz( + natural_train, + list(shifted = shifted_train, weight = data.frame(weight = conditional_train / mean(conditional_train))), + library = learners, + folds = folds, + m = \(alpha, data) alpha(data("shifted")) * data("weight")[,1] + ) + predictions = predict(sl, shifted_valid) * mean(conditional_valid[, 1]) + + list( + predictions = predictions, + fits = sl, + coef = sl$weights, + risk = sl$risk + ) +} diff --git a/R/theta.R b/R/theta.R index 986625a..ca420fa 100644 --- a/R/theta.R +++ b/R/theta.R @@ -58,15 +58,22 @@ theta_ipw <- function(eta) { out } -eif <- function(r, tau, shifted, natural) { +eif <- function(r, cumulated, tau, shifted, natural) { natural[is.na(natural)] <- -999 shifted[is.na(shifted)] <- -999 m <- shifted[, 2:(tau + 1), drop = FALSE] - natural[, 1:tau, drop = FALSE] + if(cumulated == TRUE) { + weights <- r + } + else { + weights <- compute_weights(r, 1, tau) + } rowSums(compute_weights(r, 1, tau) * m, na.rm = TRUE) + shifted[, 1] } theta_dr <- function(eta, augmented = FALSE) { inflnce <- eif(r = eta$r, + cumulated = eta$cumulated, tau = eta$tau, shifted = eta$m$shifted, natural = eta$m$natural) diff --git a/R/tmle.R b/R/tmle.R index 2f23298..af4e690 100644 --- a/R/tmle.R +++ b/R/tmle.R @@ -1,9 +1,11 @@ -cf_tmle <- function(task, outcome, ratios, learners, control, progress_bar) { +cf_tmle <- function(task, outcome, cumulated, ratios, learners, control, progress_bar) { out <- list() - ratios <- matrix(t(apply(ratios, 1, cumprod)), - nrow = nrow(ratios), - ncol = ncol(ratios)) + if(cumulated == FALSE) { + ratios <- matrix(t(apply(ratios, 1, cumprod)), + nrow = nrow(ratios), + ncol = ncol(ratios)) + } for (fold in seq_along(task$folds)) { out[[fold]] <- future::future({ From 3864afbf284a9dc3bf6899f1f5b19cc834cf81e8 Mon Sep 17 00:00:00 2001 From: Herb Susmann Date: Tue, 23 Apr 2024 11:02:35 -0400 Subject: [PATCH 14/16] Added tests of Riesz treatment estimation method for TMLE --- R/estimators.R | 1 + R/sl_riesz.R | 1 + tests/testthat/test-censoring.R | 2 ++ tests/testthat/test-point_treatment.R | 2 ++ tests/testthat/test-shifted.R | 7 +++++++ tests/testthat/test-survey.R | 5 +++++ tests/testthat/test-time_varying_treatment.R | 2 ++ 7 files changed, 20 insertions(+) diff --git a/R/estimators.R b/R/estimators.R index fd1ff6f..4c1ad40 100644 --- a/R/estimators.R +++ b/R/estimators.R @@ -336,6 +336,7 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, estimator = "SDR", m = list(natural = estims$natural, shifted = estims$shifted), r = ratios$ratios, + cumulated = FALSE, tau = Task$tau, folds = Task$folds, id = Task$natural$lmtp_id, diff --git a/R/sl_riesz.R b/R/sl_riesz.R index 1060d43..16e07a3 100644 --- a/R/sl_riesz.R +++ b/R/sl_riesz.R @@ -12,6 +12,7 @@ riesz_superlearner_weights <- function(learners, task_valid) { run_riesz_ensemble <- function(learners, natural_train, shifted_train, conditional_train, natural_valid, shifted_valid, conditional_valid, folds) { + if(is.null(folds)) folds <- 5 sl <- SuperRiesz::super_riesz( natural_train, list(shifted = shifted_train, weight = data.frame(weight = conditional_train / mean(conditional_train))), diff --git a/tests/testthat/test-censoring.R b/tests/testthat/test-censoring.R index c7b2ae1..9ad4184 100644 --- a/tests/testthat/test-censoring.R +++ b/tests/testthat/test-censoring.R @@ -8,6 +8,7 @@ truth <- 0.8 sub <- sw(lmtp_sub(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) ipw <- sw(lmtp_ipw(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) tmle <- sw(lmtp_tmle(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) +tmle_riesz <- sw(lmtp_tmle(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1, trt_method = "riesz", learners_trt = c("glm"))) sdr <- sw(lmtp_sdr(sim_cens, A, "Y", time_vary = L, cens = C, k = 0, shift = NULL, folds = 1)) # tests @@ -15,5 +16,6 @@ test_that("estimator fidelity with censoring", { expect_equal(truth, sub$theta, tolerance = 0.15) expect_equal(truth, ipw$theta, tolerance = 0.15) expect_equal(truth, tmle$theta, tolerance = 0.15) + expect_equal(truth, tmle_riesz$theta, tolerance = 0.15) expect_equal(truth, sdr$theta, tolerance = 0.15) }) diff --git a/tests/testthat/test-point_treatment.R b/tests/testthat/test-point_treatment.R index 4aa03c3..5a1d27a 100644 --- a/tests/testthat/test-point_treatment.R +++ b/tests/testthat/test-point_treatment.R @@ -17,6 +17,7 @@ suppressWarnings({ sub <- lmtp_sub(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) ipw <- lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) tmle <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) + tmle_riesz <- lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1, trt_method = "riesz", learners_trt = c("glm")) sdr <- lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, folds = 1) }) @@ -25,5 +26,6 @@ test_that("point treatment fidelity", { expect_equal(truth, sub$theta, tolerance = 0.025) expect_equal(truth, ipw$theta, tolerance = 0.025) expect_equal(truth, tmle$theta, tolerance = 0.025) + expect_equal(truth, tmle_riesz$theta, tolerance = 0.025) expect_equal(truth, sdr$theta, tolerance = 0.025) }) diff --git a/tests/testthat/test-shifted.R b/tests/testthat/test-shifted.R index 1f80130..8005276 100644 --- a/tests/testthat/test-shifted.R +++ b/tests/testthat/test-shifted.R @@ -22,6 +22,12 @@ tmle <- cens, k = 0, shifted = sc, outcome_type = "binomial", folds = 2, intervention_type = "mtp")) +tmle_riesz <- + sw(lmtp_tmle(sim_cens, a, "Y", nodes, baseline = NULL, + cens, k = 0, shifted = sc, trt_method = "riesz", + learners_trt = c("constant"), + outcome_type = "binomial", folds = 2, intervention_type = "mtp")) + sdr <- sw(lmtp_sdr(sim_cens, a, "Y", nodes, baseline = NULL, cens, k = 0, shifted = sc, @@ -32,5 +38,6 @@ test_that("estimator fidelity with shifted data supplied", { expect_equal(truth, sub$theta, tolerance = 0.05) expect_equal(truth, ipw$theta, tolerance = 0.025) expect_equal(truth, tmle$theta, tolerance = 0.025) + expect_equal(truth, tmle_riesz$theta, tolerance = 0.025) expect_equal(truth, sdr$theta, tolerance = 0.025) }) diff --git a/tests/testthat/test-survey.R b/tests/testthat/test-survey.R index 36079dd..0e2063f 100644 --- a/tests/testthat/test-survey.R +++ b/tests/testthat/test-survey.R @@ -27,6 +27,10 @@ ipw <- sw(lmtp_ipw(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binar tmle <- sw(lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, weights = wts, folds = 2)) +tmle_riesz <- sw(lmtp_tmle(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, + trt_method = "riesz", learners_trt = c("glm"), + weights = wts, folds = 2)) + sdr <- sw(lmtp_sdr(tmp, "A", "Y", baseline = c("W1", "W2"), shift = static_binary_on, weights = wts, folds = 2)) @@ -36,5 +40,6 @@ test_that("survey weight fidelity", { expect_equal(truth, sub$theta, tolerance = 0.025) expect_equal(truth, ipw$theta, tolerance = 0.025) expect_equal(truth, tmle$theta, tolerance = 0.025) + expect_equal(truth, tmle_riesz$theta, tolerance = 0.025) expect_equal(truth, sdr$theta, tolerance = 0.025) }) diff --git a/tests/testthat/test-time_varying_treatment.R b/tests/testthat/test-time_varying_treatment.R index 4c9fbd0..89d23fb 100644 --- a/tests/testthat/test-time_varying_treatment.R +++ b/tests/testthat/test-time_varying_treatment.R @@ -26,11 +26,13 @@ truth <- 0.305 sub <- sw(lmtp_sub(tmp, a, "Y", time_vary = time_varying, shift = d, folds = 1)) ipw <- sw(lmtp_ipw(tmp, a, "Y", time_vary = time_varying, shift = d, intervention_type = "mtp", folds = 1)) tmle <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, intervention_type = "mtp", folds = 1)) +tmle_riesz <- sw(lmtp_tmle(tmp, a, "Y", time_vary = time_varying, shift = d, intervention_type = "mtp", folds = 1, trt_method = "riesz", learners_trt = c("constant"))) sdr <- sw(lmtp_sdr(tmp, a, "Y", time_vary = time_varying, shift = d, intervention_type = "mtp", folds = 1)) test_that("time varying treatment fidelity, t = 4", { expect_equal(truth, sub$theta, tolerance = 0.025) expect_equal(truth, ipw$theta, tolerance = 0.05) expect_equal(truth, tmle$theta, tolerance = 0.025) + expect_equal(truth, tmle_riesz$theta, tolerance = 0.025) expect_equal(truth, sdr$theta, tolerance = 0.025) }) From efd34703c8ef783c2ca44bc22d2b2016690f2968 Mon Sep 17 00:00:00 2001 From: Herb Susmann Date: Tue, 23 Apr 2024 11:04:27 -0400 Subject: [PATCH 15/16] Updated DESCRIPTION to add import of SuperRiesz package --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index fbea1e5..3c8e0f0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,8 @@ Imports: future (>= 1.17.0), progressr, data.table (>= 1.13.0), - checkmate (>= 2.1.0) + checkmate (>= 2.1.0), + SuperRiesz URL: https://github.com/nt-williams/lmtp BugReports: https://github.com/nt-williams/lmtp/issues Suggests: From e0c7c0ed63f19d4a44af9da1b75b72c1c1ed6022 Mon Sep 17 00:00:00 2001 From: Herb Susmann Date: Wed, 1 May 2024 13:25:26 -0400 Subject: [PATCH 16/16] Bug fix in EIF calculation --- R/theta.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/theta.R b/R/theta.R index 9b0b521..e0a56f0 100644 --- a/R/theta.R +++ b/R/theta.R @@ -68,7 +68,7 @@ eif <- function(r, cumulated, tau, shifted, natural) { else { weights <- compute_weights(r, 1, tau) } - rowSums(compute_weights(r, 1, tau) * m, na.rm = TRUE) + shifted[, 1] + rowSums(weights * m, na.rm = TRUE) + shifted[, 1] } theta_dr <- function(eta, augmented = FALSE) {