nt-williams · herbps10 · Jun 2, 2023 · Jun 2, 2023 · Jun 2, 2023 · Jul 5, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -36,7 +36,8 @@ Imports:
     future (>= 1.17.0),
     progressr,
     data.table (>= 1.13.0),
-    checkmate (>= 2.1.0)
+    checkmate (>= 2.1.0),
+    SuperRiesz
 URL: https://github.com/nt-williams/lmtp
 BugReports: https://github.com/nt-williams/lmtp/issues
 Suggests: 

diff --git a/R/density_ratios.R b/R/density_ratios.R
@@ -12,7 +12,7 @@ cf_r <- function(Task, learners, mtp, control, pb) {
     seed = TRUE)
   }
 
-  trim_ratios(recombine_ratios(future::value(out), Task$folds), control$.trim)
+  trim_ratios(recombine_ratios(future::value(out), Task$folds), control$.trim)
 }
 
 estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, pb, mtp, control) {
@@ -56,7 +56,7 @@ estimate_r <- function(natural, shifted, trt, cens, risk, tau, node_list, learne
     ratios <- density_ratios(pred, irv, drv, frv, mtp)
     densratios[, t] <- ratios
 
-    pb()
+    pb()
   }
 
   list(ratios = densratios, fits = fits)

diff --git a/R/estimators.R b/R/estimators.R
@@ -54,6 +54,8 @@
 #' @param learners_trt \[\code{character}\]\cr A vector of \code{mlr3superlearner} algorithms for estimation
 #'  of the outcome regression. Default is \code{c("mean", "glm")}.
 #'  \bold{Only include candidate learners capable of binary classification}.
+#' @param trt_method \[\code{character(1)}\]\cr
+#'  Method used to estimate the treatment assignment mechanism, either \code{"default"} or \code{"riesz"}.
 #' @param folds \[\code{integer(1)}\]\cr
 #'  The number of folds to be used for cross-fitting.
 #' @param weights \[\code{numeric(nrow(data))}\]\cr
@@ -96,6 +98,7 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
                       id = NULL, bounds = NULL,
                       learners_outcome = c("mean", "glm"),
                       learners_trt = c("mean", "glm"),
+                      trt_method = "default",
                       folds = 10, weights = NULL,
                       control = lmtp_control()) {
 
@@ -146,7 +149,13 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
 
   pb <- progressr::progressor(Task$tau*folds*2)
 
-  ratios <- cf_r(Task, learners_trt, mtp, control, pb)
+  # Validate up front: `cumulated` below assumes only these two values exist.
+  trt_method <- match.arg(trt_method, c("default", "riesz"))
+  ratios <- switch(trt_method,
+    default = cf_r(Task, learners_trt, mtp, control, pb),
+    riesz = cf_rr(Task, learners_trt, mtp, control, pb)
+  )
+
   estims <- cf_tmle(Task, "tmp_lmtp_scaled_outcome",
                     ratios$ratios, learners_outcome, control, pb)
 
@@ -155,9 +164,10 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
       estimator = "TMLE",
       m = list(natural = estims$natural, shifted = estims$shifted),
       r = ratios$ratios,
+      cumulated = trt_method == "riesz",
       tau = Task$tau,
       folds = Task$folds,
-      id = Task$id,
+      id = Task$natural$lmtp_id,
       outcome_type = Task$outcome_type,
       bounds = Task$bounds,
       weights = Task$weights,
@@ -265,7 +275,6 @@ lmtp_tmle <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
 lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
                      cens = NULL, shift = NULL, shifted = NULL, k = Inf,
                      mtp = FALSE,
-                     # intervention_type = c("static", "dynamic", "mtp"),
                      outcome_type = c("binomial", "continuous", "survival"),
                      id = NULL, bounds = NULL,
                      learners_outcome = c("mean", "glm"),
@@ -329,9 +338,10 @@ lmtp_sdr <- function(data, trt, outcome, baseline = NULL, time_vary = NULL,
       estimator = "SDR",
       m = list(natural = estims$natural, shifted = estims$shifted),
       r = ratios$ratios,
+      cumulated = FALSE,
       tau = Task$tau,
       folds = Task$folds,
-      id = Task$id,
+      id = Task$natural$lmtp_id,
       outcome_type = Task$outcome_type,
       bounds = Task$bounds,
       weights = Task$weights,
@@ -536,6 +546,8 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens
 #' @param learners \[\code{character}\]\cr A vector of \code{mlr3superlearner} algorithms for estimation
 #'  of the outcome regression. Default is \code{c("mean", "glm")}.
 #'  \bold{Only include candidate learners capable of binary classification}.
+#' @param trt_method \[\code{character(1)}\]\cr
+#'  Method used to estimate the treatment assignment mechanism, either \code{"default"} or \code{"riesz"}.
 #' @param folds \[\code{integer(1)}\]\cr
 #'  The number of folds to be used for cross-fitting.
 #' @param weights \[\code{numeric(nrow(data))}\]\cr
@@ -568,10 +580,10 @@ lmtp_sub <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens
 #' @example inst/examples/ipw-ex.R
 lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens = NULL,
                      shift = NULL, shifted = NULL, mtp = FALSE,
-                     # intervention_type = c("static", "dynamic", "mtp"),
                      k = Inf, id = NULL,
                      outcome_type = c("binomial", "continuous", "survival"),
                      learners = c("mean", "glm"),
+                     trt_method = "default",
                      folds = 10, weights = NULL,
                      control = lmtp_control()) {
 
@@ -619,16 +631,21 @@ lmtp_ipw <- function(data, trt, outcome, baseline = NULL, time_vary = NULL, cens
   )
 
   pb <- progressr::progressor(Task$tau*folds)
-
-  ratios <- cf_r(Task, learners, mtp, control, pb)
-
-  theta_ipw(
-    eta = list(
-      r = matrix(
+
+  if (match.arg(trt_method, c("default", "riesz")) == "default") {
+    ratios <- cf_r(Task, learners, mtp, control, pb)
+    ratios$ratios <- matrix(
         t(apply(ratios$ratios, 1, cumprod)),
         nrow = nrow(ratios$ratios),
         ncol = ncol(ratios$ratios)
-      ),
+      )
+  } else {
+    ratios <- cf_rr(Task, learners, mtp, control, pb)
+  }
+
+  theta_ipw(
+    eta = list(
+      r = ratios$ratios,
       y = if (Task$survival) {
         convert_to_surv(data[[final_outcome(outcome)]])
       } else {

diff --git a/R/gcomp.R b/R/gcomp.R
@@ -17,7 +17,7 @@ cf_sub <- function(Task, outcome, learners, control, pb) {
   out <- future::value(out)
 
   list(
-    m = recombine_outcome(out, "m", Task$folds),
+    m = recombine_outcome(out, "m", Task$folds),
     fits = lapply(out, function(x) x[["fits"]])
   )
 }
@@ -74,7 +74,7 @@ estimate_sub <- function(natural, shifted, trt, outcome, node_list, cens, risk,
     natural$train[!rt, pseudo] <- 0
     m[!rv, t] <- 0
 
-    pb()
+    pb()
   }
 
   list(m = m, fits = fits)

diff --git a/R/lmtp_options.R b/R/lmtp_options.R
@@ -0,0 +1,29 @@
+#' Set LMTP Estimation Parameters
+#'
+#' @param .trim \[\code{numeric(1)}\]\cr
+#'  Determines the amount the density ratios should be trimmed.
+#'  The default is 0.999, trimming the density ratios greater than the 0.999 percentile
+#'  to the 0.999 percentile. A value of 1 indicates no trimming.
+#' @param .learners_outcome_folds \[\code{integer(1)}\]\cr
+#'  The number of cross-validation folds for \code{learners_outcome}.
+#' @param .learners_trt_folds \[\code{integer(1)}\]\cr
+#'  The number of cross-validation folds for \code{learners_trt}.
+#' @param .return_full_fits \[\code{logical(1)}\]\cr
+#'  Return full \code{mlr3superlearner} fits? Default is \code{FALSE}.
+#'
+#' @return A list of parameters controlling the estimation procedure.
+#' @export
+#'
+#' @examples
+#' lmtp_control(.trim = 0.975)
+lmtp_control <- function(...) {
+  # Defaults; a recognised option supplied via `...` replaces its entry.
+  opts <- list(.trim = 0.999, .learners_outcome_folds = NULL,
+               .learners_trt_folds = NULL, .return_full_fits = FALSE)
+  supplied <- list(...)
+  if (length(supplied) > 0) {
+    supplied <- supplied[names(supplied) %in% names(opts)]
+    opts[names(supplied)] <- supplied
+  }
+  opts
+}
diff --git a/R/riesz_representer.R b/R/riesz_representer.R
@@ -0,0 +1,69 @@
+cf_rr <- function(task, learners, mtp, control, progress_bar) {
+  # Launch one future per cross-fitting fold; each runs estimate_rr() on the
+  # natural and shifted data that get_folded_data() returns for that fold.
+  n_folds <- length(task$folds)
+  fold_futures <- vector("list", n_folds)
+  for (k in seq_len(n_folds)) {
+    fold_futures[[k]] <- future::future({
+      estimate_rr(
+        natural = get_folded_data(task$natural, task$folds, k),
+        shifted = get_folded_data(task$shifted, task$folds, k),
+        trt = task$trt, cens = task$cens,
+        risk = task$risk, tau = task$tau,
+        node_list = task$node_list$trt, learners = learners, mtp = mtp,
+        control = control, progress_bar = progress_bar
+      )
+    }, seed = TRUE)
+  }
+
+  # Resolve all futures and hand the per-fold results to recombine_ratios().
+  recombine_ratios(future::value(fold_futures), task$folds)
+}
+
+
+estimate_rr <- function(natural, shifted, trt, cens, risk, tau, node_list, learners, mtp, control, progress_bar) {
+  # Fit the Riesz representer on the training rows at each time point and
+  # predict it for the validation rows.
+  # natural, shifted: objects holding `train` and `valid` tables.
+  # cens, node_list: indexed by time point; risk is passed on to at_risk().
+  # control: `.learners_trt_folds` and `.return_full_fits` are read.
+  # progress_bar: function called once per time point.
+  # trt, mtp: kept in the signature but unused (their only use was dead code).
+  # Returns list(ratios = nrow(natural$valid) x tau matrix, fits = list).
+  representers <- matrix(nrow = nrow(natural$valid), ncol = tau)
+  fits <- list()
+
+  # The conditioning indicator is all ones at every time point; build it once.
+  indicator_train <- matrix(1, ncol = 1, nrow = nrow(natural$train))
+  indicator_valid <- matrix(1, ncol = 1, nrow = nrow(natural$valid))
+
+  # seq_len() skips the loop when tau is 0; 1:tau would iterate over 1 and 0.
+  for (t in seq_len(tau)) {
+    # Rows selected by censored()$j and at_risk() for fitting and prediction.
+    use_train <- censored(natural$train, cens, t)$j & at_risk(natural$train, risk, t)
+    use_valid <- censored(natural$valid, cens, t)$j & at_risk(natural$valid, risk, t)
+
+    vars <- c(node_list[[t]], cens[[t]])
+
+    fit <- run_riesz_ensemble(
+      learners,
+      natural$train[use_train, vars, drop = FALSE],
+      shifted$train[use_train, vars, drop = FALSE],
+      indicator_train[use_train, , drop = FALSE],
+      natural$valid[use_valid, vars, drop = FALSE],
+      shifted$valid[use_valid, vars, drop = FALSE],
+      indicator_valid[use_valid, , drop = FALSE],
+      folds = control$.learners_trt_folds
+    )
+
+    fits[[t]] <- if (control$.return_full_fits) fit else extract_sl_weights(fit)
+
+    # Rows without a prediction keep the -999 placeholder.
+    representers[, t] <- -999
+    representers[use_valid, t] <- fit$predictions
+
+    progress_bar()
+  }
+
+  list(ratios = representers, fits = fits)
+}
diff --git a/R/sdr.R b/R/sdr.R
@@ -16,18 +16,21 @@ cf_sdr <- function(Task, outcome, ratios, learners, control, pb) {
 
   out <- future::value(out)
 
-  list(natural = recombine_outcome(out, "natural", Task$folds),
-       shifted = recombine_outcome(out, "shifted", Task$folds),
+  list(natural = recombine_outcome(out, "natural", Task$folds),
+       shifted = recombine_outcome(out, "shifted", Task$folds),
        fits = lapply(out, function(x) x[["fits"]]))
 }
 
 estimate_sdr <- function(natural, shifted, trt, outcome, node_list, cens, risk, tau,
                          outcome_type, ratios, learners, control, pb) {
 
-  m_natural_train <- m_shifted_train <-
-    cbind(matrix(nrow = nrow(natural$train), ncol = tau), natural$train[[outcome]])
-  m_natural_valid <- m_shifted_valid <-
-    cbind(matrix(nrow = nrow(natural$valid), ncol = tau), natural$valid[[outcome]])
+  m_natural_train <- m_shifted_train <- cbind(matrix(nrow = nrow(natural$train),
+                                                     ncol = tau),
+                                              natural$train[[outcome]])
+
+  m_natural_valid <- m_shifted_valid <- cbind(matrix(nrow = nrow(natural$valid),
+                                                     ncol = tau),
+                                              natural$valid[[outcome]])
 
   fits <- vector("list", length = tau)
 
@@ -57,12 +60,13 @@ estimate_sdr <- function(natural, shifted, trt, outcome, node_list, cens, risk,
     }
 
     if (t < tau) {
-      tmp <- transform_sdr(compute_weights(ratios, t + 1, tau),
-                           t, tau,
-                           m_shifted_train,
-                           m_natural_train)
+      densratio <- transform_sdr(compute_weights(ratios, t + 1, tau),
+                                 t,
+                                 tau,
+                                 m_shifted_train,
+                                 m_natural_train)
 
-      natural$train[, pseudo] <- shifted$train[, pseudo] <- tmp
+      natural$train[, pseudo] <- shifted$train[, pseudo] <- densratio
 
       fit <- run_ensemble(natural$train[i & rt, c("lmtp_id", vars, pseudo)],
                           pseudo,
@@ -100,7 +104,7 @@ estimate_sdr <- function(natural, shifted, trt, outcome, node_list, cens, risk,
     m_natural_valid[!rv, t] <- 0
     m_shifted_valid[!rv, t] <- 0
 
-    pb()
+    pb()
   }
 
   list(natural = m_natural_valid,

diff --git a/R/sl_riesz.R b/R/sl_riesz.R
@@ -0,0 +1,31 @@
+riesz_superlearner_weights <- function(learners, task_valid) {
+  # Loss of every candidate learner, evaluated on `task_valid`.
+  risks <- lapply(learners, function(learner) learner$loss(task_valid))
+
+  # Discrete selection: the lowest-loss learner gets weight 1, the rest 0.
+  n_learners <- length(learners)
+  weights <- replace(numeric(n_learners), which.min(risks), 1)
+  list(weights = weights, risk = risks)
+}
+
+#' @importFrom SuperRiesz super_riesz
+run_riesz_ensemble <- function(learners, natural_train, shifted_train, conditional_train,
+                               natural_valid, shifted_valid, conditional_valid, folds) {
+  # Fit a SuperRiesz ensemble on the training data, then predict on the
+  # shifted validation data. `folds` falls back to 5 when NULL.
+  if (is.null(folds)) folds <- 5
+
+  train_weight <- data.frame(weight = conditional_train / mean(conditional_train))
+  sl <- SuperRiesz::super_riesz(
+    natural_train,
+    list(shifted = shifted_train, weight = train_weight),
+    library = learners,
+    folds = folds,
+    m = function(alpha, data) alpha(data("shifted")) * data("weight")[, 1]
+  )
+
+  # Predictions are rescaled by the mean of the validation indicator column.
+  scaled <- predict(sl, shifted_valid) * mean(conditional_valid[, 1])
+
+  list(predictions = scaled, fits = sl, coef = sl$weights, risk = sl$risk)
+}
diff --git a/R/theta.R b/R/theta.R
@@ -58,15 +58,22 @@ theta_ipw <- function(eta) {
   out
 }
 
-eif <- function(r, tau, shifted, natural) {
+eif <- function(r, cumulated, tau, shifted, natural) {
   natural[is.na(natural)] <- -999
   shifted[is.na(shifted)] <- -999
   m <- shifted[, 2:(tau + 1), drop = FALSE] - natural[, 1:tau, drop = FALSE]
-  rowSums(compute_weights(r, 1, tau) * m, na.rm = TRUE) + shifted[, 1]
+  # isTRUE(): a missing (NULL) or NA flag falls back to compute_weights().
+  if (isTRUE(cumulated)) {
+    weights <- r
+  } else {
+    weights <- compute_weights(r, 1, tau)
+  }
+  rowSums(weights * m, na.rm = TRUE) + shifted[, 1]
 }
 
 theta_dr <- function(eta, augmented = FALSE) {
   inflnce <- eif(r = eta$r,
+                 cumulated = eta$cumulated,
                  tau = eta$tau,
                  shifted = eta$m$shifted,
                  natural = eta$m$natural)