Mapping cum.g to original dataset #23

gah-bo · 2023-09-15T02:36:04Z

We are using LTMLE for an analysis with 4 periods (quarters). We're trying to peep at the inner workings - from my understanding, LTMLE provides propensity scores, allowing us to calculate the joint probabilities for sequences of treatments, such as P(1,1,1,1) and P(0,0,0,0). Our intent is to compare always treated vs never treated with and without balancing with cumulative P(treatment).

We believe cum.g to be the object we are after.

However I don't know how to 'map' this object back to the original dataset. I assume the Nth row corresponds to the Nth row in the original dataset. I also assume that, given that

abar = list(treatment = c(1, 0, 1, 0), control = c(0, 0, 0, 0) )

then

treatment_cum.g <- LTMLE[["cum.g"]][,,1]

and

control_cum.g <- LTMLE[["cum.g"]][,,2]

I am looking for confirmation on those. Lastly, the columns -- both of the above slices of cum.g have 8 columns, 1 for each A and C node, as per the documentation. But they're labeled V1-V8, and I don't know to which they each correspond.

LTMLE calls:

LTMLE_mod_1010x <- ltmle(Surv_No, Anodes = Anodes, Cnodes = Cnodes, Lnodes = Lnodes, Ynodes = Ynodes, survivalOutcome = FALSE, abar = list(treatment = c(1, 0, 1, 0), control = c(0, 0, 0, 0) ), SL.library = list(Q = NULL, g = SL.libx), estimate.time = FALSE)

and so on for all combinations of 1's and 0's

gah-bo · 2023-09-21T02:40:48Z

Is this the proper way to replicate the cum.b matrices?

library(ltmle)
library(SuperLearner)
library(randomForest)

set.seed(1234) 
n = 1000
periods=3

# Generate variables for L, A, and C nodes
# code for generating data is from LTMLE manual, last pages
rexpit <- function(x) rbinom(n = length(x), size = 1, prob = plogis(x))
W <- rnorm(n)

A1 <- rexpit(W + .5)
A2 <- rexpit(W + 2 * A1)
A3 <- rexpit(2 * A1 - A2)

C1 <- rbinom(n, 1, 0.1)
C2 <- pmax(C1, rbinom(n, 1, 0.3))
C3 <- pmax(C2, rbinom(n, 1, 0.9))

Y <- rexpit(W - A1 + 0.5 * A2 + 2 * A3)

data <- data.frame(W, 
                   A1, A2, A3, 
                   Y)

# write.csv(data, "Y:\\personalStaffFolders\\Gabriel\\data.csv")

Anodes <- paste0("A", 1:periods)
Cnodes <- paste0("C", 1:periods)
Lnodes <- c("W")   
Ynodes <- c("Y") 

# Control plan: set A1-A3 to 0
control <- matrix(0, nrow=n, ncol=periods)
# DYNAMIC Treatment plan (from LTMLE manual):           set A1 to 1 if W > 0, set A2 to 1 if W > 1.5, always set A3 to 1 
treatment_dynamic <- cbind(W > 0, W > 1.5, 1)
# FIXED   Treatment plan ( P(A1|W) replicable with logit): set A1-A3 to 1
treatment_fixed <- matrix(1, nrow=n, ncol=periods)

ltmleMODEL <- ltmle(data = data, 
                Anodes = Anodes,
                Lnodes = Lnodes,
                Ynodes = Ynodes,
                survivalOutcome = FALSE, 
                abar = list(treatment_fixed,control) # if swap to treatment_dynamic, phat1 will NOT equal cum_g_treatment V1 column
                )

# Take cum.g matrices as dataframes
cum_g_treatment <- as.data.frame(ltmleMODEL$cum.g[,,1])
cum_g_control <- as.data.frame(ltmleMODEL$cum.g[,,2])


# Estimate P(A1 = 1 | W)
# data$A11 <- ifelse(data$W > 0, 1, 0)
cum_g_treatment$phat1 <- predict(glm(A1 ~ W, family = binomial(link="logit"), data = data), type = "response")

# Estimate P(A1 = 1 & A2 = 1 |W)
data$A12 <- ifelse(data$A1 == 1 & data$A2 == 1, 1, 0)
# data$A12 <- ifelse(data$A11 == 1 & data$W > 1.5, 1, 0)
cum_g_treatment$phat2 <- predict(glm(A12 ~ W, family = binomial(link="logit"), data = data), type = "response")

# Estimate P(A1 = 1 & A2 = 1 & A3 = 1 |W)
data$A13 <- ifelse(data$A12 == 1 & data$A3 == 1, 1, 0)
cum_g_treatment$phat3 <- predict(glm(A13 ~ W, family = binomial(link="logit"), data = data), type = "response")

# Should be 0 or close
cum_g_treatment$diff1 <- round((cum_g_treatment$V1 - cum_g_treatment$phat1), digits=3)
cum_g_treatment$diff2 <- round((cum_g_treatment$V2 - cum_g_treatment$phat2), digits=3)
cum_g_treatment$diff3 <- round((cum_g_treatment$V3 - cum_g_treatment$phat3), digits=3)


cum_g_treatment$W <- abs(data$W)


cum_g_treatment <- cum_g_treatment[, c("W",
                                       "V1", "phat1", "diff1",
                                       "V2", "phat2", "diff2",
                                       "V3", "phat3", "diff3")]


##### STOP
# 1. Open cum_g_treatment
# 2. note that V1=phat1, v2 is approx phat 2, and v3 is approx phat3...
# 3. BUT sort diff3, desc -- note it can be up to -23 percentage points
# ... also, sorting by W, desc is super similar than sorting by diff3, at least for the first dozens of rows
# ... 


# these equalities do not hold if we use treatment_dynamic 
# 4. Moreover, note that when using treatment_fixed, this sums to 1's...
# (cum_g_treatment$V1 + cum_g_control$V1)
# ... but this does NOT
# round((cum_g_treatment$V2 + cum_g_control$V2), digits=1)


colMeans(cum_g_treatment)

# This extends on cum.g test, NO censoring.rmd; **which must be run first to populate global env!!!**
data_CENSORING <- data.frame(W, 
                             C1, C2, C3,
                             A1, A2, A3, 
                             Y)

ltmleMODEL_CENSORING <- ltmle(data = data_CENSORING, 
                              Anodes = Anodes,
                              Cnodes = Cnodes,
                              Lnodes = Lnodes,
                              Ynodes = Ynodes,
                              survivalOutcome = FALSE,
                              abar = list(treatment_fixed,control) # if swap to treatment_dynamic, phat1 will NOT equal cum_g_treatment_CENSORING V1 column
                )


# Take cum.g matrices as dataframes
cum_g_treatment_CENSORING <- as.data.frame(ltmleMODEL_CENSORING$cum.g[,,1])
cum_g_control_CENSORING <- as.data.frame(ltmleMODEL_CENSORING$cum.g[,,2])

# Estimating C's with same logic as A's in rmd with NO censoring...
# Estimate P(C1 = 1 | W)
cum_g_treatment_CENSORING$phat1 <- predict(glm(C1 ~ W, family = binomial(link="logit"), data = data_CENSORING), type = "response")

# Estimate P(C1 = 1 & C2 = 1 |W)
data_CENSORING$C12 <- ifelse(data_CENSORING$C1 == 1 & data_CENSORING$C2 == 1, 1, 0)
cum_g_treatment_CENSORING$phat2 <- predict(glm(C12 ~ W, family = binomial(link="logit"), data = data_CENSORING), type = "response")

# Estimate P(C1 = 1 & C2 = 1 & C3 = 1 |W)
data_CENSORING$C13 <- ifelse(data_CENSORING$C12 == 1 & data_CENSORING$C3 == 1, 1, 0)
cum_g_treatment_CENSORING$phat3 <- predict(glm(C13 ~ W, family = binomial(link="logit"), data = data_CENSORING), type = "response")

# Estimate P(A1 = 1 | W)
data_CENSORING$A11 <- ifelse(data_CENSORING$A1 == 1 & data_CENSORING$C1 == 1, 1, 0)
# data_CENSORING$A11 <- ifelse(data_CENSORING$W > 0, 1, 0)
cum_g_treatment_CENSORING$phat4 <- predict(glm(A11 ~ W, family = binomial(link="logit"), data = data_CENSORING), type = "response")

# Estimate P(A1 = 1 & A2 = 1 |W)
data_CENSORING$A12 <- ifelse(data_CENSORING$A11 == 1 & data_CENSORING$A2 == 1 & data_CENSORING$C2 == 1, 1, 0)
# data_CENSORING$A12 <- ifelse(data_CENSORING$A11 == 1 & data_CENSORING$W > 1.5, 1, 0)
cum_g_treatment_CENSORING$phat5 <- predict(glm(A12 ~ W, family = binomial(link="logit"), data = data_CENSORING), type = "response")

# Estimate P(A1 = 1 & A2 = 1 & A3 = 1 |W)
data_CENSORING$A13 <- ifelse(data_CENSORING$A12 == 1 & data_CENSORING$A3 == 1 & data_CENSORING$C3 == 1, 1, 0)
cum_g_treatment_CENSORING$phat6 <- predict(glm(A13 ~ W, family = binomial(link="logit"), data = data_CENSORING), type = "response")


cum_g_treatment_CENSORING$diff1 <- round((cum_g_treatment_CENSORING$V1 - cum_g_treatment_CENSORING$phat1), digits=3)
cum_g_treatment_CENSORING$diff2 <- round((cum_g_treatment_CENSORING$V2 - cum_g_treatment_CENSORING$phat2), digits=3)
cum_g_treatment_CENSORING$diff3 <- round((cum_g_treatment_CENSORING$V3 - cum_g_treatment_CENSORING$phat3), digits=3)
cum_g_treatment_CENSORING$diff4 <- round((cum_g_treatment_CENSORING$V4 - cum_g_treatment_CENSORING$phat4), digits=3)
cum_g_treatment_CENSORING$diff5 <- round((cum_g_treatment_CENSORING$V5 - cum_g_treatment_CENSORING$phat5), digits=3)
cum_g_treatment_CENSORING$diff6 <- round((cum_g_treatment_CENSORING$V6 - cum_g_treatment_CENSORING$phat6), digits=3)


cum_g_treatment_CENSORING$W <- abs(data_CENSORING$W)


cum_g_treatment_CENSORING <- cum_g_treatment_CENSORING[, c("W",
                                                           "V1", "phat1", "diff1",
                                                           "V2", "phat2", "diff2",
                                                           "V3", "phat3", "diff3",
                                                           "V4", "phat4", "diff4",
                                                           "V5", "phat5", "diff5",
                                                           "V6", "phat6", "diff6")]



##### STOP
# 1. Open cum_g_treatment_CENSORING
# 2. note that for t=(1,2,3) (ie: Censoring nodes) the following hold:...
# ...V(t)=phat(t)...
# ...V1=V2=V3=phat1=phat2=phat3...
# ...row values in cum_g_treatment_CENSORING = cum_g_control_CENSORING
# 3. As for t=(4,5,6) V(t) approx phat(t), but we have to include Censoring in defining A11, A12, and A13

colMeans(cum_g_treatment_CENSORING)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Mapping cum.g to original dataset #23

Mapping cum.g to original dataset #23

gah-bo commented Sep 15, 2023 •

edited

Loading

gah-bo commented Sep 21, 2023 •

edited

Loading

Mapping cum.g to original dataset #23

Mapping cum.g to original dataset #23

Comments

gah-bo commented Sep 15, 2023 • edited Loading

gah-bo commented Sep 21, 2023 • edited Loading

gah-bo commented Sep 15, 2023 •

edited

Loading

gah-bo commented Sep 21, 2023 •

edited

Loading