diff --git a/.travis.yml b/.travis.yml index 24960d72..8944d9de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ matrix: - DeclareDesign/DeclareDesign - os: linux - r: 3.4 + r: 3.5 after_success: - echo skipping source packaging on linux/oldrel r_github_packages: diff --git a/DESCRIPTION b/DESCRIPTION index c17f897c..41d7494d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,9 @@ Depends: estimatr (>= 0.14.0) Imports: generics, - rlang + rlang, + broom (>= 0.5.0.9) Suggests: - testthat + testthat, + mediation RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index a26a82eb..a6feafd2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,7 @@ importFrom(DeclareDesign,draw_estimates) importFrom(DeclareDesign,redesign) importFrom(DeclareDesign,set_diagnosands) importFrom(DeclareDesign,tidy_estimator) +importFrom(broom,tidy) importFrom(estimatr,difference_in_means) importFrom(estimatr,iv_robust) importFrom(estimatr,lm_lin) @@ -62,6 +63,7 @@ importFrom(rlang,quo_text) importFrom(rlang,quos) importFrom(rlang,sym) importFrom(stats,formula) +importFrom(stats,lm) importFrom(stats,poly) importFrom(stats,qnorm) importFrom(stats,rbinom) diff --git a/R/mediation_analysis_designer.R b/R/mediation_analysis_designer.R index b05a7535..36dac55b 100644 --- a/R/mediation_analysis_designer.R +++ b/R/mediation_analysis_designer.R @@ -1,27 +1,29 @@ #' Create a design for mediation analysis -#' -#' A mediation analysis design that examines the effect of treatment (Z) on mediator (M) and the effect of mediator (M) on outcome (Y) (given Z=0) -#' as well as direct effect of treatment (Z) on outcome (Y) (given M=0). Analysis is implemented using an interacted regression model. -#' Note this model is not guaranteed to be unbiased despite randomization of Z because of possible violations of sequential ignorability. #' -#' @details +#' A mediation analysis design that examines the effect of (Z) on mediator (M), the natural and controlled direct effect of treatment (Z) on outcome (Y) as well as the natural and controlled indirect effect of treatment (Z) on outcome (Y) through mediator (M). +#' Analysis is implemented using a set of two linear structural models: a first stage model and a interacted model. Note estimates are not guaranteed to be unbiased despite randomization of Z because of possible violations of sequential ignorability. +#' #' +#' @details #' See \href{https://declaredesign.org/library/articles/mediation_analysis.html}{vignette online}. #' #' @param N An integer. Size of sample. -#' @param a A number. Parameter governing effect of treatment (Z) on mediator (M). -#' @param b A number. Effect of mediator (M) on outcome (Y) when Z = 0. -#' @param c A number. Interaction between mediator (M) and (Z) for outcome (Y). -#' @param d A number. Direct effect of treatment (Z) on outcome (Y), when M = 0. +#' @param Z_on_M A number. Parameter governing effect of treatment (Z) on mediator (M). +#' @param M_on_Y_Z0 A number. Effect of mediator (M) on outcome (Y) when Z = 0. +#' @param M_on_Y_Z1 A number. Interaction between mediator (M) and (Z) for outcome (Y). +#' @param Z_on_Y_M0 A number. Effect of treatment (Z) on outcome (Y), when M = 0. #' @param rho A number in [-1,1]. Correlation between mediator (M) and outcome (Y) error terms. Non zero correlation implies a violation of sequential ignorability. +#' @param mediation_package A logical value. If 'TRUE' direct and indirect effects are estimated using \code{mediate} function from \code{mediation} package. Default is 'FALSE'. #' @return A mediation analysis design. #' @author \href{https://declaredesign.org/}{DeclareDesign Team} #' @concept experiment #' @concept mediation #' @importFrom DeclareDesign declare_assignment declare_estimands declare_estimator declare_population declare_potential_outcomes declare_reveal declare_step diagnose_design draw_estimands #' @importFrom fabricatr fabricate fabricate -#' @importFrom randomizr conduct_ra -#' @importFrom estimatr lm_robust +#' @importFrom randomizr conduct_ra +#' @importFrom stats lm +#' @importFrom estimatr tidy lm_robust +#' @importFrom broom tidy #' @export #' @examples #' # Generate a mediation analysis design using default arguments: @@ -32,45 +34,133 @@ #' } #' #' # A design with a violation of sequential ignorability and heterogeneous effects: -#' mediation_2 <- mediation_analysis_designer(a = 1, rho = .5, c = 1, d = .75) +#' mediation_2 <- mediation_analysis_designer(Z_on_M =1, rho = .5, M_on_Y_Z1 = 1, Z_on_Y_M0 =.75) #' draw_estimands(mediation_2) #' \dontrun{ #' diagnose_design(mediation_2, sims = 1000) #' } #' -mediation_analysis_designer <- function(N = 200, a = 1, b = .4, c = 0, d = .5, rho = 0) +#' +mediation_analysis_designer <- function(N = 200, + Z_on_M = 1, + M_on_Y_Z0 = .4, + M_on_Y_Z1 = 0, + Z_on_Y_M0 = .5, + rho = 0, + mediation_package = FALSE) + { if(abs(rho) > 1) stop("rho must be in [-1, 1]") + + mediation_analysis_expr <- mediate_estimator_expr <- NULL + + # I: Inquiry + + estimands_expr <- rlang::expr( + estimands <- declare_estimands( + FirstStage = mean(M_Z_1 - M_Z_0), + natural_indirect_0 = mean(Y_nat1_Z_0 - Y_nat0_Z_0), + natural_direct_0 = mean(Y_nat0_Z_1 - Y_nat0_Z_0), + natural_direct_1 = mean(Y_nat1_Z_1 - Y_nat1_Z_0), + controlled_indirect_0 = mean(Y_M_1_Z_0 - Y_M_0_Z_0), + controlled_direct_0 = mean(Y_M_0_Z_1 - Y_M_0_Z_0), + controlled_direct_1 = mean(Y_M_1_Z_1 - Y_M_1_Z_0) + ) + ) + + # Design + + mediation_design_expr <- rlang::expr( + mediation_analysis_design <- population + + POs_M + POs_Y + POs_Y_nat_0 + POs_Y_nat_1 + + estimands + assignment + + reveal_M + reveal_Y + reveal_nat0 + reveal_nat1 + manipulation + + mediator_regression + stage2_1 + stage2_2 + stage2_3 + ) + + if(mediation_package){ + stopifnot(requireNamespace("mediation")) + + # I: Inquiry + estimands_expr <- rlang::expr( + estimands <- declare_estimands( + FirstStage = mean(M_Z_1 - M_Z_0), + natural_indirect_0 = mean(Y_nat1_Z_0 - Y_nat0_Z_0), + natural_indirect_1 = mean(Y_nat1_Z_1 - Y_nat0_Z_1), + natural_direct_0 = mean(Y_nat0_Z_1 - Y_nat0_Z_0), + natural_direct_1 = mean(Y_nat1_Z_1 - Y_nat1_Z_0), + controlled_indirect_0 = mean(Y_M_1_Z_0 - Y_M_0_Z_0) , + controlled_indirect_1 = mean(Y_M_1_Z_1 - Y_M_0_Z_1), + controlled_direct_0 = mean(Y_M_0_Z_1 - Y_M_0_Z_0), + controlled_direct_1 = mean(Y_M_1_Z_1 - Y_M_1_Z_0) + ) + ) + + # A: Answer Strategy + + + + + mediate_estimator_expr <- rlang::expr( + mediate_estimator <- declare_estimator(handler = function(data){ + + # QBA: Quasi-Bayesian Approximation + e1 <- lm(M ~ Z, data = data) + e2 <- lm(Y ~ M + Z + M:Z, data = data) + m <- mediation::mediate(e1, e2, sims = 100, treat = "Z", mediator = "M") + + estimates <- tidy(m, conf.int = TRUE) + estimates <- rbind(estimates, estimates) + estimates$estimator_label <- rep(c("qba - indirect_0", "qba - indirect_1", "qba - direct_0", "qba - direct_1") , 2) + estimates$estimand_label <- c("natural_indirect_0", "natural_indirect_1", "natural_direct_0", "natural_direct_1", + c("controlled_indirect_0", "controlled_indirect_1", "controlled_direct_0", "controlled_direct_1") ) + estimates$outcome <- rep("Y", 4) + estimates$term <- rep(c("indirect_0", "indirect_1", "direct_0", "direct_1"), 2) + as.data.frame(estimates) + }, + label = "mediate") + ) + + # Design + mediation_design_expr <- rlang::expr( + population + + POs_M + POs_Y + POs_Y_nat_0 + POs_Y_nat_1 + + estimands + assignment + + reveal_M + reveal_Y + reveal_nat0 + reveal_nat1 + manipulation + + mediator_regression + stage2_1 + stage2_2 + stage2_3 + mediate_estimator + ) + } + + + {{{ + # M: Model + population <- declare_population( N = N, e1 = rnorm(N), e2 = rnorm(n = N, mean = rho * e1, sd = sqrt(1 - rho^2)) ) - POs_M <- declare_potential_outcomes(M ~ 1*(a * Z + e1 > 0)) - POs_Y <- declare_potential_outcomes(Y ~ d * Z + b * M + c * M * Z + e2, + + POs_M <- declare_potential_outcomes(M ~ 1 * (Z_on_M * Z + e1 > 0)) + POs_Y <- declare_potential_outcomes(Y ~ Z_on_Y_M0 * Z + M_on_Y_Z0 * M + M_on_Y_Z1 * M * Z + e2, conditions = list(M = 0:1, Z = 0:1)) + POs_Y_nat_0 <- declare_potential_outcomes( - Y_nat0_Z_0 = b * M_Z_0 + e2, - Y_nat0_Z_1 = d + b * M_Z_0 + c * M_Z_0 + e2) + Y_nat0_Z_0 = M_on_Y_Z0 * M_Z_0 + e2, + Y_nat0_Z_1 = Z_on_Y_M0 + M_on_Y_Z0 * M_Z_0 + M_on_Y_Z1 * M_Z_0 + e2) POs_Y_nat_1 <- declare_potential_outcomes( - Y_nat1_Z_0 = b * M_Z_1 + e2, - Y_nat1_Z_1 = d + b * M_Z_1 + c * M_Z_1 + e2) + Y_nat1_Z_0 = M_on_Y_Z0 * M_Z_1 + e2, + Y_nat1_Z_1 = Z_on_Y_M0 + M_on_Y_Z0 * M_Z_1 + M_on_Y_Z1 * M_Z_1 + e2) # I: Inquiry - estimands <- declare_estimands( - FirstStage = mean(M_Z_1 - M_Z_0), - Indirect_0 = mean(Y_M_1_Z_0 - Y_M_0_Z_0), - Indirect_1 = mean(Y_M_1_Z_1 - Y_M_0_Z_1), - Controlled_Direct_0 = mean(Y_M_0_Z_1 - Y_M_0_Z_0), - Controlled_Direct_1 = mean(Y_M_1_Z_1 - Y_M_1_Z_0), - Natural_Direct_0 = mean(Y_nat0_Z_1 - Y_nat0_Z_0), - Natural_Direct_1 = mean(Y_nat1_Z_1 - Y_nat1_Z_0) - ) + + rlang::eval_bare(estimands_expr) # D: Data strategy + assignment <- declare_assignment() reveal_M <- declare_reveal(M, Z) reveal_Y <- declare_reveal(Y, assignment_variable = c("M","Z")) @@ -79,70 +169,107 @@ mediation_analysis_designer <- function(N = 200, a = 1, b = .4, c = 0, d = .5, r manipulation <- declare_step(Not_M = 1 - M, handler = fabricate) # A: Answer Strategy + mediator_regression <- declare_estimator( M ~ Z, model = lm_robust, - estimand = "FirstStage", - label = "Stage 1") + label = "Stage 1", + estimand = "FirstStage" + ) + + stage2_1 <- declare_estimator( - Y ~ Z * M, + Y ~ Z * M, model = lm_robust, term = c("M"), - estimand = c("Indirect_0"), - label = "Stage 2" + label = "Stage 2", + estimand = c( "natural_indirect_0", "controlled_indirect_0") ) + stage2_2 <- declare_estimator( - Y ~ Z * M, + Y ~ Z * M, model = lm_robust, term = c("Z"), - estimand = c("Controlled_Direct_0", "Natural_Direct_0"), - label = "Direct_0" + label = "Direct_0", + estimand = c("natural_direct_0", "controlled_direct_0") ) + stage2_3 <- declare_estimator( - Y ~ Z * Not_M, + Y ~ Z * Not_M, model = lm_robust, term = c("Z"), - estimand = c("Controlled_Direct_1", "Natural_Direct_1"), - label = "Direct_1" + label = "Direct_1", + estimand = c("natural_direct_1", "controlled_direct_1") ) + + + rlang::eval_bare(mediate_estimator_expr) + # Design - mediation_analysis_design <- population + - POs_M + POs_Y + POs_Y_nat_0 + POs_Y_nat_1 + - estimands + assignment + - reveal_M + reveal_Y + reveal_nat0 + reveal_nat1 + manipulation + - mediator_regression + stage2_1 + stage2_2 + stage2_3 + + mediation_analysis_design <- rlang::eval_bare(mediation_design_expr) + }}} - attr(mediation_analysis_design, "code") <- - construct_design_code(mediation_analysis_designer, match.call.defaults()) - mediation_analysis_design + design_code <- + construct_design_code( + mediation_analysis_designer, + match.call.defaults(), + arguments_as_values = TRUE, + exclude_args = "mediation_package" + ) + + if(mediation_package) + substitutes <- list(rlang::quo_text(mediation_analysis_expr), rlang::quo_text(mediate_estimator_expr)) + else + substitutes <- list("", "") + + + design_code <- + gsub("rlang::eval_bare\\(mediation_analysis_expr\\)", substitutes[[1]], design_code) + design_code <- + gsub("rlang::eval_bare\\(mediate_estimator_expr\\)", substitutes[[2]], design_code) + + design_code <- + gsub("rlang::eval_bare\\(mediation_design_expr\\)", rlang::quo_text(mediation_design_expr), design_code) + design_code <- + gsub("rlang::eval_bare\\(estimands_expr\\)", rlang::quo_text(estimands_expr), design_code) + + attr(mediation_analysis_design, "code") <- design_code + return(mediation_analysis_design) + } + +attr(mediation_analysis_designer,"shiny_arguments") <- list( + N = c(100, 50, 1000), + Z_on_M = seq(from = .5, to = -.5, by = -.5), + M_on_Y_Z0 = seq(from = .5, to = -.5, by = -.5), + M_on_Y_Z1 = seq(from = .5, to = -.5, by = -.5), + Z_on_Y_M0 = seq(from = .5, to = -.5, by = -.5), + rho = c(.2, seq(from = -1, to = 1, by = .5)), + mediation_package = c(FALSE, TRUE) +) + + attr(mediation_analysis_designer,"definitions") <- data.frame( - names = c("N", "a", "b", "c", "d", "rho"), + names = c("N", "Z_on_M", "M_on_Y_Z0 ", "M_on_Y_Z1", "Z_on_Y_M0", "rho", "mediation_package"), tips = c("Size of sample", "Effect of treatment (Z) on mediator (M)", - "Effect of mediator (M) on outcome (Y)", + "Effect of mediator (M) on outcome (Y) when Z = 0.", "Interaction between mediator (M) and (Z) for outcome (Y)", - "Direct effect of treatment (Z) on outcome (Y)", - "Correlation of mediator (M) and outcome (Y) error terms"), - class = c("integer", rep("numeric", 5)), - min = c(1, rep(-Inf, 4), -1), - max = c(1, rep(Inf, 4), 1), - inspector_min = c(100, rep(0, 4), -1), - inspector_step = c(50, 0.1, .2), + "Effect of treatment (Z) on outcome (Y), when M = 0", + "Correlation of mediator (M) and outcome (Y) error terms", + "If 'TRUE' direct and indirect effects are estimated using mediation::mediate()"), + class = c("integer", rep("numeric", 5), "logical"), + min = c(1, rep(-Inf, 4), -1, 0), + max = c(1, rep(Inf, 4), 1, 1), + inspector_min = c(100, rep(0, 4), -1, 0), + inspector_step = c(50, 0.1, .2, 0.3, 0.4, 0.2, 0), stringsAsFactors = FALSE ) -attr(mediation_analysis_designer,"shiny_arguments") <- list( - N = c(100, 50, 1000), - a = seq(from = .5, to = -.5, by = -.5), - b = seq(from = .5, to = -.5, by = -.5), - d = seq(from = .5, to = -.5, by = -.5), - rho = c(.2, seq(from = -1, to = 1, by = .5)) -) - attr(mediation_analysis_designer,"description") <- "
A mediation analysis design with sample size N
that examines
the effect of treatment (Z) on mediator (M) and the effect of mediator (M) on
@@ -153,5 +280,3 @@ outcome (Y) (given Z=0) as well as direct effect of treatment (Z) on outcome
Error terms on mediator (M) and outcome (Y) correlated by rho
"
-
-
diff --git a/man/mediation_analysis_designer.Rd b/man/mediation_analysis_designer.Rd
index a8b83fdf..2aeaa5e3 100644
--- a/man/mediation_analysis_designer.Rd
+++ b/man/mediation_analysis_designer.Rd
@@ -4,29 +4,31 @@
\alias{mediation_analysis_designer}
\title{Create a design for mediation analysis}
\usage{
-mediation_analysis_designer(N = 200, a = 1, b = 0.4, c = 0,
- d = 0.5, rho = 0)
+mediation_analysis_designer(N = 200, Z_on_M = 1, M_on_Y_Z0 = 0.4,
+ M_on_Y_Z1 = 0, Z_on_Y_M0 = 0.5, rho = 0,
+ mediation_package = FALSE)
}
\arguments{
\item{N}{An integer. Size of sample.}
-\item{a}{A number. Parameter governing effect of treatment (Z) on mediator (M).}
+\item{Z_on_M}{A number. Parameter governing effect of treatment (Z) on mediator (M).}
-\item{b}{A number. Effect of mediator (M) on outcome (Y) when Z = 0.}
+\item{M_on_Y_Z0}{A number. Effect of mediator (M) on outcome (Y) when Z = 0.}
-\item{c}{A number. Interaction between mediator (M) and (Z) for outcome (Y).}
+\item{M_on_Y_Z1}{A number. Interaction between mediator (M) and (Z) for outcome (Y).}
-\item{d}{A number. Direct effect of treatment (Z) on outcome (Y), when M = 0.}
+\item{Z_on_Y_M0}{A number. Effect of treatment (Z) on outcome (Y), when M = 0.}
\item{rho}{A number in [-1,1]. Correlation between mediator (M) and outcome (Y) error terms. Non zero correlation implies a violation of sequential ignorability.}
+
+\item{mediation_package}{A logical value. If 'TRUE' direct and indirect effects are estimated using \code{mediate} function from \code{mediation} package. Default is 'FALSE'.}
}
\value{
A mediation analysis design.
}
\description{
-A mediation analysis design that examines the effect of treatment (Z) on mediator (M) and the effect of mediator (M) on outcome (Y) (given Z=0)
-as well as direct effect of treatment (Z) on outcome (Y) (given M=0). Analysis is implemented using an interacted regression model.
-Note this model is not guaranteed to be unbiased despite randomization of Z because of possible violations of sequential ignorability.
+A mediation analysis design that examines the effect of (Z) on mediator (M), the natural and controlled direct effect of treatment (Z) on outcome (Y) as well as the natural and controlled indirect effect of treatment (Z) on outcome (Y) through mediator (M).
+Analysis is implemented using a set of two linear structural models: a first stage model and a interacted model. Note estimates are not guaranteed to be unbiased despite randomization of Z because of possible violations of sequential ignorability.
}
\details{
See \href{https://declaredesign.org/library/articles/mediation_analysis.html}{vignette online}.
@@ -40,12 +42,13 @@ diagnose_design(mediation_1, sims = 1000)
}
# A design with a violation of sequential ignorability and heterogeneous effects:
-mediation_2 <- mediation_analysis_designer(a = 1, rho = .5, c = 1, d = .75)
+mediation_2 <- mediation_analysis_designer(Z_on_M =1, rho = .5, M_on_Y_Z1 = 1, Z_on_Y_M0 =.75)
draw_estimands(mediation_2)
\dontrun{
diagnose_design(mediation_2, sims = 1000)
}
+
}
\author{
\href{https://declaredesign.org/}{DeclareDesign Team}
diff --git a/tests/testthat.R b/tests/testthat.R
index 66f95a21..ef196d6d 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,5 +1,7 @@
library(testthat)
library(DesignLibrary)
+library(DeclareDesign)
+library(mediation)
test_check(package = "DesignLibrary")
diff --git a/tests/testthat/test_designers.R b/tests/testthat/test_designers.R
index c5e191c3..82ac18b7 100644
--- a/tests/testthat/test_designers.R
+++ b/tests/testthat/test_designers.R
@@ -1,6 +1,4 @@
-
context(desc = "Testing that designers in the library work as they should")
-
functions <- ls("package:DesignLibrary")
designers <- functions[grepl("_designer\\b",functions)]
designers <- designers[!grepl("simple",designers)]
@@ -151,8 +149,21 @@ test_that(desc = "two_arm_designer errors when it should",
test_that(desc = "mediation_analysis_designer errors when it should",
code = {
- expect_error(mediation_analysis_designer(rho = 10))
- })
+
+ expect_error(mediation_analysis_designer(rho = 10))
+ expect_error(mediation_analysis_designer(mediation_package = "true"))
+
+ })
+
+
+test_that(desc = "mediation_analysis_designer with mediate package errors",
+ code = {
+ if(isNamespaceLoaded("mediation")) {
+ med_design <- mediation_analysis_designer(mediation_package = TRUE)
+ expect_true("design" %in% class(med_design))
+ expect_true("data.frame" %in% class(draw_estimates(med_design)))}
+ })
+
test_that(desc = "spillover_designer errors when it should",
code = {
diff --git a/vignettes/mediation_analysis.Rmd b/vignettes/mediation_analysis.Rmd
index 2c028327..f80f5254 100644
--- a/vignettes/mediation_analysis.Rmd
+++ b/vignettes/mediation_analysis.Rmd
@@ -74,7 +74,6 @@ designs <- expand_design(mediation_analysis_designer, rho = c(0,.5))
diagnosis <- diagnose_design(designs)
```
-
```{r,eval = TRUE, echo = FALSE}
kable(reshape_diagnosis(diagnosis)[,-1], digits = 2)
```
@@ -83,6 +82,3 @@ Our diagnosis indicates that when the error terms are not correlated, the direct
Unfortunately, the assumption of no correlation is not always guaranteed, since $M$ is not assigned at random and might be correlated with $Y$.
-## Further Reading
-
-