v2.8.0 - Include ANCOM-BC. fix #19

Russel88 · Mar 8, 2022 · 42a71b5 · 42a71b5
1 parent b2a3cc3
commit 42a71b5
Show file tree

Hide file tree

Showing 17 changed files with 213 additions and 43 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -11,6 +11,7 @@ bioc_packages:
 - DESeq2
 - ALDEx2
 - impute
+- ANCOMBC
 
 before_install:
 - sudo apt-get install libgsl-dev

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: DAtest
 Title: Comparing Differential Abundance/Expression Methods
-Version: 2.7.18
+Version: 2.8.0
 Authors@R: person("Jakob", "Russel",
     email = "[email protected]", 
     role = c("aut", "cre"))
@@ -45,9 +45,10 @@ Suggests: lsmeans,
     mvabund (>= 4.0),
     phyloseq (>= 1.26),
     DESeq2 (>= 1.22),
-    metagenomeSeq (>= 1.24)
+    metagenomeSeq (>= 1.24),
+    ANCOMBC
 License: GPL (>= 3) | file LICENSE
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
 BugReports: https://github.com/Russel88/DAtest/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ S3method(print,DA)
 S3method(summary,DA)
 S3method(summary,DAPower)
 export(DA.TukeyHSD)
+export(DA.abc)
 export(DA.adx)
 export(DA.anova)
 export(DA.aoa)

diff --git a/R/DA.abc.R b/R/DA.abc.R
@@ -0,0 +1,108 @@
+#' ANCOM-BC
+#'
+#' Implementation of ANCOM-BC for \code{DAtest}
+#' @param data Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples
+#' @param predictor The predictor of interest. Either a Factor or Numeric, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation
+#' @param covars Either a named list with covariables, OR if \code{data} is a \code{phyloseq} object a character vector with names of the variables in \code{sample_data(data)}
+#' @param out.all If TRUE, will run global test which will produce one p-value for the \code{predictor}. If FALSE will run standard test and will output p-value from one level of the predictor specified by \code{coeff}. If NULL (default) set as TRUE for multi-class \code{predictor} and FALSE otherwise
+#' @param p.adj Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details
+#' @param coeff Integer. The beta coefficient and p-value will be associated with this coefficient. This coefficient is by default compared to the intercept (1. level of \code{predictor}) Default 2, i.e. the 2. level of the \code{predictor}.
+#' @param allResults If TRUE will return raw results from the \code{ancombc} function
+#' @param ... Additional arguments for the \code{ancombc} function
+#' @return A data.frame with with results.
+#' @examples
+#' # Creating random count_table and predictor
+#' set.seed(4)
+#' mat <- matrix(rnbinom(200, size = 0.1, mu = 500), nrow = 20, ncol = 10)
+#' rownames(mat) <- 1:20
+#' pred <- c(rep("Control", 5), rep("Treatment", 5))
+#' 
+#' # Running ANCOM-BC
+#' res <- DA.abc(data = mat, predictor = pred)
+#' @export
+DA.abc <- function(data, predictor, covars = NULL, out.all = NULL, p.adj = "fdr", coeff = 2, allResults = FALSE, ...){
+
+  ok1 <- tryCatch({
+    loadNamespace("ANCOMBC")
+    TRUE
+  }, error=function(...) FALSE)
+  ok2 <- tryCatch({
+    loadNamespace("phyloseq")
+    TRUE
+  }, error=function(...) FALSE)
+  ok <- ok1 && ok2
+
+  if (ok){
+    # Convert to phyloseq
+    if(is(data, "phyloseq")){
+      phy_data <- data
+      org_pred <- unlist(phyloseq::sample_data(data)[, predictor])
+      if(!is.null(covars)){
+        form <- paste(predictor, paste(covars, collapse="+"), sep="+")
+      } else {
+        form <- predictor
+      }
+
+    } else {
+      org_pred <- predictor
+      predictor <- "predictor"
+      otu_table <-  phyloseq::otu_table(data, taxa_are_rows = TRUE)
+      if(!is.null(covars)){
+        samp_table <- phyloseq::sample_data(data.frame(row.names = colnames(data), predictor = org_pred))
+        form <- paste("predictor", paste(names(covars), collapse="+"), sep="+")
+        for(covar_sub in names(covars)){
+          samp_table[, covar_sub] <- covars[covar_sub]
+        }
+      } else {
+        samp_table <-  phyloseq::sample_data(data.frame(row.names = colnames(data), predictor = org_pred))
+        form <- "predictor"
+      }
+      phy_data <- phyloseq::phyloseq(otu_table, samp_table)
+    }
+
+    coeff.ref <- 1
+    if(coeff == coeff.ref) stop("coeff and coeff.ref cannot be the same")
+
+    # out.all
+    if(is.null(out.all)){
+      if(length(unique(org_pred)) == 2) out.all <- FALSE
+      if(length(unique(org_pred)) > 2) out.all <- TRUE
+    }
+
+    # Run test
+    if(out.all){
+      abc_res <- ANCOMBC::ancombc(phy_data, formula = form, p_adj_method = p.adj, group = predictor, global = TRUE, ...)
+
+      res <- data.frame(Feature = rownames(abc_res$res_global),
+                        Beta = NA,
+                        pval = abc_res$res_global$p_val,
+                        pval.adj = abc_res$res_global$q_val)
+
+    }
+    if(!out.all){
+      abc_res <- ANCOMBC::ancombc(phy_data, formula = form, p_adj_method = p.adj, global = FALSE, ...)
+
+      res <- data.frame(Feature = rownames(abc_res$res$beta),
+                        Beta = as.numeric(abc_res$res$beta[, coeff - 1]),
+                        pval = as.numeric(abc_res$res$p_val[, coeff - 1]),
+                        pval.adj = as.numeric(abc_res$res$q_val[, coeff - 1]))
+
+    }
+
+    if(!is.numeric(predictor)){
+      res$ordering <- NA
+      res[!is.na(res$Beta) & res$Beta > 0,"ordering"] <- paste0(levels(as.factor(org_pred))[coeff],">",levels(as.factor(org_pred))[coeff.ref])
+      res[!is.na(res$Beta) & res$Beta < 0,"ordering"] <- paste0(levels(as.factor(org_pred))[coeff.ref],">",levels(as.factor(org_pred))[coeff])
+    }
+
+    res$Method <- "ANCOM-BC (abc)"
+
+    if(is(data, "phyloseq")) res <- addTax(data, res)
+
+    if(allResults) return(abc_res) else return(res)
+  } else {
+    stop("ANCOM-BC package required")
+  }
+
+}
+
diff --git a/R/allDA.R b/R/allDA.R
@@ -44,16 +44,16 @@
 #' \donttest{
 #' # Include a paired variable for dependent/blocked samples
 #' subject <- rep(1:5, 2)
-#' res <- allDA(data = mat, predictor = pred, paired = subject)
+#' res <- allDA(data = mat, predictor = pred, paired = subject, cores = 1)
 #' 
 #' # Include covariates
 #' covar1 <- rnorm(10)
 #' covar2 <- rep(c("A","B"), 5)
 #' res <- allDA(data = mat, predictor = pred, 
-#'              covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2))
+#'              covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2), cores = 1)
 #' 
 #' # Data is absolute abundance
-#' res <- allDA(data = mat, predictor = pred, relative = FALSE)
+#' res <- allDA(data = mat, predictor = pred, relative = FALSE, cores = 1)
 #' }
 #' @export
 
@@ -62,7 +62,7 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL,
                             "erq","erq2","neb","qpo","poi","sam",
                             "lrm","llm","llm2","lma","lmc",
                             "ere","ere2","pea","spe",
-                            "wil","kru","qua","fri",
+                            "wil","kru","qua","fri","abc",
                             "ttt","ltt","ltt2","tta","ttc","ttr",
                             "aov","lao","lao2","aoa","aoc",
                             "vli","lim","lli","lli2","lia","lic"),
@@ -198,6 +198,7 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL,
                                ere2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor, p.adj), argsL[[i]])),
                                msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor, p.adj), argsL[[i]])),
                                zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,covars, p.adj), argsL[[i]])),
+                               abc = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,covars,out.all, p.adj), argsL[[i]])),
                                ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,covars,out.all, p.adj), argsL[[i]])),
                                ds2x = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,covars,out.all, p.adj), argsL[[i]])),
                                per = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired, relative, p.adj), argsL[[i]])),
@@ -347,7 +348,8 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL,
                    zig = paste0("predictor",levels(as.factor(predictor))[2]),
                    ds2 = "log2FoldChange",
                    ds2x = "log2FoldChange",
-                   mva = "log2FC")
+                   mva = "log2FC",
+                   abc = "Beta")
 
   if(!is.numeric(predictor) & length(unique(predictor)) > 2){
     df.est <- NULL

diff --git a/R/plot.DA.R b/R/plot.DA.R
@@ -20,7 +20,7 @@ plot.DA <- function(x, sort = "Score", p = FALSE, bins = 20, ...){
     pval.all <- lapply(x$results, function(x) lapply(x, function(y) y[,c("pval","Method","Spiked")]))
     df.all <- suppressWarnings(do.call(rbind, do.call(rbind,pval.all)))
     df.all <- df.all[df.all$Spiked == "No",]
-    df.all <- df.all[!df.all$Method %in% c("ANCOM (anc)","SAMseq (sam)"),]
+    df.all <- df.all[!df.all$Method %in% c("SAMseq (sam)"),]
 
     ## Plot
     ggplot(df.all, aes_string(x = "pval")) +

diff --git a/R/powerDA.R b/R/powerDA.R
@@ -39,16 +39,17 @@
 #' \donttest{
 #' # Include a paired variable for dependent/blocked samples
 #' subject <- rep(1:10, 2)
-#' res <- powerDA(data = mat, predictor = pred, paired = subject, test = "ttt")
+#' res <- powerDA(data = mat, predictor = pred, paired = subject, test = "ttt", cores = 1)
 #' 
 #' # Include covariates
 #' covar1 <- rnorm(20)
 #' covar2 <- rep(c("A","B"), 10)
 #' res <- powerDA(data = mat, predictor = pred, 
-#'                covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2), test = "lrm")
+#'                covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2), 
+#'                test = "lrm", cores = 1)
 #' 
 #' # Data is absolute abundance
-#' res <- powerDA(data = mat, predictor = pred, relative = FALSE, test = "ttt")
+#' res <- powerDA(data = mat, predictor = pred, relative = FALSE, test = "ttt", cores = 1)
 #' }
 #' @export
 
@@ -214,6 +215,7 @@ powerDA <- function(data, predictor, paired = NULL, covars = NULL, test = NULL,
                                ere2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],p.adj), args)),
                                msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],p.adj), args)),
                                zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,covars,p.adj), args)),
+                               abc = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],covars,out.all,p.adj), args)),
                                ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,covars,out.all,p.adj), args)),
                                ds2x = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,covars,out.all,p.adj), args)),
                                per = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired, relative,p.adj), args)),

diff --git a/R/pruneTests.R b/R/pruneTests.R
@@ -11,10 +11,11 @@ pruneTests <- function(tests, predictor, paired, covars, relative, decimal, zero
   if(!"pscl" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("zpo","znb")]
   if(!"samr" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("sam")]
   if(!"mvabund" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("mva")]
+  if(!"ANCOMBC" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("abc")]
 
   # Exclude tests that do not work with a paired argument
   if(!is.null(paired)){
-    tests <- tests[!tests %in% c("qpo","zpo","znb","bay","adx","ere","ere2","msf","aov","lao","lao2","aoa","aoc","kru","rai","spe","pea")]
+    tests <- tests[!tests %in% c("abc","qpo","zpo","znb","bay","adx","ere","ere2","msf","aov","lao","lao2","aoa","aoc","kru","rai","spe","pea")]
     # Exclude tests that only work with one value for each combination of predictor and paired arguments
     if(!all(table(paired,predictor) == 1)){
       tests <- tests[!tests %in% c("ttt","ttr","ltt","ltt2","wil","per","fri","qua","sam","tta","ttc")]
@@ -31,7 +32,7 @@ pruneTests <- function(tests, predictor, paired, covars, relative, decimal, zero
 
   # Only include some tests if there are more than two levels in predictor
   if(length(unique(predictor)) > 2){
-    tests <- tests[tests %in% c("bay","sam","qua","fri","znb","zpo","vli","qpo","poi","neb","erq","erq2","ds2","ds2x","lim","lli","lli2","aov","lao","lao2","aoa","aoc","kru","lrm","llm","llm2","spe","pea","zig","lma","lmc","lia","lic")]
+    tests <- tests[tests %in% c("abc","bay","sam","qua","fri","znb","zpo","vli","qpo","poi","neb","erq","erq2","ds2","ds2x","lim","lli","lli2","aov","lao","lao2","aoa","aoc","kru","lrm","llm","llm2","spe","pea","zig","lma","lmc","lia","lic")]
     # Exclude if only works for two-class paired
     if(!is.null(paired)){
       tests <- tests[!tests %in% c("sam")]
@@ -43,23 +44,23 @@ pruneTests <- function(tests, predictor, paired, covars, relative, decimal, zero
 
   # Only include specific tests if predictor is numeric
   if(is.numeric(predictor)){
-    tests <- tests[tests %in% c("mva","sam","znb","zpo","vli","qpo","poi","neb","erq","erq2","lim","lli","lli2","lrm","llm","llm2","spe","pea","lma","lmc","lia","lic")]
+    tests <- tests[tests %in% c("abc","mva","sam","znb","zpo","vli","qpo","poi","neb","erq","erq2","lim","lli","lli2","lrm","llm","llm2","spe","pea","lma","lmc","lia","lic")]
   } else {
     # Exclude if not numeric
     tests <- tests[!tests %in% c("spe","pea")]
   }
 
   # Exclude if relative is false
   if(relative == FALSE){
-    tests <- tests[!tests %in% c("sam","vli","ltt2","erq","ere","ere2","erq2","msf","zig","bay","ds2","ds2x","adx","lli2","lao2","aoa","aoc","llm2","rai","tta","ttc","lma","lmc","lia","lic")]
+    tests <- tests[!tests %in% c("abc","sam","vli","ltt2","erq","ere","ere2","erq2","msf","zig","bay","ds2","ds2x","adx","lli2","lao2","aoa","aoc","llm2","rai","tta","ttc","lma","lmc","lia","lic")]
   } else {
     # Exclude if relative is TRUE
     tests <- tests[!tests %in% c("lrm","lim")]
   }
 
   # Exclude if decimal is TRUE
   if(decimal){
-    tests <- tests[!tests %in% c("znb","zpo","qpo","poi","neb","mva")]
+    tests <- tests[!tests %in% c("abc","znb","zpo","qpo","poi","neb","mva")]
   }
 
   # Only include if covars are present

diff --git a/R/runtimeDA.R b/R/runtimeDA.R
@@ -31,7 +31,7 @@
 #' runtimeDA(mat, pred, cores = 1, tests = c("ttt","wil"), tests.slow = c("neb"))
 #' @export
 runtimeDA <- function(data, predictor, paired = NULL, covars = NULL, subsamples = c(500,1000,1500,2000), subsamples.slow = c(100,150,200,250), 
-                      tests =  c("sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ttr", "ltt", "ltt2","ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", "lic"), 
+                      tests =  c("abc", "sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ttr", "ltt", "ltt2","ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", "lic"), 
                       tests.slow = c("mva", "neb", "bay", "per", "ds2", "ds2x", "zpo", "znb", "adx", "poi", "erq", "erq2"), cores = (detectCores()-1), ...){
 
   stopifnot(exists("data"),exists("predictor"))

diff --git a/R/testDA.R b/R/testDA.R
@@ -44,16 +44,16 @@
 #' \donttest{
 #' # Include a paired variable for dependent/blocked samples
 #' subject <- rep(1:10, 2)
-#' res <- testDA(data = mat, predictor = pred, paired = subject)
+#' res <- testDA(data = mat, predictor = pred, paired = subject, cores = 1, R = 1)
 #' 
 #' # Include covariates
 #' covar1 <- rnorm(20)
 #' covar2 <- rep(c("A","B"), 10)
 #' res <- testDA(data = mat, predictor = pred, 
-#'               covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2))
+#'               covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2), cores = 1, R = 1)
 #' 
 #' # Data is absolute abundance
-#' res <- testDA(data = mat, predictor = pred, relative = FALSE)
+#' res <- testDA(data = mat, predictor = pred, relative = FALSE, cores = 1, R = 1)
 #' }
 #' 
 #' @import stats snow doSNOW foreach utils doParallel
@@ -66,7 +66,7 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 20,
                              "erq","erq2","neb","qpo","poi","sam",
                              "lrm","llm","llm2","lma","lmc",
                              "ere","ere2","pea","spe",
-                             "wil","kru","qua","fri",
+                             "wil","kru","qua","fri","abc",
                              "ttt","ltt","ltt2","tta","ttc","ttr",
                              "aov","lao","lao2","aoa","aoc",
                              "vli","lim","lli","lli2","lia","lic"),
@@ -254,6 +254,7 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 20,
                                ere2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]], p.adj), argsL[[i]])),
                                msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]], p.adj), argsL[[i]])),
                                zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,covars, p.adj), argsL[[i]])),
+                               abc = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],covars,out.all, p.adj), argsL[[i]])),
                                ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,covars,out.all, p.adj), argsL[[i]])),
                                ds2x = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,covars,out.all, p.adj), argsL[[i]])),
                                per = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired, relative, p.adj), argsL[[i]])),