Skip to content

Commit

Permalink
v2.8.0 - Include ANCOM-BC. fix #19
Browse files Browse the repository at this point in the history
  • Loading branch information
Jakob Russel committed Mar 8, 2022
1 parent b2a3cc3 commit 42a71b5
Show file tree
Hide file tree
Showing 17 changed files with 213 additions and 43 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ bioc_packages:
- DESeq2
- ALDEx2
- impute
- ANCOMBC

before_install:
- sudo apt-get install libgsl-dev
Expand Down
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: DAtest
Title: Comparing Differential Abundance/Expression Methods
Version: 2.7.18
Version: 2.8.0
Authors@R: person("Jakob", "Russel",
email = "[email protected]",
role = c("aut", "cre"))
Expand Down Expand Up @@ -45,9 +45,10 @@ Suggests: lsmeans,
mvabund (>= 4.0),
phyloseq (>= 1.26),
DESeq2 (>= 1.22),
metagenomeSeq (>= 1.24)
metagenomeSeq (>= 1.24),
ANCOMBC
License: GPL (>= 3) | file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
BugReports: https://github.com/Russel88/DAtest/issues
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method(print,DA)
S3method(summary,DA)
S3method(summary,DAPower)
export(DA.TukeyHSD)
export(DA.abc)
export(DA.adx)
export(DA.anova)
export(DA.aoa)
Expand Down
108 changes: 108 additions & 0 deletions R/DA.abc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#' ANCOM-BC
#'
#' Implementation of ANCOM-BC for \code{DAtest}
#' @param data Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples
#' @param predictor The predictor of interest. Either a Factor or Numeric, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation
#' @param covars Either a named list with covariables, OR if \code{data} is a \code{phyloseq} object a character vector with names of the variables in \code{sample_data(data)}
#' @param out.all If TRUE, will run global test which will produce one p-value for the \code{predictor}. If FALSE will run standard test and will output p-value from one level of the predictor specified by \code{coeff}. If NULL (default) set as TRUE for multi-class \code{predictor} and FALSE otherwise
#' @param p.adj Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details
#' @param coeff Integer. The beta coefficient and p-value will be associated with this coefficient. This coefficient is by default compared to the intercept (1. level of \code{predictor}) Default 2, i.e. the 2. level of the \code{predictor}.
#' @param allResults If TRUE will return raw results from the \code{ancombc} function
#' @param ... Additional arguments for the \code{ancombc} function
#' @return A data.frame with with results.
#' @examples
#' # Creating random count_table and predictor
#' set.seed(4)
#' mat <- matrix(rnbinom(200, size = 0.1, mu = 500), nrow = 20, ncol = 10)
#' rownames(mat) <- 1:20
#' pred <- c(rep("Control", 5), rep("Treatment", 5))
#'
#' # Running ANCOM-BC
#' res <- DA.abc(data = mat, predictor = pred)
#' @export
DA.abc <- function(data, predictor, covars = NULL, out.all = NULL, p.adj = "fdr", coeff = 2, allResults = FALSE, ...){

ok1 <- tryCatch({
loadNamespace("ANCOMBC")
TRUE
}, error=function(...) FALSE)
ok2 <- tryCatch({
loadNamespace("phyloseq")
TRUE
}, error=function(...) FALSE)
ok <- ok1 && ok2

if (ok){
# Convert to phyloseq
if(is(data, "phyloseq")){
phy_data <- data
org_pred <- unlist(phyloseq::sample_data(data)[, predictor])
if(!is.null(covars)){
form <- paste(predictor, paste(covars, collapse="+"), sep="+")
} else {
form <- predictor
}

} else {
org_pred <- predictor
predictor <- "predictor"
otu_table <- phyloseq::otu_table(data, taxa_are_rows = TRUE)
if(!is.null(covars)){
samp_table <- phyloseq::sample_data(data.frame(row.names = colnames(data), predictor = org_pred))
form <- paste("predictor", paste(names(covars), collapse="+"), sep="+")
for(covar_sub in names(covars)){
samp_table[, covar_sub] <- covars[covar_sub]
}
} else {
samp_table <- phyloseq::sample_data(data.frame(row.names = colnames(data), predictor = org_pred))
form <- "predictor"
}
phy_data <- phyloseq::phyloseq(otu_table, samp_table)
}

coeff.ref <- 1
if(coeff == coeff.ref) stop("coeff and coeff.ref cannot be the same")

# out.all
if(is.null(out.all)){
if(length(unique(org_pred)) == 2) out.all <- FALSE
if(length(unique(org_pred)) > 2) out.all <- TRUE
}

# Run test
if(out.all){
abc_res <- ANCOMBC::ancombc(phy_data, formula = form, p_adj_method = p.adj, group = predictor, global = TRUE, ...)

res <- data.frame(Feature = rownames(abc_res$res_global),
Beta = NA,
pval = abc_res$res_global$p_val,
pval.adj = abc_res$res_global$q_val)

}
if(!out.all){
abc_res <- ANCOMBC::ancombc(phy_data, formula = form, p_adj_method = p.adj, global = FALSE, ...)

res <- data.frame(Feature = rownames(abc_res$res$beta),
Beta = as.numeric(abc_res$res$beta[, coeff - 1]),
pval = as.numeric(abc_res$res$p_val[, coeff - 1]),
pval.adj = as.numeric(abc_res$res$q_val[, coeff - 1]))

}

if(!is.numeric(predictor)){
res$ordering <- NA
res[!is.na(res$Beta) & res$Beta > 0,"ordering"] <- paste0(levels(as.factor(org_pred))[coeff],">",levels(as.factor(org_pred))[coeff.ref])
res[!is.na(res$Beta) & res$Beta < 0,"ordering"] <- paste0(levels(as.factor(org_pred))[coeff.ref],">",levels(as.factor(org_pred))[coeff])
}

res$Method <- "ANCOM-BC (abc)"

if(is(data, "phyloseq")) res <- addTax(data, res)

if(allResults) return(abc_res) else return(res)
} else {
stop("ANCOM-BC package required")
}

}

12 changes: 7 additions & 5 deletions R/allDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,16 @@
#' \donttest{
#' # Include a paired variable for dependent/blocked samples
#' subject <- rep(1:5, 2)
#' res <- allDA(data = mat, predictor = pred, paired = subject)
#' res <- allDA(data = mat, predictor = pred, paired = subject, cores = 1)
#'
#' # Include covariates
#' covar1 <- rnorm(10)
#' covar2 <- rep(c("A","B"), 5)
#' res <- allDA(data = mat, predictor = pred,
#' covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2))
#' covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2), cores = 1)
#'
#' # Data is absolute abundance
#' res <- allDA(data = mat, predictor = pred, relative = FALSE)
#' res <- allDA(data = mat, predictor = pred, relative = FALSE, cores = 1)
#' }
#' @export

Expand All @@ -62,7 +62,7 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL,
"erq","erq2","neb","qpo","poi","sam",
"lrm","llm","llm2","lma","lmc",
"ere","ere2","pea","spe",
"wil","kru","qua","fri",
"wil","kru","qua","fri","abc",
"ttt","ltt","ltt2","tta","ttc","ttr",
"aov","lao","lao2","aoa","aoc",
"vli","lim","lli","lli2","lia","lic"),
Expand Down Expand Up @@ -198,6 +198,7 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL,
ere2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor, p.adj), argsL[[i]])),
msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor, p.adj), argsL[[i]])),
zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,covars, p.adj), argsL[[i]])),
abc = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,covars,out.all, p.adj), argsL[[i]])),
ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,covars,out.all, p.adj), argsL[[i]])),
ds2x = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired,covars,out.all, p.adj), argsL[[i]])),
per = do.call(get(noquote(paste0("DA.",i))),c(list(count_table,predictor,paired, relative, p.adj), argsL[[i]])),
Expand Down Expand Up @@ -347,7 +348,8 @@ allDA <- function(data, predictor, paired = NULL, covars = NULL,
zig = paste0("predictor",levels(as.factor(predictor))[2]),
ds2 = "log2FoldChange",
ds2x = "log2FoldChange",
mva = "log2FC")
mva = "log2FC",
abc = "Beta")

if(!is.numeric(predictor) & length(unique(predictor)) > 2){
df.est <- NULL
Expand Down
2 changes: 1 addition & 1 deletion R/plot.DA.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ plot.DA <- function(x, sort = "Score", p = FALSE, bins = 20, ...){
pval.all <- lapply(x$results, function(x) lapply(x, function(y) y[,c("pval","Method","Spiked")]))
df.all <- suppressWarnings(do.call(rbind, do.call(rbind,pval.all)))
df.all <- df.all[df.all$Spiked == "No",]
df.all <- df.all[!df.all$Method %in% c("ANCOM (anc)","SAMseq (sam)"),]
df.all <- df.all[!df.all$Method %in% c("SAMseq (sam)"),]

## Plot
ggplot(df.all, aes_string(x = "pval")) +
Expand Down
8 changes: 5 additions & 3 deletions R/powerDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,17 @@
#' \donttest{
#' # Include a paired variable for dependent/blocked samples
#' subject <- rep(1:10, 2)
#' res <- powerDA(data = mat, predictor = pred, paired = subject, test = "ttt")
#' res <- powerDA(data = mat, predictor = pred, paired = subject, test = "ttt", cores = 1)
#'
#' # Include covariates
#' covar1 <- rnorm(20)
#' covar2 <- rep(c("A","B"), 10)
#' res <- powerDA(data = mat, predictor = pred,
#' covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2), test = "lrm")
#' covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2),
#' test = "lrm", cores = 1)
#'
#' # Data is absolute abundance
#' res <- powerDA(data = mat, predictor = pred, relative = FALSE, test = "ttt")
#' res <- powerDA(data = mat, predictor = pred, relative = FALSE, test = "ttt", cores = 1)
#' }
#' @export

Expand Down Expand Up @@ -214,6 +215,7 @@ powerDA <- function(data, predictor, paired = NULL, covars = NULL, test = NULL,
ere2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],p.adj), args)),
msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],p.adj), args)),
zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,covars,p.adj), args)),
abc = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],covars,out.all,p.adj), args)),
ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,covars,out.all,p.adj), args)),
ds2x = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired,covars,out.all,p.adj), args)),
per = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[what.run]],rands[[run.no]],paired, relative,p.adj), args)),
Expand Down
11 changes: 6 additions & 5 deletions R/pruneTests.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ pruneTests <- function(tests, predictor, paired, covars, relative, decimal, zero
if(!"pscl" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("zpo","znb")]
if(!"samr" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("sam")]
if(!"mvabund" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("mva")]
if(!"ANCOMBC" %in% rownames(installed.packages())) tests <- tests[!tests %in% c("abc")]

# Exclude tests that do not work with a paired argument
if(!is.null(paired)){
tests <- tests[!tests %in% c("qpo","zpo","znb","bay","adx","ere","ere2","msf","aov","lao","lao2","aoa","aoc","kru","rai","spe","pea")]
tests <- tests[!tests %in% c("abc","qpo","zpo","znb","bay","adx","ere","ere2","msf","aov","lao","lao2","aoa","aoc","kru","rai","spe","pea")]
# Exclude tests that only work with one value for each combination of predictor and paired arguments
if(!all(table(paired,predictor) == 1)){
tests <- tests[!tests %in% c("ttt","ttr","ltt","ltt2","wil","per","fri","qua","sam","tta","ttc")]
Expand All @@ -31,7 +32,7 @@ pruneTests <- function(tests, predictor, paired, covars, relative, decimal, zero

# Only include some tests if there are more than two levels in predictor
if(length(unique(predictor)) > 2){
tests <- tests[tests %in% c("bay","sam","qua","fri","znb","zpo","vli","qpo","poi","neb","erq","erq2","ds2","ds2x","lim","lli","lli2","aov","lao","lao2","aoa","aoc","kru","lrm","llm","llm2","spe","pea","zig","lma","lmc","lia","lic")]
tests <- tests[tests %in% c("abc","bay","sam","qua","fri","znb","zpo","vli","qpo","poi","neb","erq","erq2","ds2","ds2x","lim","lli","lli2","aov","lao","lao2","aoa","aoc","kru","lrm","llm","llm2","spe","pea","zig","lma","lmc","lia","lic")]
# Exclude if only works for two-class paired
if(!is.null(paired)){
tests <- tests[!tests %in% c("sam")]
Expand All @@ -43,23 +44,23 @@ pruneTests <- function(tests, predictor, paired, covars, relative, decimal, zero

# Only include specific tests if predictor is numeric
if(is.numeric(predictor)){
tests <- tests[tests %in% c("mva","sam","znb","zpo","vli","qpo","poi","neb","erq","erq2","lim","lli","lli2","lrm","llm","llm2","spe","pea","lma","lmc","lia","lic")]
tests <- tests[tests %in% c("abc","mva","sam","znb","zpo","vli","qpo","poi","neb","erq","erq2","lim","lli","lli2","lrm","llm","llm2","spe","pea","lma","lmc","lia","lic")]
} else {
# Exclude if not numeric
tests <- tests[!tests %in% c("spe","pea")]
}

# Exclude if relative is false
if(relative == FALSE){
tests <- tests[!tests %in% c("sam","vli","ltt2","erq","ere","ere2","erq2","msf","zig","bay","ds2","ds2x","adx","lli2","lao2","aoa","aoc","llm2","rai","tta","ttc","lma","lmc","lia","lic")]
tests <- tests[!tests %in% c("abc","sam","vli","ltt2","erq","ere","ere2","erq2","msf","zig","bay","ds2","ds2x","adx","lli2","lao2","aoa","aoc","llm2","rai","tta","ttc","lma","lmc","lia","lic")]
} else {
# Exclude if relative is TRUE
tests <- tests[!tests %in% c("lrm","lim")]
}

# Exclude if decimal is TRUE
if(decimal){
tests <- tests[!tests %in% c("znb","zpo","qpo","poi","neb","mva")]
tests <- tests[!tests %in% c("abc","znb","zpo","qpo","poi","neb","mva")]
}

# Only include if covars are present
Expand Down
2 changes: 1 addition & 1 deletion R/runtimeDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#' runtimeDA(mat, pred, cores = 1, tests = c("ttt","wil"), tests.slow = c("neb"))
#' @export
runtimeDA <- function(data, predictor, paired = NULL, covars = NULL, subsamples = c(500,1000,1500,2000), subsamples.slow = c(100,150,200,250),
tests = c("sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ttr", "ltt", "ltt2","ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", "lic"),
tests = c("abc", "sam", "qua", "fri", "vli", "qpo", "pea", "wil", "ttt", "ttr", "ltt", "ltt2","ere", "ere2", "msf", "zig", "lim", "lli", "lli2", "aov", "lao", "lao2", "kru", "lrm", "llm", "llm2", "spe", "aoa", "aoc", "tta", "ttc", "lma", "lmc", "lia", "lic"),
tests.slow = c("mva", "neb", "bay", "per", "ds2", "ds2x", "zpo", "znb", "adx", "poi", "erq", "erq2"), cores = (detectCores()-1), ...){

stopifnot(exists("data"),exists("predictor"))
Expand Down
9 changes: 5 additions & 4 deletions R/testDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,16 @@
#' \donttest{
#' # Include a paired variable for dependent/blocked samples
#' subject <- rep(1:10, 2)
#' res <- testDA(data = mat, predictor = pred, paired = subject)
#' res <- testDA(data = mat, predictor = pred, paired = subject, cores = 1, R = 1)
#'
#' # Include covariates
#' covar1 <- rnorm(20)
#' covar2 <- rep(c("A","B"), 10)
#' res <- testDA(data = mat, predictor = pred,
#' covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2))
#' covars = list(FirstCovar = covar1, CallItWhatYouWant = covar2), cores = 1, R = 1)
#'
#' # Data is absolute abundance
#' res <- testDA(data = mat, predictor = pred, relative = FALSE)
#' res <- testDA(data = mat, predictor = pred, relative = FALSE, cores = 1, R = 1)
#' }
#'
#' @import stats snow doSNOW foreach utils doParallel
Expand All @@ -66,7 +66,7 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 20,
"erq","erq2","neb","qpo","poi","sam",
"lrm","llm","llm2","lma","lmc",
"ere","ere2","pea","spe",
"wil","kru","qua","fri",
"wil","kru","qua","fri","abc",
"ttt","ltt","ltt2","tta","ttc","ttr",
"aov","lao","lao2","aoa","aoc",
"vli","lim","lli","lli2","lia","lic"),
Expand Down Expand Up @@ -254,6 +254,7 @@ testDA <- function(data, predictor, paired = NULL, covars = NULL, R = 20,
ere2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]], p.adj), argsL[[i]])),
msf = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]], p.adj), argsL[[i]])),
zig = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,covars, p.adj), argsL[[i]])),
abc = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],covars,out.all, p.adj), argsL[[i]])),
ds2 = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,covars,out.all, p.adj), argsL[[i]])),
ds2x = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired,covars,out.all, p.adj), argsL[[i]])),
per = do.call(get(noquote(paste0("DA.",i))),c(list(count_tables[[run.no]],rands[[run.no]],paired, relative, p.adj), argsL[[i]])),
Expand Down
Loading

0 comments on commit 42a71b5

Please sign in to comment.