new version

Russel88 · Dec 12, 2018 · f2f953f · f2f953f
1 parent 588a2fb
commit f2f953f
Show file tree

Hide file tree

Showing 81 changed files with 1,416 additions and 2,057 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: DAtest
 Title: Comparing Differential Abundance methods
-Version: 2.7.11
+Version: 2.7.12
 Authors@R: person("Jakob", "Russel", email = "[email protected]", role = c("aut", "cre"))
 Description: What the title says.
 Depends: R (>= 3.2.5)
@@ -9,4 +9,4 @@ Suggests: lsmeans, eulerr, pscl, samr, statmod
 License: GPL (>= 3)
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.0.1
+RoxygenNote: 6.1.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -7,8 +7,9 @@ S3method(summary,DA)
 S3method(summary,DAPower)
 export(DA.TukeyHSD)
 export(DA.adx)
-export(DA.anc)
 export(DA.anova)
+export(DA.aoa)
+export(DA.aoc)
 export(DA.aov)
 export(DA.bay)
 export(DA.drop1)
@@ -22,11 +23,15 @@ export(DA.fri)
 export(DA.kru)
 export(DA.lao)
 export(DA.lao2)
+export(DA.lia)
+export(DA.lic)
 export(DA.lim)
 export(DA.lli)
 export(DA.lli2)
 export(DA.llm)
 export(DA.llm2)
+export(DA.lma)
+export(DA.lmc)
 export(DA.lrm)
 export(DA.lsmeans)
 export(DA.ltt)
@@ -43,6 +48,8 @@ export(DA.qua)
 export(DA.rai)
 export(DA.sam)
 export(DA.spe)
+export(DA.tta)
+export(DA.ttc)
 export(DA.ttt)
 export(DA.vli)
 export(DA.wil)
@@ -53,7 +60,9 @@ export(DA.zzz)
 export(add.tax.DA)
 export(allDA)
 export(featurePlot)
-export(groupSig)
+export(gm_mean)
+export(norm_alr)
+export(norm_clr)
 export(powerDA)
 export(preDA)
 export(prune.tests.DA)

diff --git a/R/DA.anc.R b/R/DA.anc.R
diff --git a/R/DA.aoa.R b/R/DA.aoa.R
@@ -0,0 +1,67 @@
+#' ANOVA - Multiplicative zero-correction and additive log-ratio normalization.
+#' 
+#' Apply ANOVA on multiple features with one \code{predictor}.
+#' 
+#' Note: Last feature in the data is used as reference for the log-ratio transformation.
+#' @param data Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples
+#' @param predictor The predictor of interest. Factor, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation
+#' @param covars Either a named list with covariables, OR if \code{data} is a \code{phyloseq} object a character vector with names of the variables in \code{sample_data(data)}
+#' @param p.adj Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details
+#' @param delta Numeric. Pseudocount for zero-correction. Default 1
+#' @param allResults If TRUE will return raw results from the \code{aov} function
+#' @param ... Additional arguments for the \code{aov} functions
+#' @export
+
+DA.aoa <- function(data, predictor, covars = NULL, p.adj = "fdr", delta = 1, allResults = FALSE, ...){
+
+  # Extract from phyloseq
+  if(class(data) == "phyloseq"){
+    DAdata <- DA.phyloseq(data, predictor, paired = NULL, covars)
+    count_table <- DAdata$count_table
+    predictor <- DAdata$predictor
+    covars <- DAdata$covars
+  } else {
+    count_table <- data
+  }
+  if(!is.null(covars)){
+    for(i in 1:length(covars)){
+      assign(names(covars)[i], covars[[i]])
+    }
+  }
+
+  # Define model
+  if(is.null(covars)){
+    form <- paste("x ~ predictor")
+  } else {
+    form <- paste("x ~ ",paste(names(covars), collapse="+"),"+ predictor",sep = "")
+  }
+
+  # Define function
+  ao <- function(x){
+    tryCatch(as.numeric(summary(aov(as.formula(form), ...))[[1]][(length(covars)+1),5]), error = function(e){NA}) 
+  }
+
+  # Zero-correction
+  count_table <- apply(count_table, 2, function(y) sapply(y,function(x) ifelse(x==0,delta,(1-(sum(y==0)*delta)/sum(y))*x)))
+  if(any(count_table <= 0)) stop("count_table should only contain positive values")
+
+  # ALR transformation
+  count_table <- norm_alr(count_table)
+
+  # Run tests
+  if(allResults){
+    ao <- function(x){
+      tryCatch(aov(as.formula(form), ...), error = function(e){NA}) 
+    }
+    return(apply(count_table,1,ao))
+  } else {
+    res <- data.frame(pval = apply(count_table,1,ao))
+    res$pval.adj <- p.adjust(res$pval, method = p.adj)
+    res$Feature <- rownames(res)
+    res$Method <- "ANOVA - ALR (aoa)"
+    if(class(data) == "phyloseq") res <- add.tax.DA(data, res)
+    return(res)
+  }
+}
+
+
diff --git a/R/DA.aoc.R b/R/DA.aoc.R
@@ -0,0 +1,65 @@
+#' ANOVA - Multiplicative zero-correction and additive log-ratio normalization.
+#' 
+#' Apply ANOVA on multiple features with one \code{predictor}.
+#' @param data Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples
+#' @param predictor The predictor of interest. Factor, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation
+#' @param covars Either a named list with covariables, OR if \code{data} is a \code{phyloseq} object a character vector with names of the variables in \code{sample_data(data)}
+#' @param p.adj Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details
+#' @param delta Numeric. Pseudocount for zero-correction. Default 1
+#' @param allResults If TRUE will return raw results from the \code{aov} function
+#' @param ... Additional arguments for the \code{aov} functions
+#' @export
+
+DA.aoc <- function(data, predictor, covars = NULL, p.adj = "fdr", delta = 1, allResults = FALSE, ...){
+
+  # Extract from phyloseq
+  if(class(data) == "phyloseq"){
+    DAdata <- DA.phyloseq(data, predictor, paired = NULL, covars)
+    count_table <- DAdata$count_table
+    predictor <- DAdata$predictor
+    covars <- DAdata$covars
+  } else {
+    count_table <- data
+  }
+  if(!is.null(covars)){
+    for(i in 1:length(covars)){
+      assign(names(covars)[i], covars[[i]])
+    }
+  }
+
+  # Define model
+  if(is.null(covars)){
+    form <- paste("x ~ predictor")
+  } else {
+    form <- paste("x ~ ",paste(names(covars), collapse="+"),"+ predictor",sep = "")
+  }
+
+  # Define function
+  ao <- function(x){
+    tryCatch(as.numeric(summary(aov(as.formula(form), ...))[[1]][(length(covars)+1),5]), error = function(e){NA}) 
+  }
+
+  # Zero-correction
+  count_table <- apply(count_table, 2, function(y) sapply(y,function(x) ifelse(x==0,delta,(1-(sum(y==0)*delta)/sum(y))*x)))
+  if(any(count_table <= 0)) stop("count_table should only contain positive values")
+
+  # ALR transformation
+  count_table <- norm_clr(count_table)
+
+  # Run tests
+  if(allResults){
+    ao <- function(x){
+      tryCatch(aov(as.formula(form), ...), error = function(e){NA}) 
+    }
+    return(apply(count_table,1,ao))
+  } else {
+    res <- data.frame(pval = apply(count_table,1,ao))
+    res$pval.adj <- p.adjust(res$pval, method = p.adj)
+    res$Feature <- rownames(res)
+    res$Method <- "ANOVA - CLR (aoc)"
+    if(class(data) == "phyloseq") res <- add.tax.DA(data, res)
+    return(res)
+  }
+}
+
+
diff --git a/R/DA.bay.R b/R/DA.bay.R
@@ -3,12 +3,11 @@
 #' Implementation of baySeq for \code{DAtest}
 #' @param data Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples
 #' @param predictor The predictor of interest. Factor, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation
-#' @param p.adj Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details
 #' @param allResults If TRUE will return raw results from the \code{getLikelihoods} function
 #' @param ... Additional arguments to the \code{getPriors.NB} and \code{getLikelihoods} functions
 #' @export
 
-DA.bay <- function(data, predictor, p.adj = "fdr", allResults = FALSE, ...){
+DA.bay <- function(data, predictor, allResults = FALSE, ...){
 
   suppressMessages(library(baySeq))
 
@@ -37,12 +36,9 @@ DA.bay <- function(data, predictor, p.adj = "fdr", allResults = FALSE, ...){
 
   # Extract results
   tc <- topCounts(CD, group = "DE", number=nrow(count_table))
-  tc <- tc[,c(1,rev(ncol(tc)-0:4))]
+  tc <- tc[,c(1,rev(ncol(tc)-0:3))]
 
-  if(is.null(tc$ordering))
-    output_df <- data.frame(Feature = as.character(tc$annotation), pval = 1 - tc$Likelihood, pval.adj = p.adjust(1 - tc$Likelihood, method = p.adj))
-  if(!is.null(tc$ordering))
-    output_df <- data.frame(Feature = as.character(tc$annotation), pval = 1 - tc$Likelihood, pval.adj = p.adjust(1 - tc$Likelihood, method = p.adj), ordering = tc$ordering)
+  output_df <- data.frame(Feature = as.character(tc$name), pval = (1 - tc$likes), pval.adj = tc$FDR.DE, ordering = tc$DE)
 
   output_df$Method <- "baySeq (bay)"
 

diff --git a/R/DA.kru.R b/R/DA.kru.R
@@ -24,7 +24,7 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =
 
   # Define function
   kru <- function(x){
-    tryCatch(kruskal.test(as.numeric(x) ~ predictor, ...)$p.value, error = function(e){NA}) 
+    tryCatch(kruskal.test(as.numeric(x) ~ predictor, ...), error = function(e){NA}) 
   }
 
   # Relative abundance
@@ -35,10 +35,12 @@ DA.kru <- function(data, predictor, relative = TRUE, p.adj = "fdr", allResults =
   }
 
   # Run tests
+  tests <- apply(count.rel,1,kru)
+
   if(allResults){
-    return(apply(count.rel,1,kru))
+    return(tests)
   } else {
-    res <- data.frame(pval = apply(count.rel,1,kru))
+    res <- data.frame(pval = sapply(tests, function(x) x$p.value))
     res$pval.adj <- p.adjust(res$pval, method = p.adj)
     res$Feature <- rownames(res)
     res$Method <- "Kruskal-Wallis (kru)" 

diff --git a/R/DA.lia.R b/R/DA.lia.R
@@ -0,0 +1,98 @@
+#' LIMMA - Multiplicative zero-correction and additive log-ratio normalization.
+#'
+#' Note: Last feature in the data is used as reference for the log-ratio transformation.
+#' @param data Either a matrix with counts/abundances, OR a \code{phyloseq} object. If a matrix/data.frame is provided rows should be taxa/genes/proteins and columns samples
+#' @param predictor The predictor of interest. Either a Factor or Numeric, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation
+#' @param paired For paired/blocked experimental designs. Either a Factor with Subject/Block ID for running paired/blocked analysis, OR if \code{data} is a \code{phyloseq} object the name of the variable in \code{sample_data(data)} in quotation
+#' @param covars Either a named list with covariables, OR if \code{data} is a \code{phyloseq} object a character vector with names of the variables in \code{sample_data(data)}
+#' @param out.all If TRUE will output results from F-tests, if FALSE t-statistic results from 2. level of the \code{predictor}. If NULL (default) set as TRUE for multi-class \code{predictor} and FALSE otherwise
+#' @param p.adj Character. P-value adjustment. Default "fdr". See \code{p.adjust} for details
+#' @param delta Numeric. Pseudocount zero-correction. Default 1
+#' @param coeff Integer. The p-value and log2FoldChange will be associated with this coefficient. Default 2, i.e. the 2. level of the \code{predictor}.
+#' @param allResults If TRUE will return raw results from the \code{eBayes} function
+#' @param ... Additional arguments for the \code{eBayes} and \code{lmFit} functions
+#' @export
+
+DA.lia <- function(data, predictor, paired = NULL, covars = NULL, out.all = NULL, p.adj = "fdr", delta = 1, coeff = 2, allResults = FALSE,  ...){
+
+  suppressMessages(library(limma))
+  if(!is.null(paired)) suppressMessages(library(statmod))
+
+  # Extract from phyloseq
+  if(class(data) == "phyloseq"){
+    DAdata <- DA.phyloseq(data, predictor, paired, covars)
+    count_table <- DAdata$count_table
+    predictor <- DAdata$predictor
+    paired <- DAdata$paired
+    covars <- DAdata$covars
+  } else {
+    count_table <- data
+  }
+  if(!is.null(covars)){
+    for(i in 1:length(covars)){
+      assign(names(covars)[i], covars[[i]])
+    }
+  }
+
+  # out.all
+  if(is.null(out.all)){
+    if(length(unique(predictor)) == 2) out.all <- FALSE
+    if(length(unique(predictor)) > 2) out.all <- TRUE
+    if(is.numeric(predictor)) out.all <- FALSE
+  }
+
+  # Zero-correction
+  count_table <- apply(count_table, 2, function(y) sapply(y,function(x) ifelse(x==0,delta,(1-(sum(y==0)*delta)/sum(y))*x)))
+  if(any(count_table <= 0)) stop("count_table should only contain positive values")
+
+  # ALR transformation
+  count_table <- as.data.frame(norm_alr(count_table))
+
+  # Arguments
+  limma.args <- list(...)
+  lmFit.args <- limma.args[names(limma.args) %in% names(formals(lmFit))]
+  eBayes.args <- limma.args[names(limma.args) %in% names(formals(eBayes))]
+
+  if(is.null(covars)){
+    form <- paste("~ predictor")
+  } else {
+    form <- paste("~ predictor+",paste(names(covars), collapse="+"),sep = "")
+  }
+  design <- model.matrix(as.formula(form))
+
+  # Lienar fit
+  if(is.null(paired)){
+    fit <- do.call(lmFit,c(list(count_table, design),lmFit.args))
+  } else {
+    dupcor <-  duplicateCorrelation(count_table, design, block = paired)
+    fit <- do.call(lmFit,c(list(count_table, design, block = paired, correlation = dupcor$cor),lmFit.args))
+  }
+
+  # Empirical bayes
+  fit.eb <- do.call(eBayes, c(list(fit),eBayes.args))
+
+  # Extract results
+  if(is.numeric(predictor[1])){
+      res <- topTable(fit.eb, number = nrow(count_table), adjust.method = p.adj, coef = 2)
+      colnames(res)[4:5] <- c("pval","pval.adj")
+  } else {
+    if(out.all){
+      res <- topTable(fit.eb, number = nrow(count_table), adjust.method = p.adj, coef = 2:length(levels(as.factor(predictor))))
+      colnames(res)[length(levels(as.factor(predictor)))+2:3] <- c("pval","pval.adj")
+    } else {
+      res <- topTable(fit.eb, number = nrow(count_table), adjust.method = p.adj, coef = coeff)
+      colnames(res)[4:5] <- c("pval","pval.adj")
+      res$ordering <- NA
+      res[!is.na(res$logFC) & res$logFC > 0,"ordering"] <- paste0(levels(as.factor(predictor))[coeff],">",levels(as.factor(predictor))[1])
+      res[!is.na(res$logFC) & res$logFC < 0,"ordering"] <- paste0(levels(as.factor(predictor))[1],">",levels(as.factor(predictor))[coeff])
+    }
+  }
+
+  res$Feature <- rownames(res)
+  res$Method <- "LIMMA - ALR (lia)"
+
+  if(class(data) == "phyloseq") res <- add.tax.DA(data, res)
+
+  if(allResults) return(fit.eb) else return(res) 
+}
+