release v0.8.2

Merge branch 'devel' of github.com:broadinstitute/inferCNV Former-commit-id: 699187c Former-commit-id: e0ed404
broadinstitute · Nov 8, 2018 · bcceb91 · bcceb91
2 parents d3ebc5f + 8138feb
commit bcceb91
Show file tree

Hide file tree

Showing 15 changed files with 334 additions and 122 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: infercnv
 Type: Package
 Title: Infer Copy Number Variation from Single-Cell RNA-Seq Data
-Version: 0.8.1
-Date: 2017-05-25
+Version: 0.8.2
+Date: 2018-11-08
 Authors@R: c( person("Timothy", "Tickle", email = "[email protected]", role = c("aut", "cre")), person("Itay", "Tirosh", email = "[email protected]", role = "aut"), person("Christophe", "Georgescu", email = "[email protected]", role = "aut"), person("Maxwell", "Brown", email = "[email protected]", role = "aut"), person("Brian", "Haas", email = "[email protected]", role = "aut")) 
 Author: Timothy Tickle [aut, cre], Itay Tirosh [aut], Christophe Georgescu [aut], Maxwell Brown [aut], Brian Haas [aut]
 Maintainer: Christophe Georgescu <[email protected]>

diff --git a/NAMESPACE b/NAMESPACE
@@ -37,6 +37,8 @@ import(RColorBrewer)
 import(coin)
 import(futile.logger)
 importFrom(Matrix,Matrix)
+importFrom(Matrix,colSums)
+importFrom(Matrix,rowMeans)
 importFrom(ape,as.phylo)
 importFrom(ape,write.tree)
 importFrom(binhf,ansc)

diff --git a/R/NextGenHeatMap.R b/R/NextGenHeatMap.R
@@ -84,21 +84,13 @@ Create_NGCHM <- function(infercnv_obj,
  }
 
  ## set variables 
- reference_idx = row.names(plot_data[unlist(infercnv_obj@reference_grouped_cell_indices),])
  ref_index = infercnv_obj@reference_grouped_cell_indices
- ref_groups = names(infercnv_obj@reference_grouped_cell_indices)
+ reference_idx = row.names(plot_data[unlist(ref_index),])
+ ref_groups = names(ref_index)
 
  # ---------------------- Import Dendrogram & Order Rows -----------------------------------------------------------------------------------
  # IF Cluster By Group is set to TRUE:
  # Get the order of the rows (cell lines) from the dendrogram created by infer_cnv 
- #
-
- ## import and read the dendrogram for the observed data created using the ape library
- #den_path <- paste(out_dir, "observations_dendrogram.txt", sep=.Platform$file.sep)
- #phylo <- ape::read.tree(file = den_path)
- # if multiphylo trees, need to iterate to get to labels 
- #obs_order <- rev(unlist(lapply(1:length(phylo), function(x) phylo[[x]]$tip.label))) # vector holding cell line order taken from the dendrogram
-
 
  # read the file containing the groupings created by infer_cnv
  row_groups_path <- paste(out_dir, "observation_groupings.txt", sep=.Platform$file.sep)
@@ -213,75 +205,63 @@ Create_NGCHM <- function(infercnv_obj,
  display = "visible",
  thickness = as.integer(20))
 
- # Covariate bar for annotation groups 
+ # Covariate to identify Reference and Observed data
  annotation_col <- as.character(unlist(row_groups["Annotation.Color"])) # group colors
  annotation_group <- as.character(unlist(row_groups["Annotation.Group"]))# group number
  names(annotation_group) <- cells
  names(annotation_col) <- cells
- annotation_palette <- get_group_color_palette()(length(unique(annotation_group)))
  annotation_unique_group <- unique(annotation_group)
- ## create color mapping
- colMap_annotation <- NGCHM::chmNewColorMap(values = as.vector(annotation_unique_group), # row names are the cells 
- colors = annotation_palette,
- missing.color = "white")
- annotation_cov <- NGCHM::chmNewCovariate(fullname = 'Annotation', 
- values = annotation_group, 
- value.properties = colMap_annotation,
- type = "discrete")
- hm <- NGCHM::chmAddCovariateBar(hm, "row", annotation_cov, 
- display = "visible", 
- thickness = as.integer(20))
- # Covariate to identify Reference and Observed data
 
- cell_type <- replace(row_order, 1:length(row_order) %in% unlist(infercnv_obj@observation_grouped_cell_indices), paste("Observed"))
+ len <-lengths(ref_index)
+ ref_bar_labels <- unlist(sapply(1:length(len), function(x){ rep(ref_groups[x],len[x]) }))
+ names(ref_bar_labels) <- reference_idx
 
- ref_groups = names(infercnv_obj@reference_grouped_cell_indices)
+ # if you want the exact coloring as the original inferCNV plots 
+ #annotation_palette <- c(get_group_color_palette()(length(ref_index)), get_group_color_palette()(length(annotation_unique_group)))
 
- ## Label the references based on index locations 
- if (length(ref_groups) > 1) {
- for(i in 1:length(ref_groups)){ 
- cell_type <- replace(cell_type, infercnv_obj@reference_grouped_cell_indices[[i]], paste("Reference",toString(i),sep = "")) 
- }
- } else {
- for(i in 1:length(ref_groups)){ 
- cell_type <- replace(cell_type, 1:length(cell_type) %in% infercnv_obj@reference_grouped_cell_indices[[1]], paste("Reference"))
- }
- }
- # make a new variable for later use that has the cell type and cell ID as the name 
- ## cell ID's need to map to cell types 
- names(cell_type) <- row_order
+ # combine reference and observed labels 
+ annotation_group <- c(ref_bar_labels,annotation_group)
+
+ # change the observed group names in bar to group names
+ observed_data <- infercnv_obj@observation_grouped_cell_indices
+ lapply(1:length(observed_data), function(x) { 
+ tmp <- names(observed_data[x])
+ annotation_group <<- replace(annotation_group, observed_data[[x]], tmp) } )
+ unique_group <- unique(annotation_group)
+ annotation_palette <- get_group_color_palette()(length(unique_group))
 
- # check if all reference cells are in cell type 
- if (!(all(reference_idx %in% names(cell_type)))){
- missing_refs <- reference_idx[which(!(reference_idx %in% names(cell_type)))]
+ # check if all reference cells are included 
+ if (!(all(reference_idx %in% names(annotation_group)))){
+ missing_refs <- reference_idx[which(!(reference_idx %in% names(annotation_group)))]
  error_message <- paste("Error: Not all references are accounted for.",
  "Make sure the reference names match the names in the data.\n",
  "Check the following reference cell lines: ", 
  paste(missing_refs, collapse = ","))
  stop(error_message)
  }
- if (!is.null(cell_type)){
- ## unique group names 
- types <- unique(cell_type)
- ## create colors for groups 
- type_palette <- get_group_color_palette()(length(types))
- names(type_palette) <- types 
-
- colMap_type <- NGCHM::chmNewColorMap(values = types, 
- names = types,
- colors = type_palette,
- missing.color = "white", 
- type = "linear")
-
- type_cov <- NGCHM::chmNewCovariate(fullname = 'Cell Type', 
- values = cell_type, 
- value.properties = colMap_type,
- type = "discrete")
- hm <- NGCHM::chmAddCovariateBar(hm, "row", type_cov, 
- display = "visible", 
- thickness = as.integer(20))
+ # check if all observed cells are included
+ observed_idx <- row.names(plot_data[unlist(infercnv_obj@observation_grouped_cell_indices),])
+ if (!(all(observed_idx %in% names(annotation_group)))){
+ missing_obs <- reference_idx[which(!(observed_idx %in% names(annotation_group)))]
+ error_message <- paste("Error: Not all observed cell lines are accounted for.",
+ "Make sure the reference names match the names in the data.\n",
+ "Check the following reference cell lines: ", 
+ paste(missing_obs, collapse = ","))
+ stop(error_message)
  }
 
+ ## create color mapping
+ colMap_annotation <- NGCHM::chmNewColorMap(values = as.vector(unique_group), 
+ colors = annotation_palette,
+ missing.color = "white")
+ annotation_cov <- NGCHM::chmNewCovariate(fullname = 'Annotation', 
+ values = annotation_group, 
+ value.properties = colMap_annotation,
+ type = "discrete")
+ hm <- NGCHM::chmAddCovariateBar(hm, "row", annotation_cov, 
+ display = "visible", 
+ thickness = as.integer(20))
+
  #---------------------------------------Export the heat map-----------------------------------------------------------------------------------------------------------------------
  ## adjust the size of the heat map 
  #hm@width <- as.integer(500)

diff --git a/R/inferCNV_constants.R b/R/inferCNV_constants.R
@@ -20,7 +20,7 @@ C_OUTPUT_FORMAT <- c("pdf", "png")
 #' @importFrom ape write.tree as.phylo
 #' @importFrom fastcluster hclust
 #' @import RColorBrewer
-#' @importFrom Matrix Matrix
+#' @importFrom Matrix Matrix rowMeans colSums
 #' @import coin
 #' @importFrom dplyr %>% count
 

diff --git a/R/inferCNV_heatmap.R b/R/inferCNV_heatmap.R
@@ -563,7 +563,7 @@ plot_cnv <- function(infercnv_obj,
  observation_file_base,
  sep=" "))
  row.names(obs_data) <- orig_row_names
- write.table(obs_data[data_observations$rowInd,data_observations$colInd],
+ write.table(t(obs_data[data_observations$rowInd,data_observations$colInd]),
  file=observation_file_base)
  }
 }

diff --git a/R/inferCNV_ops.R b/R/inferCNV_ops.R
@@ -66,6 +66,10 @@
 #'
 #' @param include.spike If true, introduces an artificial spike-in of data at ~0x and 2x for scaling residuals between 0-2. (default: F)
 #'
+#' @param spike_in_chrs vector listing of chr names to use for modeling spike-ins (default: NULL - uses the two largest chrs. ex. c('chr1', 'chr2') )
+#'
+#' @param spike_in_multiplier_vec vector of weights matching spike_in_chrs (default: c(0.01, 2.0) for modeling loss/gain of both chrs)
+#'
 #' @param pseudocount Number of counts to add to each gene of each cell post-filtering of genes and cells and pre-total sum count normalization. (default: 0)
 #'
 #' @param debug If true, output debug level logging.
@@ -107,7 +111,7 @@ run <- function(infercnv_obj,
  use_zscores=FALSE,
  remove_genes_at_chr_ends=FALSE,
 
- mask_nonDE_genes=TRUE,
+ mask_nonDE_genes=FALSE,
  mask_nonDE_pval=0.05,
  test.use='wilcoxon',
 
@@ -116,7 +120,11 @@ run <- function(infercnv_obj,
  debug=FALSE, #for debug level logging
 
  include.spike = FALSE,
-
+
+ # must specify both below if to be used, and must match in vec length
+ spike_in_chrs = NULL, # use defaults
+ spike_in_multiplier_vec = NULL, # use defaults
+
  pseudocount = 0
 
  ) {
@@ -202,10 +210,14 @@ run <- function(infercnv_obj,
  if (include.spike) {
  step_count = step_count + 1
  flog.info(sprintf("\n\n\tSTEP %02d: Spiking in genes with variation added for tracking\n", step_count))
+
+ if (! (is.null(spike_in_chrs) && is.null(spike_in_multiplier_vec)) ) {
+ infercnv_obj <- spike_in_variation_chrs(infercnv_obj, spike_in_chrs, spike_in_multiplier_vec)
+ } else {
+ infercnv_obj <- spike_in_variation_chrs(infercnv_obj)
+ }
 
- infercnv_obj <- spike_in_variation_chrs(infercnv_obj)
-
- # Plot incremental steps.
+ # Plot incremental steps.
  if (plot_steps){
 
  infercnv_obj_spiked <- infercnv_obj
@@ -657,9 +669,6 @@ run <- function(infercnv_obj,
  output_filename=sprintf("infercnv.%02d_scaled_by_spike", step_count))
  }
 
- # remove the spike now
- infercnv_obj <- remove_spike(infercnv_obj)
-
  }
 
 
@@ -697,6 +706,12 @@ run <- function(infercnv_obj,
  }
  }
 
+ if (include.spike) {
+ # remove the spike before making the final plot.
+ infercnv_obj <- remove_spike(infercnv_obj)
+ }
+
+
  save('infercnv_obj', file=file.path(out_dir, "run.final.infercnv_obj"))
 
  flog.info("Making the final infercnv heatmap")
@@ -1143,7 +1158,7 @@ center_cell_expr_across_chromosome <- function(infercnv_obj, method="mean") { #
 
 #' @title require_above_min_mean_expr_cutoff ()
 #'
-#' @description Filters out genes that have fewer than the corresponding mean value across the reference cell values.
+#' @description Filters out genes whose mean expression value across all cells is below the specified cutoff.
 #'
 #' @param infercnv_obj infercnv_object
 #'
@@ -1158,10 +1173,8 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof
 
  flog.info(paste("::above_min_mean_expr_cutoff:Start", sep=""))
 
- # restrict to reference cells:
- ref_cells_data <- infercnv_obj@expr.data[ , get_reference_grouped_cell_indices(infercnv_obj) ]
 
- indices <-.below_min_mean_expr_cutoff(ref_cells_data, min_mean_expr_cutoff)
+ indices <-.below_min_mean_expr_cutoff(infercnv_obj@expr.data, min_mean_expr_cutoff)
  if (length(indices) > 0) {
  flog.info(sprintf("Removing %d genes from matrix as below mean expr threshold: %g",
  length(indices), min_mean_expr_cutoff))
@@ -1195,7 +1208,7 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof
 
 #' @title require_above_min_cells_ref()
 #'
-#' @description Filters out genes that have fewer than specified number of reference cells expressing them.
+#' @description Filters out genes that have fewer than specified number of cells expressing them.
 #'
 #' @param infercnv_obj infercnv_object
 #' 
@@ -1207,15 +1220,11 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof
 #'
 
 require_above_min_cells_ref <- function(infercnv_obj, min_cells_per_gene) {
-
- ref_cell_indices = get_reference_grouped_cell_indices(infercnv_obj)
 
- ref_data = infercnv_obj@expr.data[,ref_cell_indices]
-
- ref_genes_passed = which(apply(ref_data, 1, function(x) { sum(x>0 & ! is.na(x)) >= min_cells_per_gene}))
+ genes_passed = which(apply(infercnv_obj@expr.data, 1, function(x) { sum(x>0 & ! is.na(x)) >= min_cells_per_gene}))
 
- num_genes_total = dim(ref_data)[1]
- num_removed = num_genes_total - length(ref_genes_passed)
+ num_genes_total = dim(infercnv_obj@expr.data)[1]
+ num_removed = num_genes_total - length(genes_passed)
  if (num_removed > 0) {
 
  flog.info(sprintf("Removed %d genes having fewer than %d min cells per gene = %g %% genes removed here",
@@ -1229,7 +1238,7 @@ require_above_min_cells_ref <- function(infercnv_obj, min_cells_per_gene) {
  }
 
 
- infercnv_obj <- remove_genes(infercnv_obj, -1 * ref_genes_passed)
+ infercnv_obj <- remove_genes(infercnv_obj, -1 * genes_passed)
 
 
  }
@@ -1904,7 +1913,9 @@ anscombe_transform <- function(infercnv_obj) {
 
 }
 
-
+#' @keywords internal
+#' @noRd
+#'
 add_pseudocount <- function(infercnv_obj, pseudocount) {
 
  flog.info(sprintf("Adding pseudocount: %g", pseudocount))