From 3e2611f4072023654a3abf475263748f122d423d Mon Sep 17 00:00:00 2001 From: TimothyTickle Date: Tue, 30 May 2017 23:49:09 +0200 Subject: [PATCH] Now an installable library. Former-commit-id: 04f0b920d4c5ca16665a2e9573c20592a7f05310 --- DESCRIPTION | 2 +- NAMESPACE | 2 + R/inferCNV.R | 664 +++++++++++------------ man/above_cutoff.Rd | 19 - man/average_over_ref.Rd | 25 - man/center_smoothed.Rd | 19 - man/center_with_threshold.Rd | 17 - man/color.palette.Rd | 20 - man/create_sep_list.Rd | 26 - man/get.sep.Rd | 23 - man/get_group_color_palette.Rd | 13 - man/heatmap.cnv.Rd | 58 -- man/infer_cnv.Rd | 99 ++-- man/order_reduce.Rd | 19 +- man/plot_cnv.Rd | 49 +- man/plot_cnv_observations.Rd | 35 -- man/plot_cnv_references.Rd | 29 - man/plot_observations_layout.Rd | 17 - man/plot_step.Rd | 18 - man/remove_noise.Rd | 19 - man/remove_outliers_norm.Rd | 25 - man/remove_tails.Rd | 22 - man/smooth_ends_helper.Rd | 18 - man/smooth_window.Rd | 21 - man/smooth_window_helper.Rd | 19 - man/split_references.Rd | 24 - {src => scripts}/gtf_to_position_file.py | 0 {src => scripts}/inferCNV.R | 50 +- vignettes/inferCNV.Rmd | 1 - 29 files changed, 447 insertions(+), 906 deletions(-) delete mode 100644 man/above_cutoff.Rd delete mode 100644 man/average_over_ref.Rd delete mode 100644 man/center_smoothed.Rd delete mode 100644 man/center_with_threshold.Rd delete mode 100644 man/color.palette.Rd delete mode 100644 man/create_sep_list.Rd delete mode 100644 man/get.sep.Rd delete mode 100644 man/get_group_color_palette.Rd delete mode 100644 man/heatmap.cnv.Rd delete mode 100644 man/plot_cnv_observations.Rd delete mode 100644 man/plot_cnv_references.Rd delete mode 100644 man/plot_observations_layout.Rd delete mode 100644 man/plot_step.Rd delete mode 100644 man/remove_noise.Rd delete mode 100644 man/remove_outliers_norm.Rd delete mode 100644 man/remove_tails.Rd delete mode 100644 man/smooth_ends_helper.Rd delete mode 100644 man/smooth_window.Rd delete mode 100644 man/smooth_window_helper.Rd delete 
mode 100644 man/split_references.Rd rename {src => scripts}/gtf_to_position_file.py (100%) rename {src => scripts}/inferCNV.R (94%) diff --git a/DESCRIPTION b/DESCRIPTION index 2bd00552..6ddb7932 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,7 +18,7 @@ Suggests: testthat, rmarkdown VignetteBuilder: knitr -RoxygenNote: 5.0.1 +RoxygenNote: 6.0.1 Imports: RColorBrewer, gplots, diff --git a/NAMESPACE b/NAMESPACE index 912514f3..f93c4649 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,5 @@ # Generated by roxygen2: do not edit by hand export(infer_cnv) +export(order_reduce) +export(plot_cnv) diff --git a/R/inferCNV.R b/R/inferCNV.R index d15eaa39..65d2d159 100755 --- a/R/inferCNV.R +++ b/R/inferCNV.R @@ -4,21 +4,21 @@ CHR = "chr" START = "start" STOP = "stop" -#' Remove the average of the genes of the reference observations from all -#' observations' expression. Normalization by column. -#' -#' Args: -#' @param average_data: Matrix containing the data to remove average from -#' (this includes the reference observations). -#' Row = Genes, Col = Cells. -#' @param ref_observations: Indices of reference observations. -#' Only these are used in the average. -#' @param ref_groups: A list of vectors of indices refering to the -#' different groups of the reference indices. -#' -#' Returns: -#' @return: Expression with the average gene expression in the reference -#' observations removed. +# Remove the average of the genes of the reference observations from all +# observations' expression. Normalization by column. +# +# Args: +# average_data: Matrix containing the data to remove average +# from (this includes the reference observations). +# Row = Genes, Col = Cells. +# ref_observations: Indices of reference observations. +# Only these are used in the average. +# ref_groups: A list of vectors of indices refering to the +# different groups of the reference indices. +# +# Returns: +# Expression with the average gene expression in the reference +# observations removed. 
average_over_ref <- function(average_data, ref_observations, ref_groups){ @@ -61,16 +61,16 @@ average_over_ref <- function(average_data, } # Not testing, parameters ok. -#' Helper function allowing greater control over the steps in a color palette. -#' Source:http://menugget.blogspot.com/2011/11/define-color-steps-for- -#' colorramppalette.html#more -#' -#' Args: -#' @param steps: Vector of colors to change use in the palette -#' @param between: Steps where gradients change -#' -#' Returns: -#' @return: Color palette +# Helper function allowing greater control over the steps in a color palette. +# Source:http://menugget.blogspot.com/2011/11/define-color-steps-for- +# colorramppalette.html#more + +# Args: +# steps: Vector of colors to change use in the palette +# between: Steps where gradients change +# +# Returns: +# Color palette color.palette <- function(steps, between=NULL, ...){ @@ -100,22 +100,22 @@ color.palette <- function(steps, return(pal) } -#' Create a sepList forthe heatmap.3 plotting function given integer vectors -#' of rows and columns where speration should take place. -#' The expected input to the heatmap function is a list of 2 lists. -#' The first list are column based rectangles, and the second row. -#' To define a rectagle the index of the row or column where the line of the rectagle -#' should be placed is done with a vector of integers, left, bottom, right and top line. -#' Ie. list(list(c(1,0,3,10), c(5, 0, 10,10)), list(c(1,2,3,4))) -#' -#' Args: -#' @param row_count: Total number of rows -#' @param col_count: Total number of columns -#' @param row_seps: Vector of integers indices for row breaks -#' @param col_seps: Vector of integer indices for column breaks -#' -#' Returns -#' @return: List of lists of vectors +# Create a sepList forthe heatmap.3 plotting function given integer vectors +# of rows and columns where speration should take place. +# The expected input to the heatmap function is a list of 2 lists. 
+# The first list are column based rectangles, and the second row. +# To define a rectagle the index of the row or column where the line of the rectagle +# should be placed is done with a vector of integers, left, bottom, right and top line. +# Ie. list(list(c(1,0,3,10), c(5, 0, 10,10)), list(c(1,2,3,4))) +# +# Args: +# row_count: Total number of rows +# col_count: Total number of columns +# row_seps: Vector of integers indices for row breaks +# col_seps: Vector of integer indices for column breaks +# +# Returns +# List of lists of vectors create_sep_list <- function(row_count, col_count, row_seps=NULL, @@ -157,20 +157,20 @@ create_sep_list <- function(row_count, return(sepList) } -#' Split up reference observations in to k groups and return indices -#' for the different groups. -#' -#' Args: -#' @param average_data: Matrix containing data. Row = Genes, Col = Cells. -#' @param ref_obs: Indices of reference obervations. -#' @param num_groups: The number of groups to partition nodes in or a list -#' of already partitioned indices. -#' -#' Returns: -#' @return: Returns a list of grouped reference observations given as -#' vectors of groups. These are indices relative to the reference -#' observations only, so a return 1 indicates the first reference -#' row, not the first row. +# Split up reference observations in to k groups and return indices +# for the different groups. +# +# Args: +# average_data: Matrix containing data. Row = Genes, Col = Cells. +# ref_obs: Indices of reference obervations. +# num_groups: The number of groups to partition nodes in or a list +# of already partitioned indices. +# +# Returns: +# Returns a list of grouped reference observations given as +# vectors of groups. These are indices relative to the reference +# observations only, so a return 1 indicates the first reference +# row, not the first row. 
split_references <- function(average_data, ref_obs, num_groups){ @@ -224,20 +224,20 @@ split_references <- function(average_data, return(ret_groups) } -#' Set outliers to some upper or lower bound. Then normalize values to -#' approximately [-1, 1]. This is to prep the data for visualization. -#' -#' Args: -#' @param data: data to remove outliers. Outliers removed within columns. -#' @param out_method: Method to remove outliers [(average_bound, NA (hard threshold))] -#' @param lower_bound: Lower bound which identifies a measurement -#' as an outlier. -#' @param upper_bound: Upper bound which identifies a measurement -#' as an outlier. -#' @param plot_step: True will plot this analysis step. -#' -#' Returns: -#' @return: Return data matrix with outliers removed +# Set outliers to some upper or lower bound. Then normalize values to +# approximately [-1, 1]. This is to prep the data for visualization. +# +# Args: +# data: data to remove outliers. Outliers removed within columns. +# out_method: Method to remove outliers [(average_bound, NA (hard threshold))] +# lower_bound: Lower bound which identifies a measurement +# as an outlier. +# upper_bound: Upper bound which identifies a measurement +# as an outlier. +# plot_step: True will plot this analysis step. +# +# Returns: +# Return data matrix with outliers removed remove_outliers_norm <- function(data, out_method=NA, lower_bound=NA, @@ -293,15 +293,15 @@ remove_outliers_norm <- function(data, return(data) } -#' Center data after smoothing. Center with in cells using median. -#' -#' Args: -#' @param data_smoothed: Matrix to center. -#' Row = Genes, Col = cells. -#' -#' Returns: -#' @return: Matrix that is median centered. -#' Row = Genes, Col = cells. +# Center data after smoothing. Center with in cells using median. +# +# Args: +# data_smoothed: Matrix to center. +# Row = Genes, Col = cells. +# +# Returns: +# Matrix that is median centered. +# Row = Genes, Col = cells. 
center_smoothed <- function(data_smoothed){ logging::loginfo(paste("::center_smoothed:Start")) @@ -310,14 +310,14 @@ center_smoothed <- function(data_smoothed){ return(t(apply(data_smoothed, 1, "-", row_median))) } -#' Center data and threshold (both negative and postive values) -#' -#' Args: -#' @param center_data: Matrix to center. Row = Genes, Col = Cells. -#' @param threshold: Values will be required to be with -/+1 * -#' threshold after centering. -#' Returns: -#' @return: Centered and thresholded matrix +# Center data and threshold (both negative and postive values) +# +# Args: +# center_data: Matrix to center. Row = Genes, Col = Cells. +# threshold: Values will be required to be with -/+1 * +# threshold after centering. +# Returns: +# Centered and thresholded matrix center_with_threshold <- function(center_data, threshold){ logging::loginfo(paste("::center_with_threshold:Start", sep="")) @@ -330,56 +330,55 @@ center_with_threshold <- function(center_data, threshold){ return(center_data) } -#' Returns the color palette for contigs. -#' -#' Returns: -#' @return: Color Palette +# Returns the color palette for contigs. +# +# Returns: +# Color Palette get_group_color_palette <- function(){ return(colorRampPalette(RColorBrewer::brewer.pal(12,"Set3"))) } -# params ok -#' Infer CNV changes given a matrix of RNASeq counts. -#' Output a pdf and matrix of final values. +#' @title Infer CNV changes given a matrix of RNASeq counts. Output a pdf and matrix of final values. #' -#' Args: -#' @param data: Expression matrix (genes X samples), +#' @param data: Expression matrix (genes X samples), #' assumed to be log2(TPM+1) . -#' @param gene_order: Ordering of the genes (data's rows) +#' @param gene_order: Ordering of the genes (data's rows) #' according to their genomic location #' To include all genes use 0. -#' @param cutoff: Cut-off for the average expression of genes to be +#' @param cutoff: Cut-off for the average expression of genes to be #' used for CNV inference. 
-#' @param reference_obs: Column names of the subset of samples (data's columns) +#' @param reference_obs: Column names of the subset of samples (data's columns) #' that should be used as references. #' If not given, the average of all samples will #' be the reference. -#' @param transform_data: Indicator to log2 + 1 transform -#' @param window_length: Length of the window for the moving average +#' @param transform_data: Indicator to log2 + 1 transform +#' @param window_length: Length of the window for the moving average #' (smoothing). Should be an odd integer. -#' @param max_centered_threshold: The maximum value a a value can have after +#' @param max_centered_threshold: The maximum value a a value can have after #' centering. Also sets a lower bound of #' -1 * this value. -#' @param noise_threshold: The minimum difference a value can be from the +#' @param noise_threshold: The minimum difference a value can be from the #' average reference in order for it not to be #' removed as noise. -#' @param num_ref_groups: The number of reference groups of a list of +#' @param num_ref_groups: The number of reference groups of a list of #' indicies for each group of reference indices in #' relation to reference_obs. -#' @param num_obs_groups: Number of groups to break the observations into. -#' @param pdf_path: The path to what to save the pdf as. The raw data is +#' @param out_path: The path to what to save the pdf as. The raw data is #' also written to this path but with the extension .txt . -#' @param plot_steps: If true turns on plotting intermediate steps. -#' @param contig_tail: Length of the tail removed from the ends of contigs. -#' @param cluster_reference: If given, clustering of observation will only be -#' relative to genes on this contig. -#' @param method_bound: Method to use for bounding values in the visualization. -#' @param lower_bound_vis: Lower bound to normalize data to for visualization. 
-#' @param upper_bound_vis: Upper bound to normalize data to for visualization. +#' @param plot_steps: If true turns on plotting intermediate steps. +#' @param contig_tail: Length of the tail removed from the ends of contigs. +#' @param method_bound: Method to use for bounding values in the visualization. +#' @param lower_bound_vis: Lower bound to normalize data to for visualization. +#' @param upper_bound_vis: Upper bound to normalize data to for visualization. #' -#' Returns: -#' @return: No return. +#' @return +#' Returns a list including: +#' CNV matrix before visualization. +#' CNV matrix after outlier removal for visualization. +#' Contig order +#' Column names of the subset of samples that should be used as references. +#' Names of samples in reference groups. #' @export infer_cnv <- function(data, gene_order, @@ -390,11 +389,9 @@ infer_cnv <- function(data, max_centered_threshold, noise_threshold, num_ref_groups, - num_obs_groups, out_path, plot_steps=FALSE, contig_tail= (window_length - 1) / 2, - cluster_reference=NULL, method_bound_vis=NA, lower_bound_vis=NA, upper_bound_vis=NA){ @@ -590,22 +587,20 @@ infer_cnv <- function(data, ".", sep="")) ret_list[["CONTIGS"]] = paste(as.vector(as.matrix(chr_order))) - ret_list[["N_OBS_GROUPS"]] = num_obs_groups ret_list[["REF_OBS_IDX"]] = reference_obs - ret_list[["CLUST_REF"]] = cluster_reference ret_list[["REF_GROUPS"]] = groups_ref return(ret_list) } # Not testing, params ok -#' Log intermediate step with a plot and text file of the steps. -#' -#' Args: -#' @param data: The data frame to plot. -#' @param plot_name: The absolute path to the pdf to be plotted. -#' -#' Returns: -#' @return: No return +# Log intermediate step with a plot and text file of the steps. +# +# Args: +# data: The data frame to plot. +# plot_name: The absolute path to the pdf to be plotted. 
+# +# Returns: +# No return plot_step <- function(data, plot_name){ text_file <- unlist(strsplit(plot_name, "\\.")) text_file <- paste(c(text_file[1:length(text_file)], "txt"), @@ -617,27 +612,25 @@ plot_step <- function(data, plot_name){ write.table(data, file=text_file) } -# Not Testing, params ok. -#' Plot the matrix as a heatmap. -#' Clustering is on observation only, gene position is preserved. +#' @title Plot the matrix as a heatmap. Clustering is on observation only, gene position is preserved. #' -#' Args: -#' @param plot_data: Data matrix to plot (columns are observations). -#' @param contigs: The contigs the data is group in in order of rows. -#' @param reference_idx: Vector of reference indices. -#' @param ref_contig: If given, will focus cluster on only genes in this contig -#' @param reg_groups: Groups of vector indices (as indices in reference_idx) -#' @param out_dir: Directory in which to save pdf and other output. -#' @param title: Plot title. -#' @param obs_title: Title for the observations matrix. -#' @param ref_title: Title for the reference matrix. -#' @param contig_cex: Contig text size. -#' @param k_obs_groups: Number of groups to break observation into -#' @param color_safe_pal: Logical indication of using a color blindness safe +#' @param plot_data: Data matrix to plot (columns are observations). +#' @param contigs: The contigs the data is group in in order of rows. +#' @param reference_idx: Vector of reference indices. +#' @param ref_contig: If given, will focus cluster on only genes in this contig +#' @param reg_groups: Groups of vector indices (as indices in reference_idx) +#' @param out_dir: Directory in which to save pdf and other output. +#' @param title: Plot title. +#' @param obs_title: Title for the observations matrix. +#' @param ref_title: Title for the reference matrix. +#' @param contig_cex: Contig text size. 
+#' @param k_obs_groups: Number of groups to break observation into +#' @param color_safe_pal: Logical indication of using a color blindness safe #' palette. #' -#' Returns: -#' @return: No return +#' @return +#' No return, void. +#' @export plot_cnv <- function(plot_data, contigs, reference_idx, @@ -753,27 +746,27 @@ plot_cnv <- function(plot_data, } # TODO Tested, test make files so turned off but can turn on and should pass. -#' Plot the observational samples -#' -#' Args: -#' @param obs_data: Data to plot as observations. Rows = Cells, Col = Genes -#' @param col_pal: The color palette to use. -#' @param contig_colors: The colors for the contig bar. -#' @param contig_labels: The labels for the contigs. -#' @param contig_names: Names of the contigs -#' @param contig_seps: Indices for line seperators of contigs. -#' @param num_obs_groups: Number of groups of observations to create -#' @param file_base_name: Base of the file to used to make output file names. -#' @param cnv_title: Title of the plot. -#' @param cnv_obs_title: Title for the observation matrix. -#' @param contig_lab_size: Text size for contigs. -#' @param cluster_contig: A value directs cluster to only genes on this contig -#' @param layout_lmat: lmat values to use in layout -#' @param layout_lhei: lhei values to use in layout -#' @param layout_lwid: lwid values to use in layout -#' -#' Returns: -#' @return: Void +# Plot the observational samples +# +# Args: +# obs_data: Data to plot as observations. Rows = Cells, Col = Genes +# col_pal: The color palette to use. +# contig_colors: The colors for the contig bar. +# contig_labels: The labels for the contigs. +# contig_names: Names of the contigs +# contig_seps: Indices for line seperators of contigs. +# num_obs_groups: Number of groups of observations to create +# file_base_name: Base of the file to used to make output file names. +# cnv_title: Title of the plot. +# cnv_obs_title: Title for the observation matrix. 
+# contig_lab_size: Text size for contigs. +# cluster_contig: A value directs cluster to only genes on this contig +# layout_lmat: lmat values to use in layout +# layout_lhei: lhei values to use in layout +# layout_lwid: lwid values to use in layout +# +# Returns: +# Void plot_cnv_observations <- function(obs_data, col_pal, contig_colors, @@ -920,14 +913,14 @@ plot_cnv_observations <- function(obs_data, } # Not Testing, params ok. -#' Create the layout for the plot -#' This is a modification of the original -#' layout from the GMD heatmap.3 function -#' -#' Returns: -#' @return: list with slots "lmat" (layout matrix), -#' "lhei" (height, numerix vector), -#' and "lwid" (widths, numeric vector) +# Create the layout for the plot +# This is a modification of the original +# layout from the GMD heatmap.3 function +# +# Returns: +# list with slots "lmat" (layout matrix), +# "lhei" (height, numerix vector), +# and "lwid" (widths, numeric vector) plot_observations_layout <- function() { ## Plot observational samples @@ -960,23 +953,23 @@ plot_observations_layout <- function() } # TODO Tested, test make files so turned off but can turn on and should pass. -#' Plot the reference samples -#' -#' Args: -#' @param ref_data: Data to plot as references. Rows = Cells, Col = Genes -#' @param ref_groups: Groups of references to plot together. -#' @param col_pal: The color palette to use. -#' @param contig_seps: Indices for line seperators of contigs. -#' @param file_base_name: Base of the file to used to make output file names. -#' @param cnv_ref_title: Title for reference matrix. -#' @param layout_lmat: lmat values to use in the layout. -#' @param layout_lwid: lwid values to use in the layout. -#' @param layout_lhei: lhei values to use in the layout. -#' @param layout_add: Indicates the ref image shoudl be added to the previous plot. -#' @param testing: Turns off plotting when true. 
-#' -#' Returns: -#' @return: Void +# Plot the reference samples +# +# Args: +# ref_data: Data to plot as references. Rows = Cells, Col = Genes +# ref_groups: Groups of references to plot together. +# col_pal: The color palette to use. +# contig_seps: Indices for line seperators of contigs. +# file_base_name: Base of the file to used to make output file names. +# cnv_ref_title: Title for reference matrix. +# layout_lmat: lmat values to use in the layout. +# layout_lwid: lwid values to use in the layout. +# layout_lhei: lhei values to use in the layout. +# layout_add: Indicates the ref image shoudl be added to the previous plot. +# testing: Turns off plotting when true. +# +# Returns: +# Void plot_cnv_references <- function(ref_data, ref_groups, col_pal, @@ -1086,15 +1079,15 @@ plot_cnv_references <- function(ref_data, file=reference_data_file) } -#' Return the indices of the rows that average above the cut off -#' -#' Args: -#' @param data: Data to measure the average row and evaluate -#' against the cutoff. Row = Genes, Col = Cells. -#' @param cutoff: Threshold to be above to be kept. -#' -#' Returns: -#' @return: Returns a vector of row indicies to keep (are above the cutoff). +# Return the indices of the rows that average above the cut off +# +# Args: +# data: Data to measure the average row and evaluate +# against the cutoff. Row = Genes, Col = Cells. +# cutoff: Threshold to be above to be kept. +# +# Returns: +# Returns a vector of row indicies to keep (are above the cutoff). above_cutoff <- function(data, cutoff){ logging::loginfo(paste("::above_cutoff:Start", sep="")) @@ -1112,14 +1105,14 @@ above_cutoff <- function(data, cutoff){ #' Order the data and subset the data to data in the genomic position file. #' #' Args: -#' @param data: Data (expression) matrix where the row names should be in +#' @param data: Data (expression) matrix where the row names should be in #' the row names of the genomic_position file. 
-#' @param genomic_position: Data frame read in from the genomic position file +#' @param genomic_position: Data frame read in from the genomic position file #' -#' Returns: -#' @return: Returns a matrix of expression in the order of the +#' @return Returns a matrix of expression in the order of the #' genomic_position file. NULL is returned if the genes in both #' data parameters do not match. +#' @export order_reduce <- function(data, genomic_position){ logging::loginfo(paste("::order_reduce:Start.", sep="")) ret_results <- list(expr=NULL, order=NULL, chr_order=NULL) @@ -1176,16 +1169,16 @@ order_reduce <- function(data, genomic_position){ return(ret_results) } -#' Remove values that are too close to the average and are considered noise. -#' -#' Args: -#' @param smooth_matrix: A matrix of values, smoothed, and with average -#' reference removed. Row = Genes, Col = Cells. -#' @param threshold: The amount of difference a value must be from the -#' reference before the value can be kept and not -#' removed as noise. -#' Returns: -#' @return: Denoised matrix +# Remove values that are too close to the average and are considered noise. +# +# Args: +# smooth_matrix: A matrix of values, smoothed, and with average +# reference removed. Row = Genes, Col = Cells. +# threshold: The amount of difference a value must be from the +# reference before the value can be kept and not +# removed as noise. +# Returns: +# Denoised matrix remove_noise <- function(smooth_matrix, threshold){ logging::loginfo(paste("::remove_noise:Start.", sep="")) @@ -1195,19 +1188,19 @@ remove_noise <- function(smooth_matrix, threshold){ return(smooth_matrix) } -#' Remove the tails of values of a specific chromosome. -#' The smooth_matrix values are expected to be in genomic order. -#' If the tail is too large and no contig will be left 1/3 of the -#' contig is left. -#' -#' Args: -#' @param smooth_matrix: Smoothed values in genomic order. -#' Row = Genes, Col = Cells. 
-#' @param chr: Indices of the chr in which the tails are to be removed. -#' @param tail_length: Length of the tail to remove on both ends of the -#' chr indices. -#' Returns: -#' @return: Indices to remove. +# Remove the tails of values of a specific chromosome. +# The smooth_matrix values are expected to be in genomic order. +# If the tail is too large and no contig will be left 1/3 of the +# contig is left. +# +# Args: +# smooth_matrix: Smoothed values in genomic order. +# Row = Genes, Col = Cells. +# chr: Indices of the chr in which the tails are to be removed. +# tail_length: Length of the tail to remove on both ends of the +# chr indices. +# Returns: +# Indices to remove. remove_tails <- function(smooth_matrix, chr, tail_length){ logging::loginfo(paste("::remove_tails:Start.", sep="")) @@ -1225,17 +1218,17 @@ remove_tails <- function(smooth_matrix, chr, tail_length){ return(remove_indices) } -#' Smooth a matrix by column using a simple moving average. -#' Tails of the averages use a window length that is truncated to -#' available data. -#' -#' Args: -#' @param data: Data matrix to smooth. Row = Genes, Col = Cells. -#' @param window_length: Length of window to use for the moving average. -#' Should be a positive, odd integer. -#' -#' Returns: -#' @return: Matrix with columns smoothed with a simple moving average. +# Smooth a matrix by column using a simple moving average. +# Tails of the averages use a window length that is truncated to +# available data. +# +# Args: +# data: Data matrix to smooth. Row = Genes, Col = Cells. +# window_length: Length of window to use for the moving average. +# Should be a positive, odd integer. +# +# Returns: +# Matrix with columns smoothed with a simple moving average. smooth_window <- function(data, window_length){ logging::loginfo(paste("::smooth_window:Start.", sep="")) @@ -1270,14 +1263,14 @@ smooth_window <- function(data, window_length){ return(data_sm) } -#' Helper function for smoothing the ends of a moving average. 
-#' -#' Args: -#' @param obs_data: Data to smooth -#' @param obs_tails: Length of the tail to smooth. -#' -#' Returns: -#' @return: Data smoothed. +# Helper function for smoothing the ends of a moving average. +# +# Args: +# obs_data: Data to smooth +# obs_tails: Length of the tail to smooth. +# +# Returns: +# Data smoothed. smooth_ends_helper <- function(obs_data, obs_tails){ end_data <- rep(NA,length(obs_data)) obs_count <- length(obs_data) @@ -1294,15 +1287,15 @@ smooth_ends_helper <- function(obs_data, obs_tails){ return(end_data) } -#' Smooth vector of values over the given window length. -#' -#' Args: -#' @param obs_data: Vector of data to smooth with a moving average. -#' @param window_length: Length of the window for smoothing. -#' Must be and odd, positive, integer. -#' -#' Returns: -#' @return: Vector of values smoothed with a moving average. +# Smooth vector of values over the given window length. +# +# Args: +# obs_data: Vector of data to smooth with a moving average. +# window_length: Length of the window for smoothing. +# Must be and odd, positive, integer. +# +# Returns: +# Vector of values smoothed with a moving average. smooth_window_helper <- function(obs_data, window_length){ return(filter(obs_data, rep(1 / window_length, window_length), sides=2)) @@ -1319,17 +1312,17 @@ smooth_window_helper <- function(obs_data, window_length){ # The heatmap.cnv function should be considered a modification # of th GMD library function heatmap.3, all credit goes to # their authors. -##' Please note this code is from the library GMD -##' All credit for this code goes to GMD's authors. 
-##' I do not recommend using this version of the code, which -##' has been poorly modified for our use but recommend using -##' the official version from the package GMD -##' https://cran.r-project.org/web/packages/GMD/index.html -##' A copy of gtools::invalid -##' -##' see \code{invalid} in package:gtools for details -##' @title Test if a value is missing, empty, or contains only NA or NULL values -##' @param x value to be tested +## Please note this code is from the library GMD +## All credit for this code goes to GMD's authors. +## I do not recommend using this version of the code, which +## has been poorly modified for our use but recommend using +## the official version from the package GMD +## https://cran.r-project.org/web/packages/GMD/index.html +## A copy of gtools::invalid +## +## see \code{invalid} in package:gtools for details +## Test if a value is missing, empty, or contains only NA or NULL values +## param: x value to be tested .invalid <- function(x) { @@ -1342,19 +1335,18 @@ smooth_window_helper <- function(obs_data, window_length){ else return(FALSE) } -##' Please note this code is from the library GMD -##' All credit for this code goes to GMD's authors. -##' I do not recommend using this version of the code, which -##' has been poorly modified for our use but recommend using -##' the official version from the package GMD -##' https://cran.r-project.org/web/packages/GMD/index.html -##' Call a function with arguments -##' -##' Call a function with arguments -##' @title Call a function with arguments -##' @param FUN function or function name -##' @param ... unnameed function arguments -##' @param MoreArgs named (or unnameed) function arguments +## Please note this code is from the library GMD +## All credit for this code goes to GMD's authors. 
+## I do not recommend using this version of the code, which +## has been poorly modified for our use but recommend using +## the official version from the package GMD +## https://cran.r-project.org/web/packages/GMD/index.html +## Call a function with arguments +## +## Call a function with arguments +## FUN function or function name +## ... unnamed function arguments +## MoreArgs named (or unnamed) function arguments .call.FUN <- function(FUN,...,MoreArgs) { @@ -1375,20 +1367,19 @@ smooth_window_helper <- function(obs_data, window_length){ return(ret) } -##' Please note this code is from the library GMD -##' All credit for this code goes to GMD's authors. -##' I do not recommend using this version of the code, which -##' has been poorly modified for our use but recommend using -##' the official version from the package GMD -##' https://cran.r-project.org/web/packages/GMD/index.html -##' Scale values to make them follow Standard Normal Distribution -##' -##' Scale values to make them follow Standard Normal Distribution -##' @title Scale values to make them follow Standard Normal Distribution -##' @param x numeric -##' @param scale character, indicating the type to scale. -##' @param na.rm logical -##' @return an object with the same dimention of `x'. +## Please note this code is from the library GMD +## All credit for this code goes to GMD's authors. +## I do not recommend using this version of the code, which +## has been poorly modified for our use but recommend using +## the official version from the package GMD +## https://cran.r-project.org/web/packages/GMD/index.html +## Scale values to make them follow Standard Normal Distribution +## +## Scale values to make them follow Standard Normal Distribution +## param x numeric +## param scale character, indicating the type to scale. +## param na.rm logical +## return an object with the same dimension of `x'. 
.scale.data <- function(x,scale,na.rm=TRUE) { @@ -1404,18 +1395,17 @@ smooth_window_helper <- function(obs_data, window_length){ x } -##' Please note this code is from the library GMD -##' All credit for this code goes to GMD's author's. -##' I do not recommend using this version of the code, which -##' has been poorly modified for our use but recommend using -##' the official version from the package GMD -##' https://cran.r-project.org/web/packages/GMD/index.html -##' Scale values to a new range: c(low, high) -##' @title Scale values to a new range. -##' @param x numeric -##' @param low numeric, lower bound of target values -##' @param high numeric, higher bound of target values -##' @return an object with the same dimention of `x'. +## Please note this code is from the library GMD +## All credit for this code goes to GMD's author's. +## I do not recommend using this version of the code, which +## has been poorly modified for our use but recommend using +## the official version from the package GMD +## https://cran.r-project.org/web/packages/GMD/index.html +## Scale values to a new range: c(low, high) +## x numeric +## low numeric, lower bound of target values +## high numeric, higher bound of target values +## return an object with the same dimention of `x'. .scale.x <- function(x,low=0,high=1,na.rm=TRUE) { @@ -1425,23 +1415,22 @@ smooth_window_helper <- function(obs_data, window_length){ a*x+b } -##' Please note this code is from the library GMD -##' All credit for this code goes to GMD's author's. 
-##' I do not recommend using this version of the code, which -##' has been poorly modified for our use but recommend using -##' the official version from the package GMD -##' https://cran.r-project.org/web/packages/GMD/index.html -##' Plot text -##' -##' Plot text -##' @title Plot text -##' @param x character, text to plot -##' @param cex -##' @param forecolor color of foreground -##' @param bg color of background -##' @param bordercolor color of border -##' @param axes as in \code{graphics:::plot} -##' @param ... additional arguments for \code{graphics:::text} +## Please note this code is from the library GMD +## All credit for this code goes to GMD's author's. +## I do not recommend using this version of the code, which +## has been poorly modified for our use but recommend using +## the official version from the package GMD +## https://cran.r-project.org/web/packages/GMD/index.html +## Plot text +## +## Plot text +## x character, text to plot +## cex +## forecolor color of foreground +## bg color of background +## bordercolor color of border +## axes as in \code{graphics:::plot} +## ... additional arguments for \code{graphics:::text} .plot.text <- function(x,xlim=c(0,1),ylim=c(0,1),cex=1,forecolor=par("fg"),bg=par("bg"),bordercolor=NA,axes=FALSE,...){ if (.invalid(x)){ x <- NULL @@ -1457,13 +1446,13 @@ smooth_window_helper <- function(obs_data, window_length){ text(0.5,0.5,x,cex=cex,...) } -##' Please note this code is from the library GMD -##' All credit for this code goes to GMD's author's. -##' I do not recommend using this version of the code, which -##' has been poorly modified for our use but recommend using -##' the official version from the package GMD -##' https://cran.r-project.org/web/packages/GMD/index.html -##' This was originally heatmap.3. +## Please note this code is from the library GMD +## All credit for this code goes to GMD's author's. 
+## I do not recommend using this version of the code, which +## has been poorly modified for our use but recommend using +## the official version from the package GMD +## https://cran.r-project.org/web/packages/GMD/index.html +## This was originally heatmap.3. heatmap.cnv <- function(x, @@ -2727,16 +2716,15 @@ heatmap.cnv <- invisible(ret) } -##' Please note this code is from the library GMD -##' All credit for this code goes to GMD's authors. -##' I do not recommend using this version of the code, which -##' has been poorly modified for our use but recommend using -##' the official version from the package GMD -##' https://cran.r-project.org/web/packages/GMD/index.html -##' Get row or column lines of separation for \code{heatmap.3} according to clusters -##' @title Get row or column lines of separation for heatmap.3 -##' @param clusters a numerical vector, indicating the cluster labels of observations. -##' @param type string, one of the following: \code{c("row","column","both")} +## Please note this code is from the library GMD +## All credit for this code goes to GMD's authors. +## I do not recommend using this version of the code, which +## has been poorly modified for our use but recommend using +## the official version from the package GMD +## https://cran.r-project.org/web/packages/GMD/index.html +## Get row or column lines of separation for \code{heatmap.3} according to clusters +## param clusters a numerical vector, indicating the cluster labels of observations. 
+## param type string, one of the following: \code{c("row","column","both")} get.sep <- function(clusters,type=c("row","column","both")) { diff --git a/man/above_cutoff.Rd b/man/above_cutoff.Rd deleted file mode 100644 index 95486887..00000000 --- a/man/above_cutoff.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{above_cutoff} -\alias{above_cutoff} -\title{Return the indices of the rows that average above the cut off} -\usage{ -above_cutoff(data, cutoff) -} -\description{ -Args: - @param data: Data to measure the average row and evaluate - against the cutoff. Row = Genes, Col = Cells. - @param cutoff: Threshold to be above to be kept. -} -\details{ -Returns: - @return: Returns a vector of row indicies to keep (are above the cutoff). -} - diff --git a/man/average_over_ref.Rd b/man/average_over_ref.Rd deleted file mode 100644 index 64c2206b..00000000 --- a/man/average_over_ref.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{average_over_ref} -\alias{average_over_ref} -\title{Remove the average of the genes of the reference observations from all -observations' expression. Normalization by column.} -\usage{ -average_over_ref(average_data, ref_observations, ref_groups) -} -\description{ -Args: - @param average_data: Matrix containing the data to remove average from - (this includes the reference observations). - Row = Genes, Col = Cells. - @param ref_observations: Indices of reference observations. - Only these are used in the average. - @param ref_groups: A list of vectors of indices refering to the - different groups of the reference indices. -} -\details{ -Returns: - @return: Expression with the average gene expression in the reference - observations removed. 
-} - diff --git a/man/center_smoothed.Rd b/man/center_smoothed.Rd deleted file mode 100644 index c0012480..00000000 --- a/man/center_smoothed.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{center_smoothed} -\alias{center_smoothed} -\title{Center data after smoothing. Center with in cells using median.} -\usage{ -center_smoothed(data_smoothed) -} -\description{ -Args: - @param data_smoothed: Matrix to center. - Row = Genes, Col = cells. -} -\details{ -Returns: - @return: Matrix that is median centered. - Row = Genes, Col = cells. -} - diff --git a/man/center_with_threshold.Rd b/man/center_with_threshold.Rd deleted file mode 100644 index 7a3f4750..00000000 --- a/man/center_with_threshold.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{center_with_threshold} -\alias{center_with_threshold} -\title{Center data and threshold (both negative and postive values)} -\usage{ -center_with_threshold(center_data, threshold) -} -\description{ -Args: - @param center_data: Matrix to center. Row = Genes, Col = Cells. - @param threshold: Values will be required to be with -/+1 * - threshold after centering. -Returns: - @return: Centered and thresholded matrix -} - diff --git a/man/color.palette.Rd b/man/color.palette.Rd deleted file mode 100644 index f14b19ee..00000000 --- a/man/color.palette.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{color.palette} -\alias{color.palette} -\title{Helper function allowing greater control over the steps in a color palette. -Source:http://menugget.blogspot.com/2011/11/define-color-steps-for- - colorramppalette.html#more} -\usage{ -color.palette(steps, between = NULL, ...) 
-} -\description{ -Args: - @param steps: Vector of colors to change use in the palette - @param between: Steps where gradients change -} -\details{ -Returns: - @return: Color palette -} - diff --git a/man/create_sep_list.Rd b/man/create_sep_list.Rd deleted file mode 100644 index db485ddc..00000000 --- a/man/create_sep_list.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{create_sep_list} -\alias{create_sep_list} -\title{Create a sepList forthe heatmap.3 plotting function given integer vectors -of rows and columns where speration should take place. -The expected input to the heatmap function is a list of 2 lists. -The first list are column based rectangles, and the second row. -To define a rectagle the index of the row or column where the line of the rectagle -should be placed is done with a vector of integers, left, bottom, right and top line. -Ie. list(list(c(1,0,3,10), c(5, 0, 10,10)), list(c(1,2,3,4)))} -\usage{ -create_sep_list(row_count, col_count, row_seps = NULL, col_seps = NULL) -} -\description{ -Args: - @param row_count: Total number of rows - @param col_count: Total number of columns - @param row_seps: Vector of integers indices for row breaks - @param col_seps: Vector of integer indices for column breaks -} -\details{ -Returns - @return: List of lists of vectors -} - diff --git a/man/get.sep.Rd b/man/get.sep.Rd deleted file mode 100644 index 2b9f867b..00000000 --- a/man/get.sep.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{get.sep} -\alias{get.sep} -\title{Get row or column lines of separation for heatmap.3} -\usage{ -get.sep(clusters, type = c("row", "column", "both")) -} -\arguments{ -\item{clusters}{a numerical vector, indicating the cluster labels of observations.} - -\item{type}{string, one of the following: \code{c("row","column","both")}} -} -\description{ -Please note this code 
is from the library GMD -All credit for this code goes to GMD's authors. -I do not recommend using this version of the code, which -has been poorly modified for our use but recommend using -the official version from the package GMD -https://cran.r-project.org/web/packages/GMD/index.html -Get row or column lines of separation for \code{heatmap.3} according to clusters -} - diff --git a/man/get_group_color_palette.Rd b/man/get_group_color_palette.Rd deleted file mode 100644 index efa699d8..00000000 --- a/man/get_group_color_palette.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{get_group_color_palette} -\alias{get_group_color_palette} -\title{Returns the color palette for contigs.} -\usage{ -get_group_color_palette() -} -\description{ -Returns: - @return: Color Palette -} - diff --git a/man/heatmap.cnv.Rd b/man/heatmap.cnv.Rd deleted file mode 100644 index 0c901f97..00000000 --- a/man/heatmap.cnv.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{heatmap.cnv} -\alias{heatmap.cnv} -\title{Please note this code is from the library GMD -All credit for this code goes to GMD's author's. 
-I do not recommend using this version of the code, which -has been poorly modified for our use but recommend using -the official version from the package GMD -https://cran.r-project.org/web/packages/GMD/index.html -This was originally heatmap.3.} -\usage{ -heatmap.cnv(x, diss = inherits(x, "dist"), Rowv = TRUE, Colv = TRUE, - dendrogram = c("both", "row", "column", "none"), dist.row, dist.col, - dist.FUN = gdist, dist.FUN.MoreArgs = list(method = "euclidean"), - hclust.row, hclust.col, hclust.FUN = hclust, - hclust.FUN.MoreArgs = list(method = "ward"), scale = c("none", "row", - "column"), na.rm = TRUE, cluster.by.row = FALSE, cluster.by.col = FALSE, - kr = NA, kc = NA, row.clusters = NA, col.clusters = NA, - revR = FALSE, revC = FALSE, add.expr, breaks, x.center, - color.FUN = gplots::bluered, sepList = list(NULL, NULL), - sep.color = c("gray45", "gray45"), sep.lty = 1, sep.lwd = 2, cellnote, - cex.note = 1, notecol = "cyan", na.color = par("bg"), - trace = c("none", "column", "row", "both"), tracecol = "cyan", hline, - vline, linecol = tracecol, labRow = TRUE, labCol = TRUE, - srtRow = NULL, srtCol = NULL, sideRow = 4, sideCol = 1, - margin.for.labRow, margin.for.labCol, ColIndividualColors, - RowIndividualColors, cexRow, cexCol, labRow.by.group = FALSE, - labCol.by.group = FALSE, key = TRUE, key.title = "Color Key", - key.xlab = "Value", key.ylab = "Count", keysize = 1.5, mapsize = 9, - mapratio = 4/3, sidesize = 3, cex.key.main = 0.75, - cex.key.xlab = 0.75, cex.key.ylab = 0.75, density.info = c("histogram", - "density", "none"), denscol = tracecol, densadj = 0.25, - main = "Heatmap", sub = "", xlab = "", ylab = "", cex.main = 2, - cex.sub = 1.5, font.main = 2, font.sub = 3, adj.main = 0.5, - mgp.main = c(1.5, 0.5, 0), mar.main = 3, mar.sub = 3, if.plot = TRUE, - plot.row.partition = FALSE, plot.col.partition = FALSE, - cex.partition = 1.25, color.partition.box = "gray45", - color.partition.border = "#FFFFFF", plot.row.individuals = FALSE, - 
plot.col.individuals = FALSE, plot.row.clusters = FALSE, - plot.col.clusters = FALSE, plot.row.clustering = FALSE, - plot.col.clustering = FALSE, plot.row.individuals.list = FALSE, - plot.col.individuals.list = FALSE, plot.row.clusters.list = FALSE, - plot.col.clusters.list = FALSE, plot.row.clustering.list = FALSE, - plot.col.clustering.list = FALSE, row.data = FALSE, col.data = FALSE, - if.plot.info = FALSE, text.box, cex.text = 1, force_lmat = NULL, - force_lwid = NULL, force_lhei = NULL, force_add = FALSE, ...) -} -\description{ -Please note this code is from the library GMD -All credit for this code goes to GMD's author's. -I do not recommend using this version of the code, which -has been poorly modified for our use but recommend using -the official version from the package GMD -https://cran.r-project.org/web/packages/GMD/index.html -This was originally heatmap.3. -} - diff --git a/man/infer_cnv.Rd b/man/infer_cnv.Rd index 344f0171..c89bf2b2 100644 --- a/man/infer_cnv.Rd +++ b/man/infer_cnv.Rd @@ -2,53 +2,64 @@ % Please edit documentation in R/inferCNV.R \name{infer_cnv} \alias{infer_cnv} -\title{Infer CNV changes given a matrix of RNASeq counts. -Output a pdf and matrix of final values.} +\title{Infer CNV changes given a matrix of RNASeq counts. Output a pdf and matrix of final values.} \usage{ infer_cnv(data, gene_order, cutoff, reference_obs, transform_data, window_length, max_centered_threshold, noise_threshold, num_ref_groups, - num_obs_groups, out_path, plot_steps = FALSE, contig_tail = (window_length - - 1)/2, cluster_reference = NULL, method_bound_vis = NA, - lower_bound_vis = NA, upper_bound_vis = NA) + out_path, plot_steps = FALSE, contig_tail = (window_length - 1)/2, + method_bound_vis = NA, lower_bound_vis = NA, upper_bound_vis = NA) } -\description{ -Args: - @param data: Expression matrix (genes X samples), - assumed to be log2(TPM+1) . 
- @param gene_order: Ordering of the genes (data's rows) - according to their genomic location - To include all genes use 0. - @param cutoff: Cut-off for the average expression of genes to be - used for CNV inference. - @param reference_obs: Column names of the subset of samples (data's columns) - that should be used as references. - If not given, the average of all samples will - be the reference. - @param transform_data: Indicator to log2 + 1 transform - @param window_length: Length of the window for the moving average - (smoothing). Should be an odd integer. - @param max_centered_threshold: The maximum value a a value can have after - centering. Also sets a lower bound of - -1 * this value. - @param noise_threshold: The minimum difference a value can be from the - average reference in order for it not to be - removed as noise. - @param num_ref_groups: The number of reference groups of a list of - indicies for each group of reference indices in - relation to reference_obs. - @param num_obs_groups: Number of groups to break the observations into. - @param pdf_path: The path to what to save the pdf as. The raw data is - also written to this path but with the extension .txt . - @param plot_steps: If true turns on plotting intermediate steps. - @param contig_tail: Length of the tail removed from the ends of contigs. - @param cluster_reference: If given, clustering of observation will only be - relative to genes on this contig. - @param method_bound: Method to use for bounding values in the visualization. - @param lower_bound_vis: Lower bound to normalize data to for visualization. - @param upper_bound_vis: Upper bound to normalize data to for visualization. 
+\arguments{ +\item{data:}{Expression matrix (genes X samples), +assumed to be log2(TPM+1) .} + +\item{gene_order:}{Ordering of the genes (data's rows) +according to their genomic location +To include all genes use 0.} + +\item{cutoff:}{Cut-off for the average expression of genes to be +used for CNV inference.} + +\item{reference_obs:}{Column names of the subset of samples (data's columns) +that should be used as references. +If not given, the average of all samples will +be the reference.} + +\item{transform_data:}{Indicator to log2 + 1 transform} + +\item{window_length:}{Length of the window for the moving average +(smoothing). Should be an odd integer.} + +\item{max_centered_threshold:}{The maximum value a a value can have after +centering. Also sets a lower bound of +-1 * this value.} + +\item{noise_threshold:}{The minimum difference a value can be from the +average reference in order for it not to be +removed as noise.} + +\item{num_ref_groups:}{The number of reference groups of a list of +indicies for each group of reference indices in +relation to reference_obs.} + +\item{out_path:}{The path to what to save the pdf as. The raw data is +also written to this path but with the extension .txt .} + +\item{plot_steps:}{If true turns on plotting intermediate steps.} + +\item{contig_tail:}{Length of the tail removed from the ends of contigs.} + +\item{method_bound:}{Method to use for bounding values in the visualization.} + +\item{lower_bound_vis:}{Lower bound to normalize data to for visualization.} + +\item{upper_bound_vis:}{Upper bound to normalize data to for visualization.} } -\details{ -Returns: - @return: No return. +\value{ +Returns a list including: + CNV matrix before visualization. + CNV matrix after outlier removal for visualization. + Contig order + Column names of the subset of samples that should be used as references. + Names of samples in reference groups. 
} - diff --git a/man/order_reduce.Rd b/man/order_reduce.Rd index 9de7ccd8..abe189d7 100644 --- a/man/order_reduce.Rd +++ b/man/order_reduce.Rd @@ -6,16 +6,17 @@ \usage{ order_reduce(data, genomic_position) } -\description{ -Args: - @param data: Data (expression) matrix where the row names should be in - the row names of the genomic_position file. - @param genomic_position: Data frame read in from the genomic position file +\arguments{ +\item{data:}{Data (expression) matrix where the row names should be in +the row names of the genomic_position file.} + +\item{genomic_position:}{Data frame read in from the genomic position file} } -\details{ -Returns: - @return: Returns a matrix of expression in the order of the +\value{ +Returns a matrix of expression in the order of the genomic_position file. NULL is returned if the genes in both data parameters do not match. } - +\description{ +Args: +} diff --git a/man/plot_cnv.Rd b/man/plot_cnv.Rd index ca5b4f42..0ba77de5 100644 --- a/man/plot_cnv.Rd +++ b/man/plot_cnv.Rd @@ -2,31 +2,38 @@ % Please edit documentation in R/inferCNV.R \name{plot_cnv} \alias{plot_cnv} -\title{Plot the matrix as a heatmap. -Clustering is on observation only, gene position is preserved.} +\title{Plot the matrix as a heatmap. Clustering is on observation only, gene position is preserved.} \usage{ plot_cnv(plot_data, contigs, reference_idx, ref_contig, ref_groups, out_dir, title, obs_title, ref_title, contig_cex = 1, k_obs_groups = 1, color_safe_pal = TRUE) } -\description{ -Args: - @param plot_data: Data matrix to plot (columns are observations). - @param contigs: The contigs the data is group in in order of rows. - @param reference_idx: Vector of reference indices. - @param ref_contig: If given, will focus cluster on only genes in this contig - @param reg_groups: Groups of vector indices (as indices in reference_idx) - @param out_dir: Directory in which to save pdf and other output. - @param title: Plot title. 
- @param obs_title: Title for the observations matrix. - @param ref_title: Title for the reference matrix. - @param contig_cex: Contig text size. - @param k_obs_groups: Number of groups to break observation into - @param color_safe_pal: Logical indication of using a color blindness safe - palette. +\arguments{ +\item{plot_data:}{Data matrix to plot (columns are observations).} + +\item{contigs:}{The contigs the data is group in in order of rows.} + +\item{reference_idx:}{Vector of reference indices.} + +\item{ref_contig:}{If given, will focus cluster on only genes in this contig} + +\item{reg_groups:}{Groups of vector indices (as indices in reference_idx)} + +\item{out_dir:}{Directory in which to save pdf and other output.} + +\item{title:}{Plot title.} + +\item{obs_title:}{Title for the observations matrix.} + +\item{ref_title:}{Title for the reference matrix.} + +\item{contig_cex:}{Contig text size.} + +\item{k_obs_groups:}{Number of groups to break observation into} + +\item{color_safe_pal:}{Logical indication of using a color blindness safe +palette.} } -\details{ -Returns: - @return: No return +\value{ +No return, void. } - diff --git a/man/plot_cnv_observations.Rd b/man/plot_cnv_observations.Rd deleted file mode 100644 index bfe32d29..00000000 --- a/man/plot_cnv_observations.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{plot_cnv_observations} -\alias{plot_cnv_observations} -\title{Plot the observational samples} -\usage{ -plot_cnv_observations(obs_data, col_pal, contig_colors, contig_labels, - contig_names, contig_seps, num_obs_groups, file_base_name, cnv_title, - cnv_obs_title, contig_lab_size = 1, cluster_contig = NULL, - testing = FALSE, layout_lmat = NULL, layout_lhei = NULL, - layout_lwid = NULL) -} -\description{ -Args: - @param obs_data: Data to plot as observations. Rows = Cells, Col = Genes - @param col_pal: The color palette to use. 
- @param contig_colors: The colors for the contig bar. - @param contig_labels: The labels for the contigs. - @param contig_names: Names of the contigs - @param contig_seps: Indices for line seperators of contigs. - @param num_obs_groups: Number of groups of observations to create - @param file_base_name: Base of the file to used to make output file names. - @param cnv_title: Title of the plot. - @param cnv_obs_title: Title for the observation matrix. - @param contig_lab_size: Text size for contigs. - @param cluster_contig: A value directs cluster to only genes on this contig - @param layout_lmat: lmat values to use in layout - @param layout_lhei: lhei values to use in layout - @param layout_lwid: lwid values to use in layout -} -\details{ -Returns: - @return: Void -} - diff --git a/man/plot_cnv_references.Rd b/man/plot_cnv_references.Rd deleted file mode 100644 index 129427a7..00000000 --- a/man/plot_cnv_references.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{plot_cnv_references} -\alias{plot_cnv_references} -\title{Plot the reference samples} -\usage{ -plot_cnv_references(ref_data, ref_groups, col_pal, contig_seps, file_base_name, - cnv_ref_title, layout_lmat = NULL, layout_lwid = NULL, - layout_lhei = NULL, layout_add = FALSE, testing = FALSE) -} -\description{ -Args: - @param ref_data: Data to plot as references. Rows = Cells, Col = Genes - @param ref_groups: Groups of references to plot together. - @param col_pal: The color palette to use. - @param contig_seps: Indices for line seperators of contigs. - @param file_base_name: Base of the file to used to make output file names. - @param cnv_ref_title: Title for reference matrix. - @param layout_lmat: lmat values to use in the layout. - @param layout_lwid: lwid values to use in the layout. - @param layout_lhei: lhei values to use in the layout. - @param layout_add: Indicates the ref image shoudl be added to the previous plot. 
- @param testing: Turns off plotting when true. -} -\details{ -Returns: - @return: Void -} - diff --git a/man/plot_observations_layout.Rd b/man/plot_observations_layout.Rd deleted file mode 100644 index e7347d79..00000000 --- a/man/plot_observations_layout.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{plot_observations_layout} -\alias{plot_observations_layout} -\title{Create the layout for the plot -This is a modification of the original -layout from the GMD heatmap.3 function} -\usage{ -plot_observations_layout() -} -\description{ -Returns: - @return: list with slots "lmat" (layout matrix), - "lhei" (height, numerix vector), - and "lwid" (widths, numeric vector) -} - diff --git a/man/plot_step.Rd b/man/plot_step.Rd deleted file mode 100644 index de4dff05..00000000 --- a/man/plot_step.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{plot_step} -\alias{plot_step} -\title{Log intermediate step with a plot and text file of the steps.} -\usage{ -plot_step(data, plot_name) -} -\description{ -Args: - @param data: The data frame to plot. - @param plot_name: The absolute path to the pdf to be plotted. -} -\details{ -Returns: - @return: No return -} - diff --git a/man/remove_noise.Rd b/man/remove_noise.Rd deleted file mode 100644 index bd8de825..00000000 --- a/man/remove_noise.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{remove_noise} -\alias{remove_noise} -\title{Remove values that are too close to the average and are considered noise.} -\usage{ -remove_noise(smooth_matrix, threshold) -} -\description{ -Args: - @param smooth_matrix: A matrix of values, smoothed, and with average - reference removed. Row = Genes, Col = Cells. 
- @param threshold: The amount of difference a value must be from the - reference before the value can be kept and not - removed as noise. -Returns: - @return: Denoised matrix -} - diff --git a/man/remove_outliers_norm.Rd b/man/remove_outliers_norm.Rd deleted file mode 100644 index 9b19e972..00000000 --- a/man/remove_outliers_norm.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{remove_outliers_norm} -\alias{remove_outliers_norm} -\title{Set outliers to some upper or lower bound. Then normalize values to -approximately [-1, 1]. This is to prep the data for visualization.} -\usage{ -remove_outliers_norm(data, out_method = NA, lower_bound = NA, - upper_bound = NA, plot_step = NA) -} -\description{ -Args: - @param data: data to remove outliers. Outliers removed within columns. - @param out_method: Method to remove outliers [(average_bound, NA (hard threshold))] - @param lower_bound: Lower bound which identifies a measurement - as an outlier. - @param upper_bound: Upper bound which identifies a measurement - as an outlier. - @param plot_step: True will plot this analysis step. -} -\details{ -Returns: - @return: Return data matrix with outliers removed -} - diff --git a/man/remove_tails.Rd b/man/remove_tails.Rd deleted file mode 100644 index 13174f5d..00000000 --- a/man/remove_tails.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{remove_tails} -\alias{remove_tails} -\title{Remove the tails of values of a specific chromosome. -The smooth_matrix values are expected to be in genomic order. -If the tail is too large and no contig will be left 1/3 of the -contig is left.} -\usage{ -remove_tails(smooth_matrix, chr, tail_length) -} -\description{ -Args: - @param smooth_matrix: Smoothed values in genomic order. - Row = Genes, Col = Cells. - @param chr: Indices of the chr in which the tails are to be removed. 
- @param tail_length: Length of the tail to remove on both ends of the - chr indices. -Returns: - @return: Indices to remove. -} - diff --git a/man/smooth_ends_helper.Rd b/man/smooth_ends_helper.Rd deleted file mode 100644 index 1703f136..00000000 --- a/man/smooth_ends_helper.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{smooth_ends_helper} -\alias{smooth_ends_helper} -\title{Helper function for smoothing the ends of a moving average.} -\usage{ -smooth_ends_helper(obs_data, obs_tails) -} -\description{ -Args: - @param obs_data: Data to smooth - @param obs_tails: Length of the tail to smooth. -} -\details{ -Returns: - @return: Data smoothed. -} - diff --git a/man/smooth_window.Rd b/man/smooth_window.Rd deleted file mode 100644 index 966d4159..00000000 --- a/man/smooth_window.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{smooth_window} -\alias{smooth_window} -\title{Smooth a matrix by column using a simple moving average. -Tails of the averages use a window length that is truncated to -available data.} -\usage{ -smooth_window(data, window_length) -} -\description{ -Args: - @param data: Data matrix to smooth. Row = Genes, Col = Cells. - @param window_length: Length of window to use for the moving average. - Should be a positive, odd integer. -} -\details{ -Returns: - @return: Matrix with columns smoothed with a simple moving average. 
-} - diff --git a/man/smooth_window_helper.Rd b/man/smooth_window_helper.Rd deleted file mode 100644 index 4529761f..00000000 --- a/man/smooth_window_helper.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{smooth_window_helper} -\alias{smooth_window_helper} -\title{Smooth vector of values over the given window length.} -\usage{ -smooth_window_helper(obs_data, window_length) -} -\description{ -Args: - @param obs_data: Vector of data to smooth with a moving average. - @param window_length: Length of the window for smoothing. - Must be and odd, positive, integer. -} -\details{ -Returns: - @return: Vector of values smoothed with a moving average. -} - diff --git a/man/split_references.Rd b/man/split_references.Rd deleted file mode 100644 index 19ca9f21..00000000 --- a/man/split_references.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV.R -\name{split_references} -\alias{split_references} -\title{Split up reference observations in to k groups and return indices -for the different groups.} -\usage{ -split_references(average_data, ref_obs, num_groups) -} -\description{ -Args: - @param average_data: Matrix containing data. Row = Genes, Col = Cells. - @param ref_obs: Indices of reference obervations. - @param num_groups: The number of groups to partition nodes in or a list - of already partitioned indices. -} -\details{ -Returns: - @return: Returns a list of grouped reference observations given as - vectors of groups. These are indices relative to the reference - observations only, so a return 1 indicates the first reference - row, not the first row. 
-} - diff --git a/src/gtf_to_position_file.py b/scripts/gtf_to_position_file.py similarity index 100% rename from src/gtf_to_position_file.py rename to scripts/gtf_to_position_file.py diff --git a/src/inferCNV.R b/scripts/inferCNV.R similarity index 94% rename from src/inferCNV.R rename to scripts/inferCNV.R index b4b4fc1f..f9621c31 100755 --- a/src/inferCNV.R +++ b/scripts/inferCNV.R @@ -7,14 +7,16 @@ library("RColorBrewer", character.only=TRUE) library(GMD) library(optparse) library(logging) -#library(infercnv) -source("R/inferCNV.R") +library(infercnv) # Logging level choices C_LEVEL_CHOICES <- names(loglevels) # Visualization outlier thresholding and bounding method choices C_VIS_OUTLIER_CHOICES <- c("average_bound") +CHR = "chr" +START = "start" +STOP = "stop" #' Check arguments and make sure the user input meet certain #' additional requirements. @@ -456,7 +458,7 @@ if (length(input_reference_samples) != } # Order and reduce the expression to the genomic file. -order_ret <- order_reduce(data=expression_data, +order_ret <- infercnv::order_reduce(data=expression_data, genomic_position=input_gene_order) expression_data <- order_ret$expr input_gene_order <- order_ret$order @@ -468,28 +470,26 @@ if(is.null(expression_data)){ } # Run CNV inference -ret_list = infer_cnv(data=expression_data, - gene_order=input_gene_order, - cutoff=args$cutoff, - reference_obs=input_reference_samples, - transform_data=args$log_transform, - window_length=args$window_length, - max_centered_threshold=args$max_centered_expression, - noise_threshold=args$magnitude_filter, - num_ref_groups=args$num_groups, - cluster_reference=args$clustering_contig, - num_obs_groups=args$num_obs, - out_path=args$output_dir, - plot_steps=args$plot_steps, - contig_tail=args$contig_tail, - method_bound_vis=args$bound_method_vis, - lower_bound_vis=bounds_viz[1], - upper_bound_vis=bounds_viz[2]) +ret_list = infercnv::infer_cnv(data=expression_data, + gene_order=input_gene_order, + cutoff=args$cutoff, + 
reference_obs=input_reference_samples, + transform_data=args$log_transform, + window_length=args$window_length, + max_centered_threshold=args$max_centered_expression, + noise_threshold=args$magnitude_filter, + num_ref_groups=args$num_groups, + out_path=args$output_dir, + plot_steps=args$plot_steps, + contig_tail=args$contig_tail, + method_bound_vis=args$bound_method_vis, + lower_bound_vis=bounds_viz[1], + upper_bound_vis=bounds_viz[2]) # Log output logging::loginfo(paste("::infer_cnv:Writing final data to ", file.path(args$output_dir, - "expression_pre_vis_transform.txt"), sep="")) + "expression_pre_vis_transform.txt"), sep="_")) # Output data before viz outlier write.table(ret_list["PREVIZ"], file=file.path(args$output_dir, @@ -497,17 +497,17 @@ write.table(ret_list["PREVIZ"], # Output data after viz outlier write.table(ret_list["VIZ"], file=paste(args$output_dir, - "expression_post_viz_transform.txt")) + "expression_post_viz_transform.txt",sep="_")) logging::loginfo(paste("::infer_cnv:Current data dimensions (r,c)=", paste(dim(ret_list[["VIZ"]]), collapse=","), sep="")) logging::loginfo(paste("::infer_cnv:Drawing plots to file:", args$output_dir, sep="")) -plot_cnv(plot_data=ret_list[["VIZ"]], +infercnv::plot_cnv(plot_data=ret_list[["VIZ"]], contigs=ret_list[["CONTIGS"]], - k_obs_groups=ret_list[["N_OBS_GROUPS"]], + k_obs_groups=args$num_obs, reference_idx=ret_list[["REF_OBS_IDX"]], - ref_contig=ret_list[["CLUST_REF"]], + ref_contig=args$clustering_contig, contig_cex=args$contig_label_size, ref_groups=ret_list[["REF_GROUPS"]], out_dir=args$output_dir, diff --git a/vignettes/inferCNV.Rmd b/vignettes/inferCNV.Rmd index 2cf2963a..b10c0ef3 100644 --- a/vignettes/inferCNV.Rmd +++ b/vignettes/inferCNV.Rmd @@ -1,4 +1,3 @@ -[![Travis-CI Build Status](https://travis-ci.org/broadinstitute/inferCNV.svg?branch=master)](https://travis-ci.org/broadinstitute/inferCNV) ## Infer Copy Number Variation using Single-cell RNA-Seq Expression Data. 
Authors: Timothy Tickle, Itay Tirosh, Brian Haas