diff --git a/R/inferCNV.R b/R/inferCNV.R index 026564a2..2972cf3f 100755 --- a/R/inferCNV.R +++ b/R/inferCNV.R @@ -233,7 +233,8 @@ create_sep_list <- function(row_count, # row, not the first row. split_references <- function(average_data, ref_obs, - num_groups){ + num_groups, + hclust_method='complete') { logging::loginfo(paste("::split_references:Start", sep="")) ret_groups <- list() split_groups <- NULL @@ -269,7 +270,8 @@ split_references <- function(average_data, # Get HCLUST # Get reference observations only. average_reference_obs <- t(average_data[, ref_obs, drop=FALSE]) - hc <- hclust(dist(average_reference_obs)) + + hc <- hclust(dist(average_reference_obs), method=hclust_method) split_groups <- cutree(hc, k=num_groups) split_groups <- split_groups[hc$order] @@ -456,14 +458,16 @@ infer_cnv <- function(data, method_bound_vis=NA, lower_bound_vis=NA, upper_bound_vis=NA, - ref_subtract_method="by_mean") { + ref_subtract_method="by_mean", + hclust_method='complete') { logging::loginfo(paste("::infer_cnv:Start", sep="")) # Split the reference data into groups if requested groups_ref <- split_references(average_data=data, #data_smoothed, - ref_obs=reference_obs, - num_groups=num_ref_groups) + ref_obs=reference_obs, + num_groups=num_ref_groups, + hclust_method=hclust_method) logging::loginfo(paste("::infer_cnv:split_reference. ", "found ",length(groups_ref)," reference groups.", sep="")) @@ -896,6 +900,7 @@ plot_cnv <- function(plot_data, contig_cex=1, k_obs_groups=1, x.center=0, + hclust_method='average', color_safe_pal=TRUE, pdf_filename="infercnv.pdf"){ @@ -994,6 +999,7 @@ plot_cnv <- function(plot_data, contig_lab_size=contig_cex, breaksList=breaksList_t, x.center=x.center, + hclust_method=hclust_method, layout_lmat=force_layout[["lmat"]], layout_lhei=force_layout[["lhei"]], layout_lwid=force_layout[["lwid"]]) @@ -1049,6 +1055,7 @@ plot_cnv_observations <- function(obs_data, cluster_contig=NULL, breaksList, x.center, + hclust_method="average", testing=FALSE, layout_lmat=NULL, layout_lhei=NULL, @@ -1085,8 +1092,9 @@ plot_cnv_observations <- function(obs_data, sep=" ")) } } - # HCL with a inversely weighted euclidean distance. - obs_hcl <- hclust(dist(obs_data[,hcl_group_indices]),"average") + # HCL with a inversely weighted euclidean distance. + logging::loginfo(paste("clustering observations via method: ", hclust_method, sep="")) + obs_hcl <- hclust(dist(obs_data[,hcl_group_indices]), method=hclust_method) obs_dendrogram <- as.dendrogram(obs_hcl) write.tree(as.phylo(obs_hcl), file=paste(file_base_name, "observations_dendrogram.txt", sep=.Platform$file.sep)) diff --git a/scripts/inferCNV.R b/scripts/inferCNV.R index 7cf4d31e..1f0279f7 100755 --- a/scripts/inferCNV.R +++ b/scripts/inferCNV.R @@ -14,6 +14,7 @@ C_LEVEL_CHOICES <- names(loglevels) # Visualization outlier thresholding and bounding method choices C_VIS_OUTLIER_CHOICES <- c("average_bound") C_REF_SUBTRACT_METHODS <- c("by_mean", "by_quantiles") +C_HCLUST_METHODS <- c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid") CHR = "chr" START = "start" @@ -71,7 +72,14 @@ check_arguments <- function(arguments){ paste(C_REF_SUBTRACT_METHODS, collapse=","), sep="") ) stop("error, must specify acceptable --ref_subtract_method") } - + + + if (! (arguments$hclust_method %in% C_HCLUST_METHODS) ) { + logging::logerror(paste(":: --hclust_method: acceptable values are: ", + paste(C_HCLUST_METHODS, collapse=","), sep="") ) + stop("error, must specify acceptable --hclust_method") + } + # Warn that an average of the samples is used in the absence of # normal / reference samples if (is.null(arguments$reference_observations)){ @@ -292,10 +300,21 @@ pargs <- optparse::add_option(pargs, c("--ref_subtract_method"), action="store", dest="ref_subtract_method", metavar="Reference_Subtraction_Method", - help=paste("Method used to subtract the reference values from the observations. Valid choices are", + help=paste("Method used to subtract the reference values from the observations. Valid choices are: ", paste(C_REF_SUBTRACT_METHODS, collapse=", "), " [Default %default]")) +pargs <- optparse::add_option(pargs, c("--hclust_method"), + type="character", + default="complete", + action="store", + dest="hclust_method", + metavar="Hierarchical_Clustering_Method", + help=paste("Method used for hierarchical clustering of cells. Valid choices are: ", + paste(C_HCLUST_METHODS, collapse=", "), + " [Default %default]")) + + pargs <- optparse::add_option(pargs,c("--obs_cluster_contig"), type="character", @@ -542,7 +561,8 @@ ret_list = infercnv::infer_cnv(data=expression_data, method_bound_vis=args$bound_method_vis, lower_bound_vis=bounds_viz[1], upper_bound_vis=bounds_viz[2], - ref_subtract_method=args$ref_subtract_method) + ref_subtract_method=args$ref_subtract_method, + hclust_method=args$hclust_method) # Log output logging::loginfo(paste("::infer_cnv:Writing final data to ", @@ -582,6 +602,7 @@ if (args$plot_steps) { ref_groups=ret_list[["REF_GROUPS"]], out_dir=args$output_dir, color_safe_pal=args$use_color_safe, + hclust_method=args$hclust_method, title=args$fig_main, obs_title=args$obs_main, ref_title=args$ref_main)