Skip to content

Commit

Permalink
Merge pull request #24 from brianjohnhaas/devel
Browse files Browse the repository at this point in the history
added --hclust_method parameter for flexiblity in clustering option choices

Former-commit-id: 26bd1a5
  • Loading branch information
brianjohnhaas authored May 11, 2018
2 parents 1495d46 + 36202f8 commit 9b953d7
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 10 deletions.
22 changes: 15 additions & 7 deletions R/inferCNV.R
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ create_sep_list <- function(row_count,
# row, not the first row.
split_references <- function(average_data,
ref_obs,
num_groups){
num_groups,
hclust_method='complete') {
logging::loginfo(paste("::split_references:Start", sep=""))
ret_groups <- list()
split_groups <- NULL
Expand Down Expand Up @@ -269,7 +270,8 @@ split_references <- function(average_data,
# Get HCLUST
# Get reference observations only.
average_reference_obs <- t(average_data[, ref_obs, drop=FALSE])
hc <- hclust(dist(average_reference_obs))

hc <- hclust(dist(average_reference_obs), method=hclust_method)

split_groups <- cutree(hc, k=num_groups)
split_groups <- split_groups[hc$order]
Expand Down Expand Up @@ -456,14 +458,16 @@ infer_cnv <- function(data,
method_bound_vis=NA,
lower_bound_vis=NA,
upper_bound_vis=NA,
ref_subtract_method="by_mean") {
ref_subtract_method="by_mean",
hclust_method='complete') {

logging::loginfo(paste("::infer_cnv:Start", sep=""))

# Split the reference data into groups if requested
groups_ref <- split_references(average_data=data, #data_smoothed,
ref_obs=reference_obs,
num_groups=num_ref_groups)
ref_obs=reference_obs,
num_groups=num_ref_groups,
hclust_method=hclust_method)
logging::loginfo(paste("::infer_cnv:split_reference. ",
"found ",length(groups_ref)," reference groups.",
sep=""))
Expand Down Expand Up @@ -896,6 +900,7 @@ plot_cnv <- function(plot_data,
contig_cex=1,
k_obs_groups=1,
x.center=0,
hclust_method='average',
color_safe_pal=TRUE,
pdf_filename="infercnv.pdf"){

Expand Down Expand Up @@ -994,6 +999,7 @@ plot_cnv <- function(plot_data,
contig_lab_size=contig_cex,
breaksList=breaksList_t,
x.center=x.center,
hclust_method=hclust_method,
layout_lmat=force_layout[["lmat"]],
layout_lhei=force_layout[["lhei"]],
layout_lwid=force_layout[["lwid"]])
Expand Down Expand Up @@ -1049,6 +1055,7 @@ plot_cnv_observations <- function(obs_data,
cluster_contig=NULL,
breaksList,
x.center,
hclust_method="average",
testing=FALSE,
layout_lmat=NULL,
layout_lhei=NULL,
Expand Down Expand Up @@ -1085,8 +1092,9 @@ plot_cnv_observations <- function(obs_data,
sep=" "))
}
}
# HCL with a inversely weighted euclidean distance.
obs_hcl <- hclust(dist(obs_data[,hcl_group_indices]),"average")
# HCL with a inversely weighted euclidean distance.
logging::loginfo(paste("clustering observations via method: ", hclust_method, sep=""))
obs_hcl <- hclust(dist(obs_data[,hcl_group_indices]), method=hclust_method)
obs_dendrogram <- as.dendrogram(obs_hcl)
write.tree(as.phylo(obs_hcl),
file=paste(file_base_name, "observations_dendrogram.txt", sep=.Platform$file.sep))
Expand Down
27 changes: 24 additions & 3 deletions scripts/inferCNV.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ C_LEVEL_CHOICES <- names(loglevels)
# Visualization outlier thresholding and bounding method choices
C_VIS_OUTLIER_CHOICES <- c("average_bound")
C_REF_SUBTRACT_METHODS <- c("by_mean", "by_quantiles")
C_HCLUST_METHODS <- c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid")

CHR = "chr"
START = "start"
Expand Down Expand Up @@ -71,7 +72,14 @@ check_arguments <- function(arguments){
paste(C_REF_SUBTRACT_METHODS, collapse=","), sep="") )
stop("error, must specify acceptable --ref_subtract_method")
}



if (! (arguments$hclust_method %in% C_HCLUST_METHODS) ) {
logging::logerror(paste(":: --hclust_method: acceptable values are: ",
paste(C_HCLUST_METHODS, collapse=","), sep="") )
stop("error, must specify acceptable --hclust_method")
}

# Warn that an average of the samples is used in the absence of
# normal / reference samples
if (is.null(arguments$reference_observations)){
Expand Down Expand Up @@ -292,10 +300,21 @@ pargs <- optparse::add_option(pargs, c("--ref_subtract_method"),
action="store",
dest="ref_subtract_method",
metavar="Reference_Subtraction_Method",
help=paste("Method used to subtract the reference values from the observations. Valid choices are",
help=paste("Method used to subtract the reference values from the observations. Valid choices are: ",
paste(C_REF_SUBTRACT_METHODS, collapse=", "),
" [Default %default]"))

pargs <- optparse::add_option(pargs, c("--hclust_method"),
type="character",
default="complete",
action="store",
dest="hclust_method",
metavar="Hierarchical_Clustering_Method",
help=paste("Method used for hierarchical clustering of cells. Valid choices are: ",
paste(C_HCLUST_METHODS, collapse=", "),
" [Default %default]"))



pargs <- optparse::add_option(pargs,c("--obs_cluster_contig"),
type="character",
Expand Down Expand Up @@ -542,7 +561,8 @@ ret_list = infercnv::infer_cnv(data=expression_data,
method_bound_vis=args$bound_method_vis,
lower_bound_vis=bounds_viz[1],
upper_bound_vis=bounds_viz[2],
ref_subtract_method=args$ref_subtract_method)
ref_subtract_method=args$ref_subtract_method,
hclust_method=args$hclust_method)

# Log output
logging::loginfo(paste("::infer_cnv:Writing final data to ",
Expand Down Expand Up @@ -582,6 +602,7 @@ if (args$plot_steps) {
ref_groups=ret_list[["REF_GROUPS"]],
out_dir=args$output_dir,
color_safe_pal=args$use_color_safe,
hclust_method=args$hclust_method,
title=args$fig_main,
obs_title=args$obs_main,
ref_title=args$ref_main)
Expand Down

0 comments on commit 9b953d7

Please sign in to comment.