Skip to content

Commit

Permalink
Merge pull request #247 from waveygang/easy-diverged
Browse files: browse the repository at this point in the history
scoring parameter updates for broader diversity and alignment mode optimizations
  • Loading branch information
ekg authored Jun 12, 2024
2 parents bb0d43d + 719381c commit 9ff0452
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 312 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ if (${CMAKE_BUILD_TYPE} MATCHES Release)
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG")
set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG")
if (NOT EXTRA_FLAGS)
set(EXTRA_FLAGS "-Ofast -march=native")
set(EXTRA_FLAGS "-Ofast -march=x86-64-v3")
endif()
endif ()

Expand Down
4 changes: 2 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
reproducibleBuild = false;

# Use custom attributes to set compiler flags
CFLAGS = if enableOptimizations then "-O3 -march=native" else "";
CXXFLAGS = if enableOptimizations then "-O3 -march=native" else "";
CFLAGS = if enableOptimizations then "-Ofast -march=x86-64-v3" else "";
CXXFLAGS = if enableOptimizations then "-Ofast -march=x86-64-v3" else "";

postPatch = ''
mkdir -p include
Expand Down
496 changes: 241 additions & 255 deletions src/align/include/computeAlignments.hpp

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions src/common/wflign/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ endif()
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")

if (${CMAKE_BUILD_TYPE} MATCHES Release)
#set(EXTRA_FLAGS "-Ofast -march=native -flto -fno-fat-lto-objects")
set(EXTRA_FLAGS "-Ofast -march=native")
#set(EXTRA_FLAGS "-Ofast -march=x86-64-v3 -flto -fno-fat-lto-objects")
set(EXTRA_FLAGS "-Ofast -march=x86-64-v3")
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to replace -O3 with -Ofast
endif ()

Expand Down
2 changes: 1 addition & 1 deletion src/common/wflign/deps/WFA2-lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ if(NOT CMAKE_BUILD_TYPE)
endif()

if (${CMAKE_BUILD_TYPE} MATCHES Release)
set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} -march=native -D_FILE_OFFSET_BITS=64")
set(OPTIMIZE_FLAGS "${OPTIMIZE_FLAGS} -march=x86-64-v3")
endif()

if ((${CMAKE_BUILD_TYPE} MATCHES Release) OR (${CMAKE_BUILD_TYPE} MATCHES RelWithDebInfo))
Expand Down
2 changes: 1 addition & 1 deletion src/common/wflign/deps/WFA2-lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ ifeq ($(BUILD_EXAMPLES),1)
APPS+=examples
endif

all: CC_FLAGS+=-O3 -march=native #-flto -ffat-lto-objects
all: CC_FLAGS+=-O3 -march=x86-64-v3 #-flto -ffat-lto-objects
all: build

debug: build
Expand Down
22 changes: 3 additions & 19 deletions src/common/wflign/src/wflign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,29 +246,12 @@ int wflambda_extend_match(
delete aln;
}

// cleanup all sketches when they are too many
if (extend_data->num_sketches_allocated > extend_data->max_num_sketches_in_memory) {
clean_up_sketches(query_sketches);
clean_up_sketches(target_sketches);

//std::cerr << "extend_data->num_sketches_allocated " << extend_data->num_sketches_allocated << std::endl;
extend_data->num_sketches_allocated = 0;
}

// // cleanup all sketches when the breakpoint changes
// int last_breakpoint_v, last_breakpoint_h;
// extend_data->wflambda_aligner->getLastBreakpoint(&last_breakpoint_v, &last_breakpoint_h);
// if (extend_data->last_breakpoint_v != last_breakpoint_v || extend_data->last_breakpoint_h != last_breakpoint_h) {
// //std::cerr << v << "\t" << h << "\t" << last_breakpoint_v << "\t" << last_breakpoint_h << std::endl;
// extend_data->last_breakpoint_v = last_breakpoint_v;
// extend_data->last_breakpoint_h = last_breakpoint_h;
//
// clean_up_sketches(query_sketches);
// clean_up_sketches(target_sketches);
//
// //std::cerr << "extend_data->num_sketches_allocated " << extend_data->num_sketches_allocated << std::endl;
// //extend_data->num_sketches_allocated = 0;
// }
}
} else if (h < 0 || v < 0) { // It can be removed using an edit-distance
// mode as high-level of WF-inception
Expand Down Expand Up @@ -674,8 +657,9 @@ void WFlign::wflign_affine_wavefront(
extend_data.num_alignments_performed = 0;
#endif
extend_data.num_sketches_allocated = 0;
// 1 GB / (hash_t*mash_sketch_rate*segment_length*2); 1 GB = 1×8×1024×1024×1024; '*2' to account query/target sequences
extend_data.max_num_sketches_in_memory = std::ceil(8589934592.0 / (8.0*sizeof(rkmh::hash_t)*mash_sketch_rate*segment_length_to_use*2) );
// 128 MB of memory for sketches
extend_data.max_num_sketches_in_memory = 128 * 1024 * 1024
/ (sizeof(std::vector<rkmh::hash_t>) + mash_sketch_rate * segment_length_to_use * sizeof(rkmh::hash_t));
#ifdef WFA_PNG_TSV_TIMING
extend_data.emit_png = !prefix_wavefront_plot_in_png->empty() && wfplot_max_size > 0;
extend_data.high_order_dp_matrix_mismatch = &high_order_dp_matrix_mismatch;
Expand Down
46 changes: 15 additions & 31 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,16 @@ void parse_args(int argc,
args::Group alignment_opts(parser, "[ Alignment Options ]");
args::ValueFlag<std::string> align_input_paf(alignment_opts, "FILE", "derive precise alignments for this input PAF", {'i', "input-paf"});
args::Flag force_biwfa_alignment(alignment_opts, "force-biwfa", "force alignment with biWFA for all sequence pairs", {'I', "force-biwfa"});
args::Flag invert_filtering(alignment_opts, "A", "if an input PAF is specified, remove alignments with gap-compressed identity below --map-pct-id x 0.8, else keep all alignments "
"[default: if an input PAF is specified, keep all alignments, else remove alignments with gap-compressed identity below --map-pct-id x 0.8]",
{'O', "invert-filtering"});
args::ValueFlag<uint16_t> wflambda_segment_length(alignment_opts, "N", "wflambda segment length: size (in bp) of segment mapped in hierarchical WFA problem [default: 256]", {'W', "wflamda-segment"});
args::ValueFlag<std::string> wfa_score_params(alignment_opts, "mismatch,gap1,ext1",
"score parameters for the wfa alignment (affine); match score is fixed at 0 [default: 6,8,1]",
"score parameters for the wfa alignment (affine); match score is fixed at 0 [default: 2,3,1]",
{"wfa-params"});
args::ValueFlag<std::string> wfa_patching_score_params(alignment_opts, "mismatch,gap1,ext1,gap2,ext2",
"score parameters for the wfa patching alignment (convex); match score is fixed at 0 [default: 5,8,2,49,1]",
"score parameters for the wfa patching alignment (convex); match score is fixed at 0 [default: 3,4,2,24,1]",
{"wfa-patching-params"});
//wflign parameters
args::ValueFlag<std::string> wflign_score_params(alignment_opts, "mismatch,gap1,ext1",
"score parameters for the wflign alignment (affine); match score is fixed at 0 [default: 4,6,1]",
"score parameters for the wflign alignment (affine); match score is fixed at 0 [default: 2,3,1]",
{"wflign-params"});
args::ValueFlag<float> wflign_max_mash_dist(alignment_opts, "N", "maximum mash distance to perform the alignment in a wflambda segment [default: adaptive with respect to the estimated identity]", {'b', "max-mash-dist"});
args::ValueFlag<int> wflign_min_wavefront_length(alignment_opts, "N", "min wavefront length for heuristic WFlign [default: 1024]", {'j', "wflign-min-wf-len"});
Expand Down Expand Up @@ -282,9 +279,9 @@ void parse_args(int argc,
align_parameters.wfa_gap_opening_score = params[1];
align_parameters.wfa_gap_extension_score = params[2];
} else {
align_parameters.wfa_mismatch_score = -1;
align_parameters.wfa_gap_opening_score = -1;
align_parameters.wfa_gap_extension_score = -1;
align_parameters.wfa_mismatch_score = 2;
align_parameters.wfa_gap_opening_score = 3;
align_parameters.wfa_gap_extension_score = 1;
}

if (!args::get(wfa_patching_score_params).empty()) {
Expand All @@ -305,11 +302,11 @@ void parse_args(int argc,
align_parameters.wfa_patching_gap_opening_score2 = params[3];
align_parameters.wfa_patching_gap_extension_score2 = params[4];
} else {
align_parameters.wfa_patching_mismatch_score = -1;
align_parameters.wfa_patching_gap_opening_score1 = -1;
align_parameters.wfa_patching_gap_extension_score1 = -1;
align_parameters.wfa_patching_gap_opening_score2 = -1;
align_parameters.wfa_patching_gap_extension_score2 = -1;
align_parameters.wfa_patching_mismatch_score = 3;
align_parameters.wfa_patching_gap_opening_score1 = 4;
align_parameters.wfa_patching_gap_extension_score1 = 2;
align_parameters.wfa_patching_gap_opening_score2 = 24;
align_parameters.wfa_patching_gap_extension_score2 = 1;
}

if (!args::get(wflign_score_params).empty()) {
Expand All @@ -328,9 +325,9 @@ void parse_args(int argc,
align_parameters.wflign_gap_opening_score = params[1];
align_parameters.wflign_gap_extension_score = params[2];
} else {
align_parameters.wflign_mismatch_score = -1;
align_parameters.wflign_gap_opening_score = -1;
align_parameters.wflign_gap_extension_score = -1;
align_parameters.wflign_mismatch_score = 2;
align_parameters.wflign_gap_opening_score = 3;
align_parameters.wflign_gap_extension_score = 1;
}

if (wflign_max_mash_dist) {
Expand Down Expand Up @@ -507,20 +504,7 @@ void parse_args(int argc,
// std::cerr << "[wfmash] INFO, skch::parseandSave, read " << map_parameters.high_freq_kmers.size() << " high frequency kmers." << std::endl;
// }


if (align_input_paf) {
if (invert_filtering) {
align_parameters.min_identity = map_parameters.percentageIdentity * 0.8; // in [0,1]
} else {
align_parameters.min_identity = 0; // disabled
}
} else {
if (invert_filtering) {
align_parameters.min_identity = 0; // disable
} else {
align_parameters.min_identity = map_parameters.percentageIdentity * 0.8; // in [0,1]
}
}
align_parameters.min_identity = 0; // disabled

if (wflambda_segment_length) {
align_parameters.wflambda_segment_length = args::get(wflambda_segment_length);
Expand Down

0 comments on commit 9ff0452

Please sign in to comment.