Better benchmarking, fixed benchmarking bug, new comparison script.

- New scripts/benchmark_summary.py for comparing bedtools/granges. - Fixed TSV output bug.
vsbuffalo · Feb 21, 2024 · 5b6db72 · 5b6db72
1 parent b4b1a1b
commit 5b6db72
Show file tree

Hide file tree

Showing 4 changed files with 126 additions and 46 deletions.
diff --git a/benches/bedtools_comparison.rs b/benches/bedtools_comparison.rs
@@ -8,11 +8,11 @@ use criterion::{criterion_group, criterion_main, Criterion};
 use granges::test_utilities::{granges_binary_path, random_bedfile};
 use std::process::Command;
 
-const BED_LENGTH: usize = 10_000;
+const BED_LENGTH: usize = 100_000;
 
 fn bench_range_adjustment(c: &mut Criterion) {
  // create the benchmark group
- let mut group = c.benchmark_group("slop vs adjust");
+ let mut group = c.benchmark_group("adjust");
 
  // create the test data
  let input_bedfile = random_bedfile(BED_LENGTH);
@@ -56,7 +56,7 @@ fn bench_range_adjustment(c: &mut Criterion) {
 
 fn bench_filter_adjustment(c: &mut Criterion) {
  // create the benchmark group
- let mut group = c.benchmark_group("intersect vs filter");
+ let mut group = c.benchmark_group("filter");
 
  // create the test data
  let random_bedfile_left_tempfile = random_bedfile(BED_LENGTH);
@@ -99,5 +99,56 @@ fn bench_filter_adjustment(c: &mut Criterion) {
  });
 }
 
-criterion_group!(benches, bench_filter_adjustment, bench_range_adjustment);
+fn bench_flank(c: &mut Criterion) {
+ // create the benchmark group
+ let mut group = c.benchmark_group("flank");
+
+ // create the test data
+ let random_bedfile_tempfile = random_bedfile(BED_LENGTH);
+ let random_bedfile = random_bedfile_tempfile.path();
+
+ // configure the sample size for the group
+ // group.sample_size(10);
+ group.bench_function("bedtools_flank", |b| {
+ b.iter(|| {
+ let bedtools_output = Command::new("bedtools")
+ .arg("flank")
+ .arg("-g")
+ .arg("tests_data/hg38_seqlens.tsv")
+ .arg("-l")
+ .arg("20")
+ .arg("-r")
+ .arg("30")
+ .arg("-i")
+ .arg(random_bedfile)
+ .output()
+ .expect("bedtools flank failed");
+ assert!(bedtools_output.status.success());
+ });
+ });
+
+ group.bench_function("granges_filter", |b| {
+ b.iter(|| {
+ let granges_output = Command::new(granges_binary_path())
+ .arg("flank")
+ .arg("--genome")
+ .arg("tests_data/hg38_seqlens.tsv")
+ .arg("--left")
+ .arg("20")
+ .arg("--right")
+ .arg("30")
+ .arg(random_bedfile)
+ .output()
+ .expect("granges adjust failed");
+ assert!(granges_output.status.success());
+ });
+ });
+}
+
+criterion_group!(
+ benches,
+ bench_filter_adjustment,
+ bench_range_adjustment,
+ bench_flank,
+);
 criterion_main!(benches);
diff --git a/scripts/benchmark_summary.py b/scripts/benchmark_summary.py
@@ -0,0 +1,53 @@
+import os
+import json
+
+
+def extract_mean_point_estimate(estimates_path):
+ with open(estimates_path, "r") as file:
+ data = json.load(file)
+ return data["mean"]["point_estimate"]
+
+
+def calculate_ratios(criterion_dir):
+ comparisons = {} # Store comparison: ratio
+
+ subcommand_benches = [d for d in os.listdir(criterion_dir) if d != "report"]
+
+ for subcommand in subcommand_benches:
+ benches = dict()
+ runs = os.listdir(os.path.join(criterion_dir, subcommand))
+
+ for dir in runs:
+ if dir.startswith("report"):
+ continue
+ if dir.startswith("bedtools"):
+ bedtools_bench = os.path.join(
+ criterion_dir, subcommand, dir, "new", "estimates.json"
+ )
+ benches["bedtools"] = extract_mean_point_estimate(bedtools_bench)
+ if dir.startswith("granges"):
+ granges_bench = os.path.join(
+ criterion_dir, subcommand, dir, "new", "estimates.json"
+ )
+ benches["granges"] = extract_mean_point_estimate(granges_bench)
+ comparisons[subcommand] = benches
+
+ # Calculate and print ratios for each comparison
+ for comparison, tools in comparisons.items():
+ if len(tools) == 2:
+ bedtools_time, granges_time = tools.values()
+ # Calculate the ratio and convert it to a percentage
+ percent_faster = ((bedtools_time - granges_time) / bedtools_time) * 100
+ # Format the output to show 3 decimal places
+ print(
+ f"{comparison} - Granges is {percent_faster:.3f}% faster than Bedtools"
+ )
+
+
+def main():
+ criterion_dir = "target/criterion"
+ calculate_ratios(criterion_dir)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/commands.rs b/src/commands.rs
@@ -53,7 +53,7 @@ pub fn granges_adjust(
  let possibly_adjusted_range = adjust_range(range, -both, both, length);
 
  if let Some(range_adjusted) = possibly_adjusted_range {
- writer.write_all(&range_adjusted.to_tsv().into_bytes())?;
+ writeln!(writer, "{}", &range_adjusted.to_tsv())?;
  } else {
  skipped_ranges += 1;
  }
@@ -259,7 +259,7 @@ pub fn granges_flank(
  let flanking_ranges = range
  .flanking_ranges::<GenomicRangeRecord<String>>(left, right, length);
  for flanking_range in flanking_ranges {
- writer.write_all(&flanking_range.to_tsv().into_bytes())?;
+ writeln!(writer, "{}", &flanking_range.to_tsv())?;
  }
  }
  } else {
@@ -273,7 +273,7 @@ pub fn granges_flank(
  let flanking_ranges = range
  .flanking_ranges::<GenomicRangeEmptyRecord>(left, right, length);
  for flanking_range in flanking_ranges {
- writer.write_all(&flanking_range.to_tsv().into_bytes())?;
+ writeln!(writer, "{}", &flanking_range.to_tsv())?;
  }
  }
  }
@@ -290,7 +290,7 @@ pub fn granges_flank(
  let flanking_ranges = range
  .flanking_ranges::<GenomicRangeRecord<String>>(left, right, length);
  for flanking_range in flanking_ranges {
- writer.write_all(&flanking_range.to_tsv().into_bytes())?;
+ writeln!(writer, "{}", &flanking_range.to_tsv())?;
  }
  }
  } else {
@@ -304,7 +304,7 @@ pub fn granges_flank(
  let flanking_ranges = range
  .flanking_ranges::<GenomicRangeEmptyRecord>(left, right, length);
  for flanking_range in flanking_ranges {
- writer.write_all(&flanking_range.to_tsv().into_bytes())?;
+ writeln!(writer, "{}", &flanking_range.to_tsv())?;
  }
  }
  }

diff --git a/tests/bedtools_validation.rs b/tests/bedtools_validation.rs
@@ -76,7 +76,7 @@ fn test_against_bedtools_slop() {
 /// granges filter --genome <genome> --left <left> --right <right>
 #[test]
 fn test_against_bedtools_intersect_wa() {
- let num_ranges = 100_000;
+ let num_ranges = 1_000_000;
 
  let random_bedfile_left_tempfile = random_bedfile(num_ranges);
  let random_bedfile_right_tempfile = random_bedfile(num_ranges);
@@ -131,15 +131,11 @@ fn test_against_bedtools_intersect_wa() {
 /// granges filter --genome <genome> --left 10 --right 20 <input>
 #[test]
 fn test_against_bedtools_flank() {
- let num_ranges = 100_000;
+ let num_ranges = 1_000;
 
  let random_bedfile_tempfile = random_bedfile(num_ranges);
  let random_bedfile = random_bedfile_tempfile.path();
 
- // for testing: uncomment and results are local for inspection
- // let random_bedfile_left = Path::new("test_left.bed");
- // let random_bedfile_right = Path::new("test_right.bed");
-
  granges_random_bed(
  "tests_data/hg38_seqlens.tsv",
  num_ranges,
@@ -148,7 +144,6 @@ fn test_against_bedtools_flank() {
  )
  .expect("could not generate random BED file");
 
- let bedtools_outfile = temp_bedfile();
  let bedtools_output = Command::new("bedtools")
  .arg("flank")
  .arg("-g")
@@ -159,18 +154,9 @@ fn test_against_bedtools_flank() {
  .arg("30")
  .arg("-i")
  .arg(&random_bedfile)
- .arg(bedtools_outfile.path())
  .output()
  .expect("bedtools flank failed");
 
- let bedtools_sorted_output = Command::new("bedtools")
- .arg("sort")
- .arg("-i")
- .arg(bedtools_outfile.path())
- .output()
- .expect("bedtools intersect failed");
-
- let granges_outfile = temp_bedfile();
  let granges_output = Command::new(granges_binary_path())
  .arg("flank")
  .arg("--genome")
@@ -180,30 +166,20 @@ fn test_against_bedtools_flank() {
  .arg("--right")
  .arg("30")
  .arg(&random_bedfile)
- .arg(granges_outfile.path())
  .output()
  .expect("granges flank failed");
 
- let granges_sorted_output = Command::new("bedtools")
- .arg("sort")
- .arg("-i")
- .arg(bedtools_outfile.path())
- .output()
- .expect("bedtools intersect failed");
+ assert!(bedtools_output.status.success(), "{:?}", bedtools_output);
+ assert!(granges_output.status.success(), "{:?}", granges_output);
 
- assert!(
- bedtools_sorted_output.status.success(),
- "{:?}",
- bedtools_sorted_output
- );
- assert!(
- granges_sorted_output.status.success(),
- "{:?}",
- granges_sorted_output
- );
+ let bedtools_str = String::from_utf8_lossy(&bedtools_output.stdout);
+ let granges_str = String::from_utf8_lossy(&granges_output.stdout);
 
- assert_eq!(
- String::from_utf8_lossy(&bedtools_output.stdout),
- String::from_utf8_lossy(&granges_output.stdout)
- );
+ let mut bedtools_ranges: Vec<_> = bedtools_str.split("\n").collect();
+ let mut granges_ranges: Vec<_> = granges_str.split("\n").collect();
+
+ bedtools_ranges.sort();
+ granges_ranges.sort();
+
+ assert_eq!(bedtools_ranges, granges_ranges);
 }