Skip to content

Commit

Permalink
Better benchmarking, fixed benchmarking bug, new comparison script.
Browse files Browse the repository at this point in the history
 - New scripts/benchmark_summary.py for comparing bedtools/granges.
 - Fixed TSV output bug.
  • Loading branch information
vsbuffalo committed Feb 21, 2024
1 parent b4b1a1b commit 5b6db72
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 46 deletions.
59 changes: 55 additions & 4 deletions benches/bedtools_comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ use criterion::{criterion_group, criterion_main, Criterion};
use granges::test_utilities::{granges_binary_path, random_bedfile};
use std::process::Command;

const BED_LENGTH: usize = 10_000;
const BED_LENGTH: usize = 100_000;

fn bench_range_adjustment(c: &mut Criterion) {
// create the benchmark group
let mut group = c.benchmark_group("slop vs adjust");
let mut group = c.benchmark_group("adjust");

// create the test data
let input_bedfile = random_bedfile(BED_LENGTH);
Expand Down Expand Up @@ -56,7 +56,7 @@ fn bench_range_adjustment(c: &mut Criterion) {

fn bench_filter_adjustment(c: &mut Criterion) {
// create the benchmark group
let mut group = c.benchmark_group("intersect vs filter");
let mut group = c.benchmark_group("filter");

// create the test data
let random_bedfile_left_tempfile = random_bedfile(BED_LENGTH);
Expand Down Expand Up @@ -99,5 +99,56 @@ fn bench_filter_adjustment(c: &mut Criterion) {
});
}

criterion_group!(benches, bench_filter_adjustment, bench_range_adjustment);
fn bench_flank(c: &mut Criterion) {
// create the benchmark group
let mut group = c.benchmark_group("flank");

// create the test data
let random_bedfile_tempfile = random_bedfile(BED_LENGTH);
let random_bedfile = random_bedfile_tempfile.path();

// configure the sample size for the group
// group.sample_size(10);
group.bench_function("bedtools_flank", |b| {
b.iter(|| {
let bedtools_output = Command::new("bedtools")
.arg("flank")
.arg("-g")
.arg("tests_data/hg38_seqlens.tsv")
.arg("-l")
.arg("20")
.arg("-r")
.arg("30")
.arg("-i")
.arg(random_bedfile)
.output()
.expect("bedtools flank failed");
assert!(bedtools_output.status.success());
});
});

group.bench_function("granges_filter", |b| {
b.iter(|| {
let granges_output = Command::new(granges_binary_path())
.arg("flank")
.arg("--genome")
.arg("tests_data/hg38_seqlens.tsv")
.arg("--left")
.arg("20")
.arg("--right")
.arg("30")
.arg(random_bedfile)
.output()
.expect("granges adjust failed");
assert!(granges_output.status.success());
});
});
}

criterion_group!(
benches,
bench_filter_adjustment,
bench_range_adjustment,
bench_flank,
);
criterion_main!(benches);
53 changes: 53 additions & 0 deletions scripts/benchmark_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import os
import json


def extract_mean_point_estimate(estimates_path):
with open(estimates_path, "r") as file:
data = json.load(file)
return data["mean"]["point_estimate"]


def calculate_ratios(criterion_dir):
comparisons = {} # Store comparison: ratio

subcommand_benches = [d for d in os.listdir(criterion_dir) if d != "report"]

for subcommand in subcommand_benches:
benches = dict()
runs = os.listdir(os.path.join(criterion_dir, subcommand))

for dir in runs:
if dir.startswith("report"):
continue
if dir.startswith("bedtools"):
bedtools_bench = os.path.join(
criterion_dir, subcommand, dir, "new", "estimates.json"
)
benches["bedtools"] = extract_mean_point_estimate(bedtools_bench)
if dir.startswith("granges"):
granges_bench = os.path.join(
criterion_dir, subcommand, dir, "new", "estimates.json"
)
benches["granges"] = extract_mean_point_estimate(granges_bench)
comparisons[subcommand] = benches

# Calculate and print ratios for each comparison
for comparison, tools in comparisons.items():
if len(tools) == 2:
bedtools_time, granges_time = tools.values()
# Calculate the ratio and convert it to a percentage
percent_faster = ((bedtools_time - granges_time) / bedtools_time) * 100
# Format the output to show 3 decimal places
print(
f"{comparison} - Granges is {percent_faster:.3f}% faster than Bedtools"
)


def main():
criterion_dir = "target/criterion"
calculate_ratios(criterion_dir)


if __name__ == "__main__":
main()
10 changes: 5 additions & 5 deletions src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub fn granges_adjust(
let possibly_adjusted_range = adjust_range(range, -both, both, length);

if let Some(range_adjusted) = possibly_adjusted_range {
writer.write_all(&range_adjusted.to_tsv().into_bytes())?;
writeln!(writer, "{}", &range_adjusted.to_tsv())?;
} else {
skipped_ranges += 1;
}
Expand Down Expand Up @@ -259,7 +259,7 @@ pub fn granges_flank(
let flanking_ranges = range
.flanking_ranges::<GenomicRangeRecord<String>>(left, right, length);
for flanking_range in flanking_ranges {
writer.write_all(&flanking_range.to_tsv().into_bytes())?;
writeln!(writer, "{}", &flanking_range.to_tsv())?;
}
}
} else {
Expand All @@ -273,7 +273,7 @@ pub fn granges_flank(
let flanking_ranges = range
.flanking_ranges::<GenomicRangeEmptyRecord>(left, right, length);
for flanking_range in flanking_ranges {
writer.write_all(&flanking_range.to_tsv().into_bytes())?;
writeln!(writer, "{}", &flanking_range.to_tsv())?;
}
}
}
Expand All @@ -290,7 +290,7 @@ pub fn granges_flank(
let flanking_ranges = range
.flanking_ranges::<GenomicRangeRecord<String>>(left, right, length);
for flanking_range in flanking_ranges {
writer.write_all(&flanking_range.to_tsv().into_bytes())?;
writeln!(writer, "{}", &flanking_range.to_tsv())?;
}
}
} else {
Expand All @@ -304,7 +304,7 @@ pub fn granges_flank(
let flanking_ranges = range
.flanking_ranges::<GenomicRangeEmptyRecord>(left, right, length);
for flanking_range in flanking_ranges {
writer.write_all(&flanking_range.to_tsv().into_bytes())?;
writeln!(writer, "{}", &flanking_range.to_tsv())?;
}
}
}
Expand Down
50 changes: 13 additions & 37 deletions tests/bedtools_validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ fn test_against_bedtools_slop() {
/// granges filter --genome <genome> --left <left> --right <right>
#[test]
fn test_against_bedtools_intersect_wa() {
let num_ranges = 100_000;
let num_ranges = 1_000_000;

let random_bedfile_left_tempfile = random_bedfile(num_ranges);
let random_bedfile_right_tempfile = random_bedfile(num_ranges);
Expand Down Expand Up @@ -131,15 +131,11 @@ fn test_against_bedtools_intersect_wa() {
/// granges filter --genome <genome> --left 10 --right 20 <input>
#[test]
fn test_against_bedtools_flank() {
let num_ranges = 100_000;
let num_ranges = 1_000;

let random_bedfile_tempfile = random_bedfile(num_ranges);
let random_bedfile = random_bedfile_tempfile.path();

// for testing: uncomment and results are local for inspection
// let random_bedfile_left = Path::new("test_left.bed");
// let random_bedfile_right = Path::new("test_right.bed");

granges_random_bed(
"tests_data/hg38_seqlens.tsv",
num_ranges,
Expand All @@ -148,7 +144,6 @@ fn test_against_bedtools_flank() {
)
.expect("could not generate random BED file");

let bedtools_outfile = temp_bedfile();
let bedtools_output = Command::new("bedtools")
.arg("flank")
.arg("-g")
Expand All @@ -159,18 +154,9 @@ fn test_against_bedtools_flank() {
.arg("30")
.arg("-i")
.arg(&random_bedfile)
.arg(bedtools_outfile.path())
.output()
.expect("bedtools flank failed");

let bedtools_sorted_output = Command::new("bedtools")
.arg("sort")
.arg("-i")
.arg(bedtools_outfile.path())
.output()
.expect("bedtools intersect failed");

let granges_outfile = temp_bedfile();
let granges_output = Command::new(granges_binary_path())
.arg("flank")
.arg("--genome")
Expand All @@ -180,30 +166,20 @@ fn test_against_bedtools_flank() {
.arg("--right")
.arg("30")
.arg(&random_bedfile)
.arg(granges_outfile.path())
.output()
.expect("granges flank failed");

let granges_sorted_output = Command::new("bedtools")
.arg("sort")
.arg("-i")
.arg(bedtools_outfile.path())
.output()
.expect("bedtools intersect failed");
assert!(bedtools_output.status.success(), "{:?}", bedtools_output);
assert!(granges_output.status.success(), "{:?}", granges_output);

assert!(
bedtools_sorted_output.status.success(),
"{:?}",
bedtools_sorted_output
);
assert!(
granges_sorted_output.status.success(),
"{:?}",
granges_sorted_output
);
let bedtools_str = String::from_utf8_lossy(&bedtools_output.stdout);
let granges_str = String::from_utf8_lossy(&granges_output.stdout);

assert_eq!(
String::from_utf8_lossy(&bedtools_output.stdout),
String::from_utf8_lossy(&granges_output.stdout)
);
let mut bedtools_ranges: Vec<_> = bedtools_str.split("\n").collect();
let mut granges_ranges: Vec<_> = granges_str.split("\n").collect();

bedtools_ranges.sort();
granges_ranges.sort();

assert_eq!(bedtools_ranges, granges_ranges);
}

0 comments on commit 5b6db72

Please sign in to comment.