Additional benchmarks: environment notes, cluster resources, more repetitions.

Text before the first "diff --git" header is commentary; it is not part of
any hunk.

* README.md (new): commands that create the `granges_benchmark` environment
  from requirements.txt and install the Snakemake SLURM executor plugin.
* Snakefile:
  - Adds RSCRIPT, the interpreter used by rule plyranges_join_overlap_inner
    via `params.rscript`. It points at `Rscript`, not `R`: the rule passes
    the script path and its arguments positionally, which is the calling
    convention of the `Rscript` front-end (the line being replaced also
    called `Rscript`).
  - Raises NREPS, the repeat count handed to `repeat(...)` in every
    `benchmark:` directive, from 1 to 50.
  - Declares rule `granges` under `localrules:`.
  - Adds a `resources:` section (runtime, mem_mb_per_cpu, cpus_per_task) to
    random_bed, granges_filter, bedtools_intersect and
    plyranges_join_overlap_inner; the latter gets runtime=300, the others 30.
  - Limits `tools` to bedtools_intersect and granges_filter; the previous
    three-tool list is kept as a commented-out line.
  - Casts `sizes` to integers so the values expanded into `{size}` are
    integers rather than floats.
* requirements.txt (new): package list consumed by `mamba create --file`.

NOTE(review): the post-image blob id on the Snakefile "index" line was
computed before RSCRIPT was pointed at Rscript; regenerate the patch if exact
blob ids matter (for example for three-way application).
NOTE(review): leading whitespace of the Snakefile and README lines was
reconstructed; confirm it against the target tree before applying.

diff --git a/additional_benchmarks/README.md b/additional_benchmarks/README.md
new file mode 100644
index 0000000..094548a
--- /dev/null
+++ b/additional_benchmarks/README.md
@@ -0,0 +1,7 @@
+# Additional Benchmarks
+
+## Environment
+
+    mamba create -n granges_benchmark -c conda-forge -c bioconda --file requirements.txt --yes
+    pip install snakemake-executor-plugin-slurm
+
diff --git a/additional_benchmarks/Snakefile b/additional_benchmarks/Snakefile
index 58b100c..b79ceae 100644
--- a/additional_benchmarks/Snakefile
+++ b/additional_benchmarks/Snakefile
@@ -1,8 +1,11 @@
 import numpy as np
 
+RSCRIPT = "~/.conda/envs/granges_benchmark/bin/Rscript"
 GRANGES = "../target/release/granges"
 SEQLENS = "../tests_data/hg38_seqlens.tsv"
-NREPS = 1
+NREPS = 50
+
+localrules: granges
 
 rule granges:
   output: GRANGES
@@ -14,6 +17,10 @@ rule granges:
 rule random_bed:
   input: seqlens=SEQLENS, granges=GRANGES
   output: "random_bed/{size}__{rep}.bed.gz"
+  resources:
+    runtime=30,
+    mem_mb_per_cpu=1800,
+    cpus_per_task=28
   shell:
     """
     {input.granges} random-bed --sort --num {wildcards.size} {input.seqlens} | gzip > {output}
@@ -24,6 +31,10 @@ rule granges_filter:
          genome=SEQLENS, granges=GRANGES
   output: "results/granges_filter__size_{size}.bed"
   benchmark: repeat("benchmarks/granges_filter__size_{size}.tsv", NREPS)
+  resources:
+    runtime=30,
+    mem_mb_per_cpu=1800,
+    cpus_per_task=28
   shell:
     """
     {input.granges} filter --left {input.a} --right {input.b} --genome {input.genome} > {output}
@@ -34,6 +45,10 @@ rule bedtools_intersect:
   input: a="random_bed/{size}__A.bed.gz", b="random_bed/{size}__B.bed.gz"
   output: "results/bedtools_intersect__size_{size}.bed"
   benchmark: repeat("benchmarks/bedtools_intersect__size_{size}.tsv", NREPS)
+  resources:
+    runtime=30,
+    mem_mb_per_cpu=1800,
+    cpus_per_task=28
   shell:
     """
     bedtools intersect -a {input.a} -b {input.b} > {output}
@@ -42,14 +57,20 @@ rule bedtools_intersect:
 rule plyranges_join_overlap_inner:
   input: a="random_bed/{size}__A.bed.gz", 
          b="random_bed/{size}__B.bed.gz"
   output: "results/plyranges_join_overlap_inner__size_{size}.bed"
+  params: rscript = RSCRIPT
   benchmark: repeat("benchmarks/plyranges_join_overlap_inner__size_{size}.tsv", NREPS)
+  resources:
+    runtime=300,
+    mem_mb_per_cpu=1800,
+    cpus_per_task=28
   shell:
     """
-    Rscript Rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} > {output}
+    {params.rscript} Rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} > {output}
     """
 
-tools = ["bedtools_intersect", "granges_filter", "plyranges_join_overlap_inner"]
-sizes = np.logspace(3, 9, 10)
+#tools = ["bedtools_intersect", "granges_filter", "plyranges_join_overlap_inner"]
+tools = ["bedtools_intersect", "granges_filter"]
+sizes = np.logspace(3, 9, 10).astype('int')
 
 all_benches = expand("results/{tool}__size_{size}.bed", tool=tools, size=sizes)
 
diff --git a/additional_benchmarks/requirements.txt b/additional_benchmarks/requirements.txt
new file mode 100644
index 0000000..4478738
--- /dev/null
+++ b/additional_benchmarks/requirements.txt
@@ -0,0 +1,5 @@
+snakemake
+numpy
+scipy
+matplotlib
+R