# Patch for additional_benchmarks/Snakefile: a single hunk starting at line 71 of
# both the old and new file; the hunk header declares 11 old lines and 35 new lines.
#
# Changes carried by the hunk:
#   * Removes the commented-out three-tool `tools` list; the active two-tool
#     `tools` list and the `sizes` definition are unchanged context.
#   * Renames `all_benches` to `all_results` and points `rule all` at the new name.
#   * Adds `rule combine`. Its input is the expand() of
#     benchmarks/{tool}__size_{size}.tsv over `tools` and `sizes`. The run block
#     reads each of those files with pandas.read_csv(sep="\t"), adds `tool` and
#     `size` columns, concatenates the frames with ignore_index=True, and writes
#     output[0] (combined_benchmarks.tsv) tab-separated without the index.
#   * Adds `rule merge`; only its `input:` (combined_benchmarks.tsv) is present
#     in this hunk.
#
# NOTE(review): the run block rebuilds every path from `tools`/`sizes` rather than
# iterating `input`, so the two stay in agreement only while the f-string matches
# the expand() pattern.
# NOTE(review): the whole patch sits on one physical line -- line breaks and
# indentation look to have been collapsed into single spaces, so it presumably
# will not apply with `git apply`/`patch` as-is. Confirm against the original commit.
diff --git a/additional_benchmarks/Snakefile b/additional_benchmarks/Snakefile index e8d3c81..b6eb714 100644 --- a/additional_benchmarks/Snakefile +++ b/additional_benchmarks/Snakefile @@ -71,11 +71,35 @@ rule plyranges_join_overlap_inner: {params.rscript} Rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} > {output} """ -#tools = ["bedtools_intersect", "granges_filter", "plyranges_join_overlap_inner"] tools = ["bedtools_intersect", "granges_filter"] sizes = np.logspace(3, 7, 10).astype('int') -all_benches = expand("results/{tool}__size_{size}.bed", tool=tools, size=sizes) +all_results = expand("results/{tool}__size_{size}.bed", tool=tools, size=sizes) rule all: - input: all_benches + input: all_results + + +rule combine: + input: expand("benchmarks/{tool}__size_{size}.tsv", tool=tools, size=sizes) + output: "combined_benchmarks.tsv" + run: + import pandas as pd + + def read_tsv(file): + return pd.read_csv(file, sep="\t") + + dfs = [] + for tool in tools: + for size in sizes: + file = f"benchmarks/{tool}__size_{size}.tsv" + df = read_tsv(file) + df["tool"] = tool + df["size"] = size + dfs.append(df) + + combined_df = pd.concat(dfs, ignore_index=True) + combined_df.to_csv(output[0], sep="\t", index=False) + +rule merge: + input: "combined_benchmarks.tsv"