Skip to content

Commit

Permalink
Fixed bug with plyranges
Browse files Browse the repository at this point in the history
  • Loading branch information
vsbuffalo committed May 19, 2024
1 parent 3588443 commit d511f32
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 30 deletions.
24 changes: 12 additions & 12 deletions additional_benchmarks/Snakefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np

RSCRIPT = "~/.conda/envs/granges_benchmark/bin/R"
RSCRIPT = "~/.conda/envs/granges_benchmark/bin/Rscript"
GRANGES = "../target/release/granges"
SEQLENS = "../tests_data/hg38_seqlens.tsv"
NREPS = 100
Expand Down Expand Up @@ -58,7 +58,7 @@ rule granges_filter:
benchmark: repeat("benchmarks/granges_filter__size_{size}.tsv", NREPS)
resources:
partition="savio3",
runtime=300,
runtime=600,
mem_mb=96000,
cpus_per_task=40
shell:
Expand All @@ -72,7 +72,7 @@ rule bedtools_intersect:
benchmark: repeat("benchmarks/bedtools_intersect__size_{size}.tsv", NREPS)
resources:
partition="savio3",
runtime=300,
runtime=600,
mem_mb=96000,
cpus_per_task=40
shell:
Expand All @@ -87,25 +87,25 @@ rule plyranges_join_overlap_inner:
benchmark: repeat("benchmarks/plyranges_join_overlap_inner__size_{size}.tsv", NREPS)
resources:
partition="savio3",
runtime=300,
runtime=600,
mem_mb=96000,
cpus_per_task=40
shell:
"""
{params.rscript} Rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} > {output}
{params.rscript} rscripts/plyranges_join_overlap_inner.r {input.a} {input.b} {output}
"""

## ----- granges map / bedtools map -----
WINDOWS = "windows/width_100000.bed"

rule granges_map:
input: right=WINDOWS, left="random_bed/scores/{size}__A.bed.gz",
input: left=WINDOWS, right="random_bed/scores/{size}__A.bed.gz",
genome=SEQLENS, granges=GRANGES
output: "results/granges_map__size_{size}.bed"
benchmark: repeat("benchmarks/granges_map__size_{size}.tsv", NREPS)
resources:
partition="savio3",
runtime=300,
runtime=600,
mem_mb=96000,
cpus_per_task=40
shell:
Expand All @@ -114,12 +114,12 @@ rule granges_map:
"""

rule bedtools_map:
input: right=WINDOWS, left="random_bed/scores/{size}__A.bed.gz"
input: left=WINDOWS, right="random_bed/scores/{size}__A.bed.gz"
output: "results/bedtools_map__size_{size}.bed"
benchmark: repeat("benchmarks/bedtools_map__size_{size}.tsv", NREPS)
resources:
partition="savio3",
runtime=300,
runtime=600,
mem_mb=96000,
cpus_per_task=40
shell:
Expand All @@ -129,8 +129,8 @@ rule bedtools_map:

rule plyranges_map:
input:
right=WINDOWS,
left="random_bed/scores/{size}__A.bed.gz"
left=WINDOWS,
right="random_bed/scores/{size}__A.bed.gz"
output:
"results/plyranges_map__size_{size}.bed"
benchmark:
Expand All @@ -143,7 +143,7 @@ rule plyranges_map:
params: rscript = RSCRIPT
shell:
"""
{params.rscript} Rscripts/plyranges_map.r {input.left} {input.right} > {output}
{params.rscript} rscripts/plyranges_map.r {input.left} {input.right} {output}
"""


Expand Down
30 changes: 12 additions & 18 deletions additional_benchmarks/rscripts/plyranges_map.r
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
library(plyranges)
library(rtracklayer)

args <- commandArgs(trailingOnly = TRUE)

Expand All @@ -15,23 +16,16 @@ granges_a <- read_bed(bed_file_a)
granges_b <- read_bed(bed_file_b)

# Perform the map operation
mapped_result <- join_overlap_left(
granges_a,
granges_b,
suffix = c(".left", ".right")
) %>%
select(
seqnames,
start.left,
end.left,
width = width.left,
strand,
min = min(score.right),
max = max(score.right),
mean = mean(score.right),
sum = sum(score.right),
median = median(score.right)
mapped_granges <- granges_a %>%
join_overlap_inner(granges_b) %>%
group_by(seqnames, start, end, strand) %>%
summarise(
min = min(score, na.rm = TRUE),
max = max(score, na.rm = TRUE),
mean = mean(score, na.rm = TRUE),
sum = sum(score, na.rm = TRUE),
median = median(score, na.rm = TRUE)
)

# Write the mapped result to a BED file
write_bed(mapped_result, output_file)
mapped_df <- as.data.frame(mapped_granges)
write.table(mapped_df, file = output_file, sep = "\t", quote = FALSE, row.names = FALSE)

0 comments on commit d511f32

Please sign in to comment.