diff --git a/src/commands.rs b/src/commands.rs index 64eae73..32a1bb9 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use crate::{ - io::{OutputFile, parsers::GenomicRangesParsers}, + io::{OutputFile, parsers::GenomicRangesParser}, prelude::*, ranges::operations::adjust_range, reporting::{CommandOutput, Report}, @@ -68,11 +68,11 @@ pub fn granges_adjust( let ranges_iter = GenomicRangesFile::parsing_iterator(bedfile)?; match ranges_iter { - GenomicRangesParsers::Bed3(iter) => { + GenomicRangesParser::Bed3(iter) => { let gr = GRangesEmpty::from_iter(iter, &genome)?; gr.adjust_ranges(-both, both).to_tsv(output)? }, - GenomicRangesParsers::Bedlike(iter) => { + GenomicRangesParser::Bedlike(iter) => { // Note the call to try_unwrap_data() here: this is because // we know that the records *do* have data. Unwrapping the Option // values means that writing to TSV doesn't have to deal with this (which @@ -80,7 +80,7 @@ pub fn granges_adjust( let gr = GRanges::from_iter(iter.try_unwrap_data()?, &genome)?; gr.adjust_ranges(-both, both).to_tsv(output)? }, - GenomicRangesParsers::Unsupported => { + GenomicRangesParser::Unsupported => { return Err(GRangesError::UnsupportedGenomicRangesFileFormat) }, @@ -88,37 +88,53 @@ pub fn granges_adjust( } Ok(CommandOutput::new((), report)) } -// -// /// Retain only the ranges that have at least one overlap with another set of ranges. -// pub fn granges_filter( -// seqlens: &PathBuf, -// left_granges: GRanges, -// right_granges: GRanges, -// sort: bool, -// ) -> Result, GRangesError> -// where -// // we must be able to iterate over left ranges -// VecRangesEmpty: IterableRangeContainer, -// // we must be able to convert the right GRanges to interval trees -// GRanges: GenomicRangesToIntervalTrees<()>, -// { -// let right_granges = right_granges.to_coitrees()?; -// -// // let intersection = left_granges.filter_overlaps_(&right_granges)?; -// -// //// output stream -- header is None for now (TODO) -// //let output_stream = output.map_or(OutputFile::new_stdout(None), |file| { -// // OutputFile::new(file, None) -// //}); -// //let mut writer = output_stream.writer()?; -// -// // for reporting stuff to the user -// let mut report = Report::new(); -// -// //intersection.sort().to_tsv(output)?; -// -// Ok(CommandOutput::new((), report)) -// } + +/// Retain only the ranges that have at least one overlap with another set of ranges. +pub fn granges_filter( + seqlens: &PathBuf, + left_path: &PathBuf, + right_path: &PathBuf, + output: Option<&PathBuf>, + sort: bool, +) -> Result, GRangesError> { + let genome = read_seqlens(seqlens)?; + + let left_iter = GenomicRangesFile::parsing_iterator(left_path)?; + let right_iter = GenomicRangesFile::parsing_iterator(right_path)?; + + let output_stream = output.as_ref().map_or(OutputFile::new_stdout(None), |file| { + OutputFile::new(file, None) + }); + let mut writer = output_stream.writer()?; + + // for reporting stuff to the user + let mut report = Report::new(); + + match (left_iter, right_iter) { + (GenomicRangesParser::Bed3(left), GenomicRangesParser::Bed3(right)) => { + + let left_gr = GRangesEmpty::from_iter(left, &genome)?; let right_gr = + GRangesEmpty::from_iter(right, &genome)?; + + let right_gr = right_gr.to_coitrees()?; + + let intersection = left_gr.filter_overlaps(&right_gr)?; + intersection.to_tsv(output)?; + + Ok(CommandOutput::new((), report)) + } + (GenomicRangesParser::Bed3(left), GenomicRangesParser::Bedlike(right)) => { + Ok(CommandOutput::new((), report)) + } + (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bed3(right)) => { + Ok(CommandOutput::new((), report)) + } + (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bedlike(right)) => { + Ok(CommandOutput::new((), report)) + } + _ => Ok(CommandOutput::new((), report)), + } +} /// Generate a random BED-like file with genomic ranges. pub fn granges_random_bed( diff --git a/src/io/mod.rs b/src/io/mod.rs index e6cbc57..99946e3 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -6,4 +6,4 @@ pub mod noodles; pub mod parsers; pub use file::{InputFile, OutputFile}; -pub use parsers::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, GenomicRangesParsers, TsvRecordIterator}; +pub use parsers::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, GenomicRangesParser, TsvRecordIterator}; diff --git a/src/io/parsers.rs b/src/io/parsers.rs index a0a97b0..0f71e21 100644 --- a/src/io/parsers.rs +++ b/src/io/parsers.rs @@ -145,7 +145,7 @@ fn valid_bedlike(input_file: &mut InputFile) -> Result { /// Enum that connects a genomic ranges file type to its specific parser. #[derive(Debug)] -pub enum GenomicRangesParsers { +pub enum GenomicRangesParser { Bed3(Bed3Iterator), Bedlike(BedlikeIterator), Unsupported, @@ -215,14 +215,14 @@ impl GenomicRangesFile { /// /// This returns a [`GenomicRangesParsers`] enum, since the parsing iterator filetype /// cannot be known at compile time. - pub fn parsing_iterator(filepath: impl Clone + Into) -> Result { + pub fn parsing_iterator(filepath: impl Clone + Into) -> Result { let path = filepath.into(); dbg!(&path); match Self::detect(path)? { GenomicRangesFile::Bed3(path) => - Ok(GenomicRangesParsers::Bed3(Bed3Iterator::new(path)?)), + Ok(GenomicRangesParser::Bed3(Bed3Iterator::new(path)?)), GenomicRangesFile::Bedlike(path) => - Ok(GenomicRangesParsers::Bedlike(BedlikeIterator::new(path)?)), + Ok(GenomicRangesParser::Bedlike(BedlikeIterator::new(path)?)), GenomicRangesFile::Unsupported => { Err(GRangesError::UnsupportedGenomicRangesFileFormat) } diff --git a/src/lib.rs b/src/lib.rs index 69a940e..ff1b3e3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,7 +26,7 @@ pub mod prelude { pub use crate::error::GRangesError; pub use crate::granges::{GRanges, GRangesEmpty}; pub use crate::io::file::read_seqlens; - pub use crate::io::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, TsvRecordIterator, GenomicRangesParsers}; + pub use crate::io::{Bed3Iterator, BedlikeIterator, GenomicRangesFile, TsvRecordIterator, GenomicRangesParser}; pub use crate::ranges::vec::{VecRangesEmpty, VecRangesIndexed}; pub use crate::traits::{ diff --git a/src/main/mod.rs b/src/main/mod.rs index 2b5abdb..368b956 100644 --- a/src/main/mod.rs +++ b/src/main/mod.rs @@ -2,7 +2,7 @@ use std::path::PathBuf; use clap::{Args, Parser, Subcommand}; use granges::{ - commands::{granges_adjust}, + commands::{granges_adjust, granges_filter}, io::{parsers::Bed3Iterator, OutputFile}, prelude::{read_seqlens, GRanges, GRangesError, GenomicRangesFile}, reporting::{Report, CommandOutput}, @@ -115,47 +115,7 @@ fn run() -> Result<(), GRangesError> { output, sort, }) => { - let genome = read_seqlens(seqlens)?; - let mut report = Report::new(); - - let left_filetype = GenomicRangesFile::detect(left)?; - let right_filetype = GenomicRangesFile::detect(right)?; - - match (left_filetype, right_filetype) { - (GenomicRangesFile::Bed3(left_file), GenomicRangesFile::Bed3(right_file)) => { - let left_iter = Bed3Iterator::new(left_file)?; - let right_iter = Bed3Iterator::new(right_file)?; - - let left_gr = GRangesEmpty::from_iter(left_iter, &genome)?; let right_gr = - GRangesEmpty::from_iter(right_iter, &genome)?; - - let right_gr = right_gr.to_coitrees()?; - - let intersection = left_gr.filter_overlaps(&right_gr); - right_gr.len(); - - let output_stream = output.as_ref().map_or(OutputFile::new_stdout(None), |file| { - OutputFile::new(file, None) - }); - let mut writer = output_stream.writer()?; - - // for reporting stuff to the user - let mut report = Report::new(); - - Ok(CommandOutput::new((), report)) - } - (GenomicRangesFile::Bed3(left_file), GenomicRangesFile::Bedlike(right_file)) => { - Ok(CommandOutput::new((), report)) - } - (GenomicRangesFile::Bedlike(left_file), GenomicRangesFile::Bed3(right_file)) => { - Ok(CommandOutput::new((), report)) - } - (GenomicRangesFile::Bedlike(left_file), GenomicRangesFile::Bedlike(right_file)) => { - Ok(CommandOutput::new((), report)) - } - _ => Ok(CommandOutput::new((), report)), - } - // granges_filter(left, right, output.as_ref(), *sort) + granges_filter(seqlens, left, right, output.as_ref(), *sort) } #[cfg(feature = "dev-commands")] Some(Commands::RandomBed {