From ba6a03c8c7dc2292b3d6887bc2cfaabdeb8ddb16 Mon Sep 17 00:00:00 2001 From: Vince Buffalo Date: Sat, 24 Feb 2024 19:14:26 -0800 Subject: [PATCH] New traits and operation functionality for bedtools map-like functionality. - Simplified `granges map` to just handle BED3 for left and BED5 right only. - New `Operation`, new operations command line interface, new `GRangesError`. - `map_data()` - Fixed BED5 having wrong field. - New partial code of BED6. - New `JoinDataOperations` trait, with implementations for combined join data types. --- src/commands.rs | 117 ++++++++---------------------- src/data/operations.rs | 78 ++++++++++---------- src/error.rs | 2 + src/granges.rs | 159 ++++++++++++++++++++++++++++------------- src/io/parsers.rs | 48 +++++++++++-- src/join.rs | 78 +++++++++++++++++--- src/main/mod.rs | 8 ++- src/traits.rs | 18 ++++- 8 files changed, 318 insertions(+), 190 deletions(-) diff --git a/src/commands.rs b/src/commands.rs index e216546..84346fb 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -4,7 +4,7 @@ use std::path::PathBuf; use crate::{ data::operations::Operation, - io::{parsers::GenomicRangesParser, OutputStream}, + io::{parsers::{GenomicRangesParser, Bed5Iterator}, OutputStream}, prelude::*, ranges::{operations::adjust_range, GenomicRangeEmptyRecord, GenomicRangeRecord}, reporting::{CommandOutput, Report}, @@ -398,6 +398,9 @@ pub fn granges_flank( Ok(CommandOutput::new((), report)) } + +/// # Developer Notes +/// This function is a great way to see GRange's methods in action. 
pub fn granges_map( seqlens: impl Into, left_path: &PathBuf, @@ -410,100 +413,38 @@ pub fn granges_map( let seqnames: Vec = genome.keys().cloned().collect(); let mut report = Report::new(); - let left_iter = GenomicRangesFile::parsing_iterator(left_path)?; - let right_iter = GenomicRangesFile::parsing_iterator(right_path)?; + let left_iter = Bed3Iterator::new(left_path)?; + let right_iter = Bed5Iterator::new(right_path)?; - match (left_iter, right_iter) { - (GenomicRangesParser::Bed5(left), GenomicRangesParser::Bed5(right)) => { - let left_gr; - let right_gr; + let left_gr; + let right_gr; - if skip_missing { - left_gr = GRanges::from_iter(left.retain_seqnames(&seqnames), &genome)?; - right_gr = GRanges::from_iter(right.retain_seqnames(&seqnames), &genome)?; - } else { - left_gr = GRanges::from_iter(left, &genome)?; - right_gr = GRanges::from_iter(right, &genome)?; - } - - let right_gr = right_gr.into_coitrees()?; - - let left_join = left_gr.left_overlaps(&right_gr)?; - - // TODO -- map function - // left_join.to_tsv(output)?; - - Ok(CommandOutput::new((), report)) - } - (GenomicRangesParser::Bed3(left), GenomicRangesParser::Bedlike(right)) => { - todo!(); - let left_gr; - let right_gr; - - if skip_missing { - left_gr = GRangesEmpty::from_iter(left.retain_seqnames(&seqnames), &genome)?; - right_gr = GRanges::from_iter( - right.try_unwrap_data().retain_seqnames(&seqnames), - &genome, - )?; - } else { - left_gr = GRangesEmpty::from_iter(left, &genome)?; - right_gr = GRanges::from_iter(right.try_unwrap_data(), &genome)?; - } - - let right_gr = right_gr.into_coitrees()?; - - let intersection = left_gr.filter_overlaps(&right_gr)?; - intersection.to_tsv(output)?; - - Ok(CommandOutput::new((), report)) - } - (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bed3(right)) => { - let left_gr; - let right_gr; - - if skip_missing { - left_gr = - GRanges::from_iter(left.try_unwrap_data().retain_seqnames(&seqnames), &genome)?; - right_gr = 
GRangesEmpty::from_iter(right.retain_seqnames(&seqnames), &genome)?; - } else { - left_gr = GRanges::from_iter(left.try_unwrap_data(), &genome)?; - right_gr = GRangesEmpty::from_iter(right, &genome)?; - } - - let right_gr = right_gr.into_coitrees()?; + if skip_missing { + left_gr = GRangesEmpty::from_iter(left_iter.retain_seqnames(&seqnames), &genome)?; + right_gr = GRanges::from_iter(right_iter.retain_seqnames(&seqnames), &genome)?; + } else { + left_gr = GRangesEmpty::from_iter(left_iter, &genome)?; + right_gr = GRanges::from_iter(right_iter, &genome)?; + } - let intersection = left_gr.filter_overlaps(&right_gr)?; - intersection.to_tsv(output)?; + let right_gr = right_gr.into_coitrees()? + .map_data(|bed5_cols| { + // extract out just the score + bed5_cols.score + })?; - Ok(CommandOutput::new((), report)) - } - (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bedlike(right)) => { - todo!(); - let left_gr; - let right_gr; + let left_join = left_gr.left_overlaps(&right_gr)?; - if skip_missing { - left_gr = - GRanges::from_iter(left.try_unwrap_data().retain_seqnames(&seqnames), &genome)?; - right_gr = GRanges::from_iter( - right.try_unwrap_data().retain_seqnames(&seqnames), - &genome, - )?; - } else { - left_gr = GRanges::from_iter(left.try_unwrap_data(), &genome)?; - right_gr = GRanges::from_iter(right.try_unwrap_data(), &genome)?; - } + let new_column = left_join.map_over_joins(|join_data| { + join_data. + operations.iter().map(|operation| operation.run(data)).collect() + })?; - let right_gr = right_gr.into_coitrees()?; - let intersection = left_gr.filter_overlaps(&right_gr)?; - intersection.to_tsv(output)?; + // TODO -- map function + // left_join.to_tsv(output)?; - Ok(CommandOutput::new((), report)) - } - _ => Err(GRangesError::UnsupportedGenomicRangesFileFormat), - } + Ok(CommandOutput::new((), report)) } /// Generate a random BED-like file with genomic ranges. 
@@ -512,7 +453,7 @@ pub fn granges_random_bed( num: usize, output: Option>, sort: bool, -) -> Result, GRangesError> { + ) -> Result, GRangesError> { // get the genome info let genome = read_seqlens(seqlens)?; diff --git a/src/data/operations.rs b/src/data/operations.rs index 406be3a..7677b33 100644 --- a/src/data/operations.rs +++ b/src/data/operations.rs @@ -4,6 +4,8 @@ use num_traits::{Float, ToPrimitive}; use std::iter::Sum; +use crate::error::GRangesError; + /// Calculate the median. /// /// This will clone and turn `numbers` into a `Vec`. @@ -18,6 +20,15 @@ pub fn median(numbers: &[F]) -> F { } } +/// A subset of types that represent operation results types. +pub enum OperationResult +where +T: Float, +{ + Float(T), + String(String), +} + /// The (subset of) standard `bedtools map` operations. pub enum Operation { Sum, @@ -28,42 +39,37 @@ pub enum Operation { Collapse, } -pub enum OperationResult -where - T: Float, -{ - Float(T), - String(String), -} - -pub fn float_compute(operation: Operation, data: &[T]) -> Option> -where - T: Float + Sum + ToPrimitive + Ord + Clone + ToString, -{ - match operation { - Operation::Sum => { - let sum: T = data.iter().cloned().sum(); - Some(OperationResult::Float(sum)) - } - Operation::Min => data.iter().cloned().min().map(OperationResult::Float), - Operation::Max => data.iter().cloned().max().map(OperationResult::Float), - Operation::Mean => { - if data.is_empty() { - None - } else { - let sum: T = data.iter().cloned().sum(); - let mean = sum / T::from(data.len()).unwrap(); - Some(OperationResult::Float(mean)) +impl Operation { + /// Do a particular (summarizing) operation on some generic data. 
+ pub fn run(&self, data: &[T]) -> Option> + where + T: Float + Sum + ToPrimitive + Ord + Clone + ToString, + { + match self { + Operation::Sum => { + let sum: T = data.iter().cloned().sum(); + Some(OperationResult::Float(sum)) + } + Operation::Min => data.iter().cloned().min().map(OperationResult::Float), + Operation::Max => data.iter().cloned().max().map(OperationResult::Float), + Operation::Mean => { + if data.is_empty() { + None + } else { + let sum: T = data.iter().cloned().sum(); + let mean = sum / T::from(data.len()).unwrap(); + Some(OperationResult::Float(mean)) + } + } + Operation::Median => Some(OperationResult::Float(median(data))), + Operation::Collapse => { + let collapsed = data + .iter() + .map(|num| num.to_string()) + .collect::>() + .join(", "); + Some(OperationResult::String(collapsed)) + } } } - Operation::Median => Some(OperationResult::Float(median(data))), - Operation::Collapse => { - let collapsed = data - .iter() - .map(|num| num.to_string()) - .collect::>() - .join(", "); - Some(OperationResult::String(collapsed)) - } - } } diff --git a/src/error.rs b/src/error.rs index 65f7fc3..4224ba6 100644 --- a/src/error.rs +++ b/src/error.rs @@ -64,4 +64,6 @@ pub enum GRangesError { UnsupportedGenomicRangesFileFormat, #[error("Command line argument error: {0}")] ArgumentError(#[from] clap::error::Error), + #[error("No such operation: {0}")] + NoSuchOperation, } diff --git a/src/granges.rs b/src/granges.rs index 6cec984..3549d72 100644 --- a/src/granges.rs +++ b/src/granges.rs @@ -211,6 +211,8 @@ where } /// Take the data out of this [`GRanges`] object. + // NOTE/TODO?: this is used a lot -- using .expect() + // would reduce Results a lot. pub fn take_data(&mut self) -> Result { std::mem::take(&mut self.data).ok_or(GRangesError::NoDataContainer) } @@ -515,6 +517,23 @@ impl GRanges> { } } + +impl GRanges> +where C: RangeContainer { + /// Consume this [`GRanges>`] object, applying `func` to all elements + /// in [`Vec`], to return a new [`GRanges>`]. 
+ /// + pub fn map_data(mut self, func: F) -> Result>, GRangesError> + where F: Fn(U) -> V { + let left_data = self.take_data()?; + let transformed_data = left_data.into_iter().map(func).collect(); + Ok(GRanges { + ranges: self.ranges, + data: Some(transformed_data), + }) + } +} + impl<'a, DL, DR> GRanges> { /// Push a genomic range with its data to the range and data containers in a [`GRanges] object. /// @@ -590,10 +609,10 @@ where /// Conduct a left overlap join, consuming self and returning a new /// [`GRanges`]. /// - /// The [`JoinData`] container contains references to both left and right - /// data containers and a [`Vec`]. Each [`LeftGroupedJoin`] represents - /// a summary of an overlap, which downstream operations use to calculate - /// statistics using the information about overlaps. + /// The [`JoinData`] container contains the owned left data container and has + /// a reference to the right data container, as well as a [`Vec, @@ -630,12 +649,12 @@ where type Output = GRanges, JoinDataRightEmpty
>; /// Conduct a left overlap join, consuming self and returning a new - /// [`GRanges`]. + /// [`GRanges`]. /// - /// The [`JoinData`] container contains references to both left and right - /// data containers and a [`Vec`]. Each [`LeftGroupedJoin`] represents - /// a summary of an overlap, which downstream operations use to calculate - /// statistics using the information about overlaps. + /// The [`JoinData`] container contains the left data container and has + /// a reference to the right data container, as well as a [`Vec, @@ -681,6 +700,13 @@ where { type Output = GRanges, JoinDataLeftEmpty<'a, DR>>; + /// Conduct a left overlap join, consuming self and returning a new + /// [`GRanges`]. + /// + /// The [`JoinDataLeftEmpty`] contains no left data, and a reference to the + /// right data container, as well as a [`Vec, @@ -709,6 +735,14 @@ where impl<'a> LeftOverlaps<'a, GRangesEmpty> for GRangesEmpty { type Output = GRanges, JoinDataBothEmpty>; + /// Conduct a left overlap join, consuming self and returning a new + /// [`GRanges`]. + /// + /// The [`JoinDataBothEmpty`] contains no data, since neither left nor right + /// [`GRanges`] objects had data. However, it does contain a [`Vec, @@ -734,17 +768,22 @@ where { /// Apply a function over the [`JoinData`] inside this [`GRanges`]. /// - /// This is a workhorse method that is used to summarize genomic overlaps. The + /// This is a powerful method that is used to summarize genomic overlaps. The /// user-specified function, `func` is applied to each "join" item in this [`GRanges`] /// object's data container (which is a [`JoinData`] storing the join information). /// This supplied `func` function returns some generic type `V` per join, which could be /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, /// etc. 
/// + /// # Arguments + /// * `func`: a function that takes a [`CombinedJoinData`] (which contains + /// the associated data for the left range and overlapping right ranges) + /// and summarizes it into a new type `V`. + /// /// See [`CombinedJoinData`] and its convenience methods, which are designed /// to help downstream statistical calculations that could use the number of overlapping /// basepairs, overlapping fraction, etc. - pub fn apply_over_joins( + pub fn map_over_joins( mut self, func: F, ) -> Result>, GRangesError> @@ -757,7 +796,7 @@ where ) -> V, { let data = self.take_data()?; - let transformed_data: Vec = data.apply(func); + let transformed_data: Vec = data.map(func); let ranges = self.ranges; Ok(GRanges { ranges, @@ -766,13 +805,26 @@ where } } -/// Applies a function over joins for [`GRanges`] with both left and right data containers empty. -/// -/// Since both data containers are empty, this function effectively acts as a no-op, -/// directly returning a [`GRanges`] object with an empty vector, as there are no joins -/// to apply the function to. impl GRanges { - pub fn apply_over_joins( + /// Apply a function over the [`JoinData`] inside this [`GRanges`]. + /// + /// This is a powerful method that is used to summarize genomic overlaps. The + /// user-specified function, `func` is applied to each "join" item in this [`GRanges`] + /// object's data container (which is a [`JoinData`] storing the join information). + /// This supplied `func` function returns some generic type `V` per join, which could be + /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, + /// etc. + /// + /// # Arguments + /// * `func`: a function that takes a [`CombinedJoinDataBothEmpty`] (which carries + /// the join information but no left or right data) + /// and summarizes it into a new type `V`. 
+ /// + /// See [`CombinedJoinDataBothEmpty`] and its convenience methods, which are designed + /// to help downstream statistical calculations that could use the number of overlapping + /// basepairs, overlapping fraction, etc. + + pub fn map_over_joins( mut self, func: F, ) -> Result>, GRangesError> @@ -780,7 +832,7 @@ impl GRanges { F: Fn(CombinedJoinDataBothEmpty) -> V, { let data = self.take_data()?; - let transformed_data: Vec = data.apply(func); + let transformed_data: Vec = data.map(func); let ranges = self.ranges; Ok(GRanges { ranges, @@ -789,25 +841,28 @@ impl GRanges { } } -/// Applies a user-defined function over each join in the [`GRanges`], where the -/// right data container of the join was empty. -/// -/// This method is tailored for scenarios where there is meaningful data on the left to process, -/// but the right side is empty. The function provided is applied to each item from the left data container. -/// -/// # Parameters -/// -/// * `func` - A function to apply to each [`Com -/// -/// # Returns -/// -/// A new [`GRanges`] object containing the results of applying `func` to each left data item. -/// impl<'a, DL: Clone + 'a> GRanges> where DL: IndexedDataContainer, { - pub fn apply_over_joins( + /// Apply a function over the [`JoinData`] inside this [`GRanges`]. + /// + /// This is a powerful method that is used to summarize genomic overlaps. The + /// user-specified function, `func` is applied to each "join" item in this [`GRanges`] + /// object's data container (which is a [`JoinData`] storing the join information). + /// This supplied `func` function returns some generic type `V` per join, which could be + /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, + /// etc. + /// + /// # Arguments + /// * `func`: a function that takes a [`CombinedJoinDataRightEmpty`] (which contains + /// the associated data for the left range and overlapping right ranges) + /// and summarizes it into a new type `V`. 
+ /// + /// See [`CombinedJoinDataRightEmpty`] and its convenience methods, which are designed + /// to help downstream statistical calculations that could use the number of overlapping + /// basepairs, overlapping fraction, etc. + pub fn map_over_joins( mut self, func: F, ) -> Result>, GRangesError> @@ -815,7 +870,7 @@ where F: Fn(CombinedJoinDataRightEmpty<
::OwnedItem>) -> V, { let data = self.take_data()?; - let transformed_data: Vec = data.apply(func); + let transformed_data: Vec = data.map(func); let ranges = self.ranges; Ok(GRanges { ranges, @@ -824,23 +879,29 @@ where } } -/// Applies a user-defined function over each join in the GRanges, where the left data container is empty. -/// -/// Tailored for cases with meaningful data on the right and empty on the left. The provided -/// function is applied to each item from the right data container. -/// -/// # Parameters -/// -/// * `func` - A function to apply to each right data item. -/// -/// # Returns -/// -/// A new `GRanges` object containing the results of applying `func` to each right data item. impl<'a, DR: Clone + 'a> GRanges> where DR: IndexedDataContainer, { - pub fn apply_over_joins( + /// Apply a function over the [`JoinData`] inside this [`GRanges`]. + /// + /// This is a powerful method that is used to summarize genomic overlaps. The + /// user-specified function, `func` is applied to each "join" item in this [`GRanges`] + /// object's data container (which is a [`JoinData`] storing the join information). + /// This supplied `func` function returns some generic type `V` per join, which could be + /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, + /// etc. + /// + /// # Arguments + /// * `func`: a function that takes a [`CombinedJoinDataLeftEmpty`] (which contains + /// the associated data for the left range and overlapping right ranges) + /// and summarizes it into a new type `V`. + /// + /// See [`CombinedJoinDataLeftEmpty`] and its convenience methods, which are designed + /// to help downstream statistical calculations that could use the number of overlapping + /// basepairs, overlapping fraction, etc. 
+ + pub fn map_over_joins( mut self, func: F, ) -> Result>, GRangesError> @@ -848,7 +909,7 @@ where F: Fn(CombinedJoinDataLeftEmpty<::OwnedItem>) -> V, { let data = self.take_data()?; - let transformed_data: Vec = data.apply(func); + let transformed_data: Vec = data.map(func); let ranges = self.ranges; Ok(GRanges { ranges, diff --git a/src/io/parsers.rs b/src/io/parsers.rs index 263d11b..24cfd8f 100644 --- a/src/io/parsers.rs +++ b/src/io/parsers.rs @@ -417,19 +417,28 @@ impl TsvSerialize for Option { /// The additional two BED5 columns. #[derive(Clone, Debug)] pub struct Bed5Addition { - name: String, - strand: Option, + pub name: String, + pub score: f64, +} + +/// The additional three BED6 columns. +// TODO: not connected yet +#[derive(Clone, Debug)] +pub struct Bed6Addition { + pub name: String, + pub score: f64, + pub strand: Option, } impl TsvSerialize for &Bed5Addition { fn to_tsv(&self) -> String { - format!("{}\t{}", self.name, self.strand.to_tsv()) + format!("{}\t{}", self.name, self.score) } } impl TsvSerialize for Bed5Addition { fn to_tsv(&self) -> String { - format!("{}\t{}", self.name, self.strand.to_tsv()) + format!("{}\t{}", self.name, self.score) } } @@ -891,9 +900,9 @@ pub fn parse_bed5(line: &str) -> Result, GRange let end: Position = parse_column(columns[2], line)?; let name = parse_column(columns[3], line)?; - let strand: Option = parse_strand(parse_column(columns[3], line)?)?; + let score: f64 = parse_column(columns[3], line)?; - let data = Bed5Addition { name, strand }; + let data = Bed5Addition { name, score }; Ok(GenomicRangeRecord { seqname, @@ -903,6 +912,33 @@ pub fn parse_bed5(line: &str) -> Result, GRange }) } +// TODO +///// Parses a BED6 format line into the three columns defining the range, and additional +///// columns +///// +//pub fn parse_bed6(line: &str) -> Result, GRangesError> { +// let columns: Vec<&str> = line.splitn(4, '\t').collect(); +// if columns.len() < 3 { +// return 
Err(GRangesError::BedlikeTooFewColumns(line.to_string())); +// } +// +// let seqname = parse_column(columns[0], line)?; +// let start: Position = parse_column(columns[1], line)?; +// let end: Position = parse_column(columns[2], line)?; +// +// let name = parse_column(columns[3], line)?; +// // let strand: Option = parse_strand(parse_column(columns[3], line)?)?; +// +// let data = Bed5Addition { name, score }; +// +// Ok(GenomicRangeRecord { +// seqname, +// start, +// end, +// data, +// }) +//} + #[cfg(test)] mod tests { use crate::io::{ diff --git a/src/join.rs b/src/join.rs index e431f9f..348814d 100644 --- a/src/join.rs +++ b/src/join.rs @@ -3,7 +3,7 @@ #![allow(clippy::all)] use crate::{ - traits::{GenericRange, IndexedDataContainer}, + traits::{GenericRange, IndexedDataContainer, JoinDataOperations}, Position, }; @@ -121,9 +121,23 @@ impl<'a, DL, DR> JoinData<'a, DL, DR> { } pub struct CombinedJoinData { - pub join: LeftGroupedJoin, // Information on the join - pub left_data: DL, // The left data element - pub right_data: Vec, // The right data elements + pub join: LeftGroupedJoin, // Information on the join + pub left_data: DL, // The left data element + pub right_data: Vec, // The right data elements +} + +impl JoinDataOperations for CombinedJoinData { + type LeftDataElementType = DL; + type RightDataElementType = DR; + fn join(&self) -> LeftGroupedJoin { + self.join + } + fn left_data(&self) -> Option { + Some(self.left_data) + } + fn right_data(&self) -> Option> { + Some(self.right_data) + } } impl<'a, DL, DR> JoinData<'a, DL, DR> @@ -133,7 +147,7 @@ where { /// Apply `func` to each element, putting the results into the returned /// `Vec`. 
- pub fn apply(&self, func: F) -> Vec + pub fn map(&self, func: F) -> Vec where F: Fn( CombinedJoinData< @@ -202,13 +216,28 @@ pub struct CombinedJoinDataLeftEmpty { pub right_data: Vec, // The right data element } +impl JoinDataOperations<(), DR> for CombinedJoinDataLeftEmpty { + type LeftDataElementType = (); + type RightDataElementType = DR; + fn join(&self) -> LeftGroupedJoin { + self.join + } + fn left_data(&self) -> Option { + None + } + fn right_data(&self) -> Option> { + Some(self.right_data) + } +} + + impl<'a, DR> JoinDataLeftEmpty<'a, DR> where DR: IndexedDataContainer + 'a, { /// Apply `func` to each element, putting the results into the returned /// `Vec`. - pub fn apply(&self, func: F) -> Vec + pub fn map(&self, func: F) -> Vec where F: Fn(CombinedJoinDataLeftEmpty<::OwnedItem>) -> V, { @@ -270,13 +299,30 @@ pub struct CombinedJoinDataRightEmpty
{ pub left_data: DL, // The right data element } + +impl
JoinDataOperations for CombinedJoinDataRightEmpty
{ + type LeftDataElementType = DL; + type RightDataElementType = (); + fn join(&self) -> LeftGroupedJoin { + self.join + } + fn left_data(&self) -> Option { + Some(self.left_data) + } + fn right_data(&self) -> Option> { + None + } +} + + + impl<'a, DL> JoinDataRightEmpty
where DL: IndexedDataContainer, { /// Apply `func` to each element, putting the results into the returned /// `Vec`. - pub fn apply(&self, func: F) -> Vec + pub fn map(&self, func: F) -> Vec where F: Fn(CombinedJoinDataRightEmpty<
::OwnedItem>) -> V, { @@ -331,10 +377,26 @@ pub struct CombinedJoinDataBothEmpty { pub join: LeftGroupedJoin, } + +impl JoinDataOperations<(), ()> for CombinedJoinDataBothEmpty { + type LeftDataElementType = (); + type RightDataElementType = (); + fn join(&self) -> LeftGroupedJoin { + self.join + } + fn left_data(&self) -> Option { + None + } + fn right_data(&self) -> Option> { + None + } +} + + impl JoinDataBothEmpty { /// Apply `func` to each element, putting the results into the returned /// `Vec`. - pub fn apply(&self, func: F) -> Vec + pub fn map(&self, func: F) -> Vec where F: Fn(CombinedJoinDataBothEmpty) -> V, { diff --git a/src/main/mod.rs b/src/main/mod.rs index e33d9d0..2048bf1 100644 --- a/src/main/mod.rs +++ b/src/main/mod.rs @@ -4,7 +4,7 @@ use clap::{Parser, Subcommand}; use granges::{ commands::{granges_adjust, granges_filter, granges_flank, ProcessingMode}, prelude::GRangesError, - Position, PositionOffset, + Position, PositionOffset, data::operations::{match_operation, Operation}, }; #[cfg(feature = "dev-commands")] @@ -128,6 +128,10 @@ enum Commands { #[arg(short, long, required = true)] right: PathBuf, + /// Operation + #[clap(short, long, value_parser = clap::value_parser!(Operation), multiple_occurrences(true))] + operation: Vec, + /// An optional output file (standard output will be used if not specified) #[arg(short, long)] output: Option, @@ -223,11 +227,11 @@ fn run() -> Result<(), GRangesError> { genome, left, right, + operation, output, skip_missing, in_mem, }) => { - unimplemented!() } #[cfg(feature = "dev-commands")] diff --git a/src/traits.rs b/src/traits.rs index 924026b..a3bc4fa 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -10,7 +10,7 @@ use crate::{ granges::GRanges, io::parsers::{FilteredRanges, UnwrappedRanges}, ranges::GenomicRangeRecord, - Position, + Position, join::LeftGroupedJoin, }; /// Traits for [`GRanges`] types that can be modified. 
@@ -78,6 +78,22 @@ pub trait GenericRange: Clone { } } +/// The [`JoinDataOperations`] trait unifies common operations +/// over combined join data types ([`CombinedJoinData`], +/// [`CombinedJoinDataBothEmpty`], etc). +/// +/// +/// [`CombinedJoinData`]: crate::join::CombinedJoinData +/// [`CombinedJoinDataBothEmpty`]: crate::join::CombinedJoinDataBothEmpty +pub trait JoinDataOperations { + type LeftDataElementType; + type RightDataElementType; + + fn join(&self) -> LeftGroupedJoin; + fn left_data(&self) -> Option; + fn right_data(&self) -> Option>; +} + /// The [`GenericRangeOperations`] trait extends additional functionality to [`GenericRange`], /// such as the creation of flanking regions. pub trait GenericRangeOperations: GenericRange {