diff --git a/Cargo.toml b/Cargo.toml index 10e5a47..8298b68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "granges" -version = "0.3.0" +version = "0.4.0" edition = "2021" license = "MIT" authors = ["Vince Buffalo "] diff --git a/src/sequences/nucleotide.rs b/src/sequences/nucleotide.rs index 7bcee34..0c70463 100644 --- a/src/sequences/nucleotide.rs +++ b/src/sequences/nucleotide.rs @@ -169,12 +169,12 @@ impl Sequences for NucleotideSequences { end: Position, ) -> Result where - F: Fn(Self::Slice<'_>) -> V, + F: Fn(Self::Slice<'_>, Position, Position) -> V, { let seq = self.get_sequence(seqname)?; let range = try_range(start, end, seq.len().try_into().unwrap())?; let data = &seq[range]; - Ok(func(data)) + Ok(func(data, start, end)) } /// Get the length of a particular sequence. @@ -303,11 +303,11 @@ impl Sequences for LazyNucleotideSequences { end: Position, ) -> Result where - F: for<'b> Fn(&'b [u8]) -> V, + F: for<'b> Fn(&'b [u8], Position, Position) -> V, { let seq = self.get_sequence(seqname)?; let range = try_range(start, end, seq.len().try_into().unwrap())?; - Ok(func(&seq[range])) + Ok(func(&seq[range], start, end)) } /// Get the length of a particular sequence. @@ -382,7 +382,7 @@ pub fn gc_content(seq: &[u8]) -> f64 { /// /// # Arguments /// * `seq` - a byte slice. 
-pub fn gc_content_strict(seq: &[u8]) -> f64 { +pub fn gc_content_strict(seq: &[u8], _: Position, _: Position) -> f64 { let gc_count = seq .iter() .filter(|&&base| matches!(base.to_ascii_uppercase(), b'G' | b'C')) @@ -402,7 +402,7 @@ pub fn gc_content_strict(seq: &[u8]) -> f64 { #[cfg(test)] mod tests { use super::{gc_content_strict, LazyNucleotideSequences, NucleotideSequences}; - use crate::{granges::GRangesEmpty, sequences::nucleotide::Nucleotides, traits::Sequences}; + use crate::{granges::GRangesEmpty, sequences::nucleotide::Nucleotides, traits::Sequences, Position}; #[test] fn test_nucleotide_sequences() { @@ -438,7 +438,7 @@ mod tests { // chromosome sequence length through the apply funcs let seq1_len = seqlens.get("chr1").unwrap(); - fn get_len(seq: &[u8]) -> usize { + fn get_len(seq: &[u8], _: Position, _: Position) -> usize { seq.len() } @@ -473,7 +473,7 @@ mod tests { // len([l for l in 'TTCACTACTATTAGTACTCACGGCGCAATA'[3:10] if l == 'C']) let total_Cs = reference .region_map( - &|seq| seq.iter().filter(|c| **c == b'C').count(), + &|seq, _, _| seq.iter().filter(|c| **c == b'C').count(), "chr1", 3, 10, @@ -486,7 +486,7 @@ mod tests { // len([l for l in 'TTCACTACTATTAGTACTCACGGCGCAATA'[3:10] if l == 'A']) let total_As = reference .region_map( - &|seq| seq.iter().filter(|c| **c == b'A').count(), + &|seq, _, _| seq.iter().filter(|c| **c == b'A').count(), "chr1", 3, 10, @@ -505,7 +505,7 @@ mod tests { let windows = GRangesEmpty::from_windows(&seqlens, 10, None, false).unwrap(); - let make_string = |seq: &[u8]| String::from_utf8(seq.to_vec()).unwrap(); + let make_string = |seq: &[u8], _, _| String::from_utf8(seq.to_vec()).unwrap(); // our test: get subsequences (as Strings), with *no step*, and reconstruct the // original sequence diff --git a/src/traits.rs b/src/traits.rs index 43379de..a4154b7 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -354,7 +354,8 @@ pub trait Sequences { /// Apply a function on a [`Sequences::Slice`] of a sequence. 
/// /// # Arguments - /// * `func` - a function that takes a `Self::Slice` and returns a [`Result`] + /// * `func` - a function that takes a `Self::Slice`, `start`, and `end` positions + /// and returns a value of type `V` (wrapped in the returned [`Result`]). /// * `seqname` - sequence name. /// * `start` - a [`Position`] start position. /// * `end` - a [`Position`] *inclusive* end position. @@ -370,6 +371,11 @@ pub trait Sequences { /// /// to validate the range and to avoid panics. /// + /// Note that the `start` and `end` positions can often be ignored + /// by the function processing the `Slice`. However, this information + /// is useful when functions need to know the slice coordinates, + /// e.g. to combine them with other data in this region. + /// fn region_map( &self, func: &F, @@ -378,7 +384,7 @@ pub trait Sequences { end: Position, ) -> Result where - F: Fn(::Slice<'_>) -> V; + F: Fn(::Slice<'_>, Position, Position) -> V; fn seqlens(&self) -> Result, GRangesError> { let mut seqlens = IndexMap::new(); @@ -401,7 +407,7 @@ pub trait Sequences { where V: Clone, C: IterableRangeContainer + 'b, - F: Fn(::Slice<'_>) -> V, + F: Fn(::Slice<'_>, Position, Position) -> V, { let granges_ref = granges.as_granges_ref(); let seqlens = &granges_ref.seqlens().clone();