Skip to content

Commit

Permalink
Changed region_map and region_map_into_granges interface.
Browse files Browse the repository at this point in the history
 - Both of these functions now pass a `(seqname, start, end)`
   region tuple to the user-supplied function, so that it can
   access the coordinates of the current slice.

 - Major version bump since this changes the API.
  • Loading branch information
vsbuffalo committed May 10, 2024
1 parent 9182a90 commit 39217cd
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 21 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "granges"
version = "0.3.0"
version = "0.4.0"
edition = "2021"
license = "MIT"
authors = ["Vince Buffalo <[email protected]>"]
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ pub type PositionOffset = i64;

/// The main exports of the GRanges library.
pub mod prelude {
pub use crate::{Position, PositionOffset};
pub use crate::error::GRangesError;
pub use crate::granges::{GRanges, GRangesEmpty};
pub use crate::io::file::read_seqlens;
Expand Down
20 changes: 10 additions & 10 deletions src/sequences/nucleotide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,12 @@ impl Sequences for NucleotideSequences {
end: Position,
) -> Result<V, GRangesError>
where
F: Fn(Self::Slice<'_>) -> V,
F: Fn(Self::Slice<'_>, (&str, Position, Position)) -> V,
{
let seq = self.get_sequence(seqname)?;
let range = try_range(start, end, seq.len().try_into().unwrap())?;
let data = &seq[range];
Ok(func(data))
Ok(func(data, (&seqname, start, end)))
}

/// Get the length of a particular sequence.
Expand Down Expand Up @@ -303,11 +303,11 @@ impl Sequences for LazyNucleotideSequences {
end: Position,
) -> Result<V, GRangesError>
where
F: for<'b> Fn(&'b [u8]) -> V,
F: for<'b> Fn(&'b [u8], (&str, Position, Position)) -> V,
{
let seq = self.get_sequence(seqname)?;
let range = try_range(start, end, seq.len().try_into().unwrap())?;
Ok(func(&seq[range]))
Ok(func(&seq[range], (seqname, start, end)))
}

/// Get the length of a particular sequence.
Expand Down Expand Up @@ -382,7 +382,7 @@ pub fn gc_content(seq: &[u8]) -> f64 {
///
/// # Arguments
/// * `seq` - a byte slice.
/// * `_` - a `(&str, Position, Position)` region tuple; ignored.
pub fn gc_content_strict(seq: &[u8]) -> f64 {
pub fn gc_content_strict(seq: &[u8], _: (&str, Position, Position)) -> f64 {
let gc_count = seq
.iter()
.filter(|&&base| matches!(base.to_ascii_uppercase(), b'G' | b'C'))
Expand All @@ -402,7 +402,7 @@ pub fn gc_content_strict(seq: &[u8]) -> f64 {
#[cfg(test)]
mod tests {
use super::{gc_content_strict, LazyNucleotideSequences, NucleotideSequences};
use crate::{granges::GRangesEmpty, sequences::nucleotide::Nucleotides, traits::Sequences};
use crate::{granges::GRangesEmpty, sequences::nucleotide::Nucleotides, traits::Sequences, Position};

#[test]
fn test_nucleotide_sequences() {
Expand Down Expand Up @@ -438,7 +438,7 @@ mod tests {
// chromosome sequence length through the apply funcs
let seq1_len = seqlens.get("chr1").unwrap();

fn get_len(seq: &[u8]) -> usize {
fn get_len(seq: &[u8], _: (&str, Position, Position)) -> usize {
seq.len()
}

Expand Down Expand Up @@ -473,7 +473,7 @@ mod tests {
// len([l for l in 'TTCACTACTATTAGTACTCACGGCGCAATA'[3:10] if l == 'C'])
let total_Cs = reference
.region_map(
&|seq| seq.iter().filter(|c| **c == b'C').count(),
&|seq, _| seq.iter().filter(|c| **c == b'C').count(),
"chr1",
3,
10,
Expand All @@ -486,7 +486,7 @@ mod tests {
// len([l for l in 'TTCACTACTATTAGTACTCACGGCGCAATA'[3:10] if l == 'A'])
let total_As = reference
.region_map(
&|seq| seq.iter().filter(|c| **c == b'A').count(),
&|seq, _| seq.iter().filter(|c| **c == b'A').count(),
"chr1",
3,
10,
Expand All @@ -505,7 +505,7 @@ mod tests {

let windows = GRangesEmpty::from_windows(&seqlens, 10, None, false).unwrap();

let make_string = |seq: &[u8]| String::from_utf8(seq.to_vec()).unwrap();
let make_string = |seq: &[u8], _: (&str, Position, Position)| String::from_utf8(seq.to_vec()).unwrap();

// our test: get subsequences (as Strings), with *no step*, and reconstruct the
// original sequence
Expand Down
15 changes: 8 additions & 7 deletions src/sequences/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ where
end: Position,
) -> Result<V, GRangesError>
where
F: for<'b> Fn(Self::Slice<'b>) -> V,
F: for<'b> Fn(Self::Slice<'b>, (&str, Position, Position)) -> V,
{
let seq = self.get_sequence(seqname)?;
let range = try_range(start, end, seq.len().try_into().unwrap())?;
let view = seq.slice(s![range]);
Ok(func(view))
Ok(func(view, (seqname, start, end)))
}

fn get_sequence_length(&self, seqname: &str) -> Result<Position, GRangesError> {
Expand Down Expand Up @@ -204,6 +204,7 @@ where
/// # Examples
///
/// ```
/// use crate::granges::prelude::Position;
/// use crate::granges::traits::Sequences;
/// use granges::sequences::numeric::NumericSequences1;
/// use granges::test_utilities::random_array1_sequences;
Expand All @@ -213,7 +214,7 @@ where
/// let mut data = random_array1_sequences(100);
/// let numeric_seq = NumericSequences1::new(data);
/// let result = numeric_seq.region_map(
/// &|view: ArrayView1<'_, f64> | view.sum(),
/// &|view: ArrayView1<'_, f64>, _: (&str, Position, Position)| view.sum(),
/// "chr1",
/// 0,
/// 10
Expand All @@ -227,13 +228,13 @@ where
end: Position,
) -> Result<V, GRangesError>
where
F: for<'b> Fn(Self::Slice<'_>) -> V,
F: for<'b> Fn(Self::Slice<'_>, (&str, Position, Position)) -> V,
{
let seq = self.get_sequence(seqname)?;
let range = try_range(start, end, seq.len().try_into().unwrap())?;
let seq = self.get_sequence(seqname)?;
let view = seq.slice(s![range, ..]);
Ok(func(view))
Ok(func(view, (seqname, start, end)))
}

/// Retrieves the length of a specific sequence.
Expand Down Expand Up @@ -355,12 +356,12 @@ where
end: Position,
) -> Result<V, GRangesError>
where
F: for<'b> Fn(ArrayView2<'b, T>) -> V,
F: for<'b> Fn(ArrayView2<'b, T>, (&str, Position, Position)) -> V,
{
let seq = self.get_sequence(seqname)?;
let range = try_range(start, end, seq.len().try_into().unwrap())?;
let view = seq.slice(s![range, ..]);
let value = func(view);
let value = func(view, (seqname, start, end));
Ok(value)
}

Expand Down
12 changes: 9 additions & 3 deletions src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ pub trait Sequences {
/// Apply a function on a [`Sequences::Slice`] of a sequence.
///
/// # Arguments
/// * `func` - a function that takes a `Self::Slice` and returns a [`Result<V, GRangesError>`]
/// * `func` - a function that takes a `Self::Slice` and a `(seqname, start, end)`
///    region tuple, and returns a value of type `V`.
/// * `seqname` - sequence name.
/// * `start` - a [`Position`] start position.
/// * `end` - a [`Position`] *inclusive* end position.
Expand All @@ -370,6 +371,11 @@ pub trait Sequences {
///
/// to validate the range and to avoid panics.
///
/// Note that the `(seqname, start, end)` region tuple can often be ignored
/// by the function processing the `Slice`. However, this information
/// is useful when functions need to know the slice coordinates to,
/// e.g., combine with other data in this region.
///
fn region_map<V, F>(
&self,
func: &F,
Expand All @@ -378,7 +384,7 @@ pub trait Sequences {
end: Position,
) -> Result<V, GRangesError>
where
F: Fn(<Self as Sequences>::Slice<'_>) -> V;
F: Fn(<Self as Sequences>::Slice<'_>, (&str, Position, Position)) -> V;

fn seqlens(&self) -> Result<IndexMap<String, Position>, GRangesError> {
let mut seqlens = IndexMap::new();
Expand All @@ -401,7 +407,7 @@ pub trait Sequences {
where
V: Clone,
C: IterableRangeContainer + 'b,
F: Fn(<Self as Sequences>::Slice<'_>) -> V,
F: Fn(<Self as Sequences>::Slice<'_>, (&str, Position, Position)) -> V,
{
let granges_ref = granges.as_granges_ref();
let seqlens = &granges_ref.seqlens().clone();
Expand Down

0 comments on commit 39217cd

Please sign in to comment.