Skip to content

Commit

Permalink
Removed unnecessary generic in RangeIterable, and made it an associat…
Browse files Browse the repository at this point in the history
…ed type -- greatly improves design

 - Removed the generic in the RangeIterable trait. This should have been an associated
    type, which greatly simplifies the logic and code.
 - Removed iter_ranges_empty(), which was possible with the above change.
 - Removed GRangesEmptyIterator, again possible by the change above.
 - Renamed RangeRecord to GenomicRangeRecord since that's more accurate (has seqname)
  • Loading branch information
vsbuffalo committed Feb 17, 2024
1 parent 3e5c09c commit f91a6b6
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 128 deletions.
31 changes: 9 additions & 22 deletions src/granges.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ use indexmap::IndexMap;

use crate::{
io::OutputFile,
iterators::{GRangesEmptyIterator, GRangesIterator},
iterators::GRangesIterator,
prelude::GRangesError,
ranges::{
coitrees::{COITrees, COITreesIndexed},
vec::{VecRanges, VecRangesEmpty, VecRangesIndexed},
RangeEmpty, RangeIndexed, RangeRecord,
GenomicRangeRecord, RangeEmpty, RangeIndexed,
},
traits::{GenericRange, IndexedDataContainer, RangeContainer, RangesIterable, TsvSerialize},
Position,
Expand Down Expand Up @@ -118,7 +118,7 @@ where

let seqnames = self.seqnames();
for range in self.iter_ranges() {
let record = range.to_record(&seqnames, self.data.as_ref().unwrap());
let record = range.to_record(&seqnames, self.data.as_ref());
writeln!(writer, "{}", record.to_tsv())?;
}
Ok(())
Expand Down Expand Up @@ -150,7 +150,7 @@ impl<U> GRanges<VecRangesIndexed, Vec<U>> {
seqlens: IndexMap<String, Position>,
) -> Result<GRanges<VecRangesIndexed, Vec<U>>, GRangesError>
where
I: Iterator<Item = Result<RangeRecord<U>, GRangesError>>,
I: Iterator<Item = Result<GenomicRangeRecord<U>, GRangesError>>,
{
let mut gr = GRanges::new_vec(&seqlens);
for possible_entry in iter {
Expand All @@ -167,7 +167,7 @@ impl GRanges<VecRangesEmpty, ()> {
seqlens: IndexMap<String, Position>,
) -> Result<GRanges<VecRangesEmpty, ()>, GRangesError>
where
I: Iterator<Item = Result<RangeRecord<()>, GRangesError>>,
I: Iterator<Item = Result<GenomicRangeRecord<()>, GRangesError>>,
{
let mut gr = GRanges::new_vec(&seqlens);
for possible_entry in iter {
Expand All @@ -180,7 +180,7 @@ impl GRanges<VecRangesEmpty, ()> {

impl<R> GRanges<R, ()>
where
R: RangeContainer + RangesIterable<RangeEmpty>,
R: RangeContainer + RangesIterable,
{
// TODO: candidate for a trait
pub fn to_bed3(&self, output: Option<impl Into<PathBuf>>) -> Result<(), GRangesError> {
Expand All @@ -191,8 +191,8 @@ where
let mut writer = output.writer()?;

let seqnames = self.seqnames();
for range in self.iter_ranges_empty() {
let record = range.to_record(&seqnames);
for range in self.iter_ranges() {
let record = range.to_record_empty::<()>(&seqnames);
writeln!(writer, "{}", record.to_tsv())?;
}
Ok(())
Expand All @@ -219,20 +219,7 @@ impl<T> GRanges<VecRangesIndexed, T> {

impl<R, T> GRanges<R, T>
where
R: RangesIterable<RangeEmpty>,
{
/// Create a new [`GRangesIterator`] to iterate through all
/// the ranges in this [`GRanges`] object. These ranges carry
/// no data index, unlike the method [`GRanges.iter_ranges()`]
/// available for range type for associated data containers.
pub fn iter_ranges_empty(&self) -> GRangesEmptyIterator<'_, R> {
GRangesEmptyIterator::new(&self.ranges)
}
}

impl<R, T> GRanges<R, T>
where
R: RangesIterable<RangeIndexed>,
R: RangesIterable,
{
/// Create a new [`GRangesIterator`] to iterate through all the ranges in this [`GRanges`] object.
pub fn iter_ranges(&self) -> GRangesIterator<'_, R> {
Expand Down
30 changes: 15 additions & 15 deletions src/io/parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::path::PathBuf;

use crate::error::GRangesError;
use crate::io::file::InputFile;
use crate::ranges::RangeRecord;
use crate::ranges::GenomicRangeRecord;
use crate::traits::GeneralRangeRecordIterator;
use crate::Position;

Expand All @@ -33,12 +33,12 @@ impl Bed3RecordIterator {
}

impl Iterator for Bed3RecordIterator {
type Item = Result<RangeRecord<()>, GRangesError>;
type Item = Result<GenomicRangeRecord<()>, GRangesError>;

fn next(&mut self) -> Option<Self::Item> {
self.bed_reader.records::<3>().next().map(|res| {
res.map_err(GRangesError::IOError)
.map(|record| RangeRecord {
.map(|record| GenomicRangeRecord {
seqname: record.reference_sequence_name().to_string(),
start: convert_noodles_position(record.start_position()),
end: convert_noodles_position(record.end_position()),
Expand Down Expand Up @@ -70,7 +70,7 @@ pub struct TsvRecordIterator<F, U> {

impl<F, U> TsvRecordIterator<F, U>
where
F: Fn(&str) -> Result<RangeRecord<U>, GRangesError>,
F: Fn(&str) -> Result<GenomicRangeRecord<U>, GRangesError>,
{
/// Create a new [`TsvRecordIterator`], which parses lines from the supplied
/// file path into [`RangeRecord<U>`] using the specified parsing function.
Expand All @@ -91,9 +91,9 @@ where

impl<F, U> Iterator for TsvRecordIterator<F, U>
where
F: Fn(&str) -> Result<RangeRecord<U>, GRangesError>,
F: Fn(&str) -> Result<GenomicRangeRecord<U>, GRangesError>,
{
type Item = Result<RangeRecord<U>, GRangesError>;
type Item = Result<GenomicRangeRecord<U>, GRangesError>;

fn next(&mut self) -> Option<Self::Item> {
let mut line = String::new();
Expand All @@ -112,12 +112,12 @@ where
/// columns, which are standard to all BED files.
#[allow(clippy::type_complexity)]
pub struct BedlikeIterator {
iter: TsvRecordIterator<fn(&str) -> Result<RangeRecord<String>, GRangesError>, String>,
iter: TsvRecordIterator<fn(&str) -> Result<GenomicRangeRecord<String>, GRangesError>, String>,
}

impl BedlikeIterator {
pub fn new(filepath: impl Into<PathBuf>) -> Result<Self, GRangesError> {
let parser: fn(&str) -> Result<RangeRecord<String>, GRangesError> = parse_bed_lazy;
let parser: fn(&str) -> Result<GenomicRangeRecord<String>, GRangesError> = parse_bed_lazy;

let iter = TsvRecordIterator::new(filepath, parser)?;
Ok(Self { iter })
Expand All @@ -128,7 +128,7 @@ impl BedlikeIterator {
}

impl Iterator for BedlikeIterator {
type Item = Result<RangeRecord<String>, GRangesError>;
type Item = Result<GenomicRangeRecord<String>, GRangesError>;

fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
Expand Down Expand Up @@ -164,7 +164,7 @@ impl Iterator for BedlikeIterator {
/// ```
pub struct FilteredIntervals<I, U>
where
I: Iterator<Item = Result<RangeRecord<U>, GRangesError>>,
I: Iterator<Item = Result<GenomicRangeRecord<U>, GRangesError>>,
{
inner: I,
retain_seqnames: Option<HashSet<String>>,
Expand All @@ -173,7 +173,7 @@ where

impl<I, U> FilteredIntervals<I, U>
where
I: Iterator<Item = Result<RangeRecord<U>, GRangesError>>,
I: Iterator<Item = Result<GenomicRangeRecord<U>, GRangesError>>,
{
pub fn new(
inner: I,
Expand All @@ -192,9 +192,9 @@ where

impl<I, U> Iterator for FilteredIntervals<I, U>
where
I: Iterator<Item = Result<RangeRecord<U>, GRangesError>>,
I: Iterator<Item = Result<GenomicRangeRecord<U>, GRangesError>>,
{
type Item = Result<RangeRecord<U>, GRangesError>;
type Item = Result<GenomicRangeRecord<U>, GRangesError>;

/// Get the next filtered entry, prioritizing exclude over retain.
fn next(&mut self) -> Option<Self::Item> {
Expand Down Expand Up @@ -282,7 +282,7 @@ pub fn parse_bedlike(line: &str) -> Result<(String, Position, Position, Vec<&str

/// Lazily parses a BED* format line into the first three columns defining the range,
/// storing the rest as a `String`.
pub fn parse_bed_lazy(line: &str) -> Result<RangeRecord<String>, GRangesError> {
pub fn parse_bed_lazy(line: &str) -> Result<GenomicRangeRecord<String>, GRangesError> {
let columns: Vec<&str> = line.splitn(4, '\t').collect();
if columns.len() < 3 {
return Err(GRangesError::BedlikeTooFewColumns(line.to_string()));
Expand All @@ -298,7 +298,7 @@ pub fn parse_bed_lazy(line: &str) -> Result<RangeRecord<String>, GRangesError> {
String::new()
};

Ok(RangeRecord {
Ok(GenomicRangeRecord {
seqname,
start,
end,
Expand Down
80 changes: 17 additions & 63 deletions src/iterators.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use genomap::GenomeMap;

use crate::{
ranges::{RangeEmpty, RangeEmptyRecord, RangeIndexed, RangeIndexedRecord},
traits::{RangeContainer, RangesIterable},
ranges::RangeIndexedRecord,
traits::{GenericRange, RangeContainer, RangesIterable},
};

/// An iterator yielding [`RangeIndexedRecord`], which store
Expand All @@ -11,15 +11,18 @@ use crate::{
/// # Developer Notes
/// Using indices, rather than references to data items and `&str` directly,
/// prevents lifetime complexity.
pub struct GRangesIterator<'a, R> {
pub struct GRangesIterator<'a, R>
where
R: RangesIterable,
{
ranges: &'a GenomeMap<R>,
current_seqname_index: usize,
current_range_iter: Box<dyn Iterator<Item = RangeIndexed> + 'a>,
current_range_iter: Box<dyn Iterator<Item = <R as RangesIterable>::RangeType> + 'a>,
}

impl<'a, R> GRangesIterator<'a, R>
where
R: RangesIterable<RangeIndexed>,
R: RangesIterable,
{
pub fn new(ranges: &'a GenomeMap<R>) -> Self {
let current_range_iter = ranges.get_by_index(0).unwrap().iter_ranges();
Expand All @@ -33,7 +36,8 @@ where

impl<'a, R> Iterator for GRangesIterator<'a, R>
where
R: RangeContainer + RangesIterable<RangeIndexed>,
R: RangeContainer + RangesIterable,
<R as RangesIterable>::RangeType: GenericRange,
{
type Item = RangeIndexedRecord;

Expand All @@ -42,62 +46,9 @@ where
if let Some(next_range) = self.current_range_iter.next() {
return Some(RangeIndexedRecord {
seqname_index: self.current_seqname_index,
start: next_range.start,
end: next_range.end,
index: next_range.index,
});
} else {
// try to load another sequence's set of ranges.
self.current_seqname_index += 1;
if self.current_seqname_index >= self.ranges.len() {
// we're out of range container iterators
return None;
}
self.current_range_iter = self
.ranges
.get_by_index(self.current_seqname_index)
.unwrap()
.iter_ranges();
}
}
}
}

/// An iterator over [`RangeEmptyRecord`], which store
/// indices to the sequence names (but carries no data index).
pub struct GRangesEmptyIterator<'a, R> {
ranges: &'a GenomeMap<R>,
current_seqname_index: usize,
current_range_iter: Box<dyn Iterator<Item = RangeEmpty> + 'a>,
}

impl<'a, R> GRangesEmptyIterator<'a, R>
where
R: RangesIterable<RangeEmpty>,
{
pub fn new(ranges: &'a GenomeMap<R>) -> Self {
let current_range_iter = ranges.get_by_index(0).unwrap().iter_ranges();
Self {
ranges,
current_seqname_index: 0,
current_range_iter,
}
}
}

impl<'a, R> Iterator for GRangesEmptyIterator<'a, R>
where
R: RangeContainer + RangesIterable<RangeEmpty>,
{
type Item = RangeEmptyRecord;

fn next(&mut self) -> Option<Self::Item> {
loop {
if let Some(next_range) = self.current_range_iter.next() {
return Some(RangeEmptyRecord {
seqname_index: self.current_seqname_index,
start: next_range.start,
end: next_range.end,
start: next_range.start(),
end: next_range.end(),
index: next_range.index(),
});
} else {
// try to load another sequence's set of ranges.
Expand Down Expand Up @@ -126,6 +77,9 @@ mod tests {
fn test_genomic_ranges_iterator() {
let gr = granges_test_case_01();
let mut iter = GRangesIterator::new(&gr.ranges);
assert_eq!(iter.next().unwrap(), RangeIndexedRecord::new(0, 0, 5, 0));
assert_eq!(
iter.next().unwrap(),
RangeIndexedRecord::new(0, 0, 5, Some(0))
);
}
}
6 changes: 4 additions & 2 deletions src/ranges/coitrees.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,17 @@ impl From<Interval<&usize>> for RangeIndexed {
/// Their iterator does not consume, but we need an owned (or copyable,
/// as is case here) type to map out. Thus, we need this odd variant of
/// an iterator that doesn't return references and does not consume.
impl RangesIterable<RangeIndexed> for COITrees<usize> {
impl RangesIterable for COITrees<usize> {
type RangeType = RangeIndexed;
fn iter_ranges(&self) -> Box<dyn Iterator<Item = RangeIndexed> + '_> {
let iter = self.ranges.iter();
let converted_iter = iter.map(RangeIndexed::from);
Box::new(converted_iter)
}
}

impl RangesIterable<RangeEmpty> for COITrees<()> {
impl RangesIterable for COITrees<()> {
type RangeType = RangeEmpty;
fn iter_ranges(&self) -> Box<dyn Iterator<Item = RangeEmpty> + '_> {
let iter = self.ranges.iter();
let converted_iter = iter.map(RangeEmpty::from);
Expand Down
Loading

0 comments on commit f91a6b6

Please sign in to comment.