Skip to content

Commit

Permalink
Basic iterator trait, range pushing methods, to coitrees, and more!
Browse files Browse the repository at this point in the history
 - tests and test utilities
 - rough join stuff
 - interval/range conversion methods
 - new GRanges methods refined
 - iteration methods
 - read_seqlens
 - PathBuf-based arguments in io
 - lazy BED parsing
 - invalid BED tests
 - adjust range operation with tests
 - clippy & fmt
 - GitHub Rust workflow added
  • Loading branch information
vsbuffalo committed Feb 15, 2024
1 parent 8de9b47 commit 548fbd0
Show file tree
Hide file tree
Showing 25 changed files with 1,529 additions and 139 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# CI workflow: builds the crate and runs its test suite on pushes and pull
# requests that target the main branch.
name: Rust

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

env:
  CARGO_TERM_COLOR: always

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: Build
      run: cargo build --verbose
    - name: Run tests
      run: cargo test --verbose
28 changes: 24 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,33 @@
[package]
name = "granges"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
license = "MIT"
authors = ["Vince Buffalo <[email protected]>"]
keywords = ["genomics", "bioinformatics", "compbio"]
categories = ["science"]
documentation = "https://docs.rs/granges/"
repository = "https://github.com/vsbuffalo/granges"
description = "A Rust library and command line tool for genomic range operations."

[dependencies]
# clap = { version = "4.4.18", features = ["derive"], optional = true }
clap = { version = "4.4.18", features = ["derive"] }
coitrees = { version = "0.4.0", features = ["nosimd"] }
flate2 = "1.0.28"
genomap = "0.2.6"
indexmap = "2.2.3"
ndarray = "0.15.6"
noodles = { version = "0.63.0", features = ["core", "bed"] }
rand = "0.8.5"
thiserror = "1.0.57"

# [features]
# cli = [ "clap" ]

[[bin]]
name = "granges"
path = "src/main/mod.rs"
# required-features = ["cli"]


3 changes: 3 additions & 0 deletions src/data/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! Data container implementations.

pub mod vec;
62 changes: 62 additions & 0 deletions src/data/ndarray.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//! Data container implementations for [`ndarray::Array1`] and [`ndarray::Array2`].

use ndarray::{Array1, Array2, ArrayView1};
use crate::traits::IndexedDataContainer;

/// [`IndexedDataContainer`] implementation for one-dimensional arrays.
///
/// Elements are handed out by value, which is why `U` is required to be `Copy`.
impl<'a, U> IndexedDataContainer<'a> for Array1<U>
where
    U: Copy + Default + 'a,
{
    type Item = U;
    type Output = Array1<U>;

    /// Copies out the element stored at `index`.
    ///
    /// Uses plain indexing, so an out-of-bounds `index` panics.
    fn get_value(&'a self, index: usize) -> Self::Item {
        let value: U = self[index];
        value
    }

    /// Number of elements in the array.
    fn len(&self) -> usize {
        // Method-call syntax prefers the array's inherent `len` over this
        // trait method, so this does not recurse.
        self.len()
    }

    /// Whether `index` is below the length of the array's only axis.
    fn is_valid_index(&self, index: usize) -> bool {
        let axis_len = self.shape()[0];
        index < axis_len
    }

    /// Builds a new array holding the elements at `indices`, in the order given.
    fn new_from_indices(&self, indices: &[usize]) -> Self::Output {
        indices
            .iter()
            .map(|&position| self.get_value(position))
            .collect()
    }
}

/// [`IndexedDataContainer`] implementation for two-dimensional arrays.
///
/// Each "element" of the container is one row of the array, so lengths and
/// indices refer to the first axis.
impl<'a, U> IndexedDataContainer<'a> for Array2<U>
where
    U: Copy + Default + 'a,
{
    type Item = ArrayView1<'a, U>;
    type Output = Array2<U>;

    /// Returns a borrowed view of row `index`.
    fn get_value(&'a self, index: usize) -> Self::Item {
        self.row(index)
    }

    /// Number of rows in the array.
    fn len(&self) -> usize {
        self.shape()[0]
    }

    /// Whether `index` is below the number of rows.
    fn is_valid_index(&self, index: usize) -> bool {
        index < self.shape()[0]
    }

    /// Builds a new array from the rows at `indices`, in the order given.
    ///
    /// The result has shape `(indices.len(), number of columns)`.
    fn new_from_indices(&self, indices: &[usize]) -> Self::Output {
        let cols = self.shape()[1];

        // Copy every selected row straight into one preallocated buffer,
        // rather than collecting each row into a temporary `Vec` first.
        let mut rows_data: Vec<U> = Vec::with_capacity(indices.len() * cols);
        for &idx in indices {
            rows_data.extend(self.row(idx).iter().copied());
        }

        // create a new Array2<U> from the rows
        // shape is (number of indices, number of columns)
        Array2::from_shape_vec((indices.len(), cols), rows_data)
            .expect("Shape and collected data size mismatch")
    }
}
33 changes: 33 additions & 0 deletions src/data/vec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//! Data container implementations for [`Vec<U>`].


/// Trait methods for the commonly-used `Vec<U>` data container.
///
/// Note that the associated `Item` type is always a *reference* to the data elements.
impl<'a, U> IndexedDataContainer<'a> for Vec<U>
where
U: Clone + 'a,
{
type Item = &'a U;
type Output = Vec<U>;

fn get_value(&'a self, index: usize) -> Self::Item {
self.get(index).unwrap()
}

fn len(&self) -> usize {
self.len()
}

fn is_valid_index(&self, index: usize) -> bool {
self.get(index).is_some()
}

fn new_from_indices(&self, indices: &[usize]) -> Self::Output {
Vec::from_iter(indices.iter().map(|&idx| (*self.get_value(idx)).clone()))
}
}




24 changes: 22 additions & 2 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,33 @@
use std::num::{ParseIntError, ParseFloatError};

use genomap::GenomeMapError;
use thiserror::Error;

use crate::Position;

#[derive(Debug, Error)]
pub enum GRangesError {
// IO related errors
#[error("File reading eror: {0}")]
IOError(#[from] std::io::Error),

// File parsing related errors
#[error("Integer parsing error: {0}")]
ParseIntError(#[from] ParseIntError),
#[error("Float parsing error: {0}")]
ParseFloatError(#[from] ParseFloatError),
#[error("Bed-like file has too few columns. The first three columns must be sequence name, and start and end positions.\nLine: {0}")]
BedlikeTooFewColumns(String),
#[error("File has invalid column type entry: {0}")]
InvalidColumnType(String),

// Invalid genomic range errors
#[error("Range invalid: start ({0}) must be greater than end ({1})")]
InvalidGenomicRange(Position, Position),

#[error("Range [{0}, {1}] is invalid for sequence of length {2}")]
InvalidGenomicRangeForSequence(Position, Position, Position),

#[error("Sequence name '{0}' is not the ranges container")]
MissingSequence(String),
#[error("Error encountered in genomap::GenomeMap")]
GenomeMapError(#[from] GenomeMapError),
}
Loading

0 comments on commit 548fbd0

Please sign in to comment.