Skip to content

Commit

Permalink
fixed bug, added more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vsbuffalo committed Feb 9, 2024
1 parent 8705d38 commit 2b0b7b4
Show file tree
Hide file tree
Showing 7 changed files with 333 additions and 71 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "recmap"
version = "0.1.1"
version = "0.2.1"
edition = "2021"
license = "MIT"
authors = ["Vince Buffalo <[email protected]>"]
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ This example can be run on the command line with:
cargo run --example calc_map_lengths -- --seqlens hg38_seqlens.tsv decode_2019_map.txt
```

One of the most common tasks when working with recombination maps is to
estimate the map position of arbitrary markers, which is usually done by linear
interpolation. `RecMap` provides an easy way to do this for one position
(`RecMap.interpolate_map_position()`) and for many positions, with
`RecMap.interpolate_map_positions()`:

```rust
use recmap::prelude::*;
let seqlens = read_seqlens("hg38_seqlens.tsv")
Expand Down
2 changes: 1 addition & 1 deletion examples/calc_map_lengths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct Args {
fn main() -> Result<(), RecMapError> {
let args = Args::parse();
let seqlens = read_seqlens(&args.seqlens)?;
let rec_map = RecMap::from_hapmap(&args.hapmap, seqlens)?;
let rec_map = RecMap::from_hapmap(&args.hapmap, &seqlens)?;

for (name, rate_map) in rec_map.iter() {
println!("{}\t{}", name, rate_map.total_map_length().unwrap());
Expand Down
64 changes: 48 additions & 16 deletions src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ fn is_gzipped_file(file_path: &str) -> io::Result<bool> {
/// to be read through a common interface.
pub struct InputFile {
pub filepath: String,
pub comments: Option<Vec<String>>,
pub header: Option<String>,
pub skip_lines: usize,
}

impl InputFile {
Expand All @@ -46,6 +49,9 @@ impl InputFile {
pub fn new(filepath: &str) -> Self {
Self {
filepath: filepath.to_string(),
comments: None,
header: None,
skip_lines: 0,
}
}

Expand All @@ -70,23 +76,49 @@ impl InputFile {
Ok(BufReader::new(reader))
}

/// Checks if the first line of the file matches the expected header.
///
/// # Arguments
///
/// * `expect` - A string slice representing the expected header.
///
/// # Returns
///
/// A result containing a boolean, `true` if the header matches, `false` otherwise,
/// or a `FileError` on failure.
///
pub fn has_header(&self, expect: &str) -> Result<bool, FileError> {
/// Collects comment lines and/or a line at the start of the file.
pub fn collect_metadata(&mut self, comment: &str, header: &str) -> Result<bool, FileError> {
let mut buf_reader = self.reader()?;
let mut comments = Vec::new();
let mut line = String::new();

while buf_reader.read_line(&mut line)? > 0 {
if line.starts_with(comment) {
comments.push(line.trim_end().to_string());
self.skip_lines += 1;
} else {
if line.starts_with(header) {
self.header = Some(line.trim_end().to_string());
self.skip_lines += 1;
// We only handle one header line. If there are more, the
// file is *very* poorly formatted. So just let downstream
// parsing errors catch this. In the future, we could have a specialized
// error.
break;
}
// break on the first non-header/comment line
break;
}
line.clear();
}

self.comments = Some(comments);
Ok(self.skip_lines > 0)
}

/// Method to continue reading after skipping the comment and header lines.
pub fn continue_reading(&self) -> Result<BufReader<Box<dyn Read>>, FileError> {
let mut buf_reader = self.reader()?;
let mut first_line = String::new();
buf_reader.read_line(&mut first_line)?;
let has_header = first_line.starts_with(expect);
Ok(has_header)
let mut skipped_lines = 0;
let mut line = String::new();

// skip the lines that were previously read as comments or header
while skipped_lines < self.skip_lines {
buf_reader.read_line(&mut line)?;
skipped_lines += 1;
line.clear();
}
Ok(buf_reader)
}
}

Expand Down
14 changes: 10 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//! use recmap::prelude::*;
//! let seqlens = read_seqlens("hg38_seqlens.tsv")
//! .expect("could not read seqlens");
//! let rec_map = RecMap::from_hapmap("decode_2019_map.txt", seqlens)
//! let rec_map = RecMap::from_hapmap("decode_2019_map.txt", &seqlens)
//! .expect("cannot read hapmap");
//!
//! for (name, rate_map) in rec_map.iter() {
Expand All @@ -26,12 +26,18 @@
//! ```bash
//! cargo run --example calc_map_lengths -- --seqlens hg38_seqlens.tsv decode_2019_map.txt
//! ```
//! One of the most common tasks when working with recombination maps is to
//! estimate the map position of arbitrary markers, which is usually done by linear
//! interpolation. `RecMap` provides an easy way to do this for one position
//! (`RecMap.interpolate_map_position()`) and for many positions, with
//! `RecMap.interpolate_map_positions()`:
//!
//!
//! ```no_run
//! use recmap::prelude::*;
//! let seqlens = read_seqlens("hg38_seqlens.tsv")
//! .expect("could not read seqlens");
//! let rec_map = RecMap::from_hapmap("decode_2019_map.txt", seqlens)
//! let rec_map = RecMap::from_hapmap("decode_2019_map.txt", &seqlens)
//! .expect("cannot read hapmap");
//!
//! let positions = vec![11975064, 15007450];
Expand All @@ -43,10 +49,10 @@ mod file;
mod numeric;
pub mod recmap;

pub use recmap::{read_seqlens, RecMap, RecMapError};
pub use recmap::{read_seqlens, RateFloat, RecMap, RecMapError, CM_MB_CONVERSION};

pub mod prelude {
pub use crate::recmap::{read_seqlens, RecMap, RecMapError};
pub use crate::recmap::{read_seqlens, RateFloat, RecMap, RecMapError, CM_MB_CONVERSION};
}

#[cfg(test)]
Expand Down
5 changes: 4 additions & 1 deletion src/numeric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ where

/// Assert to float values are the same up to `eps`.
#[allow(dead_code)]
pub fn assert_floats_eq(left: &[f64], right: &[f64], eps: f64) {
pub fn assert_floats_eq<T>(left: &[T], right: &[T], eps: T)
where
T: Float + Display,
{
assert_eq!(left.len(), right.len());
for (l, r) in left.iter().zip(right.iter()) {
assert_float_eq(*l, *r, eps)
Expand Down
Loading

0 comments on commit 2b0b7b4

Please sign in to comment.