-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
902 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
[package] | ||
name = "recmap" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
csv = "1.3.0" | ||
flate2 = "1.0.28" | ||
genomap = "0.1.3" | ||
indexmap = "2.2.2" | ||
ndarray = "0.15.6" | ||
num-traits = "0.2.17" | ||
serde = { version = "1.0.196", features = ["derive"] } | ||
thiserror = "1.0.56" | ||
|
||
[dev-dependencies] | ||
clap = { version = "4.4.18", features = ["derive"] } | ||
tempfile = "3.10.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# RecMap Library for reading and working with recombination maps in Rust | ||
|
||
`RecMap` objects can be created from reading in a HapMap-formatted | ||
recombination map. Note that since the HapMap recombination format does | ||
not include the chromosome lengths, this must be specified too. | ||
A convenience function `read_seqlens` is provided to read in TSV-formatted | ||
"genome" files of the chromosome names and lengths. | ||
|
||
Here is a example which loads a recombination map from a HapMap-formatted | ||
recombination map and calculates the total map lengths. | ||
|
||
```rust | ||
use recmap::prelude::*; | ||
let seqlens = read_seqlens("hg38_seqlens.tsv") | ||
.expect("could not read seqlens"); | ||
let rec_map = RecMap::from_hapmap("decode_2019_map.txt", seqlens) | ||
.expect("cannot read hapmap"); | ||
|
||
for (name, rate_map) in rec_map.iter() { | ||
println!("{}\t{}", name, rate_map.total_map_length().unwrap()); | ||
} | ||
``` | ||
|
||
This example can be run on the command line with: | ||
|
||
```bash | ||
cargo run --example calc_map_lengths -- --seqlens hg38_seqlens.tsv decode_2019_map.txt | ||
``` | ||
|
||
```rust | ||
use recmap::prelude::*; | ||
let seqlens = read_seqlens("hg38_seqlens.tsv") | ||
.expect("could not read seqlens"); | ||
let rec_map = RecMap::from_hapmap("decode_2019_map.txt", seqlens) | ||
.expect("cannot read hapmap"); | ||
|
||
let positions = vec![11975064, 15007450]; | ||
rec_map.interpolate_map_positions("chr1", &positions); | ||
|
||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
use clap::Parser; | ||
use recmap::prelude::*; | ||
|
||
#[derive(Parser, Debug)] | ||
#[clap(author, version, about, long_about = None)] | ||
struct Args { | ||
/// Path to the sequence lengths file | ||
#[clap(long, value_parser)] | ||
seqlens: String, | ||
|
||
/// Path to the HapMap recombination map file | ||
#[clap(value_parser)] | ||
hapmap: String, | ||
} | ||
|
||
fn main() -> Result<(), RecMapError> { | ||
let args = Args::parse(); | ||
let seqlens = read_seqlens(&args.seqlens)?; | ||
let rec_map = RecMap::from_hapmap(&args.hapmap, seqlens)?; | ||
|
||
for (name, rate_map) in rec_map.iter() { | ||
println!("{}\t{}", name, rate_map.total_map_length().unwrap()); | ||
} | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
//! Encapsulates plaintext and gzip-compressed file input and output. | ||
//! | ||
//! The [`InputFile`] and [`OutputFile`] abstractions are for working with | ||
//! possible gzip-compressed TSV files. | ||
//! | ||
use flate2::read::GzDecoder; | ||
use flate2::write::GzEncoder; | ||
use flate2::Compression; | ||
use std::fs::File; | ||
use std::io::Write; | ||
use std::io::{self, BufWriter}; | ||
use std::io::{BufRead, BufReader, Read}; | ||
use thiserror::Error; | ||
|
||
#[derive(Error, Debug)] | ||
pub enum FileError { | ||
#[error("IO error: {0}")] | ||
IOError(#[from] io::Error), | ||
} | ||
|
||
/// Check if a file is a gzipped by looking for the magic numbers | ||
fn is_gzipped_file(file_path: &str) -> io::Result<bool> { | ||
let mut file = File::open(file_path)?; | ||
let mut buffer = [0; 2]; | ||
file.read_exact(&mut buffer)?; | ||
|
||
Ok(buffer == [0x1f, 0x8b]) | ||
} | ||
|
||
/// Represents an input file. | ||
/// | ||
/// This struct is used to handle operations on an input file, such as reading from the file. | ||
/// This abstracts how data is read in, allowing for both plaintext and gzip-compressed input | ||
/// to be read through a common interface. | ||
pub struct InputFile { | ||
pub filepath: String, | ||
} | ||
|
||
impl InputFile { | ||
/// Constructs a new `InputFile`. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `filepath` - A string slice that holds the path to the file. If the file extension is | ||
/// `.gz`, `InputFile` will automatically uncompress the input. | ||
pub fn new(filepath: &str) -> Self { | ||
Self { | ||
filepath: filepath.to_string(), | ||
} | ||
} | ||
|
||
/// Opens the file and returns a buffered reader. | ||
/// | ||
/// If the file is gzip-compressed (indicated by a ".gz" extension), this method will | ||
/// automatically handle the decompression. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A result containing a `BufReader<Box<dyn Read>>` on success, or a `FileError` on failure. | ||
/// | ||
pub fn reader(&self) -> Result<BufReader<Box<dyn Read>>, FileError> { | ||
let file = File::open(self.filepath.clone())?; | ||
//let is_gzipped_name = self.filepath.ends_with(".gz"); | ||
let is_gzipped = is_gzipped_file(&self.filepath)?; | ||
let reader: Box<dyn Read> = if is_gzipped { | ||
Box::new(GzDecoder::new(file)) | ||
} else { | ||
Box::new(file) | ||
}; | ||
Ok(BufReader::new(reader)) | ||
} | ||
|
||
/// Checks if the first line of the file matches the expected header. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `expect` - A string slice representing the expected header. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A result containing a boolean, `true` if the header matches, `false` otherwise, | ||
/// or a `FileError` on failure. | ||
/// | ||
pub fn has_header(&self, expect: &str) -> Result<bool, FileError> { | ||
let mut buf_reader = self.reader()?; | ||
let mut first_line = String::new(); | ||
buf_reader.read_line(&mut first_line)?; | ||
let has_header = first_line.starts_with(expect); | ||
Ok(has_header) | ||
} | ||
} | ||
|
||
/// Represents an output file. | ||
/// | ||
/// This struct is used to handle operations on an output file, such as writing to the file. | ||
/// This abstracts writing both plaintext and gzip-compressed files. | ||
pub struct OutputFile { | ||
pub filepath: String, | ||
pub header: Option<Vec<String>>, | ||
} | ||
|
||
impl OutputFile { | ||
/// Constructs a new `OutputFile`. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `filepath` - A string slice that holds the path to the file. If the file extension is | ||
/// `.gz`, `OutputFile` will automatically write gzip-compressed output. | ||
/// * `header` - An optional vector of strings representing commented header lines to be written to the file. | ||
pub fn new(filepath: &str, header: Option<Vec<String>>) -> Self { | ||
Self { | ||
filepath: filepath.to_string(), | ||
header, | ||
} | ||
} | ||
|
||
/// Opens the file and returns a writer. | ||
/// | ||
/// If the file path ends with ".gz", the file is treated as gzip-compressed, and the | ||
/// function will handle compression automatically. If a header is set, it will be written | ||
/// to the file. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A result containing a `Box<dyn Write>` on success, or an `io::Error` on failure. | ||
pub fn writer(&self) -> Result<Box<dyn Write>, io::Error> { | ||
let outfile = &self.filepath; | ||
let is_gzip = outfile.ends_with(".gz"); | ||
let mut writer: Box<dyn Write> = if is_gzip { | ||
Box::new(BufWriter::new(GzEncoder::new( | ||
File::create(outfile)?, | ||
Compression::default(), | ||
))) | ||
} else { | ||
Box::new(BufWriter::new(File::create(outfile)?)) | ||
}; | ||
// write header if one is set | ||
if let Some(entries) = &self.header { | ||
for entry in entries { | ||
writeln!(writer, "#{}", entry)?; | ||
} | ||
} | ||
Ok(writer) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
use indexmap::IndexMap; | ||
|
||
use super::recmap::RecMap; | ||
|
||
pub type GenomeCoord = i32; | ||
|
||
pub struct Genome { | ||
pub seqlens: IndexMap<String, GenomeCoord>, | ||
pub recmap: RecMap, | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
//! Functionality for reading and working with recombination maps. | ||
//! | ||
//! [`RecMap`] objects can be created from reading in a HapMap-formatted | ||
//! recombination map. Note that since the HapMap recombination format does | ||
//! not include the chromosome lengths, this must be specified too. | ||
//! A convenience function [`read_seqlens`] is provided to read in TSV-formatted | ||
//! "genome" files of the chromosome names and lengths. | ||
//! | ||
//! Here is a example which loads a recombination map from a HapMap-formatted | ||
//! recombination map and calculates the total map lengths. | ||
//! | ||
//! ```no_run | ||
//! use recmap::prelude::*; | ||
//! let seqlens = read_seqlens("hg38_seqlens.tsv") | ||
//! .expect("could not read seqlens"); | ||
//! let rec_map = RecMap::from_hapmap("decode_2019_map.txt", seqlens) | ||
//! .expect("cannot read hapmap"); | ||
//! | ||
//! for (name, rate_map) in rec_map.iter() { | ||
//! println!("{}\t{}", name, rate_map.total_map_length().unwrap()); | ||
//! } | ||
//! ``` | ||
//! | ||
//! This example can be run on the command line with: | ||
//! | ||
//! ```bash | ||
//! cargo run --example calc_map_lengths -- --seqlens hg38_seqlens.tsv decode_2019_map.txt | ||
//! ``` | ||
//! | ||
//! ```no_run | ||
//! use recmap::prelude::*; | ||
//! let seqlens = read_seqlens("hg38_seqlens.tsv") | ||
//! .expect("could not read seqlens"); | ||
//! let rec_map = RecMap::from_hapmap("decode_2019_map.txt", seqlens) | ||
//! .expect("cannot read hapmap"); | ||
//! | ||
//! let positions = vec![11975064, 15007450]; | ||
//! rec_map.interpolate_map_positions("chr1", &positions); | ||
//! | ||
//! ``` | ||
|
||
mod file; | ||
mod numeric; | ||
pub mod recmap; | ||
|
||
pub use recmap::{read_seqlens, RecMap, RecMapError}; | ||
|
||
pub mod prelude { | ||
pub use crate::recmap::{read_seqlens, RecMap, RecMapError}; | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests {} |
Oops, something went wrong.