From 0ac84ef6a5b0f6fcc161e19244b6f76b586cf955 Mon Sep 17 00:00:00 2001 From: Vince Buffalo Date: Fri, 9 Feb 2024 12:12:59 -0800 Subject: [PATCH] new command line tool --- .gitignore | 2 + Cargo.lock | 510 ++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 15 +- src/file.rs | 10 +- src/lib.rs | 2 +- src/main.rs | 190 +++++++++++++++++++ src/recmap.rs | 24 ++- 7 files changed, 739 insertions(+), 14 deletions(-) create mode 100644 Cargo.lock create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore index c615549..2519b2d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ decode_2019_map.txt hg38_seqlens.tsv +hg38_1Mb_windows.bed +genetic_map_GRCh37_chr1.txt diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..8bd5f2c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,510 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "anstream" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80c21025abd42669a92efc996ef13cfb2c5c627858421ea58d5c3b331a6c134f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "458bf1f341769dfcf849846f65dffdf9146daa56bcd2a47cb4e1de9915567c99" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "genomap" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddeaf40335f61563a642ddcbb8516f95652844fa39ca43c390bd256657b7129" +dependencies = [ + "fnv", + "thiserror", +] + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "indexmap" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "matrixmultiply" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "num-complex" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "recmap" +version = "0.2.2" +dependencies = [ + "clap", + "csv", + "flate2", + "genomap", + "indexmap", + "ndarray", + "num-traits", + "serde", + "tempfile", + "thiserror", +] + +[[package]] +name = "rustix" +version = "0.38.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "serde" +version = "1.0.196" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.196" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "strsim" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys", +] + +[[package]] +name = "thiserror" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/Cargo.toml b/Cargo.toml index 4ba6c79..a4fc9cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "recmap" -version = "0.2.1" +version = "0.2.2" edition = "2021" license = "MIT" authors = ["Vince Buffalo "] @@ -10,6 +10,18 @@ documentation = "https://docs.rs/recmap/" repository = "https://github.com/vsbuffalo/recmap" description = "A library for reading and working with recombination maps in Rust" +[lib] +name = "recmap" +path = "src/lib.rs" + +[features] +cli = [ "clap" ] + +[[bin]] +name = "recmap" +path = "src/main.rs" +required-features = ["cli"] + [dependencies] csv = "1.3.0" flate2 = "1.0.28" @@ -19,6 +31,7 @@ ndarray = "0.15.6" num-traits = "0.2.17" serde = { version = "1.0.196", features = ["derive"] } thiserror = "1.0.56" +clap = { version = "4.4.18", features = ["derive"], optional = true } [dev-dependencies] clap = { version = "4.4.18", features = ["derive"] } diff --git a/src/file.rs b/src/file.rs index a7c7f70..a040d18 100644 --- a/src/file.rs +++ b/src/file.rs @@ -77,7 +77,11 @@ impl InputFile { } /// Collects comment lines and/or a line at the start of the file. - pub fn collect_metadata(&mut self, comment: &str, header: &str) -> Result { + pub fn collect_metadata( + &mut self, + comment: &str, + header: Option<&str>, + ) -> Result { let mut buf_reader = self.reader()?; let mut comments = Vec::new(); let mut line = String::new(); @@ -86,8 +90,8 @@ impl InputFile { if line.starts_with(comment) { comments.push(line.trim_end().to_string()); self.skip_lines += 1; - } else { - if line.starts_with(header) { + } else if let Some(header_string) = header { + if line.starts_with(header_string) { self.header = Some(line.trim_end().to_string()); self.skip_lines += 1; // We only handle one header line. If there are more, the diff --git a/src/lib.rs b/src/lib.rs index bd84d5c..0ef235b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,7 +45,7 @@ //! //! ``` -mod file; +pub mod file; mod numeric; pub mod recmap; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..7891363 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,190 @@ +use clap::{Parser, Subcommand}; +use recmap::recmap::format_float; +use recmap::recmap::Position; +use recmap::RateFloat; +use recmap::CM_MB_CONVERSION; +use recmap::{read_seqlens, RecMap, RecMapError}; +use std::io; +use std::io::BufRead; +use std::io::Write; + +const INFO: &str = "\ +recmap: manipulate recombination maps +usage: recmap [--help] + +Subcommands: + + interp: linearly interpolate the positions in a BED file. + +"; + +#[derive(Parser)] +#[clap(name = "sdf")] +#[clap(about = INFO)] +struct Cli { + #[arg(short, long, action = clap::ArgAction::Count)] + debug: u8, + + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand)] +enum Commands { + /// Linearly interpolate the recombination map position for the specified BED3 file. + /// + /// This will output a TSV with the following columns: + /// + /// - chromosome name + /// - start position (0-indexed, inclusive) + /// - end position (0-indexed, exclusive) + /// - cumulative map position at start (in centiMorgans) + /// - cumulative map position at end (in centiMorgans) + /// - recombination rate in [start, end) (in cM/Mb) + /// + /// Example: + /// $ recmap interp --seqlens hg38_seqlens.tsv --hapmap decode_2019_map.txt \ + /// hg38_1Mb_windows.bed --output decode_2019_map_1Mb_summaries.tsv --header + Interp { + /// a TSV file of chromosome names and their lengths + #[arg(long, required = true)] + seqlens: String, + /// the input recombination map in HapMap format + #[arg(long, required = true)] + hapmap: String, + /// the output file path (if not set, uses standard out) + #[arg(long)] + output: Option, + /// a BED-like TSV file of chromosome, position columns to interpolate at + #[arg(required = true)] + bedfile: String, + /// Include a header + #[arg(long, default_value_t = false)] + header: bool, + }, +} + +fn write_entry( + writer: &mut Box, + recmap: &RecMap, + chrom: &str, + starts: &[Position], + ends: &[Position], +) -> Result<(), RecMapError> { + let map_starts = recmap.interpolate_map_positions(chrom, starts)?; + let map_ends = recmap.interpolate_map_positions(chrom, ends)?; + + let iter = starts.iter().zip(ends).zip(map_starts).zip(map_ends); + + for (((start, end), map_start), map_end) in iter { + let width = end - start; + let diff = (map_end - map_start) / width as RateFloat; + writeln!( + writer, + "{}\t{}\t{}\t{}\t{}\t{}", + chrom, + start, + end, + format_float(map_start * 100.), + format_float(map_end * 100.), + format_float(diff / CM_MB_CONVERSION) + )?; + } + Ok(()) +} + +fn interpolate_map( + bedfile: &str, + hapmap: &str, + seqlens: &str, + output: Option<&str>, + header: bool, +) -> Result<(), RecMapError> { + let sl = read_seqlens(seqlens)?; + let rm = RecMap::from_hapmap(hapmap, &sl)?; + + // open writer, possibly to stdout + let mut writer = if let Some(filepath) = output { + let output = recmap::file::OutputFile::new(filepath, None); + output.writer()? + } else { + Box::new(io::stdout()) + }; + + if header { + // write the header + writeln!(writer, "chrom\tstart\tend\tstart_map\tend_map\trate")?; + } + + // open reader + let mut tsvfile = recmap::file::InputFile::new(bedfile); + // remove the header, if there + let _has_metadata = tsvfile.collect_metadata("#", None); + let reader = tsvfile.continue_reading()?; + + // process input + let mut current_chrom = None; + let mut starts = Vec::new(); + let mut ends = Vec::new(); + for result in reader.lines() { + let line = result?; + let fields: Vec<&str> = line.split_whitespace().collect(); + let chrom = fields.first().ok_or(RecMapError::MissingField)?.to_string(); + let start_str = fields.get(1).ok_or(RecMapError::MissingField)?; + let start: Position = start_str.parse().map_err(|_| { + RecMapError::ParseError(format!("Failed to parse end from string: {}", start_str)) + })?; + let end_str = fields.get(2).ok_or(RecMapError::MissingField)?; + let end: Position = end_str.parse().map_err(|_| { + RecMapError::ParseError(format!("Failed to parse end from string: {}", end_str)) + })?; + + match current_chrom { + None => current_chrom = Some(chrom), + Some(ref cur_chrom) => { + if *cur_chrom != chrom { + // we've hit a new chromosome. Run the interpolation. + write_entry(&mut writer, &rm, &cur_chrom, &starts, &ends)?; + starts.clear(); + ends.clear(); + current_chrom = Some(chrom); + } + starts.push(start); + ends.push(end); + } + } + } + + // process last chromosome + if let Some(cur_chrom) = current_chrom { + write_entry(&mut writer, &rm, &cur_chrom, &starts, &ends)?; + } + Ok(()) +} + +fn run() -> Result<(), RecMapError> { + let cli = Cli::parse(); + match &cli.command { + Some(Commands::Interp { + bedfile, + hapmap, + seqlens, + output, + header, + }) => interpolate_map(bedfile, hapmap, seqlens, output.as_deref(), *header), + None => { + println!("{}\n", INFO); + std::process::exit(1); + } + } +} + +fn main() { + match run() { + Ok(_) => {} + Err(e) => { + eprintln!("Error: {}", e); + std::process::exit(1); + } + } +} diff --git a/src/recmap.rs b/src/recmap.rs index 8b04673..780cfc1 100644 --- a/src/recmap.rs +++ b/src/recmap.rs @@ -1,8 +1,8 @@ use genomap::{GenomeMap, GenomeMapError}; use indexmap::map::IndexMap; -use ndarray::Array1; use num_traits::Float; use serde::{Deserialize, Serialize}; +use std::fmt::Display; use std::fmt::LowerExp; use std::io; use std::io::BufRead; @@ -26,6 +26,7 @@ pub type Position = u64; pub const CM_MB_CONVERSION: RateFloat = 1e-8; pub const RATE_PRECISION: usize = 8; +pub const SCI_NOTATION_THRESH: usize = 8; #[derive(Error, Debug)] pub enum RecMapError { @@ -41,7 +42,7 @@ pub enum RecMapError { ParseError(String), #[error("Improper Rate value, either NaN or negative ({0})")] ImproperRate(String), - #[error("Chromosome key '{0}' does not exist")] + #[error("Chromosome key '{0}' does not exist in the recombination map")] NoChrom(String), #[error("HapMap file not sorted")] HapMapNotSorted, @@ -226,7 +227,7 @@ impl RecMap { ) -> Result { let mut input_file = InputFile::new(filepath); - let _has_metadata = input_file.collect_metadata("#", "Chr")?; + let _has_metadata = input_file.collect_metadata("#", Some("Chr"))?; let reader = input_file.continue_reading()?; let mut rec_map: GenomeMap = GenomeMap::new(); @@ -273,7 +274,6 @@ impl RecMap { // get the position and rate column, parsing into proper numeric types let end_str = fields.get(1).ok_or(RecMapError::MissingField)?; let end: Position = end_str.parse().map_err(|_| { - dbg!(&end_str); RecMapError::ParseError(format!("Failed to parse end from string: {}", end_str)) })?; @@ -425,12 +425,12 @@ impl RecMap { &self, chrom: &str, positions: &[Position], - ) -> Result, RecMapError> { + ) -> Result, RecMapError> { let positions: Vec = positions .iter() .map(|p| self.interpolate_map_position(chrom, *p)) .collect::, _>>()?; - Ok(Array1::from_vec(positions)) + Ok(positions) } /// Write recombination map to HapMap-formatted file. @@ -539,11 +539,17 @@ impl RecMap { } } -fn format_float(x: T) -> String +pub fn format_float(x: T) -> String where - T: Float + LowerExp, + T: Float + LowerExp + Display, { - format!("{:.1$e}", x, RATE_PRECISION) + let min = T::from(SCI_NOTATION_THRESH).unwrap(); + let max = T::from(SCI_NOTATION_THRESH).unwrap(); + if x.abs().log10() < -min || x.abs().log10() > max { + format!("{:.1$e}", x, RATE_PRECISION) + } else { + format!("{:.*}", RATE_PRECISION, x) + } } #[cfg(test)]