From 9221222476620b13dcfaf87f11af89efb6542dac Mon Sep 17 00:00:00 2001 From: AroneyS Date: Thu, 30 Nov 2023 10:23:37 +1000 Subject: [PATCH] switch genome name lookup to hashmap --- src/skani.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/skani.rs b/src/skani.rs index a734ed3..09bf814 100644 --- a/src/skani.rs +++ b/src/skani.rs @@ -10,6 +10,7 @@ use skani::params::*; use skani::screen; use concurrent_queue::ConcurrentQueue; +use std::collections::HashMap; pub struct SkaniPreclusterer { pub threshold: f32, @@ -44,6 +45,7 @@ fn precluster_skani( .collect::<Vec<String>>(); // Note that sketches is now shuffled! let sketches = &file_io::fastx_to_sketches(&fasta_strings, &sketch_params, true); + let genome_indices: HashMap<_, _> = genome_fasta_paths.into_iter().enumerate().map(|(index, element)| (element, index)).collect(); // Right now implemented by parallel collection into a queue, and then // reprocessed into a BTreeMap. Could potentially be made more efficient by @@ -78,14 +80,8 @@ fn precluster_skani( let query_name = sketches[j].file_name.clone(); debug!("Pushing ANI result for {} and {}", ref_name, query_name); - let ref_index = genome_fasta_paths - .iter() - .position(|&r| r == ref_name) - .unwrap(); - let query_index = genome_fasta_paths - .iter() - .position(|&r| r == query_name) - .unwrap(); + let ref_index = genome_indices.get(&ref_name).unwrap(); + let query_index = genome_indices.get(&query_name).unwrap(); if ani >= threshold { queue .push((ref_index, query_index, ani)) @@ -98,7 +94,7 @@ fn precluster_skani( let mut to_return = SortedPairGenomeDistanceCache::new(); while let Ok((i, j, ani)) = queue.pop() { - to_return.insert((i, j), Some(ani)); + to_return.insert((*i, *j), Some(ani)); } debug!("Finished skani.");