From a5c3a00699a76fae74fc2377c56c984ee60374e6 Mon Sep 17 00:00:00 2001 From: Vince Buffalo Date: Sat, 24 Feb 2024 23:42:48 -0800 Subject: [PATCH] Prototype of bedtools map working with multiple operators. - New `TsvConfig` type for e.g. missing data string, with `BED_TSV` definition. - Fixed issues with `left_overlaps()`. - New `DatumType` enum for data types. - New `IntoDatumType` trait for conversion. - Fix to `median()`. - `Operation.run()` method for operations. - New AI-generated more helpful errors. - `to_tsv()` now with `TsvConfig` - lazy static dep. added. - Better parsing error messages. - New join data iterators (mostly for error checking. --- Cargo.toml | 1 + src/commands.rs | 90 +++++++++++-------- src/data/mod.rs | 64 +++++++++++++- src/data/operations.rs | 94 +++++++++++--------- src/error.rs | 140 ++++++++++++++++++++++++------ src/granges.rs | 161 +++++++++++++++++++++------------- src/io/mod.rs | 1 + src/io/parsers.rs | 82 +++++++++++++----- src/io/tsv.rs | 72 ++++++++++++++++ src/join.rs | 167 +++++++++++++++++++----------------- src/lib.rs | 4 +- src/main/mod.rs | 35 +++++--- src/ranges/mod.rs | 14 +-- src/sequences/nucleotide.rs | 10 ++- src/traits.rs | 72 +++++++--------- 15 files changed, 685 insertions(+), 322 deletions(-) create mode 100644 src/io/tsv.rs diff --git a/Cargo.toml b/Cargo.toml index 56ff344..d5ab0b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ polars = { version = "0.37.0", optional = true } bytes = "1.5.0" ndarray-npy = { version = "0.8.1", optional = true } num-traits = "0.2.18" +lazy_static = "1.4.0" [features] # cli = [ "clap" ] # TODO make feature diff --git a/src/commands.rs b/src/commands.rs index 84346fb..2499d76 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -3,8 +3,12 @@ use std::path::PathBuf; use crate::{ - data::operations::Operation, - io::{parsers::{GenomicRangesParser, Bed5Iterator}, OutputStream}, + data::{operations::Operation, DatumType}, + io::{ + parsers::{Bed5Iterator, 
GenomicRangesParser}, + tsv::BED_TSV, + OutputStream, + }, prelude::*, ranges::{operations::adjust_range, GenomicRangeEmptyRecord, GenomicRangeRecord}, reporting::{CommandOutput, Report}, @@ -80,7 +84,7 @@ pub fn granges_adjust( let possibly_adjusted_range = adjust_range(range, -both, both, length); if let Some(range_adjusted) = possibly_adjusted_range { - writeln!(writer, "{}", &range_adjusted.to_tsv())?; + writeln!(writer, "{}", &range_adjusted.to_tsv(&BED_TSV))?; } else { skipped_ranges += 1; } @@ -101,10 +105,11 @@ pub fn granges_adjust( match ranges_iter { GenomicRangesParser::Bed3(iter) => { let gr = GRangesEmpty::from_iter(iter, &genome)?; - gr.adjust_ranges(-both, both).to_tsv(output)? + gr.adjust_ranges(-both, both).to_tsv(output, &BED_TSV)? } GenomicRangesParser::Bed5(iter) => { - unimplemented!() + let gr = GRanges::from_iter(iter, &genome)?; + gr.adjust_ranges(-both, both).to_tsv(output, &BED_TSV)? } GenomicRangesParser::Bedlike(iter) => { // Note the call to try_unwrap_data() here: this is because @@ -112,7 +117,7 @@ pub fn granges_adjust( // values means that writing to TSV doesn't have to deal with this (which // always creates headaches). let gr = GRanges::from_iter(iter.try_unwrap_data(), &genome)?; - gr.adjust_ranges(-both, both).to_tsv(output)? + gr.adjust_ranges(-both, both).to_tsv(output, &BED_TSV)? 
} GenomicRangesParser::Unsupported => { return Err(GRangesError::UnsupportedGenomicRangesFileFormat) @@ -158,7 +163,7 @@ pub fn granges_filter( let right_iter = GenomicRangesFile::parsing_iterator(right_path)?; // for reporting stuff to the user - let mut report = Report::new(); + let report = Report::new(); match (left_iter, right_iter) { (GenomicRangesParser::Bed3(left), GenomicRangesParser::Bed3(right)) => { @@ -176,7 +181,7 @@ pub fn granges_filter( let right_gr = right_gr.into_coitrees()?; let semijoin = left_gr.filter_overlaps(&right_gr)?; - semijoin.to_tsv(output)?; + semijoin.to_tsv(output, &BED_TSV)?; Ok(CommandOutput::new((), report)) } @@ -198,7 +203,7 @@ pub fn granges_filter( let right_gr = right_gr.into_coitrees()?; let semijoin = left_gr.filter_overlaps(&right_gr)?; - semijoin.to_tsv(output)?; + semijoin.to_tsv(output, &BED_TSV)?; Ok(CommandOutput::new((), report)) } @@ -218,7 +223,7 @@ pub fn granges_filter( let right_gr = right_gr.into_coitrees()?; let semijoin = left_gr.filter_overlaps(&right_gr)?; - semijoin.to_tsv(output)?; + semijoin.to_tsv(output, &BED_TSV)?; Ok(CommandOutput::new((), report)) } @@ -241,7 +246,7 @@ pub fn granges_filter( let right_gr = right_gr.into_coitrees()?; let intersection = left_gr.filter_overlaps(&right_gr)?; - intersection.to_tsv(output)?; + intersection.to_tsv(output, &BED_TSV)?; Ok(CommandOutput::new((), report)) } @@ -298,7 +303,7 @@ pub fn granges_flank( } else { GRangesEmpty::from_iter(iter, &genome)? }; - gr.flanking_ranges(left, right)?.to_tsv(output)? + gr.flanking_ranges(left, right)?.to_tsv(output, &BED_TSV)? } GenomicRangesParser::Bed5(_iter) => { unimplemented!() @@ -309,7 +314,7 @@ pub fn granges_flank( } else { GRanges::from_iter(iter.try_unwrap_data(), &genome)? }; - gr.flanking_ranges(left, right)?.to_tsv(output)? + gr.flanking_ranges(left, right)?.to_tsv(output, &BED_TSV)? 
} GenomicRangesParser::Unsupported => { return Err(GRangesError::UnsupportedGenomicRangesFileFormat) @@ -336,7 +341,7 @@ pub fn granges_flank( let flanking_ranges = range .flanking_ranges::>(left, right, length); for flanking_range in flanking_ranges { - writeln!(writer, "{}", &flanking_range.to_tsv())?; + writeln!(writer, "{}", &flanking_range.to_tsv(&BED_TSV))?; } } } else { @@ -350,7 +355,7 @@ pub fn granges_flank( let flanking_ranges = range .flanking_ranges::(left, right, length); for flanking_range in flanking_ranges { - writeln!(writer, "{}", &flanking_range.to_tsv())?; + writeln!(writer, "{}", &flanking_range.to_tsv(&BED_TSV))?; } } } @@ -370,7 +375,7 @@ pub fn granges_flank( let flanking_ranges = range .flanking_ranges::>(left, right, length); for flanking_range in flanking_ranges { - writeln!(writer, "{}", &flanking_range.to_tsv())?; + writeln!(writer, "{}", &flanking_range.to_tsv(&BED_TSV))?; } } } else { @@ -384,7 +389,7 @@ pub fn granges_flank( let flanking_ranges = range .flanking_ranges::(left, right, length); for flanking_range in flanking_ranges { - writeln!(writer, "{}", &flanking_range.to_tsv())?; + writeln!(writer, "{}", &flanking_range.to_tsv(&BED_TSV))?; } } } @@ -398,7 +403,6 @@ pub fn granges_flank( Ok(CommandOutput::new((), report)) } - /// # Developer Notes /// This function is a great way to see GRange's methods in action. pub fn granges_map( @@ -411,7 +415,7 @@ pub fn granges_map( ) -> Result, GRangesError> { let genome = read_seqlens(seqlens)?; let seqnames: Vec = genome.keys().cloned().collect(); - let mut report = Report::new(); + let report = Report::new(); let left_iter = Bed3Iterator::new(left_path)?; let right_iter = Bed5Iterator::new(right_path)?; @@ -427,22 +431,40 @@ pub fn granges_map( right_gr = GRanges::from_iter(right_iter, &genome)?; } - let right_gr = right_gr.into_coitrees()? 
- .map_data(|bed5_cols| { - // extract out just the score - bed5_cols.score - })?; - - let left_join = left_gr.left_overlaps(&right_gr)?; + if left_gr.is_empty() { + return Err(GRangesError::NoRows); + } + if right_gr.is_empty() { + return Err(GRangesError::NoRows); + } - let new_column = left_join.map_over_joins(|join_data| { - join_data. - operations.iter().map(|operation| operation.run(data)).collect() + let right_gr = { + // Convert to interval trees for join. + right_gr + .into_coitrees()? + // Select out the score. + .map_data(|bed5_cols| { + // Extract out just the score. + bed5_cols.score + })? + }; + + // Find the overlaps. + let left_join_gr = left_gr.left_overlaps(&right_gr)?; + + // Process all the overlaps. + let result_gr = left_join_gr.map_over_joins(|join_data| { + // Get the "right data" -- the BED5 scores. + let overlap_scores = join_data.right_data; + + // Run all operations on the scores. + operations + .iter() + .map(|operation| operation.run(overlap_scores.as_ref())) + .collect::>() })?; - - // TODO -- map function - // left_join.to_tsv(output)?; + result_gr.to_tsv(output, &BED_TSV)?; Ok(CommandOutput::new((), report)) } @@ -453,7 +475,7 @@ pub fn granges_random_bed( num: usize, output: Option>, sort: bool, - ) -> Result, GRangesError> { +) -> Result, GRangesError> { // get the genome info let genome = read_seqlens(seqlens)?; @@ -463,7 +485,7 @@ pub fn granges_random_bed( gr = gr.sort(); } - gr.to_tsv(output)?; + gr.to_tsv(output, &BED_TSV)?; let report = Report::new(); Ok(CommandOutput::new((), report)) diff --git a/src/data/mod.rs b/src/data/mod.rs index c3ac1cb..532428a 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -1,10 +1,72 @@ //! Data container implementations. //! -use crate::traits::DataContainer; +use crate::traits::{DataContainer, IntoDatumType}; pub mod operations; pub mod vec; +/// These are core supported data types stored in an `enum`, to +/// unify the types that come out of standard operations like +/// `select()`. 
+#[derive(Debug, Clone)] +pub enum DatumType { + Float32(f32), + Float64(f64), + String(String), + Integer32(i32), + Integer64(i64), + Unsigned32(u32), + Unsigned64(u64), + NoValue, +} + +impl IntoDatumType for f64 { + fn into_data_type(self) -> DatumType { + DatumType::Float64(self) + } +} +impl IntoDatumType for i64 { + fn into_data_type(self) -> DatumType { + DatumType::Integer64(self) + } +} +impl IntoDatumType for String { + fn into_data_type(self) -> DatumType { + DatumType::String(self) + } +} + +impl IntoDatumType for f32 { + fn into_data_type(self) -> DatumType { + DatumType::Float32(self) + } +} + +impl IntoDatumType for i32 { + fn into_data_type(self) -> DatumType { + DatumType::Integer32(self) + } +} + +impl IntoDatumType for u32 { + fn into_data_type(self) -> DatumType { + DatumType::Unsigned32(self) + } +} + +impl IntoDatumType for u64 { + fn into_data_type(self) -> DatumType { + DatumType::Unsigned64(self) + } +} + +// Conversion from field types to `DatumType` +impl From for DatumType { + fn from(item: T) -> Self { + item.into_data_type() + } +} + impl DataContainer for Vec {} impl DataContainer for () {} diff --git a/src/data/operations.rs b/src/data/operations.rs index 7677b33..e03356c 100644 --- a/src/data/operations.rs +++ b/src/data/operations.rs @@ -1,35 +1,32 @@ //! Implementations of various operations on data. //! +use clap::ValueEnum; use num_traits::{Float, ToPrimitive}; use std::iter::Sum; -use crate::error::GRangesError; +use super::DatumType; +use crate::traits::IntoDatumType; /// Calculate the median. /// /// This will clone and turn `numbers` into a `Vec`. 
-pub fn median(numbers: &[F]) -> F { +pub fn median(numbers: &[F]) -> Option { + if numbers.is_empty() { + return None; + } let mut numbers = numbers.to_vec(); - numbers.sort_unstable(); + numbers.sort_by(|a, b| a.partial_cmp(b).unwrap()); let mid = numbers.len() / 2; if numbers.len() % 2 == 0 { - (numbers[mid - 1] + numbers[mid]) / F::from(2.0).unwrap() + Some((numbers[mid - 1] + numbers[mid]) / F::from(2.0).unwrap()) } else { - numbers[mid] + Some(numbers[mid]) } } -/// A subset of types that represent operation results types. -pub enum OperationResult -where -T: Float, -{ - Float(T), - String(String), -} - /// The (subset of) standard `bedtools map` operations. +#[derive(Clone, Debug, ValueEnum)] pub enum Operation { Sum, Min, @@ -40,36 +37,49 @@ pub enum Operation { } impl Operation { - /// Do a particular (summarizing) operation on some generic data. - pub fn run(&self, data: &[T]) -> Option> - where - T: Float + Sum + ToPrimitive + Ord + Clone + ToString, - { - match self { - Operation::Sum => { + pub fn run(&self, data: &[T]) -> DatumType + where + T: Float + Sum + ToPrimitive + Clone + ToString, + { + match self { + Operation::Sum => { + let sum: T = data.iter().cloned().sum(); + sum.into_data_type() + } + Operation::Min => { + let min = data + .iter() + .filter(|x| x.is_finite()) + .cloned() + .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Greater)); + min.map_or(DatumType::NoValue, |x| x.into_data_type()) + } + Operation::Max => { + let max = data + .iter() + .filter(|x| x.is_finite()) + .cloned() + .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Less)); + max.map_or(DatumType::NoValue, |x| x.into_data_type()) + } + Operation::Mean => { + if data.is_empty() { + DatumType::NoValue + } else { let sum: T = data.iter().cloned().sum(); - Some(OperationResult::Float(sum)) - } - Operation::Min => data.iter().cloned().min().map(OperationResult::Float), - Operation::Max => data.iter().cloned().max().map(OperationResult::Float), - 
Operation::Mean => { - if data.is_empty() { - None - } else { - let sum: T = data.iter().cloned().sum(); - let mean = sum / T::from(data.len()).unwrap(); - Some(OperationResult::Float(mean)) - } - } - Operation::Median => Some(OperationResult::Float(median(data))), - Operation::Collapse => { - let collapsed = data - .iter() - .map(|num| num.to_string()) - .collect::>() - .join(", "); - Some(OperationResult::String(collapsed)) + let mean = sum / T::from(data.len()).unwrap(); + mean.into_data_type() } } + Operation::Median => median(data).map_or(DatumType::NoValue, |x| x.into_data_type()), + Operation::Collapse => { + let collapsed = data + .iter() + .map(|num| num.to_string()) + .collect::>() + .join(", "); + DatumType::String(collapsed) + } } + } } diff --git a/src/error.rs b/src/error.rs index 4224ba6..4f21944 100644 --- a/src/error.rs +++ b/src/error.rs @@ -8,62 +8,152 @@ use std::{ }; use thiserror::Error; -/// The [`GRangesError`] defines the standard set of errors that should -/// be passed to the user. +///// The [`GRangesError`] defines the standard set of errors that should +///// be passed to the user. +//#[derive(Debug, Error)] +//pub enum GRangesError { +// // IO related errors +// #[error("File reading eror: {0}")] +// IOError(#[from] std::io::Error), +// +// // File parsing related errors +// #[error("Could not detect genomic ranges filetype from extension.")] +// CouldNotDetectRangesFiletype, +// #[error("Integer parsing error: {0}")] +// ParseIntError(#[from] ParseIntError), +// #[error("Float parsing error: {0}")] +// ParseFloatError(#[from] ParseFloatError), +// #[error("Bed-like file has too few columns. 
The first three columns must be sequence name, and start and end positions.\nLine: {0}")] +// BedlikeTooFewColumns(String), +// #[error("File has invalid column type entry: {0}")] +// InvalidColumnType(String), +// #[error("Genome file is invalid: {0}")] +// InvalidGenomeFile(String), +// #[error("Invalid BED string: must be either '+', '-', or '.'")] +// InvalidString, +// +// // BedlikeIterator errors +// #[error("GenomicRangeRecord encountered with None data in try_unwrap_data()")] +// TryUnwrapDataError, +// #[error("GenomicRangeRecord.try_unwrap_data() called on TSV with fewer than 4 columns")] +// TooFewColumns, +// +// // Invalid genomic range errors +// #[error("Range invalid: start ({0}) must be greater than end ({1})")] +// InvalidGenomicRange(Position, Position), +// #[error("Range [{0}, {1}] is invalid for sequence of length {2}")] +// InvalidGenomicRangeForSequence(Position, Position, Position), +// #[error("Sequence name '{0}' is not in the ranges container")] +// MissingSequence(String), +// #[error("Error encountered in genomap::GenomeMap")] +// GenomeMapError(#[from] GenomeMapError), +// +// #[error("Invalid GRanges object: no data container.")] +// NoDataContainer, +// +// // Sequences related errors +// #[error("Sequence name '{0}' was not found.")] +// MissingSequenceName(String), +// +// // FASTA/noodles related errors +// #[error("Error encountered in trying to convert bytes to UTF8 string.")] +// FromUtf8Error(#[from] FromUtf8Error), +// +// // Command line tool related errors +// #[error("Unsupported genomic range format")] +// UnsupportedGenomicRangesFileFormat, +// #[error("Command line argument error: {0}")] +// ArgumentError(#[from] clap::error::Error), +// #[error("No such operation: {0}")] +// NoSuchOperation(String), +//} + +// AICODE/NOTE: these are AI-generated from the above human-written ones. 
#[derive(Debug, Error)] pub enum GRangesError { // IO related errors - #[error("File reading eror: {0}")] + #[error("File reading error: {0}. Please check if the file exists and you have permission to read it.")] IOError(#[from] std::io::Error), // File parsing related errors - #[error("Could not detect genomic ranges filetype from extension.")] + #[error("Could not determine the file type based on its extension. Ensure the file has a standard genomic data extension (.bed, .gff, etc.).")] CouldNotDetectRangesFiletype, - #[error("Integer parsing error: {0}")] + + #[error("Error parsing an integer value: {0}. Check the file for non-integer entries where integers are expected.")] ParseIntError(#[from] ParseIntError), - #[error("Float parsing error: {0}")] + + #[error("Error parsing a floating-point value: {0}. Check the file for non-numeric entries where floating-point numbers are expected.")] ParseFloatError(#[from] ParseFloatError), - #[error("Bed-like file has too few columns. The first three columns must be sequence name, and start and end positions.\nLine: {0}")] - BedlikeTooFewColumns(String), - #[error("File has invalid column type entry: {0}")] - InvalidColumnType(String), - #[error("Genome file is invalid: {0}")] + + #[error( + "The provided BED file has fewer columns ({0}) than expected ({1}). Problematic line:\n{2}" + )] + BedTooFewColumns(usize, usize, String), + + #[error("The provided BED3 file has fewer columns ({0}) than expected (3).\nAt least three columns are needed: sequence name, start, and end positions.\nProblematic line:\n{1}")] + Bed3TooFewColumns(usize, String), + + #[error( + "Invalid column type: expected {expected_type} but got '{found_value}' in line: '{line}'." + )] + InvalidColumnType { + expected_type: String, + found_value: String, + line: String, + }, + + #[error("The input file had no rows.")] + NoRows, + + #[error("The genome file is invalid: {0}. 
Please verify the file's format and contents.")] InvalidGenomeFile(String), - #[error("Invalid BED string: must be either '+', '-', or '.'")] + + #[error("Invalid BED format detected. Each entry must be '+', '-', or '.' to represent strand information.")] InvalidString, // BedlikeIterator errors - #[error("GenomicRangeRecord encountered with None data in try_unwrap_data()")] + #[error("Attempted to unwrap genomic range data, but none was present. This operation requires data to be associated with each genomic range.")] TryUnwrapDataError, - #[error("GenomicRangeRecord.try_unwrap_data() called on TSV with fewer than 4 columns")] + + #[error("Attempted to unwrap genomic range data from a TSV with fewer than 4 columns. Ensure your TSV includes the necessary genomic range columns plus additional data.")] TooFewColumns, // Invalid genomic range errors - #[error("Range invalid: start ({0}) must be greater than end ({1})")] + #[error("Invalid genomic range specified: start position ({0}) must be less than or equal to the end position ({1}).")] InvalidGenomicRange(Position, Position), - #[error("Range [{0}, {1}] is invalid for sequence of length {2}")] + + #[error("The specified genomic range [{0}, {1}] is invalid for a sequence of length {2}. Adjust the range to fit within the sequence length.")] InvalidGenomicRangeForSequence(Position, Position, Position), - #[error("Sequence name '{0}' is not in the ranges container")] + + #[error("The sequence name '{0}' is not found within the provided ranges container. Check the sequence names for typos or missing entries.")] MissingSequence(String), - #[error("Error encountered in genomap::GenomeMap")] + + #[error("An error was encountered with the underlying genomap::GenomeMap: {0}")] GenomeMapError(#[from] GenomeMapError), - #[error("Invalid GRanges object: no data container.")] + #[error("The GRanges object is invalid because it lacks an associated data container. 
Ensure data is loaded or associated with the GRanges object before attempting operations that require data.")] NoDataContainer, // Sequences related errors - #[error("Sequence name '{0}' was not found.")] + #[error("The sequence name '{0}' was not found in the dataset. Verify the sequence names and try again.")] MissingSequenceName(String), // FASTA/noodles related errors - #[error("Error encountered in trying to convert bytes to UTF8 string.")] + #[error("An error occurred while converting bytes to a UTF-8 string. This often indicates invalid or corrupted data.")] FromUtf8Error(#[from] FromUtf8Error), // Command line tool related errors - #[error("Unsupported genomic range format")] + #[error("The specified genomic range file format is unsupported. Check the documentation for a list of supported formats.")] UnsupportedGenomicRangesFileFormat, - #[error("Command line argument error: {0}")] + + #[error("There was an error with the provided command line argument: {0}. Review the command line arguments for any mistakes.")] ArgumentError(#[from] clap::error::Error), - #[error("No such operation: {0}")] - NoSuchOperation, + + #[error( + "The requested operation '{0}' does not exist. Check the list of supported operations." + )] + NoSuchOperation(String), + + #[error("No operation was specified. 
See granges map --help.")] + NoOperationSpecified, } diff --git a/src/granges.rs b/src/granges.rs index 3549d72..e4894f1 100644 --- a/src/granges.rs +++ b/src/granges.rs @@ -85,7 +85,7 @@ use indexmap::IndexMap; use crate::{ ensure_eq, - io::OutputStream, + io::{tsv::TsvConfig, OutputStream}, iterators::GRangesIterator, join::{ CombinedJoinData, CombinedJoinDataBothEmpty, CombinedJoinDataLeftEmpty, @@ -225,7 +225,11 @@ where ::Item<'a>: TsvSerialize, { /// Write - fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError> { + fn to_tsv( + &'a self, + output: Option>, + config: &TsvConfig, + ) -> Result<(), GRangesError> { // output stream -- header is None for now (TODO) let output = output.map_or(OutputStream::new_stdout(None), |file| { OutputStream::new(file, None) @@ -236,7 +240,7 @@ where let seqnames = self.seqnames(); for range in self.iter_ranges() { let record = range.to_record(&seqnames, data_ref); - writeln!(writer, "{}", record.to_tsv())?; + writeln!(writer, "{}", record.to_tsv(config))?; } Ok(()) } @@ -244,7 +248,11 @@ where impl<'a, R: IterableRangeContainer> GenomicRangesTsvSerialize<'a, R> for GRangesEmpty { /// Output a BED3 file for for this data-less [`GRanges`]. 
- fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError> { + fn to_tsv( + &'a self, + output: Option>, + config: &TsvConfig, + ) -> Result<(), GRangesError> { // output stream -- header is None for now (TODO) let output = output.map_or(OutputStream::new_stdout(None), |file| { OutputStream::new(file, None) @@ -254,7 +262,7 @@ impl<'a, R: IterableRangeContainer> GenomicRangesTsvSerialize<'a, R> for GRanges let seqnames = self.seqnames(); for range in self.0.iter_ranges() { let record = range.to_record_empty::<()>(&seqnames); - writeln!(writer, "{}", record.to_tsv())?; + writeln!(writer, "{}", record.to_tsv(config))?; } Ok(()) } @@ -517,17 +525,20 @@ impl GRanges> { } } - -impl GRanges> -where C: RangeContainer { - /// Consume this [`GRanges>`] object, applying `func` to all elements +impl GRanges> +where + C: RangeContainer, +{ + /// Consume this [`GRanges>`] object, applying `func` to all elements /// in [`Vec`], to return a new [`GRanges>`]. /// - pub fn map_data(mut self, func: F) -> Result>, GRangesError> - where F: Fn(U) -> V { + pub fn map_data(mut self, func: F) -> Result>, GRangesError> + where + F: Fn(U) -> V, + { let left_data = self.take_data()?; let transformed_data = left_data.into_iter().map(func).collect(); - Ok(GRanges { + Ok(GRanges { ranges: self.ranges, data: Some(transformed_data), }) @@ -573,33 +584,34 @@ impl<'a, DL, DR> GRanges> { } } -impl<'a, T> GRanges, T> -where - T: IndexedDataContainer + 'a, - T: TsvSerialize, - ::Item<'a>: TsvSerialize, -{ - /// Write this [`GRanges`] object to a TSV file, using the - /// [`TsvSerialize`] trait methods defiend for the items in `T`. 
- pub fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError> { - // output stream -- header is None for now (TODO) - let output = output.map_or(OutputStream::new_stdout(None), |file| { - OutputStream::new(file, None) - }); - let mut writer = output.writer()?; - - let seqnames = self.seqnames(); - let data_ref = self.data.as_ref().ok_or(GRangesError::NoDataContainer)?; - for range in self.iter_ranges() { - let record = range.to_record(&seqnames, data_ref); - writeln!(writer, "{}", record.to_tsv())?; - } - Ok(()) - } -} +//impl<'a, T> GRanges, T> +//where +// T: IndexedDataContainer + 'a, +// T: TsvSerialize, +// ::Item<'a>: TsvSerialize, +//{ +// /// Write this [`GRanges`] object to a TSV file, using the +// /// [`TsvSerialize`] trait methods defined for the items in `T`. +// pub fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError> { +// // output stream -- header is None for now (TODO) +// let output = output.map_or(OutputStream::new_stdout(None), |file| { +// OutputStream::new(file, None) +// }); +// let mut writer = output.writer()?; +// +// let seqnames = self.seqnames(); +// let data_ref = self.data.as_ref().ok_or(GRangesError::NoDataContainer)?; +// for range in self.iter_ranges() { +// let record = range.to_record(&seqnames, data_ref); +// writeln!(writer, "{}", record.to_tsv())?; +// } +// Ok(()) +// } +//} /// [`GRanges::left_overlaps()`] for the left with data, right with data case. -impl<'a, DL: 'a, DR: 'a> LeftOverlaps<'a, GRanges> for GRanges +impl<'a, DL: 'a, DR: 'a> LeftOverlaps<'a, GRanges> + for GRanges where DL: IndexedDataContainer + 'a, DR: IndexedDataContainer + 'a, @@ -609,7 +621,7 @@ where /// Conduct a left overlap join, consuming self and returning a new /// [`GRanges`]. /// - /// The [`JoinData`] container contains the owned left data container and has + /// The [`JoinData`] container contains the owned left data container and has /// a reference to the right data container, as as well as a [`Vec`]. 
/// - /// The [`JoinData`] container contains the left data container and has + /// The [`JoinData`] container contains the left data container and has /// a reference to the right data container, as as well as a [`Vec LeftOverlaps<'a, GRanges> for GRangesEmpty where - C: RangeContainer, + C: IterableRangeContainer, DR: IndexedDataContainer + 'a, { type Output = GRanges, JoinDataLeftEmpty<'a, DR>>; @@ -703,7 +716,7 @@ where /// Conduct a left overlap join, consuming self and returning a new /// [`GRanges`]. /// - /// The [`JoinDataLeftEmpty`] contains no left data, and a reference to the + /// The [`JoinDataLeftEmpty`] contains no left data, and a reference to the /// right data container, as as well as a [`Vec LeftOverlaps<'a, GRangesEmpty> for GRangesEmpty, + right: &'a GRangesEmpty, ) -> Result { - let gr: GRanges> = GRanges::new_vec(&self.0.seqlens()); + let mut gr: GRanges> = + GRanges::new_vec(&self.0.seqlens()); + + for (seqname, left_ranges) in self.0.ranges.iter() { + for left_range in left_ranges.iter_ranges() { + // Left join: every left range gets a JoinData. + let mut join_data = LeftGroupedJoin::new(&left_range); + if let Some(right_ranges) = right.0.ranges.get(seqname) { + right_ranges.query(left_range.start(), left_range.end(), |right_range| { + // NOTE: right_range is a coitrees::IntervalNode. + join_data.add_right(&left_range, right_range); + }); + } + gr.push_range_with_join(seqname, left_range.start(), left_range.end(), join_data)?; + } + } - // Since there's no data on either side, we essentially return an empty structure + // get the join data out, to transform it to a more informative type + let join_data = gr.take_data()?; let data = JoinDataBothEmpty { - joins: Vec::new(), // No joins possible without data + joins: join_data.joins, }; let ranges = gr.ranges; Ok(GRanges { @@ -775,7 +818,7 @@ where /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, /// etc. 
/// - /// # Arugments + /// # Arugments /// * `func`: a function that takes a [`CombinedJoinData`] (which contains /// the associated data for the left range and overlapping right ranges) /// and summarizes it into a new type `V`. @@ -815,7 +858,7 @@ impl GRanges { /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, /// etc. /// - /// # Arugments + /// # Arugments /// * `func`: a function that takes a [`CombinedJoinDataLeftEmpty`] (which contains /// the associated data for the left range and overlapping right ranges) /// and summarizes it into a new type `V`. @@ -854,7 +897,7 @@ where /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, /// etc. /// - /// # Arugments + /// # Arugments /// * `func`: a function that takes a [`CombinedJoinDataRightEmpty`] (which contains /// the associated data for the left range and overlapping right ranges) /// and summarizes it into a new type `V`. @@ -892,7 +935,7 @@ where /// e.g. a median `f64` value, a `String` of all overlap right ranges' values concatenated, /// etc. /// - /// # Arugments + /// # Arugments /// * `func`: a function that takes a [`CombinedJoinDataLeftEmpty`] (which contains /// the associated data for the left range and overlapping right ranges) /// and summarizes it into a new type `V`. 
@@ -918,7 +961,7 @@ where } } -impl<'a, C, T> GRanges +impl GRanges where T: IndexedDataContainer, { @@ -1391,15 +1434,14 @@ mod tests { // get join data let data = joined_results.data.unwrap(); - // TODO fix // check is left join - //assert_eq!(data.len(), windows_len); + assert_eq!(data.len(), windows_len); - //let mut join_iter = data.iter(); - //assert_eq!(join_iter.next().unwrap().num_overlaps(), 2); - //assert_eq!(join_iter.next().unwrap().num_overlaps(), 0); - //assert_eq!(join_iter.next().unwrap().num_overlaps(), 2); - //assert_eq!(join_iter.next().unwrap().num_overlaps(), 1); + let mut join_iter = data.iter(); + assert_eq!(join_iter.next().unwrap().num_overlaps(), 2); + assert_eq!(join_iter.next().unwrap().num_overlaps(), 0); + assert_eq!(join_iter.next().unwrap().num_overlaps(), 2); + assert_eq!(join_iter.next().unwrap().num_overlaps(), 1); // rest are empty TODO should check } @@ -1417,6 +1459,7 @@ mod tests { right_gr.push_range("chr1", 23, 24, 2.9).unwrap(); let right_gr = right_gr.into_coitrees().unwrap(); + // TODO let joined_results = windows.left_overlaps(&right_gr).unwrap(); // joined_results.apply_over_joins(); } diff --git a/src/io/mod.rs b/src/io/mod.rs index 43df647..7362132 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -2,6 +2,7 @@ pub mod file; pub mod parsers; +pub mod tsv; pub use file::{InputStream, OutputStream}; pub use parsers::{ diff --git a/src/io/parsers.rs b/src/io/parsers.rs index 24cfd8f..fa47ec2 100644 --- a/src/io/parsers.rs +++ b/src/io/parsers.rs @@ -109,12 +109,23 @@ use std::io::{BufRead, BufReader}; use std::marker::PhantomData; use std::path::{Path, PathBuf}; +use crate::data::DatumType; use crate::error::GRangesError; use crate::io::file::InputStream; use crate::ranges::{GenomicRangeEmptyRecord, GenomicRangeRecord}; -use crate::traits::{GeneralRangeRecordIterator, GenomicRangeRecordUnwrappable, TsvSerialize}; +use crate::traits::{ + GeneralRangeRecordIterator, GenomicRangeRecordUnwrappable, Selection, TsvSerialize, 
+}; use crate::Position; +use super::tsv::TsvConfig; + +// FEATURE/TODO: hints? if not performance cost +// use lazy_static::lazy_static; +//lazy_static! { +// static ref POSITION_HINT: Option = Some("Check that the start position column is a valid integer.".to_string()); +//} + /// Get the *base* extension to help infer filetype, which ignores compression-related /// extensions (`.gz` and `.bgz`). fn get_base_extension>(filepath: P) -> Option { @@ -405,7 +416,8 @@ pub enum Strand { } impl TsvSerialize for Option { - fn to_tsv(&self) -> String { + #![allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { match self { Some(Strand::Forward) => "+".to_string(), Some(Strand::Reverse) => "-".to_string(), @@ -421,27 +433,39 @@ pub struct Bed5Addition { pub score: f64, } -/// The additional three BED6 columns. -// TODO: not connectted yet -#[derive(Clone, Debug)] -pub struct Bed6Addition { - pub name: String, - pub score: f64, - pub strand: Option, +impl Selection for &Bed5Addition { + fn select_by_name(&self, name: &str) -> DatumType { + match name { + "name" => DatumType::String(self.name.clone()), + "score" => DatumType::Float64(self.score), + _ => panic!("No item named '{}'", name), + } + } } impl TsvSerialize for &Bed5Addition { - fn to_tsv(&self) -> String { + #![allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { format!("{}\t{}", self.name, self.score) } } impl TsvSerialize for Bed5Addition { - fn to_tsv(&self) -> String { + #![allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { format!("{}\t{}", self.name, self.score) } } +/// The additional three BED6 columns. +// TODO: not connectted yet +#[derive(Clone, Debug)] +pub struct Bed6Addition { + pub name: String, + pub score: f64, + pub strand: Option, +} + /// A lazy parser for BED5 files. 
This parses the first three range columns, the feature name, /// and the score, yielding a [`GenomicRangeRecord`] will be set to `None`, since there are no remaining /// string columns to parse. @@ -793,10 +817,13 @@ pub fn parse_column(column: &str, line: &str) -> Result::Err: std::fmt::Debug, { - // NOTE: this is used a lot, and should be benchmarked. column .parse::() - .map_err(|_| GRangesError::InvalidColumnType(format!("column '{}' in '{}'", column, line))) + .map_err(|_| GRangesError::InvalidColumnType { + expected_type: std::any::type_name::().to_string(), // Provides the expected type name + found_value: column.to_string(), + line: line.to_string(), + }) } /// Parses a BED-like TSV format line into its constituent components. @@ -816,7 +843,11 @@ where pub fn parse_bedlike(line: &str) -> Result<(String, Position, Position, Vec<&str>), GRangesError> { let columns: Vec<&str> = line.split('\t').collect(); if columns.len() < 3 { - return Err(GRangesError::BedlikeTooFewColumns(line.to_string())); + return Err(GRangesError::BedTooFewColumns( + columns.len(), + 3, + line.to_string(), + )); } let seqname = parse_column(columns[0], line)?; @@ -837,7 +868,10 @@ pub fn parse_bedlike(line: &str) -> Result<(String, Position, Position, Vec<&str pub fn parse_bed_lazy(line: &str) -> Result>, GRangesError> { let columns: Vec<&str> = line.splitn(4, '\t').collect(); if columns.len() < 3 { - return Err(GRangesError::BedlikeTooFewColumns(line.to_string())); + return Err(GRangesError::Bed3TooFewColumns( + columns.len(), + line.to_string(), + )); } let seqname = parse_column(columns[0], line)?; @@ -863,7 +897,10 @@ pub fn parse_bed_lazy(line: &str) -> Result>, pub fn parse_bed3(line: &str) -> Result { let columns: Vec<&str> = line.splitn(4, '\t').collect(); if columns.len() < 3 { - return Err(GRangesError::BedlikeTooFewColumns(line.to_string())); + return Err(GRangesError::Bed3TooFewColumns( + columns.len(), + line.to_string(), + )); } let seqname = parse_column(columns[0], 
line)?; @@ -877,6 +914,7 @@ pub fn parse_bed3(line: &str) -> Result { }) } +#[allow(dead_code)] fn parse_strand(symbol: char) -> Result, GRangesError> { match symbol { '+' => Ok(Some(Strand::Forward)), @@ -890,9 +928,13 @@ fn parse_strand(symbol: char) -> Result, GRangesError> { /// columns /// pub fn parse_bed5(line: &str) -> Result, GRangesError> { - let columns: Vec<&str> = line.splitn(4, '\t').collect(); - if columns.len() < 3 { - return Err(GRangesError::BedlikeTooFewColumns(line.to_string())); + let columns: Vec<&str> = line.splitn(6, '\t').collect(); + if columns.len() < 5 { + return Err(GRangesError::BedTooFewColumns( + columns.len(), + 5, + line.to_string(), + )); } let seqname = parse_column(columns[0], line)?; @@ -900,7 +942,7 @@ pub fn parse_bed5(line: &str) -> Result, GRange let end: Position = parse_column(columns[2], line)?; let name = parse_column(columns[3], line)?; - let score: f64 = parse_column(columns[3], line)?; + let score: f64 = parse_column(columns[4], line)?; let data = Bed5Addition { name, score }; diff --git a/src/io/tsv.rs b/src/io/tsv.rs new file mode 100644 index 0000000..dc0429f --- /dev/null +++ b/src/io/tsv.rs @@ -0,0 +1,72 @@ +//! TSV Serializing helpers, functionality, etc. + +use crate::{data::DatumType, traits::TsvSerialize}; +use lazy_static::lazy_static; + +lazy_static! { + /// The standard BED format TSV configuration. + pub static ref BED_TSV: TsvConfig = TsvConfig { + no_value_string: ".".to_string(), + }; +} + +/// This is an extensible type to handle common +/// TSV output configurations, e.g. what to print +/// for `None` or [`DatumType::NoValue`]. 
+pub struct TsvConfig { + pub no_value_string: String, +} + +impl TsvSerialize for &String { + #![allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { + self.to_string() + } +} + +impl TsvSerialize for String { + #![allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { + self.to_string() + } +} + +impl TsvSerialize for Option { + fn to_tsv(&self, config: &TsvConfig) -> String { + self.as_ref() + .map_or("".to_string(), |x| format!("\t{}", x.to_tsv(config))) + } +} + +impl TsvSerialize for Vec { + fn to_tsv(&self, config: &TsvConfig) -> String { + self.iter() + .map(|x| x.to_tsv(config)) + .collect::>() + .join("\t") + } +} + +impl TsvSerialize for &Vec { + fn to_tsv(&self, config: &TsvConfig) -> String { + self.iter() + .map(|x| x.to_tsv(config)) + .collect::>() + .join("\t") + } +} + +impl TsvSerialize for DatumType { + fn to_tsv(&self, config: &TsvConfig) -> String { + match self { + DatumType::Float32(val) => val.to_string(), + DatumType::Float64(val) => val.to_string(), + DatumType::String(val) => val.clone(), + DatumType::Integer32(val) => val.to_string(), + DatumType::Integer64(val) => val.to_string(), + DatumType::Unsigned32(val) => val.to_string(), + DatumType::Unsigned64(val) => val.to_string(), + DatumType::NoValue => config.no_value_string.clone(), + } + } +} diff --git a/src/join.rs b/src/join.rs index 348814d..d68cd76 100644 --- a/src/join.rs +++ b/src/join.rs @@ -110,33 +110,33 @@ impl<'a, DL, DR> JoinData<'a, DL, DR> { self.len() == 0 } - ///// Create an iterator over the joins. - //pub fn iter(&'a self) -> JoinDataIterator<'a, DL, DR> { - // JoinDataIterator { - // inner: self.joins.iter(), - // left_data: self.left_data.as_ref(), - // right_data: self.right_data, - // } - //} + /// Create an iterator over the joins. 
+ pub fn iter(&'a self) -> JoinDataIterator<'a, DL, DR> { + JoinDataIterator { + inner: self.joins.iter(), + left_data: Some(&self.left_data), + right_data: Some(self.right_data), + } + } } pub struct CombinedJoinData { - pub join: LeftGroupedJoin, // Information on the join - pub left_data: DL, // The left data element - pub right_data: Vec, // The right data elements + pub join: LeftGroupedJoin, // Information on the join + pub left_data: DL, // The left data element + pub right_data: Vec, // The right data elements } impl JoinDataOperations for CombinedJoinData { type LeftDataElementType = DL; type RightDataElementType = DR; - fn join(&self) -> LeftGroupedJoin { - self.join + fn join(&self) -> &LeftGroupedJoin { + &self.join } - fn left_data(&self) -> Option { - Some(self.left_data) + fn left_data(&self) -> Option<&Self::LeftDataElementType> { + Some(&self.left_data) } - fn right_data(&self) -> Option> { - Some(self.right_data) + fn right_data(&self) -> Option<&Vec> { + Some(&self.right_data) } } @@ -156,20 +156,18 @@ where >, ) -> V, { - // Cloning `left_data` and `right_data` to ensure they live long enough. - // This might not be the most efficient but ensures lifetime correctness. - self.joins .iter() .map(|join| { let left_data = self.left_data.get_owned(join.left.unwrap()); let right_data = join - .rights.as_ref() + .rights + .as_ref() .unwrap() .iter() .map(|idx| self.right_data.get_owned(*idx)) .collect(); - // Now `func` is applied to each `CombinedJoinData` + func(CombinedJoinData { join: join.clone(), left_data, @@ -209,6 +207,15 @@ impl<'a, DR> JoinDataLeftEmpty<'a, DR> { pub fn is_empty(&self) -> bool { self.len() == 0 } + + /// Create an iterator over the joins. 
+ pub fn iter(&'a self) -> JoinDataIterator<'a, (), DR> { + JoinDataIterator { + inner: self.joins.iter(), + left_data: None, + right_data: Some(self.right_data), + } + } } pub struct CombinedJoinDataLeftEmpty { @@ -219,18 +226,17 @@ pub struct CombinedJoinDataLeftEmpty { impl JoinDataOperations<(), DR> for CombinedJoinDataLeftEmpty { type LeftDataElementType = (); type RightDataElementType = DR; - fn join(&self) -> LeftGroupedJoin { - self.join + fn join(&self) -> &LeftGroupedJoin { + &self.join } - fn left_data(&self) -> Option { + fn left_data(&self) -> Option<&Self::LeftDataElementType> { None } - fn right_data(&self) -> Option> { - Some(self.right_data) + fn right_data(&self) -> Option<&Vec> { + Some(&self.right_data) } } - impl<'a, DR> JoinDataLeftEmpty<'a, DR> where DR: IndexedDataContainer + 'a, @@ -241,19 +247,19 @@ where where F: Fn(CombinedJoinDataLeftEmpty<::OwnedItem>) -> V, { - // Cloning `left_data` and `right_data` to ensure they live long enough. - // This might not be the most efficient but ensures lifetime correctness. - + // TODO/OPTIMIZE: would consuming here (and analagous funcs) be better/faster? + // Would require a bit of a redesign. self.joins .iter() .map(|join| { - let right_data = join - .rights.as_ref() - .unwrap() - .iter() - .map(|idx| self.right_data.get_owned(*idx)) - .collect(); - // Now `func` is applied to each `CombinedJoinDataLeftEmpty` + let right_indices = join.rights.as_ref(); + let right_data = right_indices.map_or(Vec::new(), |indices| { + indices + .iter() + .map(|idx| self.right_data.get_owned(*idx)) + .collect() + }); + func(CombinedJoinDataLeftEmpty { join: join.clone(), right_data, @@ -292,6 +298,15 @@ impl<'a, DL> JoinDataRightEmpty
{ pub fn is_empty(&self) -> bool { self.len() == 0 } + + /// Create an iterator over the joins. + pub fn iter(&'a self) -> JoinDataIterator<'a, DL, ()> { + JoinDataIterator { + inner: self.joins.iter(), + left_data: Some(&self.left_data), + right_data: None, + } + } } pub struct CombinedJoinDataRightEmpty
<DL> { @@ -299,23 +314,20 @@ pub struct CombinedJoinDataRightEmpty<DL>
{ pub left_data: DL, // The right data element } - impl<DL>
JoinDataOperations<DL, ()> for CombinedJoinDataRightEmpty<DL>
{ type LeftDataElementType = DL; type RightDataElementType = (); - fn join(&self) -> LeftGroupedJoin { - self.join + fn join(&self) -> &LeftGroupedJoin { + &self.join } - fn left_data(&self) -> Option { - Some(self.left_data) + fn left_data(&self) -> Option<&Self::LeftDataElementType> { + Some(&self.left_data) } - fn right_data(&self) -> Option> { + fn right_data(&self) -> Option<&Vec> { None } } - - impl<'a, DL> JoinDataRightEmpty
<DL> where DL: IndexedDataContainer, @@ -326,14 +338,11 @@ where F: Fn(CombinedJoinDataRightEmpty<<DL as IndexedDataContainer>
::OwnedItem>) -> V, { - // Cloning `left_data` and `right_data` to ensure they live long enough. - // This might not be the most efficient but ensures lifetime correctness. - self.joins .iter() .map(|join| { let left_data = self.left_data.get_owned(join.left.unwrap()); - // Now `func` is applied to each `CombinedJoinDataRightEmpty` + func(CombinedJoinDataRightEmpty { join: join.clone(), left_data, @@ -371,28 +380,35 @@ impl JoinDataBothEmpty { pub fn is_empty(&self) -> bool { self.len() == 0 } + + /// Create an iterator over the joins. + pub fn iter(&self) -> JoinDataIterator<'_, (), ()> { + JoinDataIterator { + inner: self.joins.iter(), + left_data: None, + right_data: None, + } + } } pub struct CombinedJoinDataBothEmpty { pub join: LeftGroupedJoin, } - impl JoinDataOperations<(), ()> for CombinedJoinDataBothEmpty { type LeftDataElementType = (); type RightDataElementType = (); - fn join(&self) -> LeftGroupedJoin { - self.join + fn join(&self) -> &LeftGroupedJoin { + &self.join } - fn left_data(&self) -> Option { + fn left_data(&self) -> Option<&Self::LeftDataElementType> { None } - fn right_data(&self) -> Option> { + fn right_data(&self) -> Option<&Vec> { None } } - impl JoinDataBothEmpty { /// Apply `func` to each element, putting the results into the returned /// `Vec`. @@ -400,9 +416,6 @@ impl JoinDataBothEmpty { where F: Fn(CombinedJoinDataBothEmpty) -> V, { - // Cloning `left_data` and `right_data` to ensure they live long enough. - // This might not be the most efficient but ensures lifetime correctness. - self.joins .iter() .map(|join| func(CombinedJoinDataBothEmpty { join: join.clone() })) @@ -410,24 +423,24 @@ impl JoinDataBothEmpty { } } -///// An iterator over the [`LeftGroupedJoin`] types that represent -///// information about overlaps right ranges have with a particular left range. -///// -///// This also contains references to the left and right data containers, for -///// better ergonomics in downstream data processing. 
-//pub struct JoinDataIterator<'a, DL, DR> { -// inner: std::slice::Iter<'a, LeftGroupedJoin>, -// pub left_data: Option<&'a DL>, -// pub right_data: Option<&'a DR>, -//} -// -//impl<'a, DL, DR> Iterator for JoinDataIterator<'a, DL, DR> { -// type Item = &'a LeftGroupedJoin; -// -// fn next(&mut self) -> Option { -// self.inner.next() -// } -//} +/// An iterator over the [`LeftGroupedJoin`] types that represent +/// information about overlaps right ranges have with a particular left range. +/// +/// This also contains references to the left and right data containers, for +/// better ergonomics in downstream data processing. +pub struct JoinDataIterator<'a, DL, DR> { + inner: std::slice::Iter<'a, LeftGroupedJoin>, + pub left_data: Option<&'a DL>, + pub right_data: Option<&'a DR>, +} + +impl<'a, DL, DR> Iterator for JoinDataIterator<'a, DL, DR> { + type Item = &'a LeftGroupedJoin; + + fn next(&mut self) -> Option { + self.inner.next() + } +} #[cfg(test)] mod tests { diff --git a/src/lib.rs b/src/lib.rs index a73463b..c2b6324 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -159,8 +159,8 @@ pub mod prelude { pub use crate::traits::{ AsGRangesRef, GeneralRangeRecordIterator, GenericRange, GenericRangeOperations, GenomicRangeRecordUnwrappable, GenomicRangesTsvSerialize, IndexedDataContainer, - IntoIterableRangesContainer, IterableRangeContainer, TsvSerialize, - LeftOverlaps, + IntoDatumType, IntoIterableRangesContainer, IterableRangeContainer, JoinDataOperations, + LeftOverlaps, TsvSerialize, }; pub use crate::seqlens; diff --git a/src/main/mod.rs b/src/main/mod.rs index 2048bf1..282e379 100644 --- a/src/main/mod.rs +++ b/src/main/mod.rs @@ -2,9 +2,10 @@ use std::path::PathBuf; use clap::{Parser, Subcommand}; use granges::{ - commands::{granges_adjust, granges_filter, granges_flank, ProcessingMode}, + commands::{granges_adjust, granges_filter, granges_flank, granges_map, ProcessingMode}, + data::operations::Operation, prelude::GRangesError, - Position, PositionOffset, 
data::operations::{match_operation, Operation}, + Position, PositionOffset, }; #[cfg(feature = "dev-commands")] @@ -78,7 +79,7 @@ enum Commands { /// Skip ranges from sequences (e.g. chromosomes) not present in the genome file. /// By default, ranges with sequence names not in the genome file will raise an error. - #[arg(short = 'f', long)] + #[arg(short, long)] skip_missing: bool, }, Flank { @@ -108,7 +109,7 @@ enum Commands { /// Skip ranges from sequences (e.g. chromosomes) not present in the genome file. /// By default, ranges with sequence names not in the genome file will raise an error. - #[arg(short = 'f', long)] + #[arg(long)] skip_missing: bool, /// Processing mode @@ -128,9 +129,9 @@ enum Commands { #[arg(short, long, required = true)] right: PathBuf, - /// Operation - #[clap(short, long, value_parser = clap::value_parser!(Operation), multiple_occurrences(true))] - operation: Vec, + /// Operation + #[clap(short, long, value_parser = clap::value_parser!(Operation), use_value_delimiter = true, value_delimiter = ',')] + func: Vec, /// An optional output file (standard output will be used if not specified) #[arg(short, long)] @@ -138,12 +139,8 @@ enum Commands { /// Skip ranges from sequences (e.g. chromosomes) not present in the genome file. /// By default, ranges with sequence names not in the genome file will raise an error. 
- #[arg(short = 'f', long)] + #[arg(short, long)] skip_missing: bool, - - /// Processing mode - #[arg(long)] - in_mem: bool, }, #[cfg(feature = "dev-commands")] RandomBed { @@ -227,11 +224,21 @@ fn run() -> Result<(), GRangesError> { genome, left, right, - operation, + func, output, skip_missing, - in_mem, }) => { + if func.is_empty() { + return Err(GRangesError::NoOperationSpecified); + } + granges_map( + genome, + left, + right, + func.to_vec(), + output.as_ref(), + *skip_missing, + ) } #[cfg(feature = "dev-commands")] diff --git a/src/ranges/mod.rs b/src/ranges/mod.rs index 39edda5..0ff77e4 100644 --- a/src/ranges/mod.rs +++ b/src/ranges/mod.rs @@ -6,6 +6,7 @@ use std::ops::Range; use crate::{ error::GRangesError, + io::tsv::TsvConfig, traits::{ AdjustableGenericRange, GenericRange, GenericRangeOperations, IndexedDataContainer, TsvSerialize, @@ -241,7 +242,8 @@ impl GenericRangeOperations for GenomicRangeRecord { } impl TsvSerialize for GenomicRangeRecord<()> { - fn to_tsv(&self) -> String { + #[allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { format!("{}\t{}\t{}", self.seqname, self.start, self.end) } } @@ -274,13 +276,14 @@ impl TsvSerialize for GenomicRangeRecord { /// intentional, since one common use case is when `U` is an [`Option`], /// which is the type combination commonly used in lazy parsing. In this case, /// when the data is `None`, that indicates no more columns. 
- fn to_tsv(&self) -> String { + #[allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { format!( - "{}\t{}\t{}{}", + "{}\t{}\t{}\t{}", self.seqname, self.start, self.end, - self.data.to_tsv() + self.data.to_tsv(config) ) } } @@ -357,7 +360,8 @@ impl GenericRangeOperations for GenomicRangeEmptyRecord { } impl TsvSerialize for GenomicRangeEmptyRecord { - fn to_tsv(&self) -> String { + #[allow(unused_variables)] + fn to_tsv(&self, config: &TsvConfig) -> String { format!("{}\t{}\t{}", self.seqname, self.start, self.end) } } diff --git a/src/sequences/nucleotide.rs b/src/sequences/nucleotide.rs index 54d79dd..ac1c223 100644 --- a/src/sequences/nucleotide.rs +++ b/src/sequences/nucleotide.rs @@ -212,10 +212,12 @@ impl LazyNucleotideSequences { .index() .iter() .filter_map(|r| { - let name = String::from_utf8(r.name().to_vec()).expect(&format!( - "{}\nError in converting FASTA name to a UTF8 String.", - &INTERNAL_ERROR_MESSAGE - )); + let name = String::from_utf8(r.name().to_vec()).unwrap_or_else(|_| { + panic!( + "{}\nError in converting FASTA name to a UTF8 String.", + &INTERNAL_ERROR_MESSAGE + ) + }); if allowed_seqnames .as_ref() .map_or(true, |seqnames| seqnames.contains(&name)) diff --git a/src/traits.rs b/src/traits.rs index a3bc4fa..2d71aba 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -6,11 +6,16 @@ use std::path::PathBuf; use indexmap::IndexMap; use crate::{ + data::DatumType, error::GRangesError, granges::GRanges, - io::parsers::{FilteredRanges, UnwrappedRanges}, + io::{ + parsers::{FilteredRanges, UnwrappedRanges}, + tsv::TsvConfig, + }, + join::LeftGroupedJoin, ranges::GenomicRangeRecord, - Position, join::LeftGroupedJoin, + Position, }; /// Traits for [`GRanges`] types that can be modified. 
@@ -27,7 +32,7 @@ pub trait AsGRangesRef<'a, C, T> { } /// The [`LeftOverlaps`] trait provides compile time polymorphic behavior -/// over its associated [`LeftOverlaps::Output`] type and its `Right` +/// over its associated [`LeftOverlaps::Output`] type and its `Right` /// generic type. pub trait LeftOverlaps<'a, Right> { type Output; @@ -39,7 +44,11 @@ pub trait LeftOverlaps<'a, Right> { /// object, for some mix of generic types, to a TSV file. pub trait GenomicRangesTsvSerialize<'a, C: RangeContainer> { /// Output the TSV version of this [`GRanges`] object. - fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError>; + fn to_tsv( + &'a self, + output: Option>, + config: &TsvConfig, + ) -> Result<(), GRangesError>; } /// The [`GenericRange`] trait defines common functionality for all range types. @@ -78,10 +87,10 @@ pub trait GenericRange: Clone { } } -/// The [`JoinDataOperations`] trait unifies common operations -/// over combined join data types ([`CombinedJoinData`], +/// The [`JoinDataOperations`] trait unifies common operations +/// over combined join data types ([`CombinedJoinData`], /// CombinedJoinDataBothEmpty`], etc). 
-/// +/// /// /// [`CombinedJoinData`] crate::granges::join::CombinedJoinData /// [`CombinedJoinDataBothEmpty`] crate::granges::join::CombinedJoinDataBothEmpty @@ -89,9 +98,22 @@ pub trait JoinDataOperations { type LeftDataElementType; type RightDataElementType; - fn join(&self) -> LeftGroupedJoin; - fn left_data(&self) -> Option; - fn right_data(&self) -> Option>; + fn join(&self) -> &LeftGroupedJoin; + fn left_data(&self) -> Option<&Self::LeftDataElementType>; + fn right_data(&self) -> Option<&Vec>; +} + +/// Helper trait to convert selected fields into `DataType` +pub trait IntoDatumType { + fn into_data_type(self) -> DatumType; +} + +// TODO +pub trait Selection { + fn select_by_name(&self, name: &str) -> DatumType; + fn select(&self, names: &[String]) -> Vec { + names.iter().map(|name| self.select_by_name(name)).collect() + } } /// The [`GenericRangeOperations`] trait extends additional functionality to [`GenericRange`], @@ -274,33 +296,5 @@ pub trait Sequences<'a> { /// Defines how to serialize something to TSV. pub trait TsvSerialize { // Serialize something to a TSV [`String`]. - fn to_tsv(&self) -> String; -} - -impl TsvSerialize for &String { - fn to_tsv(&self) -> String { - self.to_string() - } -} - -impl TsvSerialize for String { - fn to_tsv(&self) -> String { - self.to_string() - } -} - -impl TsvSerialize for Option { - fn to_tsv(&self) -> String { - self.as_ref() - .map_or("".to_string(), |x| format!("\t{}", x.to_tsv())) - } -} - -impl TsvSerialize for Vec { - fn to_tsv(&self) -> String { - self.iter() - .map(|x| x.to_tsv()) - .collect::>() - .join("\t") - } + fn to_tsv(&self, config: &TsvConfig) -> String; }