From 2b8aa5a9f241a099f9f293d3cdb40c2af0b3ccd9 Mon Sep 17 00:00:00 2001 From: Vince Buffalo Date: Thu, 22 Feb 2024 23:49:51 -0800 Subject: [PATCH] Doc-writing session. - Added lots of module, type, and function level documentation. - F'ing sick ASCII diagrams. --- Cargo.toml | 6 +- src/commands.rs | 257 +++++++++++++++++++++++------------- src/data/mod.rs | 1 + src/error.rs | 10 +- src/granges.rs | 78 +++++++---- src/io/file.rs | 22 ++-- src/io/mod.rs | 6 +- src/io/noodles.rs | 16 --- src/io/parsers.rs | 277 ++++++++++++++++++++++----------------- src/iterators.rs | 7 +- src/join.rs | 12 +- src/lib.rs | 114 +++++++++++++++- src/main/mod.rs | 4 +- src/ranges/coitrees.rs | 8 +- src/ranges/mod.rs | 2 +- src/ranges/operations.rs | 8 +- src/ranges/vec.rs | 2 + src/reporting.rs | 11 +- src/test_utilities.rs | 2 +- src/traits.rs | 14 +- 20 files changed, 562 insertions(+), 295 deletions(-) delete mode 100644 src/io/noodles.rs diff --git a/Cargo.toml b/Cargo.toml index d67871c..fb5ac8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,10 +22,14 @@ noodles = { version = "0.63.0", features = ["core", "bed"] } rand = "0.8.5" tempfile = "3.10.0" thiserror = "1.0.57" +polars = { version = "0.37.0", optional = true } [features] -# cli = [ "clap" ] // TODO make feature +# cli = [ "clap" ] # TODO make feature dev-commands = [ ] +test_bigranges = [] # NOTE: not used yet, for tests on large files +polars = ["dep:polars"] + [profile.release] opt-level = 3 diff --git a/src/commands.rs b/src/commands.rs index 9440239..e239b26 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -1,7 +1,9 @@ +//! Command functions that implement each of the `granges` subcommands. 
+ use std::path::PathBuf; use crate::{ - io::{parsers::GenomicRangesParser, OutputFile}, + io::{parsers::GenomicRangesParser, OutputStream}, prelude::*, ranges::{operations::adjust_range, GenomicRangeEmptyRecord, GenomicRangeRecord}, reporting::{CommandOutput, Report}, @@ -10,13 +12,37 @@ use crate::{ Position, PositionOffset, }; +/// An `enum` to indicate whether an streaming or in-memory algorithm should be used. #[derive(Clone)] pub enum ProcessingMode { Streaming, InMemory, } -/// Adjust the genomic ranges in a bedfile by some specified amount. +/// Adjusts genomic ranges in a BED file by a specified amount. +/// +/// This function modifies the start and end positions of each range in the input BED file based on +/// the provided offsets, ensuring the adjusted ranges do not exceed the sequence lengths specified +/// in the `seqlens` file. If sorting is enabled, the output will be sorted based on the sequence names +/// and start positions. +/// +/// # Arguments +/// +/// * `bedfile` - A reference to a `PathBuf` for the input BED file. +/// * `seqlens` - A reference to a `PathBuf` for the file containing sequence lengths. +/// * `both` - A [`PositionOffset`] specifying how much to adjust the start and end positions. +/// * `output` - An optional reference to a `PathBuf` where the adjusted ranges will be written. Writes +/// to stdout if `None`. +/// * `sort` - A boolean indicating whether to sort the output. +/// +/// # Returns +/// +/// A `Result` wrapping [`CommandOutput<()>`] on success, or [`GRangesError`] on failure. +/// +/// # Errors +/// +/// Returns `GRangesError` if the input BED file or sequence lengths file cannot be read, or if +/// an adjusted range exceeds the sequence boundaries. pub fn granges_adjust( bedfile: &PathBuf, seqlens: &PathBuf, @@ -27,8 +53,8 @@ pub fn granges_adjust( let genome = read_seqlens(seqlens)?; // Setup Output stream -- header is None for now (TODO). 
- let output_stream = output.map_or(OutputFile::new_stdout(None), |file| { - OutputFile::new(file, None) + let output_stream = output.map_or(OutputStream::new_stdout(None), |file| { + OutputStream::new(file, None) }); let mut writer = output_stream.writer()?; @@ -76,6 +102,9 @@ pub fn granges_adjust( let gr = GRangesEmpty::from_iter(iter, &genome)?; gr.adjust_ranges(-both, both).to_tsv(output)? } + GenomicRangesParser::Bed5(iter) => { + unimplemented!() + } GenomicRangesParser::Bedlike(iter) => { // Note the call to try_unwrap_data() here: this is because // we know that the records *do* have data. Unwrapping the Option @@ -92,7 +121,28 @@ pub fn granges_adjust( Ok(CommandOutput::new((), report)) } -/// Retain only the ranges that have at least one overlap with another set of ranges. +/// Filters genomic ranges based on overlaps with another set of ranges. +/// +/// Retains only the ranges from the `left_path` file that have at least one overlap with +/// the ranges in the `right_path` file. The function can optionally skip ranges that do not +/// exist in the provided sequence lengths (e.g. a "genome" file in `bedtools` lingo). +/// +/// # Arguments +/// +/// * `seqlens` - A reference to a `PathBuf` for the file containing sequence lengths. +/// * `left_path` - A reference to a `PathBuf` for the input BED file containing the ranges to filter. +/// * `right_path` - A reference to a `PathBuf` for the BED file containing ranges to check for overlaps. +/// * `output` - An optional reference to a `PathBuf` where the filtered ranges will be written. Writes +/// to stdout if `None`. +/// * `skip_missing` - A boolean indicating whether to skip ranges missing in the sequence lengths file. +/// +/// # Returns +/// +/// A `Result` wrapping [`CommandOutput<()>`] on success, or [`GRangesError`] on failure. +/// +/// # Errors +/// +/// Returns [`GRangesError`] if any input file cannot be read, or if there's an issue processing the ranges. 
pub fn granges_filter( seqlens: &PathBuf, left_path: &PathBuf, @@ -198,7 +248,30 @@ pub fn granges_filter( } } -/// Adjust the genomic ranges in a bedfile by some specified amount. +/// Generates flanking regions for genomic ranges in a BED file. +/// +/// For each range in the input BED file, this function computes the flanking regions based on +/// the specified left and right offsets. The flanking regions are constrained by the sequence lengths +/// provided in the `seqlens` file. The function supports both in-memory and streaming modes for processing. +/// +/// # Arguments +/// +/// * `seqlens` - A reference to a `PathBuf` for the file containing sequence lengths. +/// * `bedfile` - A reference to a `PathBuf` for the input BED file. +/// * `left` - An optional `Position` specifying the left flank size. +/// * `right` - An optional `Position` specifying the right flank size. +/// * `output` - An optional reference to a `PathBuf` for the output file. Writes to stdout if `None`. +/// * `skip_missing` - A boolean indicating whether to skip ranges missing in the sequence lengths file. +/// * `mode` - A [`ProcessingMode`] indicating whether to use in-memory or streaming processing. +/// +/// # Returns +/// +/// A `Result` wrapping [`CommandOutput<()>`] on success, or [`GRangesError`] on failure. +/// +/// # Errors +/// +/// Returns [`GRangesError`] if the input BED file or sequence lengths file cannot be read, or if there's +/// an issue generating the flanking regions. pub fn granges_flank( seqlens: &PathBuf, bedfile: &PathBuf, @@ -226,6 +299,9 @@ pub fn granges_flank( }; gr.flanking_ranges(left, right)?.to_tsv(output)? } + GenomicRangesParser::Bed5(_iter) => { + unimplemented!() + } GenomicRangesParser::Bedlike(iter) => { let gr = if skip_missing { GRanges::from_iter(iter.try_unwrap_data().retain_seqnames(&seqnames), &genome)? @@ -240,8 +316,8 @@ pub fn granges_flank( }, ProcessingMode::Streaming => { // Setup Output stream -- header is None for now (TODO). 
- let output_stream = output.map_or(OutputFile::new_stdout(None), |file| { - OutputFile::new(file, None) + let output_stream = output.map_or(OutputStream::new_stdout(None), |file| { + OutputStream::new(file, None) }); let mut writer = output_stream.writer()?; @@ -278,6 +354,9 @@ pub fn granges_flank( } } } + GenomicRangesParser::Bed5(_iter) => { + unimplemented!() + } GenomicRangesParser::Bedlike(iter) => { if skip_missing { for record in iter.retain_seqnames(&seqnames) { @@ -318,114 +397,112 @@ pub fn granges_flank( Ok(CommandOutput::new((), report)) } - pub fn granges_map( seqlens: impl Into, left_path: &PathBuf, right_path: &PathBuf, output: Option<&PathBuf>, - skip_missing: bool) - -> Result, GRangesError> { - let genome = read_seqlens(seqlens)?; - let seqnames: Vec = genome.keys().cloned().collect(); - let mut report = Report::new(); + skip_missing: bool, +) -> Result, GRangesError> { + let genome = read_seqlens(seqlens)?; + let seqnames: Vec = genome.keys().cloned().collect(); + let mut report = Report::new(); - let left_iter = GenomicRangesFile::parsing_iterator(left_path)?; - let right_iter = GenomicRangesFile::parsing_iterator(right_path)?; + let left_iter = GenomicRangesFile::parsing_iterator(left_path)?; + let right_iter = GenomicRangesFile::parsing_iterator(right_path)?; + match (left_iter, right_iter) { + (GenomicRangesParser::Bed5(left), GenomicRangesParser::Bed5(right)) => { + let left_gr; + let right_gr; - match (left_iter, right_iter) { - (GenomicRangesParser::Bed5(left), GenomicRangesParser::Bed5(right)) => { - let left_gr; - let right_gr; + if skip_missing { + left_gr = GRanges::from_iter(left.retain_seqnames(&seqnames), &genome)?; + right_gr = GRanges::from_iter(right.retain_seqnames(&seqnames), &genome)?; + } else { + left_gr = GRanges::from_iter(left, &genome)?; + right_gr = GRanges::from_iter(right, &genome)?; + } - if skip_missing { - left_gr = GRanges::from_iter(left.retain_seqnames(&seqnames), &genome)?; - right_gr = 
GRanges::from_iter(right.retain_seqnames(&seqnames), &genome)?; - } else { - left_gr = GRanges::from_iter(left, &genome)?; - right_gr = GRanges::from_iter(right, &genome)?; - } + let right_gr = right_gr.into_coitrees()?; - let right_gr = right_gr.into_coitrees()?; + let left_join = left_gr.left_overlaps(&right_gr)?; - let left_join = left_gr.left_overlaps(&right_gr)?; + // TODO -- map function + // left_join.to_tsv(output)?; - left_join.to_tsv(output)?; + Ok(CommandOutput::new((), report)) + } + (GenomicRangesParser::Bed3(left), GenomicRangesParser::Bedlike(right)) => { + todo!(); + let left_gr; + let right_gr; - Ok(CommandOutput::new((), report)) + if skip_missing { + left_gr = GRangesEmpty::from_iter(left.retain_seqnames(&seqnames), &genome)?; + right_gr = GRanges::from_iter( + right.try_unwrap_data().retain_seqnames(&seqnames), + &genome, + )?; + } else { + left_gr = GRangesEmpty::from_iter(left, &genome)?; + right_gr = GRanges::from_iter(right.try_unwrap_data(), &genome)?; } - (GenomicRangesParser::Bed3(left), GenomicRangesParser::Bedlike(right)) => { - todo!(); - let left_gr; - let right_gr; - - if skip_missing { - left_gr = GRangesEmpty::from_iter(left.retain_seqnames(&seqnames), &genome)?; - right_gr = GRanges::from_iter( - right.try_unwrap_data().retain_seqnames(&seqnames), - &genome, - )?; - } else { - left_gr = GRangesEmpty::from_iter(left, &genome)?; - right_gr = GRanges::from_iter(right.try_unwrap_data(), &genome)?; - } - let right_gr = right_gr.into_coitrees()?; + let right_gr = right_gr.into_coitrees()?; - let intersection = left_gr.filter_overlaps(&right_gr)?; - intersection.to_tsv(output)?; + let intersection = left_gr.filter_overlaps(&right_gr)?; + intersection.to_tsv(output)?; - Ok(CommandOutput::new((), report)) + Ok(CommandOutput::new((), report)) + } + (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bed3(right)) => { + let left_gr; + let right_gr; + + if skip_missing { + left_gr = + 
GRanges::from_iter(left.try_unwrap_data().retain_seqnames(&seqnames), &genome)?; + right_gr = GRangesEmpty::from_iter(right.retain_seqnames(&seqnames), &genome)?; + } else { + left_gr = GRanges::from_iter(left.try_unwrap_data(), &genome)?; + right_gr = GRangesEmpty::from_iter(right, &genome)?; } - (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bed3(right)) => { - let left_gr; - let right_gr; - - if skip_missing { - left_gr = - GRanges::from_iter(left.try_unwrap_data().retain_seqnames(&seqnames), &genome)?; - right_gr = GRangesEmpty::from_iter(right.retain_seqnames(&seqnames), &genome)?; - } else { - left_gr = GRanges::from_iter(left.try_unwrap_data(), &genome)?; - right_gr = GRangesEmpty::from_iter(right, &genome)?; - } - let right_gr = right_gr.into_coitrees()?; + let right_gr = right_gr.into_coitrees()?; + + let intersection = left_gr.filter_overlaps(&right_gr)?; + intersection.to_tsv(output)?; - let intersection = left_gr.filter_overlaps(&right_gr)?; - intersection.to_tsv(output)?; + Ok(CommandOutput::new((), report)) + } + (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bedlike(right)) => { + todo!(); + let left_gr; + let right_gr; - Ok(CommandOutput::new((), report)) + if skip_missing { + left_gr = + GRanges::from_iter(left.try_unwrap_data().retain_seqnames(&seqnames), &genome)?; + right_gr = GRanges::from_iter( + right.try_unwrap_data().retain_seqnames(&seqnames), + &genome, + )?; + } else { + left_gr = GRanges::from_iter(left.try_unwrap_data(), &genome)?; + right_gr = GRanges::from_iter(right.try_unwrap_data(), &genome)?; } - (GenomicRangesParser::Bedlike(left), GenomicRangesParser::Bedlike(right)) => { - todo!(); - let left_gr; - let right_gr; - - if skip_missing { - left_gr = - GRanges::from_iter(left.try_unwrap_data().retain_seqnames(&seqnames), &genome)?; - right_gr = GRanges::from_iter( - right.try_unwrap_data().retain_seqnames(&seqnames), - &genome, - )?; - } else { - left_gr = GRanges::from_iter(left.try_unwrap_data(), &genome)?; 
- right_gr = GRanges::from_iter(right.try_unwrap_data(), &genome)?; - } - let right_gr = right_gr.into_coitrees()?; + let right_gr = right_gr.into_coitrees()?; - let intersection = left_gr.filter_overlaps(&right_gr)?; - intersection.to_tsv(output)?; + let intersection = left_gr.filter_overlaps(&right_gr)?; + intersection.to_tsv(output)?; - Ok(CommandOutput::new((), report)) - } - _ => Err(GRangesError::UnsupportedGenomicRangesFileFormat), + Ok(CommandOutput::new((), report)) } + _ => Err(GRangesError::UnsupportedGenomicRangesFileFormat), } - +} /// Generate a random BED-like file with genomic ranges. pub fn granges_random_bed( @@ -433,7 +510,7 @@ pub fn granges_random_bed( num: usize, output: Option>, sort: bool, - ) -> Result, GRangesError> { +) -> Result, GRangesError> { // get the genome info let genome = read_seqlens(seqlens)?; diff --git a/src/data/mod.rs b/src/data/mod.rs index 3c031c1..1cea0b5 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -1,4 +1,5 @@ //! Data container implementations. +//! use crate::traits::DataContainer; diff --git a/src/error.rs b/src/error.rs index da84761..0f15f26 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,10 +1,12 @@ -use std::num::{ParseFloatError, ParseIntError}; - +//! The [`GRangesError`] `enum` definition and error messages. +//! +use crate::Position; use genomap::GenomeMapError; +use std::num::{ParseFloatError, ParseIntError}; use thiserror::Error; -use crate::Position; - +/// The [`GRangesError`] defines the standard set of errors that should +/// be passed to the user. #[derive(Debug, Error)] pub enum GRangesError { // IO related errors diff --git a/src/granges.rs b/src/granges.rs index 83f921f..ed191d4 100644 --- a/src/granges.rs +++ b/src/granges.rs @@ -1,33 +1,50 @@ -//! # Design +//! The [`GRanges`] and [`GRangesEmpty`] types, and associated functionality. //! //! +//! +----------------------+ +//! | GRanges | +//! | object | +//! +----------------------+ +//! /\ +//! / \ +//! / \ +//! v v +//! 
+-----------+ +-------------+ +//! | Ranges | | Data | +//! | container | | container | +//! | (R) | | (T) | +//! +-----------+ +-------------+ +//! //! # [`GRanges`] Generic Types //! //! [`GRanges`] types are generic over: //! -//! 1. Their **range container** (`R`). This is because different operations to be fast, and thus -//! need different data structures. The [`GRanges`] methods allow for efficient conversion of -//! one range type to another. +//! 1. Their **range container** (`R`). This is because for some range operations to be fast, they +//! need to be converted to different data structures. The [`GRanges`] methods allow for efficient +//! conversion of one range type to another (e.g. [`GRanges::into_coitrees()`]. //! //! 2. The *optional* **data container** (`T`). The data container exists if the ranges have some //! associated data. When range *do* have data, there is an index that associates each range //! with its data element in the data container. //! -//! This brings up the most important thing to know about working with the GRanges library: because -//! of the emphasis on on knowing all types at compile-time (which has performance benefits over -//! runtime type polymorphism), it must be known at compile-time whether ranges have data or not. +//! Because GRanges is designed to be a compile-time library (which has performance benefits over +//! runtime type polymorphism), it must be known at compile-time whether a [`GRanges`] container +//! will have associated data or not. The special [`GRangesEmpty`] type represents (and wraps) +//! [`GRanges`] objects that do not have data. +//! //! -//! In most applications this is known: a user specifies, for example, a GTF file and the contents -//! are parsed and processed accordingly. However, it's not unfeasible to imagine that one would -//! need runtime "polymorphism" over BED3 input (which would lead to a [`GRanges`] object without -//! data) and BED* (e.g. BED5, BED12, etc) input. 
(For example, the GRanges command line tool -//! `granges` runs into this problem — see it's implementation for examples.) //! -//! These two possibilities are handled with two differently typed parsing iterators: -//! [`Bed3Iterator`] and [`BedlikeIterator`]. These yield different parsed range types, -//! [`GenomicRangeEmptyRecord`] and [`GenomicRangeRecord`], respectively. //! +//! TODO //! +//! **High-level data types**: A key feature of GRanges design is that a single type of ranges are +//! contained in the range containers. By knowing that every range in a range container either has +//! an index to a data element in the data container or it does not ahead of time simplifies +//! downstream ergonomics tremendously. +//! +//! **Emphasis on compile-time**: For this, let's consider a common problem: a bioinformatics tool +//! needs to read in a BED-like file that has a variable, unknown at compile time, number of +//! columns. //! //! This is an important concept when working with [`GRanges`] types: //! @@ -55,6 +72,10 @@ //! //! Because at compile-time, the types *need* to be known, there are a few options here. //! +//! +//! [`Bed3Iterator`]: crate::io::parsers::Bed3Iterator +//! [`BedlikeIterator`]: crate::io::parsers::BedlikeIterator +//! [`GRanges::into_coitrees`]: crate::granges::GRanges::into_coitrees use std::{collections::HashSet, path::PathBuf}; @@ -64,7 +85,7 @@ use indexmap::IndexMap; use crate::{ ensure_eq, - io::OutputFile, + io::OutputStream, iterators::GRangesIterator, join::{JoinData, LeftGroupedJoin}, prelude::GRangesError, @@ -143,7 +164,7 @@ impl<'a, C> AsGRangesRef<'a, C, ()> for GRangesEmpty { impl<'a, C, T> AsGRangesRef<'a, C, T> for GRanges { /// Return a reference of a [`GRanges`] object. This is essentially - /// a pass-through method. [`IntoGRangesRef`] is not needed in this case, + /// a pass-through method. 
[`AsGRangesRef`] is not needed in this case, /// but is needed elsewhere (see the implementation for [`GRangesEmpty`]) to /// improve the ergonomics of working with [`GRanges`] and [`GRangesEmpty`] types. fn as_granges_ref(&'a self) -> &'a GRanges { @@ -195,8 +216,8 @@ where /// Write fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError> { // output stream -- header is None for now (TODO) - let output = output.map_or(OutputFile::new_stdout(None), |file| { - OutputFile::new(file, None) + let output = output.map_or(OutputStream::new_stdout(None), |file| { + OutputStream::new(file, None) }); let mut writer = output.writer()?; @@ -214,8 +235,8 @@ impl<'a, R: IterableRangeContainer> GenomicRangesTsvSerialize<'a, R> for GRanges /// Output a BED3 file for for this data-less [`GRanges`]. fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError> { // output stream -- header is None for now (TODO) - let output = output.map_or(OutputFile::new_stdout(None), |file| { - OutputFile::new(file, None) + let output = output.map_or(OutputStream::new_stdout(None), |file| { + OutputStream::new(file, None) }); let mut writer = output.writer()?; @@ -505,7 +526,6 @@ impl<'a, DL, DR> GRanges> { data: LeftGroupedJoin, ) -> Result<(), GRangesError> { if self.data.is_none() { - // unlike push_range() panic!("Internal error: JoinData not initialized."); } let data_ref = self.data.as_mut().ok_or(GRangesError::NoDataContainer)?; @@ -535,8 +555,8 @@ where /// [`TsvSerialize`] trait methods defiend for the items in `T`. 
pub fn to_tsv(&'a self, output: Option>) -> Result<(), GRangesError> { // output stream -- header is None for now (TODO) - let output = output.map_or(OutputFile::new_stdout(None), |file| { - OutputFile::new(file, None) + let output = output.map_or(OutputStream::new_stdout(None), |file| { + OutputStream::new(file, None) }); let mut writer = output.writer()?; @@ -550,10 +570,10 @@ where } /// Conduct a left overlap join, consuming self and returning a new - /// [`GRanges>`]. + /// [`GRanges`]. /// /// The [`JoinData`] container contains references to both left and right - /// data containers and a [`Vec`]. Each [`OverlapJoin`] represents + /// data containers and a [`Vec`]. Each [`LeftGroupedJoin`] represents /// a summary of an overlap, which downstream operations use to calculate /// statistics using the information about overlaps. pub fn left_overlaps( @@ -646,10 +666,10 @@ impl GRanges, T> { impl<'a> GRangesEmpty { /// Conduct a left overlap join, consuming self and returning a new - /// [`GRanges>`]. + /// [`GRanges`]. /// /// The [`JoinData`] container contains references to both left and right - /// data containers and a [`Vec`]. Each [`OverlapJoin`] represents + /// data containers and a [`Vec`]. Each [`LeftGroupedJoin`] represents /// a summary of an overlap, which downstream operations use to calculate /// statistics using the information about overlaps. pub fn left_overlaps( @@ -809,7 +829,7 @@ where /// Science](https://r4ds.hadley.nz/joins.html#filtering-joins) for more information. /// /// Note that this consumes the `self` [`GRanges`] object, turning it into a new - /// [`GRanges`]. The data container is rebuilt from indices + /// [`GRanges>`]. The data container is rebuilt from indices /// into a new [`Vec`] where `U` is the associated type [`IndexedDataContainer::Item`], /// which represents the individual data element in the data container. 
pub fn filter_overlaps<'a, M: Clone + 'a, DR: 'a>( diff --git a/src/io/file.rs b/src/io/file.rs index 7fff865..f65c2ea 100644 --- a/src/io/file.rs +++ b/src/io/file.rs @@ -1,4 +1,4 @@ -//! Input/Output file handling with [`InputFile`] and [`OutputFile`]. +//! Input/Output file handling with [`InputStream`] and [`OutputStream`]. //! //! These types abstract over reading/writing both plaintext and gzip-compressed //! input/output. @@ -20,7 +20,7 @@ use crate::Position; pub fn read_seqlens( filepath: impl Into, ) -> Result, GRangesError> { - let input_file = InputFile::new(filepath); + let input_file = InputStream::new(filepath); let reader = input_file.reader()?; let mut seqlens = IndexMap::new(); @@ -55,20 +55,20 @@ fn is_gzipped_file(file_path: impl Into) -> io::Result { /// This abstracts how data is read in, allowing for both plaintext and gzip-compressed input /// to be read through a common interface. #[derive(Clone, Debug)] -pub struct InputFile { +pub struct InputStream { pub filepath: PathBuf, pub comments: Option>, pub header: Option, pub skip_lines: usize, } -impl InputFile { - /// Constructs a new `InputFile`. +impl InputStream { + /// Constructs a new `InputStream`. /// /// # Arguments /// /// * `filepath` - A string slice that holds the path to the file. If the file extension is - /// `.gz`, `InputFile` will automatically uncompress the input. + /// `.gz`, `InputStream` will automatically uncompress the input. pub fn new(filepath: impl Into) -> Self { Self { filepath: filepath.into(), @@ -167,18 +167,18 @@ enum OutputDestination { /// /// This struct is used to handle operations on an output file, such as writing to the file. /// This abstracts writing both plaintext and gzip-compressed files. -pub struct OutputFile { +pub struct OutputStream { destination: OutputDestination, pub header: Option>, } -impl OutputFile { - /// Constructs a new `OutputFile`. +impl OutputStream { + /// Constructs a new `OutputStream`. 
/// /// # Arguments /// /// * `filepath` - A string slice that holds the path to the file. If the file extension is - /// `.gz`, `OutputFile` will automatically write gzip-compressed output. + /// `.gz`, `OutputStream` will automatically write gzip-compressed output. /// * `header` - An optional vector of strings representing commented header lines to be written to the file. pub fn new(filepath: impl Into, header: Option>) -> Self { Self { @@ -187,7 +187,7 @@ impl OutputFile { } } - /// Constructs a new [`OutputFile`] for standard output. + /// Constructs a new [`OutputStream`] for standard output. pub fn new_stdout(header: Option>) -> Self { Self { destination: OutputDestination::Stdout, diff --git a/src/io/mod.rs b/src/io/mod.rs index 2b377c4..43df647 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -1,11 +1,9 @@ -//! Input/Output -//! +//! Types and methods for reading and parsing input and writing output. pub mod file; -pub mod noodles; pub mod parsers; -pub use file::{InputFile, OutputFile}; +pub use file::{InputStream, OutputStream}; pub use parsers::{ Bed3Iterator, BedlikeIterator, GenomicRangesFile, GenomicRangesParser, TsvRecordIterator, }; diff --git a/src/io/noodles.rs b/src/io/noodles.rs deleted file mode 100644 index 9e70cdb..0000000 --- a/src/io/noodles.rs +++ /dev/null @@ -1,16 +0,0 @@ -//! - -use crate::Position; -use noodles::core::Position as NoodlesPosition; - -/// Convert from [`noodles::core::Position`], which is [1-based -/// indexing](https://docs.rs/noodles-core/latest/noodles_core/position/struct.Position.html) -/// and right-to GRanges internal 0-based indexing. 
-/// -/// # Developers Note -/// See: https://github.com/zaeleus/noodles/issues/226 -pub fn convert_noodles_range(start: NoodlesPosition, end: NoodlesPosition) -> (Position, Position) { - let start_one_based: Position = start.get().try_into().unwrap(); - let end_one_based: Position = end.get().try_into().unwrap(); - (start_one_based - 1, end_one_based) -} diff --git a/src/io/parsers.rs b/src/io/parsers.rs index 3633fc0..263d11b 100644 --- a/src/io/parsers.rs +++ b/src/io/parsers.rs @@ -1,49 +1,80 @@ -//! Functionality for general parsing, by turning BED-like files into iterators. +//! Various types of parsing iterators for range formats. //! -//! ## Parsers +//! To work with genomic data in GRanges, one first needs to read it off disk and parse it. This is +//! done with *parsing iterators*. These iterators work on both plaintext and gzip-compressed +//! files (and bgzip and binary format support will hopefully be added). Each row of a file is +//! yielded as a particular parsed range type, which then can be filtered or altered in while +//! in the iterator, using Rust's powerful [`Iterator`] trait methods. //! -//! Parsers in the GRanges library are designed to be: +//! ## Parsing Iterator Design //! -//! 1. *Iterator-based*: GRanges parsers are iterators, allow entries to be filtered or +//! The key design elements of parsing iterators are: +//! +//! 1. *Iterators*: GRanges parsers are iterators, allow entries to be filtered or //! manipulated on the fly using Rust's iterator methods like [`Iterator.filter`]. +//! Additionally, the trait [`GeneralRangeRecordIterator`] adds additional convenience methods +//! for commmon genomic tasks, like filtering based on chromosome name. //! -//! 2. *Lazy*: GRanges parsers are lazy. Since all BED file formats are a superset of BED3, we can -//! parse the range components out of the first three columns and then just store the remaining -//! unparsed part of the line in a `String`. +//! 2. 
*Lazy*: GRanges parsers are build on lazy parsers. Since all BED file formats are a +//! superset of BED3, we can parse the range components out of the first three columns and then +//! just store the remaining unparsed part of the line in a `String`. Other formats like GTF +//! are similar to BED: we can parse the range components of the file first, and parse the +//! remaining data later *if needed*. //! //! 3. *Permissive*: All GRanges parsers are built off of the [`TsvRecordIterator`], which reads -//! plaintext and gzip-compressed files and parses them according to a specified function. This -//! general parser should be able to accomodate every line-based range bioinformatics format +//! plaintext and gzip-compressed files and parses the record according to a specified function. +//! This general parser should be able to accomodate every line-based range bioinformatics format //! (BED3, BED5, GTF, GFF, VCF, etc). Downstream users can implement their own specific parsers //! for these variants (please feel free to contribute a parser for a commonly-used variant to //! GRanges too!). //! -//! Since GRanges is fundamentally about manipulating genomic range data, all parsers output to the -//! same record type: [`GenomicRangeRecord>`], which is generic over the data. The -//! [`Option`] here is because the lazy parser *at compile time* does not know how many -//! columns it will encounter. If only three columns are encountered (e.g. a BED3 file), the data -//! in this [`GenomicRangeRecord`] are all `None`. Then, the ranges that go into [`GRanges`] object -//! do not have indices, since there is not data container. +//! ## Parsing Iterator Item Types +//! +//! These are the generic types that the iterator yields for each call of [`Iterator::next()`], +//! which represent a single row of data. +//! +//! There are two key item types that flow through parsing iterators, each representing a row of a +//! different kind of data file. +//! +//! 1. 
[`GenomicRangeEmptyRecord`], when a BED3 is loaded. This type indicates the incoming ranges +//! *do not* have associated data that would go into a data container. It is *empty* of data. +//! +//! 2. [`GenomicRangeRecord>`], which is a range record *possibly* (hence, +//! the [`Option`]) with remaining *unparsed* `String` data (e.g. the remaining unparsed +//! columns of a VCF file). //! -//! Otherwise, if there *is* data, this data needs to be pushed to the data container, and the -//! indexed range type ([`RangeIndexed`]) is used in the range containers. +//! Most genomic file formats can be thought of as a genomic range with some associated data, +//! for example: //! -//! While handling of this at compile time leads to very performant code with lower memory -//! overhead, it has the downside that *we must handle both types at compile time*. +//! - VCF files will have information on the alleles and a vector of genotypes. //! +//! - GTF/GFF files store details about the annotated feature (e.g. name, protein ID, etc). //! -//! # Working Downstream of Parsers +//! Thus, most parsing iterators will yield this second type, [`GenomicRangeRecord>`]. //! -//! Often at runtime the exact file format may not be known. A user could specify a BED3 file, -//! which only contains ranges and no data, or a BED* of BED-like file (see terminology below). -//! Since GRanges aims to handle most situations *at compile time*, it must handle the process of -//! figuring out how to take an iterator of [`GenomicRangeRecord`] entries and +//! However, often empty ranges (those that do not carry any data) are useful: for example, ranges +//! that just define genomic windows at regular widths, or masked repeat ranges. In this case, when +//! data is loaded into a [`GRanges`] type, the data container type `T` is `()`. This is a +//! degenerate case that GRanges handles with a specific type called [`GRangesEmpty`], which indicates +//! 
the ranges in the ranges container in [`GRangesEmpty`] do not have any associated data. //! +//! In most cases, people working with genomic file formats will know its type at compile-time. +//! However, if this isn't the case, see [⚙️ Handling Range Data when the type isn't know at runtime] below. //! +//! # ⚙️ Handling Range Data when the type isn't know at runtime //! -//! ## Terminology +//! Because GRanges is a compile-time library, the type of incoming data is not runtime-polymorphic. +//! GRanges implements file detection methods to infer the filetype from the extension and +//! does additional type validation checks. When you want to use GRanges to handle a type +//! not known at compile-time, you will need to use `match` statements against the parsing +//! iterator types defined in [`GenomicRangesParser`]. The best examples of this are the +//! `granges` subcommands implementations. //! -//! - BED3 - BED* - BED-like +//! ## File Format Terminology +//! +//! - BED3 +//! - BED* +//! - BED-like //! //! //! : it could be ranges-only (i.e. a BED3), or contain data (e.g. BED5). The lazy BED parser will @@ -61,13 +92,17 @@ //! //! # BED-like File Parser Design //! -//! All BED formats (BED3, BED5, etc). are built upon a BED3. Often when working with these types +//! All BED formats (BED3, BED5, etc) are built upon a BED3. Often when working with these types //! of formats, many operations do not immediately require full parsing of the line, past the //! *range components*. This is because downstream operations may immediately filter away entries //! (e.g. based on width), or do an overlap operation, and then filter away entries based on some //! overlap criteria. Either way, it may be advantageous to work with ranges that just store some //! generic data. //! +//! [`GRanges`]: crate::granges::GRanges +//! [`GRanges`]: crate::granges::GRanges +//! [`GRangesEmpty`]: crate::granges::GRangesEmpty +//! 
use std::collections::HashSet; use std::io::{BufRead, BufReader}; @@ -75,7 +110,7 @@ use std::marker::PhantomData; use std::path::{Path, PathBuf}; use crate::error::GRangesError; -use crate::io::file::InputFile; +use crate::io::file::InputStream; use crate::ranges::{GenomicRangeEmptyRecord, GenomicRangeRecord}; use crate::traits::{GeneralRangeRecordIterator, GenomicRangeRecordUnwrappable, TsvSerialize}; use crate::Position; @@ -116,7 +151,7 @@ fn get_base_extension>(filepath: P) -> Option { /// file, i.e. the first column is there (there are no reasonable checks /// for sequence names other than presence), and the next to columns can /// be parsed into a [`Position`]. -fn valid_bedlike(input_file: &mut InputFile) -> Result { +fn valid_bedlike(input_file: &mut InputStream) -> Result { let _metadata = input_file.collect_metadata("#", None)?; let mut reader = input_file.continue_reading()?; let mut first_line = String::new(); @@ -144,7 +179,7 @@ fn valid_bedlike(input_file: &mut InputFile) -> Result { } // TODO: we can combine this and the above into an enum -fn valid_bed5(input_file: &mut InputFile) -> Result { +fn valid_bed5(input_file: &mut InputStream) -> Result { let _metadata = input_file.collect_metadata("#", None)?; let mut reader = input_file.continue_reading()?; let mut first_line = String::new(); @@ -167,7 +202,6 @@ fn valid_bed5(input_file: &mut InputFile) -> Result { Ok(false) } - /// Enum that connects a genomic ranges file type to its specific parser. #[derive(Debug)] pub enum GenomicRangesParser { @@ -209,15 +243,17 @@ impl GenomicRangesFile { /// columns are BED3) will have the type [`GenomicRangesFile::Bed3`]. This /// is because downstream [`GRanges`] operations need to know if any /// additional data is present, which would need to be put in a data container. - /// 4. If the file type does not satisfy any of the rules above, it is + /// 4. BED5 files, which are BED3 + a *feature name* and a *strand* column. + /// 5. 
If the file type does not satisfy any of the rules above, it is /// [`GenomicRangesFile::Unsupported`]. /// /// See the `match` statement in the source code for the exact rules. Additional /// genomic range file formats like GTF/GFF can easily be added later. /// + /// [`GRanges`]: crate::granges::GRanges pub fn detect(filepath: impl Into) -> Result { let path: PathBuf = filepath.into(); - let mut input_file = InputFile::new(&path); + let mut input_file = InputStream::new(&path); let _metadata = input_file.collect_metadata("#", None)?; let number_columns = input_file.detect_columns("\t")?; @@ -228,11 +264,16 @@ impl GenomicRangesFile { // test if the first row can be parsed into a BED-like file format. let is_valid_bedlike = valid_bedlike(&mut input_file)?; let is_valid_bed5 = valid_bed5(&mut input_file)?; // TODO OPTIMIZE merge with above - let file_type = match (extension.as_str(), number_columns, is_valid_bedlike, is_valid_bed5) { + let file_type = match ( + extension.as_str(), + number_columns, + is_valid_bedlike, + is_valid_bed5, + ) { ("bed", 3, true, false) => GenomicRangesFile::Bed3(path), ("tsv", 3, true, false) => GenomicRangesFile::Bed3(path), ("bed", 5, true, true) => GenomicRangesFile::Bed5(path), - ("bed", n, true, false,) if n > 3 => GenomicRangesFile::Bedlike(path), + ("bed", n, true, false) if n > 3 => GenomicRangesFile::Bedlike(path), ("tsv", n, true, false) if n > 3 => GenomicRangesFile::Bedlike(path), _ => GenomicRangesFile::Unsupported, }; @@ -242,11 +283,11 @@ impl GenomicRangesFile { /// Detect the genomic range filetype and link it to its parsing iterator, or raise an error /// if the filetype is not supported. /// - /// This returns a [`GenomicRangesParsers`] enum, since the parsing iterator filetype + /// This returns a [`GenomicRangesParser`] enum, since the parsing iterator filetype /// cannot be known at compile time. 
pub fn parsing_iterator( filepath: impl Clone + Into, - ) -> Result { + ) -> Result { let path = filepath.into(); match Self::detect(path)? { GenomicRangesFile::Bed3(path) => { @@ -264,7 +305,7 @@ impl GenomicRangesFile { } /// An extensible TSV parser, which uses a supplied parser function to -/// convert a line into a [`RangeRecord`], a range with generic associated +/// convert a line into a [`GenomicRangeRecord`], a range with generic associated /// data. pub struct TsvRecordIterator { reader: BufReader>, @@ -281,12 +322,12 @@ impl std::fmt::Debug for TsvRecordIterator { impl TsvRecordIterator where -F: Fn(&str) -> Result, + F: Fn(&str) -> Result, { /// Create a new [`TsvRecordIterator`], which parses lines from the supplied - /// file path into [`RangeRecord`] using the specified parsing function. + /// file path into [`GenomicRangeRecord`] using the specified parsing function. pub fn new(filepath: impl Into, parser: F) -> Result { - let mut input_file = InputFile::new(filepath); + let mut input_file = InputStream::new(filepath); let _has_metadata = input_file.collect_metadata("#", None); let num_columns = input_file.detect_columns("\t")?; let reader = input_file.continue_reading()?; @@ -302,7 +343,7 @@ F: Fn(&str) -> Result, impl Iterator for TsvRecordIterator where -F: Fn(&str) -> Result, + F: Fn(&str) -> Result, { type Item = Result; @@ -324,18 +365,15 @@ pub enum GenomicRangesIteratorVariant { Empty(Box, GRangesError>>>), } -/// A lazy parser for BED3 (ranges only) files. This parses the first three columns. -/// yields [`GenomicRangeRecord<>>`] entries. If the file is a BED3 file, -/// the data in the [`GenomicRangeRecord`] will be set to `None`, since there are no remaining -/// string columns to parse. -/// +/// A lazy parser for BED3 (ranges only) files. This parses the first three columns, +/// yielding a [`GenomicRangeRecord`] items. 
#[allow(clippy::type_complexity)] #[derive(Debug)] pub struct Bed3Iterator { iter: TsvRecordIterator< fn(&str) -> Result, GenomicRangeEmptyRecord, - >, + >, } impl Bed3Iterator { @@ -359,7 +397,6 @@ impl Iterator for Bed3Iterator { } } - /// Nucleotide strand enum type. #[derive(Clone, Debug)] pub enum Strand { @@ -384,6 +421,12 @@ pub struct Bed5Addition { strand: Option, } +impl TsvSerialize for &Bed5Addition { + fn to_tsv(&self) -> String { + format!("{}\t{}", self.name, self.strand.to_tsv()) + } +} + impl TsvSerialize for Bed5Addition { fn to_tsv(&self) -> String { format!("{}\t{}", self.name, self.strand.to_tsv()) @@ -400,7 +443,7 @@ pub struct Bed5Iterator { iter: TsvRecordIterator< fn(&str) -> Result, GRangesError>, GenomicRangeRecord, - >, + >, } impl Bed5Iterator { @@ -424,7 +467,6 @@ impl Iterator for Bed5Iterator { } } - /// A lazy parser for BED-like files. This lazily parses only the first three columns, and /// yields [`GenomicRangeRecord>`] entries. If the file is a BED3 file, /// the data in the [`GenomicRangeRecord`] will be set to `None`, since there are no remaining @@ -436,7 +478,7 @@ pub struct BedlikeIterator { iter: TsvRecordIterator< fn(&str) -> Result>, GRangesError>, GenomicRangeRecord>, - >, + >, } impl BedlikeIterator { @@ -472,13 +514,13 @@ impl BedlikeIterator { result .map(|record| { Ok(GenomicRangeRecord::new( - record.seqname, - record.start, - record.end, - (), - )) + record.seqname, + record.start, + record.end, + (), + )) }) - .unwrap_or_else(Err) // pass through parsing errors + .unwrap_or_else(Err) // pass through parsing errors }) } } @@ -491,7 +533,7 @@ impl Iterator for BedlikeIterator { } } -/// An iterator over [`IntervalRecord`] items that filters based on sequence name. +/// An iterator over a generic "genomic range like " item type `R`, that filters based on sequence name. /// /// Note that that the exclude filter is prioritized over the retain filter. 
So, /// if a pipeline contains both, then if a chromosome is supplied to @@ -522,7 +564,7 @@ impl Iterator for BedlikeIterator { #[derive(Debug)] pub struct FilteredRanges where -I: Iterator>, + I: Iterator>, { inner: I, retain_seqnames: Option>, @@ -531,13 +573,13 @@ I: Iterator>, impl FilteredRanges where -I: Iterator>, + I: Iterator>, { pub fn new( inner: I, retain_seqnames: Option<&Vec>, exclude_seqnames: Option<&Vec>, - ) -> Self { + ) -> Self { let retain_seqnames = retain_seqnames.cloned().map(HashSet::from_iter); let exclude_seqnames = exclude_seqnames.cloned().map(HashSet::from_iter); Self { @@ -551,7 +593,7 @@ I: Iterator>, /// Range-filtering iterator implementation for [`GenomicRangeRecord`]. impl Iterator for FilteredRanges> where -I: Iterator, GRangesError>>, + I: Iterator, GRangesError>>, { type Item = Result, GRangesError>; @@ -562,18 +604,18 @@ I: Iterator, GRangesError>>, Ok(entry) => { if self .exclude_seqnames - .as_ref() - .map_or(false, |ex| ex.contains(&entry.seqname)) - { - continue; - } + .as_ref() + .map_or(false, |ex| ex.contains(&entry.seqname)) + { + continue; + } if self .retain_seqnames - .as_ref() - .map_or(true, |rt| rt.contains(&entry.seqname)) - { - return Some(item); - } + .as_ref() + .map_or(true, |rt| rt.contains(&entry.seqname)) + { + return Some(item); + } } Err(_) => return Some(item), } @@ -585,7 +627,7 @@ I: Iterator, GRangesError>>, /// Range-filtering iterator implementation for [`GenomicRangeEmptyRecord`]. 
impl Iterator for FilteredRanges where -I: Iterator>, + I: Iterator>, { type Item = Result; @@ -596,18 +638,18 @@ I: Iterator>, Ok(entry) => { if self .exclude_seqnames - .as_ref() - .map_or(false, |ex| ex.contains(&entry.seqname)) - { - continue; - } + .as_ref() + .map_or(false, |ex| ex.contains(&entry.seqname)) + { + continue; + } if self .retain_seqnames - .as_ref() - .map_or(true, |rt| rt.contains(&entry.seqname)) - { - return Some(item); - } + .as_ref() + .map_or(true, |rt| rt.contains(&entry.seqname)) + { + return Some(item); + } } Err(_) => return Some(item), } @@ -620,13 +662,13 @@ impl GeneralRangeRecordIterator>> for BedlikeI fn retain_seqnames( self, seqnames: &[String], - ) -> FilteredRanges>> { + ) -> FilteredRanges>> { FilteredRanges::new(self, Some(&seqnames.to_vec()), None) } fn exclude_seqnames( self, seqnames: &[String], - ) -> FilteredRanges>> { + ) -> FilteredRanges>> { FilteredRanges::new(self, None, Some(&seqnames.to_vec())) } } @@ -638,39 +680,40 @@ impl GeneralRangeRecordIterator for Bed3Iterator { fn exclude_seqnames( self, seqnames: &[String], - ) -> FilteredRanges { + ) -> FilteredRanges { FilteredRanges::new(self, None, Some(&seqnames.to_vec())) } } impl GeneralRangeRecordIterator> for Bed5Iterator { - fn retain_seqnames(self, seqnames: &[String]) -> FilteredRanges> { + fn retain_seqnames( + self, + seqnames: &[String], + ) -> FilteredRanges> { FilteredRanges::new(self, Some(&seqnames.to_vec()), None) } fn exclude_seqnames( self, seqnames: &[String], - ) -> FilteredRanges> { + ) -> FilteredRanges> { FilteredRanges::new(self, None, Some(&seqnames.to_vec())) } } - - impl GeneralRangeRecordIterator> for UnwrappedRanges where -I: Iterator>, GRangesError>>, + I: Iterator>, GRangesError>>, { fn retain_seqnames( self, seqnames: &[String], - ) -> FilteredRanges> { + ) -> FilteredRanges> { FilteredRanges::new(self, Some(&seqnames.to_vec()), None) } fn exclude_seqnames( self, seqnames: &[String], - ) -> FilteredRanges> { + ) -> FilteredRanges> { 
FilteredRanges::new(self, None, Some(&seqnames.to_vec())) } } @@ -678,14 +721,14 @@ I: Iterator>, GRangesError>>, #[derive(Debug)] pub struct UnwrappedRanges where -I: Iterator>, GRangesError>>, + I: Iterator>, GRangesError>>, { inner: I, } impl UnwrappedRanges where -I: Iterator>, GRangesError>>, + I: Iterator>, GRangesError>>, { pub fn new(inner: I) -> Self { Self { inner } @@ -694,7 +737,7 @@ I: Iterator>, GRangesError>>, impl Iterator for UnwrappedRanges where -I: Iterator>, GRangesError>>, + I: Iterator>, GRangesError>>, { type Item = Result, GRangesError>; @@ -702,11 +745,11 @@ I: Iterator>, GRangesError>>, if let Some(result) = self.inner.next() { return Some(result.and_then(|record| match record.data { Some(data) => Ok(GenomicRangeRecord::new( - record.seqname, - record.start, - record.end, - data, - )), + record.seqname, + record.start, + record.end, + data, + )), None => Err(GRangesError::TryUnwrapDataError), })); } @@ -739,7 +782,7 @@ impl GenomicRangeRecordUnwrappable for BedlikeIterator { /// Returns `GRangesError::InvalidColumnType` if the column cannot be parsed into type `T`. pub fn parse_column(column: &str, line: &str) -> Result where -::Err: std::fmt::Debug, + ::Err: std::fmt::Debug, { // NOTE: this is used a lot, and should be benchmarked. 
column @@ -825,7 +868,6 @@ pub fn parse_bed3(line: &str) -> Result { }) } - fn parse_strand(symbol: char) -> Result, GRangesError> { match symbol { '+' => Ok(Some(Strand::Forward)), @@ -835,9 +877,8 @@ fn parse_strand(symbol: char) -> Result, GRangesError> { } } - /// Parses a BED5 format line into the three columns defining the range, and additional -/// columns +/// columns /// pub fn parse_bed5(line: &str) -> Result, GRangesError> { let columns: Vec<&str> = line.splitn(4, '\t').collect(); @@ -862,13 +903,11 @@ pub fn parse_bed5(line: &str) -> Result, GRange }) } - - #[cfg(test)] mod tests { use crate::io::{ parsers::{get_base_extension, valid_bedlike}, - InputFile, + InputStream, }; use super::GenomicRangesFile; @@ -891,27 +930,27 @@ mod tests { fn test_rangefiletype_detect() { let range_filetype = GenomicRangesFile::detect("tests_data/example.bed"); assert!(matches!( - range_filetype.unwrap(), - GenomicRangesFile::Bed3(_) - )); + range_filetype.unwrap(), + GenomicRangesFile::Bed3(_) + )); let range_filetype = GenomicRangesFile::detect("tests_data/example_bedlike.tsv"); assert!(matches!( - range_filetype.unwrap(), - GenomicRangesFile::Bedlike(_) - )); + range_filetype.unwrap(), + GenomicRangesFile::Bedlike(_) + )); } #[test] fn test_valid_bedlike() { assert_eq!( - valid_bedlike(&mut InputFile::new("tests_data/example.bed")).unwrap(), + valid_bedlike(&mut InputStream::new("tests_data/example.bed")).unwrap(), true - ); + ); assert_eq!( - valid_bedlike(&mut InputFile::new("tests_data/invalid_format.bed")).unwrap(), + valid_bedlike(&mut InputStream::new("tests_data/invalid_format.bed")).unwrap(), false - ); + ); } //#[test] diff --git a/src/iterators.rs b/src/iterators.rs index cb43a48..8e89373 100644 --- a/src/iterators.rs +++ b/src/iterators.rs @@ -1,3 +1,8 @@ +//! Iterators over genomic ranges and their data. +//! +//! Note that *parsing iterators* are in [`parsers`]. +//! +//! 
[`parsers`]: crate::io::parsers use genomap::GenomeMap; use crate::{ @@ -5,7 +10,7 @@ use crate::{ traits::{GenericRange, IterableRangeContainer, RangeContainer}, }; -/// An iterator yielding [`RangeIndexedRecord`], which store +/// An iterator yielding [`GenomicRangeIndexedRecord`], which store /// indices to the sequence names and data container. /// /// # Developer Notes diff --git a/src/join.rs b/src/join.rs index ed845c4..90260cd 100644 --- a/src/join.rs +++ b/src/join.rs @@ -1,3 +1,5 @@ +//! [`LeftGroupedJoin`], [`JoinData`], and [`JoinDataIterator`] types for overlaps. +//! #![allow(clippy::all)] use crate::{traits::GenericRange, Position}; @@ -45,7 +47,10 @@ impl LeftGroupedJoin { // right_data, } } - /// Add a right range to this [`LeftGroupedJoin`]. + /// Add a right (overlapping) range to this [`LeftGroupedJoin`]. + /// + // Note: in principle, this can be called on *non-overlapping* right ranges too, + // for a full-outer join. pub fn add_right(&mut self, left: &R, right: &Q) { self.rights.push(right.index()); self.right_lengths.push(right.width()); @@ -107,6 +112,11 @@ impl<'a, DL, DR> JoinData<'a, DL, DR> { } } +/// An iterator over the [`LeftGroupedJoin`] types that represent +/// information about overlaps right ranges have with a particular left range. +/// +/// This also contains references to the left and right data containers, for +/// better ergonomics in downstream data processing. pub struct JoinDataIterator<'a, DL, DR> { inner: std::slice::Iter<'a, LeftGroupedJoin>, pub left_data: Option<&'a DL>, diff --git a/src/lib.rs b/src/lib.rs index 82bd346..2d84af8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,102 @@ #![crate_name = "granges"] #![doc(html_root_url = "https://docs.rs/granges/")] +//! # GRanges: Generic Genomic Range and Data Containers +//! +//! GRanges is a Rust library for working with genomic ranges and their associated data. GRanges +//! also provides a separate command line tool built using this library, which currently +//! 
implements a subset of the features of other great bioinformatics utilities like
+//! [bedtools](https://bedtools.readthedocs.io/en/latest/).
+//!
+//! The idea of GRanges is to facilitate building a new generation of robust, reproducible, and
+//! easily installable bioinformatics tools in Rust. It aims to lower the barrier to building
+//! simple bioinformatics data processing tools in Rust for biologists that are interested
+//! in using the language in their work. Even biologists that have no interest in learning Rust
+//! should benefit from Rust-based tools since they are fast. Currently GRanges is engineered
+//! as a set of compile-time generic types and methods, though future versions may implement
+//! analogous runtime dynamic data structures that would allow the library to be wrapped and used
+//! by other languages.
+//!
+//!
+//! ## Design Overview
+//!
+//! To understand how GRanges could be useful in processing genomic data, it's helpful to first
+//! understand the overall design of this software library.
+//!
+//! Nearly all range data is loaded into GRanges using **parsing iterators** ([parsers]).
+//! Parsing iterators read input data from some bioinformatics file format (e.g. `.bed`,
+//! `.gtf.gz`, etc) and the data then goes down one of two paths:
+//!
+//! 1. Streaming mode: the entire data set is never read into memory all at once, but rather
+//! is processed entry by entry.
+//!
+//! 2. In-memory mode: the entire data set *is* read into memory all at once, usually as
+//! a [`GRanges`] object. The [`GRanges`] object is composed of two parts:
+//! a *ranges container* ([ranges]) and a *data container* ([data]).
+//! [`GRanges`] objects then provide high-level in-memory methods for working with this
+//! genomic data.
+//!
+//! Both modes then write something to output: a TSV of some statistic calculated on the
+//! genomic data, the output of 10 million block bootstraps, or the coefficients of
+//! 
linear regressions run every kilobase on sequence data. +//! +//! The GRanges workflow idea is illustrated in this (super cool) ASCII diagram: +//! +//! +//! ```text +//! +-------+ +-------------------+ +----------------------+ +//! | Input | -----> | Iterator | -----> | GRanges object | +//! +------ + | (streaming mode) | | (in-memory mode) | +//! +-------------------+ +----------------------+ +//! | | +//! | | +//! v v +//! +-----------------+ +-----------------+ +//! | Data Processing | | Data Processing | +//! +-----------------+ +-----------------+ +//! | | +//! v v +//! +--------+ +--------+ +//! | Output | | Output | +//! +--------+ +--------+ +//! +//! ``` +//! +//! Nearly all common data processing and analytics tasks in science can be thought of as taking +//! some raw input data and refining it down some pipeline, until eventually outputting it as some sort of statistical +//! result. Bioinformatics has been built on composing workflows out of command line tools and +//! pipes. Analogously, tidyverse unifies data tidying, summarizing, and modeling by passing data +//! in a datafame down a similar pipeline-based interface. Both operations are simple to use +//! because they compose the same essential *operations* into specific data processing workflows. +//! +//! GRanges is designed to emulate these powerful data analytics tools. It implements core +//! operations on genomic ranges and their associated data, and provides generic, extensible +//! data structures for storing genomic data. +//! +//! GRanges attempts to implement the same core set of data joining and processing tools as the +//! [plyranges](https://www.bioconductor.org/packages/release/bioc/html/plyranges.html) library and +//! R's [tidyverse](https://www.tidyverse.org). It also frames these genomic operations in +//! standard data analytic and database terms. For example, the operation `bedtools intersect -wa -u -a +//! 
-b ` finds all ranges in the *left* set of genomic ranges that overlap one or more
+//! *right* genomic ranges, and reports only the unique left range. This operation is common in
+//! many data analytics workflows and database queries, and is known as a *semi join* between
+//! a left table and right table. (see for example this section in Hadley Wickham's book [R for Data Science](https://r4ds.hadley.nz/joins.html#filtering-joins)).
+//! In the `granges` command line tool, this is implemented in the `granges filter` subcommand. In
+//! the library, this subcommand is just the method [`GRanges::filter_overlaps()`].
+//!
+//!
+//! ## Documentation Guide
+//!
+//! - ⚙️ : technical details that might only be useful to developers handling tricky cases.
+//! - ⚠️: indicates a method or type that is unstable (it may change in future implementations),
+//! or is "sharp" (developer error could cause a panic).
+//! - 🚀: optimization tip.
+//!
+//! [`GRanges`]: crate::granges::GRanges
+//! [`GRanges::filter_overlaps()`]: crate::granges::GRanges::filter_overlaps
+//! [`GRanges`]: crate::granges::GRanges
+//! [parsers]: crate::io::parsers
+
 pub use indexmap;
 
 pub mod data;
@@ -19,9 +115,14 @@ pub mod traits;
 pub mod commands;
 pub mod reporting;
 
+/// The main position type in GRanges.
 pub type Position = u32;
-pub type PositionOffset = i32; // signed variant
+/// The main *signed* position type in GRanges, to represent offsets (e.g.
+/// for adjusting range coordinates, etc).
+pub type PositionOffset = i32;
+
+/// The main exports of the GRanges library.
 pub mod prelude {
     pub use crate::error::GRangesError;
     pub use crate::granges::{GRanges, GRangesEmpty};
@@ -45,7 +146,9 @@
 An internal error has occurred. Please file a GitHub issue at https://github.com/vsbuffalo/granges/issues
 "#;
 
-/// Create and [`indexmap::IndexMap`] of sequence names and their lengths.
+/// Create an [`IndexMap`] of sequence names and their lengths.
+///
+/// [`IndexMap`]: indexmap::IndexMap
 #[macro_export]
 macro_rules! 
seqlens { ($($key:expr => $value:expr),* $(,)?) => { @@ -53,7 +156,10 @@ macro_rules! seqlens { }; } -/// Create a new `GRanges` with sequence length information (used primarily for small examples) +/// Create a new [`GRanges`] with sequence length information (used primarily for small examples) +/// +/// [`GRanges`]: crate::granges::GRanges +/// #[macro_export] macro_rules! create_granges_with_seqlens { ($range_type:ty, $data_type:ty, { $($chr:expr => [$(($start:expr, $end:expr, $data:expr)),*]),* }, seqlens: { $($chr_len:expr => $len:expr),* }) => { @@ -74,7 +180,7 @@ macro_rules! create_granges_with_seqlens { }; } -/// a version of assert_eq! that is more user-friendly. +/// a version of [assert_eq!] that is more user-friendly. #[macro_export] macro_rules! ensure_eq { ($left:expr, $right:expr $(,)?) => ({ diff --git a/src/main/mod.rs b/src/main/mod.rs index 6ee821d..e33d9d0 100644 --- a/src/main/mod.rs +++ b/src/main/mod.rs @@ -227,9 +227,9 @@ fn run() -> Result<(), GRangesError> { skip_missing, in_mem, }) => { - + unimplemented!() } - + #[cfg(feature = "dev-commands")] Some(Commands::RandomBed { genome, diff --git a/src/ranges/coitrees.rs b/src/ranges/coitrees.rs index f355d53..b5db7be 100644 --- a/src/ranges/coitrees.rs +++ b/src/ranges/coitrees.rs @@ -1,3 +1,7 @@ +//! The [`COITrees`] type. +//! +//! This wraps the functionality og the [`coitrees`] library by Daniel C. Jones. +//! use coitrees::{BasicCOITree, GenericInterval, Interval, IntervalNode, IntervalTree}; use crate::{ @@ -59,12 +63,12 @@ impl COITrees { self.ranges.query_count(first, end - 1) } - /// Return the number of ranges in this [`COITreeRangeContainer`] container. + /// Return the number of ranges in this [`COITrees`] container. pub fn len(&self) -> usize { self.ranges.len() } - /// Return whether the [`COITreeRangeContainer`] object is empty (contains no ranges). + /// Return whether the [`COITrees`] object is empty (contains no ranges). 
pub fn is_empty(&self) -> bool { self.len() == 0 } diff --git a/src/ranges/mod.rs b/src/ranges/mod.rs index 42a11a0..45505dd 100644 --- a/src/ranges/mod.rs +++ b/src/ranges/mod.rs @@ -1,4 +1,4 @@ -//! Range and Range Containers. +//! Range types and Range Containers. //! //! diff --git a/src/ranges/operations.rs b/src/ranges/operations.rs index fd746ec..7dbfa6e 100644 --- a/src/ranges/operations.rs +++ b/src/ranges/operations.rs @@ -1,11 +1,9 @@ -//! Range Operations. +//! Range operations. //! -//! - [`adjust()`]: Adjust range start and end positions. - use crate::{traits::AdjustableGenericRange, Position, PositionOffset}; -/// Adjusts the coordinates of a range, ensuring the adjusted range is within [0, length] -/// and returning `None` if the range has zero width after adjustment. +/// Adjusts the start and end coordinates of a range, ensuring the adjusted range is +/// within [0, length] and returning `None` if the range has zero width after adjustment. pub fn adjust_range( mut range: R, start_delta: PositionOffset, diff --git a/src/ranges/vec.rs b/src/ranges/vec.rs index 7213b0f..34fb825 100644 --- a/src/ranges/vec.rs +++ b/src/ranges/vec.rs @@ -1,3 +1,5 @@ +//! The [`VecRanges`] type, and the [`VecRangesIndexed`] and [`VecRangesEmpty`] type aliases. +//! use super::operations::adjust_range; use super::{validate_range, RangeEmpty, RangeIndexed}; use crate::traits::{ diff --git a/src/reporting.rs b/src/reporting.rs index 6f2ef02..3fd7353 100644 --- a/src/reporting.rs +++ b/src/reporting.rs @@ -1,6 +1,14 @@ -//! Centralized reporting to the user about e.g. fragile operations. +//! Types for standardized reports to the user about range operations. +//! +//! ⚠️: This is still under development. +//! +//! The goal of this is to encourage and facilitate developers of bioinformatics +//! tools to report information about potentially fragile operations, or inform +//! them of e.g. how many ranges were filtered out by some operation. //! 
+/// The [`CommandOutput`] type output is generic over some data output +/// from a command, and a [`Report`] that reports information to the user. #[allow(unused)] pub struct CommandOutput { value: U, @@ -13,6 +21,7 @@ impl CommandOutput { } } +/// A type to (semi) standardize reporting to the user. #[derive(Default)] pub struct Report { entries: Vec, diff --git a/src/test_utilities.rs b/src/test_utilities.rs index 232e14a..ac48ae3 100644 --- a/src/test_utilities.rs +++ b/src/test_utilities.rs @@ -84,7 +84,7 @@ pub fn random_vecranges(n: usize) -> VecRanges { vr } -/// Build a random [`GRanges, diff --git a/src/traits.rs b/src/traits.rs index 812bbf4..1966830 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -18,6 +18,8 @@ use crate::{ /// The [`AsGRangesRef`] trait improves the ergonomics of working /// with both [`GRanges`] and [`GRangesEmpty`] function arguments. +/// +/// [`GRangesEmpty`]: crate::granges::GRanges pub trait AsGRangesRef<'a, C, T> { fn as_granges_ref(&'a self) -> &'a GRanges; } @@ -95,6 +97,9 @@ pub trait AdjustableGenericRange: GenericRange { /// Defines functionality common to all range containers, e.g. [`VecRanges`] and /// [`COITrees`]. +/// +/// [`VecRanges`]: crate::ranges::vec::VecRanges +/// [`COITrees`]: crate::ranges::coitrees::COITrees pub trait RangeContainer { fn len(&self) -> usize; fn is_empty(&self) -> bool { @@ -103,10 +108,13 @@ pub trait RangeContainer { fn sequence_length(&self) -> Position; } +/// Marker trait for data container. +/// This is presently not used for much, but is useful to indicate certain +/// type combinations are data containers. pub trait DataContainer {} -/// Defines functionality for filtering [`RangeRecord`] entries based on their -/// sequence names. [`RangeRecord`] are predominantly used in reading in data, +/// Defines functionality for filtering [`GenomicRangeRecord`] entries based on their +/// sequence names. 
[`GenomicRangeRecord`] are predominantly used in reading in data, /// so these trait methods simplify excluding or retaining ranges based on what /// sequence (i.e. chromosome) they are on. pub trait GeneralRangeRecordIterator: @@ -141,7 +149,7 @@ where fn iter_ranges(&self) -> Box + '_>; } -/// The [`IntoIterableRangeContainer`] trait defines common functionality for *consuming* iterating +/// The [`IntoIterableRangesContainer`] trait defines common functionality for *consuming* iterating /// over the range types in range containers. pub trait IntoIterableRangesContainer { fn into_iter_ranges(self) -> Box>;