From a55e44fa38a4fae6e02af1bbe7c133c7ca259d75 Mon Sep 17 00:00:00 2001
From: "Christopher H. Jordan" <christopherjordan87@gmail.com>
Date: Mon, 10 Jan 2022 16:06:15 +0800
Subject: [PATCH] Add coverage and bump version.

Along with adding coverage to CI, add more tests to increase coverage.

Remove dependence on dashmap in favour of using parking_lot directly.
Dashmap could in rare cases cause a deadlock.

Improve the CUDA de-duplication code.

Use Rust 2021 edition.

Update dependency versions and adjust the code accordingly.

Rename the GitHub workflow file extensions.

Add a changelog.
---
 .github/workflows/build.sh                    |   2 +-
 .github/workflows/coverage.yml                |  59 +++
 .../workflows/{releases.yaml => releases.yml} |   0
 .../{run-tests.yaml => run-tests.yml}         |   0
 CHANGELOG.md                                  |  47 +++
 Cargo.toml                                    |  27 +-
 README.md                                     |  42 +-
 benches/bench.rs                              |  40 +-
 examples/beam_calcs.rs                        |  47 ++-
 examples/beam_calcs_cuda.rs                   |  27 +-
 examples/beam_calcs_cuda_device.cu            |  16 +-
 src/fee/cuda/double.rs                        |   2 +-
 src/fee/cuda/mod.rs                           | 257 ++++++------
 src/fee/cuda/single.rs                        |   2 +-
 src/fee/cuda/tests.rs                         | 117 +++++-
 src/fee/ffi/mod.rs                            |  27 +-
 src/fee/ffi/tests.rs                          | 366 +++++++++++++++++-
 src/fee/mod.rs                                | 212 +++++-----
 src/fee/tests.rs                              | 206 +++++++---
 src/fee/types.rs                              |  17 +-
 src/python/fee.rs                             |  12 +-
 21 files changed, 1147 insertions(+), 378 deletions(-)
 create mode 100644 .github/workflows/coverage.yml
 rename .github/workflows/{releases.yaml => releases.yml} (100%)
 rename .github/workflows/{run-tests.yaml => run-tests.yml} (100%)
 create mode 100644 CHANGELOG.md

diff --git a/.github/workflows/build.sh b/.github/workflows/build.sh
index a117ba5..b6ad7d4 100755
--- a/.github/workflows/build.sh
+++ b/.github/workflows/build.sh
@@ -9,7 +9,7 @@ cp .github/workflows/releases-readme.md README.md
 if [[ "$OSTYPE" == "linux-gnu"* ]]; then
     # I don't know why, but I need to reinstall Rust. Probably something to do with
     # GitHub overriding env variables.
-    curl https://sh.rustup.rs -sSf | sh -s -- -y
+    curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal
 
     # Build a release for each x86_64 microarchitecture level. v4 can't be
     # compiled on GitHub for some reason.
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
new file mode 100644
index 0000000..bedc086
--- /dev/null
+++ b/.github/workflows/coverage.yml
@@ -0,0 +1,59 @@
+# Based on https://github.com/actions-rs/meta/blob/master/recipes/quickstart.md
+
+on: [push, pull_request]
+
+name: Generate Coverage report
+
+jobs:
+  coverage:
+    runs-on: ubuntu-latest
+    container: mwatelescope/birli:latest
+    environment: CI
+
+    steps:
+      - name: Checkout sources
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Install nightly toolchain
+        run: |
+          curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal
+          export PATH="${HOME}/.cargo/bin:${PATH}"
+          rustup toolchain install nightly --component llvm-tools-preview
+          rustup default nightly
+
+      - name: Install system and Cargo Packages
+        run: |
+          export PATH="${HOME}/.cargo/bin:${PATH}"
+          apt-get update
+          apt-get install -y lcov clang curl zip unzip libssl-dev jq libhdf5-dev liberfa-dev
+          cargo update
+          cargo install --force cargo-make cargo-binutils grcov
+        env:
+          DEBIAN_FRONTEND: noninteractive
+
+      - name: Get the HDF5 file
+        run: curl http://ws.mwatelescope.org/static/mwa_full_embedded_element_pattern.h5 -o mwa_full_embedded_element_pattern.h5
+
+      - name: Generate test lcov coverage into coverage/ dir
+        env:
+          LD_LIBRARY_PATH: /usr/local/lib/
+          CARGO_INCREMENTAL: 0
+          RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Copt-level=0 -Coverflow-checks=off -Zpanic_abort_tests -Cpanic=abort"
+          RUSTDOCFLAGS: "-Cpanic=abort"
+          LLVM_PROFILE_FILE: json5format-%m.profraw
+          MWA_BEAM_FILE: mwa_full_embedded_element_pattern.h5
+        run: |
+          mkdir -p coverage
+
+          export PATH="${HOME}/.cargo/bin:${PATH}"
+          cargo build
+          cargo test
+          zip -0 ccov.zip `find . \( -name "mwa_hyperbeam*.gc*" \) -print`
+          grcov ccov.zip -s . -t lcov --llvm --branch --ignore-not-existing --ignore "/*" --ignore src/jones_test.rs --excl-br-line "^.*((debug_)?assert(_eq|_ne|_abs_diff_(eq|ne))?!|#\[derive\()" -o coverage/coverage.lcov
+
+      - name: Upload reports to codecov.io
+        uses: codecov/codecov-action@v2
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/releases.yaml b/.github/workflows/releases.yml
similarity index 100%
rename from .github/workflows/releases.yaml
rename to .github/workflows/releases.yml
diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yml
similarity index 100%
rename from .github/workflows/run-tests.yaml
rename to .github/workflows/run-tests.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..cd81f22
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,47 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic
+Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.4.0] - 2021-10-14
+### Added
+- FEE beam code for CUDA
+  - The original code is courtesy of Cristian Di Pietrantonio and Maciej
+    Cytowski on behalf of the Pawsey Supercomputing Centre.
+  - CHJ modified it to be easily called from Rust.
+  - It is is possible to run the code in single- or double-precision (Cargo
+    features "cuda-single" and "cuda", respectively). This is important because
+    most NVIDIA desktop GPUs have significantly less double-precision compute
+    capability.
+  - There are examples of using the CUDA functionality from Rust, C and Python.
+- Parallactic angle correction
+  - Jack Line did a thorough investigation of what our beam responses should be;
+    the write up is
+    [here](https://github.com/JLBLine/polarisation_tests_for_FEE).
+  - New Rust functions are provided (`*_eng*` for "engineering") to get the
+    old-style beam responses. The existing functions do the corrections by
+    default.
+- A binary `verify-beam-file`
+  - (In theory) verifies that an HDF5 FEE beam file has sensible contents.
+  - The only way that standard beam calculations can fail is if the spherical
+    harmonic coefficients are nonsensical, so this binary is an attempt to
+    ensure that the files used are sensible.
+
+### Changed
+- Rust API
+  - `calc_jones*_array` functions now return a `Vec`, not an `Array1`.
+- Rust internals
+  - Small optimisations.
+  - Small documentation clean ups.
+- C API
+  - The caller must now specify if they want the parallactic angle correction.
+  - All functions that can fail return an error code. If this is non-zero, the
+    function failed.
+  - The caller can also provide error strings to these fallible functions; in
+    the event of failure, an error message is written to the string.
+  - The example C files have been modified to conform with these changes.
+- Python API
+  - The caller must now specify if they want the parallactic angle correction.
diff --git a/Cargo.toml b/Cargo.toml
index 528fd91..c41dcb7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,10 +1,11 @@
 [package]
 name = "mwa_hyperbeam"
-version = "0.3.4"
+version = "0.4.0"
 authors = ["Christopher H. Jordan <christopherjordan87@gmail.com>",
            "Jack L. B. Line <jack.line@curtin.edu.au>",
            "Marcin Sokolowski <marcin.sokolowski@curtin.edu.au>"]
-edition = "2018"
+edition = "2021"
+rust-version = "1.56"
 license = "MPL-2.0"
 readme = "README.md"
 description = "Primary beam code for the Murchison Widefield Array (MWA) radio telescope."
@@ -30,28 +31,33 @@ cuda = ["marlu/cuda", "cc"]
 # Opt-out of double precision, use only single precision.
 cuda-single = ["cuda"]
 
+# Example-only dependencies
+examples = ["anyhow", "clap"]
+
 [profile.release]
 lto = true
 codegen-units = 1   # Set this to 1 in Cargo.toml to allow for maximum size reduction optimizations
 
 [dependencies]
 cfg-if = "1.0.*"
-dashmap = "4.0.*"
-hdf5 = "0.7.*"
+hdf5 = "0.8.*"
 marlu = { version = "0.3.*", default-features = false }
+parking_lot = "0.11.*"
 thiserror = "1.0.*"
 
-hdf5-sys = { version = "0.7.*", features = ["static", "threadsafe"], optional = true }
+hdf5-sys = { version = "0.8.*", features = ["static", "threadsafe"], optional = true }
+
+pyo3 = { version = "0.15.*", features = ["extension-module"], optional = true }
+numpy = { version = "0.15.*", optional = true }
 
-pyo3 = { version = "0.13.*", features = ["extension-module"], optional = true }
-numpy = { version = "0.13.0", optional = true }
+# Example-only dependencies.
+anyhow = { version = "1.0.*", optional = true }
+clap = { version = "3.0.*", features = ["derive"], optional = true }
 
 [dev-dependencies]
-anyhow = "1.0.*"
 approx = { version = "0.5.*", features = ["num-complex"] }
 criterion = "0.3.*"
 serial_test = "0.5.*"
-structopt = "0.3.*"
 
 ndarray = { version = ">=0.15.4,<0.16", features = ["approx-0_5"] }
 
@@ -75,7 +81,8 @@ harness = false
 
 [[example]]
 name = "beam_calcs"
+required-features = ["examples"]
 
 [[example]]
 name = "beam_calcs_cuda"
-required-features = ["cuda"]
+required-features = ["examples", "cuda"]
diff --git a/README.md b/README.md
index 3f4371c..51c46f3 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,13 @@
 <div class="bg-gray-dark" align="center" style="background-color:#24292e">
 <img src="hyperbeam.png" height="200px" alt="hyperbeam logo">
 <br/>
-<a href="https://docs.rs/crate/mwa_hyperbeam"><img src="https://docs.rs/mwa_hyperbeam/badge.svg" alt="docs"></a>
-<img src="https://github.com/MWATelescope/mwa_hyperbeam/workflows/Cross-platform%20tests/badge.svg" alt="Cross-platform%20tests">
+<a href="https://crates.io/crates/mwa_hyperbeam">
+  <img src="https://img.shields.io/crates/v/mwa_hyperbeam?logo=rust" alt="crates.io"></a>
+<a href="https://docs.rs/crate/mwa_hyperbeam">
+  <img src="https://img.shields.io/docsrs/mwa_hyperbeam?logo=rust" alt="docs.rs"></a>
+<img src="https://img.shields.io/github/workflow/status/MWATelescope/mwa_hyperbeam/Cross-platform%20tests?label=Cross-platform%20tests&logo=github" alt="Cross-platform%20tests">
+<a href="https://codecov.io/gh/MWATelescope/mwa_hyperbeam">
+  <img src="https://codecov.io/gh/MWATelescope/mwa_hyperbeam/branch/main/graph/badge.svg?token=61JYU54DG2"/></a>
 </div>
 
 Primary beam code for the Murchison Widefield Array (MWA) radio telescope.
@@ -16,6 +21,10 @@ Full Embedded Element (FEE) primary beam model of the MWA, a.k.a. "the 2016
 beam". This code should be used over all others. If there are soundness issues,
 please raise them here so everyone can benefit.
 
+See the
+[changelog](https://github.com/MWATelescope/mwa_hyperbeam/blob/main/CHANGELOG.md)
+for the latest changes to the code.
+
 ## Usage
 `hyperbeam` requires the MWA FEE HDF5 file. This can be obtained with:
 
@@ -48,9 +57,8 @@ other words, most languages. See Rust, C and Python examples of usage in the
 
 ### CUDA
 `hyperbeam` also can also be run on NVIDIA GPUs. To see an example of usage, see
-any of the examples with "_cuda" in the name. CUDA functionality is only
-provided with one of two Cargo features; see installing from source instructions
-below.
+any of the examples with "cuda" in the name. CUDA functionality is only provided
+with one of two Cargo features; see installing from source instructions below.
 
 ## Installation
 ### Python PyPI
@@ -76,10 +84,10 @@ their respective licenses are also distributed.
 
   `https://www.rust-lang.org/tools/install`
 
-  The Rust compiler must be at least version 1.47.0:
+  The Rust compiler must be at least version 1.56.0:
   ```bash
   $ rustc -V
-  rustc 1.47.0 (18bf6b4f0 2020-10-07)
+  rustc 1.57.0 (f1edd0429 2021-11-29)
   ```
 
 - [hdf5](https://www.hdfgroup.org/hdf5)
@@ -108,6 +116,26 @@ For usage with other languages, an include file will be in the `include`
 directory, along with C-compatible shared and static objects in the
 `target/release` directory.
 
+#### CUDA
+Are you running `hyperbeam` on a desktop NVIDIA GPU? Then you probably want to
+compile with single-precision floats:
+
+    cargo build --release --features=cuda-single
+
+Otherwise, go ahead with double-precision floats:
+
+    cargo build --release --features=cuda
+
+Desktop GPUs (e.g. NVIDIA GeForce RTX 2070) have significantly less
+double-precision compute capability than "data center" GPUs (e.g. NVIDIA V100).
+Allowing `hyperbeam` to switch on the float type allows the user to decide
+between the performance and precision compromise.
+
+`CUDA` can also be linked statically (although it seems to link statically by
+default regardless of this flag):
+
+    cargo build --release --features=cuda,cuda-static
+
 #### Static dependencies
 To make `hyperbeam` without a dependence on a system `HDF5` library, give the
 `build` command a feature flag:
diff --git a/benches/bench.rs b/benches/bench.rs
index d4dc967..ced776f 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -24,11 +24,11 @@ fn fee(c: &mut Criterion) {
         let za = 80.0_f64.to_radians();
         let freq = 51200000;
         let delays = [0; 16];
-        let gains = [1.0; 16];
+        let amps = [1.0; 16];
         let norm_to_zenith = false;
         b.iter(|| {
             let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-            beam.calc_jones(az, za, freq, &delays, &gains, norm_to_zenith)
+            beam.calc_jones(az, za, freq, &delays, &amps, norm_to_zenith)
                 .unwrap();
         })
     });
@@ -38,11 +38,11 @@ fn fee(c: &mut Criterion) {
         let za = 80.0_f64.to_radians();
         let freq = 51200000;
         let delays = [0; 16];
-        let gains = [1.0; 16];
+        let amps = [1.0; 16];
         let norm_to_zenith = false;
         let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
         b.iter(|| {
-            beam.calc_jones(az, za, freq, &delays, &gains, norm_to_zenith)
+            beam.calc_jones(az, za, freq, &delays, &amps, norm_to_zenith)
                 .unwrap();
             beam.empty_cache();
         })
@@ -53,14 +53,14 @@ fn fee(c: &mut Criterion) {
         let za = 80.0_f64.to_radians();
         let freq = 51200000;
         let delays = [0; 16];
-        let gains = [1.0; 16];
+        let amps = [1.0; 16];
         let norm_to_zenith = false;
         let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
         // Prime the cache.
-        beam.calc_jones(az, za, freq, &delays, &gains, norm_to_zenith)
+        beam.calc_jones(az, za, freq, &delays, &amps, norm_to_zenith)
             .unwrap();
         b.iter(|| {
-            beam.calc_jones(az, za, freq, &delays, &gains, norm_to_zenith)
+            beam.calc_jones(az, za, freq, &delays, &amps, norm_to_zenith)
                 .unwrap();
         })
     });
@@ -75,14 +75,14 @@ fn fee(c: &mut Criterion) {
         }
         let freq = 51200000;
         let delays = [0; 16];
-        let gains = [1.0; 16];
+        let amps = [1.0; 16];
         let norm_to_zenith = false;
         let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
         // Prime the cache.
-        beam.calc_jones(az[0], za[0], freq, &delays, &gains, norm_to_zenith)
+        beam.calc_jones(az[0], za[0], freq, &delays, &amps, norm_to_zenith)
             .unwrap();
         b.iter(|| {
-            beam.calc_jones_array(&az, &za, freq, &delays, &gains, norm_to_zenith)
+            beam.calc_jones_array(&az, &za, freq, &delays, &amps, norm_to_zenith)
                 .unwrap();
         })
     });
@@ -99,17 +99,17 @@ fn fee(c: &mut Criterion) {
         }
         let freq = 51200000;
         let delays = [0; 16];
-        let gains = [1.0; 16];
+        let amps = [1.0; 16];
         let norm_to_zenith = false;
         let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
         // Prime the cache.
-        beam.calc_jones(az[0], za[0], freq, &delays, &gains, norm_to_zenith)
+        beam.calc_jones(az[0], za[0], freq, &delays, &amps, norm_to_zenith)
             .unwrap();
         b.iter(|| {
             az.par_iter()
                 .zip(za.par_iter())
                 .map(|(&a, &z)| {
-                    beam.calc_jones(a, z, freq, &delays, &gains, norm_to_zenith)
+                    beam.calc_jones(a, z, freq, &delays, &amps, norm_to_zenith)
                         .unwrap()
                 })
                 .collect::<Vec<_>>();
@@ -217,24 +217,24 @@ fn fee(c: &mut Criterion) {
     // c.bench_function("calculating coefficients", |b| {
     //     let freq = 51200000;
     //     let delays = [0; 16];
-    //     let gains = [1.0; 16];
+    //     let amps = [1.0; 32];
+    //     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
     //     b.iter(|| {
-    //         let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    //         beam.populate_modes(freq, &delays, &gains).unwrap();
+    //         beam.calc_modes(freq, &delays, &amps).unwrap();
     //     })
     // });
 
     // c.bench_function("getting coefficients from cache", |b| {
     //     let freq = 51200000;
     //     let delays = [0; 16];
-    //     let gains = [1.0; 16];
-    //     // Prime the cache.
+    //     let amps = [1.0; 32];
     //     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    //     beam.populate_modes(freq, &delays, &gains).unwrap();
+    //     // Prime the cache.
+    //     let _ = beam.get_modes(freq, &delays, &amps).unwrap();
     //     b.iter(|| {
     //         // By calling populate_modes before the loop we are benchmarking a
     //         // hot cache.
-    //         beam.populate_modes(freq, &delays, &gains).unwrap();
+    //         let _ = beam.get_modes(freq, &delays, &amps).unwrap();
     //     })
     // });
 }
diff --git a/examples/beam_calcs.rs b/examples/beam_calcs.rs
index 440ba4e..921e4f8 100644
--- a/examples/beam_calcs.rs
+++ b/examples/beam_calcs.rs
@@ -14,32 +14,43 @@
 
 use std::f64::consts::PI;
 
+use clap::Parser;
 use mwa_hyperbeam::fee::FEEBeam;
-use structopt::*;
 
-#[derive(StructOpt, Debug)]
-struct Opts {
+#[derive(Parser, Debug)]
+#[clap()]
+struct Args {
     /// Path to the HDF5 file.
-    #[structopt(short, long, parse(from_os_str))]
+    #[clap(short, long, parse(from_os_str))]
     hdf5_file: Option<std::path::PathBuf>,
 
     /// The number of directions to run.
-    #[structopt()]
+    #[clap()]
     num_directions: usize,
 
+    /// Use these delays when calculating the beam response. There must be 16
+    /// values.
+    #[clap(short, long, multiple_values(true))]
+    delays: Option<Vec<u32>>,
+
+    /// Use these dipole gains when calculating the beam response. There must be
+    /// 16 or 32 values.
+    #[clap(short, long, multiple_values(true))]
+    gains: Option<Vec<f64>>,
+
     /// Calculate the Jones matrices in parallel.
-    #[structopt(short, long)]
+    #[clap(short, long)]
     parallel: bool,
 
     /// Don't apply parallactic-angle correction.
-    #[structopt(short, long)]
+    #[clap(short, long)]
     no_parallactic: bool,
 }
 
 fn main() -> Result<(), anyhow::Error> {
-    let opts = Opts::from_args();
+    let args = Args::parse();
     // If we were given a file, use it. Otherwise, fall back on MWA_BEAM_FILE.
-    let beam = match opts.hdf5_file {
+    let beam = match args.hdf5_file {
         Some(f) => FEEBeam::new(f)?,
         None => FEEBeam::new_from_env()?,
     };
@@ -47,21 +58,23 @@ fn main() -> Result<(), anyhow::Error> {
     // Set up the directions to test.
     let mut azs = vec![];
     let mut zas = vec![];
-    for i in 0..opts.num_directions {
-        azs.push(0.9 * PI * i as f64 / opts.num_directions as f64);
-        zas.push(0.1 + 0.9 * PI / 2.0 * i as f64 / opts.num_directions as f64);
+    for i in 0..args.num_directions {
+        azs.push(0.9 * PI * i as f64 / args.num_directions as f64);
+        zas.push(0.1 + 0.9 * PI / 2.0 * i as f64 / args.num_directions as f64);
     }
     let freq_hz = 51200000;
     // Delays and amps correspond to dipoles in the "M&C order". See
     // https://wiki.mwatelescope.org/pages/viewpage.action?pageId=48005139) for
     // more info.
-    let delays = vec![0; 16];
-    let amps = vec![1.0; 16];
+    let delays = args.delays.unwrap_or_else(|| vec![0; 16]);
+    assert_eq!(delays.len(), 16);
+    let amps = args.gains.unwrap_or_else(|| vec![1.0; 16]);
+    assert!(amps.len() == 16 || amps.len() == 32);
     let norm_to_zenith = false;
 
     // Call hyperbeam.
-    let jones = if opts.parallel {
-        if opts.no_parallactic {
+    let jones = if args.parallel {
+        if args.no_parallactic {
             beam.calc_jones_eng_array(&azs, &zas, freq_hz, &delays, &amps, norm_to_zenith)
                 .unwrap()
         } else {
@@ -71,7 +84,7 @@ fn main() -> Result<(), anyhow::Error> {
     } else {
         let mut results = vec![];
         for (az, za) in azs.into_iter().zip(zas.into_iter()) {
-            let j = if opts.no_parallactic {
+            let j = if args.no_parallactic {
                 beam.calc_jones_eng(az, za, freq_hz, &delays, &amps, norm_to_zenith)
                     .unwrap()
             } else {
diff --git a/examples/beam_calcs_cuda.rs b/examples/beam_calcs_cuda.rs
index 333aa87..12fa43f 100644
--- a/examples/beam_calcs_cuda.rs
+++ b/examples/beam_calcs_cuda.rs
@@ -14,9 +14,9 @@
 
 use std::f64::consts::PI;
 
+use clap::Parser;
 use marlu::{ndarray, Jones};
 use ndarray::prelude::*;
-use structopt::*;
 
 use mwa_hyperbeam::fee::FEEBeam;
 
@@ -25,26 +25,27 @@ type CudaFloat = f32;
 #[cfg(not(feature = "cuda-single"))]
 type CudaFloat = f64;
 
-#[derive(StructOpt, Debug)]
-struct Opts {
+#[derive(Parser, Debug)]
+#[clap()]
+struct Args {
     /// Path to the HDF5 file.
-    #[structopt(short, long, parse(from_os_str))]
+    #[clap(short, long, parse(from_os_str))]
     hdf5_file: Option<std::path::PathBuf>,
 
     /// The number of directions to run.
-    #[structopt()]
+    #[clap()]
     num_directions: usize,
 }
 
 fn main() -> Result<(), anyhow::Error> {
-    let opts = Opts::from_args();
-    if opts.num_directions == 0 {
+    let args = Args::parse();
+    if args.num_directions == 0 {
         eprintln!("num_directions cannot be 0.");
         std::process::exit(1);
     }
 
     // If we were given a file, use it. Otherwise, fall back on MWA_BEAM_FILE.
-    let beam = match opts.hdf5_file {
+    let beam = match args.hdf5_file {
         Some(f) => FEEBeam::new(f)?,
         None => FEEBeam::new_from_env()?,
     };
@@ -63,12 +64,12 @@ fn main() -> Result<(), anyhow::Error> {
         unsafe { beam.cuda_prepare(&freqs_hz, delays.view(), amps.view(), norm_to_zenith)? };
 
     // Set up the directions to test. The type depends on the CUDA precision.
-    let mut azs = Vec::with_capacity(opts.num_directions);
-    let mut zas = Vec::with_capacity(opts.num_directions);
-    for i in 0..opts.num_directions {
-        azs.push(0.4 + 0.3 * PI as CudaFloat * i as CudaFloat / opts.num_directions as CudaFloat);
+    let mut azs = Vec::with_capacity(args.num_directions);
+    let mut zas = Vec::with_capacity(args.num_directions);
+    for i in 0..args.num_directions {
+        azs.push(0.4 + 0.3 * PI as CudaFloat * i as CudaFloat / args.num_directions as CudaFloat);
         zas.push(
-            0.3 + 0.4 * PI as CudaFloat / 2.0 * i as CudaFloat / opts.num_directions as CudaFloat,
+            0.3 + 0.4 * PI as CudaFloat / 2.0 * i as CudaFloat / args.num_directions as CudaFloat,
         );
     }
     let parallactic_correction = true;
diff --git a/examples/beam_calcs_cuda_device.cu b/examples/beam_calcs_cuda_device.cu
index 5ff4b7a..16962ca 100644
--- a/examples/beam_calcs_cuda_device.cu
+++ b/examples/beam_calcs_cuda_device.cu
@@ -52,8 +52,8 @@ typedef struct JONES {
     CUCOMPLEX j11;
 } JONES;
 
-__global__ void use_hyperbeam_values(JONES *d_jones, const uint64_t *d_map, int num_unique_fee_freqs, int num_tiles,
-                                     int num_freqs, int num_directions) {
+__global__ void use_hyperbeam_values(JONES *d_jones, const int *d_tile_map, const int *d_freq_map,
+                                     int num_unique_fee_freqs, int num_tiles, int num_freqs, int num_directions) {
     int i_tile = blockIdx.y;
     int i_freq = blockIdx.z;
     if (i_tile >= num_tiles)
@@ -66,9 +66,8 @@ __global__ void use_hyperbeam_values(JONES *d_jones, const uint64_t *d_map, int
 
     // For *this tile* and *this frequency*, access the de-duplicated beam
     // response.
-    uint64_t i_map = d_map[i_tile * num_unique_fee_freqs + i_freq];
-    int i_row = i_map >> 32;
-    int i_col = i_map & 0xffffffff;
+    int i_row = d_tile_map[i_tile];
+    int i_col = d_freq_map[i_freq];
     JONES jones = d_jones[((num_directions * num_unique_fee_freqs * i_row) + num_directions * i_col) + i_dir];
 
     // To test that the right response is paired with the right tile, assert
@@ -175,7 +174,8 @@ int main(int argc, char *argv[]) {
     // interface with the values. This kernel prints messages if the values are
     // not what was expected. We need to have a couple of bits of metadata to
     // interface with the beam responses.
-    const uint64_t *d_beam_map = get_cuda_map(cuda_beam);
+    const int *d_tile_map = get_tile_map(cuda_beam);
+    const int *d_freq_map = get_freq_map(cuda_beam);
     int num_unique_fee_freqs = get_num_unique_fee_freqs(cuda_beam);
 
     dim3 gridDim, blockDim;
@@ -183,8 +183,8 @@ int main(int argc, char *argv[]) {
     gridDim.x = (int)ceil((double)num_directions / (double)blockDim.x);
     gridDim.y = num_tiles; // The total number of tiles, not the unique count.
     gridDim.z = num_freqs; // The total number of freqs, not the unique count.
-    use_hyperbeam_values<<<gridDim, blockDim>>>(d_jones, d_beam_map, num_unique_fee_freqs, num_tiles, num_freqs,
-                                                num_directions);
+    use_hyperbeam_values<<<gridDim, blockDim>>>(d_jones, d_tile_map, d_freq_map, num_unique_fee_freqs, num_tiles,
+                                                num_freqs, num_directions);
     // Check that our kernel had no issues.
     cudaError_t cuda_err_code = cudaPeekAtLastError();
     if (cuda_err_code != cudaSuccess) {
diff --git a/src/fee/cuda/double.rs b/src/fee/cuda/double.rs
index 719ee3b..0607a71 100644
--- a/src/fee/cuda/double.rs
+++ b/src/fee/cuda/double.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.59.1 */
+/* automatically generated by rust-bindgen 0.59.2 */
 
 pub type __int8_t = ::std::os::raw::c_schar;
 #[repr(C)]
diff --git a/src/fee/cuda/mod.rs b/src/fee/cuda/mod.rs
index 15ecb09..8f31dc6 100644
--- a/src/fee/cuda/mod.rs
+++ b/src/fee/cuda/mod.rs
@@ -29,17 +29,17 @@ cfg_if::cfg_if! {
 #[cfg(test)]
 mod tests;
 
-use std::collections::{hash_map::Entry, HashMap, HashSet};
+use std::collections::hash_map::DefaultHasher;
 use std::convert::TryInto;
 use std::ffi::CString;
+use std::hash::{Hash, Hasher};
 
 use marlu::{
     c64,
     cuda::{cuda_status_to_error, DevicePointer},
-    ndarray, rayon, Jones,
+    ndarray, Jones,
 };
 use ndarray::prelude::*;
-use rayon::prelude::*;
 
 use super::{fix_amps, FEEBeam, FEEBeamError};
 use crate::types::CacheKey;
@@ -72,22 +72,29 @@ pub struct FEEBeamCUDA {
 
     /// The number of tiles used to generate the `FEECoeffs`. Also one of the indices
     /// used to make `(d_)coeff_map`.
-    pub(super) num_tiles: i32,
+    pub(super) num_unique_tiles: i32,
 
     /// The number of frequencies used to generate `FEECoeffs`. Also one of the
     /// indices used to make `(d_)coeff_map`.
-    pub(super) num_freqs: i32,
+    pub(super) num_unique_freqs: i32,
 
-    /// Coefficients map. This is used to access de-duplicated Jones matrices.
+    /// Tile map. This is used to access de-duplicated Jones matrices.
     /// Not using this would mean that Jones matrices have to be 1:1 with
     /// threads, and that potentially uses a huge amount of compute and memory!
-    coeff_map: Array2<(usize, usize)>,
+    tile_map: Vec<i32>,
 
-    /// Device pointer to the coefficients map. This is used to access
-    /// de-duplicated Jones matrices. Not using this would mean that Jones
-    /// matrices have to be 1:1 with threads, and that potentially uses a huge
-    /// amount of compute and memory!
-    d_coeff_map: DevicePointer<u64>,
+    /// Tile map. This is used to access de-duplicated Jones matrices.
+    /// Not using this would mean that Jones matrices have to be 1:1 with
+    /// threads, and that potentially uses a huge amount of compute and memory!
+    freq_map: Vec<i32>,
+
+    /// The device pointer to the `tile_map` (same as the host's memory
+    /// equivalent above).
+    d_tile_map: DevicePointer<i32>,
+
+    /// The device pointer to the `freq_map` (same as the host's memory
+    /// equivalent above).
+    d_freq_map: DevicePointer<i32>,
 
     /// Jones matrices for normalising the beam response. This array has the
     /// same shape as `coeff_map`. If this is `None`, then no normalisation is
@@ -122,67 +129,77 @@ impl FEEBeamCUDA {
         // amplitudes.
         debug_assert!(amps_array.dim().1 == 16 || amps_array.dim().1 == 32);
 
-        // Prepare the cache with all unique combinations of tiles and frequencies
-        let hash_results: Vec<Vec<Result<_, _>>> = delays_array
+        // Prepare the cache with all unique combinations of tiles and
+        // frequencies. Track all of the unique tiles and frequencies to allow
+        // de-duplication.
+        let mut unique_hashes = vec![];
+        let mut unique_tiles = vec![];
+        let mut unique_fee_freqs = vec![];
+        let mut tile_map = vec![];
+        let mut freq_map = vec![];
+        let mut i_tile = 0;
+        let mut i_freq = 0;
+        for (i, (delays, amps)) in delays_array
             .outer_iter()
-            .into_par_iter()
-            .zip(amps_array.outer_iter().into_par_iter())
+            .zip(amps_array.outer_iter())
             .enumerate()
-            .map(|(i_tile, (delays, amps))| {
-                // unwrap is safe as these collections are contiguous.
-                let delays = delays.as_slice().unwrap();
-                let full_amps = fix_amps(amps.as_slice().unwrap(), delays);
-                freqs_hz
+        {
+            // unwrap is safe as these collections are contiguous (outer iter).
+            let delays = delays.as_slice().unwrap();
+            let full_amps = fix_amps(amps.as_slice().unwrap(), delays);
+
+            let mut unique_tile_hasher = DefaultHasher::new();
+            delays.hash(&mut unique_tile_hasher);
+            // We can't hash f64 values, but we can hash their bits.
+            for amp in full_amps {
+                amp.to_bits().hash(&mut unique_tile_hasher);
+            }
+            let unique_tile_hash = unique_tile_hasher.finish();
+            let this_tile_index = if unique_tiles.contains(&unique_tile_hash) {
+                unique_tiles
                     .iter()
                     .enumerate()
-                    .map(|(i_freq, freq)| {
-                        // If we're normalising the beam responses, cache the
-                        // normalisation Jones matrices too.
-                        if norm_to_zenith {
-                            fee_beam.populate_norm_jones(*freq).and_then(|fee_freq| {
-                                fee_beam
-                                    .populate_modes(*freq, delays, &full_amps)
-                                    .map(|h| ((i_tile, i_freq, fee_freq), h))
-                            })
-                        } else {
-                            fee_beam
-                                .populate_modes(*freq, delays, &full_amps)
-                                .map(|h| ((i_tile, i_freq, 0), h))
-                        }
-                    })
-                    .collect::<Vec<Result<_, _>>>()
-            })
-            .collect();
+                    .find(|(_, t)| **t == unique_tile_hash)
+                    .unwrap()
+                    .0 as i32
+            } else {
+                unique_tiles.push(unique_tile_hash);
+                i_tile += 1;
+                i_tile - 1
+            };
+            tile_map.push(this_tile_index);
+
+            for freq in freqs_hz {
+                // If we're normalising the beam responses, cache the
+                // normalisation Jones matrices too.
+                if norm_to_zenith {
+                    fee_beam.get_norm_jones(*freq)?;
+                }
 
-        let mut unique_hash_set = HashSet::new();
-        let mut unique_tile_set = HashSet::new();
-        let mut unique_freq_set = HashMap::new();
-        let mut unique_hashes = vec![];
-        let mut row = 0;
-        let mut col = 0;
-        for hash_collection in hash_results {
-            for hash_result in hash_collection {
-                let ((i_tile, i_freq, fee_freq), hash) = hash_result?;
-
-                if !unique_hash_set.contains(&hash) {
-                    unique_hash_set.insert(hash);
-                    // Unique hash found. Have we seen this tile before?
-                    if !unique_tile_set.contains(&i_tile) {
-                        // Put it in the set and increment the unique row
-                        // dimension.
-                        unique_tile_set.insert(i_tile);
-                        row += 1;
-                    }
-
-                    // Have we seen this frequency before?
-                    if let Entry::Vacant(e) = unique_freq_set.entry(i_freq) {
-                        // Put it in the set and increment the unique column
-                        // dimension.
-                        e.insert(col);
-                        col += 1;
-                    }
-
-                    unique_hashes.push((hash, (row - 1, unique_freq_set[&i_freq], fee_freq)));
+                let _ = fee_beam.get_modes(*freq, delays, &full_amps)?;
+
+                let fee_freq = fee_beam.find_closest_freq(*freq);
+                let hash = CacheKey::new(fee_freq, delays, &full_amps);
+                if !unique_hashes.contains(&(hash, fee_freq)) {
+                    unique_hashes.push((hash, fee_freq));
+                }
+
+                // No need to do this code more than once; frequency redundancy
+                // applies to all tiles.
+                if i == 0 {
+                    let this_freq_index = if unique_fee_freqs.contains(&fee_freq) {
+                        unique_fee_freqs
+                            .iter()
+                            .enumerate()
+                            .find(|(_, f)| **f == fee_freq)
+                            .unwrap()
+                            .0 as i32
+                    } else {
+                        unique_fee_freqs.push(fee_freq);
+                        i_freq += 1;
+                        i_freq - 1
+                    };
+                    freq_map.push(this_freq_index);
                 }
             }
         }
@@ -214,8 +231,9 @@ impl FEEBeamCUDA {
                 .collect()
         };
 
-        unique_hashes.iter().for_each(|(hash, (_, _, fee_freq))| {
-            let coeffs = fee_beam.coeff_cache.get(hash).unwrap();
+        let num_coeffs = unique_hashes.len().try_into().unwrap();
+        unique_hashes.into_iter().for_each(|(hash, fee_freq)| {
+            let coeffs = &fee_beam.coeff_cache.read()[&hash];
             let x_offset = x_offsets
                 .last()
                 .and_then(|&o| x_lengths.last().map(|&l| l + o))
@@ -248,44 +266,10 @@ impl FEEBeamCUDA {
             y_offsets.push(y_offset);
 
             if norm_to_zenith {
-                norm_jones.push(*fee_beam.norm_cache.get(fee_freq).unwrap());
+                norm_jones.push(*fee_beam.norm_cache.read()[&fee_freq]);
             }
         });
 
-        let coeff_dedup_map: HashMap<CacheKey, (usize, usize)> = unique_hashes
-            .iter()
-            .map(|(hash, (tile_index, freq_index, _))| (*hash, (*tile_index, *freq_index)))
-            .collect();
-
-        // Now map each combination of frequency and tile parameters to its
-        // unique dipole coefficients. The mapping is an index into the 1D array
-        // `unique_hashes`, which corresponds exactly to `cuda_coeffs`.
-        let mut coeff_map = Array2::from_elem((delays_array.dim().0, freqs_hz.len()), (0, 0));
-        coeff_map
-            .outer_iter_mut()
-            .into_par_iter()
-            .zip(delays_array.outer_iter().into_par_iter())
-            .zip(amps_array.outer_iter().into_par_iter())
-            .try_for_each(|((mut freq_coeff_indices, delays), amps)| {
-                let delays = delays.as_slice().unwrap();
-                let full_amps = fix_amps(amps.as_slice().unwrap(), delays);
-                freq_coeff_indices
-                    .iter_mut()
-                    .zip(freqs_hz.iter())
-                    .try_for_each(|(freq_coeff_index, freq)| {
-                        match fee_beam.populate_modes(*freq, delays, &full_amps) {
-                            Ok(hash) => {
-                                *freq_coeff_index = coeff_dedup_map[&hash];
-                                Ok(())
-                            }
-                            Err(e) => Err(e),
-                        }
-                    })
-            })?;
-
-        let num_tiles = unique_tile_set.len().try_into().unwrap();
-        let num_freqs = unique_freq_set.len().try_into().unwrap();
-
         let x_q1_accum = DevicePointer::copy_to_device(&x_q1_accum)?;
         let x_q2_accum = DevicePointer::copy_to_device(&x_q2_accum)?;
         let x_m_accum = DevicePointer::copy_to_device(&x_m_accum)?;
@@ -304,13 +288,11 @@ impl FEEBeamCUDA {
         let y_lengths = DevicePointer::copy_to_device(&y_lengths)?;
         let y_offsets = DevicePointer::copy_to_device(&y_offsets)?;
 
-        // Put the map's tuple elements into a single int, while demoting
-        // the size of the ints; CUDA goes a little faster with "int"
-        // instead of "size_t". Overflowing ints? Well, that'd be a big
-        // problem, but there's almost certainly insufficient memory before
-        // that happens.
-        let coeff_map_ints = coeff_map.mapv(|(i_row, i_col)| ((i_row << 32) + i_col) as u64);
-        let d_coeff_map = DevicePointer::copy_to_device(coeff_map_ints.as_slice().unwrap())?;
+        let num_unique_tiles = unique_tiles.len().try_into().unwrap();
+        let num_unique_freqs = unique_fee_freqs.len().try_into().unwrap();
+
+        let d_tile_map = DevicePointer::copy_to_device(&tile_map)?;
+        let d_freq_map = DevicePointer::copy_to_device(&freq_map)?;
 
         let d_norm_jones = if norm_jones.is_empty() {
             None
@@ -352,12 +334,14 @@ impl FEEBeamCUDA {
             y_lengths,
             y_offsets,
 
-            num_coeffs: unique_hashes.len().try_into().unwrap(),
-            num_tiles,
-            num_freqs,
+            num_coeffs,
+            num_unique_tiles,
+            num_unique_freqs,
 
-            coeff_map,
-            d_coeff_map,
+            tile_map,
+            freq_map,
+            d_tile_map,
+            d_freq_map,
             d_norm_jones,
         })
     }
@@ -373,8 +357,8 @@ impl FEEBeamCUDA {
         unsafe {
             // Allocate a buffer on the device for results.
             let d_results = DevicePointer::malloc(
-                self.num_tiles as usize
-                    * self.num_freqs as usize
+                self.num_unique_tiles as usize
+                    * self.num_unique_freqs as usize
                     * az_rad.len()
                     * std::mem::size_of::<Jones<CudaFloat>>(),
             )?;
@@ -418,8 +402,8 @@ impl FEEBeamCUDA {
         let device_ptr = self.calc_jones_device(az_rad, za_rad, parallactic)?;
         let mut jones_results: Array3<Jones<CudaFloat>> = Array3::from_elem(
             (
-                self.num_tiles as usize,
-                self.num_freqs as usize,
+                self.num_unique_tiles as usize,
+                self.num_unique_freqs as usize,
                 az_rad.len(),
             ),
             Jones::default(),
@@ -432,18 +416,21 @@ impl FEEBeamCUDA {
 
         // Expand the results according to the map.
         let mut jones_expanded = Array3::from_elem(
-            (self.coeff_map.dim().0, self.coeff_map.dim().1, az_rad.len()),
+            (self.tile_map.len(), self.freq_map.len(), az_rad.len()),
             Jones::default(),
         );
         jones_expanded
             .outer_iter_mut()
-            .zip(self.coeff_map.outer_iter())
-            .for_each(|(mut jones_row, index_row)| {
-                jones_row.outer_iter_mut().zip(index_row.iter()).for_each(
-                    |(mut jones_col, &(i_row, i_col))| {
+            .zip(self.tile_map.iter())
+            .for_each(|(mut jones_row, &i_row)| {
+                let i_row: usize = i_row.try_into().unwrap();
+                jones_row
+                    .outer_iter_mut()
+                    .zip(self.freq_map.iter())
+                    .for_each(|(mut jones_col, &i_col)| {
+                        let i_col: usize = i_col.try_into().unwrap();
                         jones_col.assign(&jones_results.slice(s![i_row, i_col, ..]));
-                    },
-                )
+                    })
             });
         Ok(jones_expanded)
     }
@@ -471,15 +458,21 @@ impl FEEBeamCUDA {
         }
     }
 
-    /// Get a pointer to the device beam Jones map. This is necessary to access
+    /// Get a pointer to the device tile map. This is necessary to access
+    /// de-duplicated beam Jones matrices on the device.
+    pub fn get_tile_map(&self) -> *const i32 {
+        self.d_tile_map.get()
+    }
+
+    /// Get a pointer to the device freq map. This is necessary to access
     /// de-duplicated beam Jones matrices on the device.
-    pub fn get_beam_jones_map(&self) -> *const u64 {
-        self.d_coeff_map.get()
+    pub fn get_freq_map(&self) -> *const i32 {
+        self.d_freq_map.get()
     }
 
     /// Get the number of de-duplicated frequencies associated with this
     /// [FEEBeamCUDA].
     pub fn get_num_unique_freqs(&self) -> i32 {
-        self.num_freqs
+        self.num_unique_freqs
     }
 }
diff --git a/src/fee/cuda/single.rs b/src/fee/cuda/single.rs
index b899b71..a6c4ad3 100644
--- a/src/fee/cuda/single.rs
+++ b/src/fee/cuda/single.rs
@@ -1,4 +1,4 @@
-/* automatically generated by rust-bindgen 0.59.1 */
+/* automatically generated by rust-bindgen 0.59.2 */
 
 pub type __int8_t = ::std::os::raw::c_schar;
 #[repr(C)]
diff --git a/src/fee/cuda/tests.rs b/src/fee/cuda/tests.rs
index 0ab4054..c9670bc 100644
--- a/src/fee/cuda/tests.rs
+++ b/src/fee/cuda/tests.rs
@@ -4,7 +4,7 @@
 
 //! Tests for CUDA FEE beam code.
 
-use approx::assert_abs_diff_eq;
+use approx::{assert_abs_diff_eq, assert_abs_diff_ne};
 use marlu::ndarray::prelude::*;
 use serial_test::serial;
 
@@ -24,8 +24,8 @@ fn test_cuda_calc_jones_no_norm() {
     assert!(result.is_ok(), "{}", result.unwrap_err());
     let cuda_beam = result.unwrap();
     assert_eq!(cuda_beam.num_coeffs, 1);
-    assert_eq!(cuda_beam.num_tiles, 1);
-    assert_eq!(cuda_beam.num_freqs, 1);
+    assert_eq!(cuda_beam.num_unique_tiles, 1);
+    assert_eq!(cuda_beam.num_unique_freqs, 1);
 
     let (az, za): (Vec<_>, Vec<_>) = (0..1025)
         .into_iter()
@@ -99,8 +99,8 @@ fn test_cuda_calc_jones_w_norm() {
     assert!(result.is_ok(), "{}", result.unwrap_err());
     let cuda_beam = result.unwrap();
     assert_eq!(cuda_beam.num_coeffs, 1);
-    assert_eq!(cuda_beam.num_tiles, 1);
-    assert_eq!(cuda_beam.num_freqs, 1);
+    assert_eq!(cuda_beam.num_unique_tiles, 1);
+    assert_eq!(cuda_beam.num_unique_freqs, 1);
 
     let (az, za): (Vec<_>, Vec<_>) = (0..1025)
         .into_iter()
@@ -174,8 +174,8 @@ fn test_cuda_calc_jones_w_norm_and_parallactic() {
     assert!(result.is_ok(), "{}", result.unwrap_err());
     let cuda_beam = result.unwrap();
     assert_eq!(cuda_beam.num_coeffs, 1);
-    assert_eq!(cuda_beam.num_tiles, 1);
-    assert_eq!(cuda_beam.num_freqs, 1);
+    assert_eq!(cuda_beam.num_unique_tiles, 1);
+    assert_eq!(cuda_beam.num_unique_freqs, 1);
 
     let (az, za): (Vec<_>, Vec<_>) = (0..1025)
         .into_iter()
@@ -267,8 +267,8 @@ fn test_cuda_calc_jones_deduplication() {
     assert!(result.is_ok(), "{}", result.unwrap_err());
     let cuda_beam = result.unwrap();
     assert_eq!(cuda_beam.num_coeffs, 9);
-    assert_eq!(cuda_beam.num_tiles, 3);
-    assert_eq!(cuda_beam.num_freqs, 3);
+    assert_eq!(cuda_beam.num_unique_tiles, 3);
+    assert_eq!(cuda_beam.num_unique_freqs, 3);
 
     let (az, za): (Vec<_>, Vec<_>) = (0..1025)
         .into_iter()
@@ -360,8 +360,8 @@ fn test_cuda_calc_jones_deduplication_w_norm() {
     assert!(result.is_ok(), "{}", result.unwrap_err());
     let cuda_beam = result.unwrap();
     assert_eq!(cuda_beam.num_coeffs, 9);
-    assert_eq!(cuda_beam.num_tiles, 3);
-    assert_eq!(cuda_beam.num_freqs, 3);
+    assert_eq!(cuda_beam.num_unique_tiles, 3);
+    assert_eq!(cuda_beam.num_unique_freqs, 3);
 
     let (az, za): (Vec<_>, Vec<_>) = (0..1025)
         .into_iter()
@@ -421,3 +421,98 @@ fn test_cuda_calc_jones_deduplication_w_norm() {
     // The errors are heavily dependent on the directions.
     assert_abs_diff_eq!(jones_gpu, jones_cpu, epsilon = 1e-6);
 }
+
+#[test]
+#[serial]
+fn test_cuda_calc_jones_no_amps() {
+    let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    let freqs: Vec<u32> = [50e6, 75e6, 100e6, 125e6, 150e6, 175e6, 200e6]
+        .into_iter()
+        .map(|f| f as u32)
+        .collect();
+    let delays = array![
+        [3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0],
+        [3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0]
+    ];
+    let amps = array![
+        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    ];
+    let norm_to_zenith = false;
+    let result = unsafe { beam.cuda_prepare(&freqs, delays.view(), amps.view(), norm_to_zenith) };
+    assert!(result.is_ok(), "{}", result.unwrap_err());
+    let cuda_beam = result.unwrap();
+    assert_eq!(cuda_beam.num_coeffs, 14);
+    assert_eq!(cuda_beam.num_unique_tiles, 2);
+    assert_eq!(cuda_beam.num_unique_freqs, 7);
+
+    let (az, za): (Vec<_>, Vec<_>) = (0..1025)
+        .into_iter()
+        .map(|i| {
+            (
+                0.45 + i as CudaFloat / 10000.0,
+                0.45 + i as CudaFloat / 10000.0,
+            )
+        })
+        .unzip();
+    let parallactic_correction = false;
+
+    let result = cuda_beam.calc_jones(&az, &za, parallactic_correction);
+    assert!(result.is_ok(), "{}", result.unwrap_err());
+    let jones_gpu = result.unwrap();
+
+    // Compare with CPU results.
+    let mut jones_cpu =
+        Array3::from_elem((delays.dim().0, freqs.len(), az.len()), Jones::default());
+    // Maybe need to regenerate the directions, depending on the CUDA precision.
+    let (az, za): (Vec<_>, Vec<_>) = (0..1025)
+        .into_iter()
+        .map(|i| (0.45 + i as f64 / 10000.0, 0.45 + i as f64 / 10000.0))
+        .unzip();
+    for ((mut out, delays), amps) in jones_cpu
+        .outer_iter_mut()
+        .zip(delays.outer_iter())
+        .zip(amps.outer_iter())
+    {
+        for (mut out, freq) in out.outer_iter_mut().zip(freqs.iter()) {
+            let cpu_results = beam
+                .calc_jones_eng_array(
+                    &az,
+                    &za,
+                    *freq,
+                    delays.as_slice().unwrap(),
+                    amps.as_slice().unwrap(),
+                    norm_to_zenith,
+                )
+                .unwrap();
+
+            // Demote the CPU results if we have to.
+            #[cfg(feature = "cuda-single")]
+            let cpu_results: Vec<Jones<f32>> = cpu_results.into_iter().map(|j| j.into()).collect();
+
+            out.assign(&Array1::from(cpu_results));
+        }
+    }
+
+    let jones_cpu = jones_cpu.mapv(TestJones::from);
+    let jones_gpu = jones_gpu.mapv(TestJones::from);
+
+    #[cfg(not(feature = "cuda-single"))]
+    assert_abs_diff_eq!(jones_gpu, jones_cpu, epsilon = 1e-15);
+
+    #[cfg(feature = "cuda-single")]
+    // The errors are heavily dependent on the directions.
+    assert_abs_diff_eq!(jones_gpu, jones_cpu, epsilon = 1e-6);
+
+    // The results for this tile are all zero.
+    assert_abs_diff_eq!(
+        jones_gpu.slice(s![1, .., ..]),
+        Array2::from_elem((jones_gpu.dim().1, jones_gpu.dim().2), TestJones::default())
+    );
+
+    // The results for this tile are at least some non-zero.
+    assert_abs_diff_ne!(
+        jones_gpu.slice(s![0, .., ..]),
+        Array2::from_elem((jones_gpu.dim().1, jones_gpu.dim().2), TestJones::default())
+    );
+}
diff --git a/src/fee/ffi/mod.rs b/src/fee/ffi/mod.rs
index 8adc5c6..e7a5c14 100644
--- a/src/fee/ffi/mod.rs
+++ b/src/fee/ffi/mod.rs
@@ -628,7 +628,7 @@ pub unsafe extern "C" fn calc_jones_cuda_device(
     0
 }
 
-/// Get a pointer to the device beam Jones map. This is necessary to access
+/// Get a pointer to the device tile map. This is necessary to access
 /// de-duplicated beam Jones matrices on the device.
 ///
 /// # Arguments
@@ -642,9 +642,28 @@ pub unsafe extern "C" fn calc_jones_cuda_device(
 ///
 #[cfg(feature = "cuda")]
 #[no_mangle]
-pub unsafe extern "C" fn get_cuda_map(cuda_fee_beam: *mut FEEBeamCUDA) -> *const u64 {
+pub unsafe extern "C" fn get_tile_map(cuda_fee_beam: *mut FEEBeamCUDA) -> *const i32 {
     let beam = &mut *cuda_fee_beam;
-    beam.get_beam_jones_map()
+    beam.get_tile_map()
+}
+
+/// Get a pointer to the device freq map. This is necessary to access
+/// de-duplicated beam Jones matrices on the device.
+///
+/// # Arguments
+///
+/// `cuda_fee_beam` - the pointer to the `FEEBeamCUDA` struct.
+///
+/// # Returns
+///
+/// * A pointer to the device beam Jones map. The const annotation is
+///   deliberate; the caller does not own the map.
+///
+#[cfg(feature = "cuda")]
+#[no_mangle]
+pub unsafe extern "C" fn get_freq_map(cuda_fee_beam: *mut FEEBeamCUDA) -> *const i32 {
+    let beam = &mut *cuda_fee_beam;
+    beam.get_freq_map()
 }
 
 /// Get the number of de-duplicated frequencies associated with this
@@ -663,7 +682,7 @@ pub unsafe extern "C" fn get_cuda_map(cuda_fee_beam: *mut FEEBeamCUDA) -> *const
 #[no_mangle]
 pub unsafe extern "C" fn get_num_unique_fee_freqs(cuda_fee_beam: *mut FEEBeamCUDA) -> i32 {
     let beam = &mut *cuda_fee_beam;
-    beam.num_freqs
+    beam.num_unique_freqs
 }
 
 /// Free the memory associated with an `FEEBeamCUDA` beam.
diff --git a/src/fee/ffi/tests.rs b/src/fee/ffi/tests.rs
index 2409516..9fecf09 100644
--- a/src/fee/ffi/tests.rs
+++ b/src/fee/ffi/tests.rs
@@ -10,11 +10,37 @@ use marlu::ndarray::prelude::*;
 use serial_test::serial;
 
 use super::*;
-use crate::jones_test::TestJones;
 
 #[cfg(feature = "cuda")]
 use marlu::Jones;
 
+#[test]
+#[serial]
+fn test_ffi_fee_new() {
+    let file = std::ffi::CString::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    unsafe {
+        let mut beam = null_mut();
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = new_fee_beam(file.into_raw(), &mut beam, error_str);
+        assert_eq!(result, 0);
+
+        free_fee_beam(beam);
+    };
+}
+#[test]
+#[serial]
+fn test_ffi_fee_new_from_env() {
+    std::env::set_var("MWA_BEAM_FILE", "mwa_full_embedded_element_pattern.h5");
+    unsafe {
+        let mut beam = null_mut();
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = new_fee_beam_from_env(&mut beam, error_str);
+        assert_eq!(result, 0);
+
+        free_fee_beam(beam);
+    };
+}
+
 #[test]
 #[serial]
 fn test_calc_jones_via_ffi() {
@@ -34,6 +60,52 @@ fn test_calc_jones_via_ffi() {
             [1.0; 16].as_ptr(),
             16,
             0,
+            1,
+            jones.as_mut_ptr(),
+            error_str,
+        );
+        assert_eq!(
+            result,
+            0,
+            "{}",
+            CString::from_raw(error_str).into_string().unwrap()
+        );
+
+        free_fee_beam(beam);
+    };
+
+    let expected = array![
+        0.051673288904250436,
+        0.14798615369209014,
+        -0.0029907711920181858,
+        -0.008965331092654419,
+        0.002309524016541907,
+        0.006230549725189563,
+        0.05144802517335513,
+        0.14772685224822762
+    ];
+    assert_abs_diff_eq!(jones, expected);
+}
+
+#[test]
+#[serial]
+fn test_calc_jones_eng_via_ffi() {
+    let file = std::ffi::CString::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    let mut jones = Array1::zeros(8);
+    unsafe {
+        let mut beam = null_mut();
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = new_fee_beam(file.into_raw(), &mut beam, null_mut());
+        assert_eq!(result, 0);
+        let result = calc_jones(
+            beam,
+            45.0_f64.to_radians(),
+            10.0_f64.to_radians(),
+            51200000,
+            [0; 16].as_ptr(),
+            [1.0; 16].as_ptr(),
+            16,
+            1,
             0,
             jones.as_mut_ptr(),
             error_str,
@@ -44,11 +116,21 @@ fn test_calc_jones_via_ffi() {
             "{}",
             CString::from_raw(error_str).into_string().unwrap()
         );
+
+        free_fee_beam(beam);
     };
 
-    let expected =
-        array![0.036179, 0.103586, 0.036651, 0.105508, 0.036362, 0.103868, -0.036836, -0.105791];
-    assert_abs_diff_eq!(jones, expected, epsilon = 1e-6);
+    let expected = array![
+        0.22172963270798213,
+        0.6348455841660556,
+        0.22462189266869015,
+        0.6466211027377966,
+        0.22219717621735582,
+        0.6347084075291201,
+        -0.22509296063362327,
+        -0.6464547894620056
+    ];
+    assert_abs_diff_eq!(jones, expected);
 }
 
 #[test]
@@ -78,6 +160,8 @@ fn test_calc_jones_32_amps_via_ffi() {
             null_mut(),
         );
         assert_eq!(result, 0);
+
+        free_fee_beam(beam);
     };
 
     let expected = array![
@@ -121,6 +205,9 @@ fn test_calc_jones_array_via_ffi() {
             error,
         );
         assert_eq!(result, 0);
+
+        free_fee_beam(beam);
+
         Array1::from(Vec::from_raw_parts(
             jones_ptr,
             8 * num_directions,
@@ -168,6 +255,9 @@ fn test_calc_jones_array_32_amps_via_ffi() {
             error,
         );
         assert_eq!(result, 0);
+
+        free_fee_beam(beam);
+
         Array1::from(Vec::from_raw_parts(
             jones_ptr,
             8 * num_directions,
@@ -191,6 +281,38 @@ fn test_calc_jones_array_32_amps_via_ffi() {
     assert_abs_diff_eq!(jones.slice(s![-1, ..]), expected);
 }
 
+#[test]
+#[serial]
+fn test_ffi_fee_freq_functions() {
+    let file = "mwa_full_embedded_element_pattern.h5";
+    let beam = FEEBeam::new(file).unwrap();
+    assert!(!beam.get_freqs().is_empty());
+    let freqs = beam.get_freqs();
+    let search_freq = 150e6 as u32;
+    let closest = beam.find_closest_freq(search_freq);
+
+    unsafe {
+        let file_cstr = std::ffi::CString::new("mwa_full_embedded_element_pattern.h5").unwrap();
+        let mut ffi_beam = null_mut();
+        let result = new_fee_beam(file_cstr.into_raw(), &mut ffi_beam, null_mut());
+        assert_eq!(result, 0);
+
+        let mut freqs_ptr: *const u32 = null_mut();
+        let mut num_freqs = 0;
+        get_fee_beam_freqs(ffi_beam, &mut freqs_ptr, &mut num_freqs);
+        assert_eq!(freqs.len(), num_freqs);
+        let freqs_slice = std::slice::from_raw_parts(freqs_ptr, num_freqs);
+        for (i, &freq) in freqs.iter().enumerate() {
+            assert_eq!(freq, freqs_slice[i]);
+        }
+
+        let ffi_closest = closest_freq(ffi_beam, search_freq);
+        assert_eq!(closest, ffi_closest);
+
+        free_fee_beam(ffi_beam);
+    }
+}
+
 #[test]
 #[cfg(feature = "cuda")]
 #[serial]
@@ -261,6 +383,8 @@ fn test_calc_jones_cuda_via_ffi() {
             CString::from_raw(error_str).into_string().unwrap()
         );
 
+        free_cuda_fee_beam(cuda_beam);
+
         let jones: *mut Jones<CudaFloat> = jones_floats.cast();
         ArrayView3::from_shape_ptr((delays.dim().0, freqs.len(), az.len()), jones).into_owned()
     };
@@ -300,8 +424,12 @@ fn test_calc_jones_cuda_via_ffi() {
         }
     }
 
-    let jones_cpu = jones_cpu.mapv(TestJones::from);
-    let jones_gpu = jones_gpu.mapv(TestJones::from);
+    unsafe {
+        free_fee_beam(beam);
+    }
+
+    let jones_cpu = jones_cpu.mapv(crate::jones_test::TestJones::from);
+    let jones_gpu = jones_gpu.mapv(crate::jones_test::TestJones::from);
 
     #[cfg(not(feature = "cuda-single"))]
     assert_abs_diff_eq!(jones_gpu, jones_cpu, epsilon = 1e-15);
@@ -310,3 +438,229 @@ fn test_calc_jones_cuda_via_ffi() {
     // The errors are heavily dependent on the directions.
     assert_abs_diff_eq!(jones_gpu, jones_cpu, epsilon = 1e-6);
 }
+
+// Tests to expose errors follow.
+
+#[test]
+fn test_error_file_doesnt_exist() {
+    let file = "/unlikely/to/exist.h5";
+    let file_cstr = std::ffi::CString::new(file).unwrap();
+    let mut beam = null_mut();
+    let error_str = unsafe {
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = new_fee_beam(file_cstr.into_raw(), &mut beam, error_str);
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert!(cstr.to_str().is_ok());
+
+        cstr.to_str().unwrap().to_string()
+    };
+    assert!(!error_str.is_empty());
+    assert_eq!(
+        error_str,
+        format!("Specified beam file '{}' doesn't exist", file)
+    );
+}
+
+#[test]
+fn test_error_env_file_doesnt_exist() {
+    let file = "/unlikely/to/exist/again.h5";
+    std::env::set_var("MWA_BEAM_FILE", file);
+    let mut beam = null_mut();
+    let error_str = unsafe {
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = new_fee_beam_from_env(&mut beam, error_str);
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert!(cstr.to_str().is_ok());
+
+        cstr.to_str().unwrap().to_string()
+    };
+    assert!(!error_str.is_empty());
+    assert_eq!(
+        error_str,
+        format!("Specified beam file '{}' doesn't exist", file)
+    );
+}
+
+#[test]
+fn test_error_file_invalid_utf8() {
+    let file_cstr = unsafe { std::ffi::CString::from_vec_unchecked(vec![1, 1, 1, 1, 0]) };
+    let mut beam = null_mut();
+    let error_str = unsafe {
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = new_fee_beam(file_cstr.into_raw(), &mut beam, error_str);
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert!(cstr.to_str().is_ok());
+
+        cstr.to_str().unwrap().to_string()
+    };
+    assert!(!error_str.is_empty());
+    assert_eq!(
+        error_str,
+        "Specified beam file '\u{1}\u{1}\u{1}\u{1}' doesn't exist"
+    );
+}
+
+#[test]
+fn test_bool_errors() {
+    let file = std::ffi::CString::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    unsafe {
+        let mut beam = null_mut();
+        let result = new_fee_beam(file.into_raw(), &mut beam, null_mut());
+        assert!(result == 0);
+
+        let mut jones = [0.0; 8];
+
+        // Bad number of amps.
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = calc_jones(
+            beam,
+            45.0_f64.to_radians(),
+            10.0_f64.to_radians(),
+            51200000,
+            [0; 16].as_ptr(),
+            [
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0,
+            ]
+            .as_ptr(),
+            5,
+            2,
+            0,
+            jones.as_mut_ptr(),
+            error_str,
+        );
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert_eq!(
+            cstr.to_str().unwrap(),
+            "A value other than 16 or 32 was used for num_amps"
+        );
+
+        // Bad norm_to_zenith value.
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = calc_jones(
+            beam,
+            45.0_f64.to_radians(),
+            10.0_f64.to_radians(),
+            51200000,
+            [0; 16].as_ptr(),
+            [
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0,
+            ]
+            .as_ptr(),
+            32,
+            2,
+            0,
+            jones.as_mut_ptr(),
+            error_str,
+        );
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert_eq!(
+            cstr.to_str().unwrap(),
+            "A value other than 0 or 1 was used for norm_to_zenith"
+        );
+
+        // Bad parallactic value.
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = calc_jones(
+            beam,
+            45.0_f64.to_radians(),
+            10.0_f64.to_radians(),
+            51200000,
+            [0; 16].as_ptr(),
+            [
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0,
+            ]
+            .as_ptr(),
+            32,
+            0,
+            2,
+            jones.as_mut_ptr(),
+            error_str,
+        );
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert_eq!(
+            cstr.to_str().unwrap(),
+            "A value other than 0 or 1 was used for parallactic"
+        );
+
+        // Do it all again for calc_jones_array.
+        let az = [0.1];
+        let za = [0.1];
+        // Bad number of amps.
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = calc_jones_array(
+            beam,
+            az.len() as _,
+            az.as_ptr(),
+            za.as_ptr(),
+            51200000,
+            [0; 16].as_ptr(),
+            [1.0; 16].as_ptr(),
+            10,
+            0,
+            0,
+            &mut jones.as_mut_ptr(),
+            error_str,
+        );
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert_eq!(
+            cstr.to_str().unwrap(),
+            "A value other than 16 or 32 was used for num_amps"
+        );
+
+        // Bad norm_to_zenith value.
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = calc_jones_array(
+            beam,
+            az.len() as _,
+            az.as_ptr(),
+            za.as_ptr(),
+            51200000,
+            [0; 16].as_ptr(),
+            [1.0; 16].as_ptr(),
+            16,
+            3,
+            0,
+            &mut jones.as_mut_ptr(),
+            error_str,
+        );
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert_eq!(
+            cstr.to_str().unwrap(),
+            "A value other than 0 or 1 was used for norm_to_zenith"
+        );
+
+        // Bad parallactic value.
+        let error_str = CString::from_vec_unchecked(vec![1; 200]).into_raw();
+        let result = calc_jones_array(
+            beam,
+            az.len() as _,
+            az.as_ptr(),
+            za.as_ptr(),
+            51200000,
+            [0; 16].as_ptr(),
+            [1.0; 16].as_ptr(),
+            16,
+            0,
+            255,
+            &mut jones.as_mut_ptr(),
+            error_str,
+        );
+        assert!(result != 0);
+        let cstr = CString::from_raw(error_str);
+        assert_eq!(
+            cstr.to_str().unwrap(),
+            "A value other than 0 or 1 was used for parallactic"
+        );
+    };
+}
diff --git a/src/fee/mod.rs b/src/fee/mod.rs
index cd5fb03..c232da5 100644
--- a/src/fee/mod.rs
+++ b/src/fee/mod.rs
@@ -16,6 +16,7 @@ mod cuda;
 mod tests;
 
 pub use error::{FEEBeamError, InitFEEBeamError};
+use parking_lot::{MappedRwLockReadGuard, RwLockReadGuard};
 use types::*;
 
 #[cfg(feature = "cuda")]
@@ -56,7 +57,7 @@ impl FEEBeam {
     /// Given the path to the FEE beam file, create a new [FEEBeam] struct.
     pub fn new<T: AsRef<std::path::Path>>(file: T) -> Result<Self, InitFEEBeamError> {
         // so that libhdf5 doesn't print errors to stdout
-        let _e = hdf5::silence_errors();
+        hdf5::silence_errors(true);
 
         // If the file doesn't exist, hdf5::File::open will handle it, but the
         // error message is horrendous.
@@ -204,44 +205,80 @@ impl FEEBeam {
         }
     }
 
-    /// Check that [DipoleCoefficients] are cached for the input parameters. If
-    /// they aren't, populate the cache. The returned hash is used to access the
-    /// populated cache.
+    /// Get [DipoleCoefficients] for the input parameters.
     ///
-    /// This function is intended to be used every time the cache is to be
-    /// accessed. By ensuring that the right coefficients are available at the
-    /// end of this function, the caller can then directly access the cache. The
-    /// only way to make Rust return the coefficients would be by keeping the
-    /// whole cache locked, which ruins concurrent performance.
+    /// This function is deliberately private; it uses a cache on [FEEBeam] as
+    /// calculating [DipoleCoefficients] is expensive, and it's easy to
+    /// accidentally stall the cache with locks. This function automatically
+    /// populates the cache with [DipoleCoefficients] and returns a reference to
+    /// them.
     ///
     /// Note that specified frequencies are "rounded" to frequencies that are
     /// defined the HDF5 file.
-    fn populate_modes(
+    fn get_modes(
         &self,
-        desired_freq: u32,
+        desired_freq_hz: u32,
         delays: &[u32],
         amps: &[f64; 32],
-    ) -> Result<CacheKey, FEEBeamError> {
-        let freq = self.find_closest_freq(desired_freq);
+    ) -> Result<MappedRwLockReadGuard<'_, BowtieCoefficients>, FEEBeamError> {
+        let fee_freq = self.find_closest_freq(desired_freq_hz);
 
         // Are the input settings already cached? Hash them to check.
-        let hash = CacheKey::new(freq, delays, amps);
+        let hash = CacheKey::new(fee_freq, delays, amps);
 
-        // If the cache for this hash exists, we can return the hash.
-        if self.coeff_cache.contains_key(&hash) {
-            return Ok(hash);
+        // If the cache for this hash is already populated, we can return the reference.
+        {
+            let cache = self.coeff_cache.read();
+            if cache.contains_key(&hash) {
+                return Ok(RwLockReadGuard::map(cache, |hm| &hm[&hash]));
+            }
+        }
+
+        // If we hit this part of the code, we need to populate the cache.
+        let m = self.calc_modes(fee_freq, delays, amps)?;
+        {
+            let mut locked_cache = self.coeff_cache.write();
+            locked_cache.insert(hash, m);
         }
+        Ok(RwLockReadGuard::map(self.coeff_cache.read(), |hm| {
+            &hm[&hash]
+        }))
+    }
 
-        // If we hit this part of the code, the coefficients were not in the
-        // cache.
-        let modes = self.calc_modes(freq, delays, amps)?;
-        self.coeff_cache.insert(hash, modes);
-        Ok(hash)
+    /// Get a [Jones] matrix for beam normalisation.
+    ///
+    /// This function is deliberately private and is intertwined with
+    /// `get_modes`; this function should always be called before `get_modes` to
+    /// prevent a deadlock. Beam normalisation Jones matrices are cached but
+    /// because [Jones] is [Copy], an owned copy is returned from the cache.
+    fn get_norm_jones(&self, desired_freq_hz: u32) -> Result<Jones<f64>, FEEBeamError> {
+        // Are the input settings already cached? Hash them to check.
+        let fee_freq = self.find_closest_freq(desired_freq_hz);
+
+        // If the cache for this hash is already populated, we can return the
+        // reference.
+        {
+            let cache = self.norm_cache.read();
+            if cache.contains_key(&fee_freq) {
+                return Ok(cache[&fee_freq]);
+            }
+        }
+
+        // If we hit this part of the code, we need to populate the modes cache.
+        let n = {
+            let norm_coeffs = self.get_modes(fee_freq, &[0; 16], &[1.0; 32])?;
+            calc_zenith_norm_jones(&norm_coeffs)
+        };
+        {
+            let mut locked_cache = self.norm_cache.write();
+            locked_cache.insert(fee_freq, n);
+        }
+        Ok(n)
     }
 
     /// Given the input parameters, calculate and return the X and Y
     /// coefficients ("modes"). As this function is relatively expensive, it
-    /// should only be called by `Self::populate_modes` to cache the outputs.
+    /// should only be called by `Self::get_modes` to cache the outputs.
     fn calc_modes(
         &self,
         freq: u32,
@@ -394,24 +431,6 @@ impl FEEBeam {
         })
     }
 
-    fn populate_norm_jones(&self, desired_freq: u32) -> Result<u32, FEEBeamError> {
-        let freq = self.find_closest_freq(desired_freq);
-
-        // If the cache for this freq exists, we can return it.
-        if self.norm_cache.contains_key(&freq) {
-            return Ok(freq);
-        }
-
-        // If we hit this part of the code, the normalisation Jones matrix was
-        // not in the cache.
-        let hash = self.populate_modes(freq, &[0; 16], &[1.0; 32])?;
-        let coeffs = self.coeff_cache.get(&hash).unwrap();
-        let jones = calc_zenith_norm_jones(&coeffs);
-        self.norm_cache.insert(freq, jones);
-
-        Ok(freq)
-    }
-
     /// Calculate the Jones matrix for a given direction and pointing. This
     /// matches the original specification of the FEE beam code (hence "eng", or
     /// "engineering"). Astronomers more likely want the `calc_jones` method
@@ -439,22 +458,17 @@ impl FEEBeam {
         // amplitudes.
         debug_assert!(amps.len() == 16 || amps.len() == 32);
 
-        let full_amps = fix_amps(amps, delays);
-
-        // Ensure the dipole coefficients for the provided parameters exist.
-        let hash = self.populate_modes(freq_hz, delays, &full_amps)?;
-
-        // If we're normalising the beam, get the normalisation frequency here.
-        let norm_freq = if norm_to_zenith {
-            Some(self.populate_norm_jones(freq_hz)?)
-        } else {
-            None
+        // If we're normalising the beam, get the normalisation Jones matrix here.
+        let norm_jones = match norm_to_zenith {
+            true => Some(self.get_norm_jones(freq_hz)?),
+            false => None,
         };
 
-        let coeffs = self.coeff_cache.get(&hash).unwrap();
-        let norm_jones = norm_freq.and_then(|f| self.norm_cache.get(&f));
+        // Populate the coefficients cache if it isn't already populated.
+        let full_amps = fix_amps(amps, delays);
+        let coeffs = self.get_modes(freq_hz, delays, &full_amps)?;
 
-        let jones = calc_jones_direct(az_rad, za_rad, &coeffs, norm_jones.as_deref());
+        let jones = calc_jones_direct(az_rad, za_rad, &coeffs, norm_jones);
         Ok(jones)
     }
 
@@ -488,26 +502,20 @@ impl FEEBeam {
         // amplitudes.
         debug_assert!(amps.len() == 16 || amps.len() == 32);
 
+        // Populate the coefficients cache if it isn't already populated.
         let full_amps = fix_amps(amps, delays);
+        let coeffs = self.get_modes(freq_hz, delays, &full_amps)?;
 
-        // Ensure the dipole coefficients for the provided parameters exist.
-        let hash = self.populate_modes(freq_hz, delays, &full_amps)?;
-
-        // If we're normalising the beam, get the normalisation Jones matrix
-        // here.
-        let norm_freq = if norm_to_zenith {
-            Some(self.populate_norm_jones(freq_hz)?)
-        } else {
-            None
+        // If we're normalising the beam, get the normalisation Jones matrix here.
+        let norm_jones = match norm_to_zenith {
+            true => Some(self.get_norm_jones(freq_hz)?),
+            false => None,
         };
 
-        let coeffs = self.coeff_cache.get(&hash).unwrap();
-        let norm = norm_freq.and_then(|f| self.norm_cache.get(&f));
-
         let out = az_rad
             .par_iter()
             .zip(za_rad.par_iter())
-            .map(|(&az, &za)| calc_jones_direct(az, za, &coeffs, norm.as_deref()))
+            .map(|(&az, &za)| calc_jones_direct(az, za, &coeffs, norm_jones))
             .collect();
         Ok(out)
     }
@@ -533,21 +541,9 @@ impl FEEBeam {
         amps: &[f64],
         norm_to_zenith: bool,
     ) -> Result<Jones<f64>, FEEBeamError> {
-        let j = self.calc_jones_eng(az_rad, za_rad, freq_hz, delays, amps, norm_to_zenith)?;
-
-        // Re-order the polarisations.
-        let j = [-j[3], j[2], -j[1], j[0]];
-        // Parallactic-angle correction.
-        let para_angle = AzEl::new(az_rad, FRAC_PI_2 - za_rad)
-            .to_hadec_mwa()
-            .get_parallactic_angle_mwa();
-        let (s_rot, c_rot) = (para_angle + FRAC_PI_2).sin_cos();
-        let j = Jones::from([
-            j[0] * c_rot - j[1] * s_rot,
-            j[0] * s_rot + j[1] * c_rot,
-            j[2] * c_rot - j[3] * s_rot,
-            j[2] * s_rot + j[3] * c_rot,
-        ]);
+        let mut j = self.calc_jones_eng(az_rad, za_rad, freq_hz, delays, amps, norm_to_zenith)?;
+        apply_parallactic_correction(az_rad, za_rad, &mut j);
+
         Ok(j)
     }
 
@@ -575,19 +571,40 @@ impl FEEBeam {
         amps: &[f64],
         norm_to_zenith: bool,
     ) -> Result<Vec<Jones<f64>>, FEEBeamError> {
-        let out: Vec<Result<Jones<f64>, FEEBeamError>> = az_rad
+        // `delays` must have 16 elements...
+        debug_assert_eq!(delays.len(), 16);
+        // ... but `amps` may have either 16 or 32. 32 elements corresponds to
+        // each element of each dipole; i.e. 16 X amplitudes followed by 16 Y
+        // amplitudes.
+        debug_assert!(amps.len() == 16 || amps.len() == 32);
+
+        // If we're normalising the beam, get the normalisation Jones matrix here.
+        let norm_jones = match norm_to_zenith {
+            true => Some(self.get_norm_jones(freq_hz)?),
+            false => None,
+        };
+
+        // Populate the coefficients cache if it isn't already populated.
+        let full_amps = fix_amps(amps, delays);
+        let coeffs = self.get_modes(freq_hz, delays, &full_amps)?;
+
+        let out = az_rad
             .par_iter()
             .zip(za_rad.par_iter())
-            .map(|(&az, &za)| self.calc_jones(az, za, freq_hz, delays, amps, norm_to_zenith))
+            .map(|(&az, &za)| {
+                let mut jones = calc_jones_direct(az, za, &coeffs, norm_jones);
+                apply_parallactic_correction(az, za, &mut jones);
+                jones
+            })
             .collect();
-        out.into_iter().collect()
+        Ok(out)
     }
 
     /// Empty the cached dipole coefficients and normalisation Jones matrices to
     /// recover memory.
     pub fn empty_cache(&self) {
-        self.coeff_cache.clear();
-        self.norm_cache.clear();
+        self.coeff_cache.write().clear();
+        self.norm_cache.write().clear();
     }
 
     /// Prepare a CUDA-capable device for beam-response computations given the
@@ -672,7 +689,7 @@ fn calc_jones_direct(
     az_rad: f64,
     za_rad: f64,
     coeffs: &BowtieCoefficients,
-    norm_matrix: Option<&Jones<f64>>,
+    norm_matrix: Option<Jones<f64>>,
 ) -> Jones<f64> {
     // Convert azimuth to FEKO phi (East through North).
     let phi_rad = FRAC_PI_2 - az_rad;
@@ -718,3 +735,22 @@ pub(super) fn fix_amps(amps: &[f64], delays: &[u32]) -> [f64; 32] {
         });
     full_amps
 }
+
+/// Apply the parallactic angle correction to a beam-response Jones matrix
+/// (when also given its corresponding direction). This function also
+/// re-arranges the Jones matrix to conform with Jack's investigation.
+fn apply_parallactic_correction(az_rad: f64, za_rad: f64, jones: &mut Jones<f64>) {
+    // Re-order the polarisations.
+    let j = [-jones[3], jones[2], -jones[1], jones[0]];
+    // Parallactic-angle correction.
+    let para_angle = AzEl::new(az_rad, FRAC_PI_2 - za_rad)
+        .to_hadec_mwa()
+        .get_parallactic_angle_mwa();
+    let (s_rot, c_rot) = (para_angle + FRAC_PI_2).sin_cos();
+    *jones = Jones::from([
+        j[0] * c_rot - j[1] * s_rot,
+        j[0] * s_rot + j[1] * c_rot,
+        j[2] * c_rot - j[3] * s_rot,
+        j[2] * s_rot + j[3] * c_rot,
+    ]);
+}
diff --git a/src/fee/tests.rs b/src/fee/tests.rs
index ec35939..013cafb 100644
--- a/src/fee/tests.rs
+++ b/src/fee/tests.rs
@@ -131,11 +131,9 @@ fn test_get_dataset() {
 #[serial]
 fn test_get_modes() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let hash = match beam.populate_modes(51200000, &[0; 16], &[1.0; 32]) {
-        Ok(h) => h,
-        Err(e) => panic!("{}", e),
-    };
-    let coeffs = beam.coeff_cache.get(&hash).unwrap();
+    let result = beam.get_modes(51200000, &[0; 16], &[1.0; 32]);
+    assert!(result.is_ok());
+    let coeffs = result.unwrap();
 
     // Values taken from the C++ code.
     // m_accum and n_accum are floats in the C++ code, but these appear to
@@ -312,21 +310,18 @@ fn test_get_modes() {
 }
 
 #[test]
-#[serial]
 fn test_get_modes2() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let hash = match beam.populate_modes(
+    let result = beam.get_modes(
         51200000,
         &[3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0],
         &[
             1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0,
             1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
         ],
-    ) {
-        Ok(h) => h,
-        Err(e) => panic!("{}", e),
-    };
-    let coeffs = beam.coeff_cache.get(&hash).unwrap();
+    );
+    assert!(result.is_ok());
+    let coeffs = result.unwrap();
 
     // Values taken from the C++ code.
     let x_m_expected = array![
@@ -501,7 +496,11 @@ fn test_get_modes2() {
 
     // Check that if the Y dipole gains are different, they don't match the
     // earlier values.
-    let hash = match beam.populate_modes(
+
+    // We need to drop the reference to the coefficients used before, otherwise
+    // we'll deadlock the cache.
+    drop(coeffs);
+    let result = beam.get_modes(
         51200000,
         &[3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0],
         &[
@@ -509,11 +508,9 @@ fn test_get_modes2() {
             // First value here
             10.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
         ],
-    ) {
-        Ok(h) => h,
-        Err(e) => panic!("{}", e),
-    };
-    let coeffs = beam.coeff_cache.get(&hash).unwrap();
+    );
+    assert!(result.is_ok());
+    let coeffs = result.unwrap();
 
     // X values are the same.
     let x_q1_accum_arr = Array1::from(coeffs.x.q1_accum.clone());
@@ -578,17 +575,16 @@ fn test_get_modes2() {
 #[serial]
 fn test_calc_jones_eng() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let jones = match beam.calc_jones_eng(
+    let result = beam.calc_jones_eng(
         45.0_f64.to_radians(),
         10.0_f64.to_radians(),
         51200000,
         &[0; 16],
         &[1.0; 16],
         false,
-    ) {
-        Ok(j) => j,
-        Err(e) => panic!("{}", e),
-    };
+    );
+    assert!(result.is_ok());
+    let jones = result.unwrap();
 
     let expected = Jones::from([
         c64::new(0.036179, 0.103586),
@@ -605,7 +601,7 @@ fn test_calc_jones_eng() {
 #[serial]
 fn test_calc_jones_eng_2() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let jones = match beam.calc_jones_eng(
+    let result = beam.calc_jones_eng(
         70.0_f64.to_radians(),
         10.0_f64.to_radians(),
         51200000,
@@ -614,10 +610,9 @@ fn test_calc_jones_eng_2() {
             1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
         ],
         false,
-    ) {
-        Ok(j) => j,
-        Err(e) => panic!("{}", e),
-    };
+    );
+    assert!(result.is_ok());
+    let jones = result.unwrap();
 
     let expected = Jones::from([
         c64::new(0.068028, 0.111395),
@@ -634,10 +629,9 @@ fn test_calc_jones_eng_2() {
 #[serial]
 fn test_calc_jones_eng_norm() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let jones = match beam.calc_jones_eng(0.1_f64, 0.1_f64, 150000000, &[0; 16], &[1.0; 16], true) {
-        Ok(j) => j,
-        Err(e) => panic!("{}", e),
-    };
+    let result = beam.calc_jones_eng(0.1_f64, 0.1_f64, 150000000, &[0; 16], &[1.0; 16], true);
+    assert!(result.is_ok());
+    let jones = result.unwrap();
 
     let expected = Jones::from([
         c64::new(0.0887949, 0.0220569),
@@ -654,7 +648,7 @@ fn test_calc_jones_eng_norm() {
 #[serial]
 fn test_calc_jones_eng_norm_2() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let jones = match beam.calc_jones_eng(
+    let result = beam.calc_jones_eng(
         0.1_f64,
         0.1_f64,
         150000000,
@@ -663,10 +657,9 @@ fn test_calc_jones_eng_norm_2() {
             1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0,
         ],
         true,
-    ) {
-        Ok(j) => j,
-        Err(e) => panic!("{}", e),
-    };
+    );
+    assert!(result.is_ok());
+    let jones = result.unwrap();
 
     let expected = Jones::from([
         c64::new(0.0704266, -0.0251082),
@@ -683,17 +676,16 @@ fn test_calc_jones_eng_norm_2() {
 #[serial]
 fn test_calc_jones() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let jones = match beam.calc_jones(
+    let result = beam.calc_jones(
         45.0_f64.to_radians(),
         10.0_f64.to_radians(),
         51200000,
         &[0; 16],
         &[1.0; 16],
         false,
-    ) {
-        Ok(j) => j,
-        Err(e) => panic!("{}", e),
-    };
+    );
+    assert!(result.is_ok());
+    let jones = result.unwrap();
 
     let expected = Jones::from([
         c64::new(0.051673288904250436, 0.14798615369209014),
@@ -710,10 +702,9 @@ fn test_calc_jones() {
 #[serial]
 fn test_calc_jones_norm() {
     let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
-    let jones = match beam.calc_jones(0.1_f64, 0.1_f64, 150000000, &[0; 16], &[1.0; 16], true) {
-        Ok(j) => j,
-        Err(e) => panic!("{}", e),
-    };
+    let result = beam.calc_jones(0.1_f64, 0.1_f64, 150000000, &[0; 16], &[1.0; 16], true);
+    assert!(result.is_ok());
+    let jones = result.unwrap();
 
     let expected = Jones::from([
         c64::new(0.8916497260404116, 0.21719761321518402),
@@ -725,3 +716,126 @@ fn test_calc_jones_norm() {
     let expected = TestJones::from(expected);
     assert_abs_diff_eq!(jones, expected, epsilon = 1e-6);
 }
+
+#[test]
+#[serial]
+fn test_calc_jones_array() {
+    let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    let result = beam.calc_jones(
+        45.0_f64.to_radians(),
+        10.0_f64.to_radians(),
+        51200000,
+        &[0; 16],
+        &[1.0; 16],
+        true,
+    );
+    assert!(result.is_ok());
+    let jones = result.unwrap();
+
+    let result = beam.calc_jones_array(
+        &[45.0_f64.to_radians()],
+        &[10.0_f64.to_radians()],
+        51200000,
+        &[0; 16],
+        &[1.0; 16],
+        true,
+    );
+    assert!(result.is_ok());
+    let jones_array = result.unwrap();
+
+    assert_eq!(jones_array.len(), 1);
+    let jones = TestJones::from(jones);
+    let jones_array = TestJones::from(jones_array[0]);
+    assert_eq!(jones, jones_array);
+}
+
+#[test]
+#[serial]
+fn test_empty_cache() {
+    let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    let result = beam.calc_jones(
+        45.0_f64.to_radians(),
+        10.0_f64.to_radians(),
+        51200000,
+        &[0; 16],
+        &[1.0; 16],
+        true,
+    );
+    assert!(result.is_ok());
+    result.unwrap();
+
+    assert!(!beam.coeff_cache.read().is_empty());
+    assert!(!beam.norm_cache.read().is_empty());
+
+    beam.empty_cache();
+    assert!(beam.coeff_cache.read().is_empty());
+    assert!(beam.norm_cache.read().is_empty());
+}
+
+// If the beam file is fine, then there should be frequencies inside it.
+#[test]
+#[serial]
+fn test_get_freqs() {
+    let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    assert!(!beam.get_freqs().is_empty());
+}
+
+// Tests for coverage follow.
+
+#[test]
+#[serial]
+fn test_cache_is_used() {
+    let beam = FEEBeam::new("mwa_full_embedded_element_pattern.h5").unwrap();
+    let result = beam.calc_jones(
+        45.0_f64.to_radians(),
+        10.0_f64.to_radians(),
+        51200000,
+        &[0; 16],
+        &[1.0; 16],
+        true,
+    );
+    assert!(result.is_ok());
+    result.unwrap();
+
+    let result = beam.calc_jones(
+        45.0_f64.to_radians(),
+        10.0_f64.to_radians(),
+        51200000,
+        &[0; 16],
+        &[1.0; 16],
+        true,
+    );
+    assert!(result.is_ok());
+    result.unwrap();
+}
+
+// Tests to expose errors follow.
+
+#[test]
+fn test_error_file_doesnt_exist() {
+    let file = "/unlikely/to/exist.h5";
+    let result = FEEBeam::new(file);
+    assert!(result.is_err());
+    match result {
+        Err(e) => assert_eq!(
+            e.to_string(),
+            format!("Specified beam file '{}' doesn't exist", file)
+        ),
+        _ => unreachable!(),
+    }
+}
+
+#[test]
+fn test_error_env_file_doesnt_exist() {
+    let file = "/unlikely/to/exist/again.h5";
+    std::env::set_var("MWA_BEAM_FILE", file);
+    let result = FEEBeam::new_from_env();
+    assert!(result.is_err());
+    match result {
+        Err(e) => assert_eq!(
+            e.to_string(),
+            format!("Specified beam file '{}' doesn't exist", file)
+        ),
+        _ => unreachable!(),
+    }
+}
diff --git a/src/fee/types.rs b/src/fee/types.rs
index 3063878..779f52b 100644
--- a/src/fee/types.rs
+++ b/src/fee/types.rs
@@ -4,8 +4,10 @@
 
 //! Helper types for the FEE beam.
 
-use dashmap::DashMap;
+use std::collections::HashMap;
+
 use marlu::{c64, Jones};
+use parking_lot::RwLock;
 
 use crate::types::CacheKey;
 
@@ -29,14 +31,13 @@ pub(super) struct BowtieCoefficients {
     pub(super) y: DipoleCoefficients,
 }
 
-/// [CoeffCache] is mostly just a `RwLock` around a `HashMap` (which is handled
-/// by [DashMap]). This allows multiple concurrent readers with the ability to
-/// halt all reading when writing.
+/// [CoeffCache] is just a `RwLock` around a `HashMap`. This allows multiple
+/// concurrent readers with the ability to halt all reading when writing.
 #[derive(Default)]
-pub(super) struct CoeffCache(DashMap<CacheKey, BowtieCoefficients>);
+pub(super) struct CoeffCache(RwLock<HashMap<CacheKey, BowtieCoefficients>>);
 
 impl std::ops::Deref for CoeffCache {
-    type Target = DashMap<CacheKey, BowtieCoefficients>;
+    type Target = RwLock<HashMap<CacheKey, BowtieCoefficients>>;
 
     fn deref(&self) -> &Self::Target {
         &self.0
@@ -47,10 +48,10 @@ impl std::ops::Deref for CoeffCache {
 /// to normalise beam responses at various frequencies (i.e. frequency is the
 /// key of the cache).
 #[derive(Default)]
-pub(super) struct NormCache(DashMap<u32, Jones<f64>>);
+pub(super) struct NormCache(RwLock<HashMap<u32, Jones<f64>>>);
 
 impl std::ops::Deref for NormCache {
-    type Target = DashMap<u32, Jones<f64>>;
+    type Target = RwLock<HashMap<u32, Jones<f64>>>;
 
     fn deref(&self) -> &Self::Target {
         &self.0
diff --git a/src/python/fee.rs b/src/python/fee.rs
index e713e18..2aaa674 100644
--- a/src/python/fee.rs
+++ b/src/python/fee.rs
@@ -28,7 +28,7 @@ impl std::convert::From<InitFEEBeamError> for PyErr {
 
 /// A Python class interfacing with the hyperbeam code written in Rust.
 #[pyclass]
-#[text_signature = "(hdf5_file)"]
+#[pyo3(text_signature = "(hdf5_file)")]
 #[allow(clippy::upper_case_acronyms)]
 struct FEEBeam {
     beam: FEEBeamRust,
@@ -53,7 +53,7 @@ impl FEEBeam {
     /// If there are 16, then the dipole gains apply to both X and Y elements of
     /// dipoles. If there are 32, the first 16 amps are for the X elements, the
     /// next 16 the Y elements.
-    #[text_signature = "(az_rad, za_rad, freq_hz, delays, amps, norm_to_zenith, parallactic)"]
+    #[pyo3(text_signature = "(az_rad, za_rad, freq_hz, delays, amps, norm_to_zenith, parallactic)")]
     #[allow(clippy::too_many_arguments)]
     fn calc_jones(
         &mut self,
@@ -100,7 +100,7 @@ impl FEEBeam {
     /// Each direction is calculated in parallel by Rust. The number of parallel
     /// threads used can be controlled by setting RAYON_NUM_THREADS. `delays`
     /// must have 16 ints, and `amps` must have 16 or 32 floats.
-    #[text_signature = "(az_rad, za_rad, freq_hz, delays, amps, norm_to_zenith, parallactic)"]
+    #[pyo3(text_signature = "(az_rad, za_rad, freq_hz, delays, amps, norm_to_zenith, parallactic)")]
     #[allow(clippy::too_many_arguments)]
     fn calc_jones_array(
         &mut self,
@@ -161,7 +161,7 @@ impl FEEBeam {
 
     /// Given a frequency in Hz, get the closest available frequency inside the
     /// HDF5 file.
-    #[text_signature = "(freq_hz)"]
+    #[pyo3(text_signature = "(freq_hz)")]
     fn closest_freq(&self, freq_hz: f64) -> u32 {
         self.beam.find_closest_freq(freq_hz.round() as _)
     }
@@ -175,7 +175,9 @@ impl FEEBeam {
     /// but `amps_array` can have 16 or 32 elements per row (see `calc_jones`
     /// for an explanation).
     #[cfg(feature = "cuda")]
-    #[text_signature = "(az_rad, za_rad, freq_hz, delays_array, amps_array, norm_to_zenith, parallactic)"]
+    #[pyo3(
+        text_signature = "(az_rad, za_rad, freq_hz, delays_array, amps_array, norm_to_zenith, parallactic)"
+    )]
     #[allow(clippy::too_many_arguments)]
     fn calc_jones_cuda(
         &mut self,