From 9de4d4cfea55961b71d606cd5c4e328134a85887 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin
Date: Mon, 11 Nov 2024 20:53:05 -0600
Subject: [PATCH] add NWBHDF5IO.read_nwb() method (#1979)

Co-authored-by: Ryan Ly
---
 CHANGELOG.md                      |  3 +++
 requirements-opt.txt              |  5 ++++
 src/pynwb/__init__.py             | 28 +++++++++++++++++++++
 tests/integration/hdf5/test_io.py | 42 +++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d0989e10e..4400300de 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## PyNWB 2.8.3 (Upcoming)
 
+### Enhancements and minor changes
+* Added `NWBHDF5IO.read_nwb` convenience method to simplify reading an NWB file. @h-mayorquin [#1979](https://github.com/NeurodataWithoutBorders/pynwb/pull/1979)
+
 ### Documentation and tutorial enhancements
 - Added documentation example for `SpikeEventSeries`. @stephprince [#1983](https://github.com/NeurodataWithoutBorders/pynwb/pull/1983)
 
diff --git a/requirements-opt.txt b/requirements-opt.txt
index 3badc79c7..da62bc314 100644
--- a/requirements-opt.txt
+++ b/requirements-opt.txt
@@ -1,3 +1,8 @@
 linkml-runtime==1.7.4; python_version >= "3.9"
 schemasheets==0.2.1; python_version >= "3.9"
 oaklib==0.5.32; python_version >= "3.9"
+
+# for streaming tests
+fsspec==2024.10.0
+requests==2.32.3
+aiohttp==3.10.10
diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py
index 2ac87a3ea..3a4d95e98 100644
--- a/src/pynwb/__init__.py
+++ b/src/pynwb/__init__.py
@@ -506,6 +506,34 @@ def export(self, **kwargs):
         kwargs['container'] = nwbfile
         super().export(**kwargs)
 
+    @staticmethod
+    @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None},
+            {'name': 'file', 'type': [h5py.File, 'S3File'], 'doc': 'a pre-existing h5py.File object', 'default': None},
+            is_method=False)
+    def read_nwb(**kwargs):
+        """
+        Helper factory method that reads an NWB file and returns the NWBFile object.
+
+        ``path`` may be a local path or an ``http(s)://`` / ``s3://`` URL; URLs are streamed with fsspec.
+        The underlying IO stays open; close it with ``nwbfile.get_read_io().close()`` when done.
+        """
+        path = popargs('path', kwargs)
+        file = popargs('file', kwargs)
+        path = str(path) if path is not None else None
+
+        # Streaming case. Match complete URL schemes so that a local file whose name merely
+        # starts with "http" (e.g. "http_dump.nwb") is still opened from disk.
+        if path is not None and path.startswith(("s3://", "http://", "https://")):
+            import fsspec  # optional dependency, only required for streaming
+
+            # Resolve the filesystem from the URL scheme instead of forcing "http" for s3:// URLs
+            remote_fs, remote_path = fsspec.core.url_to_fs(path)
+            file = h5py.File(remote_fs.open(remote_path, "rb"), "r")
+            path = None
+
+        # One construction path so that local and streamed reads use the same settings
+        io = NWBHDF5IO(path=path, file=file, mode="r", load_namespaces=True)
+        return io.read()
 
 from . import io as __io  # noqa: F401,E402
 from .core import NWBContainer, NWBData  # noqa: F401,E402
diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py
index a97eb1b63..1e6ed0593 100644
--- a/tests/integration/hdf5/test_io.py
+++ b/tests/integration/hdf5/test_io.py
@@ -18,6 +18,13 @@
 from pynwb.testing.mock.file import mock_NWBFile
 
+import unittest
+try:
+    import fsspec  # noqa: F401
+    HAVE_FSSPEC = True
+except ImportError:
+    HAVE_FSSPEC = False
+
 
 class TestHDF5Writer(TestCase):
 
     _required_tests = ('test_nwbio', 'test_write_clobber', 'test_write_cache_spec', 'test_write_no_cache_spec')
@@ -586,3 +593,38 @@ def test_can_read_file_old_version(self):
     def test_can_read_file_invalid_hdf5_file(self):
         # current file is not an HDF5 file
         self.assertFalse(NWBHDF5IO.can_read(__file__))
+
+    def test_read_nwb_method_path(self):
+
+        # write the example file
+        with NWBHDF5IO(self.path, 'w') as io:
+            io.write(self.nwbfile)
+
+        # test that the read_nwb method works
+        read_nwbfile = NWBHDF5IO.read_nwb(path=self.path)
+        self.assertContainerEqual(read_nwbfile, self.nwbfile)
+
+        read_nwbfile.get_read_io().close()
+
+    def test_read_nwb_method_file(self):
+
+        # write the example file
+        with NWBHDF5IO(self.path, 'w') as io:
+            io.write(self.nwbfile)
+
+        import h5py
+
+        file = h5py.File(self.path, 'r')
+
+        read_nwbfile = NWBHDF5IO.read_nwb(file=file)
+        self.assertContainerEqual(read_nwbfile, self.nwbfile)
+
+        read_nwbfile.get_read_io().close()
+
+    @unittest.skipIf(not HAVE_FSSPEC, "fsspec library not available")
+    def test_read_nwb_method_s3_path(self):
+        s3_test_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991"
+        read_nwbfile = NWBHDF5IO.read_nwb(path=s3_test_path)
+        self.addCleanup(read_nwbfile.get_read_io().close)  # release the streamed file even if an assertion fails
+        self.assertEqual(read_nwbfile.identifier, "3f77c586-6139-4777-a05d-f603e90b1330")
+        self.assertEqual(read_nwbfile.subject.subject_id, "1")