From 7c65fe2cb20f10cefa0cefeae019ce32c6b1706e Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 7 Nov 2024 15:48:54 -0600 Subject: [PATCH 01/10] add NWBHDF5IO.read_nwb() method --- src/pynwb/__init__.py | 18 ++++++++++++++++++ tests/integration/hdf5/test_io.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 9ea18efb8..f1681498b 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -372,6 +372,24 @@ def export(self, **kwargs): kwargs['container'] = nwbfile super().export(**kwargs) + @staticmethod + @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, + {'name': 'file', 'type': [h5py.File, 'S3File'], 'doc': 'a pre-existing h5py.File object', 'default': None}, + is_method=False) + def read_nwb(**kwargs): + """ + Helper factory method for reading an NWB file and return the NWBFile object + """ + # Retrieve the filepath + path = popargs('path', kwargs) + file = popargs('file', kwargs) + + # open the file with NWBZarrIO and rad the file + io = NWBHDF5IO(path=path, file=file, mode="r", load_namespaces=True) + nwbfile = io.read() + + # return the NWBFile object + return nwbfile from . import io as __io # noqa: F401,E402 from .core import NWBContainer, NWBData # noqa: F401,E402 diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index d68334c89..1932b7680 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -531,3 +531,31 @@ def test_round_trip_with_pathlib_path(self): with NWBHDF5IO(pathlib_path, 'r') as io: read_file = io.read() self.assertContainerEqual(read_file, self.nwbfile) + + + def test_read_nwb_method_path(self): + + # write the example file + with NWBHDF5IO(self.path, 'w') as io: + io.write(self.nwbfile) + + # test that the read_nwb method works + read_nwbfile = NWBHDF5IO.read_nwb(path=self.path) + self.assertContainerEqual(read_nwbfile, self.nwbfile) + + read_nwbfile.get_read_io().close() + + def test_read_nwb_method_file(self): + + # write the example file + with NWBHDF5IO(self.path, 'w') as io: + io.write(self.nwbfile) + + import h5py + + file = h5py.File(self.path, 'r') + + read_nwbfile = NWBHDF5IO.read_nwb(file=file) + self.assertContainerEqual(read_nwbfile, self.nwbfile) + + read_nwbfile.get_read_io().close() From d8f9464a8388b71b2b9ccf2cbfb3318b181b4cae Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Sat, 9 Nov 2024 12:52:34 -0600 Subject: [PATCH 02/10] add s3 test --- src/pynwb/__init__.py | 18 ++++++++++++++---- tests/integration/hdf5/test_io.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index e8294057f..2eef7bbe9 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -514,11 +514,21 @@ def read_nwb(**kwargs): path = popargs('path', kwargs) file = popargs('file', kwargs) - # open the file with NWBZarrIO and rad the file - io = NWBHDF5IO(path=path, file=file, mode="r", load_namespaces=True) - nwbfile = io.read() + # Streaming case + if path is not None and str(path).startswith("s3://") or str(path).startswith("http"): + import fsspec + print(path) + path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + fsspec_file_system = fsspec.filesystem("http") + ffspec_file = fsspec_file_system.open(str(path), "rb") + + open_file = h5py.File(ffspec_file, "r") + io = NWBHDF5IO(file=open_file) + nwbfile = io.read() + else: + io = NWBHDF5IO(path=path, file=file, mode="r", load_namespaces=True) + nwbfile = io.read() - # return the NWBFile object return nwbfile from . import io as __io # noqa: F401,E402 diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index d54201b7b..76921d290 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -4,6 +4,7 @@ from h5py import File from pathlib import Path import tempfile +import pytest from pynwb import NWBFile, TimeSeries, get_manager, NWBHDF5IO, validate @@ -596,3 +597,12 @@ def test_read_nwb_method_file(self): self.assertContainerEqual(read_nwbfile, self.nwbfile) read_nwbfile.get_read_io().close() + + @pytest.mark.skipif(not pytest.importorskip("fsspec"), reason="fsspec library not available") + def test_read_nwb_method_s3_path(self): + + s3_test_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + read_nwbfile = NWBHDF5IO.read_nwb(path=s3_test_path) + assert read_nwbfile.identifier == "3f77c586-6139-4777-a05d-f603e90b1330" + + assert read_nwbfile.subject.subject_id == "1" \ No newline at end of file From 5cdd96e08995dcb7fae504570b3f5bfc9c3965fd Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Sat, 9 Nov 2024 13:01:46 -0600 Subject: [PATCH 03/10] changelog and cast to path to str --- CHANGELOG.md | 3 +++ src/pynwb/__init__.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16e504659..f8bf49e06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## PyNWB 2.8.3 (Upcoming) +### Enhancements and minor changes +* Added `NWBHDF5IO.read_nwb` convenience method to simplify reading an NWB file. @h-mayorquin [#1979](https://github.com/NeurodataWithoutBorders/pynwb/pull/1979) + ### Performance - Cache global type map to speed import 3X. @sneakers-the-rat [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 2eef7bbe9..a3a13fb30 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -513,14 +513,16 @@ def read_nwb(**kwargs): # Retrieve the filepath path = popargs('path', kwargs) file = popargs('file', kwargs) + + path = str(path) if path is not None else None # Streaming case - if path is not None and str(path).startswith("s3://") or str(path).startswith("http"): + if path is not None and (path.startswith("s3://") or path.startswith("http")): import fsspec print(path) path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" fsspec_file_system = fsspec.filesystem("http") - ffspec_file = fsspec_file_system.open(str(path), "rb") + ffspec_file = fsspec_file_system.open(path, "rb") open_file = h5py.File(ffspec_file, "r") io = NWBHDF5IO(file=open_file) From e37eb5989cd3ef2fd24482984951a4e4d7e2fd95 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Sat, 9 Nov 2024 13:02:39 -0600 Subject: [PATCH 04/10] forgot print --- src/pynwb/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index a3a13fb30..fd0f25596 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -519,7 +519,6 @@ def read_nwb(**kwargs): # Streaming case if path is not None and (path.startswith("s3://") or path.startswith("http")): import fsspec - print(path) path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" fsspec_file_system = fsspec.filesystem("http") ffspec_file = fsspec_file_system.open(path, "rb") From 0035bb2d37af5d0ad0946a1faa463b75516fc64e Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Sat, 9 Nov 2024 13:03:00 -0600 Subject: [PATCH 05/10] remove testing code --- src/pynwb/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index fd0f25596..b49a76e25 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -519,7 +519,6 @@ def read_nwb(**kwargs): # Streaming case if path is not None and (path.startswith("s3://") or path.startswith("http")): import fsspec - path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" fsspec_file_system = fsspec.filesystem("http") ffspec_file = fsspec_file_system.open(path, "rb") From a143e14e8328d01b3fbb0bd304a03d1b8d22ec12 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Sat, 9 Nov 2024 13:25:08 -0600 Subject: [PATCH 06/10] try pytest import skip --- tests/integration/hdf5/test_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index b91fb6345..b6c0ef0fb 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -615,9 +615,9 @@ def test_read_nwb_method_file(self): read_nwbfile.get_read_io().close() - @pytest.mark.skipif(not pytest.importorskip("fsspec"), reason="fsspec library not available") def test_read_nwb_method_s3_path(self): - + fsspec = pytest.importorskip("fsspec") # This alone will skip if import fails + s3_test_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" read_nwbfile = NWBHDF5IO.read_nwb(path=s3_test_path) assert read_nwbfile.identifier == "3f77c586-6139-4777-a05d-f603e90b1330" From 5c45278031f3846f70de2908ef81e3442a070fbe Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Sat, 9 Nov 2024 14:27:50 -0600 Subject: [PATCH 07/10] test unittest skip --- tests/integration/hdf5/test_io.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index b6c0ef0fb..795ce722c 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -4,7 +4,6 @@ from h5py import File from pathlib import Path import tempfile -import pytest from pynwb import NWBFile, TimeSeries, get_manager, NWBHDF5IO, validate @@ -19,6 +18,13 @@ from pynwb.testing.mock.file import mock_NWBFile +import unittest +try: + import fsspec # noqa f401 + HAVE_FSSPEC = True +except ImportError: + HAVE_FSSPEC = False + class TestHDF5Writer(TestCase): _required_tests = ('test_nwbio', 'test_write_clobber', 'test_write_cache_spec', 'test_write_no_cache_spec') @@ -615,9 +621,8 @@ def test_read_nwb_method_file(self): read_nwbfile.get_read_io().close() - def test_read_nwb_method_s3_path(self): - fsspec = pytest.importorskip("fsspec") # This alone will skip if import fails - + @unittest.skipIf(not HAVE_FSSPEC, "fsspec library not available") + def test_read_nwb_method_s3_path(): s3_test_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" read_nwbfile = NWBHDF5IO.read_nwb(path=s3_test_path) assert read_nwbfile.identifier == "3f77c586-6139-4777-a05d-f603e90b1330" From b63dade370fcd40940be3062444ec6fbd3ae8176 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 11 Nov 2024 16:54:57 -0800 Subject: [PATCH 08/10] Update requirements-opt.txt --- requirements-opt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-opt.txt b/requirements-opt.txt index 3badc79c7..ad49ae289 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,3 +1,4 @@ linkml-runtime==1.7.4; python_version >= "3.9" schemasheets==0.2.1; python_version >= "3.9" oaklib==0.5.32; python_version >= "3.9" +fsspec==2024.10.0 From e2d8d299059cf62ed214c610abf26e5718d4d67c Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 11 Nov 2024 17:48:37 -0800 Subject: [PATCH 09/10] Update tests/integration/hdf5/test_io.py --- tests/integration/hdf5/test_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index 795ce722c..1e6ed0593 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -622,7 +622,7 @@ def test_read_nwb_method_file(self): read_nwbfile.get_read_io().close() @unittest.skipIf(not HAVE_FSSPEC, "fsspec library not available") - def test_read_nwb_method_s3_path(): + def test_read_nwb_method_s3_path(self): s3_test_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" read_nwbfile = NWBHDF5IO.read_nwb(path=s3_test_path) assert read_nwbfile.identifier == "3f77c586-6139-4777-a05d-f603e90b1330" From 2ed4aa50d7a81c80418109bbeda4074a13a27c9b Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 11 Nov 2024 18:14:19 -0800 Subject: [PATCH 10/10] Update requirements-opt.txt --- requirements-opt.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/requirements-opt.txt b/requirements-opt.txt index ad49ae289..da62bc314 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,4 +1,8 @@ linkml-runtime==1.7.4; python_version >= "3.9" schemasheets==0.2.1; python_version >= "3.9" oaklib==0.5.32; python_version >= "3.9" + +# for streaming tests fsspec==2024.10.0 +requests==2.32.3 +aiohttp==3.10.10