From 1ec93f5234976b5d55a0d9dde6205d35b6fb9e21 Mon Sep 17 00:00:00 2001
From: brimoor
Date: Thu, 26 Dec 2024 20:18:03 -0600
Subject: [PATCH] add accesses to index stats

---
NOTE(review): line breaks and indentation were reconstructed from the hunk
headers and diffstat; confirm whitespace against upstream before applying.

 fiftyone/core/collections.py     | 18 ++++++++++++++++--
 fiftyone/core/dataset.py         | 29 ++++++++++-------------------
 tests/unittests/dataset_tests.py |  6 ++++--
 3 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/fiftyone/core/collections.py b/fiftyone/core/collections.py
index 2aafc32ee2..1cbc54d127 100644
--- a/fiftyone/core/collections.py
+++ b/fiftyone/core/collections.py
@@ -9506,8 +9506,8 @@ def get_index_information(self, include_stats=False):
         details on the structure of this dictionary.
 
         Args:
-            include_stats (False): whether to include the size and build status
-                of each index
+            include_stats (False): whether to include the size, usage, and
+                build status of each index
 
         Returns:
             a dict mapping index names to info dicts
@@ -9528,6 +9528,13 @@ def get_index_information(self, include_stats=False):
                 if key in sample_info:
                     sample_info[key]["in_progress"] = True
 
+            for d in self._dataset._sample_collection.aggregate(
+                [{"$indexStats": {}}]
+            ):
+                key = d["name"]
+                if key in sample_info:
+                    sample_info[key]["accesses"] = d["accesses"]
+
         for key, info in sample_info.items():
             if len(info["key"]) == 1:
                 field = info["key"][0][0]
@@ -9550,6 +9557,13 @@ def get_index_information(self, include_stats=False):
                     if key in frame_info:
                         frame_info[key]["in_progress"] = True
 
+                for d in self._dataset._frame_collection.aggregate(
+                    [{"$indexStats": {}}]
+                ):
+                    key = d["name"]
+                    if key in frame_info:
+                        frame_info[key]["accesses"] = d["accesses"]
+
             for key, info in frame_info.items():
                 if len(info["key"]) == 1:
                     field = info["key"][0][0]
diff --git a/fiftyone/core/dataset.py b/fiftyone/core/dataset.py
index 65e75cd910..e69d17ef3a 100644
--- a/fiftyone/core/dataset.py
+++ b/fiftyone/core/dataset.py
@@ -1202,21 +1202,13 @@ def stats(
         return stats
 
     def _sample_collstats(self):
-        conn = foo.get_db_conn()
-        return conn.command(
-            "collstats",
-            self._sample_collection_name,
-        )
+        return _get_collstats(self._sample_collection)
 
     def _frame_collstats(self):
         if self._frame_collection_name is None:
             return None
 
-        conn = foo.get_db_conn()
-        return conn.command(
-            "collstats",
-            self._frame_collection_name,
-        )
+        return _get_collstats(self._frame_collection)
 
     def first(self):
         """Returns the first sample in the dataset.
@@ -7770,15 +7762,6 @@ def _get_frame_collection(self, write_concern=None):
             self._frame_collection_name, write_concern=write_concern
         )
 
-    @property
-    def _frame_indexes(self):
-        frame_collection = self._frame_collection
-        if frame_collection is None:
-            return None
-
-        index_info = frame_collection.index_information()
-        return [k["key"][0][0] for k in index_info.values()]
-
     def _apply_sample_field_schema(self, schema):
         for field_name, field_or_str in schema.items():
             kwargs = foo.get_field_kwargs(field_or_str)
@@ -9104,6 +9087,14 @@ def _get_single_index_map(coll):
     }
 
 
+def _get_collstats(coll):
+    pipeline = [
+        {"$collStats": {"storageStats": {}}},
+        {"$replaceRoot": {"newRoot": "$storageStats"}},
+    ]
+    return next(coll.aggregate(pipeline))
+
+
 def _add_collection_with_new_ids(
     dataset,
     sample_collection,
diff --git a/tests/unittests/dataset_tests.py b/tests/unittests/dataset_tests.py
index 9ef306e471..af3164e695 100644
--- a/tests/unittests/dataset_tests.py
+++ b/tests/unittests/dataset_tests.py
@@ -671,7 +671,7 @@ def test_indexes(self):
             dataset.create_index("non_existent_field")
 
     @drop_datasets
-    def test_index_sizes(self):
+    def test_index_stats(self):
         gt = fo.Detections(detections=[fo.Detection(label="foo")])
         sample = fo.Sample(filepath="video.mp4", gt=gt)
         sample.frames[1] = fo.Frame(gt=gt)
@@ -700,7 +700,9 @@ def test_index_sizes(self):
         self.assertSetEqual(set(dataset.list_indexes()), indexes)
         self.assertSetEqual(set(info.keys()), indexes)
         for d in info.values():
-            self.assertTrue(d.get("size") is not None)
+            self.assertTrue(d["size"] is not None)
+            self.assertTrue("ops" in d["accesses"])
+            self.assertTrue("since" in d["accesses"])
 
     @drop_datasets
     def test_index_in_progress(self):