From db36821499b60296c5d8e9e360eb98bd41b1c2d3 Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Fri, 13 Dec 2024 13:26:15 -0700
Subject: [PATCH 1/7] Add COCO-styled mAR to evaluate_detections

---
 fiftyone/utils/eval/coco.py | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index 1efa89331b..f55cd4bea2 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -218,6 +218,7 @@ def generate_results(
                 thresholds,
                 iou_threshs,
                 classes,
+                recall_sweep,
             ) = _compute_pr_curves(
                 samples, self.config, classes=classes, progress=progress
             )
@@ -229,6 +230,7 @@ def generate_results(
             matches,
             precision,
             recall,
+            recall_sweep,
             iou_threshs,
             classes,
             thresholds=thresholds,
@@ -251,6 +253,7 @@ class COCODetectionResults(DetectionResults):
         precision: an array of precision values of shape
             ``num_iou_threshs x num_classes x num_recall``
         recall: an array of recall values
+        recall_sweep: an array of recall values of shape ``num_iou x num_classes``
         iou_threshs: an array of IoU thresholds
         classes: the list of possible classes
         thresholds (None): an optional array of decision thresholds of shape
@@ -268,6 +271,7 @@ def __init__(
         self,
         samples,
         config,
         eval_key,
         matches,
         precision,
         recall,
+        recall_sweep,
         iou_threshs,
         classes,
         thresholds=None,
@@ -292,6 +296,7 @@
         )
 
         self._classwise_AP = np.mean(precision, axis=(0, 2))
+        self._classwise_AR = np.mean(recall_sweep, axis=0)
 
     def plot_pr_curves(
         self, classes=None, iou_thresh=None, backend="plotly", **kwargs
@@ -376,6 +381,31 @@ def mAP(self, classes=None):
 
         return np.mean(classwise_AP)
 
+    def mAR(self, classes=None):
+        """Computes COCO-style mean average recall (mAR) for the specified
+        classes.
+
+        See `this page <https://cocodataset.org/#detection-eval>`_
+        for more details about COCO-style mAR.
+
+        Args:
+            classes (None): a list of classes for which to compute mAR
+
+        Returns:
+            the mAR in ``[0, 1]``
+        """
+        if classes is not None:
+            class_inds = np.array([self._get_class_index(c) for c in classes])
+            classwise_AR = self._classwise_AR[class_inds]
+        else:
+            classwise_AR = self._classwise_AR
+
+        classwise_AR = classwise_AR[classwise_AR > -1]
+        if classwise_AR.size == 0:
+            return -1
+
+        return np.mean(classwise_AR)
+
     def _get_iou_thresh_inds(self, iou_thresh=None):
         if iou_thresh is None:
             return np.arange(len(self.iou_threshs))
@@ -713,6 +743,7 @@ def _compute_pr_curves(samples, config, classes=None, progress=None):
     precision = -np.ones((num_threshs, num_classes, 101))
     thresholds = -np.ones((num_threshs, num_classes, 101))
     recall = np.linspace(0, 1, 101)
+    recall_sweep = -np.ones((num_threshs, num_classes))
    for idx, _thresh_matches in enumerate(thresh_matches):
         for c, matches in _thresh_matches.items():
             c_idx = class_idx_map.get(c, None)
@@ -755,13 +786,15 @@ def _compute_pr_curves(samples, config, classes=None, progress=None):
                 for ri, pi in enumerate(inds):
                     q[ri] = pre[pi]
                     t[ri] = confs[pi]
+
             except:
                 pass
 
             precision[idx][c_idx] = q
             thresholds[idx][c_idx] = t
+            recall_sweep[idx][c_idx] = rec[-1]
 
-    return precision, recall, thresholds, iou_threshs, classes
+    return precision, recall, thresholds, iou_threshs, classes, recall_sweep
 
 
 def _copy_labels(labels):
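Note on the computation above: for each IoU threshold and class,
``recall_sweep`` stores the final value of the recall sweep (``rec[-1]``,
i.e. the best recall attained at that threshold), ``_classwise_AR`` averages
those values over IoU thresholds, and ``mAR()`` averages the surviving
classes, skipping ``-1`` entries (classes with no data). A minimal NumPy
sketch of that reduction, using a small hypothetical sweep for illustration
(the values are made up, not from this PR):

    import numpy as np

    # hypothetical sweep: 2 IoU thresholds x 3 classes; each entry is the
    # best recall achieved for that (IoU, class) pair; -1 marks a class
    # with no data at that threshold
    recall_sweep = np.array([[0.6, 0.8, -1.0], [0.4, 0.7, -1.0]])

    classwise_AR = np.mean(recall_sweep, axis=0)    # average over IoU threshs
    classwise_AR = classwise_AR[classwise_AR > -1]  # drop classes with no data
    mAR = np.mean(classwise_AR) if classwise_AR.size > 0 else -1
    print(mAR)  # 0.625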
From fcf4caff2298a5d20fdfe71d4560388de4008fad Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Mon, 16 Dec 2024 10:00:56 -0700
Subject: [PATCH 2/7] Make recall_sweep optional

---
 .../builtins/panels/model_evaluation/__init__.py |  7 +++++++
 fiftyone/utils/eval/coco.py                      | 15 +++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
index 2878b65a9a..5380248968 100644
--- a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
+++ b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
@@ -140,6 +140,12 @@ def get_map(self, results):
         except Exception as e:
             return None
 
+    def get_mar(self, results):
+        try:
+            return results.mAR()
+        except Exception as e:
+            return None
+
     def set_status(self, ctx):
         if not self.can_edit_status(ctx):
             ctx.ops.notify(
@@ -355,6 +361,7 @@ def load_evaluation(self, ctx):
                 info, results
             )
             metrics["mAP"] = self.get_map(results)
+            metrics["mAR"] = self.get_mar(results)
             evaluation_data = {
                 "metrics": metrics,
                 "info": serialized_info,
diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index f55cd4bea2..4b697c7474 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -230,9 +230,9 @@
             matches,
             precision,
             recall,
-            recall_sweep,
             iou_threshs,
             classes,
+            recall_sweep=recall_sweep,
             thresholds=thresholds,
             missing=missing,
             backend=self,
@@ -253,9 +253,9 @@ class COCODetectionResults(DetectionResults):
         precision: an array of precision values of shape
             ``num_iou_threshs x num_classes x num_recall``
         recall: an array of recall values
-        recall_sweep: an array of recall values of shape ``num_iou x num_classes``
         iou_threshs: an array of IoU thresholds
         classes: the list of possible classes
+        recall_sweep (None): an array of recall values of shape ``num_iou x num_classes``
         thresholds (None): an optional array of decision thresholds of shape
             ``num_iou_threshs x num_classes x num_recall``
         missing (None): a missing label string. Any unmatched objects are
@@ -271,9 +271,9 @@ def __init__(
         self,
         samples,
         config,
         eval_key,
         matches,
         precision,
         recall,
-        recall_sweep,
         iou_threshs,
         classes,
+        recall_sweep=None,
         thresholds=None,
         missing=None,
         backend=None,
@@ -296,7 +296,9 @@
         )
 
         self._classwise_AP = np.mean(precision, axis=(0, 2))
-        self._classwise_AR = np.mean(recall_sweep, axis=0)
+        self._classwise_AR = (
+            np.mean(recall_sweep, axis=0) if recall_sweep is not None else None
+        )
 
     def plot_pr_curves(
         self, classes=None, iou_thresh=None, backend="plotly", **kwargs
@@ -394,6 +396,11 @@ def mAR(self, classes=None):
         Returns:
             the mAR in ``[0, 1]``
         """
+        if self._classwise_AR is None:
+            raise Exception(
+                "Classwise AR is not available. mAR can't be computed."
+            )
+
         if classes is not None:
             class_inds = np.array([self._get_class_index(c) for c in classes])
             classwise_AR = self._classwise_AR[class_inds]
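Note: ``recall_sweep`` is made optional above for backwards compatibility;
in particular, results loaded from storage may carry no recall sweep
(serialization support only arrives in patch 7), in which case
``_classwise_AR`` is ``None``, ``mAR()`` raises, and the panel's
``get_mar()`` reports ``None`` rather than failing. A hedged usage sketch
(the quickstart dataset and field names mirror the docs examples in patch 5):

    import fiftyone.zoo as foz

    dataset = foz.load_zoo_dataset("quickstart")

    # the IoU sweep performed for mAP also produces recall_sweep
    results = dataset.evaluate_detections(
        "predictions", gt_field="ground_truth", compute_mAP=True
    )

    # mAR() raises when no recall sweep is available, so guard the call
    # the same way the panel does
    try:
        mar = results.mAR()
    except Exception:
        mar = None

    print(mar)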
From 5e389145e8910b54492658f3eda2e3794071220a Mon Sep 17 00:00:00 2001
From: Ritchie Martori
Date: Mon, 16 Dec 2024 11:25:10 -0700
Subject: [PATCH 3/7] add mAR to eval table

---
 .../components/NativeModelEvaluationView/Evaluation.tsx | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx b/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
index db05065126..28a214054c 100644
--- a/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
+++ b/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
@@ -414,6 +414,13 @@ export default function Evaluation(props: EvaluationProps) {
       compareValue: compareEvaluationMetrics.mAP,
       hide: !isObjectDetection,
     },
+    {
+      id: "mAR",
+      property: "mAR",
+      value: evaluationMetrics.mAR,
+      compareValue: compareEvaluationMetrics.mAR,
+      hide: !isObjectDetection,
+    },
     {
       id: "tp",
       property: "True Positives",

From 667984d52f5840a68b0f9178c307838761ee9932 Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Wed, 18 Dec 2024 09:50:55 -0700
Subject: [PATCH 4/7] Fix line length

---
 fiftyone/utils/eval/coco.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index 4b697c7474..2b5a0b7a37 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -5,6 +5,7 @@
 | `voxel51.com <https://voxel51.com/>`_
 |
 """
+
 import logging
 from collections import defaultdict
@@ -50,7 +51,7 @@ class COCOEvaluationConfig(DetectionEvaluationConfig):
         tolerance (None): a tolerance, in pixels, when generating approximate
             polylines for instance masks. Typical values are 1-3 pixels
         compute_mAP (False): whether to perform the necessary computations so
-            that mAP and PR curves can be generated
+            that mAP, mAR, and PR curves can be generated
         iou_threshs (None): a list of IoU thresholds to use when computing
             mAP and PR curves. Only applicable when ``compute_mAP`` is True
         max_preds (None): the maximum number of predicted objects to evaluate
@@ -255,7 +256,8 @@ class COCODetectionResults(DetectionResults):
         recall: an array of recall values
         iou_threshs: an array of IoU thresholds
         classes: the list of possible classes
-        recall_sweep (None): an array of recall values of shape ``num_iou x num_classes``
+        recall_sweep (None): an array of recall values of shape
+            ``num_iou x num_classes``
         thresholds (None): an optional array of decision thresholds of shape
             ``num_iou_threshs x num_classes x num_recall``
         missing (None): a missing label string. Any unmatched objects are
From 450d254363a72d2ee2ffb4a2e506c63e04cfdf05 Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Wed, 18 Dec 2024 10:08:40 -0700
Subject: [PATCH 5/7] Update docs

---
 docs/source/integrations/coco.rst     | 12 ++++++++----
 docs/source/user_guide/evaluation.rst | 14 +++++++++-----
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/docs/source/integrations/coco.rst b/docs/source/integrations/coco.rst
index 2b910952ef..af520197a9 100644
--- a/docs/source/integrations/coco.rst
+++ b/docs/source/integrations/coco.rst
@@ -471,13 +471,14 @@ The example below demonstrates COCO-style detection evaluation on the
 mAP and PR curves
 ~~~~~~~~~~~~~~~~~
 
-You can compute mean average precision (mAP) and precision-recall (PR) curves
-for your predictions by passing the ``compute_mAP=True`` flag to
+You can compute mean average precision (mAP), mean average recall (mAR), and
+precision-recall (PR) curves for your predictions by passing the
+``compute_mAP=True`` flag to
 :meth:`evaluate_detections() <fiftyone.core.collections.SampleCollection.evaluate_detections>`:
 
 .. note::
 
-    All mAP calculations are performed according to the
+    All mAP and mAR calculations are performed according to the
     `COCO evaluation protocol <https://cocodataset.org/#detection-eval>`_.
 
 .. code-block:: python
 
     import fiftyone as fo
     import fiftyone.zoo as foz
 
     dataset = foz.load_zoo_dataset("quickstart")
     print(dataset)
 
-    # Performs an IoU sweep so that mAP and PR curves can be computed
+    # Performs an IoU sweep so that mAP, mAR, and PR curves can be computed
     results = dataset.evaluate_detections(
         "predictions",
         gt_field="ground_truth",
         eval_key="eval",
         compute_mAP=True,
     )
 
     print(results.mAP())
     # 0.3957
 
+    print(results.mAR())
+    # 0.5210
+
     plot = results.plot_pr_curves(classes=["person", "kite", "car"])
     plot.show()

diff --git a/docs/source/user_guide/evaluation.rst b/docs/source/user_guide/evaluation.rst
index 57e25d5b11..0c8d35c893 100644
--- a/docs/source/user_guide/evaluation.rst
+++ b/docs/source/user_guide/evaluation.rst
@@ -1041,16 +1041,17 @@ The example below demonstrates COCO-style detection evaluation on the
 The easiest way to analyze models in FiftyOne is via the
 :ref:`Model Evaluation panel <app-model-evaluation-panel>`!
 
-mAP and PR curves
+mAP, mAR and PR curves
 ~~~~~~~~~~~~~~~~~
 
-You can compute mean average precision (mAP) and precision-recall (PR) curves
-for your objects by passing the ``compute_mAP=True`` flag to
+You can compute mean average precision (mAP), mean average recall (mAR), and
+precision-recall (PR) curves for your predictions by passing the
+``compute_mAP=True`` flag to
 :meth:`evaluate_detections() <fiftyone.core.collections.SampleCollection.evaluate_detections>`:
 
 .. note::
 
-    All mAP calculations are performed according to the
+    All mAP and mAR calculations are performed according to the
     `COCO evaluation protocol <https://cocodataset.org/#detection-eval>`_.
 
 .. code-block:: python
 
     import fiftyone as fo
     import fiftyone.zoo as foz
 
     dataset = foz.load_zoo_dataset("quickstart")
     print(dataset)
 
-    # Performs an IoU sweep so that mAP and PR curves can be computed
+    # Performs an IoU sweep so that mAP, mAR, and PR curves can be computed
     results = dataset.evaluate_detections(
         "predictions",
         gt_field="ground_truth",
         compute_mAP=True,
     )
 
     print(results.mAP())
     # 0.3957
 
+    print(results.mAR())
+    # 0.5210
+
     plot = results.plot_pr_curves(classes=["person", "kite", "car"])
     plot.show()
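Like ``mAP()``, the new ``mAR()`` method also accepts an optional class list,
which the docs examples above don't show. A short sketch continuing the
quickstart example (the class names are the ones used in the docs snippet):

    # mAR over a subset of classes (continuing the docs example above)
    print(results.mAR(classes=["person", "kite", "car"]))

    # mAR over all classes
    print(results.mAR())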
From 770c7d626b7991878dd26af3c061fc1a91262748 Mon Sep 17 00:00:00 2001
From: brimoor
Date: Fri, 20 Dec 2024 10:29:02 -0500
Subject: [PATCH 6/7] lint

---
 docs/source/user_guide/evaluation.rst | 2 +-
 fiftyone/utils/eval/coco.py           | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/source/user_guide/evaluation.rst b/docs/source/user_guide/evaluation.rst
index 0c8d35c893..97b789667a 100644
--- a/docs/source/user_guide/evaluation.rst
+++ b/docs/source/user_guide/evaluation.rst
@@ -1042,7 +1042,7 @@ The example below demonstrates COCO-style detection evaluation on the
 :ref:`Model Evaluation panel <app-model-evaluation-panel>`!
 
 mAP, mAR and PR curves
-~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~
 
 You can compute mean average precision (mAP), mean average recall (mAR), and
 precision-recall (PR) curves for your predictions by passing the
diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index 2b5a0b7a37..bb3a6c6553 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -795,7 +795,6 @@ def _compute_pr_curves(samples, config, classes=None, progress=None):
                 for ri, pi in enumerate(inds):
                     q[ri] = pre[pi]
                     t[ri] = confs[pi]
-
             except:
                 pass
 

From e94c8a0a5c99f6c8f5046f3838f923e2d52bcb8e Mon Sep 17 00:00:00 2001
From: brimoor
Date: Fri, 20 Dec 2024 15:07:23 -0500
Subject: [PATCH 7/7] include recall_sweep in serialization

---
 fiftyone/utils/eval/coco.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index bb3a6c6553..3de1702051 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -293,6 +293,7 @@ def __init__(
         self.precision = np.asarray(precision)
         self.recall = np.asarray(recall)
         self.iou_threshs = np.asarray(iou_threshs)
+        self.recall_sweep = recall_sweep
         self.thresholds = (
             np.asarray(thresholds) if thresholds is not None else None
         )
@@ -449,6 +450,7 @@ def _from_dict(cls, d, samples, config, eval_key, **kwargs):
         precision = d["precision"]
         recall = d["recall"]
         iou_threshs = d["iou_threshs"]
+        recall_sweep = d.get("recall_sweep", None)
         thresholds = d.get("thresholds", None)
         return super()._from_dict(
             d,
             samples,
             config,
             eval_key,
             precision=precision,
             recall=recall,
             iou_threshs=iou_threshs,
+            recall_sweep=recall_sweep,
             thresholds=thresholds,
             **kwargs,
         )
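With patch 7, ``recall_sweep`` survives serialization, so mAR remains
available when saved results are reloaded. A hedged round-trip sketch using
the standard evaluation workflow (the ``"eval"`` key matches the docs
example; everything else is stock FiftyOne API):

    import fiftyone.zoo as foz

    dataset = foz.load_zoo_dataset("quickstart")

    results = dataset.evaluate_detections(
        "predictions",
        gt_field="ground_truth",
        eval_key="eval",
        compute_mAP=True,
    )
    print(results.mAR())

    # reload the stored results; recall_sweep is restored via _from_dict,
    # so mAR() still works on the reloaded object
    results2 = dataset.load_evaluation_results("eval")
    print(results2.mAR())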