From db36821499b60296c5d8e9e360eb98bd41b1c2d3 Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Fri, 13 Dec 2024 13:26:15 -0700
Subject: [PATCH 1/7] Add COCO-styled mAR to evaluate_detections

---
 fiftyone/utils/eval/coco.py | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index 1efa89331b..f55cd4bea2 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -218,6 +218,7 @@ def generate_results(
                 thresholds,
                 iou_threshs,
                 classes,
+                recall_sweep,
             ) = _compute_pr_curves(
                 samples, self.config, classes=classes, progress=progress
             )
@@ -229,6 +230,7 @@ def generate_results(
             matches,
             precision,
             recall,
+            recall_sweep,
             iou_threshs,
             classes,
             thresholds=thresholds,
@@ -251,6 +253,7 @@ class COCODetectionResults(DetectionResults):
         precision: an array of precision values of shape
             ``num_iou_threshs x num_classes x num_recall``
         recall: an array of recall values
+        recall_sweep: an array of recall values of shape ``num_iou x num_classes``
         iou_threshs: an array of IoU thresholds
         classes: the list of possible classes
         thresholds (None): an optional array of decision thresholds of shape
@@ -268,6 +271,7 @@ def __init__(
         self,
         samples,
         config,
         eval_key,
         matches,
         precision,
         recall,
+        recall_sweep,
         iou_threshs,
         classes,
         thresholds=None,
@@ -292,6 +296,7 @@
         )
 
         self._classwise_AP = np.mean(precision, axis=(0, 2))
+        self._classwise_AR = np.mean(recall_sweep, axis=0)
 
     def plot_pr_curves(
         self, classes=None, iou_thresh=None, backend="plotly", **kwargs
@@ -376,6 +381,31 @@ def mAP(self, classes=None):
 
         return np.mean(classwise_AP)
 
+    def mAR(self, classes=None):
+        """Computes COCO-style mean average recall (mAR) for the specified
+        classes.
+
+        See `this page <https://cocodataset.org/#detection-eval>`_
+        for more details about COCO-style mAR.
+
+        Args:
+            classes (None): a list of classes for which to compute mAR
+
+        Returns:
+            the mAR in ``[0, 1]``
+        """
+        if classes is not None:
+            class_inds = np.array([self._get_class_index(c) for c in classes])
+            classwise_AR = self._classwise_AR[class_inds]
+        else:
+            classwise_AR = self._classwise_AR
+
+        classwise_AR = classwise_AR[classwise_AR > -1]
+        if classwise_AR.size == 0:
+            return -1
+
+        return np.mean(classwise_AR)
+
     def _get_iou_thresh_inds(self, iou_thresh=None):
         if iou_thresh is None:
             return np.arange(len(self.iou_threshs))
@@ -713,6 +743,7 @@ def _compute_pr_curves(samples, config, classes=None, progress=None):
     precision = -np.ones((num_threshs, num_classes, 101))
     thresholds = -np.ones((num_threshs, num_classes, 101))
     recall = np.linspace(0, 1, 101)
+    recall_sweep = -np.ones((num_threshs, num_classes))
    for idx, _thresh_matches in enumerate(thresh_matches):
         for c, matches in _thresh_matches.items():
             c_idx = class_idx_map.get(c, None)
@@ -755,13 +786,15 @@ def _compute_pr_curves(samples, config, classes=None, progress=None):
                 for ri, pi in enumerate(inds):
                     q[ri] = pre[pi]
                     t[ri] = confs[pi]
+
             except:
                 pass
 
             precision[idx][c_idx] = q
             thresholds[idx][c_idx] = t
+            recall_sweep[idx][c_idx] = rec[-1]
 
-    return precision, recall, thresholds, iou_threshs, classes
+    return precision, recall, thresholds, iou_threshs, classes, recall_sweep
 
 
 def _copy_labels(labels):
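Note on the computation above: for each IoU threshold and class,
``recall_sweep`` stores the final value of the recall sweep (``rec[-1]``,
i.e. the best recall attained at that threshold), ``_classwise_AR`` averages
those values over IoU thresholds, and ``mAR()`` averages the surviving
classes, skipping ``-1`` entries (classes with no data). A minimal NumPy
sketch of that reduction, using a small hypothetical sweep for illustration
(the values are made up, not from this PR):

    import numpy as np

    # hypothetical sweep: 2 IoU thresholds x 3 classes; each entry is the
    # best recall achieved for that (IoU, class) pair; -1 marks a class
    # with no data at that threshold
    recall_sweep = np.array([[0.6, 0.8, -1.0], [0.4, 0.7, -1.0]])

    classwise_AR = np.mean(recall_sweep, axis=0)    # average over IoU threshs
    classwise_AR = classwise_AR[classwise_AR > -1]  # drop classes with no data
    mAR = np.mean(classwise_AR) if classwise_AR.size > 0 else -1
    print(mAR)  # 0.625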
From fcf4caff2298a5d20fdfe71d4560388de4008fad Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Mon, 16 Dec 2024 10:00:56 -0700
Subject: [PATCH 2/7] Make recall_sweep optional

---
 .../builtins/panels/model_evaluation/__init__.py |  7 +++++++
 fiftyone/utils/eval/coco.py                      | 15 +++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
index 2878b65a9a..5380248968 100644
--- a/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
+++ b/fiftyone/operators/builtins/panels/model_evaluation/__init__.py
@@ -140,6 +140,12 @@ def get_map(self, results):
         except Exception as e:
             return None
 
+    def get_mar(self, results):
+        try:
+            return results.mAR()
+        except Exception as e:
+            return None
+
     def set_status(self, ctx):
         if not self.can_edit_status(ctx):
             ctx.ops.notify(
@@ -355,6 +361,7 @@ def load_evaluation(self, ctx):
                 info, results
             )
             metrics["mAP"] = self.get_map(results)
+            metrics["mAR"] = self.get_mar(results)
             evaluation_data = {
                 "metrics": metrics,
                 "info": serialized_info,
diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index f55cd4bea2..4b697c7474 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -230,9 +230,9 @@
             matches,
             precision,
             recall,
-            recall_sweep,
             iou_threshs,
             classes,
+            recall_sweep=recall_sweep,
             thresholds=thresholds,
             missing=missing,
             backend=self,
@@ -253,9 +253,9 @@ class COCODetectionResults(DetectionResults):
         precision: an array of precision values of shape
             ``num_iou_threshs x num_classes x num_recall``
         recall: an array of recall values
-        recall_sweep: an array of recall values of shape ``num_iou x num_classes``
         iou_threshs: an array of IoU thresholds
         classes: the list of possible classes
+        recall_sweep (None): an array of recall values of shape ``num_iou x num_classes``
         thresholds (None): an optional array of decision thresholds of shape
             ``num_iou_threshs x num_classes x num_recall``
         missing (None): a missing label string. Any unmatched objects are
@@ -271,9 +271,9 @@ def __init__(
         self,
         samples,
         config,
         eval_key,
         matches,
         precision,
         recall,
-        recall_sweep,
         iou_threshs,
         classes,
+        recall_sweep=None,
         thresholds=None,
         missing=None,
         backend=None,
@@ -296,7 +296,9 @@
         )
 
         self._classwise_AP = np.mean(precision, axis=(0, 2))
-        self._classwise_AR = np.mean(recall_sweep, axis=0)
+        self._classwise_AR = (
+            np.mean(recall_sweep, axis=0) if recall_sweep is not None else None
+        )
 
     def plot_pr_curves(
         self, classes=None, iou_thresh=None, backend="plotly", **kwargs
@@ -394,6 +396,11 @@ def mAR(self, classes=None):
         Returns:
             the mAR in ``[0, 1]``
         """
+        if self._classwise_AR is None:
+            raise Exception(
+                "Classwise AR is not available. mAR can't be computed."
+            )
+
         if classes is not None:
             class_inds = np.array([self._get_class_index(c) for c in classes])
             classwise_AR = self._classwise_AR[class_inds]
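Note: ``recall_sweep`` is made optional above for backwards compatibility;
in particular, results loaded from storage may carry no recall sweep
(serialization support only arrives in patch 7), in which case
``_classwise_AR`` is ``None``, ``mAR()`` raises, and the panel's
``get_mar()`` reports ``None`` rather than failing. A hedged usage sketch
(the quickstart dataset and field names mirror the docs examples in patch 5):

    import fiftyone.zoo as foz

    dataset = foz.load_zoo_dataset("quickstart")

    # the IoU sweep performed for mAP also produces recall_sweep
    results = dataset.evaluate_detections(
        "predictions", gt_field="ground_truth", compute_mAP=True
    )

    # mAR() raises when no recall sweep is available, so guard the call
    # the same way the panel does
    try:
        mar = results.mAR()
    except Exception:
        mar = None

    print(mar)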
From 5e389145e8910b54492658f3eda2e3794071220a Mon Sep 17 00:00:00 2001
From: Ritchie Martori
Date: Mon, 16 Dec 2024 11:25:10 -0700
Subject: [PATCH 3/7] add mAR to eval table

---
 .../components/NativeModelEvaluationView/Evaluation.tsx | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx b/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
index db05065126..28a214054c 100644
--- a/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
+++ b/app/packages/core/src/plugins/SchemaIO/components/NativeModelEvaluationView/Evaluation.tsx
@@ -414,6 +414,13 @@ export default function Evaluation(props: EvaluationProps) {
       compareValue: compareEvaluationMetrics.mAP,
       hide: !isObjectDetection,
     },
+    {
+      id: "mAR",
+      property: "mAR",
+      value: evaluationMetrics.mAR,
+      compareValue: compareEvaluationMetrics.mAR,
+      hide: !isObjectDetection,
+    },
     {
       id: "tp",
       property: "True Positives",

From 667984d52f5840a68b0f9178c307838761ee9932 Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Wed, 18 Dec 2024 09:50:55 -0700
Subject: [PATCH 4/7] Fix line length

---
 fiftyone/utils/eval/coco.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index 4b697c7474..2b5a0b7a37 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -5,6 +5,7 @@
 | `voxel51.com <https://voxel51.com/>`_
 |
 """
+
 import logging
 from collections import defaultdict
@@ -50,7 +51,7 @@ class COCOEvaluationConfig(DetectionEvaluationConfig):
         tolerance (None): a tolerance, in pixels, when generating approximate
             polylines for instance masks. Typical values are 1-3 pixels
         compute_mAP (False): whether to perform the necessary computations so
-            that mAP and PR curves can be generated
+            that mAP, mAR, and PR curves can be generated
         iou_threshs (None): a list of IoU thresholds to use when computing
             mAP and PR curves. Only applicable when ``compute_mAP`` is True
         max_preds (None): the maximum number of predicted objects to evaluate
@@ -255,7 +256,8 @@ class COCODetectionResults(DetectionResults):
         recall: an array of recall values
         iou_threshs: an array of IoU thresholds
         classes: the list of possible classes
-        recall_sweep (None): an array of recall values of shape ``num_iou x num_classes``
+        recall_sweep (None): an array of recall values of shape
+            ``num_iou x num_classes``
         thresholds (None): an optional array of decision thresholds of shape
             ``num_iou_threshs x num_classes x num_recall``
         missing (None): a missing label string. Any unmatched objects are
From 450d254363a72d2ee2ffb4a2e506c63e04cfdf05 Mon Sep 17 00:00:00 2001
From: Manushree Gangwar
Date: Wed, 18 Dec 2024 10:08:40 -0700
Subject: [PATCH 5/7] Update docs

---
 docs/source/integrations/coco.rst     | 12 ++++++++----
 docs/source/user_guide/evaluation.rst | 14 +++++++++-----
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/docs/source/integrations/coco.rst b/docs/source/integrations/coco.rst
index 2b910952ef..af520197a9 100644
--- a/docs/source/integrations/coco.rst
+++ b/docs/source/integrations/coco.rst
@@ -471,13 +471,14 @@ The example below demonstrates COCO-style detection evaluation on the
 mAP and PR curves
 ~~~~~~~~~~~~~~~~~
 
-You can compute mean average precision (mAP) and precision-recall (PR) curves
-for your predictions by passing the ``compute_mAP=True`` flag to
+You can compute mean average precision (mAP), mean average recall (mAR), and
+precision-recall (PR) curves for your predictions by passing the
+``compute_mAP=True`` flag to
 :meth:`evaluate_detections() <fiftyone.core.collections.SampleCollection.evaluate_detections>`:
 
 .. note::
 
-    All mAP calculations are performed according to the
+    All mAP and mAR calculations are performed according to the
     `COCO evaluation protocol <https://cocodataset.org/#detection-eval>`_.
 
 .. code-block:: python
 
     import fiftyone as fo
     import fiftyone.zoo as foz
 
     dataset = foz.load_zoo_dataset("quickstart")
     print(dataset)
 
-    # Performs an IoU sweep so that mAP and PR curves can be computed
+    # Performs an IoU sweep so that mAP, mAR, and PR curves can be computed
     results = dataset.evaluate_detections(
         "predictions",
         gt_field="ground_truth",
         eval_key="eval",
         compute_mAP=True,
     )
 
     print(results.mAP())
     # 0.3957
 
+    print(results.mAR())
+    # 0.5210
+
     plot = results.plot_pr_curves(classes=["person", "kite", "car"])
     plot.show()

diff --git a/docs/source/user_guide/evaluation.rst b/docs/source/user_guide/evaluation.rst
index 57e25d5b11..0c8d35c893 100644
--- a/docs/source/user_guide/evaluation.rst
+++ b/docs/source/user_guide/evaluation.rst
@@ -1041,16 +1041,17 @@ The example below demonstrates COCO-style detection evaluation on the
 The easiest way to analyze models in FiftyOne is via the
 :ref:`Model Evaluation panel <app-model-evaluation-panel>`!
 
-mAP and PR curves
+mAP, mAR and PR curves
 ~~~~~~~~~~~~~~~~~
 
-You can compute mean average precision (mAP) and precision-recall (PR) curves
-for your objects by passing the ``compute_mAP=True`` flag to
+You can compute mean average precision (mAP), mean average recall (mAR), and
+precision-recall (PR) curves for your predictions by passing the
+``compute_mAP=True`` flag to
 :meth:`evaluate_detections() <fiftyone.core.collections.SampleCollection.evaluate_detections>`:
 
 .. note::
 
-    All mAP calculations are performed according to the
+    All mAP and mAR calculations are performed according to the
     `COCO evaluation protocol <https://cocodataset.org/#detection-eval>`_.
 
 .. code-block:: python
 
     import fiftyone as fo
     import fiftyone.zoo as foz
 
     dataset = foz.load_zoo_dataset("quickstart")
     print(dataset)
 
-    # Performs an IoU sweep so that mAP and PR curves can be computed
+    # Performs an IoU sweep so that mAP, mAR, and PR curves can be computed
     results = dataset.evaluate_detections(
         "predictions",
         gt_field="ground_truth",
         compute_mAP=True,
     )
 
     print(results.mAP())
     # 0.3957
 
+    print(results.mAR())
+    # 0.5210
+
     plot = results.plot_pr_curves(classes=["person", "kite", "car"])
     plot.show()
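Like ``mAP()``, the new ``mAR()`` method also accepts an optional class list,
which the docs examples above don't show. A short sketch continuing the
quickstart example (the class names are the ones used in the docs snippet):

    # mAR over a subset of classes (continuing the docs example above)
    print(results.mAR(classes=["person", "kite", "car"]))

    # mAR over all classes
    print(results.mAR())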
From 770c7d626b7991878dd26af3c061fc1a91262748 Mon Sep 17 00:00:00 2001
From: brimoor
Date: Fri, 20 Dec 2024 10:29:02 -0500
Subject: [PATCH 6/7] lint

---
 docs/source/user_guide/evaluation.rst | 2 +-
 fiftyone/utils/eval/coco.py           | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/source/user_guide/evaluation.rst b/docs/source/user_guide/evaluation.rst
index 0c8d35c893..97b789667a 100644
--- a/docs/source/user_guide/evaluation.rst
+++ b/docs/source/user_guide/evaluation.rst
@@ -1042,7 +1042,7 @@ The example below demonstrates COCO-style detection evaluation on the
 :ref:`Model Evaluation panel <app-model-evaluation-panel>`!
 
 mAP, mAR and PR curves
-~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~
 
 You can compute mean average precision (mAP), mean average recall (mAR), and
 precision-recall (PR) curves for your predictions by passing the
diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index 2b5a0b7a37..bb3a6c6553 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -795,7 +795,6 @@ def _compute_pr_curves(samples, config, classes=None, progress=None):
                 for ri, pi in enumerate(inds):
                     q[ri] = pre[pi]
                     t[ri] = confs[pi]
-
             except:
                 pass
 

From e94c8a0a5c99f6c8f5046f3838f923e2d52bcb8e Mon Sep 17 00:00:00 2001
From: brimoor
Date: Fri, 20 Dec 2024 15:07:23 -0500
Subject: [PATCH 7/7] include recall_sweep in serialization

---
 fiftyone/utils/eval/coco.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fiftyone/utils/eval/coco.py b/fiftyone/utils/eval/coco.py
index bb3a6c6553..3de1702051 100644
--- a/fiftyone/utils/eval/coco.py
+++ b/fiftyone/utils/eval/coco.py
@@ -293,6 +293,7 @@ def __init__(
         self.precision = np.asarray(precision)
         self.recall = np.asarray(recall)
         self.iou_threshs = np.asarray(iou_threshs)
+        self.recall_sweep = recall_sweep
         self.thresholds = (
             np.asarray(thresholds) if thresholds is not None else None
         )
@@ -449,6 +450,7 @@ def _from_dict(cls, d, samples, config, eval_key, **kwargs):
         precision = d["precision"]
         recall = d["recall"]
         iou_threshs = d["iou_threshs"]
+        recall_sweep = d.get("recall_sweep", None)
         thresholds = d.get("thresholds", None)
         return super()._from_dict(
             d,
             samples,
             config,
             eval_key,
             precision=precision,
             recall=recall,
             iou_threshs=iou_threshs,
+            recall_sweep=recall_sweep,
             thresholds=thresholds,
             **kwargs,
         )
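With patch 7, ``recall_sweep`` survives serialization, so mAR remains
available when saved results are reloaded. A hedged round-trip sketch using
the standard evaluation workflow (the ``"eval"`` key matches the docs
example; everything else is stock FiftyOne API):

    import fiftyone.zoo as foz

    dataset = foz.load_zoo_dataset("quickstart")

    results = dataset.evaluate_detections(
        "predictions",
        gt_field="ground_truth",
        eval_key="eval",
        compute_mAP=True,
    )
    print(results.mAR())

    # reload the stored results; recall_sweep is restored via _from_dict,
    # so mAR() still works on the reloaded object
    results2 = dataset.load_evaluation_results("eval")
    print(results2.mAR())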