[HOLD] Adding method to sample "best" frames from a video #37

Open · wants to merge 24 commits into base: develop (showing changes from 19 of 24 commits)

Commits (24)
2ed8943
tweaking lint configs
brimoor Jul 27, 2020
2517b19
adding frame quality module
brimoor Jul 27, 2020
8682550
adding video motion module
brimoor Jul 27, 2020
8abdbfe
adding core image module
brimoor Jul 27, 2020
d92708f
adding core sampling module
brimoor Jul 27, 2020
54bcc44
adding brain method to sample best frames from a video
brimoor Jul 27, 2020
e417063
adding scikit-image to deps
brimoor Jul 27, 2020
bebf29e
removing public brain method for now
brimoor Jul 27, 2020
a78d2e0
Merge branch 'develop' into sample-video-frames
brimoor Aug 3, 2020
63bef5d
migrating core module to internal
brimoor Aug 3, 2020
d5d5273
Merge branch 'no-py2-idioms' into sample-video-frames
brimoor Aug 4, 2020
4428a85
removing py2 compatibility idioms
brimoor Aug 4, 2020
fc3383c
Merge branch 'develop' into sample-video-frames
brimoor Aug 4, 2020
9b21a13
Merge branch 'develop' into sample-video-frames
brimoor Aug 4, 2020
a529232
Merge branch 'internals' into sample-video-frames
brimoor Oct 22, 2020
2181d49
refactoring core logic into sampling module
brimoor Oct 22, 2020
231bdfd
Merge branch 'internals' into sample-video-frames
brimoor Oct 22, 2020
d207996
Merge branch 'develop' into sample-video-frames
brimoor Oct 23, 2020
4c5cdc0
fix parentheses bug
brimoor Oct 23, 2020
6f93b42
adding support for selecting a quality factor
brimoor Oct 28, 2020
dc21ff9
Merge branch 'develop' into sample-video-frames
brimoor Nov 25, 2020
bae7105
Merge branch 'develop' into sample-video-frames
brimoor Dec 17, 2020
4d24ced
Merge branch 'develop' into sample-video-frames
brimoor May 29, 2021
8dfb2ea
Merge branch 'develop' into sample-video-frames
brimoor Sep 11, 2021
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
       language: system
       files: \.py$
       entry: pylint
-      args: ["--errors-only"]
+      args: ["--extension-pkg-whitelist=cv2", "--errors-only"]
 - repo: https://github.com/prettier/prettier
   rev: 2.0.5
   hooks:

(``cv2`` is a compiled C extension, so pylint must be explicitly allowed to load it via ``--extension-pkg-whitelist`` in order to introspect its members without raising false ``no-member`` errors.)
53 changes: 53 additions & 0 deletions fiftyone/brain/__init__.py
@@ -92,3 +92,56 @@ def compute_uniqueness(samples, uniqueness_field="uniqueness", roi_field=None):
    fbu.compute_uniqueness(
        samples, uniqueness_field=uniqueness_field, roi_field=roi_field,
    )


def sample_best_video_frames(
    video_path,
    out_frames_dir,
    target_num_frames=None,
    target_accel=None,
    target_fps=None,
    size=None,
    max_size=None,
):
    """Adaptively samples the best frames from the input video.

    The "best" video frames at a given sampling density are defined as the
    frames with the highest image quality that are most representative of
    the visual content in the video.

    Provide one of ``target_num_frames``, ``target_accel``, or ``target_fps``
    to perform the sampling.

    Args:
        video_path: the path to the video to process
        out_frames_dir: a directory to write the sampled frames
        target_num_frames (None): the target number of frames to sample
        target_accel (None): a desired target acceleration factor to apply
            when sampling frames. For example, a target acceleration of 2x
            would correspond to sampling every other frame, on average
        target_fps (None): a desired target sampling rate, which must be
            less than the frame rate of the input video
        size (None): a desired output ``(width, height)`` for the sampled
            frames. Dimensions can be -1, in which case the input aspect
            ratio is preserved. By default, the input frame size is
            maintained
        max_size (None): a maximum ``(width, height)`` allowed for the
            sampled frames. Frames are resized as necessary to meet this
            limit, and ``size`` is decreased (aspect-preserving) if
            necessary to satisfy this constraint. Dimensions can be -1, in
            which case no limit is applied to them. By default, no maximum
            frame size is imposed

    Returns:
        a dict mapping frame numbers to the paths to the sampled frames in
        ``out_frames_dir``
    """
    import fiftyone.brain.internal.core.sampling as fbs

    return fbs.sample_best_video_frames(
        video_path,
        out_frames_dir,
        target_num_frames=target_num_frames,
        target_accel=target_accel,
        target_fps=target_fps,
        size=size,
        max_size=max_size,
    )
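
As a usage sketch (not part of the diff), the new public method could be invoked as follows; the video path, output directory, and parameter values below are hypothetical choices for illustration:

```python
import fiftyone.brain as fob

# Hypothetical inputs for illustration
video_path = "/path/to/video.mp4"
out_frames_dir = "/tmp/best-frames"

# Sample the 10 "best" frames; cap frame width at 1280 pixels, with -1
# preserving the aspect ratio as described in the docstring above
frames = fob.sample_best_video_frames(
    video_path,
    out_frames_dir,
    target_num_frames=10,
    max_size=(1280, -1),
)

# The method returns a dict mapping frame numbers to sampled frame paths
for frame_number, frame_path in frames.items():
    print("Frame %d: %s" % (frame_number, frame_path))
```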
5 changes: 5 additions & 0 deletions fiftyone/brain/internal/core/__init__.py
@@ -0,0 +1,5 @@
"""
| Copyright 2017-2020, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
100 changes: 100 additions & 0 deletions fiftyone/brain/internal/core/image.py
@@ -0,0 +1,100 @@
"""
Core methods for working with images.

| Copyright 2017-2020, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
import cv2
import skimage.metrics as skm

import eta.core.image as etai


def compute_quality(img, method=None):
    """Computes the quality of the image using the specified method.

    Quality is returned on a ``[0, 100]`` scale, where ``0 = low quality``
    and ``100 = highest quality``.

    Args:
        img: the image
        method (None): the image quality method. Supported values are
            ``("laplacian-stdev", "median-psnr", "median-ssim")``. The
            default is ``"laplacian-stdev"``

    Returns:
        the image quality score, in ``[0, 100]``
    """
    method_lower = method.lower() if method else "laplacian-stdev"

    if method_lower == "laplacian-stdev":
        stdev = stdev_of_laplacian(img)
        return min(stdev, 100.0)

    if method_lower == "median-psnr":
        #
        # @todo improve this? currently we assume that PSNR = [30dB, 50dB]
        # is a typical range for 8-bit images
        #
        # @todo handle non-8-bit images
        #
        psnr = psnr_wrt_median(img)
        return 5.0 * min(max(0, psnr - 30.0), 20.0)

    if method_lower == "median-ssim":
        ssim = ssim_wrt_median(img)
        return 50.0 * (1.0 + ssim)

    raise ValueError("Unsupported `method = %s`" % method)


def stdev_of_laplacian(img, kernel_size=3):
    """Computes the standard deviation of the Laplacian of the given image.

    References:
        https://www.pyimagesearch.com/2015/09/07/blur-detection-with-opencv
        https://ieeexplore.ieee.org/document/903548
        http://isp-utb.github.io/seminario/papers/Pattern_Recognition_Pertuz_2013.pdf

    Args:
        img: an image
        kernel_size (3): the kernel size to use

    Returns:
        the standard deviation of the Laplacian-filtered version of the
        image
    """
    return cv2.Laplacian(img, cv2.CV_32F, ksize=kernel_size).std()


def psnr_wrt_median(img, kernel_size=3):
    """Computes the PSNR, in dB, of the image with respect to a
    median-filtered version of the image.

    Args:
        img: an image
        kernel_size (3): the median kernel size to use

    Returns:
        the PSNR
    """
    img_median = cv2.medianBlur(img, ksize=kernel_size)
    return cv2.PSNR(img, img_median)


def ssim_wrt_median(img, kernel_size=3):
    """Computes the SSIM of the image with respect to a median-filtered
    version of the image.

    Args:
        img: an image
        kernel_size (3): the median kernel size to use

    Returns:
        the SSIM, in ``[-1, 1]``
    """
    img_median = cv2.medianBlur(img, ksize=kernel_size)
    multichannel = etai.is_color(img)
    return skm.structural_similarity(
        img, img_median, multichannel=multichannel
    )
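
Note that the ``median-psnr`` branch maps PSNR linearly from the assumed ``[30 dB, 50 dB]`` range onto ``[0, 100]``; for example, a PSNR of 40 dB yields ``5.0 * min(max(0, 40 - 30), 20) = 50.0``. Below is a minimal sketch of comparing the three measures on one image; the image path is hypothetical, and the module path matches this diff:

```python
import cv2

import fiftyone.brain.internal.core.image as fbi

# Hypothetical image path for illustration
img = cv2.imread("/path/to/image.jpg")

# Compare the three supported quality measures on the same image; all
# scores are returned on the same [0, 100] scale
for method in ("laplacian-stdev", "median-psnr", "median-ssim"):
    print("%s: %.1f" % (method, fbi.compute_quality(img, method=method)))
```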
93 changes: 93 additions & 0 deletions fiftyone/brain/internal/core/motion.py
@@ -0,0 +1,93 @@
"""
Core infrastructure for computing frame motion in videos.

| Copyright 2017-2020, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
import numpy as np

from eta.core.config import Config
import eta.core.data as etad
import eta.core.utils as etau
import eta.core.video as etav


# @todo add support for customizing the BackgroundSubtractor parameters
class FrameMotionConfig(Config):
    """Frame motion configuration settings.

    Attributes:
        background_subtractor: the fully-qualified name of the
            ``eta.core.primitives.BackgroundSubtractor`` class to use. The
            default is ``"eta.core.primitives.MOG2BackgroundSubtractor"``
        motion_method: the method to use to compute the frame motion.
            Supported values are ``("fgsupport")``. The default is
            ``"fgsupport"``
        init_buffer_frames: the number of initial buffer frames before
            frame motion should be trusted and reported. The default is 5
            frames
        attr_name: the name of the numeric attribute in which to store the
            frame motion values. The default is ``"motion"``
    """

    def __init__(self, d):
        self.background_subtractor = self.parse_string(
            d,
            "background_subtractor",
            default="eta.core.primitives.MOG2BackgroundSubtractor",
        )
        self.motion_method = self.parse_string(
            d, "motion_method", default="fgsupport"
        )
        self.init_buffer_frames = self.parse_number(
            d, "init_buffer_frames", default=5
        )
        self.attr_name = self.parse_string(d, "attr_name", default="motion")


def compute_frame_motion(video_reader, video_labels=None, motion_config=None):
    """Computes the frame motion for the frames of the given video.

    Args:
        video_reader: an ``eta.core.video.VideoReader`` that is ready to
            read the frames of the video
        video_labels (None): an optional ``eta.core.video.VideoLabels`` to
            which to add the labels. By default, a new instance is created
        motion_config (None): a :class:`FrameMotionConfig` describing the
            method to use. If omitted, the default config is used

    Returns:
        an ``eta.core.video.VideoLabels`` containing the frame motions
    """
    if motion_config is None:
        motion_config = FrameMotionConfig.default()

    if motion_config.motion_method != "fgsupport":
        raise ValueError(
            "Unsupported motion_method = '%s'" % motion_config.motion_method
        )

    background_subtractor_cls = etau.get_class(
        motion_config.background_subtractor
    )
    init_buffer_frames = motion_config.init_buffer_frames
    attr_name = motion_config.attr_name

    if video_labels is None:
        video_labels = etav.VideoLabels()

    with background_subtractor_cls() as bgs:
        with video_reader as vr:
            for idx, img in enumerate(vr):
                attr = _compute_motion(img, bgs, attr_name)
                if idx >= init_buffer_frames:
                    # Only store motion after the initial buffer
                    video_labels.add_frame_attribute(attr, vr.frame_number)

    return video_labels


def _compute_motion(img, bgs, attr_name):
    # Motion = proportion of foreground pixels
    fgmask, _ = bgs.process_frame(img)
    motion = np.count_nonzero(fgmask) / fgmask.size
    return etad.NumericAttribute(attr_name, motion)
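
A minimal sketch of driving this module, assuming ETA's ``eta.core.video.FFmpegVideoReader`` as the reader and a hypothetical video path; any ``eta.core.video.VideoReader`` should work here:

```python
import eta.core.video as etav

import fiftyone.brain.internal.core.motion as fbm

# Hypothetical video path for illustration
video_path = "/path/to/video.mp4"

# With the default config: MOG2 background subtraction and the "fgsupport"
# method (fraction of foreground pixels), stored per-frame as a numeric
# "motion" attribute after the 5-frame initialization buffer
video_labels = fbm.compute_frame_motion(etav.FFmpegVideoReader(video_path))
```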