[HOLD] Adding method to sample "best" frames from a video #37

Open · wants to merge 24 commits into base: develop (showing changes from 19 of 24 commits)

Commits (24)
2ed8943
tweaking lint configs
brimoor Jul 27, 2020
2517b19
adding frame quality module
brimoor Jul 27, 2020
8682550
adding video motion module
brimoor Jul 27, 2020
8abdbfe
adding core image module
brimoor Jul 27, 2020
d92708f
adding core sampling module
brimoor Jul 27, 2020
54bcc44
adding brain method to sample best frames from a video
brimoor Jul 27, 2020
e417063
adding scikit-image to deps
brimoor Jul 27, 2020
bebf29e
removing public brain method for now
brimoor Jul 27, 2020
a78d2e0
Merge branch 'develop' into sample-video-frames
brimoor Aug 3, 2020
63bef5d
migrating core module to internal
brimoor Aug 3, 2020
d5d5273
Merge branch 'no-py2-idioms' into sample-video-frames
brimoor Aug 4, 2020
4428a85
removing py2 compatibility idioms
brimoor Aug 4, 2020
fc3383c
Merge branch 'develop' into sample-video-frames
brimoor Aug 4, 2020
9b21a13
Merge branch 'develop' into sample-video-frames
brimoor Aug 4, 2020
a529232
Merge branch 'internals' into sample-video-frames
brimoor Oct 22, 2020
2181d49
refactoring core logic into sampling module
brimoor Oct 22, 2020
231bdfd
Merge branch 'internals' into sample-video-frames
brimoor Oct 22, 2020
d207996
Merge branch 'develop' into sample-video-frames
brimoor Oct 23, 2020
4c5cdc0
fix parentheses bug
brimoor Oct 23, 2020
6f93b42
adding support for selecting a quality factor
brimoor Oct 28, 2020
dc21ff9
Merge branch 'develop' into sample-video-frames
brimoor Nov 25, 2020
bae7105
Merge branch 'develop' into sample-video-frames
brimoor Dec 17, 2020
4d24ced
Merge branch 'develop' into sample-video-frames
brimoor May 29, 2021
8dfb2ea
Merge branch 'develop' into sample-video-frames
brimoor Sep 11, 2021
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
       language: system
       files: \.py$
       entry: pylint
-      args: ["--errors-only"]
+      args: ["--extension-pkg-whitelist=cv2", "--errors-only"]
 - repo: https://github.com/prettier/prettier
   rev: 2.0.5
   hooks:

(``cv2`` is a compiled C extension, so pylint must be explicitly allowed to load it via ``--extension-pkg-whitelist`` in order to introspect its members without raising false ``no-member`` errors.)
53 changes: 53 additions & 0 deletions fiftyone/brain/__init__.py
@@ -92,3 +92,56 @@ def compute_uniqueness(samples, uniqueness_field="uniqueness", roi_field=None):
    fbu.compute_uniqueness(
        samples, uniqueness_field=uniqueness_field, roi_field=roi_field,
    )


def sample_best_video_frames(
    video_path,
    out_frames_dir,
    target_num_frames=None,
    target_accel=None,
    target_fps=None,
    size=None,
    max_size=None,
):
    """Adaptively samples the best frames from the input video.

    The "best" video frames at a given sampling density are defined as the
    frames with the highest image quality that are most representative of
    the visual content in the video.

    Provide one of ``target_num_frames``, ``target_accel``, or ``target_fps``
    to perform the sampling.

    Args:
        video_path: the path to the video to process
        out_frames_dir: a directory to write the sampled frames
        target_num_frames (None): the target number of frames to sample
        target_accel (None): a desired target acceleration factor to apply
            when sampling frames. For example, a target acceleration of 2x
            would correspond to sampling every other frame, on average
        target_fps (None): a desired target sampling rate, which must be
            less than the frame rate of the input video
        size (None): a desired output ``(width, height)`` for the sampled
            frames. Dimensions can be -1, in which case the input aspect
            ratio is preserved. By default, the input frame size is
            maintained
        max_size (None): a maximum ``(width, height)`` allowed for the
            sampled frames. Frames are resized as necessary to meet this
            limit, and ``size`` is decreased (aspect-preserving) if
            necessary to satisfy this constraint. Dimensions can be -1, in
            which case no limit is applied to them. By default, no maximum
            frame size is imposed

    Returns:
        a dict mapping frame numbers to the paths to the sampled frames in
        ``out_frames_dir``
    """
    import fiftyone.brain.internal.core.sampling as fbs

    return fbs.sample_best_video_frames(
        video_path,
        out_frames_dir,
        target_num_frames=target_num_frames,
        target_accel=target_accel,
        target_fps=target_fps,
        size=size,
        max_size=max_size,
    )
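
As a usage sketch (not part of the diff), the new public method could be invoked as follows; the video path, output directory, and parameter values below are hypothetical choices for illustration:

```python
import fiftyone.brain as fob

# Hypothetical inputs for illustration
video_path = "/path/to/video.mp4"
out_frames_dir = "/tmp/best-frames"

# Sample the 10 "best" frames; cap frame width at 1280 pixels, with -1
# preserving the aspect ratio as described in the docstring above
frames = fob.sample_best_video_frames(
    video_path,
    out_frames_dir,
    target_num_frames=10,
    max_size=(1280, -1),
)

# The method returns a dict mapping frame numbers to sampled frame paths
for frame_number, frame_path in frames.items():
    print("Frame %d: %s" % (frame_number, frame_path))
```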
5 changes: 5 additions & 0 deletions fiftyone/brain/internal/core/__init__.py
@@ -0,0 +1,5 @@
"""
| Copyright 2017-2020, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
100 changes: 100 additions & 0 deletions fiftyone/brain/internal/core/image.py
@@ -0,0 +1,100 @@
"""
Core methods for working with images.

| Copyright 2017-2020, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
import cv2
import skimage.metrics as skm

import eta.core.image as etai


def compute_quality(img, method=None):
    """Computes the quality of the image using the specified method.

    Quality is returned on a ``[0, 100]`` scale, where ``0 = low quality``
    and ``100 = highest quality``.

    Args:
        img: the image
        method (None): the image quality method. Supported values are
            ``("laplacian-stdev", "median-psnr", "median-ssim")``. The
            default is ``"laplacian-stdev"``

    Returns:
        the image quality score, in ``[0, 100]``
    """
    method_lower = method.lower() if method else "laplacian-stdev"

    if method_lower == "laplacian-stdev":
        stdev = stdev_of_laplacian(img)
        return min(stdev, 100.0)

    if method_lower == "median-psnr":
        #
        # @todo improve this? currently we assume that PSNR = [30dB, 50dB]
        # is a typical range for 8-bit images
        #
        # @todo handle non-8-bit images
        #
        psnr = psnr_wrt_median(img)
        return 5.0 * min(max(0, psnr - 30.0), 20.0)

    if method_lower == "median-ssim":
        ssim = ssim_wrt_median(img)
        return 50.0 * (1.0 + ssim)

    raise ValueError("Unsupported `method = %s`" % method)


def stdev_of_laplacian(img, kernel_size=3):
    """Computes the standard deviation of the Laplacian of the given image.

    References:
        https://www.pyimagesearch.com/2015/09/07/blur-detection-with-opencv
        https://ieeexplore.ieee.org/document/903548
        http://isp-utb.github.io/seminario/papers/Pattern_Recognition_Pertuz_2013.pdf

    Args:
        img: an image
        kernel_size (3): the kernel size to use

    Returns:
        the standard deviation of the Laplacian-filtered version of the
        image
    """
    return cv2.Laplacian(img, cv2.CV_32F, ksize=kernel_size).std()


def psnr_wrt_median(img, kernel_size=3):
    """Computes the PSNR, in dB, of the image with respect to a
    median-filtered version of the image.

    Args:
        img: an image
        kernel_size (3): the median kernel size to use

    Returns:
        the PSNR
    """
    img_median = cv2.medianBlur(img, ksize=kernel_size)
    return cv2.PSNR(img, img_median)


def ssim_wrt_median(img, kernel_size=3):
    """Computes the SSIM of the image with respect to a median-filtered
    version of the image.

    Args:
        img: an image
        kernel_size (3): the median kernel size to use

    Returns:
        the SSIM, in ``[-1, 1]``
    """
    img_median = cv2.medianBlur(img, ksize=kernel_size)
    multichannel = etai.is_color(img)
    return skm.structural_similarity(
        img, img_median, multichannel=multichannel
    )
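
Note that the ``median-psnr`` branch maps PSNR linearly from the assumed ``[30 dB, 50 dB]`` range onto ``[0, 100]``; for example, a PSNR of 40 dB yields ``5.0 * min(max(0, 40 - 30), 20) = 50.0``. Below is a minimal sketch of comparing the three measures on one image; the image path is hypothetical, and the module path matches this diff:

```python
import cv2

import fiftyone.brain.internal.core.image as fbi

# Hypothetical image path for illustration
img = cv2.imread("/path/to/image.jpg")

# Compare the three supported quality measures on the same image; all
# scores are returned on the same [0, 100] scale
for method in ("laplacian-stdev", "median-psnr", "median-ssim"):
    print("%s: %.1f" % (method, fbi.compute_quality(img, method=method)))
```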
93 changes: 93 additions & 0 deletions fiftyone/brain/internal/core/motion.py
@@ -0,0 +1,93 @@
"""
Core infrastructure for computing frame motion in videos.

| Copyright 2017-2020, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
import numpy as np

from eta.core.config import Config
import eta.core.data as etad
import eta.core.utils as etau
import eta.core.video as etav


# @todo add support for customizing the BackgroundSubtractor parameters
class FrameMotionConfig(Config):
    """Frame motion configuration settings.

    Attributes:
        background_subtractor: the fully-qualified name of the
            ``eta.core.primitives.BackgroundSubtractor`` class to use. The
            default is ``"eta.core.primitives.MOG2BackgroundSubtractor"``
        motion_method: the method to use to compute the frame motion.
            Supported values are ``("fgsupport")``. The default is
            ``"fgsupport"``
        init_buffer_frames: the number of initial buffer frames before
            frame motion should be trusted and reported. The default is 5
            frames
        attr_name: the name of the numeric attribute in which to store the
            frame motion values. The default is ``"motion"``
    """

    def __init__(self, d):
        self.background_subtractor = self.parse_string(
            d,
            "background_subtractor",
            default="eta.core.primitives.MOG2BackgroundSubtractor",
        )
        self.motion_method = self.parse_string(
            d, "motion_method", default="fgsupport"
        )
        self.init_buffer_frames = self.parse_number(
            d, "init_buffer_frames", default=5
        )
        self.attr_name = self.parse_string(d, "attr_name", default="motion")


def compute_frame_motion(video_reader, video_labels=None, motion_config=None):
    """Computes the frame motion for the frames of the given video.

    Args:
        video_reader: an ``eta.core.video.VideoReader`` that is ready to
            read the frames of the video
        video_labels (None): an optional ``eta.core.video.VideoLabels`` to
            which to add the labels. By default, a new instance is created
        motion_config (None): a :class:`FrameMotionConfig` describing the
            method to use. If omitted, the default config is used

    Returns:
        an ``eta.core.video.VideoLabels`` containing the frame motions
    """
    if motion_config is None:
        motion_config = FrameMotionConfig.default()

    if motion_config.motion_method != "fgsupport":
        raise ValueError(
            "Unsupported motion_method = '%s'" % motion_config.motion_method
        )

    background_subtractor_cls = etau.get_class(
        motion_config.background_subtractor
    )
    init_buffer_frames = motion_config.init_buffer_frames
    attr_name = motion_config.attr_name

    if video_labels is None:
        video_labels = etav.VideoLabels()

    with background_subtractor_cls() as bgs:
        with video_reader as vr:
            for idx, img in enumerate(vr):
                attr = _compute_motion(img, bgs, attr_name)
                if idx >= init_buffer_frames:
                    # Only store motion after the initial buffer
                    video_labels.add_frame_attribute(attr, vr.frame_number)

    return video_labels


def _compute_motion(img, bgs, attr_name):
    # Motion = proportion of foreground pixels
    fgmask, _ = bgs.process_frame(img)
    motion = np.count_nonzero(fgmask) / fgmask.size
    return etad.NumericAttribute(attr_name, motion)
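
A minimal sketch of driving this module, assuming ETA's ``eta.core.video.FFmpegVideoReader`` as the reader and a hypothetical video path; any ``eta.core.video.VideoReader`` should work here:

```python
import eta.core.video as etav

import fiftyone.brain.internal.core.motion as fbm

# Hypothetical video path for illustration
video_path = "/path/to/video.mp4"

# With the default config: MOG2 background subtraction and the "fgsupport"
# method (fraction of foreground pixels), stored per-frame as a numeric
# "motion" attribute after the 5-frame initialization buffer
video_labels = fbm.compute_frame_motion(etav.FFmpegVideoReader(video_path))
```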