From 6d741cd9e4829f09e956b2a8ea971cf2e7395edb Mon Sep 17 00:00:00 2001 From: YaoFANGUK Date: Tue, 12 Dec 2023 17:06:05 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E8=A7=86=E9=A2=91=E5=9C=BA?= =?UTF-8?q?=E6=99=AF=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/main.py | 19 + backend/scenedetect/__init__.py | 158 +++ backend/scenedetect/__main__.py | 61 + backend/scenedetect/_cli/__init__.py | 1145 +++++++++++++++++ backend/scenedetect/_cli/config.py | 548 ++++++++ backend/scenedetect/_cli/context.py | 820 ++++++++++++ backend/scenedetect/_cli/controller.py | 273 ++++ backend/scenedetect/_scene_loader.py | 107 ++ backend/scenedetect/_thirdparty/__init__.py | 15 + .../scenedetect/_thirdparty/simpletable.py | 327 +++++ backend/scenedetect/backends/__init__.py | 114 ++ backend/scenedetect/backends/moviepy.py | 224 ++++ backend/scenedetect/backends/opencv.py | 539 ++++++++ backend/scenedetect/backends/pyav.py | 355 +++++ backend/scenedetect/detectors/__init__.py | 81 ++ .../detectors/adaptive_detector.py | 184 +++ .../scenedetect/detectors/content_detector.py | 259 ++++ .../scenedetect/detectors/motion_detector.py | 92 ++ .../detectors/threshold_detector.py | 203 +++ backend/scenedetect/frame_timecode.py | 462 +++++++ backend/scenedetect/platform.py | 357 +++++ backend/scenedetect/scene_detector.py | 148 +++ backend/scenedetect/scene_manager.py | 1034 +++++++++++++++ backend/scenedetect/stats_manager.py | 324 +++++ backend/scenedetect/video_manager.py | 772 +++++++++++ backend/scenedetect/video_splitter.py | 296 +++++ backend/scenedetect/video_stream.py | 222 ++++ 27 files changed, 9139 insertions(+) create mode 100644 backend/scenedetect/__init__.py create mode 100755 backend/scenedetect/__main__.py create mode 100644 backend/scenedetect/_cli/__init__.py create mode 100644 backend/scenedetect/_cli/config.py create mode 100644 backend/scenedetect/_cli/context.py create mode 100644 
backend/scenedetect/_cli/controller.py create mode 100644 backend/scenedetect/_scene_loader.py create mode 100644 backend/scenedetect/_thirdparty/__init__.py create mode 100644 backend/scenedetect/_thirdparty/simpletable.py create mode 100644 backend/scenedetect/backends/__init__.py create mode 100644 backend/scenedetect/backends/moviepy.py create mode 100644 backend/scenedetect/backends/opencv.py create mode 100644 backend/scenedetect/backends/pyav.py create mode 100644 backend/scenedetect/detectors/__init__.py create mode 100644 backend/scenedetect/detectors/adaptive_detector.py create mode 100644 backend/scenedetect/detectors/content_detector.py create mode 100644 backend/scenedetect/detectors/motion_detector.py create mode 100644 backend/scenedetect/detectors/threshold_detector.py create mode 100644 backend/scenedetect/frame_timecode.py create mode 100644 backend/scenedetect/platform.py create mode 100644 backend/scenedetect/scene_detector.py create mode 100644 backend/scenedetect/scene_manager.py create mode 100644 backend/scenedetect/stats_manager.py create mode 100644 backend/scenedetect/video_manager.py create mode 100644 backend/scenedetect/video_splitter.py create mode 100644 backend/scenedetect/video_stream.py diff --git a/backend/main.py b/backend/main.py index b0f7213..40bbdd8 100644 --- a/backend/main.py +++ b/backend/main.py @@ -5,6 +5,10 @@ from pathlib import Path import threading import cv2 import sys + +from backend.scenedetect import scene_detect +from backend.scenedetect.detectors import ContentDetector + sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from backend.inpaint.lama_inpaint import LamaInpaint @@ -116,6 +120,21 @@ class SubtitleDetect: new_subtitle_frame_no_box_dict[key] = subtitle_frame_no_box_dict[key] return new_subtitle_frame_no_box_dict + @staticmethod + def get_scene_div_frame_no(v_path): + """ + 获取发生场景切换的帧号 + """ + 
scene_div_frame_no_list = [] + scene_list = scene_detect(v_path, ContentDetector()) + for scene in scene_list: + start, end = scene + if start.frame_num == 0: + pass + else: + scene_div_frame_no_list.append(start.frame_num + 1) + return scene_div_frame_no_list + @staticmethod def are_similar(region1, region2): """判断两个区域是否相似。""" diff --git a/backend/scenedetect/__init__.py b/backend/scenedetect/__init__.py new file mode 100644 index 0000000..2f08add --- /dev/null +++ b/backend/scenedetect/__init__.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""The ``scenedetect`` module comes with helper functions to simplify common use cases. +:func:`scene_detect` can be used to perform scene detection on a video by path. :func:`open_video` +can be used to open a video for a +:class:`SceneManager `. +""" + +from logging import getLogger +from typing import List, Optional, Tuple, Union + +# OpenCV is a required package, but we don't have it as an explicit dependency since we +# need to support both opencv-python and opencv-python-headless. Include some additional +# context with the exception if this is the case. +try: + import cv2 as _ +except ModuleNotFoundError as ex: + raise ModuleNotFoundError( + "OpenCV could not be found, try installing opencv-python:\n\npip install opencv-python", + name='cv2', + ) from ex + +# Commonly used classes/functions exported under the `scenedetect` namespace for brevity. 
+from backend.scenedetect.platform import init_logger +from backend.scenedetect.frame_timecode import FrameTimecode +from backend.scenedetect.video_stream import VideoStream, VideoOpenFailure +from backend.scenedetect.scene_detector import SceneDetector +from backend.scenedetect.backends import (AVAILABLE_BACKENDS, VideoStreamCv2, VideoStreamAv, + VideoStreamMoviePy, VideoCaptureAdapter) +from backend.scenedetect.stats_manager import StatsManager, StatsFileCorrupt +from backend.scenedetect.scene_manager import SceneManager, save_images + +# Used for module identification and when printing version & about info +# (e.g. calling `scenedetect version` or `scenedetect about`). +__version__ = '0.6.2' + +init_logger() +logger = getLogger('pyscenedetect') + + +def open_video( + path: str, + framerate: Optional[float] = None, + backend: str = 'opencv', + **kwargs, +) -> VideoStream: + """Open a video at the given path. If `backend` is specified but not available on the current + system, OpenCV (`VideoStreamCv2`) will be used as a fallback. + + Arguments: + path: Path to video file to open. + framerate: Overrides detected framerate if set. + backend: Name of specific backend to use, if possible. See + :data:`scenedetect.backends.AVAILABLE_BACKENDS` for backends available on the current + system. If the backend fails to open the video, OpenCV will be used as a fallback. + kwargs: Optional named arguments to pass to the specified `backend` constructor for + overriding backend-specific options. + + Returns: + Backend object created with the specified video path. + + Raises: + :class:`VideoOpenFailure`: Constructing the VideoStream fails. If multiple backends have + been attempted, the error from the first backend will be returned. + """ + last_error: Exception = None + # If `backend` is available, try to open the video at `path` using it. 
+ if backend in AVAILABLE_BACKENDS: + backend_type = AVAILABLE_BACKENDS[backend] + try: + logger.debug('Opening video with %s...', backend_type.BACKEND_NAME) + return backend_type(path, framerate, **kwargs) + except VideoOpenFailure as ex: + logger.warning('Failed to open video with %s: %s', backend_type.BACKEND_NAME, str(ex)) + if backend == VideoStreamCv2.BACKEND_NAME: + raise + last_error = ex + else: + logger.warning('Backend %s not available.', backend) + # Fallback to OpenCV if `backend` is unavailable, or specified backend failed to open `path`. + backend_type = VideoStreamCv2 + logger.warning('Trying another backend: %s', backend_type.BACKEND_NAME) + try: + return backend_type(path, framerate) + except VideoOpenFailure as ex: + logger.debug('Failed to open video: %s', str(ex)) + if last_error is None: + last_error = ex + # Propagate any exceptions raised from specified backend, instead of errors from the fallback. + assert last_error is not None + raise last_error + + +def scene_detect( + video_path: str, + detector: SceneDetector, + stats_file_path: Optional[str] = None, + show_progress: bool = False, + start_time: Optional[Union[str, float, int]] = None, + end_time: Optional[Union[str, float, int]] = None, + start_in_scene: bool = False, +) -> List[Tuple[FrameTimecode, FrameTimecode]]: + """Perform scene detection on a given video `path` using the specified `detector`. + + Arguments: + video_path: Path to input video (absolute or relative to working directory). + detector: A `SceneDetector` instance (see :mod:`scenedetect.detectors` for a full list + of detectors). + stats_file_path: Path to save per-frame metrics to for statistical analysis or to + determine a better threshold value. + show_progress: Show a progress bar with estimated time remaining. Default is False. + start_time: Starting point in video, in the form of a timecode ``HH:MM:SS[.nnn]`` (`str`), + number of seconds ``123.45`` (`float`), or number of frames ``200`` (`int`). 
+ end_time: Ending point in video, in the form of a timecode ``HH:MM:SS[.nnn]`` (`str`), + number of seconds ``123.45`` (`float`), or number of frames ``200`` (`int`). + start_in_scene: Assume the video begins in a scene. This means that when detecting + fast cuts with `ContentDetector`, if no cuts are found, the resulting scene list + will contain a single scene spanning the entire video (instead of no scenes). + When detecting fades with `ThresholdDetector`, the beginning portion of the video + will always be included until the first fade-out event is detected. + + Returns: + List of scenes (pairs of :class:`FrameTimecode` objects). + + Raises: + :class:`VideoOpenFailure`: `video_path` could not be opened. + :class:`StatsFileCorrupt`: `stats_file_path` is an invalid stats file. + ValueError: `start_time` or `end_time` are incorrectly formatted. + TypeError: `start_time` or `end_time` are invalid types. + """ + video = open_video(video_path) + if start_time is not None: + start_time = video.base_timecode + start_time + video.seek(start_time) + if end_time is not None: + end_time = video.base_timecode + end_time + # To reduce memory consumption when not required, we only add a StatsManager if we + # need to save frame metrics to disk. 
+ scene_manager = SceneManager(StatsManager() if stats_file_path else None) + scene_manager.add_detector(detector) + scene_manager.detect_scenes( + video=video, + show_progress=show_progress, + end_time=end_time, + ) + if not scene_manager.stats_manager is None: + scene_manager.stats_manager.save_to_csv(csv_file=stats_file_path) + return scene_manager.get_scene_list(start_in_scene=start_in_scene) diff --git a/backend/scenedetect/__main__.py b/backend/scenedetect/__main__.py new file mode 100755 index 0000000..e76c0da --- /dev/null +++ b/backend/scenedetect/__main__.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""Entry point for PySceneDetect's command-line interface.""" + +from logging import getLogger +import sys + +from backend.scenedetect._cli import scenedetect +from backend.scenedetect._cli.context import CliContext +from backend.scenedetect._cli.controller import run_scenedetect + +from backend.scenedetect.platform import logging_redirect_tqdm, FakeTqdmLoggingRedirect + + +def main(): + """PySceneDetect command-line interface (CLI) entry point.""" + cli_ctx = CliContext() + try: + # Process command line arguments and subcommands to initialize the context. + scenedetect.main(obj=cli_ctx) # Parse CLI arguments with registered callbacks. + except SystemExit as exit: + help_command = any(arg in sys.argv for arg in ['-h', '--help']) + if help_command or exit.code != 0: + raise + + # If we get here, processing the command line and loading the context worked. 
Let's run + # the controller if we didn't process any help requests. + logger = getLogger('pyscenedetect') + # Ensure log messages don't conflict with any progress bars. If we're in quiet mode, where + # no progress bars get created, we instead create a fake context manager. This is done here + # to avoid needing a separate context manager at each point a progress bar is created. + log_redirect = FakeTqdmLoggingRedirect() if cli_ctx.quiet_mode else logging_redirect_tqdm( + loggers=[logger]) + + with log_redirect: + try: + run_scenedetect(cli_ctx) + except KeyboardInterrupt: + logger.info('Stopped.') + if __debug__: + raise + except BaseException as ex: + if __debug__: + raise + else: + logger.critical('Unhandled exception:', exc_info=ex) + raise SystemExit(1) + + +if __name__ == '__main__': + main() diff --git a/backend/scenedetect/_cli/__init__.py b/backend/scenedetect/_cli/__init__.py new file mode 100644 index 0000000..9a6fdb4 --- /dev/null +++ b/backend/scenedetect/_cli/__init__.py @@ -0,0 +1,1145 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""Implementation of the PySceneDetect application itself (the `scenedetect` command). The main CLI +entry-point function is :func:scenedetect_cli, which is a chained command group. + +Commands are first parsed into a context (`CliContext`), which is then passed to a controller which +performs scene detection and other required actions (`run_scenedetect`). +""" + +# Some parts of this file need word wrap to be displayed. 
+# pylint: disable=line-too-long + +import inspect +import logging +from typing import AnyStr, Optional, Tuple + +import click + +import scenedetect +from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector +from scenedetect.backends import AVAILABLE_BACKENDS +from scenedetect.platform import get_system_version_info + +from scenedetect._cli.config import CHOICE_MAP, CONFIG_FILE_PATH, CONFIG_MAP +from scenedetect._cli.context import CliContext, USER_CONFIG + +_PROGRAM_VERSION = scenedetect.__version__ +"""Used to avoid name conflict with named `scenedetect` command below.""" + +logger = logging.getLogger('pyscenedetect') + +_LINE_SEPARATOR = '-' * 72 + +# About & copyright message string shown for the 'about' CLI command (scenedetect about). +_ABOUT_STRING = """ +Site: http://scenedetect.com/ +Docs: http://manual.scenedetect.com/ +Code: https://github.com/Breakthrough/PySceneDetect/ + +Copyright (C) 2014-2023 Brandon Castellano. All rights reserved. + +PySceneDetect is released under the BSD 3-Clause license. See the +included LICENSE file or visit the PySceneDetect website for details. +This software uses the following third-party components: + + > NumPy [Copyright (C) 2018, Numpy Developers] + > OpenCV [Copyright (C) 2018, OpenCV Team] + > click [Copyright (C) 2018, Armin Ronacher] + > simpletable [Copyright (C) 2014 Matheus Vieira Portela] + +This software may also invoke the following third-party executables: + + > FFmpeg [Copyright (C) 2018, Fabrice Bellard] + > mkvmerge [Copyright (C) 2005-2016, Matroska] + +If included with your distribution of PySceneDetect, see the included +LICENSE-FFMPEG and LICENSE-MKVMERGE or visit: + [ https://scenedetect.com/copyright/ ] + +FFmpeg and mkvmerge are distributed only with certain PySceneDetect +releases, in order to allow for automatic video splitting capability. 
+If they were not included with your distribution, they can usually be +installed from your operating system's package manager, or downloaded +from the following URLs: + + FFmpeg: [ https://ffmpeg.org/download.html ] + mkvmerge: [ https://mkvtoolnix.download/downloads.html ] + (Note that mkvmerge is a part of the mkvtoolnix package.) + +Once installed, ensure the respective program can be accessed from the +same location running PySceneDetect by calling the `ffmpeg` or +`mkvmerge` command from a terminal/command prompt. + +PySceneDetect will automatically use whichever program is available on +the computer, depending on the specified command-line options. + +Additionally, certain Windows distributions may include a compiled +Python distribution. For license information regarding the distributed +version of Python, see the included LICENSE-PYTHON file for details, +or visit the following URL: [ https://docs.python.org/3/license.html ] + +THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. 
+""" + + +class _Command(click.Command): + """Custom formatting for commands.""" + + def format_help(self, ctx: click.Context, formatter: click.HelpFormatter) -> None: + """Writes the help into the formatter if it exists.""" + if ctx.parent: + formatter.write(click.style('`%s` Command' % ctx.command.name, fg='cyan')) + formatter.write_paragraph() + formatter.write(click.style(_LINE_SEPARATOR, fg='cyan')) + formatter.write_paragraph() + else: + formatter.write(click.style(_LINE_SEPARATOR, fg='yellow')) + formatter.write_paragraph() + formatter.write(click.style('PySceneDetect Help', fg='yellow')) + formatter.write_paragraph() + formatter.write(click.style(_LINE_SEPARATOR, fg='yellow')) + formatter.write_paragraph() + + self.format_usage(ctx, formatter) + self.format_help_text(ctx, formatter) + self.format_options(ctx, formatter) + self.format_epilog(ctx, formatter) + + def format_help_text(self, ctx: click.Context, formatter: click.HelpFormatter) -> None: + """Writes the help text to the formatter if it exists.""" + if self.help: + base_command = (ctx.parent.info_name if ctx.parent is not None else ctx.info_name) + formatted_help = self.help.format( + scenedetect=base_command, scenedetect_with_video='%s -i video.mp4' % base_command) + text = inspect.cleandoc(formatted_help).partition("\f")[0] + formatter.write_paragraph() + formatter.write_text(text) + + def format_epilog(self, ctx: click.Context, formatter: click.HelpFormatter) -> None: + """Writes the epilog into the formatter if it exists.""" + if self.epilog: + epilog = inspect.cleandoc(self.epilog) + formatter.write_paragraph() + formatter.write_text(epilog) + + +class _CommandGroup(_Command, click.Group): + """Custom formatting for command groups.""" + pass + + +def _print_command_help(ctx: click.Context, command: click.Command): + """Print help/usage for a given command. 
Modifies `ctx` in-place.""" + ctx.info_name = command.name + ctx.command = command + click.echo('') + click.echo(command.get_help(ctx)) + + +@click.group( + cls=_CommandGroup, + chain=True, + context_settings=dict(help_option_names=['-h', '--help']), + invoke_without_command=True, + epilog="""Type "scenedetect [command] --help" for command usage. See https://scenedetect.com/docs/ for online docs.""" +) +# We cannot make this a required argument otherwise we will reject commands of the form +# `scenedetect help scene_detect-content` or `scenedetect scene_detect-content --help`. +@click.option( + '--input', + '-i', + multiple=False, + required=False, + metavar='VIDEO', + type=click.STRING, + help='[REQUIRED] Input video file. Image sequences and URLs are supported.', +) +@click.option( + '--output', + '-o', + multiple=False, + required=False, + metavar='DIR', + type=click.Path(exists=False, dir_okay=True, writable=True, resolve_path=True), + help='Output directory for created files. If unset, working directory will be used. May be overridden by command options.%s' + % (USER_CONFIG.get_help_string("global", "output", show_default=False)), +) +@click.option( + '--config', + '-c', + metavar='FILE', + type=click.Path(exists=True, file_okay=True, readable=True, resolve_path=False), + help='Path to config file. If unset, tries to load config from %s' % (CONFIG_FILE_PATH), +) +@click.option( + '--stats', + '-s', + metavar='CSV', + type=click.Path(exists=False, file_okay=True, writable=True, resolve_path=False), + help='Stats file (.csv) to write frame metrics. Existing files will be overwritten. Used for tuning detection parameters and data analysis.', +) +@click.option( + '--framerate', + '-f', + metavar='FPS', + type=click.FLOAT, + default=None, + help='Override framerate with value as frames/sec.', +) +@click.option( + '--min-scene-len', + '-m', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Minimum length of any scene. 
TIMECODE can be specified as number of frames (-m=10), time in seconds followed by "s" (-m=2.5s), or timecode (-m=00:02:53.633).%s' + % USER_CONFIG.get_help_string("global", "min-scene-len"), +) +@click.option( + '--drop-short-scenes', + is_flag=True, + flag_value=True, + help='Drop scenes shorter than -m/--min-scene-len, instead of combining with neighbors.%s' % + (USER_CONFIG.get_help_string('global', 'drop-short-scenes')), +) +@click.option( + '--merge-last-scene', + is_flag=True, + flag_value=True, + help='Merge last scene with previous if shorter than -m/--min-scene-len.%s' % + (USER_CONFIG.get_help_string('global', 'merge-last-scene')), +) +@click.option( + '--backend', + '-b', + metavar='BACKEND', + type=click.Choice(CHOICE_MAP["global"]["backend"]), + default=None, + help='Backend to use for video input. Backend options can be set using a config file (-c/--config). [available: %s]%s' + % (', '.join(AVAILABLE_BACKENDS.keys()), USER_CONFIG.get_help_string("global", "backend")), +) +@click.option( + '--downscale', + '-d', + metavar='N', + type=click.INT, + default=None, + help='Integer factor to downscale video by before processing. If unset, value is selected based on resolution. Set -d=1 to disable downscaling.%s' + % (USER_CONFIG.get_help_string("global", "downscale", show_default=False)), +) +@click.option( + '--frame-skip', + '-fs', + metavar='N', + type=click.INT, + default=None, + help='Skip N frames during processing. Reduces processing speed at expense of accuracy. -fs=1 skips every other frame processing 50%% of the video, -fs=2 processes 33%% of the video frames, -fs=3 processes 25%%, etc... %s' + % USER_CONFIG.get_help_string("global", "frame-skip"), +) +@click.option( + '--verbosity', + '-v', + metavar='LEVEL', + type=click.Choice(CHOICE_MAP['global']['verbosity'], False), + default=None, + help='Amount of information to show. LEVEL must be one of: %s. 
Overrides -q/--quiet.%s' % + (', '.join(CHOICE_MAP["global"]["verbosity"]), USER_CONFIG.get_help_string( + "global", "verbosity")), +) +@click.option( + '--logfile', + '-l', + metavar='FILE', + type=click.Path(exists=False, file_okay=True, writable=True, resolve_path=False), + help='Save debug log to FILE. Appends to existing file if present.', +) +@click.option( + '--quiet', + '-q', + is_flag=True, + flag_value=True, + help='Suppress output to terminal/stdout. Equivalent to setting --verbosity=none.', +) +@click.pass_context +# pylint: disable=redefined-builtin +def scenedetect( + ctx: click.Context, + input: Optional[AnyStr], + output: Optional[AnyStr], + stats: Optional[AnyStr], + config: Optional[AnyStr], + framerate: Optional[float], + min_scene_len: Optional[str], + drop_short_scenes: bool, + merge_last_scene: bool, + backend: Optional[str], + downscale: Optional[int], + frame_skip: Optional[int], + verbosity: Optional[str], + logfile: Optional[AnyStr], + quiet: bool, +): + """PySceneDetect is a scene cut/transition detection program. PySceneDetect takes an input video, runs detection on it, and uses the resulting scene information to generate output. The syntax for using PySceneDetect is: + + {scenedetect_with_video} [detector] [commands] + +For [detector] use `scene_detect-adaptive` or `scene_detect-content` to find fast cuts, and `scene_detect-threshold` for fades in/out. If [detector] is not specified, a default detector will be used. + +Examples: + +Split video wherever a new scene is detected: + + {scenedetect_with_video} split-video + +Save scene list in CSV format with images at the start, middle, and end of each scene: + + {scenedetect_with_video} list-scenes save-images + +Skip the first 10 seconds of the input video: + + {scenedetect_with_video} time --start 10s scene_detect-content + +Show summary of all options and commands: + + {scenedetect} --help + +Global options (e.g. 
-i/--input, -c/--config) must be specified before any commands and their options. The order of commands is not strict, but each command must only be specified once. +""" + assert isinstance(ctx.obj, CliContext) + ctx.obj.handle_options( + input_path=input, + output=output, + framerate=framerate, + stats_file=stats, + downscale=downscale, + frame_skip=frame_skip, + min_scene_len=min_scene_len, + drop_short_scenes=drop_short_scenes, + merge_last_scene=merge_last_scene, + backend=backend, + quiet=quiet, + logfile=logfile, + config=config, + stats=stats, + verbosity=verbosity, + ) + + +# pylint: enable=redefined-builtin + + +@click.command('help', cls=_Command) +@click.argument( + 'command_name', + required=False, + type=click.STRING, +) +@click.pass_context +def help_command(ctx: click.Context, command_name: str): + """Print help for command (`help [command]`).""" + assert isinstance(ctx.obj, CliContext) + assert isinstance(ctx.parent.command, click.MultiCommand) + parent_command = ctx.parent.command + all_commands = set(parent_command.list_commands(ctx)) + if command_name is not None: + if not command_name in all_commands: + error_strs = [ + 'unknown command. 
List of valid commands:', + ' %s' % ', '.join(sorted(all_commands)) + ] + raise click.BadParameter('\n'.join(error_strs), param_hint='command') + click.echo('') + _print_command_help(ctx, parent_command.get_command(ctx, command_name)) + else: + click.echo(ctx.parent.get_help()) + for command in sorted(all_commands): + _print_command_help(ctx, parent_command.get_command(ctx, command)) + ctx.exit() + + +@click.command('about', cls=_Command, add_help_option=False) +@click.pass_context +def about_command(ctx: click.Context): + """Print license/copyright info.""" + assert isinstance(ctx.obj, CliContext) + click.echo('') + click.echo(click.style(_LINE_SEPARATOR, fg='cyan')) + click.echo(click.style(' About PySceneDetect %s' % _PROGRAM_VERSION, fg='yellow')) + click.echo(click.style(_LINE_SEPARATOR, fg='cyan')) + click.echo(_ABOUT_STRING) + ctx.exit() + + +@click.command('version', cls=_Command, add_help_option=False) +@click.pass_context +def version_command(ctx: click.Context): + """Print PySceneDetect version.""" + assert isinstance(ctx.obj, CliContext) + click.echo('') + click.echo(get_system_version_info()) + ctx.exit() + + +@click.command('time', cls=_Command) +@click.option( + '--start', + '-s', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Time in video to start detection. TIMECODE can be specified as number of frames (--start=100 for frame 100), time in seconds followed by "s" (--start=100s for 100 seconds), or timecode (--start=00:01:40 for 1m40s).', +) +@click.option( + '--duration', + '-d', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Maximum time in video to process. TIMECODE format is the same as other arguments. Mutually exclusive with -e/--end.', +) +@click.option( + '--end', + '-e', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Time in video to end detecting scenes. TIMECODE format is the same as other arguments. 
Mutually exclusive with -d/--duration', +) +@click.pass_context +def time_command( + ctx: click.Context, + start: Optional[str], + duration: Optional[str], + end: Optional[str], +): + """Set start/end/duration of input video. + +Values can be specified as frames (NNNN), seconds (NNNN.NNs), or timecode (HH:MM:SS.nnn). For example, to process only the first minute of a video: + + {scenedetect_with_video} time --end 00:01:00 + + {scenedetect_with_video} time --duration 60s + +Note that --end and --duration are mutually exclusive (i.e. only one of the two can be set). Lastly, the following is an example using absolute frame numbers to process frames 0 through 1000: + + {scenedetect_with_video} time --start 0 --end 1000 +""" + assert isinstance(ctx.obj, CliContext) + ctx.obj.handle_time( + start=start, + duration=duration, + end=end, + ) + + +@click.command('scene_detect-content', cls=_Command) +@click.option( + '--threshold', + '-t', + metavar='VAL', + type=click.FloatRange(CONFIG_MAP['scene_detect-content']['threshold'].min_val, + CONFIG_MAP['scene_detect-content']['threshold'].max_val), + default=None, + help='Threshold (float) that frame score must exceed to trigger a cut. Refers to "content_val" in stats file.%s' + % (USER_CONFIG.get_help_string("scene_detect-content", "threshold")), +) +@click.option( + '--weights', + '-w', + type=(float, float, float, float), + default=None, + metavar='HUE SAT LUM EDGE', + help='Weights of 4 components used to calculate frame score from (delta_hue, delta_sat, delta_lum, delta_edges).%s' + % (USER_CONFIG.get_help_string("scene_detect-content", "weights")), +) +@click.option( + '--luma-only', + '-l', + is_flag=True, + flag_value=True, + help='Only use luma (brightness) channel. Useful for greyscale videos. 
Equivalent to setting "-w 0 0 1 0".%s' + % (USER_CONFIG.get_help_string("scene_detect-content", "luma-only")), +) +@click.option( + '--kernel-size', + '-k', + metavar='N', + type=click.INT, + default=None, + help='Size of kernel for expanding detected edges. Must be odd integer greater than or equal to 3. If unset, kernel size is estimated using video resolution.%s' + % (USER_CONFIG.get_help_string("scene_detect-content", "kernel-size")), +) +@click.option( + '--min-scene-len', + '-m', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Minimum length of any scene. Overrides global option -m/--min-scene-len. TIMECODE can be specified in frames (-m=100), in seconds with `s` suffix (-m=3.5s), or timecode (-m=00:01:52.778).%s' + % ('' if USER_CONFIG.is_default('scene_detect-content', 'min-scene-len') else + USER_CONFIG.get_help_string('scene_detect-content', 'min-scene-len')), +) +@click.pass_context +def detect_content_command( + ctx: click.Context, + threshold: Optional[float], + weights: Optional[Tuple[float, float, float, float]], + luma_only: bool, + kernel_size: Optional[int], + min_scene_len: Optional[str], +): + """Perform content detection algorithm on input video. + +For each frame, a score from 0 to 255.0 is calculated which represents the difference in content between the current and previous frame (higher = more different). A cut is generated when a frame score exceeds -t/--threshold. Frame scores are saved under the "content_val" column in a statsfile. + +Scores are calculated from several components which are also recorded in the statsfile: + + - *delta_hue*: Difference between pixel hue values of adjacent frames. + + - *delta_sat*: Difference between pixel saturation values of adjacent frames. + + - *delta_lum*: Difference between pixel luma (brightness) values of adjacent frames. + + - *delta_edges*: Difference between calculated edges of adjacent frames. 
Typically larger than other components, so threshold may need to be increased to compensate. + +Once calculated, these components are multiplied by the specified -w/--weights to calculate the final frame score ("content_val"). Weights are set as a set of 4 numbers in the form (*delta_hue*, *delta_sat*, *delta_lum*, *delta_edges*). For example, "--weights 1.0 0.5 1.0 0.2 --threshold 32" is a good starting point for trying edge detection. The final sum is normalized by the weight of all components, so they need not equal 100%. Edge detection is disabled by default to improve performance. + +Examples: + + {scenedetect_with_video} scene_detect-content + + {scenedetect_with_video} scene_detect-content --threshold 27.5 +""" + assert isinstance(ctx.obj, CliContext) + detector_args = ctx.obj.get_detect_content_params( + threshold=threshold, + luma_only=luma_only, + min_scene_len=min_scene_len, + weights=weights, + kernel_size=kernel_size) + logger.debug('Adding detector: ContentDetector(%s)', detector_args) + ctx.obj.add_detector(ContentDetector(**detector_args)) + + +@click.command('scene_detect-adaptive', cls=_Command) +@click.option( + '--threshold', + '-t', + metavar='VAL', + type=click.FLOAT, + default=None, + help='Threshold (float) that frame score must exceed to trigger a cut. 
Refers to "adaptive_ratio" in stats file.%s' + % (USER_CONFIG.get_help_string('scene_detect-adaptive', 'threshold')), +) +@click.option( + '--min-content-val', + '-c', + metavar='VAL', + type=click.FLOAT, + default=None, + help='Minimum threshold (float) that "content_val" must exceed to trigger a cut.%s' % + (USER_CONFIG.get_help_string('scene_detect-adaptive', 'min-content-val')), +) +@click.option( + '--min-delta-hsv', + '-d', + metavar='VAL', + type=click.FLOAT, + default=None, + help='[DEPRECATED] Use -c/--min-content-val instead.%s' % + (USER_CONFIG.get_help_string('scene_detect-adaptive', 'min-delta-hsv')), + hidden=True, +) +@click.option( + '--frame-window', + '-f', + metavar='VAL', + type=click.INT, + default=None, + help='Size of window to scene_detect deviations from mean. Represents how many frames before/after the current one to use for mean.%s' + % (USER_CONFIG.get_help_string('scene_detect-adaptive', 'frame-window')), +) +@click.option( + '--weights', + '-w', + type=(float, float, float, float), + default=None, + help='Weights of 4 components ("delta_hue", "delta_sat", "delta_lum", "delta_edges") used to calculate "content_val".%s' + % (USER_CONFIG.get_help_string("scene_detect-content", "weights")), +) +@click.option( + '--luma-only', + '-l', + is_flag=True, + flag_value=True, + help='Only use luma (brightness) channel. Useful for greyscale videos. Equivalent to "--weights 0 0 1 0".%s' + % (USER_CONFIG.get_help_string("scene_detect-content", "luma-only")), +) +@click.option( + '--kernel-size', + '-k', + metavar='N', + type=click.INT, + default=None, + help='Size of kernel for expanding detected edges. Must be odd number >= 3. If unset, size is estimated using video resolution.%s' + % (USER_CONFIG.get_help_string("scene_detect-content", "kernel-size")), +) +@click.option( + '--min-scene-len', + '-m', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Minimum length of any scene. Overrides global option -m/--min-scene-len. 
TIMECODE can be specified in frames (-m=100), in seconds with `s` suffix (-m=3.5s), or timecode (-m=00:01:52.778).%s' + % ('' if USER_CONFIG.is_default('scene_detect-adaptive', 'min-scene-len') else + USER_CONFIG.get_help_string('scene_detect-adaptive', 'min-scene-len')), +) +@click.pass_context +def detect_adaptive_command( + ctx: click.Context, + threshold: Optional[float], + min_content_val: Optional[float], + min_delta_hsv: Optional[float], + frame_window: Optional[int], + weights: Optional[Tuple[float, float, float, float]], + luma_only: bool, + kernel_size: Optional[int], + min_scene_len: Optional[str], +): + """Perform adaptive detection algorithm on input video. + +Two-pass algorithm that first calculates frame scores with `scene_detect-content`, and then applies a rolling average when processing the result. This can help mitigate false detections in situations such as camera movement. + +Examples: + + {scenedetect_with_video} scene_detect-adaptive + + {scenedetect_with_video} scene_detect-adaptive --threshold 3.2 +""" + assert isinstance(ctx.obj, CliContext) + detector_args = ctx.obj.get_detect_adaptive_params( + threshold=threshold, + min_content_val=min_content_val, + min_delta_hsv=min_delta_hsv, + frame_window=frame_window, + luma_only=luma_only, + min_scene_len=min_scene_len, + weights=weights, + kernel_size=kernel_size, + ) + logger.debug('Adding detector: AdaptiveDetector(%s)', detector_args) + ctx.obj.add_detector(AdaptiveDetector(**detector_args)) + + +@click.command('scene_detect-threshold', cls=_Command) +@click.option( + '--threshold', + '-t', + metavar='VAL', + type=click.FloatRange(CONFIG_MAP['scene_detect-threshold']['threshold'].min_val, + CONFIG_MAP['scene_detect-threshold']['threshold'].max_val), + default=None, + help='Threshold (integer) that frame score must exceed to start a new scene. 
Refers to "delta_rgb" in stats file.%s' + % (USER_CONFIG.get_help_string('scene_detect-threshold', 'threshold')), +) +@click.option( + '--fade-bias', + '-f', + metavar='PERCENT', + type=click.FloatRange(CONFIG_MAP['scene_detect-threshold']['fade-bias'].min_val, + CONFIG_MAP['scene_detect-threshold']['fade-bias'].max_val), + default=None, + help='Percent (%%) from -100 to 100 of timecode skew of cut placement. -100 indicates the start frame, +100 indicates the end frame, and 0 is the middle of both.%s' + % (USER_CONFIG.get_help_string('scene_detect-threshold', 'fade-bias')), +) +@click.option( + '--add-last-scene', + '-l', + is_flag=True, + flag_value=True, + help='If set and video ends after a fade-out event, generate a final cut at the last fade-out position.%s' + % (USER_CONFIG.get_help_string('scene_detect-threshold', 'add-last-scene')), +) +@click.option( + '--min-scene-len', + '-m', + metavar='TIMECODE', + type=click.STRING, + default=None, + help='Minimum length of any scene. Overrides global option -m/--min-scene-len. TIMECODE can be specified in frames (-m=100), in seconds with `s` suffix (-m=3.5s), or timecode (-m=00:01:52.778).%s' + % ('' if USER_CONFIG.is_default('scene_detect-threshold', 'min-scene-len') else + USER_CONFIG.get_help_string('scene_detect-threshold', 'min-scene-len')), +) +@click.pass_context +def detect_threshold_command( + ctx: click.Context, + threshold: Optional[float], + fade_bias: Optional[float], + add_last_scene: bool, + min_scene_len: Optional[str], +): + """Perform threshold detection algorithm on input video. + +Detects fade-in and fade-out events using average pixel values. Resulting cuts are placed between adjacent fade-out and fade-in events. 
+ +Examples: + + {scenedetect_with_video} scene_detect-threshold + + {scenedetect_with_video} scene_detect-threshold --threshold 15 +""" + assert isinstance(ctx.obj, CliContext) + detector_args = ctx.obj.get_detect_threshold_params( + threshold=threshold, + fade_bias=fade_bias, + add_last_scene=add_last_scene, + min_scene_len=min_scene_len, + ) + logger.debug('Adding detector: ThresholdDetector(%s)', detector_args) + ctx.obj.add_detector(ThresholdDetector(**detector_args)) + + +@click.command('load-scenes', cls=_Command) +@click.option( + '--input', + '-i', + multiple=False, + metavar='FILE', + required=True, + type=click.Path(exists=True, file_okay=True, readable=True, resolve_path=True), + help='Scene list to read cut information from.') +@click.option( + '--start-col-name', + '-c', + metavar='STRING', + type=click.STRING, + default=None, + help='Name of column used to mark scene cuts.%s' % + (USER_CONFIG.get_help_string('load-scenes', 'start-col-name'))) +@click.pass_context +def load_scenes_command(ctx: click.Context, input: Optional[str], start_col_name: Optional[str]): + """Load scenes from CSV instead of detecting. Can be used with CSV generated by `list-scenes`. Scenes are loaded using the specified column as cut locations (frame number or timecode). + +Examples: + + {scenedetect_with_video} load-scenes -i scenes.csv + + {scenedetect_with_video} load-scenes -i scenes.csv --start-col-name "Start Timecode" +""" + assert isinstance(ctx.obj, CliContext) + logger.debug('Loading scenes from %s (start_col_name = %s)', input, start_col_name) + ctx.obj.handle_load_scenes(input=input, start_col_name=start_col_name) + + +@click.command('export-html', cls=_Command) +@click.option( + '--filename', + '-f', + metavar='NAME', + default='$VIDEO_NAME-Scenes.html', + type=click.STRING, + help='Filename format to use for the scene list HTML file. You can use the $VIDEO_NAME macro in the file name. 
Note that you may have to wrap the format name using single quotes.%s' + % (USER_CONFIG.get_help_string('export-html', 'filename')), +) +@click.option( + '--no-images', + is_flag=True, + flag_value=True, + help='Export the scene list including or excluding the saved images.%s' % + (USER_CONFIG.get_help_string('export-html', 'no-images')), +) +@click.option( + '--image-width', + '-w', + metavar='pixels', + type=click.INT, + help='Width in pixels of the images in the resulting HTML table.%s' % + (USER_CONFIG.get_help_string('export-html', 'image-width', show_default=False)), +) +@click.option( + '--image-height', + '-h', + metavar='pixels', + type=click.INT, + help='Height in pixels of the images in the resulting HTML table.%s' % + (USER_CONFIG.get_help_string('export-html', 'image-height', show_default=False)), +) +@click.pass_context +def export_html_command( + ctx: click.Context, + filename: Optional[AnyStr], + no_images: bool, + image_width: Optional[int], + image_height: Optional[int], +): + """Export scene list to HTML file. Requires save-images unless --no-images is specified.""" + assert isinstance(ctx.obj, CliContext) + ctx.obj.handle_export_html( + filename=filename, + no_images=no_images, + image_width=image_width, + image_height=image_height, + ) + + +@click.command('list-scenes', cls=_Command) +@click.option( + '--output', + '-o', + metavar='DIR', + type=click.Path(exists=False, dir_okay=True, writable=True, resolve_path=False), + help='Output directory to save videos to. Overrides global option -o/--output if set.%s' % + (USER_CONFIG.get_help_string('list-scenes', 'output', show_default=False)), +) +@click.option( + '--filename', + '-f', + metavar='NAME', + default='$VIDEO_NAME-Scenes.csv', + type=click.STRING, + help='Filename format to use for the scene list CSV file. You can use the $VIDEO_NAME macro in the file name. Note that you may have to wrap the name using single quotes or use escape characters (e.g. 
-f=\$VIDEO_NAME-Scenes.csv).%s' + % (USER_CONFIG.get_help_string('list-scenes', 'filename')), +) +@click.option( + '--no-output-file', + '-n', + is_flag=True, + flag_value=True, + help='Only print scene list.%s' % + (USER_CONFIG.get_help_string('list-scenes', 'no-output-file')), +) +@click.option( + '--quiet', + '-q', + is_flag=True, + flag_value=True, + help='Suppress printing scene list.%s' % (USER_CONFIG.get_help_string('list-scenes', 'quiet')), +) +@click.option( + '--skip-cuts', + '-s', + is_flag=True, + flag_value=True, + help='Skip cutting list as first row in the CSV file. Set for RFC 4180 compliant output.%s' % + (USER_CONFIG.get_help_string('list-scenes', 'skip-cuts')), +) +@click.pass_context +def list_scenes_command( + ctx: click.Context, + output: Optional[AnyStr], + filename: Optional[AnyStr], + no_output_file: bool, + quiet: bool, + skip_cuts: bool, +): + """Create scene list CSV file (will be named $VIDEO_NAME-Scenes.csv by default).""" + assert isinstance(ctx.obj, CliContext) + ctx.obj.handle_list_scenes( + output=output, + filename=filename, + no_output_file=no_output_file, + quiet=quiet, + skip_cuts=skip_cuts, + ) + + +@click.command('split-video', cls=_Command) +@click.option( + '--output', + '-o', + metavar='DIR', + type=click.Path(exists=False, dir_okay=True, writable=True, resolve_path=False), + help='Output directory to save videos to. Overrides global option -o/--output if set.%s' % + (USER_CONFIG.get_help_string('split-video', 'output', show_default=False)), +) +@click.option( + '--filename', + '-f', + metavar='NAME', + default=None, + type=click.STRING, + help='File name format to use when saving videos, with or without extension. You can use $VIDEO_NAME and $SCENE_NUMBER macros in the filename. You may have to wrap the format in single quotes or use escape characters to avoid variable expansion (e.g. 
-f=\\$VIDEO_NAME-Scene-\\$SCENE_NUMBER).%s' + % (USER_CONFIG.get_help_string('split-video', 'filename')), +) +@click.option( + '--quiet', + '-q', + is_flag=True, + flag_value=True, + help='Hide output from external video splitting tool.%s' % + (USER_CONFIG.get_help_string('split-video', 'quiet')), +) +@click.option( + '--copy', + '-c', + is_flag=True, + flag_value=True, + help='Copy instead of re-encode. Faster but less precise. Equivalent to: --args="-map 0 -c:v copy -c:a copy"%s' + % (USER_CONFIG.get_help_string('split-video', 'copy')), +) +@click.option( + '--high-quality', + '-hq', + is_flag=True, + flag_value=True, + help='Encode video with higher quality, overrides -f option if present. Equivalent to: --rate-factor=17 --preset=slow%s' + % (USER_CONFIG.get_help_string('split-video', 'high-quality')), +) +@click.option( + '--rate-factor', + '-crf', + metavar='RATE', + default=None, + type=click.IntRange(CONFIG_MAP['split-video']['rate-factor'].min_val, + CONFIG_MAP['split-video']['rate-factor'].max_val), + help='Video encoding quality (x264 constant rate factor), from 0-100, where lower is higher quality (larger output). 0 indicates lossless.%s' + % (USER_CONFIG.get_help_string('split-video', 'rate-factor')), +) +@click.option( + '--preset', + '-p', + metavar='LEVEL', + default=None, + type=click.Choice(CHOICE_MAP['split-video']['preset']), + help='Video compression quality (x264 preset). Can be one of: %s. Faster modes take less time but output may be larger.%s' + % (', '.join( + CHOICE_MAP['split-video']['preset']), USER_CONFIG.get_help_string('split-video', 'preset')), +) +@click.option( + '--args', + '-a', + metavar='ARGS', + type=click.STRING, + default=None, + help='Override codec arguments passed to FFmpeg when splitting scenes. Use double quotes (") around arguments. 
Must specify at least audio/video codec.%s' + % (USER_CONFIG.get_help_string('split-video', 'args')), +) +@click.option( + '--mkvmerge', + '-m', + is_flag=True, + flag_value=True, + help='Split video using mkvmerge. Faster than re-encoding, but less precise. If set, options other than -f/--filename, -q/--quiet and -o/--output will be ignored. Note that mkvmerge automatically appends the $SCENE_NUMBER suffix.%s' + % (USER_CONFIG.get_help_string('split-video', 'mkvmerge')), +) +@click.pass_context +def split_video_command( + ctx: click.Context, + output: Optional[AnyStr], + filename: Optional[AnyStr], + quiet: bool, + copy: bool, + high_quality: bool, + rate_factor: Optional[int], + preset: Optional[str], + args: Optional[str], + mkvmerge: bool, +): + """Split input video using ffmpeg or mkvmerge. + +Examples: + + {scenedetect_with_video} split-video + + {scenedetect_with_video} split-video --copy + + {scenedetect_with_video} split-video --filename \$VIDEO_NAME-Clip-\$SCENE_NUMBER +""" + assert isinstance(ctx.obj, CliContext) + ctx.obj.handle_split_video( + output=output, + filename=filename, + quiet=quiet, + copy=copy, + high_quality=high_quality, + rate_factor=rate_factor, + preset=preset, + args=args, + mkvmerge=mkvmerge, + ) + + +@click.command('save-images', cls=_Command) +@click.option( + '--output', + '-o', + metavar='DIR', + type=click.Path(exists=False, dir_okay=True, writable=True, resolve_path=False), + help='Output directory for images. Overrides global option -o/--output if set.%s' % + (USER_CONFIG.get_help_string('save-images', 'output', show_default=False)), +) +@click.option( + '--filename', + '-f', + metavar='NAME', + default=None, + type=click.STRING, + help='Filename format *without* extension to use when saving images. You can use the $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, and $FRAME_NUMBER macros in the file name. You may have to use escape characters (e.g. 
-f=\\$SCENE_NUMBER-Image-\\$IMAGE_NUMBER) or single quotes.%s'
+    % (USER_CONFIG.get_help_string('save-images', 'filename')),
+)
+@click.option(
+    '--num-images',
+    '-n',
+    metavar='N',
+    default=None,
+    type=click.INT,
+    help='Number of images to generate per scene. Will always include start/end frame, unless -n=1, in which case the image will be the frame at the mid-point of the scene.%s'
+    % (USER_CONFIG.get_help_string('save-images', 'num-images')),
+)
+@click.option(
+    '--jpeg',
+    '-j',
+    is_flag=True,
+    flag_value=True,
+    help='Set output format to JPEG (default).%s' %
+    (USER_CONFIG.get_help_string('save-images', 'format', show_default=False)),
+)
+@click.option(
+    '--webp',
+    '-w',
+    is_flag=True,
+    flag_value=True,
+    help='Set output format to WebP',
+)
+@click.option(
+    '--quality',
+    '-q',
+    metavar='Q',
+    default=None,
+    type=click.IntRange(0, 100),
+    help='JPEG/WebP encoding quality, from 0-100 (higher indicates better quality). For WebP, 100 indicates lossless. [default: JPEG: 95, WebP: 100]%s'
+    % (USER_CONFIG.get_help_string('save-images', 'quality', show_default=False)),
+)
+@click.option(
+    '--png',
+    '-p',
+    is_flag=True,
+    flag_value=True,
+    help='Set output format to PNG.',
+)
+@click.option(
+    '--compression',
+    '-c',
+    metavar='C',
+    default=None,
+    type=click.IntRange(0, 9),
+    help='PNG compression rate, from 0-9. Higher values produce smaller files but result in longer compression time. This setting does not affect image quality, only file size.%s'
+    % (USER_CONFIG.get_help_string('save-images', 'compression')),
+)
+@click.option(
+    '-m',
+    '--frame-margin',
+    metavar='N',
+    default=None,
+    type=click.INT,
+    help='Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries.%s'
+    % (USER_CONFIG.get_help_string('save-images', 'frame-margin')),
+)
+@click.option(
+    '--scale',
+    '-s',
+    metavar='S',
+    default=None,
+    type=click.FLOAT,
+    help='Factor to scale images by.
Ignored if -W/--width or -H/--height is set.%s' % + (USER_CONFIG.get_help_string('save-images', 'scale', show_default=False)), +) +@click.option( + '--height', + '-H', + metavar='H', + default=None, + type=click.INT, + help='Height (pixels) of images.%s' % + (USER_CONFIG.get_help_string('save-images', 'height', show_default=False)), +) +@click.option( + '--width', + '-W', + metavar='W', + default=None, + type=click.INT, + help='Width (pixels) of images.%s' % + (USER_CONFIG.get_help_string('save-images', 'width', show_default=False)), +) +@click.pass_context +def save_images_command( + ctx: click.Context, + output: Optional[AnyStr], + filename: Optional[AnyStr], + num_images: Optional[int], + jpeg: bool, + webp: bool, + quality: Optional[int], + png: bool, + compression: Optional[int], + frame_margin: Optional[int], + scale: Optional[float], + height: Optional[int], + width: Optional[int], +): + """Create images for each detected scene. + +Images can be resized + +Examples: + + {scenedetect_with_video} save-images + + {scenedetect_with_video} save-images --width 1024 + + {scenedetect_with_video} save-images --filename \$SCENE_NUMBER-img\$IMAGE_NUMBER +""" + assert isinstance(ctx.obj, CliContext) + ctx.obj.handle_save_images( + num_images=num_images, + output=output, + filename=filename, + jpeg=jpeg, + webp=webp, + quality=quality, + png=png, + compression=compression, + frame_margin=frame_margin, + scale=scale, + height=height, + width=width, + ) + + +# ---------------------------------------------------------------------- +# Commands Omitted From Help List +# ---------------------------------------------------------------------- + +# Info Commands +scenedetect.add_command(help_command) +scenedetect.add_command(version_command) +scenedetect.add_command(about_command) + +# ---------------------------------------------------------------------- +# Commands Added To Help List +# ---------------------------------------------------------------------- + +# Input / Output 
+scenedetect.add_command(time_command) +scenedetect.add_command(export_html_command) +scenedetect.add_command(list_scenes_command) +scenedetect.add_command(save_images_command) +scenedetect.add_command(split_video_command) + +# Detection Algorithms +scenedetect.add_command(detect_content_command) +scenedetect.add_command(detect_threshold_command) +scenedetect.add_command(detect_adaptive_command) +scenedetect.add_command(load_scenes_command) diff --git a/backend/scenedetect/_cli/config.py b/backend/scenedetect/_cli/config.py new file mode 100644 index 0000000..2eb37be --- /dev/null +++ b/backend/scenedetect/_cli/config.py @@ -0,0 +1,548 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""Handles loading configuration files from disk and validating each section. Only validation of the +config file schema and data types are performed. Constants/defaults are also defined here where +possible and re-used by the CLI so that there is one source of truth. 
+""" + +from abc import ABC, abstractmethod +import logging +import os +import os.path +from configparser import ConfigParser, ParsingError +from typing import Any, AnyStr, Dict, List, Optional, Tuple, Union + +from platformdirs import user_config_dir + +from scenedetect.detectors import ContentDetector +from scenedetect.frame_timecode import FrameTimecode +from scenedetect.scene_manager import Interpolation +from scenedetect.video_splitter import DEFAULT_FFMPEG_ARGS + +VALID_PYAV_THREAD_MODES = ['NONE', 'SLICE', 'FRAME', 'AUTO'] + + +class OptionParseFailure(Exception): + """Raised when a value provided in a user config file fails validation.""" + + def __init__(self, error): + super().__init__() + self.error = error + + +class ValidatedValue(ABC): + """Used to represent configuration values that must be validated against constraints.""" + + @property + @abstractmethod + def value(self) -> Any: + """Get the value after validation.""" + raise NotImplementedError() + + @staticmethod + @abstractmethod + def from_config(config_value: str, default: 'ValidatedValue') -> 'ValidatedValue': + """Validate and get the user-specified configuration option. + + Raises: + OptionParseFailure: Value from config file did not meet validation constraints. + """ + raise NotImplementedError() + + +class TimecodeValue(ValidatedValue): + """Validator for timecode values in frames (1234), seconds (123.4s), or HH:MM:SS. + + Stores value in original representation.""" + + def __init__(self, value: Union[int, float, str]): + # Ensure value is a valid timecode. 
+ FrameTimecode(timecode=value, fps=100.0) + self._value = value + + @property + def value(self) -> Union[int, float, str]: + return self._value + + def __repr__(self) -> str: + return str(self.value) + + def __str__(self) -> str: + return str(self.value) + + @staticmethod + def from_config(config_value: str, default: 'TimecodeValue') -> 'TimecodeValue': + try: + return TimecodeValue(config_value) + except ValueError as ex: + raise OptionParseFailure( + 'Timecodes must be in frames (1234), seconds (123.4s), or HH:MM:SS (00:02:03.400).' + ) from ex + + +class RangeValue(ValidatedValue): + """Validator for int/float ranges. `min_val` and `max_val` are inclusive.""" + + def __init__( + self, + value: Union[int, float], + min_val: Union[int, float], + max_val: Union[int, float], + ): + if value < min_val or value > max_val: + # min and max are inclusive. + raise ValueError() + self._value = value + self._min_val = min_val + self._max_val = max_val + + @property + def value(self) -> Union[int, float]: + return self._value + + @property + def min_val(self) -> Union[int, float]: + """Minimum value of the range.""" + return self._min_val + + @property + def max_val(self) -> Union[int, float]: + """Maximum value of the range.""" + return self._max_val + + def __repr__(self) -> str: + return str(self.value) + + def __str__(self) -> str: + return str(self.value) + + @staticmethod + def from_config(config_value: str, default: 'RangeValue') -> 'RangeValue': + try: + return RangeValue( + value=int(config_value) if isinstance(default.value, int) else float(config_value), + min_val=default.min_val, + max_val=default.max_val, + ) + except ValueError as ex: + raise OptionParseFailure('Value must be between %s and %s.' 
%
+                                     (default.min_val, default.max_val)) from ex
+
+
+class ScoreWeightsValue(ValidatedValue):
+    """Validator for score weight values (currently a tuple of four numbers)."""
+
+    _IGNORE_CHARS = [',', '/', '(', ')']
+    """Characters to ignore."""
+
+    def __init__(self, value: Union[str, ContentDetector.Components]):
+        if isinstance(value, ContentDetector.Components):
+            self._value = value
+        else:
+            translation_table = str.maketrans(
+                {char: ' ' for char in ScoreWeightsValue._IGNORE_CHARS})
+            values = value.translate(translation_table).split()
+            if not len(values) == 4:
+                raise ValueError("Score weights must be specified as four numbers!")
+            self._value = ContentDetector.Components(*(float(val) for val in values))
+
+    @property
+    def value(self) -> Tuple[float, float, float, float]:
+        return self._value
+
+    def __repr__(self) -> str:
+        return str(self.value)
+
+    def __str__(self) -> str:
+        return '%.3f, %.3f, %.3f, %.3f' % self.value
+
+    @staticmethod
+    def from_config(config_value: str, default: 'ScoreWeightsValue') -> 'ScoreWeightsValue':
+        try:
+            return ScoreWeightsValue(config_value)
+        except ValueError as ex:
+            raise OptionParseFailure(
+                'Score weights must be specified as four numbers in the form (H,S,L,E),'
+                ' e.g. (0.9, 0.2, 2.0, 0.5). Commas/brackets/slashes are ignored.') from ex
+
+
+class KernelSizeValue(ValidatedValue):
+    """Validator for kernel sizes (odd integer > 1, or -1 for auto size)."""
+
+    def __init__(self, value: int):
+        if value == -1:
+            # Kernel size of -1 maps to None internally for auto kernel size.
+            value = None
+        elif value < 0:
+            # Disallow other negative values.
+            raise ValueError()
+        elif value % 2 == 0:
+            # Disallow even values.
+ raise ValueError() + self._value = value + + @property + def value(self) -> int: + return self._value + + def __repr__(self) -> str: + return str(self.value) + + def __str__(self) -> str: + if self.value is None: + return 'auto' + return str(self.value) + + @staticmethod + def from_config(config_value: str, default: 'KernelSizeValue') -> 'KernelSizeValue': + try: + return KernelSizeValue(int(config_value)) + except ValueError as ex: + raise OptionParseFailure( + 'Value must be an odd integer greater than 1, or set to -1 for auto kernel size.' + ) from ex + + +ConfigValue = Union[bool, int, float, str] +ConfigDict = Dict[str, Dict[str, ConfigValue]] + +_CONFIG_FILE_NAME: AnyStr = 'scenedetect.cfg' +_CONFIG_FILE_DIR: AnyStr = user_config_dir("PySceneDetect", False) + +CONFIG_FILE_PATH: AnyStr = os.path.join(_CONFIG_FILE_DIR, _CONFIG_FILE_NAME) + +CONFIG_MAP: ConfigDict = { + 'backend-opencv': { + 'max-decode-attempts': 5, + }, + 'backend-pyav': { + 'suppress-output': False, + 'threading-mode': 'auto', + }, + 'scene_detect-adaptive': { + 'frame-window': 2, + 'kernel-size': KernelSizeValue(-1), + 'luma-only': False, + 'min-content-val': RangeValue(15.0, min_val=0.0, max_val=255.0), + 'min-scene-len': TimecodeValue(0), + 'threshold': RangeValue(3.0, min_val=0.0, max_val=255.0), + 'weights': ScoreWeightsValue(ContentDetector.DEFAULT_COMPONENT_WEIGHTS), + # TODO(v0.7): Remove `min-delta-hsv``. 
+ 'min-delta-hsv': RangeValue(15.0, min_val=0.0, max_val=255.0), + }, + 'scene_detect-content': { + 'kernel-size': KernelSizeValue(-1), + 'luma-only': False, + 'min-scene-len': TimecodeValue(0), + 'threshold': RangeValue(27.0, min_val=0.0, max_val=255.0), + 'weights': ScoreWeightsValue(ContentDetector.DEFAULT_COMPONENT_WEIGHTS), + }, + 'scene_detect-threshold': { + 'add-last-scene': True, + 'fade-bias': RangeValue(0, min_val=-100.0, max_val=100.0), + 'min-scene-len': TimecodeValue(0), + 'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0), + }, + 'load-scenes': { + 'start-col-name': 'Start Frame', + }, + 'export-html': { + 'filename': '$VIDEO_NAME-Scenes.html', + 'image-height': 0, + 'image-width': 0, + 'no-images': False, + }, + 'list-scenes': { + 'output': '', + 'filename': '$VIDEO_NAME-Scenes.csv', + 'no-output-file': False, + 'quiet': False, + 'skip-cuts': False, + }, + 'global': { + 'backend': 'opencv', + 'default-detector': 'scene_detect-adaptive', + 'downscale': 0, + 'downscale-method': 'linear', + 'drop-short-scenes': False, + 'frame-skip': 0, + 'merge-last-scene': False, + 'min-scene-len': TimecodeValue('0.6s'), + 'output': '', + 'verbosity': 'info', + }, + 'save-images': { + 'compression': RangeValue(3, min_val=0, max_val=9), + 'filename': '$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER', + 'format': 'jpeg', + 'frame-margin': 1, + 'height': 0, + 'num-images': 3, + 'output': '', + 'quality': RangeValue(0, min_val=0, max_val=100), # Default depends on format + 'scale': 1.0, + 'scale-method': 'linear', + 'width': 0, + }, + 'split-video': { + 'args': DEFAULT_FFMPEG_ARGS, + 'copy': False, + 'filename': '$VIDEO_NAME-Scene-$SCENE_NUMBER', + 'high-quality': False, + 'mkvmerge': False, + 'output': '', + 'preset': 'veryfast', + 'quiet': False, + 'rate-factor': RangeValue(22, min_val=0, max_val=100), + }, +} +"""Mapping of valid configuration file parameters and their default values or placeholders. 
+The types of these values are used when decoding the configuration file. Valid choices for
+certain string options are stored in `CHOICE_MAP`."""
+
+CHOICE_MAP: Dict[str, Dict[str, List[str]]] = {
+    'global': {
+        'backend': ['opencv', 'pyav', 'moviepy'],
+        'default-detector': ['scene_detect-adaptive', 'scene_detect-content', 'scene_detect-threshold'],
+        'downscale-method': [value.name.lower() for value in Interpolation],
+        'verbosity': ['debug', 'info', 'warning', 'error', 'none'],
+    },
+    'split-video': {
+        'preset': [
+            'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower',
+            'veryslow'
+        ],
+    },
+    'save-images': {
+        'format': ['jpeg', 'png', 'webp'],
+        'scale-method': [value.name.lower() for value in Interpolation],
+    },
+    'backend-pyav': {
+        'threading-mode': [str(mode).lower() for mode in VALID_PYAV_THREAD_MODES],
+    },
+}
+"""Mapping of string options which can only be of a particular set of values. We use a list instead
+of a set to preserve order when generating error contexts. Values are case-insensitive, and must be
+in lowercase in this map."""
+
+
+def _validate_structure(config: ConfigParser) -> List[str]:
+    """Validates the layout of the section/option mapping.
+
+    Returns:
+        List of any parsing errors in human-readable form.
+    """
+    errors: List[str] = []
+    for section in config.sections():
+        if not section in CONFIG_MAP.keys():
+            errors.append('Unsupported config section: [%s]' % (section))
+            continue
+        for (option_name, _) in config.items(section):
+            if not option_name in CONFIG_MAP[section].keys():
+                errors.append('Unsupported config option in [%s]: %s' % (section, option_name))
+    return errors
+
+
+def _parse_config(config: ConfigParser) -> Tuple[ConfigDict, List[str]]:
+    """Process the given configuration into a key-value mapping.
+
+    Returns:
+        Configuration mapping and list of any processing errors in human readable form.
+ """ + out_map: ConfigDict = {} + errors: List[str] = [] + for command in CONFIG_MAP: + out_map[command] = {} + for option in CONFIG_MAP[command]: + if command in config and option in config[command]: + try: + value_type = None + if isinstance(CONFIG_MAP[command][option], bool): + value_type = 'yes/no value' + out_map[command][option] = config.getboolean(command, option) + continue + elif isinstance(CONFIG_MAP[command][option], int): + value_type = 'integer' + out_map[command][option] = config.getint(command, option) + continue + elif isinstance(CONFIG_MAP[command][option], float): + value_type = 'number' + out_map[command][option] = config.getfloat(command, option) + continue + except ValueError as _: + errors.append('Invalid [%s] value for %s: %s is not a valid %s.' % + (command, option, config.get(command, option), value_type)) + continue + + # Handle custom validation types. + config_value = config.get(command, option) + default = CONFIG_MAP[command][option] + option_type = type(default) + if issubclass(option_type, ValidatedValue): + try: + out_map[command][option] = option_type.from_config( + config_value=config_value, default=default) + except OptionParseFailure as ex: + errors.append('Invalid [%s] value for %s:\n %s\n%s' % + (command, option, config_value, ex.error)) + continue + + # If we didn't process the value as a given type, handle it as a string. We also + # replace newlines with spaces, and strip any remaining leading/trailing whitespace. + if value_type is None: + config_value = config.get(command, option).replace('\n', ' ').strip() + if command in CHOICE_MAP and option in CHOICE_MAP[command]: + if config_value.lower() not in CHOICE_MAP[command][option]: + errors.append('Invalid [%s] value for %s: %s. Must be one of: %s.' 
% + (command, option, config.get(command, option), ', '.join( + choice for choice in CHOICE_MAP[command][option]))) + continue + out_map[command][option] = config_value + continue + + return (out_map, errors) + + +class ConfigLoadFailure(Exception): + """Raised when a user-specified configuration file fails to be loaded or validated.""" + + def __init__(self, init_log: Tuple[int, str], reason: Optional[Exception] = None): + super().__init__() + self.init_log = init_log + self.reason = reason + + +class ConfigRegistry: + + def __init__(self, path: Optional[str] = None, throw_exception: bool = True): + self._config: ConfigDict = {} # Options set in the loaded config file. + self._init_log: List[Tuple[int, str]] = [] + self._initialized = False + + try: + self._load_from_disk(path) + self._initialized = True + + except ConfigLoadFailure as ex: + if throw_exception: + raise + # If we fail to load the user config file, ensure the object is flagged as + # uninitialized, and log the error so it can be dealt with if necessary. + self._init_log = ex.init_log + if ex.reason is not None: + self._init_log += [ + (logging.ERROR, 'Error: %s' % str(ex.reason).replace('\t', ' ')), + ] + self._initialized = False + + @property + def config_dict(self) -> ConfigDict: + """Current configuration options that are set for each command.""" + return self._config + + @property + def initialized(self) -> bool: + """True if the ConfigRegistry was constructed without errors, False otherwise.""" + return self._initialized + + def get_init_log(self): + """Get initialization log. Consumes the log, so subsequent calls will return None.""" + init_log = self._init_log + self._init_log = [] + return init_log + + def _log(self, log_level, log_str): + self._init_log.append((log_level, log_str)) + + def _load_from_disk(self, path=None): + # Validate `path`, or if not provided, use CONFIG_FILE_PATH if it exists. 
+ if path: + self._init_log.append((logging.INFO, "Loading config from file:\n %s" % path)) + if not os.path.exists(path): + self._init_log.append((logging.ERROR, "File not found: %s" % (path))) + raise ConfigLoadFailure(self._init_log) + else: + # Gracefully handle the case where there isn't a user config file. + if not os.path.exists(CONFIG_FILE_PATH): + self._init_log.append((logging.DEBUG, "User config file not found.")) + return + path = CONFIG_FILE_PATH + self._init_log.append((logging.INFO, "Loading user config file:\n %s" % path)) + # Try to load and parse the config file at `path`. + config = ConfigParser() + try: + with open(path, 'r') as config_file: + config_file_contents = config_file.read() + config.read_string(config_file_contents, source=path) + except ParsingError as ex: + raise ConfigLoadFailure(self._init_log, reason=ex) + except OSError as ex: + raise ConfigLoadFailure(self._init_log, reason=ex) + # At this point the config file syntax is correct, but we need to still validate + # the parsed options (i.e. that the options have valid values). + errors = _validate_structure(config) + if not errors: + self._config, errors = _parse_config(config) + if errors: + for log_str in errors: + self._init_log.append((logging.ERROR, log_str)) + raise ConfigLoadFailure(self._init_log) + + def is_default(self, command: str, option: str) -> bool: + """True if specified config option is unset (i.e. 
the default), False otherwise.""" + return not (command in self._config and option in self._config[command]) + + def get_value(self, + command: str, + option: str, + override: Optional[ConfigValue] = None, + ignore_default: bool = False) -> ConfigValue: + """Get the current setting or default value of the specified command option.""" + assert command in CONFIG_MAP and option in CONFIG_MAP[command] + if override is not None: + return override + if command in self._config and option in self._config[command]: + value = self._config[command][option] + else: + value = CONFIG_MAP[command][option] + if ignore_default: + return None + if issubclass(type(value), ValidatedValue): + return value.value + return value + + def get_help_string(self, + command: str, + option: str, + show_default: Optional[bool] = None) -> str: + """Get a string to specify for the help text indicating the current command option value, + if set, or the default. + + Arguments: + command: A command name or, "global" for global options. + option: Command-line option to set within `command`. + show_default: Always show default value. Default is False for flag/bool values, + True otherwise. 
+ """ + assert command in CONFIG_MAP and option in CONFIG_MAP[command] + is_flag = isinstance(CONFIG_MAP[command][option], bool) + if command in self._config and option in self._config[command]: + if is_flag: + value_str = 'on' if self._config[command][option] else 'off' + else: + value_str = str(self._config[command][option]) + return ' [setting: %s]' % (value_str) + if show_default is False or (show_default is None and is_flag + and CONFIG_MAP[command][option] is False): + return '' + return ' [default: %s]' % (str(CONFIG_MAP[command][option])) diff --git a/backend/scenedetect/_cli/context.py b/backend/scenedetect/_cli/context.py new file mode 100644 index 0000000..f6882d9 --- /dev/null +++ b/backend/scenedetect/_cli/context.py @@ -0,0 +1,820 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. 
+# +"""Context of which command-line options and config settings the user provided.""" + +import logging +import os +from typing import Any, AnyStr, Dict, Optional, Tuple, Type + +import click + +import scenedetect + +from scenedetect import open_video, AVAILABLE_BACKENDS +from scenedetect._scene_loader import SceneLoader + +from scenedetect.scene_detector import SceneDetector +from scenedetect.platform import get_and_create_path, get_cv2_imwrite_params, init_logger +from scenedetect.frame_timecode import FrameTimecode, MAX_FPS_DELTA +from scenedetect.video_stream import VideoStream, VideoOpenFailure, FrameRateUnavailable +from scenedetect.video_splitter import is_mkvmerge_available, is_ffmpeg_available +from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector +from scenedetect.stats_manager import StatsManager +from scenedetect.scene_manager import SceneManager, Interpolation + +from scenedetect._cli.config import ConfigRegistry, ConfigLoadFailure, CHOICE_MAP + +logger = logging.getLogger('pyscenedetect') + +USER_CONFIG = ConfigRegistry(throw_exception=False) + + +def parse_timecode(value: str, + frame_rate: float, + first_index_is_one: bool = False) -> FrameTimecode: + """Parses a user input string into a FrameTimecode assuming the given framerate. + + If value is None, None will be returned instead of processing the value. 
+ + Raises: + click.BadParameter + """ + if value is None: + return None + try: + if first_index_is_one and value.isdigit(): + value = int(value) + if value >= 1: + value -= 1 + return FrameTimecode(timecode=value, fps=frame_rate) + except ValueError as ex: + raise click.BadParameter( + 'timecode must be in frames (1234), seconds (123.4s), or HH:MM:SS (00:02:03.400)' + ) from ex + + +def contains_sequence_or_url(video_path: str) -> bool: + """Checks if the video path is a URL or image sequence.""" + return '%' in video_path or '://' in video_path + + +def check_split_video_requirements(use_mkvmerge: bool) -> None: + """ Validates that the proper tool is available on the system to perform the + `split-video` command. + + Arguments: + use_mkvmerge: True if mkvmerge (-m), False otherwise. + + Raises: click.BadParameter if the proper video splitting tool cannot be found. + """ + + if (use_mkvmerge and not is_mkvmerge_available()) or not is_ffmpeg_available(): + error_strs = [ + "{EXTERN_TOOL} is required for split-video{EXTRA_ARGS}.".format( + EXTERN_TOOL='mkvmerge' if use_mkvmerge else 'ffmpeg', + EXTRA_ARGS=' when mkvmerge (-m) is set' if use_mkvmerge else '') + ] + error_strs += ['Ensure the program is available on your system and try again.'] + if not use_mkvmerge and is_mkvmerge_available(): + error_strs += ['You can specify mkvmerge (-m) to use mkvmerge for splitting.'] + elif use_mkvmerge and is_ffmpeg_available(): + error_strs += ['You can specify copy (-c) to use ffmpeg stream copying.'] + error_str = '\n'.join(error_strs) + raise click.BadParameter(error_str, param_hint='split-video') + + +# pylint: disable=too-many-instance-attributes,too-many-arguments,too-many-locals +class CliContext: + """Context of the command-line interface and config file parameters passed between sub-commands. + + Handles validation of options taken in from the CLI *and* configuration files. 
+ + After processing the main program options via `handle_options`, the CLI will then call + the respective `handle_*` method for each command. Once all commands have been + processed, the main program actions are executed by passing this object to the + `run_scenedetect` function in `scenedetect.cli.controller`. + """ + + def __init__(self): + self.config = USER_CONFIG + self.video_stream: VideoStream = None + self.scene_manager: SceneManager = None + self.stats_manager: StatsManager = None + + # Global `scenedetect` Options + self.output_directory: str = None # -o/--output + self.quiet_mode: bool = None # -q/--quiet or -v/--verbosity quiet + self.stats_file_path: str = None # -s/--stats + self.drop_short_scenes: bool = None # --drop-short-scenes + self.merge_last_scene: bool = None # --merge-last-scene + self.min_scene_len: FrameTimecode = None # -m/--min-scene-len + self.frame_skip: int = None # -fs/--frame-skip + self.default_detector: Tuple[Type[SceneDetector], + Dict[str, Any]] = None # [global] default-detector + + # `time` Command Options + self.time: bool = False + self.start_time: FrameTimecode = None # time -s/--start + self.end_time: FrameTimecode = None # time -e/--end + self.duration: FrameTimecode = None # time -d/--duration + + # `save-images` Command Options + self.save_images: bool = False + self.image_extension: str = None # save-images -j/--jpeg, -w/--webp, -p/--png + self.image_directory: str = None # save-images -o/--output + self.image_param: int = None # save-images -q/--quality if -j/-w, + # otherwise -c/--compression if -p + self.image_name_format: str = None # save-images -f/--name-format + self.num_images: int = None # save-images -n/--num-images + self.frame_margin: int = 1 # save-images -m/--frame-margin + self.scale: float = None # save-images -s/--scale + self.height: int = None # save-images -h/--height + self.width: int = None # save-images -w/--width + self.scale_method: Interpolation = None # [save-images] scale-method + + # 
`split-video` Command Options + self.split_video: bool = False + self.split_mkvmerge: bool = None # split-video -m/--mkvmerge + self.split_args: str = None # split-video -a/--args, -c/--copy + self.split_directory: str = None # split-video -o/--output + self.split_name_format: str = None # split-video -f/--filename + self.split_quiet: bool = None # split-video -q/--quiet + + # `list-scenes` Command Options + self.list_scenes: bool = False + self.print_scene_list: bool = None # list-scenes -q/--quiet + self.scene_list_directory: str = None # list-scenes -o/--output + self.scene_list_name_format: str = None # list-scenes -f/--filename + self.scene_list_output: bool = None # list-scenes -n/--no-output + self.skip_cuts: bool = None # list-scenes -s/--skip-cuts + + # `export-html` Command Options + self.export_html: bool = False + self.html_name_format: str = None # export-html -f/--filename + self.html_include_images: bool = None # export-html --no-images + self.image_width: int = None # export-html -w/--image-width + self.image_height: int = None # export-html -h/--image-height + + # + # Command Handlers + # + + def handle_options( + self, + input_path: AnyStr, + output: Optional[AnyStr], + framerate: float, + stats_file: Optional[AnyStr], + downscale: Optional[int], + frame_skip: int, + min_scene_len: str, + drop_short_scenes: bool, + merge_last_scene: bool, + backend: Optional[str], + quiet: bool, + logfile: Optional[AnyStr], + config: Optional[AnyStr], + stats: Optional[AnyStr], + verbosity: Optional[str], + ): + """Parse all global options/arguments passed to the main scenedetect command, + before other sub-commands (e.g. this function processes the [options] when calling + `scenedetect [options] [commands [command options]]`). + + Raises: + click.BadParameter: One of the given options/parameters is invalid. + click.Abort: Fatal initialization failure. + """ + + # TODO(v1.0): Make the stats value optional (e.g. 
allow -s only), and allow use of + # $VIDEO_NAME macro in the name. Default to $VIDEO_NAME.csv. + + try: + init_failure = not self.config.initialized + init_log = self.config.get_init_log() + quiet = not init_failure and quiet + self._initialize_logging(quiet=quiet, verbosity=verbosity, logfile=logfile) + + # Configuration file was specified via CLI argument -c/--config. + if config and not init_failure: + self.config = ConfigRegistry(config) + init_log += self.config.get_init_log() + # Re-initialize logger with the correct verbosity. + if verbosity is None and not self.config.is_default('global', 'verbosity'): + verbosity_str = self.config.get_value('global', 'verbosity') + assert verbosity_str in CHOICE_MAP['global']['verbosity'] + self.quiet_mode = False + self._initialize_logging(verbosity=verbosity_str, logfile=logfile) + + except ConfigLoadFailure as ex: + init_failure = True + init_log += ex.init_log + if ex.reason is not None: + init_log += [(logging.ERROR, 'Error: %s' % str(ex.reason).replace('\t', ' '))] + finally: + # Make sure we print the version number even on any kind of init failure. + logger.info('PySceneDetect %s', scenedetect.__version__) + for (log_level, log_str) in init_log: + logger.log(log_level, log_str) + if init_failure: + logger.critical("Error processing configuration file.") + raise click.Abort() + + if self.config.config_dict: + logger.debug("Current configuration:\n%s", str(self.config.config_dict)) + + logger.debug('Parsing program options.') + if stats is not None and frame_skip: + error_strs = [ + 'Unable to scene_detect scenes with stats file if frame skip is not 0.', + ' Either remove the -fs/--frame-skip option, or the -s/--stats file.\n' + ] + logger.error('\n'.join(error_strs)) + raise click.BadParameter( + 'Combining the -s/--stats and -fs/--frame-skip options is not supported.', + param_hint='frame skip + stats file') + + # Handle the case where -i/--input was not specified (e.g. for the `help` command). 
+ if input_path is None: + return + + # Have to load the input video to obtain a time base before parsing timecodes. + self._open_video_stream( + input_path=input_path, + framerate=framerate, + backend=self.config.get_value("global", "backend", backend, ignore_default=True)) + + self.output_directory = output if output else self.config.get_value("global", "output") + if self.output_directory: + logger.info('Output directory set:\n %s', self.output_directory) + + self.min_scene_len = parse_timecode( + min_scene_len if min_scene_len is not None else self.config.get_value( + "global", "min-scene-len"), self.video_stream.frame_rate) + self.drop_short_scenes = drop_short_scenes or self.config.get_value( + "global", "drop-short-scenes") + self.merge_last_scene = merge_last_scene or self.config.get_value( + "global", "merge-last-scene") + self.frame_skip = self.config.get_value("global", "frame-skip", frame_skip) + + # Create StatsManager if --stats is specified. + if stats_file: + self.stats_file_path = get_and_create_path(stats_file, self.output_directory) + self.stats_manager = StatsManager() + + # Initialize default detector with values in the config file. 
+ default_detector = self.config.get_value("global", "default-detector") + if default_detector == 'scene_detect-adaptive': + self.default_detector = (AdaptiveDetector, self.get_detect_adaptive_params()) + elif default_detector == 'scene_detect-content': + self.default_detector = (ContentDetector, self.get_detect_content_params()) + elif default_detector == 'scene_detect-threshold': + self.default_detector = (ThresholdDetector, self.get_detect_threshold_params()) + else: + raise click.BadParameter("Unknown detector type!", param_hint='default-detector') + + logger.debug('Initializing SceneManager.') + scene_manager = SceneManager(self.stats_manager) + + if downscale is None and self.config.is_default("global", "downscale"): + scene_manager.auto_downscale = True + else: + scene_manager.auto_downscale = False + downscale = self.config.get_value("global", "downscale", downscale) + try: + scene_manager.downscale = downscale + except ValueError as ex: + logger.debug(str(ex)) + raise click.BadParameter(str(ex), param_hint='downscale factor') + scene_manager.interpolation = Interpolation[self.config.get_value( + 'global', 'downscale-method').upper()] + self.scene_manager = scene_manager + + def get_detect_content_params( + self, + threshold: Optional[float] = None, + luma_only: bool = None, + min_scene_len: Optional[str] = None, + weights: Optional[Tuple[float, float, float, float]] = None, + kernel_size: Optional[int] = None, + ) -> Dict[str, Any]: + """Handle scene_detect-content command options and return dict to construct one with.""" + self._ensure_input_open() + + if self.drop_short_scenes: + min_scene_len = 0 + else: + if min_scene_len is None: + if self.config.is_default('scene_detect-content', 'min-scene-len'): + min_scene_len = self.min_scene_len.frame_num + else: + min_scene_len = self.config.get_value('scene_detect-content', 'min-scene-len') + min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num + + if weights is not None: + 
try: + weights = ContentDetector.Components(*weights) + except ValueError as ex: + logger.debug(str(ex)) + raise click.BadParameter(str(ex), param_hint='weights') + return { + 'weights': self.config.get_value('scene_detect-content', 'weights', weights), + 'kernel_size': self.config.get_value('scene_detect-content', 'kernel-size', kernel_size), + 'luma_only': luma_only or self.config.get_value('scene_detect-content', 'luma-only'), + 'min_scene_len': min_scene_len, + 'threshold': self.config.get_value('scene_detect-content', 'threshold', threshold), + } + + def get_detect_adaptive_params( + self, + threshold: Optional[float] = None, + min_content_val: Optional[float] = None, + frame_window: Optional[int] = None, + luma_only: bool = None, + min_scene_len: Optional[str] = None, + weights: Optional[Tuple[float, float, float, float]] = None, + kernel_size: Optional[int] = None, + min_delta_hsv: Optional[float] = None, + ) -> Dict[str, Any]: + """Handle scene_detect-adaptive command options and return dict to construct one with.""" + self._ensure_input_open() + + # TODO(v0.7): Remove these branches when removing -d/--min-delta-hsv. + if min_delta_hsv is not None: + logger.error('-d/--min-delta-hsv is deprecated, use -c/--min-content-val instead.') + if min_content_val is None: + min_content_val = min_delta_hsv + # Handle case where deprecated min-delta-hsv is set, and use it to set min-content-val. 
+ if not self.config.is_default("scene_detect-adaptive", "min-delta-hsv"): + logger.error('[scene_detect-adaptive] config file option `min-delta-hsv` is deprecated' + ', use `min-delta-hsv` instead.') + if self.config.is_default("scene_detect-adaptive", "min-content-val"): + self.config.config_dict["scene_detect-adaptive"]["min-content-val"] = ( + self.config.config_dict["scene_detect-adaptive"]["min-deleta-hsv"]) + + if self.drop_short_scenes: + min_scene_len = 0 + else: + if min_scene_len is None: + if self.config.is_default("scene_detect-adaptive", "min-scene-len"): + min_scene_len = self.min_scene_len.frame_num + else: + min_scene_len = self.config.get_value("scene_detect-adaptive", "min-scene-len") + min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num + + if weights is not None: + try: + weights = ContentDetector.Components(*weights) + except ValueError as ex: + logger.debug(str(ex)) + raise click.BadParameter(str(ex), param_hint='weights') + return { + 'adaptive_threshold': + self.config.get_value("scene_detect-adaptive", "threshold", threshold), + 'weights': + self.config.get_value("scene_detect-adaptive", "weights", weights), + 'kernel_size': + self.config.get_value("scene_detect-adaptive", "kernel-size", kernel_size), + 'luma_only': + luma_only or self.config.get_value("scene_detect-adaptive", "luma-only"), + 'min_content_val': + self.config.get_value("scene_detect-adaptive", "min-content-val", min_content_val), + 'min_scene_len': + min_scene_len, + 'window_width': + self.config.get_value("scene_detect-adaptive", "frame-window", frame_window), + } + + def get_detect_threshold_params( + self, + threshold: Optional[float] = None, + fade_bias: Optional[float] = None, + add_last_scene: bool = None, + min_scene_len: Optional[str] = None, + ) -> Dict[str, Any]: + """Handle scene_detect-threshold command options and return dict to construct one with.""" + self._ensure_input_open() + + if self.drop_short_scenes: + min_scene_len = 
0 + else: + if min_scene_len is None: + if self.config.is_default("scene_detect-threshold", "min-scene-len"): + min_scene_len = self.min_scene_len.frame_num + else: + min_scene_len = self.config.get_value("scene_detect-threshold", "min-scene-len") + min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num + + return { + # TODO(v1.0): add_last_scene cannot be disabled right now. + 'add_final_scene': + add_last_scene or self.config.get_value("scene_detect-threshold", "add-last-scene"), + 'fade_bias': + self.config.get_value("scene_detect-threshold", "fade-bias", fade_bias), + 'min_scene_len': + min_scene_len, + 'threshold': + self.config.get_value("scene_detect-threshold", "threshold", threshold), + } + + def handle_load_scenes(self, input: AnyStr, start_col_name: Optional[str]): + """Handle `load-scenes` command options.""" + self._ensure_input_open() + start_col_name = self.config.get_value("load-scenes", "start-col-name", start_col_name) + self.add_detector( + SceneLoader( + file=input, framerate=self.video_stream.frame_rate, start_col_name=start_col_name)) + + def handle_export_html( + self, + filename: Optional[AnyStr], + no_images: bool, + image_width: Optional[int], + image_height: Optional[int], + ): + """Handle `export-html` command options.""" + self._ensure_input_open() + if self.export_html: + self._on_duplicate_command('export_html') + + no_images = no_images or self.config.get_value('export-html', 'no-images') + self.html_include_images = not no_images + + self.html_name_format = self.config.get_value('export-html', 'filename', filename) + self.image_width = self.config.get_value('export-html', 'image-width', image_width) + self.image_height = self.config.get_value('export-html', 'image-height', image_height) + + if not self.save_images and not no_images: + raise click.BadArgumentUsage( + 'The export-html command requires that the save-images command\n' + 'is specified before it, unless --no-images is specified.') + 
logger.info('HTML file name format:\n %s', filename) + + self.export_html = True + + def handle_list_scenes( + self, + output: Optional[AnyStr], + filename: Optional[AnyStr], + no_output_file: bool, + quiet: bool, + skip_cuts: bool, + ): + """Handle `list-scenes` command options.""" + self._ensure_input_open() + if self.list_scenes: + self._on_duplicate_command('list-scenes') + + self.skip_cuts = skip_cuts or self.config.get_value('list-scenes', 'skip-cuts') + self.print_scene_list = not (quiet or self.config.get_value('list-scenes', 'quiet')) + no_output_file = no_output_file or self.config.get_value('list-scenes', 'no-output-file') + + self.scene_list_directory = self.config.get_value( + 'list-scenes', 'output', output, ignore_default=True) + self.scene_list_name_format = self.config.get_value('list-scenes', 'filename', filename) + if self.scene_list_name_format is not None and not no_output_file: + logger.info('Scene list filename format:\n %s', self.scene_list_name_format) + self.scene_list_output = not no_output_file + if self.scene_list_directory is not None: + logger.info('Scene list output directory:\n %s', self.scene_list_directory) + + self.list_scenes = True + + def handle_split_video( + self, + output: Optional[AnyStr], + filename: Optional[AnyStr], + quiet: bool, + copy: bool, + high_quality: bool, + rate_factor: Optional[int], + preset: Optional[str], + args: Optional[str], + mkvmerge: bool, + ): + """Handle `split-video` command options.""" + self._ensure_input_open() + if self.split_video: + self._on_duplicate_command('split-video') + + check_split_video_requirements(use_mkvmerge=mkvmerge) + + if contains_sequence_or_url(self.video_stream.path): + error_str = 'The split-video command is incompatible with image sequences/URLs.' 
+ raise click.BadParameter(error_str, param_hint='split-video') + + ## + ## Common Arguments/Options + ## + + self.split_video = True + self.split_quiet = quiet or self.config.get_value('split-video', 'quiet') + self.split_directory = self.config.get_value( + 'split-video', 'output', output, ignore_default=True) + if self.split_directory is not None: + logger.info('Video output path set: \n%s', self.split_directory) + self.split_name_format = self.config.get_value('split-video', 'filename', filename) + + # We only load the config values for these flags/options if none of the other + # encoder flags/options were set via the CLI to avoid any conflicting options + # (e.g. if the config file sets `high-quality = yes` but `--copy` is specified). + if not (mkvmerge or copy or high_quality or args or rate_factor or preset): + mkvmerge = self.config.get_value('split-video', 'mkvmerge') + copy = self.config.get_value('split-video', 'copy') + high_quality = self.config.get_value('split-video', 'high-quality') + rate_factor = self.config.get_value('split-video', 'rate-factor') + preset = self.config.get_value('split-video', 'preset') + args = self.config.get_value('split-video', 'args') + + # Disallow certain combinations of flags/options. 
+ if mkvmerge or copy: + command = 'mkvmerge (-m)' if mkvmerge else 'copy (-c)' + if high_quality: + raise click.BadParameter( + 'high-quality (-hq) cannot be used with %s' % (command), + param_hint='split-video') + if args: + raise click.BadParameter( + 'args (-a) cannot be used with %s' % (command), param_hint='split-video') + if rate_factor: + raise click.BadParameter( + 'rate-factor (crf) cannot be used with %s' % (command), + param_hint='split-video') + if preset: + raise click.BadParameter( + 'preset (-p) cannot be used with %s' % (command), param_hint='split-video') + + ## + ## mkvmerge-Specific Arguments/Options + ## + if mkvmerge: + if copy: + logger.warning('copy mode (-c) ignored due to mkvmerge mode (-m).') + self.split_mkvmerge = True + logger.info('Using mkvmerge for video splitting.') + return + + ## + ## ffmpeg-Specific Arguments/Options + ## + if copy: + args = '-map 0 -c:v copy -c:a copy' + elif not args: + if rate_factor is None: + rate_factor = 22 if not high_quality else 17 + if preset is None: + preset = 'veryfast' if not high_quality else 'slow' + args = ('-map 0 -c:v libx264 -preset {PRESET} -crf {RATE_FACTOR} -c:a aac'.format( + PRESET=preset, RATE_FACTOR=rate_factor)) + + logger.info('ffmpeg arguments: %s', args) + self.split_args = args + if filename: + logger.info('Output file name format: %s', filename) + + def handle_save_images( + self, + num_images: Optional[int], + output: Optional[AnyStr], + filename: Optional[AnyStr], + jpeg: bool, + webp: bool, + quality: Optional[int], + png: bool, + compression: Optional[int], + frame_margin: Optional[int], + scale: Optional[float], + height: Optional[int], + width: Optional[int], + ): + """Handle `save-images` command options.""" + self._ensure_input_open() + if self.save_images: + self._on_duplicate_command('save-images') + + if '://' in self.video_stream.path: + error_str = '\nThe save-images command is incompatible with URLs.' 
+ logger.error(error_str) + raise click.BadParameter(error_str, param_hint='save-images') + + num_flags = sum([1 if flag else 0 for flag in [jpeg, webp, png]]) + if num_flags > 1: + logger.error('Multiple image type flags set for save-images command.') + raise click.BadParameter( + 'Only one image type (JPG/PNG/WEBP) can be specified.', param_hint='save-images') + # Only use config params for image format if one wasn't specified. + elif num_flags == 0: + image_format = self.config.get_value('save-images', 'format').lower() + jpeg = image_format == 'jpeg' + webp = image_format == 'webp' + png = image_format == 'png' + + # Only use config params for scale/height/width if none of them are specified explicitly. + if scale is None and height is None and width is None: + self.scale = self.config.get_value('save-images', 'scale') + self.height = self.config.get_value('save-images', 'height') + self.width = self.config.get_value('save-images', 'width') + else: + self.scale = scale + self.height = height + self.width = width + + self.scale_method = Interpolation[self.config.get_value('save-images', + 'scale-method').upper()] + + default_quality = 100 if webp else 95 + quality = ( + default_quality if self.config.is_default('save-images', 'quality') else + self.config.get_value('save-images', 'quality')) + + compression = self.config.get_value('save-images', 'compression', compression) + self.image_param = compression if png else quality + + self.image_extension = 'jpg' if jpeg else 'png' if png else 'webp' + valid_params = get_cv2_imwrite_params() + if not self.image_extension in valid_params or valid_params[self.image_extension] is None: + error_strs = [ + 'Image encoder type `%s` not supported.' % self.image_extension.upper(), + 'The specified encoder type could not be found in the current OpenCV module.', + 'To enable this output format, please update the installed version of OpenCV.', + 'If you build OpenCV, ensure the the proper dependencies are enabled. 
' + ] + logger.debug('\n'.join(error_strs)) + raise click.BadParameter('\n'.join(error_strs), param_hint='save-images') + + self.image_directory = self.config.get_value( + 'save-images', 'output', output, ignore_default=True) + + self.image_name_format = self.config.get_value('save-images', 'filename', filename) + self.num_images = self.config.get_value('save-images', 'num-images', num_images) + self.frame_margin = self.config.get_value('save-images', 'frame-margin', frame_margin) + + image_type = ('jpeg' if jpeg else self.image_extension).upper() + image_param_type = 'Compression' if png else 'Quality' + image_param_type = ' [%s: %d]' % (image_param_type, self.image_param) + logger.info('Image output format set: %s%s', image_type, image_param_type) + if self.image_directory is not None: + logger.info('Image output directory set:\n %s', os.path.abspath(self.image_directory)) + + self.save_images = True + + def handle_time(self, start, duration, end): + """Handle `time` command options.""" + self._ensure_input_open() + if self.time: + self._on_duplicate_command('time') + + if duration is not None and end is not None: + raise click.BadParameter( + 'Only one of --duration/-d or --end/-e can be specified, not both.', + param_hint='time') + + logger.debug('Setting video time:\n start: %s, duration: %s, end: %s', start, duration, + end) + + self.start_time = parse_timecode( + start, self.video_stream.frame_rate, first_index_is_one=True) + self.end_time = parse_timecode(end, self.video_stream.frame_rate, first_index_is_one=True) + self.duration = parse_timecode( + duration, self.video_stream.frame_rate, first_index_is_one=True) + self.time = True + + # + # Private Methods + # + + def _initialize_logging( + self, + quiet: Optional[bool] = None, + verbosity: Optional[str] = None, + logfile: Optional[AnyStr] = None, + ): + """Setup logging based on CLI args and user configuration settings.""" + if quiet is not None: + self.quiet_mode = bool(quiet) + curr_verbosity = 
logging.INFO + # Convert verbosity into it's log level enum, and override quiet mode if set. + if verbosity is not None: + assert verbosity in CHOICE_MAP['global']['verbosity'] + if verbosity.lower() == 'none': + self.quiet_mode = True + verbosity = 'info' + else: + # Override quiet mode if verbosity is set. + self.quiet_mode = False + curr_verbosity = getattr(logging, verbosity.upper()) + else: + verbosity_str = USER_CONFIG.get_value('global', 'verbosity') + assert verbosity_str in CHOICE_MAP['global']['verbosity'] + if verbosity_str.lower() == 'none': + self.quiet_mode = True + else: + curr_verbosity = getattr(logging, verbosity_str.upper()) + # Override quiet mode if verbosity is set. + if not USER_CONFIG.is_default('global', 'verbosity'): + self.quiet_mode = False + # Initialize logger with the set CLI args / user configuration. + init_logger(log_level=curr_verbosity, show_stdout=not self.quiet_mode, log_file=logfile) + + def add_detector(self, detector): + """ Add Detector: Adds a detection algorithm to the CliContext's SceneManager. """ + self._ensure_input_open() + try: + self.scene_manager.add_detector(detector) + except scenedetect.stats_manager.FrameMetricRegistered as ex: + raise click.BadParameter( + message='Cannot specify detection algorithm twice.', + param_hint=detector.cli_name) from ex + + def _ensure_input_open(self) -> None: + """Ensure self.video_stream was initialized (i.e. -i/--input was specified), + otherwise raises an exception. Should only be used from commands that require an + input video to process the options (e.g. those that require a timecode). + + Raises: + click.BadParameter: self.video_stream was not initialized. 
+ """ + if self.video_stream is None: + raise click.ClickException('No input video (-i/--input) was specified.') + + def _open_video_stream(self, input_path: AnyStr, framerate: Optional[float], + backend: Optional[str]): + if '%' in input_path and backend != 'opencv': + raise click.BadParameter( + 'The OpenCV backend (`--backend opencv`) must be used to process image sequences.', + param_hint='-i/--input') + if framerate is not None and framerate < MAX_FPS_DELTA: + raise click.BadParameter('Invalid framerate specified!', param_hint='-f/--framerate') + try: + if backend is None: + backend = self.config.get_value('global', 'backend') + else: + if not backend in AVAILABLE_BACKENDS: + raise click.BadParameter( + 'Specified backend %s is not available on this system!' % backend, + param_hint='-b/--backend') + # Open the video with the specified backend, loading any required config settings. + if backend == 'pyav': + self.video_stream = open_video( + path=input_path, + framerate=framerate, + backend=backend, + threading_mode=self.config.get_value('backend-pyav', 'threading-mode'), + suppress_output=self.config.get_value('backend-pyav', 'suppress-output'), + ) + elif backend == 'opencv': + self.video_stream = open_video( + path=input_path, + framerate=framerate, + backend=backend, + max_decode_attempts=self.config.get_value('backend-opencv', + 'max-decode-attempts'), + ) + # Handle backends without any config options. + else: + self.video_stream = open_video( + path=input_path, + framerate=framerate, + backend=backend, + ) + logger.debug('Video opened using backend %s', type(self.video_stream).__name__) + except FrameRateUnavailable as ex: + raise click.BadParameter( + 'Failed to obtain framerate for input video. 
Manually specify framerate with the' + ' -f/--framerate option, or try re-encoding the file.', + param_hint='-i/--input') from ex + except VideoOpenFailure as ex: + raise click.BadParameter( + 'Failed to open input video%s: %s' % + (' using %s backend' % backend if backend else '', str(ex)), + param_hint='-i/--input') from ex + except OSError as ex: + raise click.BadParameter('Input error:\n\n\t%s\n' % str(ex), param_hint='-i/--input') + + def _on_duplicate_command(self, command: str) -> None: + """Called when a command is duplicated to stop parsing and raise an error. + + Arguments: + command: Command that was duplicated for error context. + + Raises: + click.BadParameter + """ + error_strs = [] + error_strs.append('Error: Command %s specified multiple times.' % command) + error_strs.append('The %s command may appear only one time.') + + logger.error('\n'.join(error_strs)) + raise click.BadParameter( + '\n Command %s may only be specified once.' % command, + param_hint='%s command' % command) diff --git a/backend/scenedetect/_cli/controller.py b/backend/scenedetect/_cli/controller.py new file mode 100644 index 0000000..65d4cdd --- /dev/null +++ b/backend/scenedetect/_cli/controller.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. 
+# +"""Logic for the PySceneDetect command.""" + +import logging +import os +from string import Template +import time +from typing import Dict, List, Tuple, Optional +from string import Template + +from scenedetect.detectors import AdaptiveDetector +from scenedetect.frame_timecode import FrameTimecode +from scenedetect.platform import get_and_create_path, get_file_name +from scenedetect.scene_manager import save_images, write_scene_list, write_scene_list_html +from scenedetect.video_splitter import split_video_mkvmerge, split_video_ffmpeg +from scenedetect.video_stream import SeekError + +from scenedetect._cli.context import CliContext, check_split_video_requirements + +logger = logging.getLogger('pyscenedetect') + + +def run_scenedetect(context: CliContext): + """Perform main CLI application control logic. Run once all command-line options and + configuration file options have been validated. + + Arguments: + context: Prevalidated command-line option context to use for processing. + """ + # No input may have been specified depending on the commands/args that were used. + logger.debug("Running controller.") + if context.scene_manager is None: + logger.debug("No input specified.") + return + # Use default detector if one was not specified. 
+ if context.scene_manager.get_num_detectors() == 0: + detector_type, detector_args = context.default_detector + logger.debug('Using default detector: %s(%s)' % (detector_type.__name__, detector_args)) + context.scene_manager.add_detector(detector_type(**detector_args)) + + perf_start_time = time.time() + if context.start_time is not None: + logger.debug('Seeking to start time...') + try: + context.video_stream.seek(target=context.start_time) + except SeekError as ex: + logging.critical('Failed to seek to %s / frame %d: %s', + context.start_time.get_timecode(), context.start_time.get_frames(), + str(ex)) + return + + num_frames = context.scene_manager.detect_scenes( + video=context.video_stream, + duration=context.duration, + end_time=context.end_time, + frame_skip=context.frame_skip, + show_progress=not context.quiet_mode) + + # Handle case where video failure is most likely due to multiple audio tracks (#179). + if num_frames <= 0 and context.video_stream.BACKEND_NAME == 'opencv': + logger.critical( + 'Failed to read any frames from video file. This could be caused by the video' + ' having multiple audio tracks. If so, try installing the PyAV backend:\n' + ' pip install av\n' + 'Or remove the audio tracks by running either:\n' + ' ffmpeg -i input.mp4 -c copy -an output.mp4\n' + ' mkvmerge -o output.mkv input.mp4\n' + 'For details, see https://scenedetect.com/faq/') + return + + perf_duration = time.time() - perf_start_time + logger.info('Processed %d frames in %.1f seconds (average %.2f FPS).', num_frames, + perf_duration, + float(num_frames) / perf_duration) + + # Handle -s/--stats option. + _save_stats(context) + + # Get list of detected cuts/scenes from the SceneManager to generate the required output + # files, based on the given commands (list-scenes, split-video, save-images, etc...). + cut_list = context.scene_manager.get_cut_list(show_warning=False) + scene_list = context.scene_manager.get_scene_list(start_in_scene=True) + + # Handle --merge-last-scene. 
+ if context.merge_last_scene and context.min_scene_len is not None and context.min_scene_len > 0: + if len(scene_list) > 1 and (scene_list[-1][1] - scene_list[-1][0]) < context.min_scene_len: + new_last_scene = (scene_list[-2][0], scene_list[-1][1]) + scene_list = scene_list[:-2] + [new_last_scene] + + # Handle --drop-short-scenes. + if context.drop_short_scenes and context.min_scene_len > 0: + scene_list = [s for s in scene_list if (s[1] - s[0]) >= context.min_scene_len] + + # Ensure we don't divide by zero. + if scene_list: + logger.info( + 'Detected %d scenes, average shot length %.1f seconds.', len(scene_list), + sum([(end_time - start_time).get_seconds() for start_time, end_time in scene_list]) / + float(len(scene_list))) + else: + logger.info('No scenes detected.') + + # Handle list-scenes command. + _list_scenes(context, scene_list, cut_list) + + # Handle save-images command. + image_filenames = _save_images(context, scene_list) + + # Handle export-html command. + _export_html(context, scene_list, cut_list, image_filenames) + + # Handle split-video command. + _split_video(context, scene_list) + + +def _save_stats(context: CliContext) -> None: + """Handles saving the statsfile if -s/--stats was specified.""" + if context.stats_file_path is not None: + # We check if the save is required in order to reduce unnecessary log messages. 
+ if context.stats_manager.is_save_required(): + logger.info('Saving frame metrics to stats file: %s', + os.path.basename(context.stats_file_path)) + context.stats_manager.save_to_csv(csv_file=context.stats_file_path) + else: + logger.debug('No frame metrics updated, skipping update of the stats file.') + + +def _list_scenes(context: CliContext, scene_list: List[Tuple[FrameTimecode, FrameTimecode]], + cut_list: List[FrameTimecode]) -> None: + """Handles the `list-scenes` command.""" + if context.scene_list_output: + scene_list_filename = Template( + context.scene_list_name_format).safe_substitute(VIDEO_NAME=context.video_stream.name) + if not scene_list_filename.lower().endswith('.csv'): + scene_list_filename += '.csv' + scene_list_path = get_and_create_path( + scene_list_filename, context.scene_list_directory + if context.scene_list_directory is not None else context.output_directory) + logger.info('Writing scene list to CSV file:\n %s', scene_list_path) + with open(scene_list_path, 'wt') as scene_list_file: + write_scene_list( + output_csv_file=scene_list_file, + scene_list=scene_list, + include_cut_list=not context.skip_cuts, + cut_list=cut_list) + + if context.print_scene_list: + logger.info( + """Scene List: +----------------------------------------------------------------------- + | Scene # | Start Frame | Start Time | End Frame | End Time | +----------------------------------------------------------------------- +%s +----------------------------------------------------------------------- +""", '\n'.join([ + ' | %5d | %11d | %s | %11d | %s |' % + (i + 1, start_time.get_frames() + 1, start_time.get_timecode(), + end_time.get_frames(), end_time.get_timecode()) + for i, (start_time, end_time) in enumerate(scene_list) + ])) + + if cut_list: + logger.info('Comma-separated timecode list:\n %s', + ','.join([cut.get_timecode() for cut in cut_list])) + + +def _save_images( + context: CliContext, + scene_list: List[Tuple[FrameTimecode, FrameTimecode]]) -> 
Optional[Dict[int, List[str]]]: + """Handles the `save-images` command.""" + if not context.save_images: + return None + + image_output_dir = context.output_directory + if context.image_directory is not None: + image_output_dir = context.image_directory + + return save_images( + scene_list=scene_list, + video=context.video_stream, + num_images=context.num_images, + frame_margin=context.frame_margin, + image_extension=context.image_extension, + encoder_param=context.image_param, + image_name_template=context.image_name_format, + output_dir=image_output_dir, + show_progress=not context.quiet_mode, + scale=context.scale, + height=context.height, + width=context.width, + interpolation=context.scale_method) + + +def _export_html(context: CliContext, scene_list: List[Tuple[FrameTimecode, FrameTimecode]], + cut_list: List[FrameTimecode], image_filenames: Optional[Dict[int, + List[str]]]) -> None: + """Handles the `export-html` command.""" + if not context.export_html: + return + + html_filename = Template( + context.html_name_format).safe_substitute(VIDEO_NAME=context.video_stream.name) + if not html_filename.lower().endswith('.html'): + html_filename += '.html' + html_path = get_and_create_path( + html_filename, context.image_directory + if context.image_directory is not None else context.output_directory) + logger.info('Exporting to html file:\n %s:', html_path) + if not context.html_include_images: + image_filenames = None + write_scene_list_html( + html_path, + scene_list, + cut_list, + image_filenames=image_filenames, + image_width=context.image_width, + image_height=context.image_height) + + +def _split_video(context: CliContext, scene_list: List[Tuple[FrameTimecode, + FrameTimecode]]) -> None: + """Handles the `split-video` command.""" + if not context.split_video: + return + + output_path_template = context.split_name_format + # Add proper extension to filename template if required. 
+ dot_pos = output_path_template.rfind('.') + extension_length = 0 if dot_pos < 0 else len(output_path_template) - (dot_pos + 1) + # If using mkvmerge, force extension to .mkv. + if context.split_mkvmerge and not output_path_template.endswith('.mkv'): + output_path_template += '.mkv' + # Otherwise, if using ffmpeg, only add an extension if one doesn't exist. + elif not 2 <= extension_length <= 4: + output_path_template += '.mp4' + # Pre-expand $VIDEO_NAME so it can be used for a directory. + # TODO: Do this elsewhere in a future version for all output options. + output_path_template = Template(output_path_template).safe_substitute( + VIDEO_NAME=get_file_name(context.video_stream.path, include_extension=False)) + output_path_template = get_and_create_path( + output_path_template, context.split_directory + if context.split_directory is not None else context.output_directory) + # Ensure the appropriate tool is available before handling split-video. + check_split_video_requirements(context.split_mkvmerge) + if context.split_mkvmerge: + split_video_mkvmerge( + input_video_path=context.video_stream.path, + scene_list=scene_list, + output_file_template=output_path_template, + show_output=not (context.quiet_mode or context.split_quiet), + ) + else: + split_video_ffmpeg( + input_video_path=context.video_stream.path, + scene_list=scene_list, + output_file_template=output_path_template, + arg_override=context.split_args, + show_progress=not context.quiet_mode, + show_output=not (context.quiet_mode or context.split_quiet), + ) + if scene_list: + logger.info('Video splitting completed, scenes written to disk.') diff --git a/backend/scenedetect/_scene_loader.py b/backend/scenedetect/_scene_loader.py new file mode 100644 index 0000000..fc0d70d --- /dev/null +++ b/backend/scenedetect/_scene_loader.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# --------------------------------------------------------------- +# [ Site: 
# -*- coding: utf-8 -*-
#
#         PySceneDetect: Python-Based Video Scene Detector
#   ---------------------------------------------------------------
#     [  Site:   http://www.scenedetect.scenedetect.com/         ]
#     [  Docs:   http://manual.scenedetect.scenedetect.com/      ]
#     [  Github: https://github.com/Breakthrough/PySceneDetect/  ]
#
# Copyright (C) 2014-2023 Brandon Castellano <http://www.bcastell.com>.
# PySceneDetect is licensed under the BSD 3-Clause License; see the
# included LICENSE file, or visit one of the above pages for details.
#
""":class:`SceneLoader` is a class designed for use cases in which a list of
scenes is read from a csv file and actual detection of scene boundaries does not
need to occur.

This is available from the command-line as the `load-scenes` command.
"""

import os
import csv

import typing as ty

import numpy

from scenedetect.scene_detector import SceneDetector
from scenedetect.frame_timecode import FrameTimecode


class SceneLoader(SceneDetector):
    """Detector which loads a list of predefined cuts from a CSV file. Used by the CLI to
    implement the `load-scenes` functionality. Incompatible with other detectors.
    """

    def __init__(self, file: ty.TextIO, framerate: float, start_col_name: str = "Start Frame"):
        """
        Arguments:
            file: Path to csv file containing scene data for video
            framerate: Framerate used to construct `FrameTimecode` for parsing input.
            start_col_name: Header for column containing the frame/timecode where cuts occur.

        Raises:
            ValueError: No file was given, the file does not exist, or the start column
                header could not be found in the csv file.
        """
        super().__init__()

        # Check to make sure the specified csv file exists.
        if not file:
            raise ValueError('file path to csv file must be specified')
        if not os.path.exists(file):
            raise ValueError('specified csv file does not exist')

        self.csv_file = file

        # Read the csv and locate the column headers (file is closed after reading).
        (self.file_reader, csv_headers) = self._open_csv(self.csv_file, start_col_name)

        # Check to make sure column headers are present.
        if start_col_name not in csv_headers:
            raise ValueError('specified column header for scene start is not present')

        self._col_idx = csv_headers.index(start_col_name)
        self._last_scene_row = None
        self._scene_start = None

        # `SceneDetector` works on cuts, so we have to skip the first scene and use the first
        # frame of the next scene as the cut point. This can be fixed if we used
        # `SparseSceneDetector` but this part of the API is being reworked and hasn't been used
        # by any detectors yet.
        self._cut_list = sorted(
            FrameTimecode(row[self._col_idx], fps=framerate).frame_num - 1
            for row in self.file_reader)
        if self._cut_list:
            self._cut_list = self._cut_list[1:]

    def _open_csv(self, csv_file, start_col_name):
        """Reads the specified csv file, returning an iterator over its data rows.

        BUGFIX: the file handle was previously left open for the lifetime of the process;
        all rows are now read eagerly and the file is closed before returning. Also avoids
        an unhandled StopIteration when the file has a single row without the expected
        header.

        Arguments:
            csv_file: Path to csv file containing scene data for video
            start_col_name: Header of the column containing scene starts.

        Returns:
            (reader, headers): iterator over the data rows, and list of column headers
        """
        with open(csv_file, 'r') as input_file:
            rows = list(csv.reader(input_file))
        if not rows:
            return (iter(()), [])
        csv_headers = rows[0]
        data_start = 1
        # Some files have a title row before the header row; skip it if necessary.
        if start_col_name not in csv_headers and len(rows) > 1:
            csv_headers = rows[1]
            data_start = 2
        return (iter(rows[data_start:]), csv_headers)

    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> ty.List[int]:
        """Simply reads cut data from a given csv file. Video is not analyzed. Therefore this
        detector is incompatible with other detectors or a StatsManager.

        Arguments:
            frame_num: Frame number of frame that is being passed.
            frame_img: Decoded frame image (numpy.ndarray) to perform scene detection on. This
                is unused for this detector as the video is not analyzed, but is allowed for
                compatibility.

        Returns:
            cut_list: List of cuts (as provided by input csv file)
        """
        if frame_num in self._cut_list:
            return [frame_num]
        return []

    def is_processing_required(self, frame_num):
        # No frame decoding/analysis is ever needed; cuts come entirely from the csv.
        return False
+""" diff --git a/backend/scenedetect/_thirdparty/simpletable.py b/backend/scenedetect/_thirdparty/simpletable.py new file mode 100644 index 0000000..e940a43 --- /dev/null +++ b/backend/scenedetect/_thirdparty/simpletable.py @@ -0,0 +1,327 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# The MIT License (MIT) +# +# Copyright (c) 2014 Matheus Vieira Portela +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +"""simpletable.py - v0.1 2014-07-31 Matheus Vieira Portela + +This module provides simple classes and interfaces to generate simple HTML +tables based on Python native types, such as lists. 
+ +Author's website: http://matheusvportela.wordpress.com/ + +v0.4 2019-05-24 by Walter Schwenger +""" + +### CHANGES ### +# 2014-07-31: v0.1 MVP: +# - First version +# 2014-08-05: v0.2 MVP: +# - Method for defining header rows +# - SimpleTable method to create a SimpleTable from lists +# - Method to create a table from a simple list of elements and a column size +# 2014-08-20: v0.3 MVP: +# - Enable SimplePage to accept a list of tables +# - Enable SimplePage to iterate over its tables +# 2019-05-24: v0.4 WS: +# - Added SimpleTableImage class to handle adding images to tables +# - Added test images and image example to __main__ + +### REFERENCES ### +# Decalage HTML.py module: http://www.decalage.info/python/html + +import codecs + + +# noinspection PyCompatibility,PyUnresolvedReferences +def quote(string): + try: + from urllib.parse import quote + return quote(string) + except ModuleNotFoundError: + from urllib import pathname2url + return pathname2url(string) + + +class SimpleTableCell(object): + """A table class to create table cells. + + Example: + cell = SimpleTableCell('Hello, world!') + """ + + def __init__(self, text, header=False): + """Table cell constructor. + + Keyword arguments: + text -- text to be displayed + header -- flag to indicate this cell is a header cell. + """ + self.text = text + self.header = header + + def __str__(self): + """Return the HTML code for the table cell.""" + if self.header: + return '%s' % (self.text) + else: + return '%s' % (self.text) + + +class SimpleTableImage(object): + """A table class to create table cells with an image. + + Example: + cell = SimpleTableImage('images/image_1.jpg') + """ + + def __init__(self, image_file, width=None, height=None): + """Table cell constructor. + + Keyword arguments: + image_file -- relative filepath to image file to display. 
+ width -- (optional) width of the image in pixels + height -- (optional) height of the image in pixels + """ + self.image_file = image_file + if width: + self.width = round(width) + else: + self.width = width + if height: + self.height = round(height) + else: + self.height = height + + def __str__(self): + """Return the HTML code for the table cell with the image.""" + safe_filename = quote(self.image_file) + output = '' % (safe_filename) + output += '') + + for cell in self.cells: + row.append(str(cell)) + + row.append('') + + return '\n'.join(row) + + def __iter__(self): + """Iterate through row cells""" + for cell in self.cells: + yield cell + + def add_cell(self, cell): + """Add a SimpleTableCell object to the list of cells.""" + self.cells.append(cell) + + def add_cells(self, cells): + """Add a list of SimpleTableCell objects to the list of cells.""" + for cell in cells: + self.cells.append(cell) + + +class SimpleTable(object): + """A table class to create HTML tables, populated by HTML table rows. + + Example: + # Table from lists + table = SimpleTable([['Hello,', 'world!'], ['How', 'are', 'you?']]) + + # Table with header row + table = SimpleTable([['Hello,', 'world!'], ['How', 'are', 'you?']], + header_row=['Header1', 'Header2', 'Header3']) + + # Table from SimpleTableRow + rows = SimpleTableRow(['Hello,', 'world!']) + table = SimpleTable(rows) + """ + + def __init__(self, rows=None, header_row=None, css_class=None): + """Table constructor. + + Keyword arguments: + rows -- iterable of SimpleTableRow + header_row -- row that will be displayed at the beginning of the table. + if this row is SimpleTableRow, it is the programmer's + responsibility to verify whether it was created with the + header flag set to True. 
+ css_class -- table CSS class + """ + rows = rows or [] + if isinstance(rows[0], SimpleTableRow): + self.rows = rows + else: + self.rows = [SimpleTableRow(row) for row in rows] + + if header_row is None: + self.header_row = None + elif isinstance(header_row, SimpleTableRow): + self.header_row = header_row + else: + self.header_row = SimpleTableRow(header_row, header=True) + + self.css_class = css_class + + def __str__(self): + """Return the HTML code for the table as a string.""" + table = [] + + if self.css_class: + table.append('' % self.css_class) + else: + table.append('
') + + if self.header_row: + table.append(str(self.header_row)) + + for row in self.rows: + table.append(str(row)) + + table.append('
') + + return '\n'.join(table) + + def __iter__(self): + """Iterate through table rows""" + for row in self.rows: + yield row + + def add_row(self, row): + """Add a SimpleTableRow object to the list of rows.""" + self.rows.append(row) + + def add_rows(self, rows): + """Add a list of SimpleTableRow objects to the list of rows.""" + for row in rows: + self.rows.append(row) + + +class HTMLPage(object): + """A class to create HTML pages containing CSS and tables.""" + + def __init__(self, tables=None, css=None, encoding="utf-8"): + """HTML page constructor. + + Keyword arguments: + tables -- List of SimpleTable objects + css -- Cascading Style Sheet specification that is appended before the + table string + encoding -- Characters encoding. Default: UTF-8 + """ + self.tables = tables or [] + self.css = css + self.encoding = encoding + + def __str__(self): + """Return the HTML page as a string.""" + page = [] + + if self.css: + page.append('' % self.css) + + # Set encoding + page.append('' % self.encoding) + + for table in self.tables: + page.append(str(table)) + page.append('
') + + return '\n'.join(page) + + def __iter__(self): + """Iterate through tables""" + for table in self.tables: + yield table + + def save(self, filename): + """Save HTML page to a file using the proper encoding""" + with codecs.open(filename, 'w', self.encoding) as outfile: + for line in str(self): + outfile.write(line) + + def add_table(self, table): + """Add a SimpleTable to the page list of tables""" + self.tables.append(table) + + +def fit_data_to_columns(data, num_cols): + """Format data into the configured number of columns in a proper format to + generate a SimpleTable. + + Example: + test_data = [str(x) for x in range(20)] + fitted_data = fit_data_to_columns(test_data, 5) + table = SimpleTable(fitted_data) + """ + num_iterations = len(data) / num_cols + + if len(data) % num_cols != 0: + num_iterations += 1 + + return [data[num_cols * i:num_cols * i + num_cols] for i in range(num_iterations)] diff --git a/backend/scenedetect/backends/__init__.py b/backend/scenedetect/backends/__init__.py new file mode 100644 index 0000000..c137f86 --- /dev/null +++ b/backend/scenedetect/backends/__init__.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""``scenedetect.backends`` Module + +This module contains :class:`VideoStream ` implementations +backed by various Python multimedia libraries. In addition to creating backend objects directly, +:func:`scenedetect.open_video` can be used to open a video with a specified backend, falling +back to OpenCV if not available. 
# -*- coding: utf-8 -*-
#
#            PySceneDetect: Python-Based Video Scene Detector
#   -------------------------------------------------------------------
#     [  Site:    https://scenedetect.com                           ]
#     [  Docs:    https://scenedetect.com/docs/                     ]
#     [  Github:  https://github.com/Breakthrough/PySceneDetect/    ]
#
# Copyright (C) 2014-2023 Brandon Castellano <http://www.bcastell.com>.
# PySceneDetect is licensed under the BSD 3-Clause License; see the
# included LICENSE file, or visit one of the above pages for details.
#
"""``scenedetect.backends`` Module

This module contains :class:`VideoStream <scenedetect.video_stream.VideoStream>` implementations
backed by various Python multimedia libraries. In addition to creating backend objects directly,
:func:`scenedetect.open_video` can be used to open a video with a specified backend, falling
back to OpenCV if not available.

All backends available on the current system can be found via :data:`AVAILABLE_BACKENDS`.

If you already have a `cv2.VideoCapture` object you want to use for scene detection, you can
use a :class:`VideoCaptureAdapter <scenedetect.backends.opencv.VideoCaptureAdapter>` instead
of a backend. This is useful when working with devices or streams, for example.

===============================================================
Video Files
===============================================================

Assuming we have a file `video.mp4` in our working directory, we can load it and perform scene
detection on it using :func:`open_video`:

.. code:: python

    from scenedetect import open_video
    video = open_video('video.mp4')

An optional backend from :data:`AVAILABLE_BACKENDS` can be passed to :func:`open_video`
(e.g. `backend='opencv'`). Additional keyword arguments passed to :func:`open_video`
will be forwarded to the backend constructor. If the specified backend is unavailable, or
loading the video fails, ``opencv`` will be tried as a fallback.

Lastly, to use a specific backend directly:

.. code:: python

    # Manually importing and constructing a backend:
    from scenedetect.backends.opencv import VideoStreamCv2
    video = VideoStreamCv2('video.mp4')

In both examples above, the resulting ``video`` can be used with
:meth:`SceneManager.detect_scenes() <scenedetect.scene_manager.SceneManager.detect_scenes>`.

===============================================================
Devices / Cameras / Pipes
===============================================================

You can use an existing `cv2.VideoCapture` object with the PySceneDetect API using a
:class:`VideoCaptureAdapter <scenedetect.backends.opencv.VideoCaptureAdapter>`. For example,
to use a :class:`SceneManager <scenedetect.scene_manager.SceneManager>` with a webcam device:

.. code:: python

    from scenedetect import SceneManager, ContentDetector
    from scenedetect.backends import VideoCaptureAdapter
    # Open device ID 2.
    cap = cv2.VideoCapture(2)
    video = VideoCaptureAdapter(cap)
    total_frames = 1000
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector())
    scene_manager.detect_scenes(video=video, duration=total_frames)

When working with live inputs, note that you can pass a callback to
:meth:`detect_scenes() <scenedetect.scene_manager.SceneManager.detect_scenes>` to be
called on every scene detection event. See the :mod:`SceneManager <scenedetect.scene_manager>`
examples for details.
"""

# TODO(v1.0): Consider removing and making this a namespace package so that additional backends can
# be dynamically added. The preferred approach for this should probably be:
# https://packaging.python.org/en/latest/guides/creating-and-discovering-plugins/#using-namespace-packages

# TODO: Future VideoStream implementations under consideration:
#  - Nvidia VPF: https://developer.nvidia.com/blog/vpf-hardware-accelerated-video-processing-framework-in-python/

from typing import Dict, Type

# OpenCV must be available at minimum.
from backend.scenedetect.backends.opencv import VideoStreamCv2, VideoCaptureAdapter

# CONSISTENCY FIX: this package is vendored under `backend.scenedetect`, but the optional
# backends below were imported from the top-level `scenedetect` package — that would either
# import a different pip-installed copy of scenedetect (mixing class identities) or fail and
# silently disable the backend. Import from the vendored tree, matching the OpenCV import
# above; a failed import still just marks the backend unavailable.
try:
    from backend.scenedetect.backends.pyav import VideoStreamAv
except ImportError:
    VideoStreamAv = None

try:
    from backend.scenedetect.backends.moviepy import VideoStreamMoviePy
except ImportError:
    VideoStreamMoviePy = None

# TODO(0.6.3): Replace this with a function named `get_available_backends`.
AVAILABLE_BACKENDS: Dict[str, Type] = {
    backend.BACKEND_NAME: backend for backend in filter(None, [
        VideoStreamCv2,
        VideoStreamAv,
        VideoStreamMoviePy,
    ])
}
"""All available backends that :func:`scenedetect.open_video` can consider for the `backend`
parameter. These backends must support construction with the following signature:

    BackendType(path: str, framerate: Optional[float])
"""
+ + Raises: + OSError: file could not be found, access was denied, or the video is corrupt + VideoOpenFailure: video could not be opened (may be corrupted) + """ + super().__init__() + + # TODO(0.6.3) - Investigate how MoviePy handles ffmpeg not being on PATH. + # TODO(0.6.3): Add framerate override. + if framerate is not None: + raise NotImplementedError("TODO(0.6.3)") + + self._path = path + # TODO(0.6.3): Need to map errors based on the strings, since several failure + # cases return IOErrors (e.g. could not read duration/video resolution). These + # should be mapped to specific errors, e.g. write a function to map MoviePy + # exceptions to a new set of equivalents. + self._reader = FFMPEG_VideoReader(path, print_infos=print_infos) + # This will always be one behind self._reader.lastread when we finally call read() + # as MoviePy caches the first frame when opening the video. Thus self._last_frame + # will always be the current frame, and self._reader.lastread will be the next. + self._last_frame: Union[bool, ndarray] = False + self._last_frame_rgb: Optional[ndarray] = None + # Older versions don't track the video position when calling read_frame so we need + # to keep track of the current frame number. + self._frame_number = 0 + # We need to manually keep track of EOF as duration may not be accurate. + self._eof = False + # MoviePy doesn't support extracting the aspect ratio yet, so for now we just fall + # back to using OpenCV to determine it. 
+ try: + self._aspect_ratio = VideoStreamCv2(self._path).aspect_ratio + except VideoOpenFailure as ex: + logger.warning("Unable to determine aspect ratio: %s", str(ex)) + self._aspect_ratio = 1.0 + + # + # VideoStream Methods/Properties + # + + BACKEND_NAME = 'moviepy' + """Unique name used to identify this backend.""" + + @property + def frame_rate(self) -> float: + """Framerate in frames/sec.""" + return self._reader.fps + + @property + def path(self) -> Union[bytes, str]: + """Video path.""" + return self._path + + @property + def name(self) -> str: + """Name of the video, without extension, or device.""" + return get_file_name(self.path, include_extension=False) + + @property + def is_seekable(self) -> bool: + """True if seek() is allowed, False otherwise.""" + return True + + @property + def frame_size(self) -> Tuple[int, int]: + """Size of each video frame in pixels as a tuple of (width, height).""" + return tuple(self._reader.infos['video_size']) + + @property + def duration(self) -> Optional[FrameTimecode]: + """Duration of the stream as a FrameTimecode, or None if non terminating.""" + assert isinstance(self._reader.infos['duration'], float) + return self.base_timecode + self._reader.infos['duration'] + + @property + def aspect_ratio(self) -> float: + """Display/pixel aspect ratio as a float (1.0 represents square pixels).""" + return self._aspect_ratio + + @property + def position(self) -> FrameTimecode: + """Current position within stream as FrameTimecode. + + This can be interpreted as presentation time stamp of the last frame which was + decoded by calling `read` with advance=True. + + This method will always return 0 (e.g. be equal to `base_timecode`) if no frames + have been `read`.""" + frame_number = max(self._frame_number - 1, 0) + return FrameTimecode(frame_number, self.frame_rate) + + @property + def position_ms(self) -> float: + """Current position within stream as a float of the presentation time in milliseconds. 
+ The first frame has a time of 0.0 ms. + + This method will always return 0.0 if no frames have been `read`.""" + return self.position.get_seconds() * 1000.0 + + @property + def frame_number(self) -> int: + """Current position within stream in frames as an int. + + 1 indicates the first frame was just decoded by the last call to `read` with advance=True, + whereas 0 indicates that no frames have been `read`. + + This method will always return 0 if no frames have been `read`.""" + return self._frame_number + + def seek(self, target: Union[FrameTimecode, float, int]): + """Seek to the given timecode. If given as a frame number, represents the current seek + pointer (e.g. if seeking to 0, the next frame decoded will be the first frame of the video). + + For 1-based indices (first frame is frame #1), the target frame number needs to be converted + to 0-based by subtracting one. For example, if we want to seek to the first frame, we call + seek(0) followed by read(). If we want to seek to the 5th frame, we call seek(4) followed + by read(), at which point frame_number will be 5. + + Not supported if the VideoStream is a device/camera. Untested with web streams. + + Arguments: + target: Target position in video stream to seek to. + If float, interpreted as time in seconds. + If int, interpreted as frame number. + Raises: + SeekError: An error occurs while seeking, or seeking is not supported. + ValueError: `target` is not a valid value (i.e. it is negative). + """ + if not isinstance(target, FrameTimecode): + target = FrameTimecode(target, self.frame_rate) + try: + self._reader.get_frame(target.get_seconds()) + except IOError as ex: + # Leave the object in a valid state. 
+ self.reset() + if target >= self.duration: + raise SeekError("Target frame is beyond end of video!") from ex + raise + self._last_frame = self._reader.lastread + self._frame_number = target.frame_num + + def reset(self): + """ Close and re-open the VideoStream (should be equivalent to calling `seek(0)`). """ + self._reader.initialize() + self._last_frame = self._reader.read_frame() + self._frame_number = 0 + self._eof = False + + def read(self, decode: bool = True, advance: bool = True) -> Union[ndarray, bool]: + """Read and decode the next frame as a numpy.ndarray. Returns False when video ends. + + Arguments: + decode: Decode and return the frame. + advance: Seek to the next frame. If False, will return the current (last) frame. + + Returns: + If decode = True, the decoded frame (numpy.ndarray), or False (bool) if end of video. + If decode = False, a bool indicating if advancing to the the next frame succeeded. + """ + if not advance: + if self._last_frame_rgb is None: + self._last_frame_rgb = cv2.cvtColor(self._last_frame, cv2.COLOR_BGR2RGB) + return self._last_frame_rgb + if not hasattr(self._reader, 'lastread'): + return False + self._last_frame = self._reader.lastread + self._reader.read_frame() + if self._last_frame is self._reader.lastread: + # Didn't decode a new frame, must have hit EOF. 
+ if self._eof: + return False + self._eof = True + self._frame_number += 1 + if decode: + if self._last_frame is not None: + self._last_frame_rgb = cv2.cvtColor(self._last_frame, cv2.COLOR_BGR2RGB) + return self._last_frame_rgb + return True diff --git a/backend/scenedetect/backends/opencv.py b/backend/scenedetect/backends/opencv.py new file mode 100644 index 0000000..acadcdf --- /dev/null +++ b/backend/scenedetect/backends/opencv.py @@ -0,0 +1,539 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +""":class:`VideoStreamCv2` is backed by the OpenCV `VideoCapture` object. This is the default +backend. Works with video files, image sequences, and network streams/URLs. + +For wrapping input devices or pipes, there is also :class:`VideoCaptureAdapter` which can be +constructed from an existing `cv2.VideoCapture`. This allows performing scene detection on inputs +which do not support seeking. +""" + +from logging import getLogger +import math +from typing import AnyStr, Tuple, Union, Optional +import os.path + +import cv2 +from numpy import ndarray + +from backend.scenedetect.frame_timecode import FrameTimecode, MAX_FPS_DELTA +from backend.scenedetect.platform import get_file_name +from backend.scenedetect.video_stream import VideoStream, SeekError, VideoOpenFailure, FrameRateUnavailable + +logger = getLogger('pyscenedetect') + +IMAGE_SEQUENCE_IDENTIFIER = '%' + +NON_VIDEO_FILE_INPUT_IDENTIFIERS = ( + IMAGE_SEQUENCE_IDENTIFIER, # image sequence + '://', # URL/network stream + ' ! 
', # gstreamer pipe +) + + +def _get_aspect_ratio(cap: cv2.VideoCapture, epsilon: float = 0.0001) -> float: + """Display/pixel aspect ratio of the VideoCapture as a float (1.0 represents square pixels).""" + # Versions of OpenCV < 3.4.1 do not support this, so we fall back to 1.0. + if not 'CAP_PROP_SAR_NUM' in dir(cv2): + return 1.0 + num: float = cap.get(cv2.CAP_PROP_SAR_NUM) + den: float = cap.get(cv2.CAP_PROP_SAR_DEN) + # If numerator or denominator are close to zero, so we fall back to 1.0. + if abs(num) < epsilon or abs(den) < epsilon: + return 1.0 + return num / den + + +class VideoStreamCv2(VideoStream): + """OpenCV `cv2.VideoCapture` backend.""" + + def __init__( + self, + path: AnyStr = None, + framerate: Optional[float] = None, + max_decode_attempts: int = 5, + path_or_device: Union[bytes, str, int] = None, + ): + """Open a video file, image sequence, or network stream. + + Arguments: + path: Path to the video. Can be a file, image sequence (`'folder/DSC_%04d.jpg'`), + or network stream. + framerate: If set, overrides the detected framerate. + max_decode_attempts: Number of attempts to continue decoding the video + after a frame fails to decode. This allows processing videos that + have a few corrupted frames or metadata (in which case accuracy + of detection algorithms may be lower). Once this limit is passed, + decoding will stop and emit an error. + path_or_device: [DEPRECATED] Specify `path` for files, image sequences, or + network streams/URLs. Use `VideoCaptureAdapter` for devices/pipes. + + Raises: + OSError: file could not be found or access was denied + VideoOpenFailure: video could not be opened (may be corrupted) + ValueError: specified framerate is invalid + """ + super().__init__() + # TODO(v0.7): Replace with DeprecationWarning that `path_or_device` will be removed in v0.8. 
+ if path_or_device is not None: + logger.error('path_or_device is deprecated, use path or VideoCaptureAdapter instead.') + path = path_or_device + if path is None: + raise ValueError('Path must be specified!') + if framerate is not None and framerate < MAX_FPS_DELTA: + raise ValueError('Specified framerate (%f) is invalid!' % framerate) + if max_decode_attempts < 0: + raise ValueError('Maximum decode attempts must be >= 0!') + + self._path_or_device = path + self._is_device = isinstance(self._path_or_device, int) + + # Initialized in _open_capture: + self._cap: Optional[ + cv2.VideoCapture] = None # Reference to underlying cv2.VideoCapture object. + self._frame_rate: Optional[float] = None + + # VideoCapture state + self._has_grabbed = False + self._max_decode_attempts = max_decode_attempts + self._decode_failures = 0 + self._warning_displayed = False + + self._open_capture(framerate) + + # + # Backend-Specific Methods/Properties + # + + @property + def capture(self) -> cv2.VideoCapture: + """Returns reference to underlying VideoCapture object. Use with caution. + + Prefer to use this property only to take ownership of the underlying cv2.VideoCapture object + backing this object. Seeking or using the read/grab methods through this property are + unsupported and will leave this object in an inconsistent state. 
+ """ + assert self._cap + return self._cap + + # + # VideoStream Methods/Properties + # + + BACKEND_NAME = 'opencv' + """Unique name used to identify this backend.""" + + @property + def frame_rate(self) -> float: + """Framerate in frames/sec.""" + assert self._frame_rate + return self._frame_rate + + @property + def path(self) -> Union[bytes, str]: + """Video or device path.""" + if self._is_device: + assert isinstance(self._path_or_device, (int)) + return "Device %d" % self._path_or_device + assert isinstance(self._path_or_device, (bytes, str)) + return self._path_or_device + + @property + def name(self) -> str: + """Name of the video, without extension, or device.""" + if self._is_device: + return self.path + file_name: str = get_file_name(self.path, include_extension=False) + if IMAGE_SEQUENCE_IDENTIFIER in file_name: + # file_name is an image sequence, trim everything including/after the %. + # TODO: This excludes any suffix after the sequence identifier. + file_name = file_name[:file_name.rfind(IMAGE_SEQUENCE_IDENTIFIER)] + return file_name + + @property + def is_seekable(self) -> bool: + """True if seek() is allowed, False otherwise. 
+ + Always False if opening a device/webcam.""" + return not self._is_device + + @property + def frame_size(self) -> Tuple[int, int]: + """Size of each video frame in pixels as a tuple of (width, height).""" + return (math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH)), + math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) + + @property + def duration(self) -> Optional[FrameTimecode]: + """Duration of the stream as a FrameTimecode, or None if non terminating.""" + if self._is_device: + return None + return self.base_timecode + math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + @property + def aspect_ratio(self) -> float: + """Display/pixel aspect ratio as a float (1.0 represents square pixels).""" + return _get_aspect_ratio(self._cap) + + @property + def position(self) -> FrameTimecode: + """Current position within stream as FrameTimecode. + + This can be interpreted as presentation time stamp of the last frame which was + decoded by calling `read` with advance=True. + + This method will always return 0 (e.g. be equal to `base_timecode`) if no frames + have been `read`.""" + if self.frame_number < 1: + return self.base_timecode + return self.base_timecode + (self.frame_number - 1) + + @property + def position_ms(self) -> float: + """Current position within stream as a float of the presentation time in milliseconds. + The first frame has a time of 0.0 ms. + + This method will always return 0.0 if no frames have been `read`.""" + return self._cap.get(cv2.CAP_PROP_POS_MSEC) + + @property + def frame_number(self) -> int: + """Current position within stream in frames as an int. + + 1 indicates the first frame was just decoded by the last call to `read` with advance=True, + whereas 0 indicates that no frames have been `read`. + + This method will always return 0 if no frames have been `read`.""" + return math.trunc(self._cap.get(cv2.CAP_PROP_POS_FRAMES)) + + def seek(self, target: Union[FrameTimecode, float, int]): + """Seek to the given timecode. 
If given as a frame number, represents the current seek + pointer (e.g. if seeking to 0, the next frame decoded will be the first frame of the video). + + For 1-based indices (first frame is frame #1), the target frame number needs to be converted + to 0-based by subtracting one. For example, if we want to seek to the first frame, we call + seek(0) followed by read(). If we want to seek to the 5th frame, we call seek(4) followed + by read(), at which point frame_number will be 5. + + Not supported if the VideoStream is a device/camera. Untested with web streams. + + Arguments: + target: Target position in video stream to seek to. + If float, interpreted as time in seconds. + If int, interpreted as frame number. + Raises: + SeekError: An error occurs while seeking, or seeking is not supported. + ValueError: `target` is not a valid value (i.e. it is negative). + """ + if self._is_device: + raise SeekError("Cannot seek if input is a device!") + if target < 0: + raise ValueError("Target seek position cannot be negative!") + + # Have to seek one behind and call grab() after to that the VideoCapture + # returns a valid timestamp when using CAP_PROP_POS_MSEC. + target_frame_cv2 = (self.base_timecode + target).get_frames() + if target_frame_cv2 > 0: + target_frame_cv2 -= 1 + self._cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame_cv2) + self._has_grabbed = False + # Preemptively grab the frame behind the target position if possible. + if target > 0: + self._has_grabbed = self._cap.grab() + # If we seeked past the end of the video, need to seek one frame backwards + # from the current position and grab that frame instead. + if not self._has_grabbed: + seek_pos = round(self._cap.get(cv2.CAP_PROP_POS_FRAMES) - 1.0) + self._cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, seek_pos)) + self._has_grabbed = self._cap.grab() + + def reset(self): + """ Close and re-open the VideoStream (should be equivalent to calling `seek(0)`). 
""" + self._cap.release() + self._open_capture(self._frame_rate) + + def read(self, decode: bool = True, advance: bool = True) -> Union[ndarray, bool]: + """Read and decode the next frame as a numpy.ndarray. Returns False when video ends, + or the maximum number of decode attempts has passed. + + Arguments: + decode: Decode and return the frame. + advance: Seek to the next frame. If False, will return the current (last) frame. + + Returns: + If decode = True, the decoded frame (numpy.ndarray), or False (bool) if end of video. + If decode = False, a bool indicating if advancing to the the next frame succeeded. + """ + if not self._cap.isOpened(): + return False + # Grab the next frame if possible. + if advance: + has_grabbed = self._cap.grab() + # If we failed to grab the frame, retry a few times if required. + if not has_grabbed: + if self.duration > 0 and self.position < (self.duration - 1): + for _ in range(self._max_decode_attempts): + has_grabbed = self._cap.grab() + if has_grabbed: + break + # Report previous failure in debug mode. + if has_grabbed: + self._decode_failures += 1 + logger.debug('Frame failed to decode.') + if not self._warning_displayed and self._decode_failures > 1: + logger.warning('Failed to decode some frames, results may be inaccurate.') + # We didn't manage to grab a frame even after retrying, so just return. + if not has_grabbed: + return False + self._has_grabbed = True + # Need to make sure we actually grabbed a frame before calling retrieve. 
+ if decode and self._has_grabbed: + _, frame = self._cap.retrieve() + return frame + return self._has_grabbed + + # + # Private Methods + # + + def _open_capture(self, framerate: Optional[float] = None): + """Opens capture referenced by this object and resets internal state.""" + if self._is_device and self._path_or_device < 0: + raise ValueError('Invalid/negative device ID specified.') + input_is_video_file = not self._is_device and not any( + identifier in self._path_or_device for identifier in NON_VIDEO_FILE_INPUT_IDENTIFIERS) + # We don't have a way of querying why opening a video fails (errors are logged at least), + # so provide a better error message if we try to open a file that doesn't exist. + if input_is_video_file: + if not os.path.exists(self._path_or_device): + raise OSError('Video file not found.') + + cap = cv2.VideoCapture(self._path_or_device) + if not cap.isOpened(): + raise VideoOpenFailure( + 'Ensure file is valid video and system dependencies are up to date.\n') + + # Display an error if the video codec type seems unsupported (#86) as this indicates + # potential video corruption, or may explain missing frames. We only perform this check + # for video files on-disk (skipped for devices, image sequences, streams, etc...). + codec_unsupported: bool = (int(abs(cap.get(cv2.CAP_PROP_FOURCC))) == 0) + if codec_unsupported and input_is_video_file: + logger.error('Video codec detection failed. If output is incorrect:\n' + ' - Re-encode the input video with ffmpeg\n' + ' - Update OpenCV (pip install --upgrade opencv-python)\n' + ' - Use the PyAV backend (--backend pyav)\n' + 'For details, see https://github.com/Breakthrough/PySceneDetect/issues/86') + + # Ensure the framerate is correct to avoid potential divide by zero errors. This can be + # addressed in the PyAV backend if required since it supports integer timebases. + assert framerate is None or framerate > MAX_FPS_DELTA, "Framerate must be validated if set!" 
+ if framerate is None: + framerate = cap.get(cv2.CAP_PROP_FPS) + if framerate < MAX_FPS_DELTA: + raise FrameRateUnavailable() + + self._cap = cap + self._frame_rate = framerate + self._has_grabbed = False + + +# TODO(#168): Support non-monotonic timing for `position`. VFR timecode support is a +# prerequisite for this. Timecodes are currently calculated by multiplying the framerate +# by number of frames. Actual elapsed time can be obtained via `position_ms` for now. +class VideoCaptureAdapter(VideoStream): + """Adapter for existing VideoCapture objects. Unlike VideoStreamCv2, this class supports + VideoCaptures which may not support seeking. + """ + + def __init__( + self, + cap: cv2.VideoCapture, + framerate: Optional[float] = None, + max_read_attempts: int = 5, + ): + """Create from an existing OpenCV VideoCapture object. Used for webcams, live streams, + pipes, or other inputs which may not support seeking. + + Arguments: + cap: The `cv2.VideoCapture` object to wrap. Must already be opened and ready to + have `cap.read()` called on it. + framerate: If set, overrides the detected framerate. + max_read_attempts: Number of attempts to continue decoding the video + after a frame fails to decode. This allows processing videos that + have a few corrupted frames or metadata (in which case accuracy + of detection algorithms may be lower). Once this limit is passed, + decoding will stop and emit an error. + + Raises: + ValueError: capture is not open, framerate or max_read_attempts is invalid + """ + super().__init__() + + if framerate is not None and framerate < MAX_FPS_DELTA: + raise ValueError('Specified framerate (%f) is invalid!' 
% framerate) + if max_read_attempts < 0: + raise ValueError('Maximum decode attempts must be >= 0!') + if not cap.isOpened(): + raise ValueError('Specified VideoCapture must already be opened!') + if framerate is None: + framerate = cap.get(cv2.CAP_PROP_FPS) + if framerate < MAX_FPS_DELTA: + raise FrameRateUnavailable() + + self._cap = cap + self._frame_rate: float = framerate + self._num_frames = 0 + self._max_read_attempts = max_read_attempts + self._decode_failures = 0 + self._warning_displayed = False + self._time_base: float = 0.0 + + # + # Backend-Specific Methods/Properties + # + + @property + def capture(self) -> cv2.VideoCapture: + """Returns reference to underlying VideoCapture object. Use with caution. + + Prefer to use this property only to take ownership of the underlying cv2.VideoCapture object + backing this object. Using the read/grab methods through this property are unsupported and + will leave this object in an inconsistent state. + """ + assert self._cap + return self._cap + + # + # VideoStream Methods/Properties + # + + BACKEND_NAME = 'opencv_adapter' + """Unique name used to identify this backend.""" + + @property + def frame_rate(self) -> float: + """Framerate in frames/sec.""" + assert self._frame_rate + return self._frame_rate + + @property + def path(self) -> str: + """Always 'CAP_ADAPTER'.""" + return 'CAP_ADAPTER' + + @property + def name(self) -> str: + """Always 'CAP_ADAPTER'.""" + return 'CAP_ADAPTER' + + @property + def is_seekable(self) -> bool: + """Always False, as the underlying VideoCapture is assumed to not support seeking.""" + return False + + @property + def frame_size(self) -> Tuple[int, int]: + """Reported size of each video frame in pixels as a tuple of (width, height).""" + return (math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH)), + math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) + + @property + def duration(self) -> Optional[FrameTimecode]: + """Always None, as the underlying VideoCapture is assumed to not have 
a known duration.""" + None + + @property + def aspect_ratio(self) -> float: + """Display/pixel aspect ratio as a float (1.0 represents square pixels).""" + return _get_aspect_ratio(self._cap) + + @property + def position(self) -> FrameTimecode: + """Current position within stream as FrameTimecode. Use the :meth:`position_ms` + if an accurate duration of elapsed time is required, as `position` is currently + based off of the number of frames, and may not be accurate for devicesor live streams. + + This method will always return 0 (e.g. be equal to `base_timecode`) if no frames + have been `read`.""" + if self.frame_number < 1: + return self.base_timecode + return self.base_timecode + (self.frame_number - 1) + + @property + def position_ms(self) -> float: + """Current position within stream as a float of the presentation time in milliseconds. + The first frame has a time of 0.0 ms. + + This method will always return 0.0 if no frames have been `read`.""" + if self._num_frames == 0: + return 0.0 + return self._cap.get(cv2.CAP_PROP_POS_MSEC) - self._time_base + + @property + def frame_number(self) -> int: + """Current position within stream in frames as an int. + + 1 indicates the first frame was just decoded by the last call to `read` with advance=True, + whereas 0 indicates that no frames have been `read`. + + This method will always return 0 if no frames have been `read`.""" + return self._num_frames + + def seek(self, target: Union[FrameTimecode, float, int]): + """The underlying VideoCapture is assumed to not support seeking.""" + raise NotImplementedError("Seeking is not supported.") + + def reset(self): + """Not supported.""" + raise NotImplementedError("Reset is not supported.") + + def read(self, decode: bool = True, advance: bool = True) -> Union[ndarray, bool]: + """Read and decode the next frame as a numpy.ndarray. Returns False when video ends, + or the maximum number of decode attempts has passed. + + Arguments: + decode: Decode and return the frame. 
+ advance: Seek to the next frame. If False, will return the current (last) frame. + + Returns: + If decode = True, the decoded frame (numpy.ndarray), or False (bool) if end of video. + If decode = False, a bool indicating if advancing to the the next frame succeeded. + """ + if not self._cap.isOpened(): + return False + # Grab the next frame if possible. + if advance: + has_grabbed = self._cap.grab() + # If we failed to grab the frame, retry a few times if required. + if not has_grabbed: + for _ in range(self._max_read_attempts): + has_grabbed = self._cap.grab() + if has_grabbed: + break + # Report previous failure in debug mode. + if has_grabbed: + self._decode_failures += 1 + logger.debug('Frame failed to decode.') + if not self._warning_displayed and self._decode_failures > 1: + logger.warning('Failed to decode some frames, results may be inaccurate.') + # We didn't manage to grab a frame even after retrying, so just return. + if not has_grabbed: + return False + if self._num_frames == 0: + self._time_base = self._cap.get(cv2.CAP_PROP_POS_MSEC) + self._num_frames += 1 + # Need to make sure we actually grabbed a frame before calling retrieve. + if decode and self._num_frames > 0: + _, frame = self._cap.retrieve() + return frame + return True diff --git a/backend/scenedetect/backends/pyav.py b/backend/scenedetect/backends/pyav.py new file mode 100644 index 0000000..761b63f --- /dev/null +++ b/backend/scenedetect/backends/pyav.py @@ -0,0 +1,355 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. 
#
""":class:`VideoStreamAv` provides an adapter for the PyAV av.InputContainer object.

Uses string identifier ``'pyav'``.
"""

from logging import getLogger
from typing import AnyStr, BinaryIO, Optional, Tuple, Union

# pylint: disable=c-extension-no-member
import av
from numpy import ndarray

from scenedetect.frame_timecode import FrameTimecode, MAX_FPS_DELTA
from scenedetect.platform import get_file_name
from scenedetect.video_stream import VideoStream, VideoOpenFailure, FrameRateUnavailable

logger = getLogger('pyscenedetect')

# All thread modes PyAV exposes; user-supplied `threading_mode` strings are
# validated against this list in VideoStreamAv.__init__.
VALID_THREAD_MODES = [
    av.codec.context.ThreadType.NONE,
    av.codec.context.ThreadType.SLICE,
    av.codec.context.ThreadType.FRAME,
    av.codec.context.ThreadType.AUTO,
]


class VideoStreamAv(VideoStream):
    """PyAV `av.InputContainer` backend."""

    # TODO: Investigate adding an accurate_duration option to backends to calculate the duration
    # with higher precision. Sometimes it doesn't exactly match what the codec or VLC reports,
    # but we can try to seek to the end of the video first to determine it. Investigate how VLC
    # calculates the end time.
    def __init__(
        self,
        path_or_io: Union[AnyStr, BinaryIO],
        framerate: Optional[float] = None,
        name: Optional[str] = None,
        threading_mode: Optional[str] = None,
        suppress_output: bool = False,
    ):
        """Open a video by path.

        .. warning::

            Using `threading_mode` with `suppress_output = True` can cause lockups in your
            application. See the PyAV documentation for details:
            https://pyav.org/docs/stable/overview/caveats.html#sub-interpeters

        Arguments:
            path_or_io: Path to the video, or a file-like object.
            framerate: If set, overrides the detected framerate.
            name: Overrides the `name` property derived from the video path. Should be set if
                `path_or_io` is a file-like object.
            threading_mode: The PyAV video stream `thread_type`. See av.codec.context.ThreadType
                for valid threading modes ('AUTO', 'FRAME', 'NONE', and 'SLICE'). If this mode is
                'AUTO' or 'FRAME' and not all frames have been decoded, the video will be reopened
                if seekable, and the remaining frames decoded in single-threaded mode.
            suppress_output: If False, ffmpeg output will be sent to stdout/stderr by calling
                `av.logging.restore_default_callback()` before any other library calls. If True
                the application may deadlock if threading_mode is set. See the PyAV documentation
                for details: https://pyav.org/docs/stable/overview/caveats.html#sub-interpeters

        Raises:
            OSError: file could not be found or access was denied
            VideoOpenFailure: video could not be opened (may be corrupted)
            ValueError: specified framerate is invalid
        """
        # Set _container first so __del__ is safe even if construction fails below.
        self._container = None

        # TODO(#258): See what self._container.discard_corrupt = True does with corrupt videos.
        super().__init__()

        # Ensure specified framerate is valid if set.
        if framerate is not None and framerate < MAX_FPS_DELTA:
            raise ValueError('Specified framerate (%f) is invalid!' % framerate)

        self._name = '' if name is None else name
        self._path = ''
        self._frame = None
        # _reopened tracks whether the EOF workaround in _handle_eof has already run;
        # it starts True and is only armed (set False) once a threading mode is applied.
        self._reopened = True

        if threading_mode:
            threading_mode = threading_mode.upper()
            if not threading_mode in VALID_THREAD_MODES:
                raise ValueError('Invalid threading mode! Must be one of: %s' % VALID_THREAD_MODES)

        if not suppress_output:
            logger.debug('Restoring default ffmpeg log callbacks.')
            av.logging.restore_default_callback()

        try:
            if isinstance(path_or_io, (str, bytes)):
                self._path = path_or_io
                # Always open via a file handle so is_seekable/_handle_eof can use self._io.
                self._io = open(path_or_io, 'rb')
                if not self._name:
                    self._name = get_file_name(self.path, include_extension=False)
            else:
                self._io = path_or_io

            self._container = av.open(self._io)
            if threading_mode is not None:
                self._video_stream.thread_type = threading_mode
                self._reopened = False
                logger.debug('Threading mode set: %s', threading_mode)
        except OSError:
            # Propagate filesystem errors unchanged so callers can distinguish them.
            raise
        except Exception as ex:
            raise VideoOpenFailure(str(ex)) from ex

        if framerate is None:
            # Calculate framerate from video container. `guessed_rate` below appears in PyAV 9.
            frame_rate = self._video_stream.guessed_rate if hasattr(
                self._video_stream, 'guessed_rate') else self._codec_context.framerate
            if frame_rate is None or frame_rate == 0:
                raise FrameRateUnavailable()
            # TODO: Refactor FrameTimecode to support raw timing rather than framerate based calculations.
            # See https://pyav.org/docs/develop/api/stream.html for details.
            frame_rate = frame_rate.numerator / float(frame_rate.denominator)
            if frame_rate < MAX_FPS_DELTA:
                raise FrameRateUnavailable()
            self._frame_rate: float = frame_rate
        else:
            assert framerate >= MAX_FPS_DELTA
            self._frame_rate: float = framerate

        # Calculate duration after we have set the framerate.
        self._duration_frames = self._get_duration()

    def __del__(self):
        # Release the container explicitly; self._container may still be None if
        # __init__ raised before av.open succeeded.
        if self._container is not None:
            self._container.close()

    #
    # VideoStream Methods/Properties
    #

    BACKEND_NAME = 'pyav'
    """Unique name used to identify this backend."""

    @property
    def path(self) -> Union[bytes, str]:
        """Video path."""
        return self._path

    @property
    def name(self) -> Union[bytes, str]:
        """Name of the video, without extension."""
        return self._name

    @property
    def is_seekable(self) -> bool:
        """True if seek() is allowed, False otherwise."""
        return self._io.seekable()

    @property
    def frame_size(self) -> Tuple[int, int]:
        """Size of each video frame in pixels as a tuple of (width, height)."""
        return (self._codec_context.width, self._codec_context.height)

    @property
    def duration(self) -> FrameTimecode:
        """Duration of the video as a FrameTimecode."""
        return self.base_timecode + self._duration_frames

    @property
    def frame_rate(self) -> float:
        """Frame rate in frames/sec."""
        return self._frame_rate

    @property
    def position(self) -> FrameTimecode:
        """Current position within stream as FrameTimecode.

        This can be interpreted as presentation time stamp, thus frame 1 corresponds
        to the presentation time 0. Returns 0 even if `frame_number` is 1."""
        if self._frame is None:
            return self.base_timecode
        return FrameTimecode(round(self._frame.time * self.frame_rate), self.frame_rate)

    @property
    def position_ms(self) -> float:
        """Current position within stream as a float of the presentation time in
        milliseconds. The first frame has a PTS of 0."""
        if self._frame is None:
            return 0.0
        return self._frame.time * 1000.0

    @property
    def frame_number(self) -> int:
        """Current position within stream as the frame number.

        Will return 0 until the first frame is `read`."""
        if self._frame:
            return self.position.frame_num + 1
        return 0

    @property
    def aspect_ratio(self) -> float:
        """Pixel aspect ratio as a float (1.0 represents square pixels)."""
        # Derive the pixel aspect ratio from the display and frame (storage) ratios.
        display_aspect_ratio = (
            self._codec_context.display_aspect_ratio.numerator /
            self._codec_context.display_aspect_ratio.denominator)
        frame_aspect_ratio = self.frame_size[0] / self.frame_size[1]
        return display_aspect_ratio / frame_aspect_ratio

    def seek(self, target: Union[FrameTimecode, float, int]) -> None:
        """Seek to the given timecode. If given as a frame number, represents the current seek
        pointer (e.g. if seeking to 0, the next frame decoded will be the first frame of the video).

        For 1-based indices (first frame is frame #1), the target frame number needs to be converted
        to 0-based by subtracting one. For example, if we want to seek to the first frame, we call
        seek(0) followed by read(). If we want to seek to the 5th frame, we call seek(4) followed
        by read(), at which point frame_number will be 5.

        May not be supported on all input codecs (see `is_seekable`).

        Arguments:
            target: Target position in video stream to seek to.
                If float, interpreted as time in seconds.
                If int, interpreted as frame number.
        Raises:
            ValueError: `target` is not a valid value (i.e. it is negative).
        """
        if target < 0:
            raise ValueError("Target cannot be negative!")
        beginning = (target == 0)
        target = (self.base_timecode + target)
        if target >= 1:
            target = target - 1
        # Convert the target into the stream's PTS time base, offset by the stream's
        # start time (streams are not guaranteed to start at PTS 0).
        target_pts = self._video_stream.start_time + int(
            (self.base_timecode + target).get_seconds() / self._video_stream.time_base)
        self._frame = None
        self._container.seek(target_pts, stream=self._video_stream)
        if not beginning:
            # Seeking lands on a keyframe at/before the target, so advance (without
            # decoding to ndarray) until the requested position is reached.
            self.read(decode=False, advance=True)
        while self.position < target:
            if self.read(decode=False, advance=True) is False:
                break

    def reset(self):
        """ Close and re-open the VideoStream (should be equivalent to calling `seek(0)`). """
        self._container.close()
        self._frame = None
        try:
            self._container = av.open(self._path if self._path else self._io)
        except Exception as ex:
            raise VideoOpenFailure() from ex

    def read(self, decode: bool = True, advance: bool = True) -> Union[ndarray, bool]:
        """Read and decode the next frame as a numpy.ndarray. Returns False when video ends.

        Arguments:
            decode: Decode and return the frame.
            advance: Seek to the next frame. If False, will return the current (last) frame.

        Returns:
            If decode = True, the decoded frame (numpy.ndarray), or False (bool) if end of video.
            If decode = False, a bool indicating if advancing to the next frame succeeded.
        """
        has_advanced = False
        if advance:
            try:
                last_frame = self._frame
                self._frame = next(self._container.decode(video=0))
            except av.error.EOFError:
                # Restore the last good frame, then work around PyAV's threaded-decode
                # early-EOF issue; if reopening succeeds, retry the read transparently.
                self._frame = last_frame
                if self._handle_eof():
                    return self.read(decode, advance=True)
                return False
            except StopIteration:
                return False
            has_advanced = True
        if decode:
            return self._frame.to_ndarray(format='bgr24')
        return has_advanced

    #
    # Private Methods/Properties
    #

    @property
    def _video_stream(self):
        """PyAV `av.video.stream.VideoStream` being used."""
        return self._container.streams.video[0]

    @property
    def _codec_context(self):
        """PyAV `av.codec.context.CodecContext` being used."""
        return self._video_stream.codec_context

    def _get_duration(self) -> int:
        """Get video duration as number of frames based on the video and set framerate."""
        # See https://pyav.org/docs/develop/api/time.html for details on how ffmpeg/PyAV
        # handle time calculations internally and which time base to use.
        assert self.frame_rate is not None, "Frame rate must be set before calling _get_duration!"
        # See if we can obtain the number of frames directly from the stream itself.
        if self._video_stream.frames > 0:
            return self._video_stream.frames
        # Calculate based on the reported container duration.
        duration_sec = None
        container = self._video_stream.container
        if container.duration is not None and container.duration > 0:
            # Containers use AV_TIME_BASE as the time base.
            duration_sec = float(self._video_stream.container.duration / av.time_base)
        # Lastly, if that calculation fails, try to calculate it based on the stream duration.
        if duration_sec is None or duration_sec < MAX_FPS_DELTA:
            if self._video_stream.duration is None:
                logger.warning('Video duration unavailable.')
                return 0
            # Streams use stream `time_base` as the time base.
            time_base = self._video_stream.time_base
            if time_base.denominator == 0:
                logger.warning(
                    'Unable to calculate video duration: time_base (%s) has zero denominator!',
                    str(time_base))
                return 0
            duration_sec = float(self._video_stream.duration / time_base)
        return round(duration_sec * self.frame_rate)

    def _handle_eof(self):
        """Fix for issue where if thread_type is 'AUTO' the whole video is not decoded.

        Re-open video if the threading mode is AUTO and we didn't decode all of the frames."""
        # Don't re-open the video if we already did, or if we already decoded all the frames.
        if self._reopened or self.frame_number >= self.duration:
            return False
        self._reopened = True
        # Don't re-open the video if we can't seek or aren't in AUTO/FRAME thread_type mode.
        if not self.is_seekable or not self._video_stream.thread_type in ('AUTO', 'FRAME'):
            return False
        last_frame = self.frame_number
        orig_pos = self._io.tell()
        try:
            self._io.seek(0)
            container = av.open(self._io)
        except:
            # NOTE(review): bare except — restores the original file position for any
            # failure before re-raising (including KeyboardInterrupt).
            self._io.seek(orig_pos)
            raise
        # Swap in the new (single-threaded) container and resume from the last frame.
        self._container.close()
        self._container = container
        self.seek(last_frame)
        return True
+# +"""``scenedetect.detectors`` Module + +This module contains the following scene detection algorithms: + + * :mod:`ContentDetector `: + Detects shot changes by considering pixel changes in the HSV colorspace. + + * :mod:`ThresholdDetector `: + Detects transitions below a set pixel intensity (cuts or fades to black). + + * :mod:`AdaptiveDetector `: + Two-pass version of `ContentDetector` that handles fast camera movement better in some cases. + +Detection algorithms are created by implementing the +:class:`SceneDetector ` interface. Detectors are +typically attached to a :class:`SceneManager ` when +processing videos, however they can also be used to process frames directly. +""" + +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # +# # +# Detection Methods & Algorithms Planned or In Development # +# # +# +# class EdgeDetector(SceneDetector): +# """Detects fast cuts/slow fades by using edge detection on adjacent frames. +# +# Computes the difference image between subsequent frames after applying a +# Sobel filter (can also use a high-pass or other edge detection filters) and +# comparing the result with a set threshold (may be found using -stats mode). +# Detects both fast cuts and slow fades, although some parameters may need to +# be modified for accurate slow fade detection. +# """ +# def __init__(self): +# super(EdgeDetector, self).__init__() +# # +# # +# class DissolveDetector(SceneDetector): +# """Detects slow fades (dissolve cuts) via changes in the HSV colour space. +# +# Detects slow fades only; to scene_detect fast cuts between content scenes, the +# ContentDetector should be used instead. +# """ +# +# def __init__(self): +# super(DissolveDetector, self).__init__() +# # +# # +# class HistogramDetector(SceneDetector): +# """Detects fast cuts via histogram changes between sequential frames. 
+# +# Detects fast cuts between content (using histogram deltas, much like the +# ContentDetector uses HSV colourspace deltas), as well as both fades and +# cuts to/from black (using a threshold, much like the ThresholdDetector). +# """ +# +# def __init__(self): +# super(DissolveDetector, self).__init__() +# # +# # +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + +# PySceneDetect Detection Algorithm Imports +from backend.scenedetect.detectors.content_detector import ContentDetector +from backend.scenedetect.detectors.threshold_detector import ThresholdDetector +from backend.scenedetect.detectors.adaptive_detector import AdaptiveDetector + +# Algorithms being ported: +#from scenedetect.detectors.motion_detector import MotionDetector diff --git a/backend/scenedetect/detectors/adaptive_detector.py b/backend/scenedetect/detectors/adaptive_detector.py new file mode 100644 index 0000000..ac56155 --- /dev/null +++ b/backend/scenedetect/detectors/adaptive_detector.py @@ -0,0 +1,184 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +""":class:`AdaptiveDetector` compares the difference in content between adjacent frames similar +to `ContentDetector` except the threshold isn't fixed, but is a rolling average of adjacent frame +changes. This can help mitigate false detections in situations such as fast camera motions. + +This detector is available from the command-line as the `scene_detect-adaptive` command. 
"""

from logging import getLogger
from typing import List, Optional

from numpy import ndarray

from backend.scenedetect.detectors import ContentDetector

logger = getLogger('pyscenedetect')


class AdaptiveDetector(ContentDetector):
    """Two-pass detector that calculates frame scores with ContentDetector, and then applies
    a rolling average when processing the result that can help mitigate false detections
    in situations such as camera movement.
    """

    # Statsfile key for the adaptive ratio metric; parameterized by the window width and
    # whether luma-only scoring is in effect.
    ADAPTIVE_RATIO_KEY_TEMPLATE = "adaptive_ratio{luma_only} (w={window_width})"

    def __init__(
        self,
        adaptive_threshold: float = 3.0,
        min_scene_len: int = 15,
        window_width: int = 2,
        min_content_val: float = 15.0,
        weights: ContentDetector.Components = ContentDetector.DEFAULT_COMPONENT_WEIGHTS,
        luma_only: bool = False,
        kernel_size: Optional[int] = None,
        video_manager=None,
        min_delta_hsv: Optional[float] = None,
    ):
        """
        Arguments:
            adaptive_threshold: Threshold (float) that score ratio must exceed to trigger a
                new scene (see frame metric adaptive_ratio in stats file).
            min_scene_len: Minimum length of any scene.
            window_width: Size of window (number of frames) before and after each frame to
                average together in order to detect deviations from the mean. Must be at least 1.
            min_content_val: Minimum threshold (float) that the content_val must exceed in order to
                register as a new scene. This is calculated the same way that `detect-content`
                calculates frame score based on `weights`/`luma_only`/`kernel_size`.
            weights: Weight to place on each component when calculating frame score
                (`content_val` in a statsfile, the value `threshold` is compared against).
                If omitted, the default ContentDetector weights are used.
            luma_only: If True, only considers changes in the luminance channel of the video.
                Equivalent to specifying `weights` as :data:`ContentDetector.LUMA_ONLY`.
                Overrides `weights` if both are set.
            kernel_size: Size of kernel to use for post edge detection filtering. If None,
                automatically set based on video resolution.
            video_manager: [DEPRECATED] DO NOT USE. For backwards compatibility only.
            min_delta_hsv: [DEPRECATED] DO NOT USE. Use `min_content_val` instead.
        """
        # TODO(v0.7): Replace with DeprecationWarning that `video_manager` and `min_delta_hsv` will
        # be removed in v0.8.
        if video_manager is not None:
            logger.error('video_manager is deprecated, use video instead.')
        if min_delta_hsv is not None:
            logger.error('min_delta_hsv is deprecated, use min_content_val instead.')
            min_content_val = min_delta_hsv
        if window_width < 1:
            raise ValueError('window_width must be at least 1.')

        # Base ContentDetector is configured to never cut on its own (threshold=255,
        # min_scene_len=0); this class applies its own thresholding in process_frame.
        super().__init__(
            threshold=255.0,
            min_scene_len=0,
            weights=weights,
            luma_only=luma_only,
            kernel_size=kernel_size,
        )

        # TODO: Turn these options into properties.
        self.min_scene_len = min_scene_len
        self.adaptive_threshold = adaptive_threshold
        self.min_content_val = min_content_val
        self.window_width = window_width

        self._adaptive_ratio_key = AdaptiveDetector.ADAPTIVE_RATIO_KEY_TEMPLATE.format(
            window_width=window_width, luma_only='' if not luma_only else '_lum')
        self._first_frame_num = None
        self._last_frame_num = None

        # Frame number of the most recently emitted cut, used for min_scene_len checks.
        self._last_cut: Optional[int] = None

        # Rolling buffer of (frame_num, frame_score) pairs covering the analysis window.
        self._buffer = []

    @property
    def event_buffer_length(self) -> int:
        """Number of frames any detected cuts will be behind the current frame due to buffering."""
        return self.window_width

    def get_metrics(self) -> List[str]:
        """Combines base ContentDetector metric keys with the AdaptiveDetector one."""
        return super().get_metrics() + [self._adaptive_ratio_key]

    def stats_manager_required(self) -> bool:
        """Not required for AdaptiveDetector."""
        return False

    def process_frame(self, frame_num: int, frame_img: Optional[ndarray]) -> List[int]:
        """ Similar to ThresholdDetector, but using the HSV colour space DIFFERENCE instead
        of single-frame RGB/grayscale intensity (thus cannot detect slow fades with this method).

        Arguments:
            frame_num: Frame number of frame that is being passed.

            frame_img: Decoded frame image (numpy.ndarray) to perform scene
                detection on. Can be None *only* if the self.is_processing_required() method
                (inherited from the base SceneDetector class) returns True.

        Returns:
            List of frame numbers of cuts to be added to the cutting list.
        """

        # TODO(#283): Merge this with ContentDetector and turn it on by default.

        # Delegate score calculation to ContentDetector (stores result in _frame_score).
        super().process_frame(frame_num=frame_num, frame_img=frame_img)

        # Need a full window (window_width frames on each side of the target) before
        # a ratio can be computed.
        required_frames = 1 + (2 * self.window_width)
        self._buffer.append((frame_num, self._frame_score))
        if not len(self._buffer) >= required_frames:
            return []
        self._buffer = self._buffer[-required_frames:]
        # The frame being evaluated sits in the middle of the buffer.
        target = self._buffer[self.window_width]
        average_window_score = (
            sum(frame[1] for i, frame in enumerate(self._buffer) if i != self.window_width) /
            (2.0 * self.window_width))

        average_is_zero = abs(average_window_score) < 0.00001

        adaptive_ratio = 0.0
        if not average_is_zero:
            adaptive_ratio = min(target[1] / average_window_score, 255.0)
        elif average_is_zero and target[1] >= self.min_content_val:
            # if we would have divided by zero, set adaptive_ratio to the max (255.0)
            adaptive_ratio = 255.0
        if self.stats_manager is not None:
            self.stats_manager.set_metrics(target[0], {self._adaptive_ratio_key: adaptive_ratio})

        cut_list = []
        # Check to see if adaptive_ratio exceeds the adaptive_threshold as well as there
        # being a large enough content_val to trigger a cut
        if (adaptive_ratio >= self.adaptive_threshold and target[1] >= self.min_content_val):

            if self._last_cut is None:
                # No previously detected cuts
                cut_list.append(target[0])
                self._last_cut = target[0]
            elif (target[0] - self._last_cut) >= self.min_scene_len:
                # Respect the min_scene_len parameter
                cut_list.append(target[0])
                # TODO: Should this be updated every time the threshold is exceeded?
+ # It might help with flash suppression for example. + self._last_cut = target[0] + + return cut_list + + # TODO(0.6.3): Deprecate & remove this method. + def get_content_val(self, frame_num: int) -> Optional[float]: + """Returns the average content change for a frame.""" + if self.stats_manager is not None: + return self.stats_manager.get_metrics(frame_num, [ContentDetector.FRAME_SCORE_KEY])[0] + return 0.0 + + def post_process(self, _unused_frame_num: int): + """Not required for AdaptiveDetector.""" + return [] diff --git a/backend/scenedetect/detectors/content_detector.py b/backend/scenedetect/detectors/content_detector.py new file mode 100644 index 0000000..cc8be84 --- /dev/null +++ b/backend/scenedetect/detectors/content_detector.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +""":class:`ContentDetector` compares the difference in content between adjacent frames against a +set threshold/score, which if exceeded, triggers a scene cut. + +This detector is available from the command-line as the `scene_detect-content` command. +""" +from dataclasses import dataclass +import math +from typing import List, NamedTuple, Optional + +import numpy +import cv2 + +from backend.scenedetect.scene_detector import SceneDetector + + +def _mean_pixel_distance(left: numpy.ndarray, right: numpy.ndarray) -> float: + """Return the mean average distance in pixel values between `left` and `right`. + Both `left and `right` should be 2 dimensional 8-bit images of the same shape. 
+ """ + assert len(left.shape) == 2 and len(right.shape) == 2 + assert left.shape == right.shape + num_pixels: float = float(left.shape[0] * left.shape[1]) + return (numpy.sum(numpy.abs(left.astype(numpy.int32) - right.astype(numpy.int32))) / num_pixels) + + +def _estimated_kernel_size(frame_width: int, frame_height: int) -> int: + """Estimate kernel size based on video resolution.""" + # TODO: This equation is based on manual estimation from a few videos. + # Create a more comprehensive test suite to optimize against. + size: int = 4 + round(math.sqrt(frame_width * frame_height) / 192) + if size % 2 == 0: + size += 1 + return size + + +class ContentDetector(SceneDetector): + """Detects fast cuts using changes in colour and intensity between frames. + + Since the difference between frames is used, unlike the ThresholdDetector, + only fast cuts are detected with this method. To scene_detect slow fades between + content scenes still using HSV information, use the DissolveDetector. + """ + + # TODO: Come up with some good weights for a new default if there is one that can pass + # a wider variety of test cases. + class Components(NamedTuple): + """Components that make up a frame's score, and their default values.""" + delta_hue: float = 1.0 + """Difference between pixel hue values of adjacent frames.""" + delta_sat: float = 1.0 + """Difference between pixel saturation values of adjacent frames.""" + delta_lum: float = 1.0 + """Difference between pixel luma (brightness) values of adjacent frames.""" + delta_edges: float = 0.0 + """Difference between calculated edges of adjacent frames. + + Edge differences are typically larger than the other components, so the detection + threshold may need to be adjusted accordingly.""" + + DEFAULT_COMPONENT_WEIGHTS = Components() + """Default component weights. 
Actual default values are specified in :class:`Components` + to allow adding new components without breaking existing usage.""" + + LUMA_ONLY_WEIGHTS = Components( + delta_hue=0.0, + delta_sat=0.0, + delta_lum=1.0, + delta_edges=0.0, + ) + """Component weights to use if `luma_only` is set.""" + + FRAME_SCORE_KEY = 'content_val' + """Key in statsfile representing the final frame score after weighed by specified components.""" + + METRIC_KEYS = [FRAME_SCORE_KEY, *Components._fields] + """All statsfile keys this detector produces.""" + + @dataclass + class _FrameData: + """Data calculated for a given frame.""" + hue: numpy.ndarray + """Frame hue map [2D 8-bit].""" + sat: numpy.ndarray + """Frame saturation map [2D 8-bit].""" + lum: numpy.ndarray + """Frame luma/brightness map [2D 8-bit].""" + edges: Optional[numpy.ndarray] + """Frame edge map [2D 8-bit, edges are 255, non edges 0]. Affected by `kernel_size`.""" + + def __init__( + self, + threshold: float = 27.0, + min_scene_len: int = 15, + weights: 'ContentDetector.Components' = DEFAULT_COMPONENT_WEIGHTS, + luma_only: bool = False, + kernel_size: Optional[int] = None, + ): + """ + Arguments: + threshold: Threshold the average change in pixel intensity must exceed to trigger a cut. + min_scene_len: Once a cut is detected, this many frames must pass before a new one can + be added to the scene list. + weights: Weight to place on each component when calculating frame score + (`content_val` in a statsfile, the value `threshold` is compared against). + luma_only: If True, only considers changes in the luminance channel of the video. + Equivalent to specifying `weights` as :data:`ContentDetector.LUMA_ONLY`. + Overrides `weights` if both are set. + kernel_size: Size of kernel for expanding detected edges. Must be odd integer + greater than or equal to 3. If None, automatically set using video resolution. 
+ """ + super().__init__() + self._threshold: float = threshold + self._min_scene_len: int = min_scene_len + self._last_scene_cut: Optional[int] = None + self._last_frame: Optional[ContentDetector._FrameData] = None + self._weights: ContentDetector.Components = weights + if luma_only: + self._weights = ContentDetector.LUMA_ONLY_WEIGHTS + self._kernel: Optional[numpy.ndarray] = None + if kernel_size is not None: + print(kernel_size) + if kernel_size < 3 or kernel_size % 2 == 0: + raise ValueError('kernel_size must be odd integer >= 3') + self._kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8) + self._frame_score: Optional[float] = None + + def get_metrics(self): + return ContentDetector.METRIC_KEYS + + def is_processing_required(self, frame_num): + return True + + def _calculate_frame_score(self, frame_num: int, frame_img: numpy.ndarray) -> float: + """Calculate score representing relative amount of motion in `frame_img` compared to + the last time the function was called (returns 0.0 on the first call).""" + # TODO: Add option to enable motion estimation before calculating score components. + # TODO: Investigate methods of performing cheaper alternatives, e.g. shifting or resizing + # the frame to simulate camera movement, using optical flow, etc... + + # Convert image into HSV colorspace. + hue, sat, lum = cv2.split(cv2.cvtColor(frame_img, cv2.COLOR_BGR2HSV)) + + # Performance: Only calculate edges if we have to. + calculate_edges: bool = ((self._weights.delta_edges > 0.0) + or self.stats_manager is not None) + edges = self._detect_edges(lum) if calculate_edges else None + + if self._last_frame is None: + # Need another frame to compare with for score calculation. 
+ self._last_frame = ContentDetector._FrameData(hue, sat, lum, edges) + return 0.0 + + score_components = ContentDetector.Components( + delta_hue=_mean_pixel_distance(hue, self._last_frame.hue), + delta_sat=_mean_pixel_distance(sat, self._last_frame.sat), + delta_lum=_mean_pixel_distance(lum, self._last_frame.lum), + delta_edges=(0.0 if edges is None else _mean_pixel_distance( + edges, self._last_frame.edges)), + ) + + frame_score: float = ( + sum(component * weight for (component, weight) in zip(score_components, self._weights)) + / sum(abs(weight) for weight in self._weights)) + + # Record components and frame score if needed for analysis. + if self.stats_manager is not None: + metrics = {self.FRAME_SCORE_KEY: frame_score} + metrics.update(score_components._asdict()) + self.stats_manager.set_metrics(frame_num, metrics) + + # Store all data required to calculate the next frame's score. + self._last_frame = ContentDetector._FrameData(hue, sat, lum, edges) + return frame_score + + def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]: + """ Similar to ThresholdDetector, but using the HSV colour space DIFFERENCE instead + of single-frame RGB/grayscale intensity (thus cannot scene_detect slow fades with this method). + + Arguments: + frame_num: Frame number of frame that is being passed. + frame_img: Decoded frame image (numpy.ndarray) to perform scene + detection on. Can be None *only* if the self.is_processing_required() method + (inhereted from the base SceneDetector class) returns True. + + Returns: + List of frames where scene cuts have been detected. There may be 0 + or more frames in the list, and not necessarily the same as frame_num. + """ + if frame_img is None: + # TODO(0.6.3): Make frame_img a required argument in the interface. Log a warning + # that passing None is deprecated and results will be incorrect if this is the case. + return [] + + # Initialize last scene cut point at the beginning of the frames of interest. 
+ if self._last_scene_cut is None: + self._last_scene_cut = frame_num + + self._frame_score = self._calculate_frame_score(frame_num, frame_img) + if self._frame_score is None: + return [] + + # We consider any frame over the threshold a new scene, but only if + # the minimum scene length has been reached (otherwise it is ignored). + min_length_met = (frame_num - self._last_scene_cut) >= self._min_scene_len + if self._frame_score >= self._threshold and min_length_met: + self._last_scene_cut = frame_num + return [frame_num] + + return [] + + # TODO(#250): Based on the parameters passed to the ContentDetector constructor, + # ensure that the last scene meets the minimum length requirement, otherwise it + # should be merged with the previous scene. This can be done by caching the cuts + # for the amount of time the minimum length is set to, returning any outstanding + # final cuts in post_process. + + #def post_process(self, frame_num): + # """ + # return [] + + def _detect_edges(self, lum: numpy.ndarray) -> numpy.ndarray: + """Detect edges using the luma channel of a frame. + + Arguments: + lum: 2D 8-bit image representing the luma channel of a frame. + + Returns: + 2D 8-bit image of the same size as the input, where pixels with values of 255 + represent edges, and all other pixels are 0. + """ + # Initialize kernel. + if self._kernel is None: + kernel_size = _estimated_kernel_size(lum.shape[1], lum.shape[0]) + self._kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8) + + # Estimate levels for thresholding. + # TODO(0.6.3): Add config file entries for sigma, aperture/kernel size, etc. + sigma: float = 1.0 / 3.0 + median = numpy.median(lum) + low = int(max(0, (1.0 - sigma) * median)) + high = int(min(255, (1.0 + sigma) * median)) + + # Calculate edges using Canny algorithm, and reduce noise by dilating the edges. + # This increases edge overlap leading to improved robustness against noise and slow + # camera movement. 
Note that very large kernel sizes can negatively affect accuracy. + edges = cv2.Canny(lum, low, high) + return cv2.dilate(edges, self._kernel) diff --git a/backend/scenedetect/detectors/motion_detector.py b/backend/scenedetect/detectors/motion_detector.py new file mode 100644 index 0000000..6636727 --- /dev/null +++ b/backend/scenedetect/detectors/motion_detector.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +""":class:`MotionDetector`, detects motion events using background subtraction, morphological +transforms, and thresholding.""" + +# Third-Party Library Imports +import cv2 + +# PySceneDetect Library Imports +from scenedetect.scene_detector import SparseSceneDetector + + +class MotionDetector(SparseSceneDetector): + """Detects motion events in scenes containing a static background. + + Uses background subtraction followed by noise removal (via morphological + opening) to generate a frame score compared against the set threshold. + + Attributes: + threshold: floating point value compared to each frame's score, which + represents average intensity change per pixel (lower values are + more sensitive to motion changes). Default 0.5, must be > 0.0. + num_frames_post_scene: Number of frames to include in each motion + event after the frame score falls below the threshold, adding any + subsequent motion events to the same scene. + kernel_size: Size of morphological opening kernel for noise removal. + Setting to -1 (default) will auto-compute based on video resolution + (typically 3 for SD, 5-7 for HD). 
class MotionDetector(SparseSceneDetector):
    """Detects motion events in scenes containing a static background.

    Uses background subtraction followed by noise removal (via morphological
    opening) to generate a frame score compared against the set threshold.

    Attributes:
        threshold: floating point value compared to each frame's score, which
            represents average intensity change per pixel (lower values are
            more sensitive to motion changes). Default 0.5, must be > 0.0.
        num_frames_post_scene: Number of frames to include in each motion
            event after the frame score falls below the threshold, adding any
            subsequent motion events to the same scene.
        kernel_size: Size of morphological opening kernel for noise removal.
            Setting to -1 (default) will auto-compute based on video resolution
            (typically 3 for SD, 5-7 for HD). Must be an odd integer > 1.
    """

    def __init__(self, threshold=0.50, num_frames_post_scene=30, kernel_size=-1):
        """Initializes motion-based scene detector object.

        NOTE(review): This detector has not yet been ported to the v0.5 API, so
        construction always fails. The original v0.4 implementation (MOG2
        background subtraction + morphological opening + per-pixel score) is
        preserved upstream for reference.
        """
        # TODO: Requires porting to v0.5 API.
        raise NotImplementedError()

    def process_frame(self, frame_num, frame_img):
        """Placeholder: returns no cuts until the detector is ported.

        The ported version is intended to score each frame by applying the
        background subtractor, removing noise with a morphological opening,
        and averaging the resulting mask intensity per pixel.
        """
        # TODO: port frame scoring from the v0.4 implementation.
        return []

    def post_process(self, frame_num):
        """Placeholder: returns no final cuts until the detector is ported.

        The ported version should emit a final scene if the video ends while a
        motion event is still in progress.
        """
        # TODO: write the last scene when still inside a motion event.
        return []
+ pass + return self.in_motion_event + """ + return [] diff --git a/backend/scenedetect/detectors/threshold_detector.py b/backend/scenedetect/detectors/threshold_detector.py new file mode 100644 index 0000000..ff5ec75 --- /dev/null +++ b/backend/scenedetect/detectors/threshold_detector.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +""":class:`ThresholdDetector` uses a set intensity as a threshold to scene_detect cuts, which are +triggered when the average pixel intensity exceeds or falls below this threshold. + +This detector is available from the command-line as the `scene_detect-threshold` command. +""" + +from enum import Enum +from logging import getLogger +from typing import List, Optional + +import numpy + +from backend.scenedetect.scene_detector import SceneDetector + +logger = getLogger('pyscenedetect') + +## +## ThresholdDetector Helper Functions +## + + +def _compute_frame_average(frame: numpy.ndarray) -> float: + """Computes the average pixel value/intensity for all pixels in a frame. + + The value is computed by adding up the 8-bit R, G, and B values for + each pixel, and dividing by the number of pixels multiplied by 3. + + Arguments: + frame: Frame representing the RGB pixels to average. 
+ + Returns: + Average pixel intensity across all 3 channels of `frame` + """ + num_pixel_values = float(frame.shape[0] * frame.shape[1] * frame.shape[2]) + avg_pixel_value = numpy.sum(frame[:, :, :]) / num_pixel_values + return avg_pixel_value + + +## +## ThresholdDetector Class Implementation +## + + +class ThresholdDetector(SceneDetector): + """Detects fast cuts/slow fades in from and out to a given threshold level. + + Detects both fast cuts and slow fades so long as an appropriate threshold + is chosen (especially taking into account the minimum grey/black level). + """ + + class Method(Enum): + """Method for ThresholdDetector to use when comparing frame brightness to the threshold.""" + FLOOR = 0 + """Fade out happens when frame brightness falls below threshold.""" + CEILING = 1 + """Fade out happens when frame brightness rises above threshold.""" + + THRESHOLD_VALUE_KEY = 'average_rgb' + + def __init__( + self, + threshold: float = 12, + min_scene_len: int = 15, + fade_bias: float = 0.0, + add_final_scene: bool = False, + method: Method = Method.FLOOR, + block_size=None, + ): + """ + Arguments: + threshold: 8-bit intensity value that each pixel value (R, G, and B) + must be <= to in order to trigger a fade in/out. + min_scene_len: FrameTimecode object or integer greater than 0 of the + minimum length, in frames, of a scene (or subsequent scene cut). + fade_bias: Float between -1.0 and +1.0 representing the percentage of + timecode skew for the start of a scene (-1.0 causing a cut at the + fade-to-black, 0.0 in the middle, and +1.0 causing the cut to be + right at the position where the threshold is passed). + add_final_scene: Boolean indicating if the video ends on a fade-out to + generate an additional scene at this timecode. + method: How to treat `threshold` when detecting fade events. + block_size: [DEPRECATED] DO NOT USE. For backwards compatibility. + """ + # TODO(v0.7): Replace with DeprecationWarning that `block_size` will be removed in v0.8. 
class ThresholdDetector(SceneDetector):
    """Detects fast cuts/slow fades in from and out to a given threshold level.

    Detects both fast cuts and slow fades so long as an appropriate threshold
    is chosen (especially taking into account the minimum grey/black level).

    Internally this is a small state machine: each frame's average intensity is
    compared against `threshold`, and transitions across it are recorded as
    fade-outs/fade-ins in `last_fade`. A cut is only emitted on a fade-in that
    follows a fade-out (see :meth:`process_frame`).
    """

    class Method(Enum):
        """Method for ThresholdDetector to use when comparing frame brightness to the threshold."""
        FLOOR = 0
        """Fade out happens when frame brightness falls below threshold."""
        CEILING = 1
        """Fade out happens when frame brightness rises above threshold."""

    # Metric key under which the per-frame average intensity is stored in the
    # stats manager (allows re-runs to skip recomputation).
    THRESHOLD_VALUE_KEY = 'average_rgb'

    def __init__(
        self,
        threshold: float = 12,
        min_scene_len: int = 15,
        fade_bias: float = 0.0,
        add_final_scene: bool = False,
        method: Method = Method.FLOOR,
        block_size=None,
    ):
        """
        Arguments:
            threshold: 8-bit intensity value that each pixel value (R, G, and B)
                must be <= to in order to trigger a fade in/out.
            min_scene_len: FrameTimecode object or integer greater than 0 of the
                minimum length, in frames, of a scene (or subsequent scene cut).
            fade_bias: Float between -1.0 and +1.0 representing the percentage of
                timecode skew for the start of a scene (-1.0 causing a cut at the
                fade-to-black, 0.0 in the middle, and +1.0 causing the cut to be
                right at the position where the threshold is passed).
            add_final_scene: Boolean indicating if the video ends on a fade-out to
                generate an additional scene at this timecode.
            method: How to treat `threshold` when detecting fade events.
            block_size: [DEPRECATED] DO NOT USE. For backwards compatibility.
        """
        # TODO(v0.7): Replace with DeprecationWarning that `block_size` will be removed in v0.8.
        if block_size is not None:
            logger.error('block_size is deprecated.')

        super().__init__()
        # NOTE: int() truncates fractional thresholds; frame averages are floats.
        self.threshold = int(threshold)
        self.method = ThresholdDetector.Method(method)
        self.fade_bias = fade_bias
        self.min_scene_len = min_scene_len
        # False until the first frame has been processed (seeds the state machine).
        self.processed_frame = False
        # Frame number of the last emitted cut; None until the first frame arrives.
        self.last_scene_cut = None
        # Whether to add an additional scene or not when ending on a fade out
        # (as cuts are only added on fade ins; see post_process() for details).
        self.add_final_scene = add_final_scene
        # Where the last fade (threshold crossing) was detected.
        self.last_fade = {
            'frame': 0, # frame number where the last detected fade is
            'type': None # type of fade, can be either 'in' or 'out'
        }
        self._metric_keys = [ThresholdDetector.THRESHOLD_VALUE_KEY]

    def get_metrics(self) -> List[str]:
        """Names of the frame metrics this detector records in the stats manager."""
        return self._metric_keys

    def process_frame(self, frame_num: int, frame_img: Optional[numpy.ndarray]) -> List[int]:
        """
        Args:
            frame_num (int): Frame number of frame that is being passed.
            frame_img (numpy.ndarray or None): Decoded frame image (numpy.ndarray) to perform
                scene detection with. Can be None *only* if the self.is_processing_required()
                method (inhereted from the base SceneDetector class) returns True.
        Returns:
            List[int]: List of frames where scene cuts have been detected. There may be 0
            or more frames in the list, and not necessarily the same as frame_num.
        """

        # Initialize last scene cut point at the beginning of the frames of interest.
        if self.last_scene_cut is None:
            self.last_scene_cut = frame_num

        # List of cuts to return.
        cut_list = []

        # The metric used here to detect scene breaks is the percent of pixels
        # less than or equal to the threshold; however, since this differs on
        # user-supplied values, we supply the average pixel intensity as this
        # frame metric instead (to assist with manually selecting a threshold).
        if (self.stats_manager is not None) and (self.stats_manager.metrics_exist(
                frame_num, self._metric_keys)):
            frame_avg = self.stats_manager.get_metrics(frame_num, self._metric_keys)[0]
        else:
            frame_avg = _compute_frame_average(frame_img)
            if self.stats_manager is not None:
                self.stats_manager.set_metrics(frame_num, {self._metric_keys[0]: frame_avg})

        if self.processed_frame:
            if self.last_fade['type'] == 'in' and ((
                    (self.method == ThresholdDetector.Method.FLOOR and frame_avg < self.threshold)
                    or (self.method == ThresholdDetector.Method.CEILING
                        and frame_avg >= self.threshold))):
                # Just faded out of a scene, wait for next fade in.
                self.last_fade['type'] = 'out'
                self.last_fade['frame'] = frame_num

            elif self.last_fade['type'] == 'out' and (
                    (self.method == ThresholdDetector.Method.FLOOR and frame_avg >= self.threshold)
                    or (self.method == ThresholdDetector.Method.CEILING
                        and frame_avg < self.threshold)):
                # Only add the scene if min_scene_len frames have passed.
                if (frame_num - self.last_scene_cut) >= self.min_scene_len:
                    # Just faded into a new scene, compute timecode for the scene
                    # split based on the fade bias (-1.0 = at fade-out, +1.0 = here).
                    f_out = self.last_fade['frame']
                    f_split = int(
                        (frame_num + f_out + int(self.fade_bias * (frame_num - f_out))) / 2)
                    cut_list.append(f_split)
                    self.last_scene_cut = frame_num
                # NOTE(review): fade state is updated even when the cut is
                # suppressed by min_scene_len — reconstructed from the patch;
                # confirm indentation against upstream PySceneDetect.
                self.last_fade['type'] = 'in'
                self.last_fade['frame'] = frame_num
        else:
            # First frame seeds the state machine: classify it as already
            # faded out or in depending on its brightness.
            self.last_fade['frame'] = 0
            if frame_avg < self.threshold:
                self.last_fade['type'] = 'out'
            else:
                self.last_fade['type'] = 'in'
        self.processed_frame = True
        return cut_list

    def post_process(self, frame_num: int):
        """Writes a final scene cut if the last detected fade was a fade-out.

        Only writes the scene cut if add_final_scene is true, and the last fade
        that was detected was a fade-out. There is no bias applied to this cut
        (since there is no corresponding fade-in) so it will be located at the
        exact frame where the fade-out crossed the detection threshold.
        """

        # If the last fade detected was a fade out, we add a corresponding new
        # scene break to indicate the end of the scene. This is only done for
        # fade-outs, as a scene cut is already added when a fade-in is found.
        cut_times = []
        if self.last_fade['type'] == 'out' and self.add_final_scene and (
                (self.last_scene_cut is None and frame_num >= self.min_scene_len) or
                (frame_num - self.last_scene_cut) >= self.min_scene_len):
            cut_times.append(self.last_fade['frame'])
        return cut_times
MAX_FPS_DELTA: float = 1.0 / 100000
"""Maximum amount two framerates can differ by for equality testing."""

# TODO(0.6.3): Replace uses of Union[int, float, str] with TimecodeValue.
TimecodeValue = Union[int, float, str]
"""Named type for values representing timecodes. Must be in one of the following forms:

    1. Timecode as `str` in the form 'HH:MM:SS[.nnn]' (`'01:23:45'` or `'01:23:45.678'`)
    2. Number of seconds as `float`, or `str` in form 'Ss' or 'S.SSSs' (`'2s'` or `'2.3456s'`)
    3. Exact number of frames as `int`, or `str` in form NNNNN (`123` or `'123'`)
"""


class FrameTimecode:
    """Frame-accurate timecode anchored to a fixed framerate, convertible between
    an exact frame number, seconds, and 'HH:MM:SS[.nnn]' strings.

    A timecode value is valid only in one of the following three forms:

    1. Timecode as `str` in the form 'HH:MM:SS[.nnn]' (`'01:23:45'` or `'01:23:45.678'`)
    2. Number of seconds as `float`, or `str` in form 'Ss' or 'S.SSSs' (`'2s'` or `'2.3456s'`)
    3. Exact number of frames as `int`, or `str` in form NNNNN (`123` or `'123'`)

    Arithmetic and comparisons accept any of the above types as the other operand.
    Subtraction clamps at frame 0 (negative positions are not representable).
    """

    def __init__(self,
                 timecode: Union[int, float, str, 'FrameTimecode'] = None,
                 fps: Union[int, float, str, 'FrameTimecode'] = None):
        """
        Arguments:
            timecode: A frame number (int), number of seconds (float), or timecode (str in
                the form `'HH:MM:SS'` or `'HH:MM:SS.nnn'`). May also be another
                FrameTimecode, in which case it is copied and `fps` must be None.
            fps: The framerate or FrameTimecode to use as a time base for all arithmetic.
        Raises:
            TypeError: Thrown if either `timecode` or `fps` are unsupported types.
            ValueError: Thrown when specifying a negative timecode or framerate.
        """
        # These two fields fully define the timecode; `framerate` must never be
        # modified after construction (only read).
        self.framerate = None
        self.frame_num = None

        # Copy constructor: clone both fields; a separate fps is not allowed.
        if isinstance(timecode, FrameTimecode):
            self.framerate = timecode.framerate
            self.frame_num = timecode.frame_num
            if fps is not None:
                raise TypeError('Framerate cannot be overwritten when copying a FrameTimecode.')
            return

        if fps is None:
            raise TypeError('Framerate (fps) is a required argument.')
        if isinstance(fps, FrameTimecode):
            fps = fps.framerate
        if not isinstance(fps, (int, float)):
            raise TypeError('Framerate must be of type int/float.')
        # Integer rates must be > 0; float rates must be at least MAX_FPS_DELTA.
        fps_is_valid = (fps > 0) if isinstance(fps, int) else (fps >= MAX_FPS_DELTA)
        if not fps_is_valid:
            raise ValueError('Framerate must be positive and greater than zero.')
        self.framerate = float(fps)

        # Store the position as an exact number of frames.
        if isinstance(timecode, str):
            self.frame_num = self._parse_timecode_string(timecode)
        else:
            self.frame_num = self._parse_timecode_number(timecode)

    def get_frames(self) -> int:
        """Current position as an exact frame number.

        Returns:
            int: The current time in frames (the current frame number).
        """
        return self.frame_num

    def get_framerate(self) -> float:
        """Framerate used by this FrameTimecode as its time base.

        Returns:
            float: Framerate of the current FrameTimecode object, in frames per second.
        """
        return self.framerate

    def equal_framerate(self, fps) -> bool:
        """Determine if the passed framerate is equal to this object's framerate.

        Arguments:
            fps: Framerate to compare against within the precision constant defined in this
                module (see :data:`MAX_FPS_DELTA`).
        Returns:
            bool: True if passed fps matches this object's framerate, False otherwise.
        """
        return math.fabs(self.framerate - fps) < MAX_FPS_DELTA

    def get_seconds(self) -> float:
        """Current position in seconds (frame number divided by framerate).

        Returns:
            float: The current time/position in seconds.
        """
        return float(self.frame_num) / self.framerate

    def get_timecode(self, precision: int = 3, use_rounding: bool = True) -> str:
        """Format the current position as 'HH:MM:SS[.nnn]'.

        Args:
            precision: The number of decimal places to include in the output ``[.nnn]``.
            use_rounding: Rounds the output to the desired precision. If False, the value
                will be truncated to the specified precision.

        Returns:
            str: The current time in the form ``"HH:MM:SS[.nnn]"``.
        """
        # Split the total seconds into hours, minutes, and remaining seconds.
        remainder = self.get_seconds()
        hrs = int(remainder / (60.0 * 60.0))
        remainder -= hrs * 60.0 * 60.0
        mins = int(remainder / 60.0)
        remainder -= mins * 60.0
        # Render the seconds field to the requested precision.
        if precision > 0:
            if use_rounding:
                remainder = round(remainder, precision)
            fraction = format(remainder, '.%df' % precision)[-precision:]
            sec_field = '%02d.%s' % (int(remainder), fraction)
        else:
            sec_field = '%02d' % (int(round(remainder, 0)) if use_rounding else int(remainder))
        return '%02d:%02d:%s' % (hrs, mins, sec_field)

    def previous_frame(self) -> 'FrameTimecode':
        """Return a new FrameTimecode for the previous frame (or 0 if on frame 0)."""
        prior = FrameTimecode(self)
        prior.frame_num = max(0, prior.frame_num - 1)
        return prior

    def _seconds_to_frames(self, seconds: float) -> int:
        """Convert seconds to the nearest whole number of frames at this framerate.

        Returns:
            Integer number of frames the passed number of seconds represents using
            this FrameTimecode's framerate property.
        """
        return round(seconds * self.framerate)

    def _parse_timecode_number(self, timecode: Union[int, float]) -> int:
        """Convert a frame count (int), seconds (float), or FrameTimecode into frames.

        Raises:
            TypeError, ValueError
        """
        # Exact number of frames N.
        if isinstance(timecode, int):
            if timecode < 0:
                raise ValueError('Timecode frame number must be positive and greater than zero.')
            return timecode
        # Number of seconds S.
        if isinstance(timecode, float):
            if timecode < 0.0:
                raise ValueError('Timecode value must be positive and greater than zero.')
            return self._seconds_to_frames(timecode)
        # Another FrameTimecode: reuse its frame count directly.
        if isinstance(timecode, FrameTimecode):
            return timecode.frame_num
        if timecode is None:
            raise TypeError('Timecode/frame number must be specified!')
        raise TypeError('Timecode format/type unrecognized.')

    def _parse_timecode_string(self, timecode_string: str) -> int:
        """Parse a string in any of the three valid forms into an exact frame count:
        'HH:MM:SS[.nnn]' timecode, seconds ending in 's' ('300s'/'300.0s'), or a
        bare integer number of frames ('9000').

        Requires that the `framerate` property is set before calling this method.

        Raises:
            TypeError, ValueError
        """
        if self.framerate is None:
            raise TypeError('self.framerate must be set before calling _parse_timecode_string.')
        # Number of seconds S with an 's' suffix.
        if timecode_string.endswith('s'):
            seconds_part = timecode_string[:-1]
            if not seconds_part.replace('.', '').isdigit():
                raise ValueError('All characters in timecode seconds string must be digits.')
            seconds_value = float(seconds_part)
            if seconds_value < 0.0:
                raise ValueError('Timecode seconds value must be positive.')
            return self._seconds_to_frames(seconds_value)
        # Exact number of frames N.
        if timecode_string.isdigit():
            frame_count = int(timecode_string)
            if frame_count < 0:
                raise ValueError('Timecode frame number must be positive.')
            return frame_count
        # Standard timecode in string format 'HH:MM:SS[.nnn]'.
        fields = timecode_string.split(':')
        if not (len(fields) == 3 and fields[0].isdigit() and fields[1].isdigit()
                and fields[2].replace('.', '').isdigit()):
            raise ValueError('Unrecognized or improperly formatted timecode string.')
        hrs, mins = int(fields[0]), int(fields[1])
        secs = float(fields[2]) if '.' in fields[2] else int(fields[2])
        if not (hrs >= 0 and mins >= 0 and secs >= 0 and mins < 60 and secs < 60):
            raise ValueError('Invalid timecode range (values outside allowed range).')
        secs += (((hrs * 60.0) + mins) * 60.0)
        return self._seconds_to_frames(secs)

    def _as_frame_delta(self, other: Union[int, float, str, 'FrameTimecode'],
                        op_name: str) -> int:
        """Normalize `other` into a signed frame delta for in-place +/-.

        `op_name` ('addition' or 'subtraction') selects the error message so
        the raised exceptions match each operator exactly.
        """
        if isinstance(other, int):
            return other
        if isinstance(other, FrameTimecode):
            if not self.equal_framerate(other.framerate):
                raise ValueError(
                    'FrameTimecode instances require equal framerate for %s.' % op_name)
            return other.frame_num
        # Value given as a number of seconds.
        if isinstance(other, float):
            return self._seconds_to_frames(other)
        # Value given as a timecode string.
        if isinstance(other, str):
            return self._parse_timecode_string(other)
        if op_name == 'addition':
            raise TypeError('Unsupported type for performing addition with FrameTimecode.')
        raise TypeError('Unsupported type for performing subtraction with FrameTimecode: %s' %
                        type(other))

    def __iadd__(self, other: Union[int, float, str, 'FrameTimecode']) -> 'FrameTimecode':
        self.frame_num += self._as_frame_delta(other, 'addition')
        # Clamp at 0: adding negative seconds/frames must not go below frame 0.
        if self.frame_num < 0:
            self.frame_num = 0
        return self

    def __add__(self, other: Union[int, float, str, 'FrameTimecode']) -> 'FrameTimecode':
        result = FrameTimecode(timecode=self)
        result += other
        return result

    def __isub__(self, other: Union[int, float, str, 'FrameTimecode']) -> 'FrameTimecode':
        self.frame_num -= self._as_frame_delta(other, 'subtraction')
        # Clamp at 0: subtraction past the start yields frame 0, not a negative.
        if self.frame_num < 0:
            self.frame_num = 0
        return self

    def __sub__(self, other: Union[int, float, str, 'FrameTimecode']) -> 'FrameTimecode':
        result = FrameTimecode(timecode=self)
        result -= other
        return result

    def _comparison_operands(self, other: Union[int, float, str, 'FrameTimecode'],
                             op_symbol: str):
        """Normalize `other` into a (lhs, rhs) pair comparable with this timecode.

        ints and strings compare by frame number, floats by seconds, and other
        FrameTimecodes by frame number (requiring an equal framerate).
        `op_symbol` is only used to build the TypeError message.
        """
        if isinstance(other, int):
            return self.frame_num, other
        if isinstance(other, float):
            return self.get_seconds(), other
        if isinstance(other, str):
            return self.frame_num, self._parse_timecode_string(other)
        if isinstance(other, FrameTimecode):
            if not self.equal_framerate(other.framerate):
                raise TypeError(
                    'FrameTimecode objects must have the same framerate to be compared.')
            return self.frame_num, other.frame_num
        raise TypeError('Unsupported type for performing %s with FrameTimecode: %s' %
                        (op_symbol, type(other)))

    def __eq__(self, other: Union[int, float, str, 'FrameTimecode']) -> bool:
        # None never equals a timecode (and must not raise).
        if other is None:
            return False
        lhs, rhs = self._comparison_operands(other, '==')
        return lhs == rhs

    def __ne__(self, other: Union[int, float, str, 'FrameTimecode']) -> bool:
        return not self == other

    def __lt__(self, other: Union[int, float, str, 'FrameTimecode']) -> bool:
        lhs, rhs = self._comparison_operands(other, '<')
        return lhs < rhs

    def __le__(self, other: Union[int, float, str, 'FrameTimecode']) -> bool:
        lhs, rhs = self._comparison_operands(other, '<=')
        return lhs <= rhs

    def __gt__(self, other: Union[int, float, str, 'FrameTimecode']) -> bool:
        lhs, rhs = self._comparison_operands(other, '>')
        return lhs > rhs

    def __ge__(self, other: Union[int, float, str, 'FrameTimecode']) -> bool:
        lhs, rhs = self._comparison_operands(other, '>=')
        return lhs >= rhs

    # TODO(v1.0): __int__ and __float__ should be removed. Mark as deprecated, and indicate
    # need to use relevant property instead.

    def __int__(self) -> int:
        return self.frame_num

    def __float__(self) -> float:
        return self.get_seconds()

    def __str__(self) -> str:
        return self.get_timecode()

    def __repr__(self) -> str:
        return '%s [frame=%d, fps=%.3f]' % (self.get_timecode(), self.frame_num, self.framerate)

    def __hash__(self) -> int:
        return self.frame_num
class FakeTqdmObject:
    """No-op stand-in for a `tqdm` progress bar, used when tqdm is unavailable."""

    # pylint: disable=unused-argument
    def __init__(self, **kwargs):
        """No-op."""

    def update(self, n=1):
        """No-op."""

    def close(self):
        """No-op."""

    def set_description(self, desc=None, refresh=True):
        """No-op."""

    # pylint: enable=unused-argument


class FakeTqdmLoggingRedirect:
    """No-op stand-in for tqdm's log-redirect context manager."""

    # pylint: disable=redefined-builtin,unused-argument
    def __init__(self, **kwargs):
        """No-op."""

    def __enter__(self):
        """No-op."""

    def __exit__(self, type, value, traceback):
        """No-op."""

    # pylint: enable=redefined-builtin,unused-argument


# Prefer the real tqdm when installed; otherwise fall back to the no-op shims
# above so progress-bar call sites need no conditional logic.
try:
    # pylint: disable=unused-import
    from tqdm import tqdm
    from tqdm.contrib.logging import logging_redirect_tqdm
    # pylint: enable=unused-import
except ModuleNotFoundError:
    # pylint: disable=invalid-name
    tqdm = FakeTqdmObject
    logging_redirect_tqdm = FakeTqdmLoggingRedirect
    # pylint: enable=invalid-name


# TODO: Move this into scene_manager.
def get_cv2_imwrite_params() -> Dict[str, Union[int, None]]:
    """Map supported image formats to their OpenCV quality/compression parameter.

    Returns:
        Dictionary of supported image formats/extensions ('jpg', 'png', etc...) mapped to the
        respective OpenCV quality or compression parameter as {'jpg': cv2.IMWRITE_JPEG_QUALITY,
        'png': cv2.IMWRITE_PNG_COMPRESSION, ...}. Parameter will be None if not found on the
        current system library (e.g. {'jpg': None}).
    """

    def _lookup(param_name: str) -> Union[int, None]:
        # Older OpenCV releases exposed these constants with a 'CV_' prefix.
        if param_name.startswith('CV_'):
            param_name = param_name[3:]
        # getattr default replaces the original try/except AttributeError.
        return getattr(cv2, param_name, None)

    return {
        'jpg': _lookup('IMWRITE_JPEG_QUALITY'),
        'png': _lookup('IMWRITE_PNG_COMPRESSION'),
        'webp': _lookup('IMWRITE_WEBP_QUALITY'),
    }
def get_file_name(file_path: AnyStr, include_extension=True) -> AnyStr:
    """Return the file name that `file_path` refers to, optionally removing the extension.

    If `include_extension` is False, the result will always be a str (bytes paths
    are decoded using the filesystem encoding).

    E.g. /tmp/foo.bar -> foo

    Arguments:
        file_path: Path (str or bytes) to take the base name of.
        include_extension: If False, strip the final '.ext' suffix and return str.

    Returns:
        Base name of `file_path`, without its extension when `include_extension`
        is False.
    """
    file_name = os.path.basename(file_path)
    if not include_extension:
        # BUGFIX: str(bytes) would yield "b'...'" instead of decoding the path,
        # corrupting the result for bytes inputs; os.fsdecode() decodes bytes
        # with the filesystem encoding and passes str through unchanged.
        file_name = os.fsdecode(file_name)
        last_dot_pos = file_name.rfind('.')
        if last_dot_pos >= 0:
            file_name = file_name[:last_dot_pos]
    return file_name
+ + """ + # If an output directory is defined and the file path is a relative path, open + # the file handle in the output directory instead of the working directory. + if output_directory is not None and not os.path.isabs(file_path): + file_path = os.path.join(output_directory, file_path) + # Now that file_path is an absolute path, let's make sure all the directories + # exist for us to start writing files there. + os.makedirs(os.path.split(os.path.abspath(file_path))[0], exist_ok=True) + return file_path + + +## +## Logging +## + + +def init_logger(log_level: int = logging.INFO, + show_stdout: bool = False, + log_file: Optional[str] = None): + """Initializes logging for PySceneDetect. The logger instance used is named 'pyscenedetect'. + By default the logger has no handlers to suppress output. All existing log handlers are replaced + every time this function is invoked. + + Arguments: + log_level: Verbosity of log messages. Should be one of [logging.INFO, logging.DEBUG, + logging.WARNING, logging.ERROR, logging.CRITICAL]. + show_stdout: If True, add handler to show log messages on stdout (default: False). + log_file: If set, add handler to dump debug log messages to given file path. + """ + # Format of log messages depends on verbosity. + INFO_TEMPLATE = '[PySceneDetect] %(message)s' + DEBUG_TEMPLATE = '%(levelname)s: %(module)s.%(funcName)s(): %(message)s' + # Get the named logger and remove any existing handlers. + logger_instance = logging.getLogger('pyscenedetect') + logger_instance.handlers = [] + logger_instance.setLevel(log_level) + # Add stdout handler if required. + if show_stdout: + handler = logging.StreamHandler(stream=sys.stdout) + handler.setLevel(log_level) + handler.setFormatter( + logging.Formatter(fmt=DEBUG_TEMPLATE if log_level == logging.DEBUG else INFO_TEMPLATE)) + logger_instance.addHandler(handler) + # Add debug log handler if required. 
+ if log_file: + log_file = get_and_create_path(log_file) + handler = logging.FileHandler(log_file) + handler.setLevel(logging.DEBUG) + handler.setFormatter(logging.Formatter(fmt=DEBUG_TEMPLATE)) + logger_instance.addHandler(handler) + + +## +## Running External Commands +## + + +class CommandTooLong(Exception): + """Raised if the length of a command line argument exceeds the limit allowed on Windows.""" + + +def invoke_command(args: List[str]) -> int: + """Same as calling Python's subprocess.call() method, but explicitly + raises a different exception when the command length is too long. + + See https://github.com/Breakthrough/PySceneDetect/issues/164 for details. + + Arguments: + args: List of strings to pass to subprocess.call(). + + Returns: + Return code of command. + + Raises: + CommandTooLong: `args` exceeds built in command line length limit on Windows. + """ + try: + return subprocess.call(args) + except OSError as err: + if os.name != 'nt': + raise + exception_string = str(err) + # Error 206: The filename or extension is too long + # Error 87: The parameter is incorrect + to_match = ('206', '87') + if any([x in exception_string for x in to_match]): + raise CommandTooLong() from err + raise + + +def get_ffmpeg_path() -> Optional[str]: + """Get path to ffmpeg if available on the current system. First looks at PATH, then checks if + one is available from the `imageio_ffmpeg` package. Returns None if ffmpeg couldn't be found. + """ + try: + subprocess.call(['ffmpeg', '-v', 'quiet']) + return 'ffmpeg' + except OSError: + pass + # Failed to invoke ffmpeg from PATH, see if we have a copy from imageio_ffmpeg. + try: + # pylint: disable=import-outside-toplevel + from imageio_ffmpeg import get_ffmpeg_exe + # pylint: enable=import-outside-toplevel + subprocess.call([get_ffmpeg_exe(), '-v', 'quiet']) + return get_ffmpeg_exe() + # Gracefully handle case where imageio_ffmpeg is not available. 
+ except ModuleNotFoundError: + pass + # Handle case where path might be wrong/non-existent. + except OSError: + pass + # get_ffmpeg_exe may throw a RuntimeError if the executable is not available. + except RuntimeError: + pass + return None + + +def get_ffmpeg_version() -> Optional[str]: + """Get ffmpeg version identifier, or None if ffmpeg is not found. Uses `get_ffmpeg_path()`.""" + ffmpeg_path = get_ffmpeg_path() + if ffmpeg_path is None: + return None + # If get_ffmpeg_path() returns a value, the path it returns should be invocable. + output = subprocess.check_output(args=[ffmpeg_path, '-version'], text=True) + output_split = output.split() + if len(output_split) >= 3 and output_split[1] == 'version': + return output_split[2] + # If parsing the version fails, return the entire first line of output. + return output.splitlines()[0] + + +def get_mkvmerge_version() -> Optional[str]: + """Get mkvmerge version identifier, or None if mkvmerge is not found in PATH.""" + tool_name = 'mkvmerge' + try: + output = subprocess.check_output(args=[tool_name, '--version'], text=True) + except FileNotFoundError: + return None + output_split = output.split() + if len(output_split) >= 1 and output_split[0] == tool_name: + return ' '.join(output_split[1:]) + # If parsing the version fails, return the entire first line of output. + return output.splitlines()[0] + + +def get_system_version_info() -> str: + """Get the system's operating system, Python, packages, and external tool versions. + Useful for debugging or filing bug reports. + + Used for the `scenedetect version -a` command. 
+ """ + output_template = '{:<12} {}' + line_separator = '-' * 60 + not_found_str = 'Not Installed' + out_lines = [] + + # System (Python, OS) + out_lines += ['System Info', line_separator] + out_lines += [ + output_template.format(name, version) for name, version in ( + ('OS', '%s' % platform.platform()), + ('Python', '%d.%d.%d' % sys.version_info[0:3]), + ) + ] + + # Third-Party Packages + out_lines += ['', 'Packages', line_separator] + third_party_packages = ( + 'av', + 'click', + 'cv2', + 'moviepy', + 'numpy', + 'platformdirs', + 'scenedetect', + 'tqdm', + ) + for module_name in third_party_packages: + try: + module = importlib.import_module(module_name) + out_lines.append(output_template.format(module_name, module.__version__)) + except ModuleNotFoundError: + out_lines.append(output_template.format(module_name, not_found_str)) + + # External Tools + out_lines += ['', 'Tools', line_separator] + + tool_version_info = ( + ('ffmpeg', get_ffmpeg_version()), + ('mkvmerge', get_mkvmerge_version()), + ) + + for (tool_name, tool_version) in tool_version_info: + out_lines.append( + output_template.format(tool_name, tool_version if tool_version else not_found_str)) + + return '\n'.join(out_lines) + + +class Template(string.Template): + """Template matcher used to replace instances of $TEMPLATES in filenames.""" + idpattern = '[A-Z0-9_]+' + flags = re.ASCII diff --git a/backend/scenedetect/scene_detector.py b/backend/scenedetect/scene_detector.py new file mode 100644 index 0000000..2ad0d84 --- /dev/null +++ b/backend/scenedetect/scene_detector.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . 
+# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""``scenedetect.scene_detector`` Module + +This module contains the :class:`SceneDetector` interface, from which all scene detectors in +:mod:`scenedetect.detectors` module are derived from. + +The SceneDetector class represents the interface which detection algorithms are expected to provide +in order to be compatible with PySceneDetect. + +.. warning:: + + This API is still unstable, and changes and design improvements are planned for the v1.0 + release. Instead of just timecodes, detection algorithms will also provide a specific type of + event (in, out, cut, etc...). +""" + +from typing import List, Optional, Tuple + +import numpy + +from backend.scenedetect.stats_manager import StatsManager + + +# pylint: disable=unused-argument, no-self-use +class SceneDetector: + """ Base class to inherit from when implementing a scene detection algorithm. + + This API is not yet stable and subject to change. + + This represents a "dense" scene detector, which returns a list of frames where + the next scene/shot begins in a video. + + Also see the implemented scene detectors in the scenedetect.detectors module + to get an idea of how a particular detector can be created. + """ + # TODO(v0.7): Make this a proper abstract base class. + + stats_manager: Optional[StatsManager] = None + """Optional :class:`StatsManager ` to + use for caching frame metrics to and from.""" + + # TODO(v1.0): Remove - this is a rarely used case for what is now a neglegible performance gain. + def is_processing_required(self, frame_num: int) -> bool: + """[DEPRECATED] DO NOT USE + + Test if all calculations for a given frame are already done. 
+ + Returns: + False if the SceneDetector has assigned _metric_keys, and the + stats_manager property is set to a valid StatsManager object containing + the required frame metrics/calculations for the given frame - thus, not + needing the frame to perform scene detection. + + True otherwise (i.e. the frame_img passed to process_frame is required + to be passed to process_frame for the given frame_num). + """ + metric_keys = self.get_metrics() + return not metric_keys or not (self.stats_manager is not None + and self.stats_manager.metrics_exist(frame_num, metric_keys)) + + def stats_manager_required(self) -> bool: + """Stats Manager Required: Prototype indicating if detector requires stats. + + Returns: + True if a StatsManager is required for the detector, False otherwise. + """ + return False + + def get_metrics(self) -> List[str]: + """Get Metrics: Get a list of all metric names/keys used by the detector. + + Returns: + List of strings of frame metric key names that will be used by + the detector when a StatsManager is passed to process_frame. + """ + return [] + + def process_frame(self, frame_num: int, frame_img: Optional[numpy.ndarray]) -> List[int]: + """Process Frame: Computes/stores metrics and detects any scene changes. + + Prototype method, no actual detection. + + Returns: + List of frame numbers of cuts to be added to the cutting list. + """ + return [] + + def post_process(self, frame_num: int) -> List[int]: + """Post Process: Performs any processing after the last frame has been read. + + Prototype method, no actual detection. + + Returns: + List of frame numbers of cuts to be added to the cutting list. + """ + return [] + + @property + def event_buffer_length(self) -> int: + """The amount of frames a given event can be buffered for, in time. Represents maximum + amount any event can be behind `frame_number` in the result of :meth:`process_frame`. 
+ """ + return 0 + + +class SparseSceneDetector(SceneDetector): + """Base class to inherit from when implementing a sparse scene detection algorithm. + + This class will be removed in v1.0 and should not be used. + + Unlike dense detectors, sparse detectors scene_detect "events" and return a *pair* of frames, + as opposed to just a single cut. + + An example of a SparseSceneDetector is the MotionDetector. + """ + + def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[Tuple[int, int]]: + """Process Frame: Computes/stores metrics and detects any scene changes. + + Prototype method, no actual detection. + + Returns: + List of frame pairs representing individual scenes + to be added to the output scene list directly. + """ + return [] + + def post_process(self, frame_num: int) -> List[Tuple[int, int]]: + """Post Process: Performs any processing after the last frame has been read. + + Prototype method, no actual detection. + + Returns: + List of frame pairs representing individual scenes + to be added to the output scene list directly. + """ + return [] diff --git a/backend/scenedetect/scene_manager.py b/backend/scenedetect/scene_manager.py new file mode 100644 index 0000000..70732fa --- /dev/null +++ b/backend/scenedetect/scene_manager.py @@ -0,0 +1,1034 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""``scenedetect.scene_manager`` Module + +This module implements :class:`SceneManager`, coordinates running a +:mod:`SceneDetector ` over the frames of a video +(:mod:`VideoStream `). 
Video decoding is done in a separate thread to +improve performance. + +This module also contains other helper functions (e.g. :func:`save_images`) which can be used to +process the resulting scene list. + +=============================================================== +Usage +=============================================================== + +The following example shows basic usage of a :class:`SceneManager`: + +.. code:: python + + from scenedetect import open_video, SceneManager, ContentDetector + video = open_video(video_path) + scene_manager = SceneManager() + scene_manager.add_detector(ContentDetector()) + # Detect all scenes in video from current position to end. + scene_manager.detect_scenes(video) + # `get_scene_list` returns a list of start/end timecode pairs + # for each scene that was found. + scenes = scene_manager.get_scene_list() + +An optional callback can also be invoked on each detected scene, for example: + +.. code:: python + + from scenedetect import open_video, SceneManager, ContentDetector + + # Callback to invoke on the first frame of every new scene detection. + def on_new_scene(frame_img: numpy.ndarray, frame_num: int): + print("New scene found at frame %d." % frame_num) + + video = open_video(test_video_file) + scene_manager = SceneManager() + scene_manager.add_detector(ContentDetector()) + scene_manager.detect_scenes(video=video, callback=on_new_scene) + +To use a `SceneManager` with a webcam/device or existing `cv2.VideoCapture` device, use the +:class:`VideoCaptureAdapter ` instead of +`open_video`. + +======================================================================= +Storing Per-Frame Statistics +======================================================================= + +`SceneManager` can use an optional +:class:`StatsManager ` to save frame statistics to disk: + +.. 
code:: python + + from scenedetect import open_video, ContentDetector, SceneManager, StatsManager + video = open_video(test_video_file) + scene_manager = SceneManager(stats_manager=StatsManager()) + scene_manager.add_detector(ContentDetector()) + scene_manager.detect_scenes(video=video) + scene_list = scene_manager.get_scene_list() + print_scenes(scene_list=scene_list) + # Save per-frame statistics to disk. + scene_manager.stats_manager.save_to_csv(csv_file=STATS_FILE_PATH) + +The statsfile can be used to find a better threshold for certain inputs, or perform statistical +analysis of the video. +""" + +import csv +from enum import Enum +from typing import Iterable, List, Tuple, Optional, Dict, Callable, Union, TextIO +import threading +import queue +import logging +import math +import sys + +import cv2 +import numpy as np +from backend.scenedetect._thirdparty.simpletable import (SimpleTableCell, SimpleTableImage, SimpleTableRow, + SimpleTable, HTMLPage) + +from backend.scenedetect.platform import (tqdm, get_and_create_path, get_cv2_imwrite_params, Template) +from backend.scenedetect.frame_timecode import FrameTimecode +from backend.scenedetect.video_stream import VideoStream +from backend.scenedetect.scene_detector import SceneDetector, SparseSceneDetector +from backend.scenedetect.stats_manager import StatsManager, FrameMetricRegistered + +logger = logging.getLogger('pyscenedetect') + +# TODO: This value can and should be tuned for performance improvements as much as possible, +# until accuracy falls, on a large enough dataset. This has yet to be done, but the current +# value doesn't seem to have caused any issues at least. 
+DEFAULT_MIN_WIDTH: int = 256 +"""The default minimum width a frame will be downscaled to when calculating a downscale factor.""" + +MAX_FRAME_QUEUE_LENGTH: int = 4 +"""Maximum number of decoded frames which can be buffered while waiting to be processed.""" + +PROGRESS_BAR_DESCRIPTION = 'Detected: %d | Progress' +"""Template to use for progress bar.""" + + +class Interpolation(Enum): + """Interpolation method used for image resizing. Based on constants defined in OpenCV.""" + NEAREST = cv2.INTER_NEAREST + """Nearest neighbor interpolation.""" + LINEAR = cv2.INTER_LINEAR + """Bilinear interpolation.""" + CUBIC = cv2.INTER_CUBIC + """Bicubic interpolation.""" + AREA = cv2.INTER_AREA + """Pixel area relation resampling. Provides moire'-free downscaling.""" + LANCZOS4 = cv2.INTER_LANCZOS4 + """Lanczos interpolation over 8x8 neighborhood.""" + + +def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MIN_WIDTH) -> int: + """Get the optimal default downscale factor based on a video's resolution (currently only + the width in pixels is considered). + + The resulting effective width of the video will be between frame_width and 1.5 * frame_width + pixels (e.g. if frame_width is 200, the range of effective widths will be between 200 and 300). + + Arguments: + frame_width: Actual width of the video frame in pixels. + effective_width: Desired minimum width in pixels. + + Returns: + int: The default downscale factor to use to achieve at least the target effective_width. 
+ """ + assert not (frame_width < 1 or effective_width < 1) + if frame_width < effective_width: + return 1 + return frame_width // effective_width + + +def get_scenes_from_cuts( + cut_list: Iterable[FrameTimecode], + start_pos: Union[int, FrameTimecode], + end_pos: Union[int, FrameTimecode], + base_timecode: Optional[FrameTimecode] = None, +) -> List[Tuple[FrameTimecode, FrameTimecode]]: + """Returns a list of tuples of start/end FrameTimecodes for each scene based on a + list of detected scene cuts/breaks. + + This function is called when using the :meth:`SceneManager.get_scene_list` method. + The scene list is generated from a cutting list (:meth:`SceneManager.get_cut_list`), + noting that each scene is contiguous, starting from the first to last frame of the input. + If `cut_list` is empty, the resulting scene will span from `start_pos` to `end_pos`. + + Arguments: + cut_list: List of FrameTimecode objects where scene cuts/breaks occur. + base_timecode: The base_timecode of which all FrameTimecodes in the cut_list are based on. + num_frames: The number of frames, or FrameTimecode representing duration, of the video that + was processed (used to generate last scene's end time). + start_frame: The start frame or FrameTimecode of the cut list. Used to generate the first + scene's start time. + base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility only. + Returns: + List of tuples in the form (start_time, end_time), where both start_time and + end_time are FrameTimecode objects representing the exact time/frame where each + scene occupies based on the input cut_list. + """ + # TODO(v0.7): Use the warnings module to turn this into a warning. + if base_timecode is not None: + logger.error('`base_timecode` argument is deprecated has no effect.') + + # Scene list, where scenes are tuples of (Start FrameTimecode, End FrameTimecode). 
+ scene_list = [] + if not cut_list: + scene_list.append((start_pos, end_pos)) + return scene_list + # Initialize last_cut to the first frame we processed,as it will be + # the start timecode for the first scene in the list. + last_cut = start_pos + for cut in cut_list: + scene_list.append((last_cut, cut)) + last_cut = cut + # Last scene is from last cut to end of video. + scene_list.append((last_cut, end_pos)) + + return scene_list + + +def write_scene_list(output_csv_file: TextIO, + scene_list: Iterable[Tuple[FrameTimecode, FrameTimecode]], + include_cut_list: bool = True, + cut_list: Optional[Iterable[FrameTimecode]] = None) -> None: + """Writes the given list of scenes to an output file handle in CSV format. + + Arguments: + output_csv_file: Handle to open file in write mode. + scene_list: List of pairs of FrameTimecodes denoting each scene's start/end FrameTimecode. + include_cut_list: Bool indicating if the first row should include the timecodes where + each scene starts. Should be set to False if RFC 4180 compliant CSV output is required. + cut_list: Optional list of FrameTimecode objects denoting the cut list (i.e. the frames + in the video that need to be split to generate individual scenes). If not specified, + the cut list is generated using the start times of each scene following the first one. + """ + csv_writer = csv.writer(output_csv_file, lineterminator='\n') + # If required, output the cutting list as the first row (i.e. before the header row). 
+ if include_cut_list: + csv_writer.writerow( + ["Timecode List:"] + + cut_list if cut_list else [start.get_timecode() for start, _ in scene_list[1:]]) + csv_writer.writerow([ + "Scene Number", "Start Frame", "Start Timecode", "Start Time (seconds)", "End Frame", + "End Timecode", "End Time (seconds)", "Length (frames)", "Length (timecode)", + "Length (seconds)" + ]) + for i, (start, end) in enumerate(scene_list): + duration = end - start + csv_writer.writerow([ + '%d' % (i + 1), + '%d' % (start.get_frames() + 1), + start.get_timecode(), + '%.3f' % start.get_seconds(), + '%d' % end.get_frames(), + end.get_timecode(), + '%.3f' % end.get_seconds(), + '%d' % duration.get_frames(), + duration.get_timecode(), + '%.3f' % duration.get_seconds() + ]) + + +def write_scene_list_html(output_html_filename, + scene_list, + cut_list=None, + css=None, + css_class='mytable', + image_filenames=None, + image_width=None, + image_height=None): + """Writes the given list of scenes to an output file handle in html format. + + Arguments: + output_html_filename: filename of output html file + scene_list: List of pairs of FrameTimecodes denoting each scene's start/end FrameTimecode. + cut_list: Optional list of FrameTimecode objects denoting the cut list (i.e. the frames + in the video that need to be split to generate individual scenes). If not passed, + the start times of each scene (besides the 0th scene) is used instead. + css: String containing all the css information for the resulting html page. 
+ css_class: String containing the named css class + image_filenames: dict where key i contains a list with n elements (filenames of + the n saved images from that scene) + image_width: Optional desired width of images in table in pixels + image_height: Optional desired height of images in table in pixels + """ + if not css: + css = """ + table.mytable { + font-family: times; + font-size:12px; + color:#000000; + border-width: 1px; + border-color: #eeeeee; + border-collapse: collapse; + background-color: #ffffff; + width=100%; + max-width:550px; + table-layout:fixed; + } + table.mytable th { + border-width: 1px; + padding: 8px; + border-style: solid; + border-color: #eeeeee; + background-color: #e6eed6; + color:#000000; + } + table.mytable td { + border-width: 1px; + padding: 8px; + border-style: solid; + border-color: #eeeeee; + } + #code { + display:inline; + font-family: courier; + color: #3d9400; + } + #string { + display:inline; + font-weight: bold; + } + """ + + # Output Timecode list + timecode_table = SimpleTable( + [["Timecode List:"] + + (cut_list if cut_list else [start.get_timecode() for start, _ in scene_list[1:]])], + css_class=css_class) + + # Output list of scenes + header_row = [ + "Scene Number", "Start Frame", "Start Timecode", "Start Time (seconds)", "End Frame", + "End Timecode", "End Time (seconds)", "Length (frames)", "Length (timecode)", + "Length (seconds)" + ] + for i, (start, end) in enumerate(scene_list): + duration = end - start + + row = SimpleTableRow([ + '%d' % (i + 1), + '%d' % (start.get_frames() + 1), + start.get_timecode(), + '%.3f' % start.get_seconds(), + '%d' % end.get_frames(), + end.get_timecode(), + '%.3f' % end.get_seconds(), + '%d' % duration.get_frames(), + duration.get_timecode(), + '%.3f' % duration.get_seconds() + ]) + + if image_filenames: + for image in image_filenames[i]: + row.add_cell( + SimpleTableCell( + SimpleTableImage(image, width=image_width, height=image_height))) + + if i == 0: + scene_table = 
SimpleTable(rows=[row], header_row=header_row, css_class=css_class) + else: + scene_table.add_row(row=row) + + # Write html file + page = HTMLPage() + page.add_table(timecode_table) + page.add_table(scene_table) + page.css = css + page.save(output_html_filename) + + +# +# TODO(v1.0): Refactor to take a SceneList object; consider moving this and save scene list +# to a better spot, or just move them to scene_list.py. +# +def save_images(scene_list: List[Tuple[FrameTimecode, FrameTimecode]], + video: VideoStream, + num_images: int = 3, + frame_margin: int = 1, + image_extension: str = 'jpg', + encoder_param: int = 95, + image_name_template: str = '$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER', + output_dir: Optional[str] = None, + show_progress: Optional[bool] = False, + scale: Optional[float] = None, + height: Optional[int] = None, + width: Optional[int] = None, + interpolation: Interpolation = Interpolation.CUBIC, + video_manager=None) -> Dict[int, List[str]]: + """Save a set number of images from each scene, given a list of scenes + and the associated video/frame source. + + Arguments: + scene_list: A list of scenes (pairs of FrameTimecode objects) returned + from calling a SceneManager's detect_scenes() method. + video: A VideoStream object corresponding to the scene list. + Note that the video will be closed/re-opened and seeked through. + num_images: Number of images to generate for each scene. Minimum is 1. + frame_margin: Number of frames to pad each scene around the beginning + and end (e.g. moves the first/last image into the scene by N frames). + Can set to 0, but will result in some video files failing to extract + the very last frame. + image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp'). + encoder_param: Quality/compression efficiency, based on type of image: + 'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp. 
+ 'png': Compression from 1-9, where 9 achieves best filesize but is slower to encode. + image_name_template: Template to use when creating the images on disk. Can + use the macros $VIDEO_NAME, $SCENE_NUMBER, and $IMAGE_NUMBER. The image + extension is applied automatically as per the argument image_extension. + output_dir: Directory to output the images into. If not set, the output + is created in the working directory. + show_progress: If True, shows a progress bar if tqdm is installed. + scale: Optional factor by which to rescale saved images. A scaling factor of 1 would + not result in rescaling. A value < 1 results in a smaller saved image, while a + value > 1 results in an image larger than the original. This value is ignored if + either the height or width values are specified. + height: Optional value for the height of the saved images. Specifying both the height + and width will resize images to an exact size, regardless of aspect ratio. + Specifying only height will rescale the image to that number of pixels in height + while preserving the aspect ratio. + width: Optional value for the width of the saved images. Specifying both the width + and height will resize images to an exact size, regardless of aspect ratio. + Specifying only width will rescale the image to that number of pixels wide + while preserving the aspect ratio. + interpolation: Type of interpolation to use when resizing images. + video_manager: [DEPRECATED] DO NOT USE. For backwards compatibility only. + + Returns: + Dictionary of the format { scene_num : [image_paths] }, where scene_num is the + number of the scene in scene_list (starting from 1), and image_paths is a list of + the paths to the newly saved/created images. + + Raises: + ValueError: Raised if any arguments are invalid or out of range (e.g. + if num_images is negative). + """ + # TODO(v0.7): Add DeprecationWarning that `video_manager` will be removed in v0.8. 
+ if video_manager is not None: + logger.error('`video_manager` argument is deprecated, use `video` instead.') + video = video_manager + + if not scene_list: + return {} + if num_images <= 0 or frame_margin < 0: + raise ValueError() + + # TODO: Validate that encoder_param is within the proper range. + # Should be between 0 and 100 (inclusive) for jpg/webp, and 1-9 for png. + imwrite_param = [get_cv2_imwrite_params()[image_extension], encoder_param + ] if encoder_param is not None else [] + + video.reset() + + # Setup flags and init progress bar if available. + completed = True + logger.info('Generating output images (%d per scene)...', num_images) + progress_bar = None + if show_progress: + progress_bar = tqdm(total=len(scene_list) * num_images, unit='images', dynamic_ncols=True) + + filename_template = Template(image_name_template) + + scene_num_format = '%0' + scene_num_format += str(max(3, math.floor(math.log(len(scene_list), 10)) + 1)) + 'd' + image_num_format = '%0' + image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + 'd' + + framerate = scene_list[0][0].framerate + + # TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly. 
+ timecode_list = [ + [ + FrameTimecode(int(f), fps=framerate) for f in [ + # middle frames + a[len(a) // 2] if (0 < j < num_images - 1) or num_images == 1 + + # first frame + else min(a[0] + frame_margin, a[-1]) if j == 0 + + # last frame + else max(a[-1] - frame_margin, a[0]) + + # for each evenly-split array of frames in the scene list + for j, a in enumerate(np.array_split(r, num_images)) + ] + ] for i, r in enumerate([ + # pad ranges to number of images + r if 1 + r[-1] - r[0] >= num_images else list(r) + [r[-1]] * (num_images - len(r)) + # create range of frames in scene + for r in ( + range( + start.get_frames(), + start.get_frames() + max( + 1, # guard against zero length scenes + end.get_frames() - start.get_frames())) + # for each scene in scene list + for start, end in scene_list) + ]) + ] + + image_filenames = {i: [] for i in range(len(timecode_list))} + aspect_ratio = video.aspect_ratio + if abs(aspect_ratio - 1.0) < 0.01: + aspect_ratio = None + + logger.debug('Writing images with template %s', filename_template.template) + for i, scene_timecodes in enumerate(timecode_list): + for j, image_timecode in enumerate(scene_timecodes): + video.seek(image_timecode) + frame_im = video.read() + if frame_im is not None: + # TODO: Allow NUM to be a valid suffix in addition to NUMBER. + file_path = '%s.%s' % (filename_template.safe_substitute( + VIDEO_NAME=video.name, + SCENE_NUMBER=scene_num_format % (i + 1), + IMAGE_NUMBER=image_num_format % (j + 1), + FRAME_NUMBER=image_timecode.get_frames()), image_extension) + image_filenames[i].append(file_path) + # TODO(0.6.3): Combine this resize with the ones below. 
+ if aspect_ratio is not None: + frame_im = cv2.resize( + frame_im, (0, 0), + fx=aspect_ratio, + fy=1.0, + interpolation=interpolation.value) + frame_height = frame_im.shape[0] + frame_width = frame_im.shape[1] + + # Figure out what kind of resizing needs to be done + if height or width: + if height and not width: + factor = height / float(frame_height) + width = int(factor * frame_width) + if width and not height: + factor = width / float(frame_width) + height = int(factor * frame_height) + assert height > 0 and width > 0 + frame_im = cv2.resize( + frame_im, (width, height), interpolation=interpolation.value) + elif scale: + frame_im = cv2.resize( + frame_im, (0, 0), fx=scale, fy=scale, interpolation=interpolation.value) + + cv2.imwrite(get_and_create_path(file_path, output_dir), frame_im, imwrite_param) + else: + completed = False + break + if progress_bar is not None: + progress_bar.update(1) + + if progress_bar is not None: + progress_bar.close() + + if not completed: + logger.error('Could not generate all output images.') + + return image_filenames + + +## +## SceneManager Class Implementation +## + + +class SceneManager: + """The SceneManager facilitates detection of scenes (:meth:`detect_scenes`) on a video + (:class:`VideoStream `) using a detector + (:meth:`add_detector`). Video decoding is done in parallel in a background thread. + """ + + def __init__( + self, + stats_manager: Optional[StatsManager] = None, + ): + """ + Arguments: + stats_manager: :class:`StatsManager` to bind to this `SceneManager`. Can be + accessed via the `stats_manager` property of the resulting object to save to disk. + """ + self._cutting_list = [] + self._event_list = [] + self._detector_list = [] + self._sparse_detector_list = [] + # TODO(v1.0): This class should own a StatsManager instead of taking an optional one. 
+ # Expose a new `stats_manager` @property from the SceneManager, and either change the + # `stats_manager` argument to to `store_stats: bool=False`, or lazy-init one. + + # TODO(v1.0): This class should own a VideoStream as well, instead of passing one + # to the detect_scenes method. If concatenation is required, it can be implemented as + # a generic VideoStream wrapper. + self._stats_manager: Optional[StatsManager] = stats_manager + + # Position of video that was first passed to detect_scenes. + self._start_pos: FrameTimecode = None + # Position of video on the last frame processed by detect_scenes. + self._last_pos: FrameTimecode = None + self._base_timecode: Optional[FrameTimecode] = None + self._downscale: int = 1 + self._auto_downscale: bool = True + # Interpolation method to use when downscaling. Defaults to linear interpolation + # as a good balance between quality and performance. + self._interpolation: Interpolation = Interpolation.LINEAR + # Boolean indicating if we have only seen EventType.CUT events so far. + self._only_cuts: bool = True + # Set by decode thread when an exception occurs. + self._exception_info = None + self._stop = threading.Event() + + self._frame_buffer = [] + self._frame_buffer_size = 0 + + @property + def interpolation(self) -> Interpolation: + """Interpolation method to use when downscaling frames. Must be one of cv2.INTER_*.""" + return self._interpolation + + @interpolation.setter + def interpolation(self, value: Interpolation): + self._interpolation = value + + @property + def stats_manager(self) -> Optional[StatsManager]: + """Getter for the StatsManager associated with this SceneManager, if any.""" + return self._stats_manager + + @property + def downscale(self) -> int: + """Factor to downscale each frame by. Will always be >= 1, where 1 + indicates no scaling. 
Will be ignored if auto_downscale=True.""" + return self._downscale + + @downscale.setter + def downscale(self, value: int): + """Set to 1 for no downscaling, 2 for 2x downscaling, 3 for 3x, etc...""" + if value < 1: + raise ValueError("Downscale factor must be a positive integer >= 1!") + if self.auto_downscale: + logger.warning("Downscale factor will be ignored because auto_downscale=True!") + if value is not None and not isinstance(value, int): + logger.warning("Downscale factor will be truncated to integer!") + value = int(value) + self._downscale = value + + @property + def auto_downscale(self) -> bool: + """If set to True, will automatically downscale based on video frame size. + + Overrides `downscale` if set.""" + return self._auto_downscale + + @auto_downscale.setter + def auto_downscale(self, value: bool): + self._auto_downscale = value + + def add_detector(self, detector: SceneDetector) -> None: + """Add/register a SceneDetector (e.g. ContentDetector, ThresholdDetector) to + run when detect_scenes is called. The SceneManager owns the detector object, + so a temporary may be passed. + + Arguments: + detector (SceneDetector): Scene detector to add to the SceneManager. + """ + if self._stats_manager is None and detector.stats_manager_required(): + # Make sure the lists are empty so that the detectors don't get + # out of sync (require an explicit statsmanager instead) + assert not self._detector_list and not self._sparse_detector_list + self._stats_manager = StatsManager() + + detector.stats_manager = self._stats_manager + if self._stats_manager is not None: + try: + self._stats_manager.register_metrics(detector.get_metrics()) + except FrameMetricRegistered: + # Allow multiple detection algorithms of the same type to be added + # by suppressing any FrameMetricRegistered exceptions due to attempts + # to re-register the same frame metric keys. + # TODO(#334): Fix this, this should not be part of regular control flow. 
+ pass + + if not issubclass(type(detector), SparseSceneDetector): + self._detector_list.append(detector) + else: + self._sparse_detector_list.append(detector) + + self._frame_buffer_size = max(detector.event_buffer_length, self._frame_buffer_size) + + def get_num_detectors(self) -> int: + """Get number of registered scene detectors added via add_detector. """ + return len(self._detector_list) + + def clear(self) -> None: + """Clear all cuts/scenes and resets the SceneManager's position. + + Any statistics generated are still saved in the StatsManager object passed to the + SceneManager's constructor, and thus, subsequent calls to detect_scenes, using the same + frame source seeked back to the original time (or beginning of the video) will use the + cached frame metrics that were computed and saved in the previous call to detect_scenes. + """ + self._cutting_list.clear() + self._event_list.clear() + self._last_pos = None + self._start_pos = None + self.clear_detectors() + + def clear_detectors(self) -> None: + """Remove all scene detectors added to the SceneManager via add_detector(). """ + self._detector_list.clear() + self._sparse_detector_list.clear() + + def get_scene_list(self, + base_timecode: Optional[FrameTimecode] = None, + start_in_scene: bool = False) -> List[Tuple[FrameTimecode, FrameTimecode]]: + """Return a list of tuples of start/end FrameTimecodes for each detected scene. + + Arguments: + base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility. + start_in_scene: Assume the video begins in a scene. This means that when detecting + fast cuts with `ContentDetector`, if no cuts are found, the resulting scene list + will contain a single scene spanning the entire video (instead of no scenes). + When detecting fades with `ThresholdDetector`, the beginning portion of the video + will always be included until the first fade-out event is detected. 
+ + Returns: + List of tuples in the form (start_time, end_time), where both start_time and + end_time are FrameTimecode objects representing the exact time/frame where each + detected scene in the video begins and ends. + """ + # TODO(v0.7): Replace with DeprecationWarning that `base_timecode` will be removed in v0.8. + if base_timecode is not None: + logger.error('`base_timecode` argument is deprecated and has no effect.') + if self._base_timecode is None: + return [] + cut_list = self._get_cutting_list() + scene_list = get_scenes_from_cuts( + cut_list=cut_list, start_pos=self._start_pos, end_pos=self._last_pos + 1) + # If we didn't actually scene_detect any cuts, make sure the resulting scene_list is empty + # unless start_in_scene is True. + if not cut_list and not start_in_scene: + scene_list = [] + return sorted(self._get_event_list() + scene_list) + + def _get_cutting_list(self) -> List[int]: + """Return a sorted list of unique frame numbers of any detected scene cuts.""" + if not self._cutting_list: + return [] + assert self._base_timecode is not None + # Ensure all cuts are unique by using a set to remove all duplicates. + return [self._base_timecode + cut for cut in sorted(set(self._cutting_list))] + + def _get_event_list(self) -> List[Tuple[FrameTimecode, FrameTimecode]]: + if not self._event_list: + return [] + assert self._base_timecode is not None + return [(self._base_timecode + start, self._base_timecode + end) + for start, end in self._event_list] + + def _process_frame(self, + frame_num: int, + frame_im: np.ndarray, + callback: Optional[Callable[[np.ndarray, int], None]] = None) -> bool: + """Add any cuts detected with the current frame to the cutting list. Returns True if any new + cuts were detected, False otherwise.""" + new_cuts = False + # TODO(#283): This breaks with AdaptiveDetector as cuts differ from the frame number + # being processed. Allow detectors to specify the max frame lookahead they require + # (i.e. 
any event will never be more than N frames behind the current one). + self._frame_buffer.append(frame_im) + # frame_buffer[-1] is current frame, -2 is one behind, etc + # so index based on cut frame should be [event_frame - (frame_num + 1)] + self._frame_buffer = self._frame_buffer[-(self._frame_buffer_size + 1):] + for detector in self._detector_list: + cuts = detector.process_frame(frame_num, frame_im) + self._cutting_list += cuts + new_cuts = True if cuts else False + if callback: + for cut_frame_num in cuts: + buffer_index = cut_frame_num - (frame_num + 1) + callback(self._frame_buffer[buffer_index], cut_frame_num) + for detector in self._sparse_detector_list: + events = detector.process_frame(frame_num, frame_im) + self._event_list += events + if callback: + for event_start, _ in events: + buffer_index = event_start - (frame_num + 1) + callback(self._frame_buffer[buffer_index], event_start) + return new_cuts + + def _post_process(self, frame_num: int) -> None: + """Add remaining cuts to the cutting list, after processing the last frame.""" + for detector in self._detector_list: + self._cutting_list += detector.post_process(frame_num) + + def stop(self) -> None: + """Stop the current :meth:`detect_scenes` call, if any. Thread-safe.""" + self._stop.set() + + def detect_scenes(self, + video: VideoStream = None, + duration: Optional[FrameTimecode] = None, + end_time: Optional[FrameTimecode] = None, + frame_skip: int = 0, + show_progress: bool = False, + callback: Optional[Callable[[np.ndarray, int], None]] = None, + frame_source: Optional[VideoStream] = None) -> int: + """Perform scene detection on the given video using the added SceneDetectors, returning the + number of frames processed. Results can be obtained by calling :meth:`get_scene_list` or + :meth:`get_cut_list`. + + Video decoding is performed in a background thread to allow scene detection and frame + decoding to happen in parallel. 
Detection will continue until no more frames are left, + the specified duration or end time has been reached, or :meth:`stop` was called. + + Arguments: + video: VideoStream obtained from either `scenedetect.open_video`, or by creating + one directly (e.g. `scenedetect.backends.opencv.VideoStreamCv2`). + duration: Amount of time to scene_detect from current video position. Cannot be + specified if `end_time` is set. + end_time: Time to stop processing at. Cannot be specified if `duration` is set. + frame_skip: Not recommended except for extremely high framerate videos. + Number of frames to skip (i.e. process every 1 in N+1 frames, + where N is frame_skip, processing only 1/N+1 percent of the video, + speeding up the detection time at the expense of accuracy). + `frame_skip` **must** be 0 (the default) when using a StatsManager. + show_progress: If True, and the ``tqdm`` module is available, displays + a progress bar with the progress, framerate, and expected time to + complete processing the video frame source. + callback: If set, called after each scene/event detected. + frame_source: [DEPRECATED] DO NOT USE. For compatibility with previous version. + Returns: + int: Number of frames read and processed from the frame source. + Raises: + ValueError: `frame_skip` **must** be 0 (the default) if the SceneManager + was constructed with a StatsManager object. + """ + # TODO(v0.7): Add DeprecationWarning that `frame_source` will be removed in v0.8. + # TODO(v0.8): Remove default value for `video`` when removing `frame_source`. 
+ if frame_source is not None: + video = frame_source + if video is None: + raise TypeError("detect_scenes() missing 1 required positional argument: 'video'") + + if frame_skip > 0 and self.stats_manager is not None: + raise ValueError('frame_skip must be 0 when using a StatsManager.') + if duration is not None and end_time is not None: + raise ValueError('duration and end_time cannot be set at the same time!') + if duration is not None and duration < 0: + raise ValueError('duration must be greater than or equal to 0!') + if end_time is not None and end_time < 0: + raise ValueError('end_time must be greater than or equal to 0!') + + self._base_timecode = video.base_timecode + # TODO(v1.0): Fix this properly by making SceneManager create and own a StatsManager, + # and requiring the framerate to be passed to the StatsManager the constructor. + if self._stats_manager is not None: + self._stats_manager._base_timecode = self._base_timecode + start_frame_num: int = video.frame_number + + if duration is not None: + end_time: Union[int, FrameTimecode] = duration + start_frame_num + + if end_time is not None: + end_time: FrameTimecode = self._base_timecode + end_time + + # Can only calculate total number of frames we expect to process if the duration of + # the video is available. + total_frames = 0 + if video.duration is not None: + if end_time is not None and end_time < video.duration: + total_frames = (end_time - start_frame_num) + 1 + else: + total_frames = (video.duration.get_frames() - start_frame_num) + + # Calculate the desired downscale factor and log the effective resolution. 
+ if self.auto_downscale: + downscale_factor = compute_downscale_factor(frame_width=video.frame_size[0]) + else: + downscale_factor = self.downscale + if downscale_factor > 1: + logger.info('Downscale factor set to %d, effective resolution: %d x %d', + downscale_factor, video.frame_size[0] // downscale_factor, + video.frame_size[1] // downscale_factor) + + progress_bar = None + if show_progress: + progress_bar = tqdm( + total=int(total_frames), + unit='frames', + desc=PROGRESS_BAR_DESCRIPTION % 0, + dynamic_ncols=True, + ) + + frame_queue = queue.Queue(MAX_FRAME_QUEUE_LENGTH) + self._stop.clear() + decode_thread = threading.Thread( + target=SceneManager._decode_thread, + args=(self, video, frame_skip, downscale_factor, end_time, frame_queue), + daemon=True) + decode_thread.start() + frame_im = None + + logger.info('Detecting scenes...') + while not self._stop.is_set(): + next_frame, position = frame_queue.get() + if next_frame is None and position is None: + break + if not next_frame is None: + frame_im = next_frame + new_cuts = self._process_frame(position.frame_num, frame_im, callback) + if progress_bar is not None: + if new_cuts: + progress_bar.set_description( + PROGRESS_BAR_DESCRIPTION % len(self._cutting_list), refresh=False) + progress_bar.update(1 + frame_skip) + + if progress_bar is not None: + progress_bar.close() + # Unblock any puts in the decode thread before joining. This can happen if the main + # processing thread stops before the decode thread. 
+ while not frame_queue.empty(): + frame_queue.get_nowait() + decode_thread.join() + + if self._exception_info is not None: + raise self._exception_info[1].with_traceback(self._exception_info[2]) + + self._last_pos = video.position + self._post_process(video.position.frame_num) + return video.frame_number - start_frame_num + + def _decode_thread( + self, + video: VideoStream, + frame_skip: int, + downscale_factor: int, + end_time: FrameTimecode, + out_queue: queue.Queue, + ): + try: + while not self._stop.is_set(): + frame_im = None + # We don't do any kind of locking here since the worst-case of this being wrong + # is that we do some extra work, and this function should never mutate any data + # (all of which should be modified under the GIL). + # TODO(v1.0): This optimization should be removed as it is an uncommon use case and + # greatly increases the complexity of detection algorithms using it. + if self._is_processing_required(video.position.frame_num): + frame_im = video.read() + if frame_im is False: + break + if downscale_factor > 1: + frame_im = cv2.resize( + frame_im, (round(frame_im.shape[1] / downscale_factor), + round(frame_im.shape[0] / downscale_factor)), + interpolation=self._interpolation.value) + else: + if video.read(decode=False) is False: + break + + # Set the start position now that we decoded at least the first frame. + if self._start_pos is None: + self._start_pos = video.position + + out_queue.put((frame_im, video.position)) + + if frame_skip > 0: + for _ in range(frame_skip): + if not video.read(decode=False): + break + # End time includes the presentation time of the frame, but the `position` + # property of a VideoStream references the beginning of the frame in time. + if end_time is not None and not (video.position + 1) < end_time: + break + + # If *any* exceptions occur, we re-raise them in the main thread so that the caller of + # detect_scenes can handle it. 
+ except KeyboardInterrupt: + logger.debug("Received KeyboardInterrupt.") + self._stop.set() + except BaseException: + logger.critical('Fatal error: Exception raised in decode thread.') + self._exception_info = sys.exc_info() + self._stop.set() + + finally: + # Handle case where start position was never set if we did not decode any frames. + if self._start_pos is None: + self._start_pos = video.position + # Make sure main thread stops processing loop. + out_queue.put((None, None)) + + # pylint: enable=bare-except + + # + # Deprecated Methods + # + + # pylint: disable=unused-argument + + def get_cut_list(self, + base_timecode: Optional[FrameTimecode] = None, + show_warning: bool = True) -> List[FrameTimecode]: + """[DEPRECATED] Return a list of FrameTimecodes of the detected scene changes/cuts. + + Unlike get_scene_list, the cutting list returns a list of FrameTimecodes representing + the point in the input video where a new scene was detected, and thus the frame + where the input should be cut/split. The cutting list, in turn, is used to generate + the scene list, noting that each scene is contiguous starting from the first frame + and ending at the last frame detected. + + If only sparse detectors are used (e.g. MotionDetector), this will always be empty. + + Arguments: + base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility only. + show_warning: If set to False, suppresses the error from being warned. In v0.7, + this will have no effect and the error will become a Python warning. + + Returns: + List of FrameTimecode objects denoting the points in time where a scene change + was detected in the input video, which can also be passed to external tools + for automated splitting of the input into individual scenes. + """ + # TODO(v0.7): Use the warnings module to turn this into a warning. 
+ if show_warning: + logger.error('`get_cut_list()` is deprecated and will be removed in a future release.') + return self._get_cutting_list() + + def get_event_list( + self, + base_timecode: Optional[FrameTimecode] = None + ) -> List[Tuple[FrameTimecode, FrameTimecode]]: + """[DEPRECATED] DO NOT USE. + + Get a list of start/end timecodes of sparse detection events. + + Unlike get_scene_list, the event list returns a list of FrameTimecodes representing + the point in the input video where a new scene was detected only by sparse detectors, + otherwise it is the same. + + Arguments: + base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility only. + + Returns: + List of pairs of FrameTimecode objects denoting the detected scenes. + """ + # TODO(v0.7): Use the warnings module to turn this into a warning. + logger.error('`get_event_list()` is deprecated and will be removed in a future release.') + return self._get_event_list() + + # pylint: enable=unused-argument + + def _is_processing_required(self, frame_num: int) -> bool: + """True if frame metrics not in StatsManager, False otherwise.""" + if self.stats_manager is None: + return True + return all([detector.is_processing_required(frame_num) for detector in self._detector_list]) diff --git a/backend/scenedetect/stats_manager.py b/backend/scenedetect/stats_manager.py new file mode 100644 index 0000000..099c868 --- /dev/null +++ b/backend/scenedetect/stats_manager.py @@ -0,0 +1,324 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. 
+# +"""``scenedetect.stats_manager`` Module + +This module contains the :class:`StatsManager` class, which provides a key-value store for each +:class:`SceneDetector ` to write the metrics calculated +for each frame. The :class:`StatsManager` must be registered to a +:class:`SceneManager ` upon construction. + +The entire :class:`StatsManager` can be :meth:`saved to ` a +human-readable CSV file, allowing for precise determination of the ideal threshold (or other +detection parameters) for the given input. +""" + +import csv +from logging import getLogger +from typing import Any, Dict, Iterable, List, Optional, Set, TextIO, Union +import os.path + +from backend.scenedetect.frame_timecode import FrameTimecode + +logger = getLogger('pyscenedetect') + +## +## StatsManager CSV File Column Names (Header Row) +## + +COLUMN_NAME_FRAME_NUMBER = "Frame Number" +"""Name of column containing frame numbers in the statsfile CSV.""" + +COLUMN_NAME_TIMECODE = "Timecode" +"""Name of column containing timecodes in the statsfile CSV.""" + +## +## StatsManager Exceptions +## + + +class FrameMetricRegistered(Exception): + """ Raised when attempting to register a frame metric key which has + already been registered. """ + + def __init__(self, + metric_key: str, + message: str = "Attempted to re-register frame metric key."): + super().__init__(message) + self.metric_key = metric_key + + +class FrameMetricNotRegistered(Exception): + """ Raised when attempting to call get_metrics(...)/set_metrics(...) with a + frame metric that does not exist, or has not been registered. 
""" + + def __init__(self, + metric_key: str, + message: str = "Attempted to get/set frame metrics for unregistered metric key."): + super().__init__(message) + self.metric_key = metric_key + + +class StatsFileCorrupt(Exception): + """Raised when frame metrics/stats could not be loaded from a provided CSV file.""" + + def __init__(self, + message: str = "Could not load frame metric data data from passed CSV file."): + super().__init__(message) + + +## +## StatsManager Class Implementation +## + + +# TODO(v1.0): Relax restriction on metric types only being float or int when loading from disk +# is fully deprecated. +class StatsManager: + """Provides a key-value store for frame metrics/calculations which can be used + for two-pass detection algorithms, as well as saving stats to a CSV file. + + Analyzing a statistics CSV file is also very useful for finding the optimal + algorithm parameters for certain detection methods. Additionally, the data + may be plotted by a graphing module (e.g. matplotlib) by obtaining the + metric of interest for a series of frames by iteratively calling get_metrics(), + after having called the detect_scenes(...) method on the SceneManager object + which owns the given StatsManager instance. + + Only metrics consisting of `float` or `int` should be used currently. + """ + + def __init__(self, base_timecode: FrameTimecode = None): + """Initialize a new StatsManager. + + Arguments: + base_timecode: Timecode associated with this object. Must not be None (default value + will be removed in a future release). + """ + # Frame metrics is a dict of frame (int): metric_dict (Dict[str, float]) + # of each frame metric key and the value it represents (usually float). + self._frame_metrics: Dict[FrameTimecode, Dict[str, float]] = dict() + self._registered_metrics: Set[str] = set() # Set of frame metric keys. + self._loaded_metrics: Set[str] = set() # Metric keys loaded from stats file. 
+ self._metrics_updated: bool = False # Flag indicating if metrics require saving. + self._base_timecode: Optional[FrameTimecode] = base_timecode # Used for timing calculations. + + def register_metrics(self, metric_keys: Iterable[str]) -> None: + """Register a list of metric keys that will be used by the detector. + + Used to ensure that multiple detector keys don't overlap. + + Raises: + FrameMetricRegistered: A particular metric_key has already been registered/added + to the StatsManager. Only if the StatsManager is being used for read-only + access (i.e. all frames in the video have already been processed for the given + metric_key in the exception) is this behavior desirable. + """ + for metric_key in metric_keys: + if metric_key not in self._registered_metrics: + self._registered_metrics.add(metric_key) + else: + raise FrameMetricRegistered(metric_key) + + # TODO(v1.0): Change frame_number to a FrameTimecode now that it is just a hash and will + # be required for VFR support. + def get_metrics(self, frame_number: int, metric_keys: Iterable[str]) -> List[Any]: + """Return the requested statistics/metrics for a given frame. + + Arguments: + frame_number (int): Frame number to retrieve metrics for. + metric_keys (List[str]): A list of metric keys to look up. + + Returns: + A list containing the requested frame metrics for the given frame number + in the same order as the input list of metric keys. If a metric could + not be found, None is returned for that particular metric. + """ + return [self._get_metric(frame_number, metric_key) for metric_key in metric_keys] + + def set_metrics(self, frame_number: int, metric_kv_dict: Dict[str, Any]) -> None: + """ Set Metrics: Sets the provided statistics/metrics for a given frame. + + Arguments: + frame_number: Frame number to retrieve metrics for. + metric_kv_dict: A dict mapping metric keys to the + respective integer/floating-point metric values to set. 
+ """ + for metric_key in metric_kv_dict: + self._set_metric(frame_number, metric_key, metric_kv_dict[metric_key]) + + def metrics_exist(self, frame_number: int, metric_keys: Iterable[str]) -> bool: + """ Metrics Exist: Checks if the given metrics/stats exist for the given frame. + + Returns: + bool: True if the given metric keys exist for the frame, False otherwise. + """ + return all([self._metric_exists(frame_number, metric_key) for metric_key in metric_keys]) + + def is_save_required(self) -> bool: + """ Is Save Required: Checks if the stats have been updated since loading. + + Returns: + bool: True if there are frame metrics/statistics not yet written to disk, + False otherwise. + """ + return self._metrics_updated + + def save_to_csv(self, + csv_file: Union[str, bytes, TextIO], + base_timecode: Optional[FrameTimecode] = None, + force_save=True) -> None: + """ Save To CSV: Saves all frame metrics stored in the StatsManager to a CSV file. + + Arguments: + csv_file: A file handle opened in write mode (e.g. open('...', 'w')) or a path as str. + base_timecode: [DEPRECATED] DO NOT USE. For backwards compatibility. + force_save: If True, writes metrics out even if an update is not required. + + Raises: + OSError: If `path` cannot be opened or a write failure occurs. + """ + # TODO(v0.7): Replace with DeprecationWarning that `base_timecode` will be removed in v0.8. + if base_timecode is not None: + logger.error('base_timecode is deprecated.') + + # Ensure we need to write to the file, and that we have data to do so with. + if not ((self.is_save_required() or force_save) and self._registered_metrics + and self._frame_metrics): + logger.info("No metrics to save.") + return + + assert self._base_timecode is not None + + # If we get a path instead of an open file handle, recursively call ourselves + # again but with file handle instead of path. 
+ if isinstance(csv_file, (str, bytes)): + with open(csv_file, 'w') as file: + self.save_to_csv(csv_file=file, force_save=force_save) + return + + csv_writer = csv.writer(csv_file, lineterminator='\n') + metric_keys = sorted(list(self._registered_metrics.union(self._loaded_metrics))) + csv_writer.writerow([COLUMN_NAME_FRAME_NUMBER, COLUMN_NAME_TIMECODE] + metric_keys) + frame_keys = sorted(self._frame_metrics.keys()) + logger.info("Writing %d frames to CSV...", len(frame_keys)) + for frame_key in frame_keys: + frame_timecode = self._base_timecode + frame_key + csv_writer.writerow( + [frame_timecode.get_frames() + + 1, frame_timecode.get_timecode()] + + [str(metric) for metric in self.get_metrics(frame_key, metric_keys)]) + + @staticmethod + def valid_header(row: List[str]) -> bool: + """Check that the given CSV row is a valid header for a statsfile. + + Arguments: + row: A row decoded from the CSV reader. + + Returns: + True if `row` is a valid statsfile header, False otherwise. + """ + if not row or not len(row) >= 2: + return False + if row[0] != COLUMN_NAME_FRAME_NUMBER or row[1] != COLUMN_NAME_TIMECODE: + return False + return True + + # TODO(v1.0): Remove. + def load_from_csv(self, csv_file: Union[str, bytes, TextIO]) -> Optional[int]: + """[DEPRECATED] DO NOT USE + + Load all metrics stored in a CSV file into the StatsManager instance. Will be removed in a + future release after becoming a no-op. + + Arguments: + csv_file: A file handle opened in read mode (e.g. open('...', 'r')) or a path as str. + + Returns: + int or None: Number of frames/rows read from the CSV file, or None if the + input file was blank or could not be found. + + Raises: + StatsFileCorrupt: Stats file is corrupt and can't be loaded, or wrong file + was specified. + """ + # TODO: Make this an error, then make load_from_csv() a no-op, and finally, remove it. 
+ logger.warning("load_from_csv() is deprecated and will be removed in a future release.") + + # If we get a path instead of an open file handle, check that it exists, and if so, + # recursively call ourselves again but with file set instead of path. + if isinstance(csv_file, (str, bytes)): + if os.path.exists(csv_file): + with open(csv_file, 'r') as file: + return self.load_from_csv(csv_file=file) + # Path doesn't exist. + return None + + # If we get here, file is a valid file handle in read-only text mode. + csv_reader = csv.reader(csv_file, lineterminator='\n') + num_cols = None + num_metrics = None + num_frames = None + # First Row: Frame Num, Timecode, [metrics...] + try: + row = next(csv_reader) + # Backwards compatibility for previous versions of statsfile + # which included an additional header row. + if not self.valid_header(row): + row = next(csv_reader) + except StopIteration: + # If the file is blank or we couldn't decode anything, assume the file was empty. + return None + if not self.valid_header(row): + raise StatsFileCorrupt() + num_cols = len(row) + num_metrics = num_cols - 2 + if not num_metrics > 0: + raise StatsFileCorrupt('No metrics defined in CSV file.') + self._loaded_metrics = row[2:] + num_frames = 0 + for row in csv_reader: + metric_dict = {} + if not len(row) == num_cols: + raise StatsFileCorrupt('Wrong number of columns detected in stats file row.') + for i, metric_str in enumerate(row[2:]): + if metric_str and metric_str != 'None': + try: + metric_dict[self._loaded_metrics[i]] = float(metric_str) + except ValueError: + raise StatsFileCorrupt('Corrupted value in stats file: %s' % + metric_str) from ValueError + frame_number = int(row[0]) + # Switch from 1-based to 0-based frame numbers. 
+ if frame_number > 0: + frame_number -= 1 + self.set_metrics(frame_number, metric_dict) + num_frames += 1 + logger.info('Loaded %d metrics for %d frames.', num_metrics, num_frames) + self._metrics_updated = False + return num_frames + + def _get_metric(self, frame_number: int, metric_key: str) -> Optional[Any]: + if self._metric_exists(frame_number, metric_key): + return self._frame_metrics[frame_number][metric_key] + return None + + def _set_metric(self, frame_number: int, metric_key: str, metric_value: Any) -> None: + self._metrics_updated = True + if not frame_number in self._frame_metrics: + self._frame_metrics[frame_number] = dict() + self._frame_metrics[frame_number][metric_key] = metric_value + + def _metric_exists(self, frame_number: int, metric_key: str) -> bool: + return (frame_number in self._frame_metrics + and metric_key in self._frame_metrics[frame_number]) diff --git a/backend/scenedetect/video_manager.py b/backend/scenedetect/video_manager.py new file mode 100644 index 0000000..a4ce5cf --- /dev/null +++ b/backend/scenedetect/video_manager.py @@ -0,0 +1,772 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +"""``scenedetect.video_manager`` Module + +[DEPRECATED] DO NOT USE. Use `open_video` from `scenedetect.backends` or create a +VideoStreamCv2 object (`scenedetect.backends.opencv`) instead. + +This module exists for *some* backwards compatibility with v0.5, and will be removed +in a future release. 
class VideoParameterMismatch(Exception):
    """Raised when opening multiple videos with a VideoManager, and some of the
    video parameters (frame height, frame width, and framerate/FPS) do not match."""

    def __init__(self,
                 file_list=None,
                 message="OpenCV VideoCapture object parameters do not match."):
        # type: (Iterable[Tuple[int, float, float, str, str]], str) -> None
        super().__init__(message)
        # List of (param_mismatch_type: int, parameter value, expected value,
        # filename: str, filepath: str) tuples, where param_mismatch_type is an
        # OpenCV CAP_PROP constant (e.g. CAP_PROP_FPS).
        self.file_list = file_list


class VideoDecodingInProgress(RuntimeError):
    """Raised when attempting to call certain VideoManager methods that must be
    called *before* start() has been called."""


class InvalidDownscaleFactor(ValueError):
    """Raised when trying to set an invalid downscale factor, i.e. the supplied
    downscale factor was not a positive integer greater than zero."""


##
## VideoManager Helper Functions
##


def get_video_name(video_file: Union[str, int]) -> Tuple[str, Union[str, int]]:
    """Get the video file/device name.

    Arguments:
        video_file: Path to a video file, or an integer capture-device ID.

    Returns:
        Tuple of the form (name, video_file). For a device ID, name is
        "Device N" and video_file is returned unchanged (an int, not a str).
    """
    if isinstance(video_file, int):
        return ('Device %d' % video_file, video_file)
    return (os.path.split(video_file)[1], video_file)


def get_num_frames(cap_list: Iterable["cv2.VideoCapture"]) -> int:
    """Get Number of Frames: Returns total number of frames in the cap_list.

    Calls get(CAP_PROP_FRAME_COUNT) and returns the sum for all VideoCaptures.
    """
    # Generator expression avoids building a throwaway list just to sum it.
    return sum(math.trunc(cap.get(cv2.CAP_PROP_FRAME_COUNT)) for cap in cap_list)
def open_captures(
    video_files: Iterable[str],
    framerate: Optional[float] = None,
    validate_parameters: bool = True,
) -> Tuple[List["cv2.VideoCapture"], float, Tuple[int, int]]:
    """Open all capture objects, set the framerate, and ensure that all open
    captures have been opened and the framerates match, on a list of video file
    paths or a list containing a single device ID.

    Arguments:
        video_files: List of one or more paths (str), or a list of a single
            integer device ID, to open as OpenCV VideoCapture objects.
            A ValueError will be raised if the list does not conform to the above.
        framerate: Framerate to assume when opening the video_files. If not set,
            the first open video is used for deducing the framerate of all
            videos in the sequence.
        validate_parameters: If true, will ensure that the frame sizes
            (width, height) and frame rate (FPS) of all passed videos is the
            same. A VideoParameterMismatch is raised if the framerates do not match.

    Returns:
        A tuple of form (cap_list, framerate, framesize) where cap_list is a
        list of open OpenCV VideoCapture objects in the same order as the
        video_files list, framerate is a float of the video(s) framerate(s), and
        framesize is a tuple of (width, height) in pixels.

    Raises:
        ValueError: No video file(s) specified, or invalid/multiple device IDs specified.
        TypeError: `framerate` must be type `float`.
        IOError: Video file(s) not found.
        FrameRateUnavailable: Video framerate could not be obtained and
            `framerate` was not set manually.
        VideoParameterMismatch: All videos in `video_files` do not have equal
            parameters. Set `validate_parameters=False` to skip this check.
        VideoOpenFailure: Video(s) could not be opened.
    """
    is_device = False
    if not video_files:
        raise ValueError("Expected at least 1 video file or device ID.")
    if isinstance(video_files[0], int):
        if len(video_files) > 1:
            raise ValueError("If device ID is specified, no video sources may be appended.")
        elif video_files[0] < 0:
            raise ValueError("Invalid/negative device ID specified.")
        is_device = True
    elif not all([isinstance(video_file, (str, bytes)) for video_file in video_files]):
        # NOTE: a stray debug `print(video_files)` was removed from this branch.
        raise ValueError("Unexpected element type in video_files list (expected str(s)/int).")
    elif framerate is not None and not isinstance(framerate, float):
        raise TypeError("Expected type float for parameter framerate.")
    # Check if files exist if passed video file is not an image sequence
    # (checked with presence of % in filename) or not a URL (://).
    if not is_device and any([
            not os.path.exists(video_file)
            for video_file in video_files
            if not ('%' in video_file or '://' in video_file)
    ]):
        raise IOError("Video file(s) not found.")
    cap_list = []

    try:
        cap_list = [cv2.VideoCapture(video_file) for video_file in video_files]
        video_names = [get_video_name(video_file) for video_file in video_files]
        closed_caps = [video_names[i] for i, cap in enumerate(cap_list) if not cap.isOpened()]
        if closed_caps:
            raise VideoOpenFailure(str(closed_caps))

        cap_framerates = [cap.get(cv2.CAP_PROP_FPS) for cap in cap_list]
        cap_framerate, check_framerate = validate_capture_framerate(video_names, cap_framerates,
                                                                    framerate)
        # Store frame sizes as integers (VideoCapture.get() returns float).
        cap_frame_sizes = [(math.trunc(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                            math.trunc(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))) for cap in cap_list]
        cap_frame_size = cap_frame_sizes[0]

        # If we need to validate the parameters, we check that the FPS and
        # width/height of all open captures is identical (or almost identical
        # in the case of FPS).
        if validate_parameters:
            validate_capture_parameters(
                video_names=video_names,
                cap_frame_sizes=cap_frame_sizes,
                check_framerate=check_framerate,
                cap_framerates=cap_framerates)

    except:
        # Release any captures opened so far, then re-raise the original error.
        for cap in cap_list:
            cap.release()
        raise

    return (cap_list, cap_framerate, cap_frame_size)
def validate_capture_framerate(
    video_names: Iterable[Tuple[str, str]],
    cap_framerates: List[float],
    framerate: Optional[float] = None,
) -> Tuple[float, bool]:
    """Ensure the passed capture framerates are valid and equal.

    Returns:
        Tuple (framerate, check_framerate): the framerate to assume, and whether
        per-capture framerates still need validation against each other.

    Raises:
        ValueError: Invalid framerate (must be positive non-zero value).
        TypeError: Framerate must be of type float.
        FrameRateUnavailable: Framerate for video could not be obtained,
            and `framerate` was not set.
    """
    if framerate is None:
        # No override given: every capture must report a usable framerate.
        missing = [(video_names[i][0], video_names[i][1])
                   for i, fps in enumerate(cap_framerates)
                   if fps < MAX_FPS_DELTA]
        if missing:
            raise FrameRateUnavailable()
        return (cap_framerates[0], True)
    if not isinstance(framerate, float):
        raise TypeError("Expected float for framerate, got %s." % type(framerate).__name__)
    if framerate < MAX_FPS_DELTA:
        raise ValueError("Invalid framerate (must be a positive non-zero value).")
    # Explicit framerate overrides the captures' reported rates entirely.
    return (framerate, False)


def validate_capture_parameters(
    video_names: List[Tuple[str, str]],
    cap_frame_sizes: List[Tuple[int, int]],
    check_framerate: bool = False,
    cap_framerates: Optional[List[float]] = None,
) -> None:
    """Ensure that all passed capture frame sizes and (optionally) framerates
    are equal. Raises VideoParameterMismatch if there is a mismatch.

    Raises:
        VideoParameterMismatch
    """
    expected_size = cap_frame_sizes[0]
    mismatches = []
    # Widths first, then heights, to preserve the original reporting order.
    for i, frame_size in enumerate(cap_frame_sizes):
        if frame_size[0] != expected_size[0]:
            mismatches.append((cv2.CAP_PROP_FRAME_WIDTH, frame_size[0], expected_size[0],
                               video_names[i][0], video_names[i][1]))
    for i, frame_size in enumerate(cap_frame_sizes):
        if frame_size[1] != expected_size[1]:
            mismatches.append((cv2.CAP_PROP_FRAME_HEIGHT, frame_size[1], expected_size[1],
                               video_names[i][0], video_names[i][1]))
    if check_framerate:
        expected_fps = cap_framerates[0]
        for i, fps in enumerate(cap_framerates):
            if math.fabs(fps - expected_fps) > MAX_FPS_DELTA:
                mismatches.append((cv2.CAP_PROP_FPS, fps, expected_fps,
                                   video_names[i][0], video_names[i][1]))
    if mismatches:
        raise VideoParameterMismatch(mismatches)
Supports seeking and setting end time/duration.""" + + BACKEND_NAME = 'video_manager_do_not_use' + + def __init__(self, + video_files: List[str], + framerate: Optional[float] = None, + logger=getLogger('pyscenedetect')): + """[DEPRECATED] DO NOT USE. + + Arguments: + video_files (list of str(s)/int): A list of one or more paths (str), or a list + of a single integer device ID, to open as an OpenCV VideoCapture object. + framerate (float, optional): Framerate to assume when storing FrameTimecodes. + If not set (i.e. is None), it will be deduced from the first open capture + in video_files, else raises a FrameRateUnavailable exception. + + Raises: + ValueError: No video file(s) specified, or invalid/multiple device IDs specified. + TypeError: `framerate` must be type `float`. + IOError: Video file(s) not found. + FrameRateUnavailable: Video framerate could not be obtained and `framerate` + was not set manually. + VideoParameterMismatch: All videos in `video_files` do not have equal parameters. + Set `validate_parameters=False` to skip this check. + VideoOpenFailure: Video(s) could not be opened. + """ + # TODO(v0.7): Add DeprecationWarning that this class will be removed in v0.8: 'VideoManager + # will be removed in PySceneDetect v0.8. Use VideoStreamCv2 or VideoCaptureAdapter instead.' + logger.error("VideoManager is deprecated and will be removed.") + if not video_files: + raise ValueError("At least one string/integer must be passed in the video_files list.") + # Need to support video_files as a single str too for compatibility. + if isinstance(video_files, str): + video_files = [video_files] + # These VideoCaptures are only open in this process. 
+ self._is_device = isinstance(video_files[0], int) + self._cap_list, self._cap_framerate, self._cap_framesize = open_captures( + video_files=video_files, framerate=framerate) + self._path = video_files[0] if not self._is_device else video_files + self._end_of_video = False + self._start_time = self.get_base_timecode() + self._end_time = None + self._curr_time = self.get_base_timecode() + self._last_frame = None + self._curr_cap, self._curr_cap_idx = None, None + self._video_file_paths = video_files + self._logger = logger + if self._logger is not None: + self._logger.info('Loaded %d video%s, framerate: %.3f FPS, resolution: %d x %d', + len(self._cap_list), 's' if len(self._cap_list) > 1 else '', + self.get_framerate(), *self.get_framesize()) + self._started = False + self._frame_length = self.get_base_timecode() + get_num_frames(self._cap_list) + self._first_cap_len = self.get_base_timecode() + get_num_frames([self._cap_list[0]]) + self._aspect_ratio = _get_aspect_ratio(self._cap_list[0]) + + def set_downscale_factor(self, downscale_factor=None): + """No-op. Set downscale_factor in `SceneManager` instead.""" + _ = downscale_factor + + def get_num_videos(self) -> int: + """Get the length of the internal capture list, + representing the number of videos the VideoManager was constructed with. + + Returns: + int: Number of videos, equal to length of capture list. + """ + return len(self._cap_list) + + def get_video_paths(self) -> List[str]: + """Get list of strings containing paths to the open video(s). + + Returns: + List[str]: List of paths to the video files opened by the VideoManager. + """ + return list(self._video_file_paths) + + def get_video_name(self) -> str: + """Get name of the video based on the first video path. + + Returns: + The base name of the video file, without extension. 
+ """ + video_paths = self.get_video_paths() + if not video_paths: + return '' + video_name = os.path.basename(video_paths[0]) + if video_name.rfind('.') >= 0: + video_name = video_name[:video_name.rfind('.')] + return video_name + + def get_framerate(self) -> float: + """Get the framerate the VideoManager is assuming for all + open VideoCaptures. Obtained from either the capture itself, or the passed + framerate parameter when the VideoManager object was constructed. + + Returns: + Framerate, in frames/sec. + """ + return self._cap_framerate + + def get_base_timecode(self) -> FrameTimecode: + """Get a FrameTimecode object at frame 0 / time 00:00:00. + + The timecode returned by this method can be used to perform arithmetic (e.g. + addition), passing the resulting values back to the VideoManager (e.g. for the + :meth:`set_duration()` method), as the framerate of the returned FrameTimecode + object matches that of the VideoManager. + + As such, this method is equivalent to creating a FrameTimecode at frame 0 with + the VideoManager framerate, for example, given a VideoManager called obj, + the following expression will evaluate as True: + + obj.get_base_timecode() == FrameTimecode(0, obj.get_framerate()) + + Furthermore, the base timecode object returned by a particular VideoManager + should not be passed to another one, unless you first verify that their + framerates are the same. + + Returns: + FrameTimecode at frame 0/time 00:00:00 with the video(s) framerate. + """ + return FrameTimecode(timecode=0, fps=self._cap_framerate) + + def get_current_timecode(self) -> FrameTimecode: + """ Get Current Timecode - returns a FrameTimecode object at current VideoManager position. + + Returns: + Timecode at the current VideoManager position. + """ + return self._curr_time + + def get_framesize(self) -> Tuple[int, int]: + """Get frame size of the video(s) open in the VideoManager's capture objects. + + Returns: + Video frame size, in pixels, in the form (width, height). 
+ """ + return self._cap_framesize + + def get_framesize_effective(self) -> Tuple[int, int]: + """ Get Frame Size - returns the frame size of the video(s) open in the + VideoManager's capture objects. + + Returns: + Video frame size, in pixels, in the form (width, height). + """ + return self._cap_framesize + + def set_duration(self, + duration: Optional[FrameTimecode] = None, + start_time: Optional[FrameTimecode] = None, + end_time: Optional[FrameTimecode] = None) -> None: + """ Set Duration - sets the duration/length of the video(s) to decode, as well as + the start/end times. Must be called before :meth:`start()` is called, otherwise + a VideoDecodingInProgress exception will be thrown. May be called after + :meth:`reset()` as well. + + Arguments: + duration (Optional[FrameTimecode]): The (maximum) duration in time to + decode from the opened video(s). Mutually exclusive with end_time + (i.e. if duration is set, end_time must be None). + start_time (Optional[FrameTimecode]): The time/first frame at which to + start decoding frames from. If set, the input video(s) will be + seeked to when start() is called, at which point the frame at + start_time can be obtained by calling retrieve(). + end_time (Optional[FrameTimecode]): The time at which to stop decoding + frames from the opened video(s). Mutually exclusive with duration + (i.e. if end_time is set, duration must be None). + + Raises: + VideoDecodingInProgress: Must call before start(). + """ + if self._started: + raise VideoDecodingInProgress() + + # Ensure any passed timecodes have the proper framerate. 
+ if ((duration is not None and not duration.equal_framerate(self._cap_framerate)) + or (start_time is not None and not start_time.equal_framerate(self._cap_framerate)) + or (end_time is not None and not end_time.equal_framerate(self._cap_framerate))): + raise ValueError("FrameTimecode framerate does not match.") + + if duration is not None and end_time is not None: + raise TypeError("Only one of duration and end_time may be specified, not both.") + + if start_time is not None: + self._start_time = start_time + + if end_time is not None: + if end_time < self._start_time: + raise ValueError("end_time is before start_time in time.") + self._end_time = end_time + elif duration is not None: + self._end_time = self._start_time + duration + + if self._end_time is not None: + self._frame_length = min(self._frame_length, self._end_time + 1) + self._frame_length -= self._start_time + + if self._logger is not None: + self._logger.info('Duration set, start: %s, duration: %s, end: %s.', + start_time.get_timecode() if start_time is not None else start_time, + duration.get_timecode() if duration is not None else duration, + end_time.get_timecode() if end_time is not None else end_time) + + def get_duration(self) -> FrameTimecode: + """ Get Duration - gets the duration/length of the video(s) to decode, + as well as the start/end times. + + If the end time was not set by :meth:`set_duration()`, the end timecode + is calculated as the start timecode + total duration. + + Returns: + Tuple[FrameTimecode, FrameTimecode, FrameTimecode]: The current video(s) + total duration, start timecode, and end timecode. + """ + end_time = self._end_time + if end_time is None: + end_time = self.get_base_timecode() + self._frame_length + return (self._frame_length, self._start_time, end_time) + + def start(self) -> None: + """ Start - starts video decoding and seeks to start time. Raises + exception VideoDecodingInProgress if the method is called after the + decoder process has already been started. 
+ + Raises: + VideoDecodingInProgress: Must call :meth:`stop()` before this + method if :meth:`start()` has already been called after + initial construction. + """ + if self._started: + raise VideoDecodingInProgress() + + self._started = True + self._get_next_cap() + if self._start_time != 0: + self.seek(self._start_time) + + # This overrides the seek method from the VideoStream interface, but the name was changed + # from `timecode` to `target`. For compatibility, we allow calling seek with the form + # seek(0), seek(timecode=0), and seek(target=0). Specifying both arguments is an error. + # pylint: disable=arguments-differ + def seek(self, timecode: FrameTimecode = None, target: FrameTimecode = None) -> bool: + """Seek forwards to the passed timecode. + + Only supports seeking forwards (i.e. timecode must be greater than the + current position). Can only be used after the :meth:`start()` + method has been called. + + Arguments: + timecode: Time in video to seek forwards to. Only one of timecode or target can be set. + target: Same as timecode. Only one of timecode or target can be set. + + Returns: + bool: True if seeking succeeded, False if no more frames / end of video. + + Raises: + ValueError: Either none or both `timecode` and `target` were set. + """ + if timecode is None and target is None: + raise ValueError('`target` must be set.') + if timecode is not None and target is not None: + raise ValueError('Only one of `timecode` or `target` can be set.') + if target is not None: + timecode = target + assert timecode is not None + if timecode < 0: + raise ValueError("Target seek position cannot be negative!") + + if not self._started: + self.start() + + timecode = self.base_timecode + timecode + if self._end_time is not None and timecode > self._end_time: + timecode = self._end_time + + # TODO: Seeking only works for the first (or current) video in the VideoManager. 
+ # Warn the user there are multiple videos in the VideoManager, and the requested + # seek time exceeds the length of the first video. + if len(self._cap_list) > 1 and timecode > self._first_cap_len: + # TODO: This should throw an exception instead of potentially failing silently + # if no logger was provided. + if self._logger is not None: + self._logger.error('Seeking past the first input video is not currently supported.') + self._logger.warning('Seeking to end of first input.') + timecode = self._first_cap_len + if self._curr_cap is not None and self._end_of_video is not True: + self._curr_cap.set(cv2.CAP_PROP_POS_FRAMES, timecode.get_frames() - 1) + self._curr_time = timecode - 1 + + while self._curr_time < timecode: + if not self.grab(): + return False + return True + + # pylint: enable=arguments-differ + + def release(self) -> None: + """ Release (cv2.VideoCapture method), releases all open capture(s). """ + for cap in self._cap_list: + cap.release() + self._cap_list = [] + self._started = False + + def reset(self) -> None: + """ Reset - Reopens captures passed to the constructor of the VideoManager. + + Can only be called after the :meth:`release()` method has been called. + + Raises: + VideoDecodingInProgress: Must call :meth:`release()` before this method. + """ + if self._started: + self.release() + + self._started = False + self._end_of_video = False + self._curr_time = self.get_base_timecode() + self._cap_list, self._cap_framerate, self._cap_framesize = open_captures( + video_files=self._video_file_paths, framerate=self._curr_time.get_framerate()) + self._curr_cap, self._curr_cap_idx = None, None + + def get(self, capture_prop: int, index: Optional[int] = None) -> Union[float, int]: + """ Get (cv2.VideoCapture method) - obtains capture properties from the current + VideoCapture object in use. Index represents the same index as the original + video_files list passed to the constructor. 
Getting/setting the position (POS) + properties has no effect; seeking is implemented using VideoDecoder methods. + + Note that getting the property CAP_PROP_FRAME_COUNT will return the integer sum of + the frame count for all VideoCapture objects if index is not specified (or is None), + otherwise the frame count for the given VideoCapture index is returned instead. + + Arguments: + capture_prop: OpenCV VideoCapture property to get (i.e. CAP_PROP_FPS). + index (int, optional): Index in file_list of capture to get property from (default + is zero). Index is not checked and will raise exception if out of bounds. + + Returns: + float: Return value from calling get(property) on the VideoCapture object. + """ + if capture_prop == cv2.CAP_PROP_FRAME_COUNT and index is None: + return self._frame_length.get_frames() + elif capture_prop == cv2.CAP_PROP_POS_FRAMES: + return self._curr_time + elif capture_prop == cv2.CAP_PROP_FPS: + return self._cap_framerate + elif index is None: + index = 0 + return self._cap_list[index].get(capture_prop) + + def grab(self) -> bool: + """ Grab (cv2.VideoCapture method) - retrieves a frame but does not return it. + + Returns: + bool: True if a frame was grabbed, False otherwise. + """ + if not self._started: + self.start() + + grabbed = False + if self._curr_cap is not None and not self._end_of_video: + while not grabbed: + grabbed = self._curr_cap.grab() + if not grabbed and not self._get_next_cap(): + break + if self._end_time is not None and self._curr_time > self._end_time: + grabbed = False + self._last_frame = None + if grabbed: + self._curr_time += 1 + else: + self._correct_frame_length() + return grabbed + + def retrieve(self) -> Tuple[bool, Optional[ndarray]]: + """ Retrieve (cv2.VideoCapture method) - retrieves and returns a frame. + + Frame returned corresponds to last call to :meth:`grab()`. 
+ + Returns: + Tuple of (True, frame_image) if a frame was grabbed during the last call to grab(), + and where frame_image is a numpy ndarray of the decoded frame. Otherwise (False, None). + """ + if not self._started: + self.start() + + retrieved = False + if self._curr_cap is not None and not self._end_of_video: + while not retrieved: + retrieved, self._last_frame = self._curr_cap.retrieve() + if not retrieved and not self._get_next_cap(): + break + if self._end_time is not None and self._curr_time > self._end_time: + retrieved = False + self._last_frame = None + return (retrieved, self._last_frame) + + def read(self, decode: bool = True, advance: bool = True) -> Union[ndarray, bool]: + """ Return next frame (or current if advance = False), or False if end of video. + + Arguments: + decode: Decode and return the frame. + advance: Seek to the next frame. If False, will remain on the current frame. + + Returns: + If decode = True, returns either the decoded frame, or False if end of video. + If decode = False, a boolean indicating if the next frame was advanced to or not is + returned. + """ + if not self._started: + self.start() + has_grabbed = False + if advance: + has_grabbed = self.grab() + if decode: + retrieved, frame = self.retrieve() + return frame if retrieved else False + return has_grabbed + + def _get_next_cap(self) -> bool: + self._curr_cap = None + if self._curr_cap_idx is None: + self._curr_cap_idx = 0 + self._curr_cap = self._cap_list[0] + return True + else: + if not (self._curr_cap_idx + 1) < len(self._cap_list): + self._end_of_video = True + return False + self._curr_cap_idx += 1 + self._curr_cap = self._cap_list[self._curr_cap_idx] + return True + + def _correct_frame_length(self) -> None: + """ Checks if the current frame position exceeds that originally calculated, + and adjusts the internally calculated frame length accordingly. Called after + exhausting all input frames from the video source(s). 
+ """ + self._end_time = self._curr_time + self._frame_length = self._curr_time - self._start_time + + # VideoStream Interface (Some Covered Above) + + @property + def aspect_ratio(self) -> float: + """Display/pixel aspect ratio as a float (1.0 represents square pixels).""" + return self._aspect_ratio + + @property + def duration(self) -> Optional[FrameTimecode]: + """Duration of the stream as a FrameTimecode, or None if non terminating.""" + return self.get_duration()[0] + + @property + def position(self) -> FrameTimecode: + """Current position within stream as FrameTimecode. + + This can be interpreted as presentation time stamp of the last frame which was + decoded by calling `read` with advance=True. + + This method will always return 0 (e.g. be equal to `base_timecode`) if no frames + have been `read`.""" + frames = self._curr_time.get_frames() + if frames < 1: + return self.base_timecode + return self.base_timecode + (frames - 1) + + @property + def position_ms(self) -> float: + """Current position within stream as a float of the presentation time in milliseconds. + The first frame has a time of 0.0 ms. + + This method will always return 0.0 if no frames have been `read`.""" + return self.position.get_seconds() * 1000.0 + + @property + def frame_number(self) -> int: + """Current position within stream in frames as an int. + + 1 indicates the first frame was just decoded by the last call to `read` with advance=True, + whereas 0 indicates that no frames have been `read`. 
+ + This method will always return 0 if no frames have been `read`.""" + return self._curr_time.get_frames() + + @property + def frame_rate(self) -> float: + """Framerate in frames/sec.""" + return self._cap_framerate + + @property + def frame_size(self) -> Tuple[int, int]: + """Size of each video frame in pixels as a tuple of (width, height).""" + return (math.trunc(self._cap_list[0].get(cv2.CAP_PROP_FRAME_WIDTH)), + math.trunc(self._cap_list[0].get(cv2.CAP_PROP_FRAME_HEIGHT))) + + @property + def is_seekable(self) -> bool: + """Just returns True.""" + return True + + @property + def path(self) -> Union[bytes, str]: + """Video or device path.""" + if self._is_device: + return "Device %d" % self._path + return self._path + + @property + def name(self) -> Union[bytes, str]: + """Name of the video, without extension, or device.""" + if self._is_device: + return self.path + return get_file_name(self.path, include_extension=False) diff --git a/backend/scenedetect/video_splitter.py b/backend/scenedetect/video_splitter.py new file mode 100644 index 0000000..34550ad --- /dev/null +++ b/backend/scenedetect/video_splitter.py @@ -0,0 +1,296 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . +# PySceneDetect is licensed under the BSD 3-Clause License; see the +# included LICENSE file, or visit one of the above pages for details. +# +# This software may also invoke mkvmerge or FFmpeg, if available. +# FFmpeg is a trademark of Fabrice Bellard. +# mkvmerge is Copyright (C) 2005-2016, Matroska. +# Certain distributions of PySceneDetect may include the above software; +# see the included LICENSE-FFMPEG and LICENSE-MKVMERGE files. 
+# +"""``scenedetect.video_splitter`` Module + +The `scenedetect.video_splitter` module contains functions to split existing videos into clips +using ffmpeg or mkvmerge. + +These programs can be obtained from following URLs (note that mkvmerge is a part mkvtoolnix): + + * FFmpeg: [ https://ffmpeg.org/download.html ] + * mkvmerge: [ https://mkvtoolnix.download/downloads.html ] + +If you are a Linux user, you can likely obtain the above programs from your package manager. + +Once installed, ensure the program can be accessed system-wide by calling the `mkvmerge` or `ffmpeg` +command from a terminal/command prompt. PySceneDetect will automatically use whichever program is +available on the computer, depending on the specified command-line options. +""" + +import logging +import subprocess +import math +import time +from typing import Iterable, Optional, Tuple + +from scenedetect.platform import (tqdm, invoke_command, CommandTooLong, get_file_name, + get_ffmpeg_path, Template) +from scenedetect.frame_timecode import FrameTimecode + +logger = logging.getLogger('pyscenedetect') + +TimecodePair = Tuple[FrameTimecode, FrameTimecode] +"""Named type for pairs of timecodes, which typically represents the start/end of a scene.""" + +COMMAND_TOO_LONG_STRING = """ +Cannot split video due to too many scenes (resulting command +is too large to process). To work around this issue, you can +split the video manually by exporting a list of cuts with the +`list-scenes` command. +See https://github.com/Breakthrough/PySceneDetect/issues/164 +for details. Sorry about that! 
def is_mkvmerge_available() -> bool:
    """Gracefully check whether the `mkvmerge` command can be invoked.

    Returns:
        True if `mkvmerge` can be invoked, False otherwise.
    """
    try:
        status = subprocess.call(['mkvmerge', '--quiet'])
    except OSError:
        # Binary not found on PATH.
        return False
    # mkvmerge exits with code 2 when invoked without an output file, which
    # still proves the binary is present and runnable.
    return status is not None and status == 2


def is_ffmpeg_available() -> bool:
    """Gracefully check whether the `ffmpeg` command is available.

    Returns:
        True if `ffmpeg` can be invoked, False otherwise.
    """
    return FFMPEG_PATH is not None
For backwards compatibility only. + Returns: + Return code of invoking mkvmerge (0 on success). If scene_list is empty, will + still return 0, but no commands will be invoked. + """ + # Handle backwards compatibility with v0.5 API. + if isinstance(input_video_path, list): + logger.error('Using a list of paths is deprecated. Pass a single path instead.') + if len(input_video_path) > 1: + raise ValueError('Concatenating multiple input videos is not supported.') + input_video_path = input_video_path[0] + if suppress_output is not None: + logger.error('suppress_output is deprecated, use show_output instead.') + show_output = not suppress_output + + if not scene_list: + return 0 + + logger.info('Splitting input video using mkvmerge, output path template:\n %s', + output_file_template) + + if video_name is None: + video_name = get_file_name(input_video_path, include_extension=False) + + ret_val = 0 + # mkvmerge automatically appends '-$SCENE_NUMBER', so we remove it if present. + output_file_template = output_file_template.replace('-$SCENE_NUMBER', + '').replace('$SCENE_NUMBER', '') + output_file_name = Template(output_file_template).safe_substitute(VIDEO_NAME=video_name) + + try: + call_list = ['mkvmerge'] + if not show_output: + call_list.append('--quiet') + call_list += [ + '-o', output_file_name, '--split', + 'parts:%s' % ','.join([ + '%s-%s' % (start_time.get_timecode(), end_time.get_timecode()) + for start_time, end_time in scene_list + ]), input_video_path + ] + total_frames = scene_list[-1][1].get_frames() - scene_list[0][0].get_frames() + processing_start_time = time.time() + # TODO(v0.6.2): Capture stdout/stderr and show that if the command fails. 
+ ret_val = invoke_command(call_list) + if show_output: + logger.info('Average processing speed %.2f frames/sec.', + float(total_frames) / (time.time() - processing_start_time)) + except CommandTooLong: + logger.error(COMMAND_TOO_LONG_STRING) + except OSError: + logger.error('mkvmerge could not be found on the system.' + ' Please install mkvmerge to enable video output support.') + if ret_val != 0: + logger.error('Error splitting video (mkvmerge returned %d).', ret_val) + return ret_val + + +def split_video_ffmpeg( + input_video_path: str, + scene_list: Iterable[TimecodePair], + output_file_template: str = '$VIDEO_NAME-Scene-$SCENE_NUMBER.mp4', + video_name: Optional[str] = None, + arg_override: str = DEFAULT_FFMPEG_ARGS, + show_progress: bool = False, + show_output: bool = False, + suppress_output=None, + hide_progress=None, +): + """ Calls the ffmpeg command on the input video, generating a new video for + each scene based on the start/end timecodes. + + Arguments: + input_video_path: Path to the video to be split. + scene_list (List[Tuple[FrameTimecode, FrameTimecode]]): List of scenes + (pairs of FrameTimecodes) denoting the start/end frames of each scene. + output_file_template (str): Template to use for generating the output filenames. + Can use $VIDEO_NAME and $SCENE_NUMBER in this format, for example: + `$VIDEO_NAME - Scene $SCENE_NUMBER.mp4` + video_name (str): Name of the video to be substituted in output_file_template. If not + passed will be calculated from input_video_path automatically. + arg_override (str): Allows overriding the arguments passed to ffmpeg for encoding. + show_progress (bool): If True, will show progress bar provided by tqdm (if installed). + show_output (bool): If True, will show output from ffmpeg for first split. + suppress_output: [DEPRECATED] DO NOT USE. For backwards compatibility only. + hide_progress: [DEPRECATED] DO NOT USE. For backwards compatibility only. + + Returns: + Return code of invoking ffmpeg (0 on success). 
If scene_list is empty, will + still return 0, but no commands will be invoked. + """ + # Handle backwards compatibility with v0.5 API. + if isinstance(input_video_path, list): + logger.error('Using a list of paths is deprecated. Pass a single path instead.') + if len(input_video_path) > 1: + raise ValueError('Concatenating multiple input videos is not supported.') + input_video_path = input_video_path[0] + if suppress_output is not None: + logger.error('suppress_output is deprecated, use show_output instead.') + show_output = not suppress_output + if hide_progress is not None: + logger.error('hide_progress is deprecated, use show_progress instead.') + show_progress = not hide_progress + + if not scene_list: + return 0 + + logger.info('Splitting input video using ffmpeg, output path template:\n %s', + output_file_template) + + if video_name is None: + video_name = get_file_name(input_video_path, include_extension=False) + + arg_override = arg_override.replace('\\"', '"') + + ret_val = 0 + arg_override = arg_override.split(' ') + scene_num_format = '%0' + scene_num_format += str(max(3, math.floor(math.log(len(scene_list), 10)) + 1)) + 'd' + + try: + progress_bar = None + total_frames = scene_list[-1][1].get_frames() - scene_list[0][0].get_frames() + if show_progress: + progress_bar = tqdm(total=total_frames, unit='frame', miniters=1, dynamic_ncols=True) + processing_start_time = time.time() + for i, (start_time, end_time) in enumerate(scene_list): + duration = (end_time - start_time) + # Format output filename with template variable + output_file_template_iter = Template(output_file_template).safe_substitute( + VIDEO_NAME=video_name, + SCENE_NUMBER=scene_num_format % (i + 1), + START_TIME=str(start_time.get_timecode().replace(":", ";")), + END_TIME=str(end_time.get_timecode().replace(":", ";")), + START_FRAME=str(start_time.get_frames()), + END_FRAME=str(end_time.get_frames())) + + # Gracefully handle case where FFMPEG_PATH might be unset. 
+ call_list = [FFMPEG_PATH if FFMPEG_PATH is not None else 'ffmpeg'] + if not show_output: + call_list += ['-v', 'quiet'] + elif i > 0: + # Only show ffmpeg output for the first call, which will display any + # errors if it fails, and then break the loop. We only show error messages + # for the remaining calls. + call_list += ['-v', 'error'] + call_list += [ + '-nostdin', '-y', '-ss', + str(start_time.get_seconds()), '-i', input_video_path, '-t', + str(duration.get_seconds()) + ] + call_list += arg_override + call_list += ['-sn'] + call_list += [output_file_template_iter] + ret_val = invoke_command(call_list) + if show_output and i == 0 and len(scene_list) > 1: + logger.info( + 'Output from ffmpeg for Scene 1 shown above, splitting remaining scenes...') + if ret_val != 0: + # TODO(v0.6.2): Capture stdout/stderr and display it on any failed calls. + logger.error('Error splitting video (ffmpeg returned %d).', ret_val) + break + if progress_bar: + progress_bar.update(duration.get_frames()) + + if progress_bar: + progress_bar.close() + if show_output: + logger.info('Average processing speed %.2f frames/sec.', + float(total_frames) / (time.time() - processing_start_time)) + + except CommandTooLong: + logger.error(COMMAND_TOO_LONG_STRING) + except OSError: + logger.error('ffmpeg could not be found on the system.' + ' Please install ffmpeg to enable video output support.') + return ret_val diff --git a/backend/scenedetect/video_stream.py b/backend/scenedetect/video_stream.py new file mode 100644 index 0000000..ba97068 --- /dev/null +++ b/backend/scenedetect/video_stream.py @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +# +# PySceneDetect: Python-Based Video Scene Detector +# ------------------------------------------------------------------- +# [ Site: https://scenedetect.com ] +# [ Docs: https://scenedetect.com/docs/ ] +# [ Github: https://github.com/Breakthrough/PySceneDetect/ ] +# +# Copyright (C) 2014-2023 Brandon Castellano . 
# PySceneDetect is licensed under the BSD 3-Clause License; see the
# included LICENSE file, or visit one of the above pages for details.
#
"""``scenedetect.video_stream`` Module

Provides :class:`VideoStream`, a library-agnostic interface for video input.
Open a video by path with :func:`scenedetect.open_video`:

.. code:: python

    from scenedetect import open_video
    video = open_video('video.mp4')
    while True:
        frame = video.read()
        if frame is False:
            break
    print("Read %d frames" % video.frame_number)

A framerate and a specific backend library may also be specified; unless one is
given, OpenCV is used as the video backend. See :mod:`scenedetect.backends` for
a detailed example.

New :class:`VideoStream ` implementations can be
tested by adding them to the test suite in `tests/test_video_stream.py`.
"""

from abc import ABC, abstractmethod
from logging import getLogger
from typing import Tuple, Optional, Union

from numpy import ndarray

from backend.scenedetect.frame_timecode import FrameTimecode

##
## VideoStream Exceptions
##


class SeekError(Exception):
    """Raised when an unrecoverable error occurs while attempting to seek, or when
    the underlying stream is not seekable (additional information will be provided
    when possible).

    The stream is guaranteed to be left in a valid state, but the position may be
    reset."""


class VideoOpenFailure(Exception):
    """Raised by a backend if opening a video fails."""

    # pylint: disable=useless-super-delegation
    def __init__(self, message: str = "Unknown backend error."):
        """
        Arguments:
            message: Additional context the backend can provide for the open failure.
        """
        super().__init__(message)

    # pylint: enable=useless-super-delegation


class FrameRateUnavailable(VideoOpenFailure):
    """Subclass of VideoOpenFailure used to provide a consistent error message
    across backends when the video frame rate is unavailable or cannot be
    calculated."""

    def __init__(self):
        super().__init__('Unable to obtain video framerate! Specify `framerate` manually, or'
                         ' re-encode/re-mux the video and try again.')


##
## VideoStream Interface (Base Class)
##


class VideoStream(ABC):
    """Interface which all video backends must implement."""

    #
    # Default Implementations
    #

    @property
    def base_timecode(self) -> FrameTimecode:
        """FrameTimecode object to use as a time base."""
        return FrameTimecode(timecode=0, fps=self.frame_rate)

    #
    # Abstract Static Methods
    #

    @staticmethod
    @abstractmethod
    def BACKEND_NAME() -> str:
        """Unique name identifying this backend. Derived classes should declare it
        as a static property (`BACKEND_NAME = 'backend_identifier'`)."""
        raise NotImplementedError

    #
    # Abstract Properties
    #

    @property
    @abstractmethod
    def path(self) -> Union[bytes, str]:
        """Video or device path."""
        raise NotImplementedError

    @property
    @abstractmethod
    def name(self) -> Union[bytes, str]:
        """Name of the video (without extension), or device."""
        raise NotImplementedError

    @property
    @abstractmethod
    def is_seekable(self) -> bool:
        """True if seek() is allowed, False otherwise."""
        raise NotImplementedError

    @property
    @abstractmethod
    def frame_rate(self) -> float:
        """Frame rate in frames/sec."""
        raise NotImplementedError

    @property
    @abstractmethod
    def duration(self) -> Optional[FrameTimecode]:
        """Duration of the stream as a FrameTimecode, or None if non terminating."""
        raise NotImplementedError

    @property
    @abstractmethod
    def frame_size(self) -> Tuple[int, int]:
        """Size of each video frame in pixels as a (width, height) tuple."""
        raise NotImplementedError

    @property
    @abstractmethod
    def aspect_ratio(self) -> float:
        """Pixel aspect ratio as a float (1.0 represents square pixels)."""
        raise NotImplementedError

    @property
    @abstractmethod
    def position(self) -> FrameTimecode:
        """Current position within stream as FrameTimecode.

        Interpreted as the presentation time stamp: frame 1 corresponds to
        presentation time 0. Returns 0 even if `frame_number` is 1."""
        raise NotImplementedError

    @property
    @abstractmethod
    def position_ms(self) -> float:
        """Current position within stream as a float of the presentation time in
        milliseconds. The first frame has a PTS of 0."""
        raise NotImplementedError

    @property
    @abstractmethod
    def frame_number(self) -> int:
        """Current position within stream as the frame number.

        Will return 0 until the first frame is `read`."""
        raise NotImplementedError

    #
    # Abstract Methods
    #

    @abstractmethod
    def read(self, decode: bool = True, advance: bool = True) -> Union[ndarray, bool]:
        """Read and decode the next frame as a numpy.ndarray. Returns False when the
        video ends.

        Arguments:
            decode: Decode and return the frame.
            advance: Seek to the next frame. If False, will return the current (last) frame.

        Returns:
            If decode = True, the decoded frame (numpy.ndarray), or False (bool) if end of video.
            If decode = False, a bool indicating if advancing to the next frame succeeded.
        """
        raise NotImplementedError

    @abstractmethod
    def reset(self) -> None:
        """Close and re-open the VideoStream (equivalent to seeking back to the beginning)."""
        raise NotImplementedError

    @abstractmethod
    def seek(self, target: Union[FrameTimecode, float, int]) -> None:
        """Seek to the given timecode. When given as a frame number, `target` represents
        the current seek pointer (e.g. seeking to 0 means the next frame decoded will be
        the first frame of the video).

        For 1-based indices (first frame is frame #1), convert the target frame number
        to 0-based by subtracting one: to reach the first frame, call seek(0) followed
        by read(); to reach the 5th frame, call seek(4) followed by read(), at which
        point frame_number will be 5.

        May not be supported on all backend types or inputs (e.g. cameras).

        Arguments:
            target: Target position in video stream to seek to.
                If float, interpreted as time in seconds.
                If int, interpreted as frame number.
        Raises:
            SeekError: An error occurs while seeking, or seeking is not supported.
            ValueError: `target` is not a valid value (i.e. it is negative).
        """
        raise NotImplementedError


# TODO(0.6.3): Add a StreamJoiner class to concatenate multiple videos using a specified backend.