mirror of
https://github.com/YaoFANGUK/video-subtitle-remover.git
synced 2026-05-20 04:37:28 +08:00
新增视频场景检测
This commit is contained in:
355
backend/scenedetect/backends/pyav.py
Normal file
355
backend/scenedetect/backends/pyav.py
Normal file
@@ -0,0 +1,355 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# PySceneDetect: Python-Based Video Scene Detector
|
||||
# -------------------------------------------------------------------
|
||||
# [ Site: https://scenedetect.com ]
|
||||
# [ Docs: https://scenedetect.com/docs/ ]
|
||||
# [ Github: https://github.com/Breakthrough/PySceneDetect/ ]
|
||||
#
|
||||
# Copyright (C) 2014-2023 Brandon Castellano <http://www.bcastell.com>.
|
||||
# PySceneDetect is licensed under the BSD 3-Clause License; see the
|
||||
# included LICENSE file, or visit one of the above pages for details.
|
||||
#
|
||||
""":class:`VideoStreamAv` provides an adapter for the PyAV av.InputContainer object.
|
||||
|
||||
Uses string identifier ``'pyav'``.
|
||||
"""
|
||||
|
||||
from logging import getLogger
|
||||
from typing import AnyStr, BinaryIO, Optional, Tuple, Union
|
||||
|
||||
# pylint: disable=c-extension-no-member
|
||||
import av
|
||||
from numpy import ndarray
|
||||
|
||||
from scenedetect.frame_timecode import FrameTimecode, MAX_FPS_DELTA
|
||||
from scenedetect.platform import get_file_name
|
||||
from scenedetect.video_stream import VideoStream, VideoOpenFailure, FrameRateUnavailable
|
||||
|
||||
logger = getLogger('pyscenedetect')
|
||||
|
||||
VALID_THREAD_MODES = [
|
||||
av.codec.context.ThreadType.NONE,
|
||||
av.codec.context.ThreadType.SLICE,
|
||||
av.codec.context.ThreadType.FRAME,
|
||||
av.codec.context.ThreadType.AUTO,
|
||||
]
|
||||
|
||||
|
||||
class VideoStreamAv(VideoStream):
|
||||
"""PyAV `av.InputContainer` backend."""
|
||||
|
||||
# TODO: Investigate adding an accurate_duration option to backends to calculate the duration
|
||||
# with higher precision. Sometimes it doesn't exactly match what the codec or VLC reports,
|
||||
# but we can try to seek to the end of the video first to determine it. Investigate how VLC
|
||||
# calculates the end time.
|
||||
def __init__(
|
||||
self,
|
||||
path_or_io: Union[AnyStr, BinaryIO],
|
||||
framerate: Optional[float] = None,
|
||||
name: Optional[str] = None,
|
||||
threading_mode: Optional[str] = None,
|
||||
suppress_output: bool = False,
|
||||
):
|
||||
"""Open a video by path.
|
||||
|
||||
.. warning::
|
||||
|
||||
Using `threading_mode` with `suppress_output = True` can cause lockups in your
|
||||
application. See the PyAV documentation for details:
|
||||
https://pyav.org/docs/stable/overview/caveats.html#sub-interpeters
|
||||
|
||||
Arguments:
|
||||
path_or_io: Path to the video, or a file-like object.
|
||||
framerate: If set, overrides the detected framerate.
|
||||
name: Overrides the `name` property derived from the video path. Should be set if
|
||||
`path_or_io` is a file-like object.
|
||||
threading_mode: The PyAV video stream `thread_type`. See av.codec.context.ThreadType
|
||||
for valid threading modes ('AUTO', 'FRAME', 'NONE', and 'SLICE'). If this mode is
|
||||
'AUTO' or 'FRAME' and not all frames have been decoded, the video will be reopened
|
||||
if seekable, and the remaining frames decoded in single-threaded mode.
|
||||
suppress_output: If False, ffmpeg output will be sent to stdout/stderr by calling
|
||||
`av.logging.restore_default_callback()` before any other library calls. If True
|
||||
the application may deadlock if threading_mode is set. See the PyAV documentation
|
||||
for details: https://pyav.org/docs/stable/overview/caveats.html#sub-interpeters
|
||||
|
||||
Raises:
|
||||
OSError: file could not be found or access was denied
|
||||
VideoOpenFailure: video could not be opened (may be corrupted)
|
||||
ValueError: specified framerate is invalid
|
||||
"""
|
||||
self._container = None
|
||||
|
||||
# TODO(#258): See what self._container.discard_corrupt = True does with corrupt videos.
|
||||
super().__init__()
|
||||
|
||||
# Ensure specified framerate is valid if set.
|
||||
if framerate is not None and framerate < MAX_FPS_DELTA:
|
||||
raise ValueError('Specified framerate (%f) is invalid!' % framerate)
|
||||
|
||||
self._name = '' if name is None else name
|
||||
self._path = ''
|
||||
self._frame = None
|
||||
self._reopened = True
|
||||
|
||||
if threading_mode:
|
||||
threading_mode = threading_mode.upper()
|
||||
if not threading_mode in VALID_THREAD_MODES:
|
||||
raise ValueError('Invalid threading mode! Must be one of: %s' % VALID_THREAD_MODES)
|
||||
|
||||
if not suppress_output:
|
||||
logger.debug('Restoring default ffmpeg log callbacks.')
|
||||
av.logging.restore_default_callback()
|
||||
|
||||
try:
|
||||
if isinstance(path_or_io, (str, bytes)):
|
||||
self._path = path_or_io
|
||||
self._io = open(path_or_io, 'rb')
|
||||
if not self._name:
|
||||
self._name = get_file_name(self.path, include_extension=False)
|
||||
else:
|
||||
self._io = path_or_io
|
||||
|
||||
self._container = av.open(self._io)
|
||||
if threading_mode is not None:
|
||||
self._video_stream.thread_type = threading_mode
|
||||
self._reopened = False
|
||||
logger.debug('Threading mode set: %s', threading_mode)
|
||||
except OSError:
|
||||
raise
|
||||
except Exception as ex:
|
||||
raise VideoOpenFailure(str(ex)) from ex
|
||||
|
||||
if framerate is None:
|
||||
# Calculate framerate from video container. `guessed_rate` below appears in PyAV 9.
|
||||
frame_rate = self._video_stream.guessed_rate if hasattr(
|
||||
self._video_stream, 'guessed_rate') else self._codec_context.framerate
|
||||
if frame_rate is None or frame_rate == 0:
|
||||
raise FrameRateUnavailable()
|
||||
# TODO: Refactor FrameTimecode to support raw timing rather than framerate based calculations.
|
||||
# See https://pyav.org/docs/develop/api/stream.html for details.
|
||||
frame_rate = frame_rate.numerator / float(frame_rate.denominator)
|
||||
if frame_rate < MAX_FPS_DELTA:
|
||||
raise FrameRateUnavailable()
|
||||
self._frame_rate: float = frame_rate
|
||||
else:
|
||||
assert framerate >= MAX_FPS_DELTA
|
||||
self._frame_rate: float = framerate
|
||||
|
||||
# Calculate duration after we have set the framerate.
|
||||
self._duration_frames = self._get_duration()
|
||||
|
||||
def __del__(self):
|
||||
if self._container is not None:
|
||||
self._container.close()
|
||||
|
||||
#
|
||||
# VideoStream Methods/Properties
|
||||
#
|
||||
|
||||
BACKEND_NAME = 'pyav'
|
||||
"""Unique name used to identify this backend."""
|
||||
|
||||
@property
|
||||
def path(self) -> Union[bytes, str]:
|
||||
"""Video path."""
|
||||
return self._path
|
||||
|
||||
@property
|
||||
def name(self) -> Union[bytes, str]:
|
||||
"""Name of the video, without extension."""
|
||||
return self._name
|
||||
|
||||
@property
|
||||
def is_seekable(self) -> bool:
|
||||
"""True if seek() is allowed, False otherwise."""
|
||||
return self._io.seekable()
|
||||
|
||||
@property
|
||||
def frame_size(self) -> Tuple[int, int]:
|
||||
"""Size of each video frame in pixels as a tuple of (width, height)."""
|
||||
return (self._codec_context.width, self._codec_context.height)
|
||||
|
||||
@property
|
||||
def duration(self) -> FrameTimecode:
|
||||
"""Duration of the video as a FrameTimecode."""
|
||||
return self.base_timecode + self._duration_frames
|
||||
|
||||
@property
|
||||
def frame_rate(self) -> float:
|
||||
"""Frame rate in frames/sec."""
|
||||
return self._frame_rate
|
||||
|
||||
@property
|
||||
def position(self) -> FrameTimecode:
|
||||
"""Current position within stream as FrameTimecode.
|
||||
|
||||
This can be interpreted as presentation time stamp, thus frame 1 corresponds
|
||||
to the presentation time 0. Returns 0 even if `frame_number` is 1."""
|
||||
if self._frame is None:
|
||||
return self.base_timecode
|
||||
return FrameTimecode(round(self._frame.time * self.frame_rate), self.frame_rate)
|
||||
|
||||
@property
|
||||
def position_ms(self) -> float:
|
||||
"""Current position within stream as a float of the presentation time in
|
||||
milliseconds. The first frame has a PTS of 0."""
|
||||
if self._frame is None:
|
||||
return 0.0
|
||||
return self._frame.time * 1000.0
|
||||
|
||||
@property
|
||||
def frame_number(self) -> int:
|
||||
"""Current position within stream as the frame number.
|
||||
|
||||
Will return 0 until the first frame is `read`."""
|
||||
if self._frame:
|
||||
return self.position.frame_num + 1
|
||||
return 0
|
||||
|
||||
@property
|
||||
def aspect_ratio(self) -> float:
|
||||
"""Pixel aspect ratio as a float (1.0 represents square pixels)."""
|
||||
display_aspect_ratio = (
|
||||
self._codec_context.display_aspect_ratio.numerator /
|
||||
self._codec_context.display_aspect_ratio.denominator)
|
||||
frame_aspect_ratio = self.frame_size[0] / self.frame_size[1]
|
||||
return display_aspect_ratio / frame_aspect_ratio
|
||||
|
||||
def seek(self, target: Union[FrameTimecode, float, int]) -> None:
|
||||
"""Seek to the given timecode. If given as a frame number, represents the current seek
|
||||
pointer (e.g. if seeking to 0, the next frame decoded will be the first frame of the video).
|
||||
|
||||
For 1-based indices (first frame is frame #1), the target frame number needs to be converted
|
||||
to 0-based by subtracting one. For example, if we want to seek to the first frame, we call
|
||||
seek(0) followed by read(). If we want to seek to the 5th frame, we call seek(4) followed
|
||||
by read(), at which point frame_number will be 5.
|
||||
|
||||
May not be supported on all input codecs (see `is_seekable`).
|
||||
|
||||
Arguments:
|
||||
target: Target position in video stream to seek to.
|
||||
If float, interpreted as time in seconds.
|
||||
If int, interpreted as frame number.
|
||||
Raises:
|
||||
ValueError: `target` is not a valid value (i.e. it is negative).
|
||||
"""
|
||||
if target < 0:
|
||||
raise ValueError("Target cannot be negative!")
|
||||
beginning = (target == 0)
|
||||
target = (self.base_timecode + target)
|
||||
if target >= 1:
|
||||
target = target - 1
|
||||
target_pts = self._video_stream.start_time + int(
|
||||
(self.base_timecode + target).get_seconds() / self._video_stream.time_base)
|
||||
self._frame = None
|
||||
self._container.seek(target_pts, stream=self._video_stream)
|
||||
if not beginning:
|
||||
self.read(decode=False, advance=True)
|
||||
while self.position < target:
|
||||
if self.read(decode=False, advance=True) is False:
|
||||
break
|
||||
|
||||
def reset(self):
|
||||
""" Close and re-open the VideoStream (should be equivalent to calling `seek(0)`). """
|
||||
self._container.close()
|
||||
self._frame = None
|
||||
try:
|
||||
self._container = av.open(self._path if self._path else self._io)
|
||||
except Exception as ex:
|
||||
raise VideoOpenFailure() from ex
|
||||
|
||||
def read(self, decode: bool = True, advance: bool = True) -> Union[ndarray, bool]:
|
||||
"""Read and decode the next frame as a numpy.ndarray. Returns False when video ends.
|
||||
|
||||
Arguments:
|
||||
decode: Decode and return the frame.
|
||||
advance: Seek to the next frame. If False, will return the current (last) frame.
|
||||
|
||||
Returns:
|
||||
If decode = True, the decoded frame (numpy.ndarray), or False (bool) if end of video.
|
||||
If decode = False, a bool indicating if advancing to the the next frame succeeded.
|
||||
"""
|
||||
has_advanced = False
|
||||
if advance:
|
||||
try:
|
||||
last_frame = self._frame
|
||||
self._frame = next(self._container.decode(video=0))
|
||||
except av.error.EOFError:
|
||||
self._frame = last_frame
|
||||
if self._handle_eof():
|
||||
return self.read(decode, advance=True)
|
||||
return False
|
||||
except StopIteration:
|
||||
return False
|
||||
has_advanced = True
|
||||
if decode:
|
||||
return self._frame.to_ndarray(format='bgr24')
|
||||
return has_advanced
|
||||
|
||||
#
|
||||
# Private Methods/Properties
|
||||
#
|
||||
|
||||
@property
|
||||
def _video_stream(self):
|
||||
"""PyAV `av.video.stream.VideoStream` being used."""
|
||||
return self._container.streams.video[0]
|
||||
|
||||
@property
|
||||
def _codec_context(self):
|
||||
"""PyAV `av.codec.context.CodecContext` being used."""
|
||||
return self._video_stream.codec_context
|
||||
|
||||
def _get_duration(self) -> int:
|
||||
"""Get video duration as number of frames based on the video and set framerate."""
|
||||
# See https://pyav.org/docs/develop/api/time.html for details on how ffmpeg/PyAV
|
||||
# handle time calculations internally and which time base to use.
|
||||
assert self.frame_rate is not None, "Frame rate must be set before calling _get_duration!"
|
||||
# See if we can obtain the number of frames directly from the stream itself.
|
||||
if self._video_stream.frames > 0:
|
||||
return self._video_stream.frames
|
||||
# Calculate based on the reported container duration.
|
||||
duration_sec = None
|
||||
container = self._video_stream.container
|
||||
if container.duration is not None and container.duration > 0:
|
||||
# Containers use AV_TIME_BASE as the time base.
|
||||
duration_sec = float(self._video_stream.container.duration / av.time_base)
|
||||
# Lastly, if that calculation fails, try to calculate it based on the stream duration.
|
||||
if duration_sec is None or duration_sec < MAX_FPS_DELTA:
|
||||
if self._video_stream.duration is None:
|
||||
logger.warning('Video duration unavailable.')
|
||||
return 0
|
||||
# Streams use stream `time_base` as the time base.
|
||||
time_base = self._video_stream.time_base
|
||||
if time_base.denominator == 0:
|
||||
logger.warning(
|
||||
'Unable to calculate video duration: time_base (%s) has zero denominator!',
|
||||
str(time_base))
|
||||
return 0
|
||||
duration_sec = float(self._video_stream.duration / time_base)
|
||||
return round(duration_sec * self.frame_rate)
|
||||
|
||||
def _handle_eof(self):
|
||||
"""Fix for issue where if thread_type is 'AUTO' the whole video is not decoded.
|
||||
|
||||
Re-open video if the threading mode is AUTO and we didn't decode all of the frames."""
|
||||
# Don't re-open the video if we already did, or if we already decoded all the frames.
|
||||
if self._reopened or self.frame_number >= self.duration:
|
||||
return False
|
||||
self._reopened = True
|
||||
# Don't re-open the video if we can't seek or aren't in AUTO/FRAME thread_type mode.
|
||||
if not self.is_seekable or not self._video_stream.thread_type in ('AUTO', 'FRAME'):
|
||||
return False
|
||||
last_frame = self.frame_number
|
||||
orig_pos = self._io.tell()
|
||||
try:
|
||||
self._io.seek(0)
|
||||
container = av.open(self._io)
|
||||
except:
|
||||
self._io.seek(orig_pos)
|
||||
raise
|
||||
self._container.close()
|
||||
self._container = container
|
||||
self.seek(last_frame)
|
||||
return True
|
||||
Reference in New Issue
Block a user