From 7fba113f11e5dfcdc2a4237faa1571b571bfba0c Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 30 Jul 2025 15:20:10 -0400 Subject: [PATCH 001/128] ADD: Added audio stream for process_video --- pyproject.toml | 3 ++- supervision/utils/video.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cae78492ac..787fa93ade 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,8 @@ dependencies = [ "pillow>=9.4", "requests>=2.26.0", "tqdm>=4.62.3", - "opencv-python>=4.5.5.64" + "opencv-python>=4.5.5.64", + "imageio-ffmpeg (>=0.6.0,<0.7.0)" ] [project.urls] diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 3b281b4e22..029de694c9 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -9,6 +9,12 @@ import numpy as np from tqdm.auto import tqdm +import subprocess +import imageio_ffmpeg +import os + +ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe() +ffprobe_path = ffmpeg_path.replace("ffmpeg", "ffprobe") @dataclass class VideoInfo: @@ -254,6 +260,34 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray: result_frame = callback(frame, index) sink.write_frame(frame=result_frame) + + def has_audio_stream(video_path): + result = subprocess.run( + [ffmpeg_path, "-i", video_path], + stderr=subprocess.PIPE, + stdout=subprocess.DEVNULL, + text=True + ) + + return "Audio:" in result.stderr + + if has_audio_stream(source_path): + video_input = target_path + audio_source = source_path + temp_output = "temp_output.mp4" + subprocess.run([ + ffmpeg_path, + "-i", video_input, + "-i", audio_source, + "-map", "0:v", + "-map", "1:a", + "-c:v", "copy", + "-c:a", "aac", + "-shortest", + temp_output + ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + os.replace(temp_output, video_input) class FPSMonitor: """ From 8947f770b8b22867ce8066237a5064e508ff4cc7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 19:30:13 +0000 Subject: [PATCH 002/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 47 +++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 029de694c9..499c7aed8a 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -1,21 +1,21 @@ from __future__ import annotations +import os +import subprocess import time from collections import deque from collections.abc import Callable, Generator from dataclasses import dataclass import cv2 +import imageio_ffmpeg import numpy as np from tqdm.auto import tqdm -import subprocess -import imageio_ffmpeg -import os - ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe() ffprobe_path = ffmpeg_path.replace("ffmpeg", "ffprobe") + @dataclass class VideoInfo: """ @@ -260,35 +260,46 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray: result_frame = callback(frame, index) sink.write_frame(frame=result_frame) - def has_audio_stream(video_path): result = subprocess.run( [ffmpeg_path, "-i", video_path], stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, - text=True + text=True, ) return "Audio:" in result.stderr - + if has_audio_stream(source_path): video_input = target_path audio_source = source_path temp_output = "temp_output.mp4" - subprocess.run([ - ffmpeg_path, - "-i", video_input, - "-i", audio_source, - "-map", "0:v", - "-map", "1:a", - "-c:v", "copy", - "-c:a", "aac", - "-shortest", - temp_output - ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + subprocess.run( + [ + ffmpeg_path, + "-i", + video_input, + "-i", + audio_source, + "-map", + "0:v", + "-map", + "1:a", + "-c:v", + "copy", + "-c:a", + "aac", + "-shortest", + temp_output, + ], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) os.replace(temp_output, video_input) + class FPSMonitor: """ A class for monitoring frames per second (FPS) to benchmark latency. From 73b583684d710bdd9e629a0cc659ce8de99b0c82 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 30 Jul 2025 15:30:45 -0400 Subject: [PATCH 003/128] REMOVE: Removed ffprobe --- supervision/utils/video.py | 1 - 1 file changed, 1 deletion(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 029de694c9..ee057b69f3 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -14,7 +14,6 @@ import os ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe() -ffprobe_path = ffmpeg_path.replace("ffmpeg", "ffprobe") @dataclass class VideoInfo: From 5e07794b4616264094d7b6c504255228d159a9d7 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Fri, 1 Aug 2025 22:51:24 -0400 Subject: [PATCH 004/128] UPDATE: Added a new Video class with OpenCV writer and backend --- supervision/__init__.py | 2 + supervision/utils/video.py | 211 +++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) diff --git a/supervision/__init__.py b/supervision/__init__.py index ab45651ac9..48dcfc49e5 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -134,6 +134,7 @@ VideoSink, get_video_frames_generator, process_video, + Video ) __all__ = [ @@ -194,6 +195,7 @@ "VertexLabelAnnotator", "VideoInfo", "VideoSink", + "Video", "approximate_polygon", "box_iou", "box_iou_batch", diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 93b62f12ee..b30bacde0c 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -1,11 +1,13 @@ from __future__ import annotations +from typing import Protocol, Any, Tuple import os import subprocess import time from collections import deque from collections.abc import Callable, Generator from dataclasses import dataclass +from enum import Enum, auto import cv2 import imageio_ffmpeg @@ -14,6 +16,11 @@ ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe() +class SOURCE_TYPE(Enum): + VIDEO_FILE = "VIDEO_FILE" + WEBCAM = "WEBCAM" + RTSP = "RTSP" + @dataclass class VideoInfo: @@ -46,6 +53,7 @@ class VideoInfo: height: int fps: int total_frames: int | None = None + source_type: SOURCE_TYPE | None = None @classmethod def from_video_path(cls, video_path: str) -> VideoInfo: @@ -65,6 +73,209 @@ def resolution_wh(self) -> tuple[int, int]: return self.width, self.height +class OpenCVBackend(Protocol): + def __init__(self): + self.cap = None + self.video_info = None + self.writer = None + self.path = None + + def open(self, path: str) -> None: + self.cap = cv2.VideoCapture(path) + self.path = path + + if not self.cap.isOpened(): + raise RuntimeError(f"Cannot open video source: {path}") + self.video_info = self._set_video_info() + + if isinstance(path, int): + self.video_info.source_type = SOURCE_TYPE.WEBCAM + elif isinstance(path, str): + self.video_info.source_type = SOURCE_TYPE.RTSP if path.lower().startswith("rtsp://") else SOURCE_TYPE.VIDEO_FILE + else: + raise ValueError("Unsupported source type") + + def isOpened(self): + return self.cap.isOpened() + + def _set_video_info(self) -> VideoInfo: + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = self.cap.get(cv2.CAP_PROP_FPS) + total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + return VideoInfo(width, height, int(fps), total_frames) + + def info(self) -> VideoInfo: + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + return self.video_info + + def read(self) -> Tuple[bool, np.ndarray]: + if self.cap is None: + raise RuntimeError("Video not opened yet.") + ret, frame = self.cap.read() + return ret, frame + + def grab(self) -> bool: + if self.cap is None: + raise RuntimeError("Video not opened yet.") + return self.cap.grab() + + def seek(self, frame_idx: int) -> None: + if self.cap is None: + raise RuntimeError("Video not opened yet.") + self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) + + def release(self) -> None: + if self.cap is not None and self.cap.isOpened(): + self.cap.release() + self.cap = None + + def frames(self, *, start=0, end=None, stride=1, resolution_wh=None): + if self.cap is None: + raise RuntimeError("Video not opened yet.") + + total_frames = self.video_info.total_frames if self.video_info else 0 + is_live_stream = (total_frames <= 0) + + if is_live_stream: + while True: + for _ in range(stride - 1): + if not self.grab(): + return + ret, frame = self.read() + if not ret: + return + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + else: + if end is None or end > total_frames: + end = total_frames + + frame_idx = start + while frame_idx < end: + self.seek(frame_idx) + ret, frame = self.read() + if not ret: + break + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + frame_idx += stride + + def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False): + if self.cap is None: + raise RuntimeError("Video not opened yet.") + + if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: + raise ValueError("Only video files can be saved.") + + if self.writer is not None: + self.writer.close() + self.writer = None + + source_codec = self.cap.get(cv2.CAP_PROP_FOURCC) + + if fps is None: + fps = self.video_info.fps + + self.writer = OpenCVWriter(target_path, fps, self.video_info.resolution_wh, source_codec) + total_frames = min(self.video_info.total_frames, fps) + frames_generator = self.frames() + for index, frame in enumerate( + tqdm( + frames_generator, + total=total_frames, + disable=not show_progress, + desc=progress_message, + ) + ): + result_frame = callback(frame, index) + self.writer.write(frame=result_frame) + + def has_audio_stream(video_path): + result = subprocess.run( + [ffmpeg_path, "-i", video_path], + stderr=subprocess.PIPE, + stdout=subprocess.DEVNULL, + text=True, + ) + + return "Audio:" in result.stderr + + if has_audio_stream(self.path): + video_input = target_path + audio_source = self.path + temp_output = "temp_output.mp4" + subprocess.run( + [ + ffmpeg_path, + "-i", + video_input, + "-i", + audio_source, + "-map", + "0:v", + "-map", + "1:a", + "-c:v", + "copy", + "-c:a", + "aac", + "-shortest", + temp_output, + ], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + os.replace(temp_output, video_input) + + +class OpenCVWriter: + def __init__(self, filename: str, fps: float, frame_size: tuple[int, int], codec: str = "mp4v"): + try: + fourcc_int = cv2.VideoWriter_fourcc(*codec) + self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) + except: + fourcc_int = cv2.VideoWriter_fourcc(*"mp4v") + self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) + if not self.writer.isOpened(): + raise RuntimeError(f"Cannot open video writer for file: {filename}") + + def write(self, frame: np.ndarray) -> None: + self.writer.write(frame) + + def close(self) -> None: + self.writer.release() + +class Video: + info: VideoInfo + source: str | int + backend: OpenCVBackend + + def __init__(self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"): + if backend == "opencv": + self.backend = OpenCVBackend() + + self.backend.open(source) + self.info = self.backend.video_info + self.source = source + + def __iter__(self): + return self.backend.frames() + + def frames(self, stride=1, start=0, end=None, resolution_wh=None): + return self.backend.frames(stride=stride, start=start, end=end, resolution_wh=resolution_wh) + + def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False): + self.backend.save(target_path=target_path, callback=callback, fps=fps, progress_message=progress_message, show_progress=show_progress) + + class VideoSink: """ Context manager that saves video frames to a file using OpenCV. From b2096d06111860bbd1559338b807d482d3b4ba6f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:52:43 +0000 Subject: [PATCH 005/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 4 +- supervision/utils/video.py | 76 ++++++++++++++++++++++++++++---------- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 48dcfc49e5..57c48050cf 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -130,11 +130,11 @@ from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( FPSMonitor, + Video, VideoInfo, VideoSink, get_video_frames_generator, process_video, - Video ) __all__ = [ @@ -193,9 +193,9 @@ "TriangleAnnotator", "VertexAnnotator", "VertexLabelAnnotator", + "Video", "VideoInfo", "VideoSink", - "Video", "approximate_polygon", "box_iou", "box_iou_batch", diff --git a/supervision/utils/video.py b/supervision/utils/video.py index b30bacde0c..186b22fb99 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -1,13 +1,13 @@ from __future__ import annotations -from typing import Protocol, Any, Tuple import os import subprocess import time from collections import deque from collections.abc import Callable, Generator from dataclasses import dataclass -from enum import Enum, auto +from enum import Enum +from typing import Protocol, Tuple import cv2 import imageio_ffmpeg @@ -16,6 +16,7 @@ ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe() + class SOURCE_TYPE(Enum): VIDEO_FILE = "VIDEO_FILE" WEBCAM = "WEBCAM" @@ -87,11 +88,15 @@ def open(self, path: str) -> None: if not self.cap.isOpened(): raise RuntimeError(f"Cannot open video source: {path}") self.video_info = self._set_video_info() - + if isinstance(path, int): self.video_info.source_type = SOURCE_TYPE.WEBCAM elif isinstance(path, str): - self.video_info.source_type = SOURCE_TYPE.RTSP if path.lower().startswith("rtsp://") else SOURCE_TYPE.VIDEO_FILE + self.video_info.source_type = ( + SOURCE_TYPE.RTSP + if path.lower().startswith("rtsp://") + else SOURCE_TYPE.VIDEO_FILE + ) else: raise ValueError("Unsupported source type") @@ -112,7 +117,7 @@ def info(self) -> VideoInfo: raise RuntimeError("Video not opened yet.") return self.video_info - def read(self) -> Tuple[bool, np.ndarray]: + def read(self) -> tuple[bool, np.ndarray]: if self.cap is None: raise RuntimeError("Video not opened yet.") ret, frame = self.cap.read() @@ -132,13 +137,13 @@ def release(self) -> None: if self.cap is not None and self.cap.isOpened(): self.cap.release() self.cap = None - + def frames(self, *, start=0, end=None, stride=1, resolution_wh=None): if self.cap is None: raise RuntimeError("Video not opened yet.") total_frames = self.video_info.total_frames if self.video_info else 0 - is_live_stream = (total_frames <= 0) + is_live_stream = total_frames <= 0 if is_live_stream: while True: @@ -166,13 +171,20 @@ def frames(self, *, start=0, end=None, stride=1, resolution_wh=None): yield frame frame_idx += stride - def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False): + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int = None, + progress_message: str = "Processing video", + show_progress: bool = False, + ): if self.cap is None: raise RuntimeError("Video not opened yet.") if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: - raise ValueError("Only video files can be saved.") - + raise ValueError("Only video files can be saved.") + if self.writer is not None: self.writer.close() self.writer = None @@ -182,7 +194,9 @@ def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarra if fps is None: fps = self.video_info.fps - self.writer = OpenCVWriter(target_path, fps, self.video_info.resolution_wh, source_codec) + self.writer = OpenCVWriter( + target_path, fps, self.video_info.resolution_wh, source_codec + ) total_frames = min(self.video_info.total_frames, fps) frames_generator = self.frames() for index, frame in enumerate( @@ -237,7 +251,13 @@ def has_audio_stream(video_path): class OpenCVWriter: - def __init__(self, filename: str, fps: float, frame_size: tuple[int, int], codec: str = "mp4v"): + def __init__( + self, + filename: str, + fps: float, + frame_size: tuple[int, int], + codec: str = "mp4v", + ): try: fourcc_int = cv2.VideoWriter_fourcc(*codec) self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) @@ -253,28 +273,46 @@ def write(self, frame: np.ndarray) -> None: def close(self) -> None: self.writer.release() + class Video: info: VideoInfo source: str | int backend: OpenCVBackend - def __init__(self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"): + def __init__( + self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" + ): if backend == "opencv": self.backend = OpenCVBackend() - + self.backend.open(source) self.info = self.backend.video_info self.source = source def __iter__(self): return self.backend.frames() - + def frames(self, stride=1, start=0, end=None, resolution_wh=None): - return self.backend.frames(stride=stride, start=start, end=end, resolution_wh=resolution_wh) + return self.backend.frames( + stride=stride, start=start, end=end, resolution_wh=resolution_wh + ) + + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int = None, + progress_message: str = "Processing video", + show_progress: bool = False, + ): + self.backend.save( + target_path=target_path, + callback=callback, + fps=fps, + progress_message=progress_message, + show_progress=show_progress, + ) - def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False): - self.backend.save(target_path=target_path, callback=callback, fps=fps, progress_message=progress_message, show_progress=show_progress) - class VideoSink: """ From 9fb709818cce7b8ff820ab162d1fc26f9a334521 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Fri, 1 Aug 2025 22:56:47 -0400 Subject: [PATCH 006/128] Precommit --- supervision/utils/video.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 186b22fb99..434217544b 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -7,7 +7,7 @@ from collections.abc import Callable, Generator from dataclasses import dataclass from enum import Enum -from typing import Protocol, Tuple +from typing import Optional, Protocol, Tuple import cv2 import imageio_ffmpeg @@ -175,7 +175,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, + fps: Optional[int] = None, progress_message: str = "Processing video", show_progress: bool = False, ): @@ -261,7 +261,7 @@ def __init__( try: fourcc_int = cv2.VideoWriter_fourcc(*codec) self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) - except: + except Exception: fourcc_int = cv2.VideoWriter_fourcc(*"mp4v") self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) if not self.writer.isOpened(): @@ -301,7 +301,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, + fps: Optional[int] = None, progress_message: str = "Processing video", show_progress: bool = False, ): From 850a2c6d3ceb948f1e970344da7854c190b02a59 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:57:56 +0000 Subject: [PATCH 007/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 434217544b..1ebcb085af 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -7,7 +7,7 @@ from collections.abc import Callable, Generator from dataclasses import dataclass from enum import Enum -from typing import Optional, Protocol, Tuple +from typing import Optional, Protocol import cv2 import imageio_ffmpeg @@ -175,7 +175,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: Optional[int] = None, + fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, ): @@ -301,7 +301,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: Optional[int] = None, + fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, ): From 46900f81232eafe5362d690b8f183cd41bc261ca Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Fri, 1 Aug 2025 22:59:24 -0400 Subject: [PATCH 008/128] Precommit --- supervision/utils/video.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 434217544b..275cfe0ef2 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -7,7 +7,7 @@ from collections.abc import Callable, Generator from dataclasses import dataclass from enum import Enum -from typing import Optional, Protocol, Tuple +from typing import Protocol, Tuple import cv2 import imageio_ffmpeg @@ -175,7 +175,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: Optional[int] = None, + fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, ): @@ -301,7 +301,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: Optional[int] = None, + fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, ): From c70039471b68c3e1903cd06e60719a88e8cec489 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 2 Aug 2025 03:00:27 +0000 Subject: [PATCH 009/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index c37d01021b..d7ae0e5bcd 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -7,7 +7,7 @@ from collections.abc import Callable, Generator from dataclasses import dataclass from enum import Enum -from typing import Optional, Protocol, Tuple +from typing import Protocol import cv2 import imageio_ffmpeg From fce8ade8cbb6a480024a98a7d5a50c927b6b341e Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 2 Aug 2025 02:19:45 -0400 Subject: [PATCH 010/128] UPDATE: Fixed incomplete write closing --- supervision/utils/video.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index d7ae0e5bcd..65b886eedc 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -197,7 +197,7 @@ def save( self.writer = OpenCVWriter( target_path, fps, self.video_info.resolution_wh, source_codec ) - total_frames = min(self.video_info.total_frames, fps) + total_frames = self.video_info.total_frames frames_generator = self.frames() for index, frame in enumerate( tqdm( @@ -210,6 +210,8 @@ def save( result_frame = callback(frame, index) self.writer.write(frame=result_frame) + self.writer.close() + def has_audio_stream(video_path): result = subprocess.run( [ffmpeg_path, "-i", video_path], From f86f4f2ed53a504b3384db628b173c241ebe2813 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 2 Aug 2025 02:25:03 -0400 Subject: [PATCH 011/128] ADD: Docstrings --- supervision/utils/video.py | 145 ++++++++++++++++++++++++++++++++++++- 1 file changed, 142 insertions(+), 3 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 65b886eedc..0f1ae12a9e 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -75,13 +75,29 @@ def resolution_wh(self) -> tuple[int, int]: class OpenCVBackend(Protocol): + """ + Protocol class defining the interface for video backend implementations using OpenCV. + Handles video capture, frame reading, seeking, and writing operations. + """ + def __init__(self): + """Initialize the OpenCV backend with empty video capture and writer objects.""" self.cap = None self.video_info = None self.writer = None self.path = None def open(self, path: str) -> None: + """ + Open a video source and initialize the video capture object. + + Args: + path (str): Path to the video file, RTSP URL, or camera index. + + Raises: + RuntimeError: If unable to open the video source. + ValueError: If the source type is not supported. + """ self.cap = cv2.VideoCapture(path) self.path = path @@ -100,10 +116,23 @@ def open(self, path: str) -> None: else: raise ValueError("Unsupported source type") - def isOpened(self): + def isOpened(self) -> bool: + """Check if the video source is opened successfully. + + Returns: + bool: True if the video source is opened, False otherwise. + """ return self.cap.isOpened() def _set_video_info(self) -> VideoInfo: + """Set up video information from the opened video source. + + Returns: + VideoInfo: Object containing video properties like width, height, fps, etc. + + Raises: + RuntimeError: If the video source is not opened yet. + """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) @@ -113,32 +142,82 @@ def _set_video_info(self) -> VideoInfo: return VideoInfo(width, height, int(fps), total_frames) def info(self) -> VideoInfo: + """Get video information. + + Returns: + VideoInfo: Object containing video properties. + + Raises: + RuntimeError: If the video source is not opened yet. + """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") return self.video_info def read(self) -> tuple[bool, np.ndarray]: + """Read a frame from the video source. + + Returns: + tuple[bool, np.ndarray]: A tuple containing: + - bool: True if frame was successfully read + - np.ndarray: The video frame in BGR format + + Raises: + RuntimeError: If the video source is not opened yet. + """ if self.cap is None: raise RuntimeError("Video not opened yet.") ret, frame = self.cap.read() return ret, frame def grab(self) -> bool: + """Grab a frame from video source without decoding. + + Returns: + bool: True if frame was successfully grabbed. + + Raises: + RuntimeError: If the video source is not opened yet. + """ if self.cap is None: raise RuntimeError("Video not opened yet.") return self.cap.grab() def seek(self, frame_idx: int) -> None: + """Seek to a specific frame in the video. + + Args: + frame_idx (int): Index of the frame to seek to (0-based). + + Raises: + RuntimeError: If the video source is not opened yet. + """ if self.cap is None: raise RuntimeError("Video not opened yet.") self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) def release(self) -> None: + """Release the video capture resources.""" if self.cap is not None and self.cap.isOpened(): self.cap.release() self.cap = None - def frames(self, *, start=0, end=None, stride=1, resolution_wh=None): + def frames(self, *, start: int = 0, end: int | None = None, stride: int = 1, resolution_wh: tuple[int, int] | None = None): + """Generate frames from the video source. + + Args: + start (int, optional): Starting frame index. Defaults to 0. + end (int | None, optional): Ending frame index. Defaults to None. + stride (int, optional): Number of frames to skip. Defaults to 1. + resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height). + If provided, frames will be resized. Defaults to None. + + Yields: + np.ndarray: Video frames in BGR format. + + Raises: + RuntimeError: If the video source is not opened yet. + """ if self.cap is None: raise RuntimeError("Video not opened yet.") @@ -253,6 +332,13 @@ def has_audio_stream(video_path): class OpenCVWriter: + """A class to handle video writing operations using OpenCV's VideoWriter. + + This class provides an interface to write frames to a video file using OpenCV, + with support for different codecs and automatic fallback to mp4v if the specified + codec fails. + """ + def __init__( self, filename: str, @@ -260,6 +346,17 @@ def __init__( frame_size: tuple[int, int], codec: str = "mp4v", ): + """Initialize the video writer. + + Args: + filename (str): Path to the output video file. + fps (float): Frames per second for the output video. + frame_size (tuple[int, int]): Width and height of the output video frames. + codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". + + Raises: + RuntimeError: If the video writer cannot be initialized. + """ try: fourcc_int = cv2.VideoWriter_fourcc(*codec) self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) @@ -270,13 +367,25 @@ def __init__( raise RuntimeError(f"Cannot open video writer for file: {filename}") def write(self, frame: np.ndarray) -> None: + """Write a frame to the video file. + + Args: + frame (np.ndarray): The frame to write, in BGR format. + """ self.writer.write(frame) def close(self) -> None: + """Release the video writer resources.""" self.writer.release() class Video: + """High-level interface for video operations. + + This class provides a convenient interface for video operations including + reading frames, saving processed videos, and video information access. + It uses OpenCVBackend as the default backend for video operations. + """ info: VideoInfo source: str | int backend: OpenCVBackend @@ -292,9 +401,26 @@ def __init__( self.source = source def __iter__(self): + """Make the Video class iterable over frames. + + Returns: + Generator: A generator yielding video frames. + """ return self.backend.frames() - def frames(self, stride=1, start=0, end=None, resolution_wh=None): + def frames(self, stride: int = 1, start: int = 0, end: int | None = None, resolution_wh: tuple[int, int] | None = None): + """Generate frames from the video. + + Args: + stride (int, optional): Number of frames to skip. Defaults to 1. + start (int, optional): Starting frame index. Defaults to 0. + end (int | None, optional): Ending frame index. Defaults to None. + resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height). + If provided, frames will be resized. Defaults to None. + + Returns: + Generator: A generator yielding video frames. + """ return self.backend.frames( stride=stride, start=start, end=end, resolution_wh=resolution_wh ) @@ -307,6 +433,19 @@ def save( progress_message: str = "Processing video", show_progress: bool = False, ): + """Save processed video frames to a file. + + Args: + target_path (str): Path where the processed video will be saved. + callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes + each frame. Takes frame and frame index as input, returns processed frame. + fps (int | None, optional): Output video FPS. If None, uses source video FPS. + Defaults to None. + progress_message (str, optional): Message to show in progress bar. + Defaults to "Processing video". + show_progress (bool, optional): Whether to show progress bar. + Defaults to False. + """ self.backend.save( target_path=target_path, callback=callback, From 22659774755d26519bc43691832db586cb05dbe3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 2 Aug 2025 06:25:22 +0000 Subject: [PATCH 012/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 0f1ae12a9e..a685504428 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -93,7 +93,7 @@ def open(self, path: str) -> None: Args: path (str): Path to the video file, RTSP URL, or camera index. - + Raises: RuntimeError: If unable to open the video source. ValueError: If the source type is not supported. @@ -202,7 +202,14 @@ def release(self) -> None: self.cap.release() self.cap = None - def frames(self, *, start: int = 0, end: int | None = None, stride: int = 1, resolution_wh: tuple[int, int] | None = None): + def frames( + self, + *, + start: int = 0, + end: int | None = None, + stride: int = 1, + resolution_wh: tuple[int, int] | None = None, + ): """Generate frames from the video source. Args: @@ -386,6 +393,7 @@ class Video: reading frames, saving processed videos, and video information access. It uses OpenCVBackend as the default backend for video operations. """ + info: VideoInfo source: str | int backend: OpenCVBackend @@ -408,7 +416,13 @@ def __iter__(self): """ return self.backend.frames() - def frames(self, stride: int = 1, start: int = 0, end: int | None = None, resolution_wh: tuple[int, int] | None = None): + def frames( + self, + stride: int = 1, + start: int = 0, + end: int | None = None, + resolution_wh: tuple[int, int] | None = None, + ): """Generate frames from the video. Args: From bf67bfaea1f254bd44bab277a04a4e64ec94f67b Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 2 Aug 2025 02:28:01 -0400 Subject: [PATCH 013/128] UPDATE: Allow for ffmpeg error passthrough --- supervision/utils/video.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index a685504428..2b22658279 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -301,7 +301,6 @@ def save( def has_audio_stream(video_path): result = subprocess.run( [ffmpeg_path, "-i", video_path], - stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, text=True, ) @@ -332,7 +331,6 @@ def has_audio_stream(video_path): ], check=True, stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, ) os.replace(temp_output, video_input) From ec4bd012faef2dc3a5c10a62d8872ea52aaf5bca Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 2 Aug 2025 02:36:08 -0400 Subject: [PATCH 014/128] UPDATE: Writer and Backend abstract class --- supervision/utils/video.py | 84 ++++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 2b22658279..4a8f19287d 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -7,6 +7,7 @@ from collections.abc import Callable, Generator from dataclasses import dataclass from enum import Enum +from abc import ABC, abstractmethod from typing import Protocol import cv2 @@ -74,14 +75,85 @@ def resolution_wh(self) -> tuple[int, int]: return self.width, self.height -class OpenCVBackend(Protocol): +class Backend(ABC): + def __init__(self): + self.cap = None + self.video_info = None + self.writer = None + self.path = None + + @abstractmethod + def open(self, path: str) -> None: + pass + + @abstractmethod + def isOpened(self) -> bool: + pass + + @abstractmethod + def _set_video_info(self) -> VideoInfo: + pass + + @abstractmethod + def info(self) -> VideoInfo: + pass + + @abstractmethod + def read(self) -> tuple[bool, np.ndarray]: + pass + + @abstractmethod + def grab(self) -> bool: + pass + + @abstractmethod + def seek(self, frame_idx: int) -> None: + pass + + @abstractmethod + def release(self) -> None: + pass + + @abstractmethod + def frames( + self, + *, + start: int = 0, + end: int | None = None, + stride: int = 1, + resolution_wh: tuple[int, int] | None = None, + ): + pass + + @abstractmethod + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int | None = None, + progress_message: str = "Processing video", + show_progress: bool = False, + ): + pass + +class Writer(ABC): + @abstractmethod + def write(self, frame: np.ndarray) -> None: + pass + + @abstractmethod + def close(self) -> None: + pass + +class OpenCVBackend(Backend): """ - Protocol class defining the interface for video backend implementations using OpenCV. - Handles video capture, frame reading, seeking, and writing operations. + OpenCV implementation of the Backend interface. + Handles video capture, frame reading, seeking, and writing operations using OpenCV. """ def __init__(self): """Initialize the OpenCV backend with empty video capture and writer objects.""" + super().__init__() self.cap = None self.video_info = None self.writer = None @@ -335,8 +407,7 @@ def has_audio_stream(video_path): os.replace(temp_output, video_input) - -class OpenCVWriter: +class OpenCVWriter(Writer): """A class to handle video writing operations using OpenCV's VideoWriter. This class provides an interface to write frames to a video file using OpenCV, @@ -389,12 +460,11 @@ class Video: This class provides a convenient interface for video operations including reading frames, saving processed videos, and video information access. - It uses OpenCVBackend as the default backend for video operations. """ info: VideoInfo source: str | int - backend: OpenCVBackend + backend: Backend def __init__( self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" From b9e79685b8c70835c782ba3c961981eda9a977db Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 2 Aug 2025 02:38:02 -0400 Subject: [PATCH 015/128] Precommit --- supervision/utils/video.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 4a8f19287d..7f379f7b67 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -287,14 +287,12 @@ def frames( Args: start (int, optional): Starting frame index. Defaults to 0. end (int | None, optional): Ending frame index. Defaults to None. - stride (int, optional): Number of frames to skip. Defaults to 1. - resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height). - If provided, frames will be resized. Defaults to None. - - Yields: - np.ndarray: Video frames in BGR format. + stride (int, optional): Number of frames to skip. Defaults to 1. + resolution_wh (tuple[int, int] | None, optional): Target resolution + (width, height). If provided, frames will be resized. Defaults to None. - Raises: + Yields: + np.ndarray: Video frames in BGR format. Raises: RuntimeError: If the video source is not opened yet. """ if self.cap is None: @@ -497,8 +495,8 @@ def frames( stride (int, optional): Number of frames to skip. Defaults to 1. start (int, optional): Starting frame index. Defaults to 0. end (int | None, optional): Ending frame index. Defaults to None. - resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height). - If provided, frames will be resized. Defaults to None. + resolution_wh (tuple[int, int] | None, optional): Target resolution + (width, height). If provided, frames will be resized. Defaults to None. Returns: Generator: A generator yielding video frames. @@ -520,8 +518,8 @@ def save( Args: target_path (str): Path where the processed video will be saved. callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes - each frame. Takes frame and frame index as input, returns processed frame. - fps (int | None, optional): Output video FPS. If None, uses source video FPS. + each frame. Takes frame and index as input, returns processed frame. + fps (int | None, optional): Output video FPS. If None, uses source video FPS. Defaults to None. progress_message (str, optional): Message to show in progress bar. Defaults to "Processing video". From a96c3f08ebcc53ef5cd6d3a6573d86a0ff253c80 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 2 Aug 2025 06:38:23 +0000 Subject: [PATCH 016/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 7f379f7b67..0bf5bc4da9 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -3,12 +3,11 @@ import os import subprocess import time +from abc import ABC, abstractmethod from collections import deque from collections.abc import Callable, Generator from dataclasses import dataclass from enum import Enum -from abc import ABC, abstractmethod -from typing import Protocol import cv2 import imageio_ffmpeg @@ -136,6 +135,7 @@ def save( ): pass + class Writer(ABC): @abstractmethod def write(self, frame: np.ndarray) -> None: @@ -145,6 +145,7 @@ def write(self, frame: np.ndarray) -> None: def close(self) -> None: pass + class OpenCVBackend(Backend): """ OpenCV implementation of the Backend interface. @@ -405,6 +406,7 @@ def has_audio_stream(video_path): os.replace(temp_output, video_input) + class OpenCVWriter(Writer): """A class to handle video writing operations using OpenCV's VideoWriter. @@ -519,7 +521,7 @@ def save( target_path (str): Path where the processed video will be saved. callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. If None, uses source video FPS. + fps (int | None, optional): Output video FPS. If None, uses source video FPS. Defaults to None. progress_message (str, optional): Message to show in progress bar. Defaults to "Processing video". From a6c91bc387447e0676cdacf9bd3614ca3fa5e277 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 2 Aug 2025 02:39:40 -0400 Subject: [PATCH 017/128] Precommit --- supervision/utils/video.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 0bf5bc4da9..ec918a7a6b 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -521,8 +521,7 @@ def save( target_path (str): Path where the processed video will be saved. callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. If None, uses source video FPS. - Defaults to None. + fps (int | None, optional): Output video FPS. progress_message (str, optional): Message to show in progress bar. Defaults to "Processing video". show_progress (bool, optional): Whether to show progress bar. From d075e03b7ff87f6a431769e7dfb6dd4a7237bedd Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 6 Aug 2025 16:21:35 -0400 Subject: [PATCH 018/128] UPDATE: Added manual control --- supervision/utils/video.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index ec918a7a6b..57ada81442 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -81,6 +81,10 @@ def __init__(self): self.writer = None self.path = None + @abstractmethod + def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v") -> Writer: + pass + @abstractmethod def open(self, path: str) -> None: pass @@ -160,6 +164,9 @@ def __init__(self): self.writer = None self.path = None + def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): + return OpenCVWriter(target_path, video_info.fps, video_info.resolution_wh, codec) + def open(self, path: str) -> None: """ Open a video source and initialize the video capture object. @@ -441,6 +448,12 @@ def __init__( self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) if not self.writer.isOpened(): raise RuntimeError(f"Cannot open video writer for file: {filename}") + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() def write(self, frame: np.ndarray) -> None: """Write a frame to the video file. @@ -484,6 +497,9 @@ def __iter__(self): """ return self.backend.frames() + def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v"): + return self.backend.get_sink(target_path, info, codec) + def frames( self, stride: int = 1, From 7f078ffea5f0896ea2e4c42882e5710c6a7a2000 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 6 Aug 2025 16:27:34 -0400 Subject: [PATCH 019/128] ADD: Added docstrings --- supervision/utils/video.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 57ada81442..0bfaeecbd0 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -165,6 +165,16 @@ def __init__(self): self.path = None def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): + """Create a video writer for saving frames using OpenCV. + + Args: + target_path (str): Path where the video will be saved. + video_info (VideoInfo): Video information containing resolution and FPS. + codec (str, optional): FourCC code for video codec. Defaults to "mp4v". + + Returns: + OpenCVWriter: A video writer object for writing frames. + """ return OpenCVWriter(target_path, video_info.fps, video_info.resolution_wh, codec) def open(self, path: str) -> None: @@ -343,6 +353,20 @@ def save( progress_message: str = "Processing video", show_progress: bool = False, ): + """Save processed video frames to a file with audio preservation. + + Args: + target_path (str): Path where the processed video will be saved. + callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes + each frame. Takes frame and index as input, returns processed frame. + fps (int | None, optional): Output video FPS. If None, uses source FPS. + progress_message (str, optional): Message to show in progress bar. + show_progress (bool, optional): Whether to show progress bar. + + Raises: + RuntimeError: If video source is not opened. + ValueError: If source is not a video file. + """ if self.cap is None: raise RuntimeError("Video not opened yet.") @@ -497,7 +521,17 @@ def __iter__(self): """ return self.backend.frames() - def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v"): + def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> Writer: + """Create a video writer for saving frames. + + Args: + target_path (str): Path where the video will be saved. + info (VideoInfo): Video information containing resolution and FPS. + codec (str, optional): FourCC code for video codec. Defaults to "mp4v". + + Returns: + Writer: A video writer object for writing frames. + """ return self.backend.get_sink(target_path, info, codec) def frames( From af49e9a9dfe42eecbe6c28f2a44dea5f622a7c73 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Aug 2025 20:28:05 +0000 Subject: [PATCH 020/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 0bfaeecbd0..3af7db62a0 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -82,7 +82,9 @@ def __init__(self): self.path = None @abstractmethod - def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v") -> Writer: + def get_sink( + self, target_path: str, video_info: VideoInfo, codec: str = "mp4v" + ) -> Writer: pass @abstractmethod @@ -175,7 +177,9 @@ def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v") Returns: OpenCVWriter: A video writer object for writing frames. """ - return OpenCVWriter(target_path, video_info.fps, video_info.resolution_wh, codec) + return OpenCVWriter( + target_path, video_info.fps, video_info.resolution_wh, codec + ) def open(self, path: str) -> None: """ @@ -472,10 +476,10 @@ def __init__( self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) if not self.writer.isOpened(): raise RuntimeError(f"Cannot open video writer for file: {filename}") - + def __enter__(self): return self - + def __exit__(self, exc_type, exc_value, traceback): self.close() From 320d817757a7906164f70e591116047e4ebc4c9e Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 6 Aug 2025 16:38:44 -0400 Subject: [PATCH 021/128] UPDATE: Deprecate warning old Video API --- supervision/utils/video.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 3af7db62a0..38d00c464d 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -52,7 +52,7 @@ class VideoInfo: width: int height: int - fps: int + fps: float total_frames: int | None = None source_type: SOURCE_TYPE | None = None @@ -590,6 +590,7 @@ def save( ) +@DeprecationWarning class VideoSink: """ Context manager that saves video frames to a file using OpenCV. @@ -646,7 +647,7 @@ def write_frame(self, frame: np.ndarray): def __exit__(self, exc_type, exc_value, exc_traceback): self.__writer.release() - +@DeprecationWarning def _validate_and_setup_video( source_path: str, start: int, end: int | None, iterative_seek: bool = False ): @@ -670,7 +671,7 @@ def _validate_and_setup_video( return video, start, end - +@DeprecationWarning def get_video_frames_generator( source_path: str, stride: int = 1, @@ -721,7 +722,7 @@ def get_video_frames_generator( frame_position += stride video.release() - +@DeprecationWarning def process_video( source_path: str, target_path: str, From cb8d2f8b116638482635d312573ffb3f359928d7 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 6 Aug 2025 22:09:47 -0400 Subject: [PATCH 022/128] FIX: Prototype resolution for #1687 --- supervision/utils/video.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 38d00c464d..f940568742 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -52,7 +52,7 @@ class VideoInfo: width: int height: int - fps: float + fps: int total_frames: int | None = None source_type: SOURCE_TYPE | None = None @@ -64,7 +64,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(video.get(cv2.CAP_PROP_FPS)) + fps = int(round(video.get(cv2.CAP_PROP_FPS))) total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) video.release() return VideoInfo(width, height, fps, total_frames) @@ -73,7 +73,6 @@ def from_video_path(cls, video_path: str) -> VideoInfo: def resolution_wh(self) -> tuple[int, int]: return self.width, self.height - class Backend(ABC): def __init__(self): self.cap = None @@ -231,9 +230,9 @@ def _set_video_info(self) -> VideoInfo: raise RuntimeError("Video not opened yet.") width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = self.cap.get(cv2.CAP_PROP_FPS) + fps = int(round(self.cap.get(cv2.CAP_PROP_FPS))) total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - return VideoInfo(width, height, int(fps), total_frames) + return VideoInfo(width, height, fps, total_frames) def info(self) -> VideoInfo: """Get video information. From a3a3a9ebdb8998f32ae231331e89057c54fc70df Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 02:10:09 +0000 Subject: [PATCH 023/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index f940568742..b1d0db352c 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -73,6 +73,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: def resolution_wh(self) -> tuple[int, int]: return self.width, self.height + class Backend(ABC): def __init__(self): self.cap = None @@ -646,6 +647,7 @@ def write_frame(self, frame: np.ndarray): def __exit__(self, exc_type, exc_value, exc_traceback): self.__writer.release() + @DeprecationWarning def _validate_and_setup_video( source_path: str, start: int, end: int | None, iterative_seek: bool = False @@ -670,6 +672,7 @@ def _validate_and_setup_video( return video, start, end + @DeprecationWarning def get_video_frames_generator( source_path: str, @@ -721,6 +724,7 @@ def get_video_frames_generator( frame_position += stride video.release() + @DeprecationWarning def process_video( source_path: str, From ecbf5afef92df60c4a59d7558c40a8836962c060 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 6 Aug 2025 22:16:22 -0400 Subject: [PATCH 024/128] FIX: Rounding type --- supervision/utils/video.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index b1d0db352c..1f08e98bb4 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -64,7 +64,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(round(video.get(cv2.CAP_PROP_FPS))) + fps = (round(video.get(cv2.CAP_PROP_FPS))) total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) video.release() return VideoInfo(width, height, fps, total_frames) @@ -231,7 +231,7 @@ def _set_video_info(self) -> VideoInfo: raise RuntimeError("Video not opened yet.") width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(round(self.cap.get(cv2.CAP_PROP_FPS))) + fps = (round(self.cap.get(cv2.CAP_PROP_FPS))) total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) return VideoInfo(width, height, fps, total_frames) @@ -453,7 +453,7 @@ class OpenCVWriter(Writer): def __init__( self, filename: str, - fps: float, + fps: int, frame_size: tuple[int, int], codec: str = "mp4v", ): @@ -461,7 +461,7 @@ def __init__( Args: filename (str): Path to the output video file. - fps (float): Frames per second for the output video. + fps (int): Frames per second for the output video. frame_size (tuple[int, int]): Width and height of the output video frames. codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". From 1da4466dcb138dff996ba4163fc363a36042a975 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 02:16:44 +0000 Subject: [PATCH 025/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 1f08e98bb4..aec907dc06 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -64,7 +64,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = (round(video.get(cv2.CAP_PROP_FPS))) + fps = round(video.get(cv2.CAP_PROP_FPS)) total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) video.release() return VideoInfo(width, height, fps, total_frames) @@ -231,7 +231,7 @@ def _set_video_info(self) -> VideoInfo: raise RuntimeError("Video not opened yet.") width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = (round(self.cap.get(cv2.CAP_PROP_FPS))) + fps = round(self.cap.get(cv2.CAP_PROP_FPS)) total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) return VideoInfo(width, height, fps, total_frames) From 3b4c68fcee66893b6d002bc1b111759b06829a02 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Fri, 8 Aug 2025 16:29:07 -0400 Subject: [PATCH 026/128] UPDATE: Updated the file structure for the video API --- supervision/__init__.py | 10 +- supervision/utils/video.py | 572 +--------------------------- supervision/video/__init__.py | 0 supervision/video/backend/base.py | 71 ++++ supervision/video/backend/openCV.py | 261 +++++++++++++ supervision/video/backend/pyAV.py | 0 supervision/video/core.py | 134 +++++++ supervision/video/utils.py | 58 +++ 8 files changed, 534 insertions(+), 572 deletions(-) create mode 100644 supervision/video/__init__.py create mode 100644 supervision/video/backend/base.py create mode 100644 supervision/video/backend/openCV.py create mode 100644 supervision/video/backend/pyAV.py create mode 100644 supervision/video/core.py create mode 100644 supervision/video/utils.py diff --git a/supervision/__init__.py b/supervision/__init__.py index 57c48050cf..067b346a97 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -130,13 +130,19 @@ from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( FPSMonitor, - Video, - VideoInfo, VideoSink, get_video_frames_generator, process_video, ) +from supervision.video.utils import ( + VideoInfo +) + +from supervision.video.core import ( + Video +) + __all__ = [ "LMM", "BackgroundOverlayAnnotator", diff --git a/supervision/utils/video.py b/supervision/utils/video.py index aec907dc06..a493850e88 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -1,7 +1,5 @@ from __future__ import annotations -import os -import subprocess import time from abc import ABC, abstractmethod from collections import deque @@ -10,19 +8,10 @@ from enum import Enum import cv2 -import imageio_ffmpeg import numpy as np from tqdm.auto import tqdm -ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe() - - -class SOURCE_TYPE(Enum): - VIDEO_FILE = "VIDEO_FILE" - WEBCAM = "WEBCAM" - RTSP = "RTSP" - - +@DeprecationWarning @dataclass class VideoInfo: """ @@ -54,7 +43,6 @@ class VideoInfo: height: int fps: int total_frames: int | None = None - source_type: SOURCE_TYPE | None = None @classmethod def from_video_path(cls, video_path: str) -> VideoInfo: @@ -64,7 +52,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = round(video.get(cv2.CAP_PROP_FPS)) + fps = int(video.get(cv2.CAP_PROP_FPS)) total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) video.release() return VideoInfo(width, height, fps, total_frames) @@ -74,522 +62,6 @@ def resolution_wh(self) -> tuple[int, int]: return self.width, self.height -class Backend(ABC): - def __init__(self): - self.cap = None - self.video_info = None - self.writer = None - self.path = None - - @abstractmethod - def get_sink( - self, target_path: str, video_info: VideoInfo, codec: str = "mp4v" - ) -> Writer: - pass - - @abstractmethod - def open(self, path: str) -> None: - pass - - @abstractmethod - def isOpened(self) -> bool: - pass - - @abstractmethod - def _set_video_info(self) -> VideoInfo: - pass - - @abstractmethod - def info(self) -> VideoInfo: - pass - - @abstractmethod - def read(self) -> tuple[bool, np.ndarray]: - pass - - @abstractmethod - def grab(self) -> bool: - pass - - @abstractmethod - def seek(self, frame_idx: int) -> None: - pass - - @abstractmethod - def release(self) -> None: - pass - - @abstractmethod - def frames( - self, - *, - start: int = 0, - end: int | None = None, - stride: int = 1, - resolution_wh: tuple[int, int] | None = None, - ): - pass - - @abstractmethod - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, - progress_message: str = "Processing video", - show_progress: bool = False, - ): - pass - - -class Writer(ABC): - @abstractmethod - def write(self, frame: np.ndarray) -> None: - pass - - @abstractmethod - def close(self) -> None: - pass - - -class OpenCVBackend(Backend): - """ - OpenCV implementation of the Backend interface. - Handles video capture, frame reading, seeking, and writing operations using OpenCV. - """ - - def __init__(self): - """Initialize the OpenCV backend with empty video capture and writer objects.""" - super().__init__() - self.cap = None - self.video_info = None - self.writer = None - self.path = None - - def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): - """Create a video writer for saving frames using OpenCV. - - Args: - target_path (str): Path where the video will be saved. - video_info (VideoInfo): Video information containing resolution and FPS. - codec (str, optional): FourCC code for video codec. Defaults to "mp4v". - - Returns: - OpenCVWriter: A video writer object for writing frames. - """ - return OpenCVWriter( - target_path, video_info.fps, video_info.resolution_wh, codec - ) - - def open(self, path: str) -> None: - """ - Open a video source and initialize the video capture object. - - Args: - path (str): Path to the video file, RTSP URL, or camera index. - - Raises: - RuntimeError: If unable to open the video source. - ValueError: If the source type is not supported. - """ - self.cap = cv2.VideoCapture(path) - self.path = path - - if not self.cap.isOpened(): - raise RuntimeError(f"Cannot open video source: {path}") - self.video_info = self._set_video_info() - - if isinstance(path, int): - self.video_info.source_type = SOURCE_TYPE.WEBCAM - elif isinstance(path, str): - self.video_info.source_type = ( - SOURCE_TYPE.RTSP - if path.lower().startswith("rtsp://") - else SOURCE_TYPE.VIDEO_FILE - ) - else: - raise ValueError("Unsupported source type") - - def isOpened(self) -> bool: - """Check if the video source is opened successfully. - - Returns: - bool: True if the video source is opened, False otherwise. - """ - return self.cap.isOpened() - - def _set_video_info(self) -> VideoInfo: - """Set up video information from the opened video source. - - Returns: - VideoInfo: Object containing video properties like width, height, fps, etc. - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if not self.isOpened(): - raise RuntimeError("Video not opened yet.") - width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = round(self.cap.get(cv2.CAP_PROP_FPS)) - total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - return VideoInfo(width, height, fps, total_frames) - - def info(self) -> VideoInfo: - """Get video information. - - Returns: - VideoInfo: Object containing video properties. - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if not self.isOpened(): - raise RuntimeError("Video not opened yet.") - return self.video_info - - def read(self) -> tuple[bool, np.ndarray]: - """Read a frame from the video source. - - Returns: - tuple[bool, np.ndarray]: A tuple containing: - - bool: True if frame was successfully read - - np.ndarray: The video frame in BGR format - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - ret, frame = self.cap.read() - return ret, frame - - def grab(self) -> bool: - """Grab a frame from video source without decoding. - - Returns: - bool: True if frame was successfully grabbed. - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - return self.cap.grab() - - def seek(self, frame_idx: int) -> None: - """Seek to a specific frame in the video. - - Args: - frame_idx (int): Index of the frame to seek to (0-based). - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) - - def release(self) -> None: - """Release the video capture resources.""" - if self.cap is not None and self.cap.isOpened(): - self.cap.release() - self.cap = None - - def frames( - self, - *, - start: int = 0, - end: int | None = None, - stride: int = 1, - resolution_wh: tuple[int, int] | None = None, - ): - """Generate frames from the video source. - - Args: - start (int, optional): Starting frame index. Defaults to 0. - end (int | None, optional): Ending frame index. Defaults to None. - stride (int, optional): Number of frames to skip. Defaults to 1. - resolution_wh (tuple[int, int] | None, optional): Target resolution - (width, height). If provided, frames will be resized. Defaults to None. - - Yields: - np.ndarray: Video frames in BGR format. Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - - total_frames = self.video_info.total_frames if self.video_info else 0 - is_live_stream = total_frames <= 0 - - if is_live_stream: - while True: - for _ in range(stride - 1): - if not self.grab(): - return - ret, frame = self.read() - if not ret: - return - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - else: - if end is None or end > total_frames: - end = total_frames - - frame_idx = start - while frame_idx < end: - self.seek(frame_idx) - ret, frame = self.read() - if not ret: - break - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - frame_idx += stride - - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, - progress_message: str = "Processing video", - show_progress: bool = False, - ): - """Save processed video frames to a file with audio preservation. - - Args: - target_path (str): Path where the processed video will be saved. - callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes - each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. If None, uses source FPS. - progress_message (str, optional): Message to show in progress bar. - show_progress (bool, optional): Whether to show progress bar. - - Raises: - RuntimeError: If video source is not opened. - ValueError: If source is not a video file. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - - if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: - raise ValueError("Only video files can be saved.") - - if self.writer is not None: - self.writer.close() - self.writer = None - - source_codec = self.cap.get(cv2.CAP_PROP_FOURCC) - - if fps is None: - fps = self.video_info.fps - - self.writer = OpenCVWriter( - target_path, fps, self.video_info.resolution_wh, source_codec - ) - total_frames = self.video_info.total_frames - frames_generator = self.frames() - for index, frame in enumerate( - tqdm( - frames_generator, - total=total_frames, - disable=not show_progress, - desc=progress_message, - ) - ): - result_frame = callback(frame, index) - self.writer.write(frame=result_frame) - - self.writer.close() - - def has_audio_stream(video_path): - result = subprocess.run( - [ffmpeg_path, "-i", video_path], - stdout=subprocess.DEVNULL, - text=True, - ) - - return "Audio:" in result.stderr - - if has_audio_stream(self.path): - video_input = target_path - audio_source = self.path - temp_output = "temp_output.mp4" - subprocess.run( - [ - ffmpeg_path, - "-i", - video_input, - "-i", - audio_source, - "-map", - "0:v", - "-map", - "1:a", - "-c:v", - "copy", - "-c:a", - "aac", - "-shortest", - temp_output, - ], - check=True, - stdout=subprocess.DEVNULL, - ) - - os.replace(temp_output, video_input) - - -class OpenCVWriter(Writer): - """A class to handle video writing operations using OpenCV's VideoWriter. - - This class provides an interface to write frames to a video file using OpenCV, - with support for different codecs and automatic fallback to mp4v if the specified - codec fails. - """ - - def __init__( - self, - filename: str, - fps: int, - frame_size: tuple[int, int], - codec: str = "mp4v", - ): - """Initialize the video writer. - - Args: - filename (str): Path to the output video file. - fps (int): Frames per second for the output video. - frame_size (tuple[int, int]): Width and height of the output video frames. - codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". - - Raises: - RuntimeError: If the video writer cannot be initialized. - """ - try: - fourcc_int = cv2.VideoWriter_fourcc(*codec) - self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) - except Exception: - fourcc_int = cv2.VideoWriter_fourcc(*"mp4v") - self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) - if not self.writer.isOpened(): - raise RuntimeError(f"Cannot open video writer for file: {filename}") - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.close() - - def write(self, frame: np.ndarray) -> None: - """Write a frame to the video file. - - Args: - frame (np.ndarray): The frame to write, in BGR format. - """ - self.writer.write(frame) - - def close(self) -> None: - """Release the video writer resources.""" - self.writer.release() - - -class Video: - """High-level interface for video operations. - - This class provides a convenient interface for video operations including - reading frames, saving processed videos, and video information access. - """ - - info: VideoInfo - source: str | int - backend: Backend - - def __init__( - self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" - ): - if backend == "opencv": - self.backend = OpenCVBackend() - - self.backend.open(source) - self.info = self.backend.video_info - self.source = source - - def __iter__(self): - """Make the Video class iterable over frames. - - Returns: - Generator: A generator yielding video frames. - """ - return self.backend.frames() - - def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> Writer: - """Create a video writer for saving frames. - - Args: - target_path (str): Path where the video will be saved. - info (VideoInfo): Video information containing resolution and FPS. - codec (str, optional): FourCC code for video codec. Defaults to "mp4v". - - Returns: - Writer: A video writer object for writing frames. - """ - return self.backend.get_sink(target_path, info, codec) - - def frames( - self, - stride: int = 1, - start: int = 0, - end: int | None = None, - resolution_wh: tuple[int, int] | None = None, - ): - """Generate frames from the video. - - Args: - stride (int, optional): Number of frames to skip. Defaults to 1. - start (int, optional): Starting frame index. Defaults to 0. - end (int | None, optional): Ending frame index. Defaults to None. - resolution_wh (tuple[int, int] | None, optional): Target resolution - (width, height). If provided, frames will be resized. Defaults to None. - - Returns: - Generator: A generator yielding video frames. - """ - return self.backend.frames( - stride=stride, start=start, end=end, resolution_wh=resolution_wh - ) - - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, - progress_message: str = "Processing video", - show_progress: bool = False, - ): - """Save processed video frames to a file. - - Args: - target_path (str): Path where the processed video will be saved. - callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes - each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. - progress_message (str, optional): Message to show in progress bar. - Defaults to "Processing video". - show_progress (bool, optional): Whether to show progress bar. - Defaults to False. - """ - self.backend.save( - target_path=target_path, - callback=callback, - fps=fps, - progress_message=progress_message, - show_progress=show_progress, - ) - - @DeprecationWarning class VideoSink: """ @@ -788,46 +260,6 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray: result_frame = callback(frame, index) sink.write_frame(frame=result_frame) - def has_audio_stream(video_path): - result = subprocess.run( - [ffmpeg_path, "-i", video_path], - stderr=subprocess.PIPE, - stdout=subprocess.DEVNULL, - text=True, - ) - - return "Audio:" in result.stderr - - if has_audio_stream(source_path): - video_input = target_path - audio_source = source_path - temp_output = "temp_output.mp4" - subprocess.run( - [ - ffmpeg_path, - "-i", - video_input, - "-i", - audio_source, - "-map", - "0:v", - "-map", - "1:a", - "-c:v", - "copy", - "-c:a", - "aac", - "-shortest", - temp_output, - ], - check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - os.replace(temp_output, video_input) - - class FPSMonitor: """ A class for monitoring frames per second (FPS) to benchmark latency. diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py new file mode 100644 index 0000000000..2642534bc3 --- /dev/null +++ b/supervision/video/backend/base.py @@ -0,0 +1,71 @@ +from abc import ABC, abstractmethod +import numpy as np +from collections.abc import Callable, Generator + +from supervision.video.utils import VideoInfo + +class BaseBackend(ABC): + def __init__(self): + self.cap = None + self.video_info = None + self.writer = None + self.path = None + + @abstractmethod + def get_sink( + self, target_path: str, video_info: VideoInfo, codec: str = "mp4v" + ) -> "BaseWriter": + pass + + @abstractmethod + def open(self, path: str) -> None: + pass + + @abstractmethod + def isOpened(self) -> bool: + pass + + @abstractmethod + def _set_video_info(self) -> VideoInfo: + pass + + @abstractmethod + def info(self) -> VideoInfo: + pass + + @abstractmethod + def read(self) -> tuple[bool, np.ndarray]: + pass + + @abstractmethod + def grab(self) -> bool: + pass + + @abstractmethod + def seek(self, frame_idx: int) -> None: + pass + + @abstractmethod + def release(self) -> None: + pass + + @abstractmethod + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int | None = None, + progress_message: str = "Processing video", + show_progress: bool = False, + ): + pass + + +class BaseWriter(ABC): + @abstractmethod + def write(self, frame: np.ndarray) -> None: + pass + + @abstractmethod + def close(self) -> None: + pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py new file mode 100644 index 0000000000..9cadc5c212 --- /dev/null +++ b/supervision/video/backend/openCV.py @@ -0,0 +1,261 @@ +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.utils import SOURCE_TYPE, VideoInfo + +import cv2 +import numpy as np +from tqdm.auto import tqdm +from typing import Callable + +class OpenCVBackend(BaseBackend): + """ + OpenCV implementation of the Backend interface. + Handles video capture, frame reading, seeking, and writing operations using OpenCV. + """ + + def __init__(self): + """Initialize the OpenCV backend with empty video capture and writer objects.""" + super().__init__() + self.cap = None + self.video_info = None + self.writer = None + self.path = None + + def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): + """Create a video writer for saving frames using OpenCV. + + Args: + target_path (str): Path where the video will be saved. + video_info (VideoInfo): Video information containing resolution and FPS. + codec (str, optional): FourCC code for video codec. Defaults to "mp4v". + + Returns: + OpenCVWriter: A video writer object for writing frames. + """ + return OpenCVWriter( + target_path, video_info.fps, video_info.resolution_wh, codec + ) + + def open(self, path: str) -> None: + """ + Open a video source and initialize the video capture object. + + Args: + path (str): Path to the video file, RTSP URL, or camera index. + + Raises: + RuntimeError: If unable to open the video source. + ValueError: If the source type is not supported. + """ + self.cap = cv2.VideoCapture(path) + self.path = path + + if not self.cap.isOpened(): + raise RuntimeError(f"Cannot open video source: {path}") + self.video_info = self._set_video_info() + + if isinstance(path, int): + self.video_info.source_type = SOURCE_TYPE.WEBCAM + elif isinstance(path, str): + self.video_info.source_type = ( + SOURCE_TYPE.RTSP + if path.lower().startswith("rtsp://") + else SOURCE_TYPE.VIDEO_FILE + ) + else: + raise ValueError("Unsupported source type") + + def isOpened(self) -> bool: + """Check if the video source is opened successfully. + + Returns: + bool: True if the video source is opened, False otherwise. + """ + return self.cap.isOpened() + + def _set_video_info(self) -> VideoInfo: + """Set up video information from the opened video source. + + Returns: + VideoInfo: Object containing video properties like width, height, fps, etc. + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = round(self.cap.get(cv2.CAP_PROP_FPS)) + total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + return VideoInfo(width, height, fps, total_frames) + + def info(self) -> VideoInfo: + """Get video information. + + Returns: + VideoInfo: Object containing video properties. + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + return self.video_info + + def read(self) -> tuple[bool, np.ndarray]: + """Read a frame from the video source. + + Returns: + tuple[bool, np.ndarray]: A tuple containing: + - bool: True if frame was successfully read + - np.ndarray: The video frame in BGR format + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + ret, frame = self.cap.read() + return ret, frame + + def grab(self) -> bool: + """Grab a frame from video source without decoding. + + Returns: + bool: True if frame was successfully grabbed. + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + return self.cap.grab() + + def seek(self, frame_idx: int) -> None: + """Seek to a specific frame in the video. + + Args: + frame_idx (int): Index of the frame to seek to (0-based). + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) + + def release(self) -> None: + """Release the video capture resources.""" + if self.cap is not None and self.cap.isOpened(): + self.cap.release() + self.cap = None + + + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int | None = None, + progress_message: str = "Processing video", + show_progress: bool = False, + ): + """Save processed video frames to a file with audio preservation. + + Args: + target_path (str): Path where the processed video will be saved. + callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes + each frame. Takes frame and index as input, returns processed frame. + fps (int | None, optional): Output video FPS. If None, uses source FPS. + progress_message (str, optional): Message to show in progress bar. + show_progress (bool, optional): Whether to show progress bar. + + Raises: + RuntimeError: If video source is not opened. + ValueError: If source is not a video file. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + + if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: + raise ValueError("Only video files can be saved.") + + if self.writer is not None: + self.writer.close() + self.writer = None + + source_codec = self.cap.get(cv2.CAP_PROP_FOURCC) + + if fps is None: + fps = self.video_info.fps + + self.writer = OpenCVWriter( + target_path, fps, self.video_info.resolution_wh, source_codec + ) + total_frames = self.video_info.total_frames + frames_generator = self.frames() + for index, frame in enumerate( + tqdm( + frames_generator, + total=total_frames, + disable=not show_progress, + desc=progress_message, + ) + ): + result_frame = callback(frame, index) + self.writer.write(frame=result_frame) + + self.writer.close() + + +class OpenCVWriter(BaseWriter): + """A class to handle video writing operations using OpenCV's VideoWriter. + + This class provides an interface to write frames to a video file using OpenCV, + with support for different codecs and automatic fallback to mp4v if the specified + codec fails. + """ + + def __init__( + self, + filename: str, + fps: int, + frame_size: tuple[int, int], + codec: str = "mp4v", + ): + """Initialize the video writer. + + Args: + filename (str): Path to the output video file. + fps (int): Frames per second for the output video. + frame_size (tuple[int, int]): Width and height of the output video frames. + codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". + + Raises: + RuntimeError: If the video writer cannot be initialized. + """ + try: + fourcc_int = cv2.VideoWriter_fourcc(*codec) + self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) + except Exception: + fourcc_int = cv2.VideoWriter_fourcc(*"mp4v") + self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) + if not self.writer.isOpened(): + raise RuntimeError(f"Cannot open video writer for file: {filename}") + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def write(self, frame: np.ndarray) -> None: + """Write a frame to the video file. + + Args: + frame (np.ndarray): The frame to write, in BGR format. + """ + self.writer.write(frame) + + def close(self) -> None: + """Release the video writer resources.""" + self.writer.release() diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/supervision/video/core.py b/supervision/video/core.py new file mode 100644 index 0000000000..05d9d5d7ab --- /dev/null +++ b/supervision/video/core.py @@ -0,0 +1,134 @@ + +from collections.abc import Callable, Generator + +import cv2 +import numpy as np +from tqdm.auto import tqdm + +from supervision.video.utils import VideoInfo +from supervision.video.backend.base import BaseBackend, BaseWriter + +from supervision.video.backend.openCV import OpenCVBackend + + +class Video: + """High-level interface for video operations. + + This class provides a convenient interface for video operations including + reading frames, saving processed videos, and video information access. + """ + + info: VideoInfo + source: str | int + backend: BaseBackend + + def __init__( + self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" + ): + if backend == "opencv": + self.backend = OpenCVBackend() + + self.backend.open(source) + self.info = self.backend.video_info + self.source = source + + def __iter__(self): + """Make the Video class iterable over frames. + + Returns: + Generator: A generator yielding video frames. + """ + return self.backend.frames() + + def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter: + """Create a video writer for saving frames. + + Args: + target_path (str): Path where the video will be saved. + info (VideoInfo): Video information containing resolution and FPS. + codec (str, optional): FourCC code for video codec. Defaults to "mp4v". + + Returns: + Writer: A video writer object for writing frames. + """ + return self.backend.get_sink(target_path, info, codec) + + def frames( + self, + stride: int = 1, + start: int = 0, + end: int | None = None, + resolution_wh: tuple[int, int] | None = None, + ): + """Generate frames from the video. + + Args: + stride (int, optional): Number of frames to skip. Defaults to 1. + start (int, optional): Starting frame index. Defaults to 0. + end (int | None, optional): Ending frame index. Defaults to None. + resolution_wh (tuple[int, int] | None, optional): Target resolution + (width, height). If provided, frames will be resized. Defaults to None. + + Returns: + Generator: A generator yielding video frames. + """ + if self.backend.cap is None: + raise RuntimeError("Video not opened yet.") + + total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0 + is_live_stream = total_frames <= 0 + + if is_live_stream: + while True: + for _ in range(stride - 1): + if not self.backend.grab(): + return + ret, frame = self.backend.read() + if not ret: + return + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + else: + if end is None or end > total_frames: + end = total_frames + + frame_idx = start + while frame_idx < end: + self.backend.seek(frame_idx) + ret, frame = self.backend.read() + if not ret: + break + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + frame_idx += stride + + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int | None = None, + progress_message: str = "Processing video", + show_progress: bool = False, + ): + """Save processed video frames to a file. + + Args: + target_path (str): Path where the processed video will be saved. + callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes + each frame. Takes frame and index as input, returns processed frame. + fps (int | None, optional): Output video FPS. + progress_message (str, optional): Message to show in progress bar. + Defaults to "Processing video". + show_progress (bool, optional): Whether to show progress bar. + Defaults to False. + """ + self.backend.save( + target_path=target_path, + callback=callback, + fps=fps, + progress_message=progress_message, + show_progress=show_progress, + ) + diff --git a/supervision/video/utils.py b/supervision/video/utils.py new file mode 100644 index 0000000000..a8667cf919 --- /dev/null +++ b/supervision/video/utils.py @@ -0,0 +1,58 @@ +from dataclasses import dataclass +from enum import Enum +import cv2 + +class SOURCE_TYPE(Enum): + VIDEO_FILE = "VIDEO_FILE" + WEBCAM = "WEBCAM" + RTSP = "RTSP" + +@dataclass +class VideoInfo: + """ + A class to store video information, including width, height, fps and + total number of frames. + + Attributes: + width (int): width of the video in pixels + height (int): height of the video in pixels + fps (int): frames per second of the video + total_frames (Optional[int]): total number of frames in the video, + default is None + + Examples: + ```python + import supervision as sv + + video_info = sv.VideoInfo.from_video_path(video_path=) + + video_info + # VideoInfo(width=3840, height=2160, fps=25, total_frames=538) + + video_info.resolution_wh + # (3840, 2160) + ``` + """ + + width: int + height: int + fps: int + total_frames: int | None = None + source_type: SOURCE_TYPE | None = None + + @classmethod + def from_video_path(cls, video_path: str) -> "VideoInfo": + video = cv2.VideoCapture(video_path) + if not video.isOpened(): + raise Exception(f"Could not open video at {video_path}") + + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = round(video.get(cv2.CAP_PROP_FPS)) + total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + video.release() + return VideoInfo(width, height, fps, total_frames) + + @property + def resolution_wh(self) -> tuple[int, int]: + return self.width, self.height From 385639b32ac59fce9f2635df9aae96fb7993e289 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Fri, 8 Aug 2025 16:34:36 -0400 Subject: [PATCH 027/128] UPDATE: Added frame generator within backend --- supervision/video/backend/base.py | 11 ++++++ supervision/video/backend/openCV.py | 52 +++++++++++++++++++++++++++++ supervision/video/core.py | 34 ++----------------- 3 files changed, 66 insertions(+), 31 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 2642534bc3..4e047dd789 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -44,6 +44,17 @@ def grab(self) -> bool: @abstractmethod def seek(self, frame_idx: int) -> None: pass + + @abstractmethod + def frames( + self, + *, + start: int = 0, + end: int | None = None, + stride: int = 1, + resolution_wh: tuple[int, int] | None = None, + ): + pass @abstractmethod def release(self) -> None: diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 9cadc5c212..a591b597cd 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -150,6 +150,58 @@ def release(self) -> None: self.cap.release() self.cap = None + def frames( + self, + *, + start: int = 0, + end: int | None = None, + stride: int = 1, + resolution_wh: tuple[int, int] | None = None, + ): + """Generate frames from the video source. + + Args: + start (int, optional): Starting frame index. Defaults to 0. + end (int | None, optional): Ending frame index. Defaults to None. + stride (int, optional): Number of frames to skip. Defaults to 1. + resolution_wh (tuple[int, int] | None, optional): Target resolution + (width, height). If provided, frames will be resized. Defaults to None. + + Yields: + np.ndarray: Video frames in BGR format. Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + + total_frames = self.video_info.total_frames if self.video_info else 0 + is_live_stream = total_frames <= 0 + + if is_live_stream: + while True: + for _ in range(stride - 1): + if not self.grab(): + return + ret, frame = self.read() + if not ret: + return + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + else: + if end is None or end > total_frames: + end = total_frames + + frame_idx = start + while frame_idx < end: + self.seek(frame_idx) + ret, frame = self.read() + if not ret: + break + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + frame_idx += stride def save( self, diff --git a/supervision/video/core.py b/supervision/video/core.py index 05d9d5d7ab..43add0ee77 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -72,37 +72,9 @@ def frames( Returns: Generator: A generator yielding video frames. """ - if self.backend.cap is None: - raise RuntimeError("Video not opened yet.") - - total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0 - is_live_stream = total_frames <= 0 - - if is_live_stream: - while True: - for _ in range(stride - 1): - if not self.backend.grab(): - return - ret, frame = self.backend.read() - if not ret: - return - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - else: - if end is None or end > total_frames: - end = total_frames - - frame_idx = start - while frame_idx < end: - self.backend.seek(frame_idx) - ret, frame = self.backend.read() - if not ret: - break - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - frame_idx += stride + return self.backend.frames( + stride=stride, start=start, end=end, resolution_wh=resolution_wh + ) def save( self, From 61dd0169b297a2bc406be875b1f2d757b3104d82 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Fri, 8 Aug 2025 16:37:58 -0400 Subject: [PATCH 028/128] UPDATE: Added .save codec support --- supervision/video/backend/base.py | 1 + supervision/video/backend/openCV.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 4e047dd789..1e2f578d7b 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -68,6 +68,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, + codec = "mp4v" ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index a591b597cd..8a2e860586 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -210,6 +210,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, + codec: str = None ): """Save processed video frames to a file with audio preservation. @@ -235,7 +236,7 @@ def save( self.writer.close() self.writer = None - source_codec = self.cap.get(cv2.CAP_PROP_FOURCC) + source_codec = codec if codec is None else self.cap.get(cv2.CAP_PROP_FOURCC) if fps is None: fps = self.video_info.fps From d2bb428e1c88662e7f28a1016dd484ca5a0fa76e Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Fri, 8 Aug 2025 16:43:00 -0400 Subject: [PATCH 029/128] UPDATE: Added default mp4v codec --- supervision/video/backend/base.py | 2 +- supervision/video/backend/openCV.py | 8 +++----- supervision/video/core.py | 2 ++ 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 1e2f578d7b..27a5797a89 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -68,7 +68,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec = "mp4v" + codec: str = "mp4v" ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 8a2e860586..f31716b24a 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -210,7 +210,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = None + codec: str = "mp4v" ): """Save processed video frames to a file with audio preservation. @@ -235,14 +235,12 @@ def save( if self.writer is not None: self.writer.close() self.writer = None - - source_codec = codec if codec is None else self.cap.get(cv2.CAP_PROP_FOURCC) - + if fps is None: fps = self.video_info.fps self.writer = OpenCVWriter( - target_path, fps, self.video_info.resolution_wh, source_codec + target_path, fps, self.video_info.resolution_wh, codec ) total_frames = self.video_info.total_frames frames_generator = self.frames() diff --git a/supervision/video/core.py b/supervision/video/core.py index 43add0ee77..9a2921a8b9 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -83,6 +83,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, + codec: str = "mp4v" ): """Save processed video frames to a file. @@ -102,5 +103,6 @@ def save( fps=fps, progress_message=progress_message, show_progress=show_progress, + codec=codec ) From f3a3133b2ebd1dd5743683d5915c2678b5e70023 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 8 Aug 2025 20:43:53 +0000 Subject: [PATCH 030/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 10 ++-------- supervision/utils/video.py | 4 ++-- supervision/video/backend/base.py | 8 +++++--- supervision/video/backend/openCV.py | 12 +++++++----- supervision/video/core.py | 17 +++++++---------- supervision/video/utils.py | 3 +++ 6 files changed, 26 insertions(+), 28 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 067b346a97..dff62b5c95 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -134,14 +134,8 @@ get_video_frames_generator, process_video, ) - -from supervision.video.utils import ( - VideoInfo -) - -from supervision.video.core import ( - Video -) +from supervision.video.core import Video +from supervision.video.utils import VideoInfo __all__ = [ "LMM", diff --git a/supervision/utils/video.py b/supervision/utils/video.py index a493850e88..00eb9b4906 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -1,16 +1,15 @@ from __future__ import annotations import time -from abc import ABC, abstractmethod from collections import deque from collections.abc import Callable, Generator from dataclasses import dataclass -from enum import Enum import cv2 import numpy as np from tqdm.auto import tqdm + @DeprecationWarning @dataclass class VideoInfo: @@ -260,6 +259,7 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray: result_frame = callback(frame, index) sink.write_frame(frame=result_frame) + class FPSMonitor: """ A class for monitoring frames per second (FPS) to benchmark latency. diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 27a5797a89..5ee7553396 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,9 +1,11 @@ from abc import ABC, abstractmethod +from collections.abc import Callable + import numpy as np -from collections.abc import Callable, Generator from supervision.video.utils import VideoInfo + class BaseBackend(ABC): def __init__(self): self.cap = None @@ -44,7 +46,7 @@ def grab(self) -> bool: @abstractmethod def seek(self, frame_idx: int) -> None: pass - + @abstractmethod def frames( self, @@ -68,7 +70,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v" + codec: str = "mp4v", ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index f31716b24a..88dfedf6f7 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -1,10 +1,12 @@ -from supervision.video.backend.base import BaseBackend, BaseWriter -from supervision.video.utils import SOURCE_TYPE, VideoInfo +from collections.abc import Callable import cv2 import numpy as np from tqdm.auto import tqdm -from typing import Callable + +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.utils import SOURCE_TYPE, VideoInfo + class OpenCVBackend(BaseBackend): """ @@ -210,7 +212,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v" + codec: str = "mp4v", ): """Save processed video frames to a file with audio preservation. @@ -235,7 +237,7 @@ def save( if self.writer is not None: self.writer.close() self.writer = None - + if fps is None: fps = self.video_info.fps diff --git a/supervision/video/core.py b/supervision/video/core.py index 9a2921a8b9..61d1b2bd38 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,14 +1,10 @@ +from collections.abc import Callable -from collections.abc import Callable, Generator - -import cv2 import numpy as np -from tqdm.auto import tqdm -from supervision.video.utils import VideoInfo from supervision.video.backend.base import BaseBackend, BaseWriter - from supervision.video.backend.openCV import OpenCVBackend +from supervision.video.utils import VideoInfo class Video: @@ -40,7 +36,9 @@ def __iter__(self): """ return self.backend.frames() - def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter: + def sink( + self, target_path: str, info: VideoInfo, codec: str = "mp4v" + ) -> BaseWriter: """Create a video writer for saving frames. Args: @@ -83,7 +81,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v" + codec: str = "mp4v", ): """Save processed video frames to a file. @@ -103,6 +101,5 @@ def save( fps=fps, progress_message=progress_message, show_progress=show_progress, - codec=codec + codec=codec, ) - diff --git a/supervision/video/utils.py b/supervision/video/utils.py index a8667cf919..d79069e536 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,12 +1,15 @@ from dataclasses import dataclass from enum import Enum + import cv2 + class SOURCE_TYPE(Enum): VIDEO_FILE = "VIDEO_FILE" WEBCAM = "WEBCAM" RTSP = "RTSP" + @dataclass class VideoInfo: """ From 421cd5d4ea9efc21f9ac32aaf6b187e0391360de Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:06:05 -0400 Subject: [PATCH 031/128] UPDATE: Trying to pass checks --- supervision/video/utils.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/supervision/video/utils.py b/supervision/video/utils.py index a8667cf919..ce1c84230b 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -42,15 +42,38 @@ class VideoInfo: @classmethod def from_video_path(cls, video_path: str) -> "VideoInfo": + """Create VideoInfo from a video file path. + + Args: + video_path (str): Path to the video file. + + Returns: + VideoInfo: Video information containing width, height, fps, and total frames. + + Raises: + ValueError: If video cannot be opened or has invalid properties. + """ video = cv2.VideoCapture(video_path) if not video.isOpened(): - raise Exception(f"Could not open video at {video_path}") + raise ValueError(f"Could not open video at {video_path}") + + try: + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + if width <= 0 or height <= 0: + raise ValueError(f"Invalid video dimensions: {width}x{height}") + + fps = video.get(cv2.CAP_PROP_FPS) + if fps <= 0: + fps = 30 # Default to 30fps if invalid + fps = round(fps) + + total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + if total_frames < 0: + total_frames = None # Some video formats may not report frame count + finally: + video.release() - width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = round(video.get(cv2.CAP_PROP_FPS)) - total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - video.release() return VideoInfo(width, height, fps, total_frames) @property From b9da794c77b9415145987c9a29dd3c2fdbf7dfae Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:08:41 -0400 Subject: [PATCH 032/128] UPDATE: Typings for optional params --- supervision/video/utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 44d7c0b838..608de4ad0a 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from enum import Enum +from typing import Optional, Tuple, Union import cv2 @@ -40,8 +41,8 @@ class VideoInfo: width: int height: int fps: int - total_frames: int | None = None - source_type: SOURCE_TYPE | None = None + total_frames: Optional[int] = None + source_type: Optional[SOURCE_TYPE] = None @classmethod def from_video_path(cls, video_path: str) -> "VideoInfo": @@ -51,7 +52,7 @@ def from_video_path(cls, video_path: str) -> "VideoInfo": video_path (str): Path to the video file. Returns: - VideoInfo: Video information containing width, height, fps, and total frames. + VideoInfo: Video info containing width, height, fps, and total frames. Raises: ValueError: If video cannot be opened or has invalid properties. @@ -80,5 +81,5 @@ def from_video_path(cls, video_path: str) -> "VideoInfo": return VideoInfo(width, height, fps, total_frames) @property - def resolution_wh(self) -> tuple[int, int]: + def resolution_wh(self) -> Tuple[int, int]: return self.width, self.height From e8b39a10d8f200dac592ac846835012eb6bfe354 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:09:00 +0000 Subject: [PATCH 033/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 608de4ad0a..e20e447b30 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional, Tuple, Union +from typing import Optional, Tuple import cv2 @@ -41,8 +41,8 @@ class VideoInfo: width: int height: int fps: int - total_frames: Optional[int] = None - source_type: Optional[SOURCE_TYPE] = None + total_frames: int | None = None + source_type: SOURCE_TYPE | None = None @classmethod def from_video_path(cls, video_path: str) -> "VideoInfo": @@ -81,5 +81,5 @@ def from_video_path(cls, video_path: str) -> "VideoInfo": return VideoInfo(width, height, fps, total_frames) @property - def resolution_wh(self) -> Tuple[int, int]: + def resolution_wh(self) -> tuple[int, int]: return self.width, self.height From 90885b0dabc1ad997008c11df3d41f43e3e8e303 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:15:59 -0400 Subject: [PATCH 034/128] Import check fix --- supervision/video/backend/base.py | 1 - supervision/video/utils.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 5ee7553396..073131b9b2 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -5,7 +5,6 @@ from supervision.video.utils import VideoInfo - class BaseBackend(ABC): def __init__(self): self.cap = None diff --git a/supervision/video/utils.py b/supervision/video/utils.py index e20e447b30..9e33315a26 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -23,6 +23,8 @@ class VideoInfo: fps (int): frames per second of the video total_frames (Optional[int]): total number of frames in the video, default is None + source_type (Optional[SOURCE_TYPE]): source type of the video, + default is None Examples: ```python @@ -41,7 +43,7 @@ class VideoInfo: width: int height: int fps: int - total_frames: int | None = None + total_frames: Optional[int] = None source_type: SOURCE_TYPE | None = None @classmethod From 9d0b5ed0b2d801d5270f8d3d9a11a0ebba0aa777 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:16:18 +0000 Subject: [PATCH 035/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/base.py | 1 + supervision/video/utils.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 073131b9b2..5ee7553396 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -5,6 +5,7 @@ from supervision.video.utils import VideoInfo + class BaseBackend(ABC): def __init__(self): self.cap = None diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 9e33315a26..15bf7f6e66 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional, Tuple +from typing import Optional import cv2 @@ -43,7 +43,7 @@ class VideoInfo: width: int height: int fps: int - total_frames: Optional[int] = None + total_frames: int | None = None source_type: SOURCE_TYPE | None = None @classmethod From f983113ce8fc5e4e050755b28c30b28c12531c26 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:19:10 -0400 Subject: [PATCH 036/128] Import check fix --- supervision/video/utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 15bf7f6e66..fc165b6c1c 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import Optional, Tuple import cv2 @@ -43,8 +43,8 @@ class VideoInfo: width: int height: int fps: int - total_frames: int | None = None - source_type: SOURCE_TYPE | None = None + total_frames: Optional[int] = None + source_type: Optional[SOURCE_TYPE] = None @classmethod def from_video_path(cls, video_path: str) -> "VideoInfo": @@ -83,5 +83,10 @@ def from_video_path(cls, video_path: str) -> "VideoInfo": return VideoInfo(width, height, fps, total_frames) @property - def resolution_wh(self) -> tuple[int, int]: + def resolution_wh(self) -> Tuple[int, int]: + """Get the video resolution as (width, height). + + Returns: + Tuple[int, int]: Video dimensions as (width, height). + """ return self.width, self.height From c1544f0549c9695e0f29f66230ce8426bb9ab72f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:19:29 +0000 Subject: [PATCH 037/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/video/utils.py b/supervision/video/utils.py index fc165b6c1c..64ec2fdbe5 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -43,8 +43,8 @@ class VideoInfo: width: int height: int fps: int - total_frames: Optional[int] = None - source_type: Optional[SOURCE_TYPE] = None + total_frames: int | None = None + source_type: SOURCE_TYPE | None = None @classmethod def from_video_path(cls, video_path: str) -> "VideoInfo": @@ -83,7 +83,7 @@ def from_video_path(cls, video_path: str) -> "VideoInfo": return VideoInfo(width, height, fps, total_frames) @property - def resolution_wh(self) -> Tuple[int, int]: + def resolution_wh(self) -> tuple[int, int]: """Get the video resolution as (width, height). Returns: From 3a339823222d21ed19c33f8997b4c39b9dcf4433 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:22:02 -0400 Subject: [PATCH 038/128] Import check fix --- supervision/video/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 64ec2fdbe5..df7203fbe8 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -43,8 +43,8 @@ class VideoInfo: width: int height: int fps: int - total_frames: int | None = None - source_type: SOURCE_TYPE | None = None + total_frames: int = None + source_type: SOURCE_TYPE = None @classmethod def from_video_path(cls, video_path: str) -> "VideoInfo": From 343600da59db1019449c27a9c63b11770e5ac322 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:22:20 +0000 Subject: [PATCH 039/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/supervision/video/utils.py b/supervision/video/utils.py index df7203fbe8..fa5cbe8685 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,6 +1,5 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional, Tuple import cv2 From bf0f8841e68d9ccf7cee72a895c5ed63cbf7dc43 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:23:43 -0400 Subject: [PATCH 040/128] Import check fix --- supervision/video/backend/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 5ee7553396..929dd03324 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -52,7 +52,7 @@ def frames( self, *, start: int = 0, - end: int | None = None, + end: int = None, stride: int = 1, resolution_wh: tuple[int, int] | None = None, ): @@ -67,7 +67,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, + fps: int = None, progress_message: str = "Processing video", show_progress: bool = False, codec: str = "mp4v", From 881c9f4e70db79fef7608c45149086eb8c9787a8 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:25:41 -0400 Subject: [PATCH 041/128] Import check fix --- supervision/video/backend/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 929dd03324..14854ed65a 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -54,7 +54,7 @@ def frames( start: int = 0, end: int = None, stride: int = 1, - resolution_wh: tuple[int, int] | None = None, + resolution_wh: tuple[int, int] = None, ): pass From 5fed10baa9c3b26176c64d05633b0d08049f6fac Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:27:43 -0400 Subject: [PATCH 042/128] Import check fix --- supervision/video/backend/openCV.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 88dfedf6f7..ac8452d5ab 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -156,17 +156,17 @@ def frames( self, *, start: int = 0, - end: int | None = None, + end: int = None, stride: int = 1, - resolution_wh: tuple[int, int] | None = None, + resolution_wh: tuple[int, int] = None, ): """Generate frames from the video source. Args: start (int, optional): Starting frame index. Defaults to 0. - end (int | None, optional): Ending frame index. Defaults to None. + end (int, optional): Ending frame index. Defaults to None. stride (int, optional): Number of frames to skip. Defaults to 1. - resolution_wh (tuple[int, int] | None, optional): Target resolution + resolution_wh (tuple[int, int], optional): Target resolution (width, height). If provided, frames will be resized. Defaults to None. Yields: @@ -209,7 +209,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, + fps: int = None, progress_message: str = "Processing video", show_progress: bool = False, codec: str = "mp4v", @@ -220,7 +220,7 @@ def save( target_path (str): Path where the processed video will be saved. callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. If None, uses source FPS. + fps (int, optional): Output video FPS. If None, uses source FPS. progress_message (str, optional): Message to show in progress bar. show_progress (bool, optional): Whether to show progress bar. From a84dde837481c643f2fe565c41646c57f0dd6430 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:28:49 -0400 Subject: [PATCH 043/128] Import check fix --- supervision/video/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 61d1b2bd38..410890207e 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -19,7 +19,7 @@ class Video: backend: BaseBackend def __init__( - self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" + self, source: str | int, info: VideoInfo = None, backend: str = "opencv" ): if backend == "opencv": self.backend = OpenCVBackend() @@ -55,16 +55,16 @@ def frames( self, stride: int = 1, start: int = 0, - end: int | None = None, - resolution_wh: tuple[int, int] | None = None, + end: int = None, + resolution_wh: tuple[int, int] = None, ): """Generate frames from the video. Args: stride (int, optional): Number of frames to skip. Defaults to 1. start (int, optional): Starting frame index. Defaults to 0. - end (int | None, optional): Ending frame index. Defaults to None. - resolution_wh (tuple[int, int] | None, optional): Target resolution + end (int, optional): Ending frame index. Defaults to None. + resolution_wh (tuple[int, int], optional): Target resolution (width, height). If provided, frames will be resized. Defaults to None. Returns: @@ -78,7 +78,7 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, + fps: int = None, progress_message: str = "Processing video", show_progress: bool = False, codec: str = "mp4v", @@ -89,7 +89,7 @@ def save( target_path (str): Path where the processed video will be saved. callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. + fps (int, optional): Output video FPS. progress_message (str, optional): Message to show in progress bar. Defaults to "Processing video". show_progress (bool, optional): Whether to show progress bar. From 012b95060a99f794c3f282c720c3d25a0a7c0bb4 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:32:05 -0400 Subject: [PATCH 044/128] RM: Remove video folder --- supervision/__init__.py | 4 +- supervision/video/__init__.py | 0 supervision/video/backend/base.py | 85 -------- supervision/video/backend/openCV.py | 314 ---------------------------- supervision/video/backend/pyAV.py | 0 supervision/video/core.py | 105 ---------- supervision/video/utils.py | 91 -------- 7 files changed, 1 insertion(+), 598 deletions(-) delete mode 100644 supervision/video/__init__.py delete mode 100644 supervision/video/backend/base.py delete mode 100644 supervision/video/backend/openCV.py delete mode 100644 supervision/video/backend/pyAV.py delete mode 100644 supervision/video/core.py delete mode 100644 supervision/video/utils.py diff --git a/supervision/__init__.py b/supervision/__init__.py index dff62b5c95..53f98a8136 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -131,11 +131,10 @@ from supervision.utils.video import ( FPSMonitor, VideoSink, + VideoInfo, get_video_frames_generator, process_video, ) -from supervision.video.core import Video -from supervision.video.utils import VideoInfo __all__ = [ "LMM", @@ -193,7 +192,6 @@ "TriangleAnnotator", "VertexAnnotator", "VertexLabelAnnotator", - "Video", "VideoInfo", "VideoSink", "approximate_polygon", diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py deleted file mode 100644 index 14854ed65a..0000000000 --- a/supervision/video/backend/base.py +++ /dev/null @@ -1,85 +0,0 @@ -from abc import ABC, abstractmethod -from collections.abc import Callable - -import numpy as np - -from supervision.video.utils import VideoInfo - - -class BaseBackend(ABC): - def __init__(self): - self.cap = None - self.video_info = None - self.writer = None - self.path = None - - @abstractmethod - def get_sink( - self, target_path: str, video_info: VideoInfo, codec: str = "mp4v" - ) -> "BaseWriter": - pass - - @abstractmethod - def open(self, path: str) -> None: - pass - - @abstractmethod - def isOpened(self) -> bool: - pass - - @abstractmethod - def _set_video_info(self) -> VideoInfo: - pass - - @abstractmethod - def info(self) -> VideoInfo: - pass - - @abstractmethod - def read(self) -> tuple[bool, np.ndarray]: - pass - - @abstractmethod - def grab(self) -> bool: - pass - - @abstractmethod - def seek(self, frame_idx: int) -> None: - pass - - @abstractmethod - def frames( - self, - *, - start: int = 0, - end: int = None, - stride: int = 1, - resolution_wh: tuple[int, int] = None, - ): - pass - - @abstractmethod - def release(self) -> None: - pass - - @abstractmethod - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, - progress_message: str = "Processing video", - show_progress: bool = False, - codec: str = "mp4v", - ): - pass - - -class BaseWriter(ABC): - @abstractmethod - def write(self, frame: np.ndarray) -> None: - pass - - @abstractmethod - def close(self) -> None: - pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py deleted file mode 100644 index ac8452d5ab..0000000000 --- a/supervision/video/backend/openCV.py +++ /dev/null @@ -1,314 +0,0 @@ -from collections.abc import Callable - -import cv2 -import numpy as np -from tqdm.auto import tqdm - -from supervision.video.backend.base import BaseBackend, BaseWriter -from supervision.video.utils import SOURCE_TYPE, VideoInfo - - -class OpenCVBackend(BaseBackend): - """ - OpenCV implementation of the Backend interface. - Handles video capture, frame reading, seeking, and writing operations using OpenCV. - """ - - def __init__(self): - """Initialize the OpenCV backend with empty video capture and writer objects.""" - super().__init__() - self.cap = None - self.video_info = None - self.writer = None - self.path = None - - def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): - """Create a video writer for saving frames using OpenCV. - - Args: - target_path (str): Path where the video will be saved. - video_info (VideoInfo): Video information containing resolution and FPS. - codec (str, optional): FourCC code for video codec. Defaults to "mp4v". - - Returns: - OpenCVWriter: A video writer object for writing frames. - """ - return OpenCVWriter( - target_path, video_info.fps, video_info.resolution_wh, codec - ) - - def open(self, path: str) -> None: - """ - Open a video source and initialize the video capture object. - - Args: - path (str): Path to the video file, RTSP URL, or camera index. - - Raises: - RuntimeError: If unable to open the video source. - ValueError: If the source type is not supported. - """ - self.cap = cv2.VideoCapture(path) - self.path = path - - if not self.cap.isOpened(): - raise RuntimeError(f"Cannot open video source: {path}") - self.video_info = self._set_video_info() - - if isinstance(path, int): - self.video_info.source_type = SOURCE_TYPE.WEBCAM - elif isinstance(path, str): - self.video_info.source_type = ( - SOURCE_TYPE.RTSP - if path.lower().startswith("rtsp://") - else SOURCE_TYPE.VIDEO_FILE - ) - else: - raise ValueError("Unsupported source type") - - def isOpened(self) -> bool: - """Check if the video source is opened successfully. - - Returns: - bool: True if the video source is opened, False otherwise. - """ - return self.cap.isOpened() - - def _set_video_info(self) -> VideoInfo: - """Set up video information from the opened video source. - - Returns: - VideoInfo: Object containing video properties like width, height, fps, etc. - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if not self.isOpened(): - raise RuntimeError("Video not opened yet.") - width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = round(self.cap.get(cv2.CAP_PROP_FPS)) - total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) - return VideoInfo(width, height, fps, total_frames) - - def info(self) -> VideoInfo: - """Get video information. - - Returns: - VideoInfo: Object containing video properties. - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if not self.isOpened(): - raise RuntimeError("Video not opened yet.") - return self.video_info - - def read(self) -> tuple[bool, np.ndarray]: - """Read a frame from the video source. - - Returns: - tuple[bool, np.ndarray]: A tuple containing: - - bool: True if frame was successfully read - - np.ndarray: The video frame in BGR format - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - ret, frame = self.cap.read() - return ret, frame - - def grab(self) -> bool: - """Grab a frame from video source without decoding. - - Returns: - bool: True if frame was successfully grabbed. - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - return self.cap.grab() - - def seek(self, frame_idx: int) -> None: - """Seek to a specific frame in the video. - - Args: - frame_idx (int): Index of the frame to seek to (0-based). - - Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) - - def release(self) -> None: - """Release the video capture resources.""" - if self.cap is not None and self.cap.isOpened(): - self.cap.release() - self.cap = None - - def frames( - self, - *, - start: int = 0, - end: int = None, - stride: int = 1, - resolution_wh: tuple[int, int] = None, - ): - """Generate frames from the video source. - - Args: - start (int, optional): Starting frame index. Defaults to 0. - end (int, optional): Ending frame index. Defaults to None. - stride (int, optional): Number of frames to skip. Defaults to 1. - resolution_wh (tuple[int, int], optional): Target resolution - (width, height). If provided, frames will be resized. Defaults to None. - - Yields: - np.ndarray: Video frames in BGR format. Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - - total_frames = self.video_info.total_frames if self.video_info else 0 - is_live_stream = total_frames <= 0 - - if is_live_stream: - while True: - for _ in range(stride - 1): - if not self.grab(): - return - ret, frame = self.read() - if not ret: - return - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - else: - if end is None or end > total_frames: - end = total_frames - - frame_idx = start - while frame_idx < end: - self.seek(frame_idx) - ret, frame = self.read() - if not ret: - break - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - frame_idx += stride - - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, - progress_message: str = "Processing video", - show_progress: bool = False, - codec: str = "mp4v", - ): - """Save processed video frames to a file with audio preservation. - - Args: - target_path (str): Path where the processed video will be saved. - callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes - each frame. Takes frame and index as input, returns processed frame. - fps (int, optional): Output video FPS. If None, uses source FPS. - progress_message (str, optional): Message to show in progress bar. - show_progress (bool, optional): Whether to show progress bar. - - Raises: - RuntimeError: If video source is not opened. - ValueError: If source is not a video file. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - - if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: - raise ValueError("Only video files can be saved.") - - if self.writer is not None: - self.writer.close() - self.writer = None - - if fps is None: - fps = self.video_info.fps - - self.writer = OpenCVWriter( - target_path, fps, self.video_info.resolution_wh, codec - ) - total_frames = self.video_info.total_frames - frames_generator = self.frames() - for index, frame in enumerate( - tqdm( - frames_generator, - total=total_frames, - disable=not show_progress, - desc=progress_message, - ) - ): - result_frame = callback(frame, index) - self.writer.write(frame=result_frame) - - self.writer.close() - - -class OpenCVWriter(BaseWriter): - """A class to handle video writing operations using OpenCV's VideoWriter. - - This class provides an interface to write frames to a video file using OpenCV, - with support for different codecs and automatic fallback to mp4v if the specified - codec fails. - """ - - def __init__( - self, - filename: str, - fps: int, - frame_size: tuple[int, int], - codec: str = "mp4v", - ): - """Initialize the video writer. - - Args: - filename (str): Path to the output video file. - fps (int): Frames per second for the output video. - frame_size (tuple[int, int]): Width and height of the output video frames. - codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". - - Raises: - RuntimeError: If the video writer cannot be initialized. - """ - try: - fourcc_int = cv2.VideoWriter_fourcc(*codec) - self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) - except Exception: - fourcc_int = cv2.VideoWriter_fourcc(*"mp4v") - self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) - if not self.writer.isOpened(): - raise RuntimeError(f"Cannot open video writer for file: {filename}") - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.close() - - def write(self, frame: np.ndarray) -> None: - """Write a frame to the video file. - - Args: - frame (np.ndarray): The frame to write, in BGR format. - """ - self.writer.write(frame) - - def close(self) -> None: - """Release the video writer resources.""" - self.writer.release() diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/supervision/video/core.py b/supervision/video/core.py deleted file mode 100644 index 410890207e..0000000000 --- a/supervision/video/core.py +++ /dev/null @@ -1,105 +0,0 @@ -from collections.abc import Callable - -import numpy as np - -from supervision.video.backend.base import BaseBackend, BaseWriter -from supervision.video.backend.openCV import OpenCVBackend -from supervision.video.utils import VideoInfo - - -class Video: - """High-level interface for video operations. - - This class provides a convenient interface for video operations including - reading frames, saving processed videos, and video information access. - """ - - info: VideoInfo - source: str | int - backend: BaseBackend - - def __init__( - self, source: str | int, info: VideoInfo = None, backend: str = "opencv" - ): - if backend == "opencv": - self.backend = OpenCVBackend() - - self.backend.open(source) - self.info = self.backend.video_info - self.source = source - - def __iter__(self): - """Make the Video class iterable over frames. - - Returns: - Generator: A generator yielding video frames. - """ - return self.backend.frames() - - def sink( - self, target_path: str, info: VideoInfo, codec: str = "mp4v" - ) -> BaseWriter: - """Create a video writer for saving frames. - - Args: - target_path (str): Path where the video will be saved. - info (VideoInfo): Video information containing resolution and FPS. - codec (str, optional): FourCC code for video codec. Defaults to "mp4v". - - Returns: - Writer: A video writer object for writing frames. - """ - return self.backend.get_sink(target_path, info, codec) - - def frames( - self, - stride: int = 1, - start: int = 0, - end: int = None, - resolution_wh: tuple[int, int] = None, - ): - """Generate frames from the video. - - Args: - stride (int, optional): Number of frames to skip. Defaults to 1. - start (int, optional): Starting frame index. Defaults to 0. - end (int, optional): Ending frame index. Defaults to None. - resolution_wh (tuple[int, int], optional): Target resolution - (width, height). If provided, frames will be resized. Defaults to None. - - Returns: - Generator: A generator yielding video frames. - """ - return self.backend.frames( - stride=stride, start=start, end=end, resolution_wh=resolution_wh - ) - - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, - progress_message: str = "Processing video", - show_progress: bool = False, - codec: str = "mp4v", - ): - """Save processed video frames to a file. - - Args: - target_path (str): Path where the processed video will be saved. - callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes - each frame. Takes frame and index as input, returns processed frame. - fps (int, optional): Output video FPS. - progress_message (str, optional): Message to show in progress bar. - Defaults to "Processing video". - show_progress (bool, optional): Whether to show progress bar. - Defaults to False. - """ - self.backend.save( - target_path=target_path, - callback=callback, - fps=fps, - progress_message=progress_message, - show_progress=show_progress, - codec=codec, - ) diff --git a/supervision/video/utils.py b/supervision/video/utils.py deleted file mode 100644 index fa5cbe8685..0000000000 --- a/supervision/video/utils.py +++ /dev/null @@ -1,91 +0,0 @@ -from dataclasses import dataclass -from enum import Enum - -import cv2 - - -class SOURCE_TYPE(Enum): - VIDEO_FILE = "VIDEO_FILE" - WEBCAM = "WEBCAM" - RTSP = "RTSP" - - -@dataclass -class VideoInfo: - """ - A class to store video information, including width, height, fps and - total number of frames. - - Attributes: - width (int): width of the video in pixels - height (int): height of the video in pixels - fps (int): frames per second of the video - total_frames (Optional[int]): total number of frames in the video, - default is None - source_type (Optional[SOURCE_TYPE]): source type of the video, - default is None - - Examples: - ```python - import supervision as sv - - video_info = sv.VideoInfo.from_video_path(video_path=) - - video_info - # VideoInfo(width=3840, height=2160, fps=25, total_frames=538) - - video_info.resolution_wh - # (3840, 2160) - ``` - """ - - width: int - height: int - fps: int - total_frames: int = None - source_type: SOURCE_TYPE = None - - @classmethod - def from_video_path(cls, video_path: str) -> "VideoInfo": - """Create VideoInfo from a video file path. - - Args: - video_path (str): Path to the video file. - - Returns: - VideoInfo: Video info containing width, height, fps, and total frames. - - Raises: - ValueError: If video cannot be opened or has invalid properties. - """ - video = cv2.VideoCapture(video_path) - if not video.isOpened(): - raise ValueError(f"Could not open video at {video_path}") - - try: - width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - if width <= 0 or height <= 0: - raise ValueError(f"Invalid video dimensions: {width}x{height}") - - fps = video.get(cv2.CAP_PROP_FPS) - if fps <= 0: - fps = 30 # Default to 30fps if invalid - fps = round(fps) - - total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - if total_frames < 0: - total_frames = None # Some video formats may not report frame count - finally: - video.release() - - return VideoInfo(width, height, fps, total_frames) - - @property - def resolution_wh(self) -> tuple[int, int]: - """Get the video resolution as (width, height). - - Returns: - Tuple[int, int]: Video dimensions as (width, height). - """ - return self.width, self.height From af3db7f7d4a43c7d8c9b9ffbebb6f141276e8b60 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:32:19 +0000 Subject: [PATCH 045/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 53f98a8136..ab45651ac9 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -130,8 +130,8 @@ from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( FPSMonitor, - VideoSink, VideoInfo, + VideoSink, get_video_frames_generator, process_video, ) From e2edb9603dfd5a8c1352349bbd0fc67911f82937 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:34:58 -0400 Subject: [PATCH 046/128] ADD: Added video folder --- supervision/video/__init__.py | 0 supervision/video/backend/base.py | 85 ++++++++ supervision/video/backend/openCV.py | 314 ++++++++++++++++++++++++++++ supervision/video/backend/pyAV.py | 0 supervision/video/core.py | 105 ++++++++++ supervision/video/utils.py | 91 ++++++++ 6 files changed, 595 insertions(+) create mode 100644 supervision/video/__init__.py create mode 100644 supervision/video/backend/base.py create mode 100644 supervision/video/backend/openCV.py create mode 100644 supervision/video/backend/pyAV.py create mode 100644 supervision/video/core.py create mode 100644 supervision/video/utils.py diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py new file mode 100644 index 0000000000..14854ed65a --- /dev/null +++ b/supervision/video/backend/base.py @@ -0,0 +1,85 @@ +from abc import ABC, abstractmethod +from collections.abc import Callable + +import numpy as np + +from supervision.video.utils import VideoInfo + + +class BaseBackend(ABC): + def __init__(self): + self.cap = None + self.video_info = None + self.writer = None + self.path = None + + @abstractmethod + def get_sink( + self, target_path: str, video_info: VideoInfo, codec: str = "mp4v" + ) -> "BaseWriter": + pass + + @abstractmethod + def open(self, path: str) -> None: + pass + + @abstractmethod + def isOpened(self) -> bool: + pass + + @abstractmethod + def _set_video_info(self) -> VideoInfo: + pass + + @abstractmethod + def info(self) -> VideoInfo: + pass + + @abstractmethod + def read(self) -> tuple[bool, np.ndarray]: + pass + + @abstractmethod + def grab(self) -> bool: + pass + + @abstractmethod + def seek(self, frame_idx: int) -> None: + pass + + @abstractmethod + def frames( + self, + *, + start: int = 0, + end: int = None, + stride: int = 1, + resolution_wh: tuple[int, int] = None, + ): + pass + + @abstractmethod + def release(self) -> None: + pass + + @abstractmethod + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int = None, + progress_message: str = "Processing video", + show_progress: bool = False, + codec: str = "mp4v", + ): + pass + + +class BaseWriter(ABC): + @abstractmethod + def write(self, frame: np.ndarray) -> None: + pass + + @abstractmethod + def close(self) -> None: + pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py new file mode 100644 index 0000000000..ac8452d5ab --- /dev/null +++ b/supervision/video/backend/openCV.py @@ -0,0 +1,314 @@ +from collections.abc import Callable + +import cv2 +import numpy as np +from tqdm.auto import tqdm + +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.utils import SOURCE_TYPE, VideoInfo + + +class OpenCVBackend(BaseBackend): + """ + OpenCV implementation of the Backend interface. + Handles video capture, frame reading, seeking, and writing operations using OpenCV. + """ + + def __init__(self): + """Initialize the OpenCV backend with empty video capture and writer objects.""" + super().__init__() + self.cap = None + self.video_info = None + self.writer = None + self.path = None + + def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): + """Create a video writer for saving frames using OpenCV. + + Args: + target_path (str): Path where the video will be saved. + video_info (VideoInfo): Video information containing resolution and FPS. + codec (str, optional): FourCC code for video codec. Defaults to "mp4v". + + Returns: + OpenCVWriter: A video writer object for writing frames. + """ + return OpenCVWriter( + target_path, video_info.fps, video_info.resolution_wh, codec + ) + + def open(self, path: str) -> None: + """ + Open a video source and initialize the video capture object. + + Args: + path (str): Path to the video file, RTSP URL, or camera index. + + Raises: + RuntimeError: If unable to open the video source. + ValueError: If the source type is not supported. + """ + self.cap = cv2.VideoCapture(path) + self.path = path + + if not self.cap.isOpened(): + raise RuntimeError(f"Cannot open video source: {path}") + self.video_info = self._set_video_info() + + if isinstance(path, int): + self.video_info.source_type = SOURCE_TYPE.WEBCAM + elif isinstance(path, str): + self.video_info.source_type = ( + SOURCE_TYPE.RTSP + if path.lower().startswith("rtsp://") + else SOURCE_TYPE.VIDEO_FILE + ) + else: + raise ValueError("Unsupported source type") + + def isOpened(self) -> bool: + """Check if the video source is opened successfully. + + Returns: + bool: True if the video source is opened, False otherwise. + """ + return self.cap.isOpened() + + def _set_video_info(self) -> VideoInfo: + """Set up video information from the opened video source. + + Returns: + VideoInfo: Object containing video properties like width, height, fps, etc. + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = round(self.cap.get(cv2.CAP_PROP_FPS)) + total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + return VideoInfo(width, height, fps, total_frames) + + def info(self) -> VideoInfo: + """Get video information. + + Returns: + VideoInfo: Object containing video properties. + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + return self.video_info + + def read(self) -> tuple[bool, np.ndarray]: + """Read a frame from the video source. + + Returns: + tuple[bool, np.ndarray]: A tuple containing: + - bool: True if frame was successfully read + - np.ndarray: The video frame in BGR format + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + ret, frame = self.cap.read() + return ret, frame + + def grab(self) -> bool: + """Grab a frame from video source without decoding. + + Returns: + bool: True if frame was successfully grabbed. + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + return self.cap.grab() + + def seek(self, frame_idx: int) -> None: + """Seek to a specific frame in the video. + + Args: + frame_idx (int): Index of the frame to seek to (0-based). + + Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) + + def release(self) -> None: + """Release the video capture resources.""" + if self.cap is not None and self.cap.isOpened(): + self.cap.release() + self.cap = None + + def frames( + self, + *, + start: int = 0, + end: int = None, + stride: int = 1, + resolution_wh: tuple[int, int] = None, + ): + """Generate frames from the video source. + + Args: + start (int, optional): Starting frame index. Defaults to 0. + end (int, optional): Ending frame index. Defaults to None. + stride (int, optional): Number of frames to skip. Defaults to 1. + resolution_wh (tuple[int, int], optional): Target resolution + (width, height). If provided, frames will be resized. Defaults to None. + + Yields: + np.ndarray: Video frames in BGR format. Raises: + RuntimeError: If the video source is not opened yet. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + + total_frames = self.video_info.total_frames if self.video_info else 0 + is_live_stream = total_frames <= 0 + + if is_live_stream: + while True: + for _ in range(stride - 1): + if not self.grab(): + return + ret, frame = self.read() + if not ret: + return + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + else: + if end is None or end > total_frames: + end = total_frames + + frame_idx = start + while frame_idx < end: + self.seek(frame_idx) + ret, frame = self.read() + if not ret: + break + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + frame_idx += stride + + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int = None, + progress_message: str = "Processing video", + show_progress: bool = False, + codec: str = "mp4v", + ): + """Save processed video frames to a file with audio preservation. + + Args: + target_path (str): Path where the processed video will be saved. + callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes + each frame. Takes frame and index as input, returns processed frame. + fps (int, optional): Output video FPS. If None, uses source FPS. + progress_message (str, optional): Message to show in progress bar. + show_progress (bool, optional): Whether to show progress bar. + + Raises: + RuntimeError: If video source is not opened. + ValueError: If source is not a video file. + """ + if self.cap is None: + raise RuntimeError("Video not opened yet.") + + if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: + raise ValueError("Only video files can be saved.") + + if self.writer is not None: + self.writer.close() + self.writer = None + + if fps is None: + fps = self.video_info.fps + + self.writer = OpenCVWriter( + target_path, fps, self.video_info.resolution_wh, codec + ) + total_frames = self.video_info.total_frames + frames_generator = self.frames() + for index, frame in enumerate( + tqdm( + frames_generator, + total=total_frames, + disable=not show_progress, + desc=progress_message, + ) + ): + result_frame = callback(frame, index) + self.writer.write(frame=result_frame) + + self.writer.close() + + +class OpenCVWriter(BaseWriter): + """A class to handle video writing operations using OpenCV's VideoWriter. + + This class provides an interface to write frames to a video file using OpenCV, + with support for different codecs and automatic fallback to mp4v if the specified + codec fails. + """ + + def __init__( + self, + filename: str, + fps: int, + frame_size: tuple[int, int], + codec: str = "mp4v", + ): + """Initialize the video writer. + + Args: + filename (str): Path to the output video file. + fps (int): Frames per second for the output video. + frame_size (tuple[int, int]): Width and height of the output video frames. + codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". + + Raises: + RuntimeError: If the video writer cannot be initialized. + """ + try: + fourcc_int = cv2.VideoWriter_fourcc(*codec) + self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) + except Exception: + fourcc_int = cv2.VideoWriter_fourcc(*"mp4v") + self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) + if not self.writer.isOpened(): + raise RuntimeError(f"Cannot open video writer for file: {filename}") + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def write(self, frame: np.ndarray) -> None: + """Write a frame to the video file. + + Args: + frame (np.ndarray): The frame to write, in BGR format. + """ + self.writer.write(frame) + + def close(self) -> None: + """Release the video writer resources.""" + self.writer.release() diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/supervision/video/core.py b/supervision/video/core.py new file mode 100644 index 0000000000..410890207e --- /dev/null +++ b/supervision/video/core.py @@ -0,0 +1,105 @@ +from collections.abc import Callable + +import numpy as np + +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.backend.openCV import OpenCVBackend +from supervision.video.utils import VideoInfo + + +class Video: + """High-level interface for video operations. + + This class provides a convenient interface for video operations including + reading frames, saving processed videos, and video information access. + """ + + info: VideoInfo + source: str | int + backend: BaseBackend + + def __init__( + self, source: str | int, info: VideoInfo = None, backend: str = "opencv" + ): + if backend == "opencv": + self.backend = OpenCVBackend() + + self.backend.open(source) + self.info = self.backend.video_info + self.source = source + + def __iter__(self): + """Make the Video class iterable over frames. + + Returns: + Generator: A generator yielding video frames. + """ + return self.backend.frames() + + def sink( + self, target_path: str, info: VideoInfo, codec: str = "mp4v" + ) -> BaseWriter: + """Create a video writer for saving frames. + + Args: + target_path (str): Path where the video will be saved. + info (VideoInfo): Video information containing resolution and FPS. + codec (str, optional): FourCC code for video codec. Defaults to "mp4v". + + Returns: + Writer: A video writer object for writing frames. + """ + return self.backend.get_sink(target_path, info, codec) + + def frames( + self, + stride: int = 1, + start: int = 0, + end: int = None, + resolution_wh: tuple[int, int] = None, + ): + """Generate frames from the video. + + Args: + stride (int, optional): Number of frames to skip. Defaults to 1. + start (int, optional): Starting frame index. Defaults to 0. + end (int, optional): Ending frame index. Defaults to None. + resolution_wh (tuple[int, int], optional): Target resolution + (width, height). If provided, frames will be resized. Defaults to None. + + Returns: + Generator: A generator yielding video frames. + """ + return self.backend.frames( + stride=stride, start=start, end=end, resolution_wh=resolution_wh + ) + + def save( + self, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + fps: int = None, + progress_message: str = "Processing video", + show_progress: bool = False, + codec: str = "mp4v", + ): + """Save processed video frames to a file. + + Args: + target_path (str): Path where the processed video will be saved. + callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes + each frame. Takes frame and index as input, returns processed frame. + fps (int, optional): Output video FPS. + progress_message (str, optional): Message to show in progress bar. + Defaults to "Processing video". + show_progress (bool, optional): Whether to show progress bar. + Defaults to False. + """ + self.backend.save( + target_path=target_path, + callback=callback, + fps=fps, + progress_message=progress_message, + show_progress=show_progress, + codec=codec, + ) diff --git a/supervision/video/utils.py b/supervision/video/utils.py new file mode 100644 index 0000000000..fa5cbe8685 --- /dev/null +++ b/supervision/video/utils.py @@ -0,0 +1,91 @@ +from dataclasses import dataclass +from enum import Enum + +import cv2 + + +class SOURCE_TYPE(Enum): + VIDEO_FILE = "VIDEO_FILE" + WEBCAM = "WEBCAM" + RTSP = "RTSP" + + +@dataclass +class VideoInfo: + """ + A class to store video information, including width, height, fps and + total number of frames. + + Attributes: + width (int): width of the video in pixels + height (int): height of the video in pixels + fps (int): frames per second of the video + total_frames (Optional[int]): total number of frames in the video, + default is None + source_type (Optional[SOURCE_TYPE]): source type of the video, + default is None + + Examples: + ```python + import supervision as sv + + video_info = sv.VideoInfo.from_video_path(video_path=) + + video_info + # VideoInfo(width=3840, height=2160, fps=25, total_frames=538) + + video_info.resolution_wh + # (3840, 2160) + ``` + """ + + width: int + height: int + fps: int + total_frames: int = None + source_type: SOURCE_TYPE = None + + @classmethod + def from_video_path(cls, video_path: str) -> "VideoInfo": + """Create VideoInfo from a video file path. + + Args: + video_path (str): Path to the video file. + + Returns: + VideoInfo: Video info containing width, height, fps, and total frames. + + Raises: + ValueError: If video cannot be opened or has invalid properties. + """ + video = cv2.VideoCapture(video_path) + if not video.isOpened(): + raise ValueError(f"Could not open video at {video_path}") + + try: + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + if width <= 0 or height <= 0: + raise ValueError(f"Invalid video dimensions: {width}x{height}") + + fps = video.get(cv2.CAP_PROP_FPS) + if fps <= 0: + fps = 30 # Default to 30fps if invalid + fps = round(fps) + + total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + if total_frames < 0: + total_frames = None # Some video formats may not report frame count + finally: + video.release() + + return VideoInfo(width, height, fps, total_frames) + + @property + def resolution_wh(self) -> tuple[int, int]: + """Get the video resolution as (width, height). + + Returns: + Tuple[int, int]: Video dimensions as (width, height). + """ + return self.width, self.height From ed5e15b254eaf72251106f260cba0497cb6a2d90 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:37:43 -0400 Subject: [PATCH 047/128] UPDATE: Revert typings --- supervision/video/backend/base.py | 14 ++++++------- supervision/video/backend/openCV.py | 24 ++++++++++------------ supervision/video/core.py | 31 ++++++++++++++++------------- supervision/video/utils.py | 5 +++-- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 14854ed65a..27a5797a89 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,11 +1,9 @@ from abc import ABC, abstractmethod -from collections.abc import Callable - import numpy as np +from collections.abc import Callable, Generator from supervision.video.utils import VideoInfo - class BaseBackend(ABC): def __init__(self): self.cap = None @@ -46,15 +44,15 @@ def grab(self) -> bool: @abstractmethod def seek(self, frame_idx: int) -> None: pass - + @abstractmethod def frames( self, *, start: int = 0, - end: int = None, + end: int | None = None, stride: int = 1, - resolution_wh: tuple[int, int] = None, + resolution_wh: tuple[int, int] | None = None, ): pass @@ -67,10 +65,10 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, + fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v", + codec: str = "mp4v" ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index ac8452d5ab..f31716b24a 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -1,12 +1,10 @@ -from collections.abc import Callable +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.utils import SOURCE_TYPE, VideoInfo import cv2 import numpy as np from tqdm.auto import tqdm - -from supervision.video.backend.base import BaseBackend, BaseWriter -from supervision.video.utils import SOURCE_TYPE, VideoInfo - +from typing import Callable class OpenCVBackend(BaseBackend): """ @@ -156,17 +154,17 @@ def frames( self, *, start: int = 0, - end: int = None, + end: int | None = None, stride: int = 1, - resolution_wh: tuple[int, int] = None, + resolution_wh: tuple[int, int] | None = None, ): """Generate frames from the video source. Args: start (int, optional): Starting frame index. Defaults to 0. - end (int, optional): Ending frame index. Defaults to None. + end (int | None, optional): Ending frame index. Defaults to None. stride (int, optional): Number of frames to skip. Defaults to 1. - resolution_wh (tuple[int, int], optional): Target resolution + resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height). If provided, frames will be resized. Defaults to None. Yields: @@ -209,10 +207,10 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, + fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v", + codec: str = "mp4v" ): """Save processed video frames to a file with audio preservation. @@ -220,7 +218,7 @@ def save( target_path (str): Path where the processed video will be saved. callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes each frame. Takes frame and index as input, returns processed frame. - fps (int, optional): Output video FPS. If None, uses source FPS. + fps (int | None, optional): Output video FPS. If None, uses source FPS. progress_message (str, optional): Message to show in progress bar. show_progress (bool, optional): Whether to show progress bar. @@ -237,7 +235,7 @@ def save( if self.writer is not None: self.writer.close() self.writer = None - + if fps is None: fps = self.video_info.fps diff --git a/supervision/video/core.py b/supervision/video/core.py index 410890207e..9a2921a8b9 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,10 +1,14 @@ -from collections.abc import Callable +from collections.abc import Callable, Generator + +import cv2 import numpy as np +from tqdm.auto import tqdm +from supervision.video.utils import VideoInfo from supervision.video.backend.base import BaseBackend, BaseWriter + from supervision.video.backend.openCV import OpenCVBackend -from supervision.video.utils import VideoInfo class Video: @@ -19,7 +23,7 @@ class Video: backend: BaseBackend def __init__( - self, source: str | int, info: VideoInfo = None, backend: str = "opencv" + self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" ): if backend == "opencv": self.backend = OpenCVBackend() @@ -36,9 +40,7 @@ def __iter__(self): """ return self.backend.frames() - def sink( - self, target_path: str, info: VideoInfo, codec: str = "mp4v" - ) -> BaseWriter: + def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter: """Create a video writer for saving frames. Args: @@ -55,16 +57,16 @@ def frames( self, stride: int = 1, start: int = 0, - end: int = None, - resolution_wh: tuple[int, int] = None, + end: int | None = None, + resolution_wh: tuple[int, int] | None = None, ): """Generate frames from the video. Args: stride (int, optional): Number of frames to skip. Defaults to 1. start (int, optional): Starting frame index. Defaults to 0. - end (int, optional): Ending frame index. Defaults to None. - resolution_wh (tuple[int, int], optional): Target resolution + end (int | None, optional): Ending frame index. Defaults to None. + resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height). If provided, frames will be resized. Defaults to None. Returns: @@ -78,10 +80,10 @@ def save( self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], - fps: int = None, + fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v", + codec: str = "mp4v" ): """Save processed video frames to a file. @@ -89,7 +91,7 @@ def save( target_path (str): Path where the processed video will be saved. callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes each frame. Takes frame and index as input, returns processed frame. - fps (int, optional): Output video FPS. + fps (int | None, optional): Output video FPS. progress_message (str, optional): Message to show in progress bar. Defaults to "Processing video". show_progress (bool, optional): Whether to show progress bar. @@ -101,5 +103,6 @@ def save( fps=fps, progress_message=progress_message, show_progress=show_progress, - codec=codec, + codec=codec ) + diff --git a/supervision/video/utils.py b/supervision/video/utils.py index fa5cbe8685..64ec2fdbe5 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from enum import Enum +from typing import Optional, Tuple import cv2 @@ -42,8 +43,8 @@ class VideoInfo: width: int height: int fps: int - total_frames: int = None - source_type: SOURCE_TYPE = None + total_frames: int | None = None + source_type: SOURCE_TYPE | None = None @classmethod def from_video_path(cls, video_path: str) -> "VideoInfo": From fd669252b007a047ba8ebfbfe96c2493ead0003f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:38:08 +0000 Subject: [PATCH 048/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/base.py | 8 +++++--- supervision/video/backend/openCV.py | 12 +++++++----- supervision/video/core.py | 17 +++++++---------- supervision/video/utils.py | 1 - 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 27a5797a89..5ee7553396 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,9 +1,11 @@ from abc import ABC, abstractmethod +from collections.abc import Callable + import numpy as np -from collections.abc import Callable, Generator from supervision.video.utils import VideoInfo + class BaseBackend(ABC): def __init__(self): self.cap = None @@ -44,7 +46,7 @@ def grab(self) -> bool: @abstractmethod def seek(self, frame_idx: int) -> None: pass - + @abstractmethod def frames( self, @@ -68,7 +70,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v" + codec: str = "mp4v", ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index f31716b24a..88dfedf6f7 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -1,10 +1,12 @@ -from supervision.video.backend.base import BaseBackend, BaseWriter -from supervision.video.utils import SOURCE_TYPE, VideoInfo +from collections.abc import Callable import cv2 import numpy as np from tqdm.auto import tqdm -from typing import Callable + +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.utils import SOURCE_TYPE, VideoInfo + class OpenCVBackend(BaseBackend): """ @@ -210,7 +212,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v" + codec: str = "mp4v", ): """Save processed video frames to a file with audio preservation. @@ -235,7 +237,7 @@ def save( if self.writer is not None: self.writer.close() self.writer = None - + if fps is None: fps = self.video_info.fps diff --git a/supervision/video/core.py b/supervision/video/core.py index 9a2921a8b9..61d1b2bd38 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,14 +1,10 @@ +from collections.abc import Callable -from collections.abc import Callable, Generator - -import cv2 import numpy as np -from tqdm.auto import tqdm -from supervision.video.utils import VideoInfo from supervision.video.backend.base import BaseBackend, BaseWriter - from supervision.video.backend.openCV import OpenCVBackend +from supervision.video.utils import VideoInfo class Video: @@ -40,7 +36,9 @@ def __iter__(self): """ return self.backend.frames() - def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter: + def sink( + self, target_path: str, info: VideoInfo, codec: str = "mp4v" + ) -> BaseWriter: """Create a video writer for saving frames. Args: @@ -83,7 +81,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v" + codec: str = "mp4v", ): """Save processed video frames to a file. @@ -103,6 +101,5 @@ def save( fps=fps, progress_message=progress_message, show_progress=show_progress, - codec=codec + codec=codec, ) - diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 64ec2fdbe5..aa0d430f91 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,6 +1,5 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional, Tuple import cv2 From 13010e02e4e31be6531ae622c1568a1e5f488d38 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:39:43 -0400 Subject: [PATCH 049/128] UPDATE: sv imports --- supervision/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index ab45651ac9..dff62b5c95 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -130,11 +130,12 @@ from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( FPSMonitor, - VideoInfo, VideoSink, get_video_frames_generator, process_video, ) +from supervision.video.core import Video +from supervision.video.utils import VideoInfo __all__ = [ "LMM", @@ -192,6 +193,7 @@ "TriangleAnnotator", "VertexAnnotator", "VertexLabelAnnotator", + "Video", "VideoInfo", "VideoSink", "approximate_polygon", From 25e3b5fe357763a65ab358d136f3ea12b4fff4a8 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:45:00 -0400 Subject: [PATCH 050/128] UPDATE: default imports using __init__ --- supervision/__init__.py | 3 +-- supervision/video/__init__.py | 11 +++++++++++ supervision/video/backend/base.py | 4 +++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index dff62b5c95..6e2e329b75 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -134,8 +134,7 @@ get_video_frames_generator, process_video, ) -from supervision.video.core import Video -from supervision.video.utils import VideoInfo +from supervision.video import Video, VideoInfo __all__ = [ "LMM", diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py index e69de29bb2..4102ee5e87 100644 --- a/supervision/video/__init__.py +++ b/supervision/video/__init__.py @@ -0,0 +1,11 @@ +from supervision.video.utils import SOURCE_TYPE, VideoInfo +from supervision.video.core import Video +from supervision.video.backend.base import BaseBackend, BaseWriter + +__all__ = [ + 'Video', + 'VideoInfo', + 'SOURCE_TYPE', + 'BaseBackend', + 'BaseWriter' +] diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 5ee7553396..5c8e890dbb 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,9 +1,11 @@ from abc import ABC, abstractmethod from collections.abc import Callable +from typing import Optional, Tuple, TYPE_CHECKING import numpy as np -from supervision.video.utils import VideoInfo +if TYPE_CHECKING: + from supervision.video.utils import VideoInfo class BaseBackend(ABC): From 0ad6cd161dd970b4f093902bc20be68f76716010 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:45:17 +0000 Subject: [PATCH 051/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/__init__.py | 12 +++--------- supervision/video/backend/base.py | 2 +- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py index 4102ee5e87..d5d5559ba8 100644 --- a/supervision/video/__init__.py +++ b/supervision/video/__init__.py @@ -1,11 +1,5 @@ -from supervision.video.utils import SOURCE_TYPE, VideoInfo -from supervision.video.core import Video from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.core import Video +from supervision.video.utils import SOURCE_TYPE, VideoInfo -__all__ = [ - 'Video', - 'VideoInfo', - 'SOURCE_TYPE', - 'BaseBackend', - 'BaseWriter' -] +__all__ = ["SOURCE_TYPE", "BaseBackend", "BaseWriter", "Video", "VideoInfo"] diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 5c8e890dbb..48d2f05966 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from collections.abc import Callable -from typing import Optional, Tuple, TYPE_CHECKING +from typing import TYPE_CHECKING import numpy as np From 3e7b247daf4477ea504178768c65a677714e0c28 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:47:02 -0400 Subject: [PATCH 052/128] FIX: Fixed VideoInfo definition --- supervision/video/backend/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 5c8e890dbb..8e323df069 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -4,8 +4,7 @@ import numpy as np -if TYPE_CHECKING: - from supervision.video.utils import VideoInfo +from supervision.video.utils import VideoInfo class BaseBackend(ABC): From e855b1456ccf7dae2582b7da737cabb83915c2b7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:47:50 +0000 Subject: [PATCH 053/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 2b03d27e8a..5ee7553396 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,6 +1,5 @@ from abc import ABC, abstractmethod from collections.abc import Callable -from typing import TYPE_CHECKING import numpy as np From 29fb6665dc593cc05b894bb7d3fec443a6b3e77e Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 01:51:08 -0400 Subject: [PATCH 054/128] FIX: Fixed futuer annotations --- supervision/video/backend/base.py | 2 ++ supervision/video/backend/openCV.py | 1 + supervision/video/backend/pyAV.py | 1 + supervision/video/core.py | 1 + supervision/video/utils.py | 1 + 5 files changed, 6 insertions(+) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 5ee7553396..47208c5ce6 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from abc import ABC, abstractmethod from collections.abc import Callable diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 88dfedf6f7..3843c555d1 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -1,3 +1,4 @@ +from __future__ import annotations from collections.abc import Callable import cv2 diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index e69de29bb2..6c43ea250f 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -0,0 +1 @@ +from __future__ import annotations \ No newline at end of file diff --git a/supervision/video/core.py b/supervision/video/core.py index 61d1b2bd38..7c487a2900 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,3 +1,4 @@ +from __future__ import annotations from collections.abc import Callable import numpy as np diff --git a/supervision/video/utils.py b/supervision/video/utils.py index aa0d430f91..cb8be96a3a 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,3 +1,4 @@ +from __future__ import annotations from dataclasses import dataclass from enum import Enum From d9ab61caedbe6608360d0cd3d5a69e6198889b3e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 05:51:37 +0000 Subject: [PATCH 055/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/base.py | 2 +- supervision/video/backend/openCV.py | 1 + supervision/video/backend/pyAV.py | 2 +- supervision/video/core.py | 1 + supervision/video/utils.py | 3 ++- 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 47208c5ce6..943fdb3d76 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -18,7 +18,7 @@ def __init__(self): @abstractmethod def get_sink( self, target_path: str, video_info: VideoInfo, codec: str = "mp4v" - ) -> "BaseWriter": + ) -> BaseWriter: pass @abstractmethod diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 3843c555d1..44d5121308 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -1,4 +1,5 @@ from __future__ import annotations + from collections.abc import Callable import cv2 diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 6c43ea250f..9d48db4f9f 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1 +1 @@ -from __future__ import annotations \ No newline at end of file +from __future__ import annotations diff --git a/supervision/video/core.py b/supervision/video/core.py index 7c487a2900..a358814c4f 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,4 +1,5 @@ from __future__ import annotations + from collections.abc import Callable import numpy as np diff --git a/supervision/video/utils.py b/supervision/video/utils.py index cb8be96a3a..629ce7b262 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -1,4 +1,5 @@ from __future__ import annotations + from dataclasses import dataclass from enum import Enum @@ -47,7 +48,7 @@ class VideoInfo: source_type: SOURCE_TYPE | None = None @classmethod - def from_video_path(cls, video_path: str) -> "VideoInfo": + def from_video_path(cls, video_path: str) -> VideoInfo: """Create VideoInfo from a video file path. Args: From 941d48777b247c39b9723e190cd6dda29ca70203 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 15:50:40 -0400 Subject: [PATCH 056/128] UPDATE: Restructure video saves --- supervision/video/backend/base.py | 13 ------- supervision/video/backend/openCV.py | 58 +---------------------------- supervision/video/core.py | 35 +++++++++++++---- 3 files changed, 29 insertions(+), 77 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 47208c5ce6..13d557fae4 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -64,19 +64,6 @@ def frames( def release(self) -> None: pass - @abstractmethod - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, - progress_message: str = "Processing video", - show_progress: bool = False, - codec: str = "mp4v", - ): - pass - - class BaseWriter(ABC): @abstractmethod def write(self, frame: np.ndarray) -> None: diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 3843c555d1..b81e6c0730 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -3,7 +3,6 @@ import cv2 import numpy as np -from tqdm.auto import tqdm from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.utils import SOURCE_TYPE, VideoInfo @@ -20,7 +19,7 @@ def __init__(self): super().__init__() self.cap = None self.video_info = None - self.writer = None + self.writer = OpenCVWriter self.path = None def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): @@ -205,60 +204,7 @@ def frames( frame = cv2.resize(frame, resolution_wh) yield frame frame_idx += stride - - def save( - self, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - fps: int | None = None, - progress_message: str = "Processing video", - show_progress: bool = False, - codec: str = "mp4v", - ): - """Save processed video frames to a file with audio preservation. - - Args: - target_path (str): Path where the processed video will be saved. - callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes - each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. If None, uses source FPS. - progress_message (str, optional): Message to show in progress bar. - show_progress (bool, optional): Whether to show progress bar. - - Raises: - RuntimeError: If video source is not opened. - ValueError: If source is not a video file. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - - if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: - raise ValueError("Only video files can be saved.") - - if self.writer is not None: - self.writer.close() - self.writer = None - - if fps is None: - fps = self.video_info.fps - - self.writer = OpenCVWriter( - target_path, fps, self.video_info.resolution_wh, codec - ) - total_frames = self.video_info.total_frames - frames_generator = self.frames() - for index, frame in enumerate( - tqdm( - frames_generator, - total=total_frames, - disable=not show_progress, - desc=progress_message, - ) - ): - result_frame = callback(frame, index) - self.writer.write(frame=result_frame) - - self.writer.close() + class OpenCVWriter(BaseWriter): diff --git a/supervision/video/core.py b/supervision/video/core.py index 7c487a2900..b7e7d6b719 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -2,10 +2,11 @@ from collections.abc import Callable import numpy as np +from tqdm.auto import tqdm from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.backend.openCV import OpenCVBackend -from supervision.video.utils import VideoInfo +from supervision.video.utils import VideoInfo, SOURCE_TYPE class Video: @@ -96,11 +97,29 @@ def save( show_progress (bool, optional): Whether to show progress bar. Defaults to False. """ - self.backend.save( - target_path=target_path, - callback=callback, - fps=fps, - progress_message=progress_message, - show_progress=show_progress, - codec=codec, + if self.backend.cap is None: + raise RuntimeError("Video not opened yet.") + + if self.backend.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: + raise ValueError("Only video files can be saved.") + + if fps is None: + fps = self.backend.video_info.fps + + writer = self.backend.writer( + target_path, fps, self.backend.video_info.resolution_wh, codec ) + total_frames = self.backend.video_info.total_frames + frames_generator = self.frames() + for index, frame in enumerate( + tqdm( + frames_generator, + total=total_frames, + disable=not show_progress, + desc=progress_message, + ) + ): + result_frame = callback(frame, index) + writer.write(frame=result_frame) + + writer.close() From 2175d2554db89ac5cc5183548fcf5b82d9a173d0 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 15:53:36 -0400 Subject: [PATCH 057/128] UPDATE: Restructure manual writer --- supervision/video/backend/base.py | 6 ------ supervision/video/backend/openCV.py | 15 --------------- supervision/video/core.py | 4 +++- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 38b8012691..f8e8aa492f 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -15,12 +15,6 @@ def __init__(self): self.writer = None self.path = None - @abstractmethod - def get_sink( - self, target_path: str, video_info: VideoInfo, codec: str = "mp4v" - ) -> BaseWriter: - pass - @abstractmethod def open(self, path: str) -> None: pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index f1cc4e4a82..42a0961aad 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -23,21 +23,6 @@ def __init__(self): self.writer = OpenCVWriter self.path = None - def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"): - """Create a video writer for saving frames using OpenCV. - - Args: - target_path (str): Path where the video will be saved. - video_info (VideoInfo): Video information containing resolution and FPS. - codec (str, optional): FourCC code for video codec. Defaults to "mp4v". - - Returns: - OpenCVWriter: A video writer object for writing frames. - """ - return OpenCVWriter( - target_path, video_info.fps, video_info.resolution_wh, codec - ) - def open(self, path: str) -> None: """ Open a video source and initialize the video capture object. diff --git a/supervision/video/core.py b/supervision/video/core.py index 284ae1d475..6dd0e4942d 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -52,7 +52,9 @@ def sink( Returns: Writer: A video writer object for writing frames. """ - return self.backend.get_sink(target_path, info, codec) + return self.backend.writer( + target_path, info.fps, info.resolution_wh, codec + ) def frames( self, From 27f126605aa57f322f1d97ac31951218755cd2a0 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 16:02:23 -0400 Subject: [PATCH 058/128] FORMAT: Formatting changes --- supervision/video/backend/openCV.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 42a0961aad..4bc7e37c10 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -191,8 +191,6 @@ def frames( yield frame frame_idx += stride - - class OpenCVWriter(BaseWriter): """A class to handle video writing operations using OpenCV's VideoWriter. From cf31e29587ec994ddc74acfb88d3c56fb3ab693c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Aug 2025 20:02:51 +0000 Subject: [PATCH 059/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/base.py | 2 +- supervision/video/backend/openCV.py | 5 ++--- supervision/video/core.py | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index f8e8aa492f..78b3875d8e 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -1,7 +1,6 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Callable import numpy as np @@ -58,6 +57,7 @@ def frames( def release(self) -> None: pass + class BaseWriter(ABC): @abstractmethod def write(self, frame: np.ndarray) -> None: diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 4bc7e37c10..00f5b6ade2 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -1,7 +1,5 @@ from __future__ import annotations -from collections.abc import Callable - import cv2 import numpy as np @@ -190,7 +188,8 @@ def frames( frame = cv2.resize(frame, resolution_wh) yield frame frame_idx += stride - + + class OpenCVWriter(BaseWriter): """A class to handle video writing operations using OpenCV's VideoWriter. diff --git a/supervision/video/core.py b/supervision/video/core.py index 6dd0e4942d..0cd404bd57 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -7,7 +7,7 @@ from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.backend.openCV import OpenCVBackend -from supervision.video.utils import VideoInfo, SOURCE_TYPE +from supervision.video.utils import SOURCE_TYPE, VideoInfo class Video: @@ -52,9 +52,7 @@ def sink( Returns: Writer: A video writer object for writing frames. """ - return self.backend.writer( - target_path, info.fps, info.resolution_wh, codec - ) + return self.backend.writer(target_path, info.fps, info.resolution_wh, codec) def frames( self, From abba880145a69ad2c52d353883c231adde9615d0 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 16:18:22 -0400 Subject: [PATCH 060/128] UPDATE: Restructured frame generator --- pyproject.toml | 2 +- supervision/video/backend/openCV.py | 53 ----------------------------- supervision/video/core.py | 41 +++++++++++++++++++--- 3 files changed, 37 insertions(+), 59 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 787fa93ade..554dd16553 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,6 @@ dependencies = [ "requests>=2.26.0", "tqdm>=4.62.3", "opencv-python>=4.5.5.64", - "imageio-ffmpeg (>=0.6.0,<0.7.0)" ] [project.urls] @@ -59,6 +58,7 @@ Documentation = "https://supervision.roboflow.com/latest/" metrics = [ "pandas>=2.0.0", ] +video = ["av (>=15.0.0,<16.0.0)"] [dependency-groups] dev = [ diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 00f5b6ade2..7e0bd37f9b 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -136,59 +136,6 @@ def release(self) -> None: self.cap.release() self.cap = None - def frames( - self, - *, - start: int = 0, - end: int | None = None, - stride: int = 1, - resolution_wh: tuple[int, int] | None = None, - ): - """Generate frames from the video source. - - Args: - start (int, optional): Starting frame index. Defaults to 0. - end (int | None, optional): Ending frame index. Defaults to None. - stride (int, optional): Number of frames to skip. Defaults to 1. - resolution_wh (tuple[int, int] | None, optional): Target resolution - (width, height). If provided, frames will be resized. Defaults to None. - - Yields: - np.ndarray: Video frames in BGR format. Raises: - RuntimeError: If the video source is not opened yet. - """ - if self.cap is None: - raise RuntimeError("Video not opened yet.") - - total_frames = self.video_info.total_frames if self.video_info else 0 - is_live_stream = total_frames <= 0 - - if is_live_stream: - while True: - for _ in range(stride - 1): - if not self.grab(): - return - ret, frame = self.read() - if not ret: - return - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - else: - if end is None or end > total_frames: - end = total_frames - - frame_idx = start - while frame_idx < end: - self.seek(frame_idx) - ret, frame = self.read() - if not ret: - break - if resolution_wh is not None: - frame = cv2.resize(frame, resolution_wh) - yield frame - frame_idx += stride - class OpenCVWriter(BaseWriter): """A class to handle video writing operations using OpenCV's VideoWriter. diff --git a/supervision/video/core.py b/supervision/video/core.py index 0cd404bd57..6e13fc9625 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -3,11 +3,12 @@ from collections.abc import Callable import numpy as np +import cv2 from tqdm.auto import tqdm from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.backend.openCV import OpenCVBackend -from supervision.video.utils import SOURCE_TYPE, VideoInfo +from supervision.video.utils import VideoInfo, SOURCE_TYPE class Video: @@ -52,7 +53,9 @@ def sink( Returns: Writer: A video writer object for writing frames. """ - return self.backend.writer(target_path, info.fps, info.resolution_wh, codec) + return self.backend.writer( + target_path, info.fps, info.resolution_wh, codec + ) def frames( self, @@ -73,9 +76,37 @@ def frames( Returns: Generator: A generator yielding video frames. """ - return self.backend.frames( - stride=stride, start=start, end=end, resolution_wh=resolution_wh - ) + if self.backend.cap is None: + raise RuntimeError("Video not opened yet.") + + total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0 + is_live_stream = total_frames <= 0 + + if is_live_stream: + while True: + for _ in range(stride - 1): + if not self.backend.grab(): + return + ret, frame = self.backend.read() + if not ret: + return + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + else: + if end is None or end > total_frames: + end = total_frames + + frame_idx = start + while frame_idx < end: + self.backend.seek(frame_idx) + ret, frame = self.backend.read() + if not ret: + break + if resolution_wh is not None: + frame = cv2.resize(frame, resolution_wh) + yield frame + frame_idx += stride def save( self, From 3374544d4613a1b81df91f61247695e973f81067 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 16:18:49 -0400 Subject: [PATCH 061/128] FIX: Removed frames from BaseBackend --- supervision/video/backend/base.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 78b3875d8e..623d3c74f8 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -42,17 +42,6 @@ def grab(self) -> bool: def seek(self, frame_idx: int) -> None: pass - @abstractmethod - def frames( - self, - *, - start: int = 0, - end: int | None = None, - stride: int = 1, - resolution_wh: tuple[int, int] | None = None, - ): - pass - @abstractmethod def release(self) -> None: pass From 2e78c483ffd609c8501ec16a597a839794d506cf Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 18:23:23 -0400 Subject: [PATCH 062/128] ADD: Added pyAV backend support --- supervision/video/backend/pyAV.py | 191 +++++++++++++++++++++++++++++- supervision/video/core.py | 10 +- 2 files changed, 196 insertions(+), 5 deletions(-) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 9d48db4f9f..85e52a89e0 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1 +1,190 @@ -from __future__ import annotations +import av +import numpy as np + +from fractions import Fraction +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.utils import VideoInfo, SOURCE_TYPE + + +class pyAVWriter(BaseWriter): + def __init__( + self, + filename: str, + fps: int, + frame_size: tuple[int, int], + codec: str = "h264", + ): + try: + self.container = av.open(filename, mode="w") + + if codec is None: + codec = "h264" + self.stream = self.container.add_stream(codec, rate=fps) + self.stream.width = frame_size[0] + self.stream.height = frame_size[1] + self.stream.pix_fmt = "yuv420p" + + # Set time_base explicitly for correct timing + self.stream.codec_context.time_base = Fraction(1, fps) + + # Frame index for PTS + self.frame_idx = 0 + + except Exception as e: + raise RuntimeError(f"Cannot open video writer for file: {filename}") from e + + def write(self, frame: np.ndarray) -> None: + # Convert BGR (OpenCV) to RGB for PyAV + frame_rgb = frame[..., ::-1] + av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24") + + av_frame.pts = self.frame_idx + av_frame.time_base = self.stream.codec_context.time_base + self.frame_idx += 1 + + # Encode frame and mux packets immediately + packets = self.stream.encode(av_frame) + for packet in packets: + self.container.mux(packet) + + def close(self) -> None: + # Flush encoder by calling encode() with no frame, mux all packets + packets = self.stream.encode() + for packet in packets: + self.container.mux(packet) + + self.container.close() + +class pyAVBackend(BaseBackend): + + + def __init__(self): + super().__init__() + self.container = None + self.stream = None + self.writer = pyAVWriter + self.frame_generator = None + self.video_info = None + self.current_frame_idx = 0 + + def open(self, path: str) -> None: + + try: + self.container = av.open(path) + self.stream = self.container.streams.video[0] + self.stream.thread_type = "AUTO" + + # cap is used for internals + self.cap = self.container + + self.frame_generator = self.container.decode(video=0) + self.video_info = self._set_video_info() + self.current_frame_idx = 0 + + if isinstance(path, int): + self.video_info.source_type = SOURCE_TYPE.WEBCAM + elif isinstance(path, str): + self.video_info.source_type = ( + SOURCE_TYPE.RTSP + if path.lower().startswith("rtsp://") + else SOURCE_TYPE.VIDEO_FILE + ) + else: + raise ValueError("Unsupported source type") + + except Exception as e: + raise RuntimeError(f"Cannot open video source: {path}") from e + + def isOpened(self) -> bool: + return self.container is not None and self.stream is not None + + def _set_video_info(self) -> VideoInfo: + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + + width = self.stream.width + height = self.stream.height + fps = float(self.stream.average_rate or self.stream.guessed_rate) + if fps <= 0: + fps = 30 # Default to 30fps if invalid + + total_frames = self.stream.frames + if total_frames == 0: + total_frames = None + + return VideoInfo(width, height, round(fps), total_frames) + + def info(self) -> VideoInfo: + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + return self.video_info + + def read(self) -> tuple[bool, np.ndarray]: + + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + + try: + frame = next(self.frame_generator) + self.current_frame_idx += 1 + frame_bgr = frame.to_ndarray(format="bgr24") + return True, frame_bgr + except (StopIteration, av.error.EOFError): + return False, np.array([]) + + def grab(self) -> bool: + + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + + try: + for packet in self.container.demux(video=0): + if packet.stream.type == 'video': + return True + return False + except (StopIteration, av.error.EOFError): + return False + + def seek(self, frame_idx: int) -> None: + + if not self.isOpened(): + raise RuntimeError("Video not opened yet.") + + framerate = float(self.stream.average_rate or self.stream.guessed_rate or 30.0) + if framerate <= 0: + framerate = 30.0 + + time_base = float(self.stream.time_base) + timestamp = int((frame_idx / framerate) / time_base) + + self.container.seek(timestamp, stream=self.stream, any_frame=False, backward=True) + self.frame_generator = self.container.decode(video=0) + + self.current_frame_idx = 0 + while True: + try: + frame = next(self.frame_generator) + except (StopIteration, av.error.EOFError): + break + + if getattr(frame, "time", None) is not None: + self.current_frame_idx = int(round(frame.time * framerate)) + elif getattr(frame, "pts", None) is not None: + self.current_frame_idx = int(round((frame.pts * time_base) * framerate)) + else: + self.current_frame_idx += 1 + + if self.current_frame_idx >= frame_idx: + def _prepend_frame(first_frame, gen): + yield first_frame + yield from gen + self.frame_generator = _prepend_frame(frame, self.frame_generator) + break + + def release(self) -> None: + + if self.container: + self.container.close() + self.container = None + self.stream = None + self.frame_generator = None diff --git a/supervision/video/core.py b/supervision/video/core.py index 6e13fc9625..a24c9a8044 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -9,6 +9,7 @@ from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.backend.openCV import OpenCVBackend from supervision.video.utils import VideoInfo, SOURCE_TYPE +from supervision.video.backend.pyAV import pyAVBackend class Video: @@ -26,7 +27,7 @@ def __init__( self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" ): if backend == "opencv": - self.backend = OpenCVBackend() + self.backend = pyAVBackend() self.backend.open(source) self.info = self.backend.video_info @@ -41,14 +42,14 @@ def __iter__(self): return self.backend.frames() def sink( - self, target_path: str, info: VideoInfo, codec: str = "mp4v" + self, target_path: str, info: VideoInfo, codec: str | None = None ) -> BaseWriter: """Create a video writer for saving frames. Args: target_path (str): Path where the video will be saved. info (VideoInfo): Video information containing resolution and FPS. - codec (str, optional): FourCC code for video codec. Defaults to "mp4v". + codec (str, optional): FourCC code for video codec. Defaults to "None". Returns: Writer: A video writer object for writing frames. @@ -115,7 +116,7 @@ def save( fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - codec: str = "mp4v", + codec: str | None = None, ): """Save processed video frames to a file. @@ -142,6 +143,7 @@ def save( target_path, fps, self.backend.video_info.resolution_wh, codec ) total_frames = self.backend.video_info.total_frames + print(self.backend.video_info) frames_generator = self.frames() for index, frame in enumerate( tqdm( From a3aca42e2cdca010b743df2020ffc8ab3c0ffc5c Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 18:23:39 -0400 Subject: [PATCH 063/128] ADD: Added pyAV docstrings --- supervision/video/backend/pyAV.py | 64 ++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 85e52a89e0..db99af9b4a 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -56,7 +56,10 @@ def close(self) -> None: self.container.close() class pyAVBackend(BaseBackend): - + """ + PyAV implementation of the Backend interface. + Handles video capture, frame reading, seeking, and writing operations using PyAV. + """ def __init__(self): super().__init__() @@ -65,16 +68,25 @@ def __init__(self): self.writer = pyAVWriter self.frame_generator = None self.video_info = None - self.current_frame_idx = 0 + self.current_frame_idx = 0 # Track current frame number in decoding def open(self, path: str) -> None: - + """Open and initialize a video source. + + Opens a video file, RTSP stream, or webcam and initializes all necessary + components for video processing. + + Args: + path (str): Path to video file, RTSP URL, or camera index. + + Raises: + RuntimeError: If unable to open the video source. + ValueError: If the source type is not supported. + """ try: self.container = av.open(path) self.stream = self.container.streams.video[0] self.stream.thread_type = "AUTO" - - # cap is used for internals self.cap = self.container self.frame_generator = self.container.decode(video=0) @@ -120,7 +132,16 @@ def info(self) -> VideoInfo: return self.video_info def read(self) -> tuple[bool, np.ndarray]: - + """Read the next frame from the video stream. + + Returns: + tuple[bool, np.ndarray]: A tuple containing: + - bool: True if frame was successfully read + - np.ndarray: The video frame in BGR format (H, W, 3) + + Raises: + RuntimeError: If the video source is not opened. + """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") @@ -133,7 +154,17 @@ def read(self) -> tuple[bool, np.ndarray]: return False, np.array([]) def grab(self) -> bool: - + """Grab the next frame packet without decoding. + + A lightweight operation that skips frame decoding, useful for + quick frame navigation. Returns success status of the grab operation. + + Returns: + bool: True if a frame was successfully grabbed, False otherwise. + + Raises: + RuntimeError: If the video source is not opened. + """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") @@ -146,7 +177,18 @@ def grab(self) -> bool: return False def seek(self, frame_idx: int) -> None: - + """Seek to a specific frame in the video. + + Performs frame-accurate seeking by navigating to the nearest keyframe and + decoding forward to the exact target frame. The next read() call will + return the target frame. + + Args: + frame_idx (int): Target frame index (0-based) to seek to. + + Raises: + RuntimeError: If the video source is not opened. + """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") @@ -182,7 +224,11 @@ def _prepend_frame(first_frame, gen): break def release(self) -> None: - + """Release all resources associated with the video stream. + + Closes the video container and resets all internal state variables + to ensure proper cleanup of resources. + """ if self.container: self.container.close() self.container = None From 819c70be3875731a5d5f6d7df55e0fc51025ceb8 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 19:02:02 -0400 Subject: [PATCH 064/128] ADD: Added pyAV audio support --- supervision/video/backend/base.py | 11 +++++++++++ supervision/video/backend/pyAV.py | 30 ++++++++++++++++++++++++++---- supervision/video/core.py | 2 +- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 623d3c74f8..231cb7578e 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -48,6 +48,17 @@ def release(self) -> None: class BaseWriter(ABC): + @abstractmethod + def __init__( + self, + filename: str, + fps: int, + frame_size: tuple[int, int], + codec: str | None = None, + backend: BaseBackend = None, + ): + pass + @abstractmethod def write(self, frame: np.ndarray) -> None: pass diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index db99af9b4a..cfd9ba3f75 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -13,10 +13,12 @@ def __init__( fps: int, frame_size: tuple[int, int], codec: str = "h264", - ): + backend: "pyAVBackend" = None, + ): try: self.container = av.open(filename, mode="w") - + self.backend = backend + if codec is None: codec = "h264" self.stream = self.container.add_stream(codec, rate=fps) @@ -29,6 +31,16 @@ def __init__( # Frame index for PTS self.frame_idx = 0 + + self.audio_stream_out = None + self.audio_packets = [] + if backend.audio_stream and backend.audio_src_container: + audio_codec_name = backend.audio_stream.codec_context.name + audio_rate = backend.audio_stream.codec_context.rate # Can be None for some codecs + self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate) + for packet in backend.audio_src_container.demux(backend.audio_stream): + if packet.dts is not None: + self.audio_packets.append(packet) except Exception as e: raise RuntimeError(f"Cannot open video writer for file: {filename}") from e @@ -53,6 +65,11 @@ def close(self) -> None: for packet in packets: self.container.mux(packet) + if self.audio_stream_out: + for packet in self.audio_packets: + packet.stream = self.audio_stream_out + self.container.mux(packet) + self.container.close() class pyAVBackend(BaseBackend): @@ -69,7 +86,7 @@ def __init__(self): self.frame_generator = None self.video_info = None self.current_frame_idx = 0 # Track current frame number in decoding - + def open(self, path: str) -> None: """Open and initialize a video source. @@ -85,6 +102,7 @@ def open(self, path: str) -> None: """ try: self.container = av.open(path) + self.audio_src_container = self.container self.stream = self.container.streams.video[0] self.stream.thread_type = "AUTO" self.cap = self.container @@ -93,6 +111,10 @@ def open(self, path: str) -> None: self.video_info = self._set_video_info() self.current_frame_idx = 0 + # If audio exists + if len(self.container.streams.audio) > 0: + self.audio_stream = self.container.streams.audio[0] + if isinstance(path, int): self.video_info.source_type = SOURCE_TYPE.WEBCAM elif isinstance(path, str): @@ -106,7 +128,7 @@ def open(self, path: str) -> None: except Exception as e: raise RuntimeError(f"Cannot open video source: {path}") from e - + def isOpened(self) -> bool: return self.container is not None and self.stream is not None diff --git a/supervision/video/core.py b/supervision/video/core.py index a24c9a8044..31d73b1831 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -140,7 +140,7 @@ def save( fps = self.backend.video_info.fps writer = self.backend.writer( - target_path, fps, self.backend.video_info.resolution_wh, codec + target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend ) total_frames = self.backend.video_info.total_frames print(self.backend.video_info) From dfd9407e0ac9b4e5fc61024af61d9c64c8474c52 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 19:02:31 -0400 Subject: [PATCH 065/128] UPDATE: Backend support needed for all writers --- supervision/video/backend/openCV.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 7e0bd37f9b..c3cb823e08 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -151,6 +151,7 @@ def __init__( fps: int, frame_size: tuple[int, int], codec: str = "mp4v", + backend: OpenCVBackend = None, ): """Initialize the video writer. @@ -163,6 +164,7 @@ def __init__( Raises: RuntimeError: If the video writer cannot be initialized. """ + self.backend = backend try: fourcc_int = cv2.VideoWriter_fourcc(*codec) self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) From d29e50e5adf47464154ff8a4c76eb6bf35055cfa Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 19:04:08 -0400 Subject: [PATCH 066/128] FIX: Formatting --- supervision/video/backend/base.py | 2 +- supervision/video/backend/openCV.py | 2 +- supervision/video/backend/pyAV.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 231cb7578e..2e3f68fc75 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -52,10 +52,10 @@ class BaseWriter(ABC): def __init__( self, filename: str, + backend: BaseBackend, fps: int, frame_size: tuple[int, int], codec: str | None = None, - backend: BaseBackend = None, ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index c3cb823e08..382f694031 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -148,10 +148,10 @@ class OpenCVWriter(BaseWriter): def __init__( self, filename: str, + backend: OpenCVBackend, fps: int, frame_size: tuple[int, int], codec: str = "mp4v", - backend: OpenCVBackend = None, ): """Initialize the video writer. diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index cfd9ba3f75..81bb760e09 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -10,10 +10,10 @@ class pyAVWriter(BaseWriter): def __init__( self, filename: str, + backend: "pyAVBackend", fps: int, frame_size: tuple[int, int], codec: str = "h264", - backend: "pyAVBackend" = None, ): try: self.container = av.open(filename, mode="w") From 3ff86442e3893241236d584c073985757696ff76 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 19:14:58 -0400 Subject: [PATCH 067/128] ADD: Added backend utils and type safe support --- supervision/video/backend/__init__.py | 40 +++++++++++++++++++++++++++ supervision/video/core.py | 34 ++++++++++++++--------- 2 files changed, 61 insertions(+), 13 deletions(-) create mode 100644 supervision/video/backend/__init__.py diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py new file mode 100644 index 0000000000..e8e154e730 --- /dev/null +++ b/supervision/video/backend/__init__.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import Literal, overload, TypeVar, Union + +from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.backend.openCV import OpenCVBackend, OpenCVWriter +from supervision.video.backend.pyAV import pyAVBackend, pyAVWriter + +BackendT = TypeVar('BackendT', bound=BaseBackend) +BackendLiteral = Literal["opencv", "pyav"] +BackendType = Union[OpenCVBackend, pyAVBackend] + +@overload +def getBackend(backend: Literal["opencv"]) -> OpenCVBackend: + ... + +@overload +def getBackend(backend: Literal["pyav"]) -> pyAVBackend: + ... + +def getBackend(backend: str) -> BaseBackend: + if backend == "opencv": + return OpenCVBackend() + elif backend == "pyav": + return pyAVBackend() + else: + raise ValueError(f"Unsupported backend: {backend}") + +__all__ = [ + "BaseBackend", + "BaseWriter", + "OpenCVBackend", + "OpenCVWriter", + "pyAVBackend", + "pyAVWriter", + "getBackend", + "BackendT", + "BackendLiteral", + "BackendType" +] \ No newline at end of file diff --git a/supervision/video/core.py b/supervision/video/core.py index 31d73b1831..f2eb96ad41 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,34 +1,42 @@ from __future__ import annotations from collections.abc import Callable +from typing import overload import numpy as np import cv2 from tqdm.auto import tqdm -from supervision.video.backend.base import BaseBackend, BaseWriter -from supervision.video.backend.openCV import OpenCVBackend +from supervision.video.backend import ( + BaseBackend, + BaseWriter, + getBackend, + BackendLiteral, +) from supervision.video.utils import VideoInfo, SOURCE_TYPE -from supervision.video.backend.pyAV import pyAVBackend class Video: - """High-level interface for video operations. - - This class provides a convenient interface for video operations including - reading frames, saving processed videos, and video information access. - """ - info: VideoInfo source: str | int backend: BaseBackend + @overload def __init__( - self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv" - ): - if backend == "opencv": - self.backend = pyAVBackend() + self, + source: str | int, + info: VideoInfo | None = None, + backend: BackendLiteral = "opencv" + ) -> None: + ... + def __init__( + self, + source: str | int, + info: VideoInfo | None = None, + backend: BackendLiteral = "opencv" + ) -> None: + self.backend = getBackend(backend) self.backend.open(source) self.info = self.backend.video_info self.source = source From d006c478efc3a39868866632c9718f141c1bf8a4 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 20:53:04 -0400 Subject: [PATCH 068/128] UPDATE: Type safe syntax --- supervision/video/backend/__init__.py | 23 ++--- supervision/video/backend/base.py | 5 +- supervision/video/backend/openCV.py | 2 +- supervision/video/backend/pyAV.py | 134 +++++++++++++------------- supervision/video/core.py | 20 +--- 5 files changed, 84 insertions(+), 100 deletions(-) diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index e8e154e730..5e8ae0b61e 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -1,28 +1,23 @@ from __future__ import annotations -from typing import Literal, overload, TypeVar, Union +from typing import Literal, Union from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.backend.openCV import OpenCVBackend, OpenCVWriter from supervision.video.backend.pyAV import pyAVBackend, pyAVWriter -BackendT = TypeVar('BackendT', bound=BaseBackend) BackendLiteral = Literal["opencv", "pyav"] -BackendType = Union[OpenCVBackend, pyAVBackend] +BackendTypes = Union[OpenCVBackend, pyAVBackend] +WriterTypes = Union[OpenCVWriter, pyAVWriter] -@overload -def getBackend(backend: Literal["opencv"]) -> OpenCVBackend: - ... - -@overload -def getBackend(backend: Literal["pyav"]) -> pyAVBackend: - ... +_backends = { + "opencv": OpenCVBackend, + "pyav": pyAVBackend, +} def getBackend(backend: str) -> BaseBackend: - if backend == "opencv": - return OpenCVBackend() - elif backend == "pyav": - return pyAVBackend() + if backend.lower() in _backends: + return _backends[backend.lower()]() else: raise ValueError(f"Unsupported backend: {backend}") diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 2e3f68fc75..6a26429249 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -5,13 +5,14 @@ import numpy as np from supervision.video.utils import VideoInfo +from supervision.video.backend import BackendTypes, WriterTypes class BaseBackend(ABC): def __init__(self): self.cap = None self.video_info = None - self.writer = None + self.writer: WriterTypes = None self.path = None @abstractmethod @@ -52,7 +53,7 @@ class BaseWriter(ABC): def __init__( self, filename: str, - backend: BaseBackend, + backend: BackendTypes, fps: int, frame_size: tuple[int, int], codec: str | None = None, diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 382f694031..95dc4ccda8 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -3,7 +3,7 @@ import cv2 import numpy as np -from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.backend import BaseBackend, BaseWriter from supervision.video.utils import SOURCE_TYPE, VideoInfo diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 81bb760e09..7d4c25b3eb 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -2,76 +2,10 @@ import numpy as np from fractions import Fraction -from supervision.video.backend.base import BaseBackend, BaseWriter +from supervision.video.backend import BaseBackend, BaseWriter from supervision.video.utils import VideoInfo, SOURCE_TYPE -class pyAVWriter(BaseWriter): - def __init__( - self, - filename: str, - backend: "pyAVBackend", - fps: int, - frame_size: tuple[int, int], - codec: str = "h264", - ): - try: - self.container = av.open(filename, mode="w") - self.backend = backend - - if codec is None: - codec = "h264" - self.stream = self.container.add_stream(codec, rate=fps) - self.stream.width = frame_size[0] - self.stream.height = frame_size[1] - self.stream.pix_fmt = "yuv420p" - - # Set time_base explicitly for correct timing - self.stream.codec_context.time_base = Fraction(1, fps) - - # Frame index for PTS - self.frame_idx = 0 - - self.audio_stream_out = None - self.audio_packets = [] - if backend.audio_stream and backend.audio_src_container: - audio_codec_name = backend.audio_stream.codec_context.name - audio_rate = backend.audio_stream.codec_context.rate # Can be None for some codecs - self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate) - for packet in backend.audio_src_container.demux(backend.audio_stream): - if packet.dts is not None: - self.audio_packets.append(packet) - - except Exception as e: - raise RuntimeError(f"Cannot open video writer for file: {filename}") from e - - def write(self, frame: np.ndarray) -> None: - # Convert BGR (OpenCV) to RGB for PyAV - frame_rgb = frame[..., ::-1] - av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24") - - av_frame.pts = self.frame_idx - av_frame.time_base = self.stream.codec_context.time_base - self.frame_idx += 1 - - # Encode frame and mux packets immediately - packets = self.stream.encode(av_frame) - for packet in packets: - self.container.mux(packet) - - def close(self) -> None: - # Flush encoder by calling encode() with no frame, mux all packets - packets = self.stream.encode() - for packet in packets: - self.container.mux(packet) - - if self.audio_stream_out: - for packet in self.audio_packets: - packet.stream = self.audio_stream_out - self.container.mux(packet) - - self.container.close() - class pyAVBackend(BaseBackend): """ PyAV implementation of the Backend interface. @@ -256,3 +190,69 @@ def release(self) -> None: self.container = None self.stream = None self.frame_generator = None + +class pyAVWriter(BaseWriter): + def __init__( + self, + filename: str, + backend: pyAVBackend, + fps: int, + frame_size: tuple[int, int], + codec: str = "h264", + ): + try: + self.container = av.open(filename, mode="w") + self.backend = backend + + if codec is None: + codec = "h264" + self.stream = self.container.add_stream(codec, rate=fps) + self.stream.width = frame_size[0] + self.stream.height = frame_size[1] + self.stream.pix_fmt = "yuv420p" + + # Set time_base explicitly for correct timing + self.stream.codec_context.time_base = Fraction(1, fps) + + # Frame index for PTS + self.frame_idx = 0 + + self.audio_stream_out = None + self.audio_packets = [] + if backend.audio_stream and backend.audio_src_container: + audio_codec_name = backend.audio_stream.codec_context.name + audio_rate = backend.audio_stream.codec_context.rate # Can be None for some codecs + self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate) + for packet in backend.audio_src_container.demux(backend.audio_stream): + if packet.dts is not None: + self.audio_packets.append(packet) + + except Exception as e: + raise RuntimeError(f"Cannot open video writer for file: {filename}") from e + + def write(self, frame: np.ndarray) -> None: + # Convert BGR (OpenCV) to RGB for PyAV + frame_rgb = frame[..., ::-1] + av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24") + + av_frame.pts = self.frame_idx + av_frame.time_base = self.stream.codec_context.time_base + self.frame_idx += 1 + + # Encode frame and mux packets immediately + packets = self.stream.encode(av_frame) + for packet in packets: + self.container.mux(packet) + + def close(self) -> None: + # Flush encoder by calling encode() with no frame, mux all packets + packets = self.stream.encode() + for packet in packets: + self.container.mux(packet) + + if self.audio_stream_out: + for packet in self.audio_packets: + packet.stream = self.audio_stream_out + self.container.mux(packet) + + self.container.close() diff --git a/supervision/video/core.py b/supervision/video/core.py index f2eb96ad41..4587f83fe0 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,17 +1,15 @@ from __future__ import annotations from collections.abc import Callable -from typing import overload - import numpy as np import cv2 from tqdm.auto import tqdm from supervision.video.backend import ( - BaseBackend, + BackendTypes, + BackendLiteral, BaseWriter, getBackend, - BackendLiteral, ) from supervision.video.utils import VideoInfo, SOURCE_TYPE @@ -19,26 +17,16 @@ class Video: info: VideoInfo source: str | int - backend: BaseBackend - - @overload - def __init__( - self, - source: str | int, - info: VideoInfo | None = None, - backend: BackendLiteral = "opencv" - ) -> None: - ... + backend: BackendTypes def __init__( self, source: str | int, - info: VideoInfo | None = None, backend: BackendLiteral = "opencv" ) -> None: self.backend = getBackend(backend) self.backend.open(source) - self.info = self.backend.video_info + self.info = self.backend.info() self.source = source def __iter__(self): From 8894819819243f40301ca65f1b07b3a89ba59bde Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 21:12:51 -0400 Subject: [PATCH 069/128] FORMAT: Changed backend param --- supervision/video/backend/__init__.py | 1 - supervision/video/backend/base.py | 5 ++--- supervision/video/backend/openCV.py | 3 ++- supervision/video/backend/pyAV.py | 2 +- supervision/video/core.py | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index 5e8ae0b61e..805a050968 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -29,7 +29,6 @@ def getBackend(backend: str) -> BaseBackend: "pyAVBackend", "pyAVWriter", "getBackend", - "BackendT", "BackendLiteral", "BackendType" ] \ No newline at end of file diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 6a26429249..6a9bf97cb2 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -5,14 +5,13 @@ import numpy as np from supervision.video.utils import VideoInfo -from supervision.video.backend import BackendTypes, WriterTypes class BaseBackend(ABC): def __init__(self): self.cap = None self.video_info = None - self.writer: WriterTypes = None + self.writer = None self.path = None @abstractmethod @@ -53,10 +52,10 @@ class BaseWriter(ABC): def __init__( self, filename: str, - backend: BackendTypes, fps: int, frame_size: tuple[int, int], codec: str | None = None, + backend: BaseBackend | None = None, ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 95dc4ccda8..68a41e0d72 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -148,10 +148,10 @@ class OpenCVWriter(BaseWriter): def __init__( self, filename: str, - backend: OpenCVBackend, fps: int, frame_size: tuple[int, int], codec: str = "mp4v", + backend: OpenCVBackend | None = None, ): """Initialize the video writer. @@ -160,6 +160,7 @@ def __init__( fps (int): Frames per second for the output video. frame_size (tuple[int, int]): Width and height of the output video frames. codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". + backend (OpenCVBackend | None, optional): Backend instance. Defaults to None. Raises: RuntimeError: If the video writer cannot be initialized. diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 7d4c25b3eb..4ac9830990 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -195,10 +195,10 @@ class pyAVWriter(BaseWriter): def __init__( self, filename: str, - backend: pyAVBackend, fps: int, frame_size: tuple[int, int], codec: str = "h264", + backend: pyAVBackend | None = None, ): try: self.container = av.open(filename, mode="w") diff --git a/supervision/video/core.py b/supervision/video/core.py index 4587f83fe0..cfa57209e1 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -51,7 +51,7 @@ def sink( Writer: A video writer object for writing frames. """ return self.backend.writer( - target_path, info.fps, info.resolution_wh, codec + target_path, info.fps, info.resolution_wh, codec, self.backend ) def frames( From 55b5ea0f361fa60566460251d8e906cabf44a356 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 21:16:19 -0400 Subject: [PATCH 070/128] UPDATE: Added ctx for pyAV --- supervision/video/backend/base.py | 8 ++++++++ supervision/video/backend/pyAV.py | 15 +++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 6a9bf97cb2..9a552053f0 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -58,6 +58,14 @@ def __init__( backend: BaseBackend | None = None, ): pass + + @abstractmethod + def __enter__(self): + pass + + @abstractmethod + def __exit__(self, exc_type, exc_value, traceback): + pass @abstractmethod def write(self, frame: np.ndarray) -> None: diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 4ac9830990..629095d7f0 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -19,7 +19,7 @@ def __init__(self): self.writer = pyAVWriter self.frame_generator = None self.video_info = None - self.current_frame_idx = 0 # Track current frame number in decoding + self.current_frame_idx = 0 def open(self, path: str) -> None: """Open and initialize a video source. @@ -74,7 +74,7 @@ def _set_video_info(self) -> VideoInfo: height = self.stream.height fps = float(self.stream.average_rate or self.stream.guessed_rate) if fps <= 0: - fps = 30 # Default to 30fps if invalid + fps = 30 total_frames = self.stream.frames if total_frames == 0: @@ -221,7 +221,7 @@ def __init__( self.audio_packets = [] if backend.audio_stream and backend.audio_src_container: audio_codec_name = backend.audio_stream.codec_context.name - audio_rate = backend.audio_stream.codec_context.rate # Can be None for some codecs + audio_rate = backend.audio_stream.codec_context.rate self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate) for packet in backend.audio_src_container.demux(backend.audio_stream): if packet.dts is not None: @@ -229,9 +229,14 @@ def __init__( except Exception as e: raise RuntimeError(f"Cannot open video writer for file: {filename}") from e + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() def write(self, frame: np.ndarray) -> None: - # Convert BGR (OpenCV) to RGB for PyAV frame_rgb = frame[..., ::-1] av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24") @@ -239,13 +244,11 @@ def write(self, frame: np.ndarray) -> None: av_frame.time_base = self.stream.codec_context.time_base self.frame_idx += 1 - # Encode frame and mux packets immediately packets = self.stream.encode(av_frame) for packet in packets: self.container.mux(packet) def close(self) -> None: - # Flush encoder by calling encode() with no frame, mux all packets packets = self.stream.encode() for packet in packets: self.container.mux(packet) From 4b1c9bcc1a00c399e97b9a55c44f35345176e866 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 21:26:27 -0400 Subject: [PATCH 071/128] UPDATE: Updated docstrings --- supervision/video/backend/base.py | 1 - supervision/video/backend/openCV.py | 101 +++++++++++++----------- supervision/video/backend/pyAV.py | 117 ++++++++++++++++++++-------- supervision/video/core.py | 86 +++++++++++++------- supervision/video/utils.py | 45 ++++++----- 5 files changed, 227 insertions(+), 123 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 9a552053f0..aac1a83b7a 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -46,7 +46,6 @@ def seek(self, frame_idx: int) -> None: def release(self) -> None: pass - class BaseWriter(ABC): @abstractmethod def __init__( diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 68a41e0d72..fd7a026c27 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -9,34 +9,37 @@ class OpenCVBackend(BaseBackend): """ - OpenCV implementation of the Backend interface. - Handles video capture, frame reading, seeking, and writing operations using OpenCV. + OpenCV-based implementation of the video backend interface. + + Provides methods for opening video sources, reading frames, seeking, + grabbing, and retrieving metadata using OpenCV. """ def __init__(self): - """Initialize the OpenCV backend with empty video capture and writer objects.""" + """Initialize with no active capture, writer, or path.""" super().__init__() self.cap = None self.video_info = None self.writer = OpenCVWriter self.path = None - def open(self, path: str) -> None: + def open(self, path: str | int) -> None: """ - Open a video source and initialize the video capture object. + Open a video source and initialize capture. Args: - path (str): Path to the video file, RTSP URL, or camera index. + path (str | int): Path to a video file, RTSP URL, or webcam index. Raises: - RuntimeError: If unable to open the video source. - ValueError: If the source type is not supported. + RuntimeError: If the source cannot be opened. + ValueError: If the source type is unsupported. """ self.cap = cv2.VideoCapture(path) self.path = path if not self.cap.isOpened(): raise RuntimeError(f"Cannot open video source: {path}") + self.video_info = self._set_video_info() if isinstance(path, int): @@ -51,98 +54,105 @@ def open(self, path: str) -> None: raise ValueError("Unsupported source type") def isOpened(self) -> bool: - """Check if the video source is opened successfully. + """ + Check if the video source is currently open. Returns: - bool: True if the video source is opened, False otherwise. + bool: True if the source is open, False otherwise. """ return self.cap.isOpened() def _set_video_info(self) -> VideoInfo: - """Set up video information from the opened video source. + """ + Extract and store video metadata from the open capture. Returns: - VideoInfo: Object containing video properties like width, height, fps, etc. + VideoInfo: Video properties such as width, height, FPS, and frame count. Raises: - RuntimeError: If the video source is not opened yet. + RuntimeError: If no source is open. """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") + width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = round(self.cap.get(cv2.CAP_PROP_FPS)) total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) + return VideoInfo(width, height, fps, total_frames) def info(self) -> VideoInfo: - """Get video information. + """ + Get the stored video metadata. Returns: - VideoInfo: Object containing video properties. + VideoInfo: Metadata for the open source. Raises: - RuntimeError: If the video source is not opened yet. + RuntimeError: If no source is open. """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") return self.video_info def read(self) -> tuple[bool, np.ndarray]: - """Read a frame from the video source. + """ + Read the next frame from the source. Returns: - tuple[bool, np.ndarray]: A tuple containing: - - bool: True if frame was successfully read - - np.ndarray: The video frame in BGR format + tuple[bool, np.ndarray]: + - bool: True if a frame was read successfully. + - np.ndarray: The frame in BGR format. Raises: - RuntimeError: If the video source is not opened yet. + RuntimeError: If no source is open. """ if self.cap is None: raise RuntimeError("Video not opened yet.") - ret, frame = self.cap.read() - return ret, frame + return self.cap.read() def grab(self) -> bool: - """Grab a frame from video source without decoding. + """ + Grab the next frame without decoding. Returns: - bool: True if frame was successfully grabbed. + bool: True if the frame pointer advanced successfully. Raises: - RuntimeError: If the video source is not opened yet. + RuntimeError: If no source is open. """ if self.cap is None: raise RuntimeError("Video not opened yet.") return self.cap.grab() def seek(self, frame_idx: int) -> None: - """Seek to a specific frame in the video. + """ + Jump to a specific frame. Args: - frame_idx (int): Index of the frame to seek to (0-based). + frame_idx (int): Zero-based frame index to seek to. Raises: - RuntimeError: If the video source is not opened yet. + RuntimeError: If no source is open. """ if self.cap is None: raise RuntimeError("Video not opened yet.") self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) def release(self) -> None: - """Release the video capture resources.""" + """Release capture resources.""" if self.cap is not None and self.cap.isOpened(): self.cap.release() self.cap = None class OpenCVWriter(BaseWriter): - """A class to handle video writing operations using OpenCV's VideoWriter. + """ + Video writer implementation using OpenCV's VideoWriter. - This class provides an interface to write frames to a video file using OpenCV, - with support for different codecs and automatic fallback to mp4v if the specified - codec fails. + Supports configurable codecs, frame sizes, and FPS, with a fallback + to "mp4v" if the specified codec fails. """ def __init__( @@ -153,17 +163,18 @@ def __init__( codec: str = "mp4v", backend: OpenCVBackend | None = None, ): - """Initialize the video writer. + """ + Initialize the writer. Args: - filename (str): Path to the output video file. - fps (int): Frames per second for the output video. - frame_size (tuple[int, int]): Width and height of the output video frames. - codec (str, optional): FourCC code for the video codec. Defaults to "mp4v". + filename (str): Output video file path. + fps (int): Output frames per second. + frame_size (tuple[int, int]): Frame dimensions (width, height). + codec (str, optional): FourCC codec code. Defaults to "mp4v". backend (OpenCVBackend | None, optional): Backend instance. Defaults to None. Raises: - RuntimeError: If the video writer cannot be initialized. + RuntimeError: If the writer cannot be opened. """ self.backend = backend try: @@ -172,6 +183,7 @@ def __init__( except Exception: fourcc_int = cv2.VideoWriter_fourcc(*"mp4v") self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size) + if not self.writer.isOpened(): raise RuntimeError(f"Cannot open video writer for file: {filename}") @@ -182,13 +194,14 @@ def __exit__(self, exc_type, exc_value, traceback): self.close() def write(self, frame: np.ndarray) -> None: - """Write a frame to the video file. + """ + Write a frame to the output. Args: - frame (np.ndarray): The frame to write, in BGR format. + frame (np.ndarray): Frame in BGR format. """ self.writer.write(frame) def close(self) -> None: - """Release the video writer resources.""" - self.writer.release() + """Release writer resources.""" + self.writer.release() \ No newline at end of file diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 629095d7f0..e3d80373ed 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -8,8 +8,11 @@ class pyAVBackend(BaseBackend): """ - PyAV implementation of the Backend interface. - Handles video capture, frame reading, seeking, and writing operations using PyAV. + PyAV-based implementation of the `BaseBackend` interface. + + This backend handles video capture, frame reading, seeking, and writing + operations using the PyAV library. Supports local video files, webcams, + and RTSP streams. """ def __init__(self): @@ -20,19 +23,20 @@ def __init__(self): self.frame_generator = None self.video_info = None self.current_frame_idx = 0 - + def open(self, path: str) -> None: - """Open and initialize a video source. + """ + Open and initialize a video source. - Opens a video file, RTSP stream, or webcam and initializes all necessary - components for video processing. + This method opens a video file, RTSP stream, or webcam, and sets up + the necessary components for decoding and reading frames. Args: - path (str): Path to video file, RTSP URL, or camera index. + path (str | int): Path to the video file, RTSP URL, or webcam index. Raises: - RuntimeError: If unable to open the video source. - ValueError: If the source type is not supported. + RuntimeError: If the video source cannot be opened. + ValueError: If the source type is unsupported. """ try: self.container = av.open(path) @@ -45,7 +49,7 @@ def open(self, path: str) -> None: self.video_info = self._set_video_info() self.current_frame_idx = 0 - # If audio exists + # If audio exists if len(self.container.streams.audio) > 0: self.audio_stream = self.container.streams.audio[0] @@ -62,11 +66,21 @@ def open(self, path: str) -> None: except Exception as e: raise RuntimeError(f"Cannot open video source: {path}") from e - + def isOpened(self) -> bool: + """Check if the video source has been successfully opened.""" return self.container is not None and self.stream is not None def _set_video_info(self) -> VideoInfo: + """ + Extract video information from the opened source. + + Returns: + VideoInfo: Object containing width, height, fps, and frame count. + + Raises: + RuntimeError: If the video source is not opened. + """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") @@ -74,7 +88,7 @@ def _set_video_info(self) -> VideoInfo: height = self.stream.height fps = float(self.stream.average_rate or self.stream.guessed_rate) if fps <= 0: - fps = 30 + fps = 30 total_frames = self.stream.frames if total_frames == 0: @@ -83,17 +97,27 @@ def _set_video_info(self) -> VideoInfo: return VideoInfo(width, height, round(fps), total_frames) def info(self) -> VideoInfo: + """ + Retrieve video information. + + Returns: + VideoInfo: Video properties for the opened source. + + Raises: + RuntimeError: If the video source is not opened. + """ if not self.isOpened(): raise RuntimeError("Video not opened yet.") return self.video_info def read(self) -> tuple[bool, np.ndarray]: - """Read the next frame from the video stream. + """ + Read and decode the next frame from the video source. Returns: - tuple[bool, np.ndarray]: A tuple containing: - - bool: True if frame was successfully read - - np.ndarray: The video frame in BGR format (H, W, 3) + tuple[bool, np.ndarray]: + - `bool`: True if a frame was read successfully, False if end of stream. + - `np.ndarray`: Frame data in BGR format (H, W, 3). Empty array if unsuccessful. Raises: RuntimeError: If the video source is not opened. @@ -110,13 +134,13 @@ def read(self) -> tuple[bool, np.ndarray]: return False, np.array([]) def grab(self) -> bool: - """Grab the next frame packet without decoding. + """ + Grab the next frame packet without decoding it. - A lightweight operation that skips frame decoding, useful for - quick frame navigation. Returns success status of the grab operation. + Useful for skipping frames quickly without the overhead of decoding. Returns: - bool: True if a frame was successfully grabbed, False otherwise. + bool: True if a frame packet was grabbed successfully, False otherwise. Raises: RuntimeError: If the video source is not opened. @@ -133,14 +157,14 @@ def grab(self) -> bool: return False def seek(self, frame_idx: int) -> None: - """Seek to a specific frame in the video. + """ + Seek to a specific frame index in the video. - Performs frame-accurate seeking by navigating to the nearest keyframe and - decoding forward to the exact target frame. The next read() call will - return the target frame. + This uses keyframe-based seeking, then decodes forward to the exact + requested frame. Args: - frame_idx (int): Target frame index (0-based) to seek to. + frame_idx (int): Zero-based index of the target frame. Raises: RuntimeError: If the video source is not opened. @@ -180,10 +204,8 @@ def _prepend_frame(first_frame, gen): break def release(self) -> None: - """Release all resources associated with the video stream. - - Closes the video container and resets all internal state variables - to ensure proper cleanup of resources. + """ + Release the video source and free all associated resources. """ if self.container: self.container.close() @@ -191,7 +213,14 @@ def release(self) -> None: self.stream = None self.frame_generator = None + class pyAVWriter(BaseWriter): + """ + PyAV-based video writer. + + Writes frames to a video file with optional audio from a backend source. + """ + def __init__( self, filename: str, @@ -199,7 +228,20 @@ def __init__( frame_size: tuple[int, int], codec: str = "h264", backend: pyAVBackend | None = None, - ): + ): + """ + Initialize a video writer. + + Args: + filename (str): Output video file path. + fps (int): Frames per second for the output video. + frame_size (tuple[int, int]): Frame dimensions as (width, height). + codec (str, optional): Video codec (default: "h264"). + backend (pyAVBackend, optional): Backend providing audio stream. + + Raises: + RuntimeError: If the output file cannot be created. + """ try: self.container = av.open(filename, mode="w") self.backend = backend @@ -219,9 +261,9 @@ def __init__( self.audio_stream_out = None self.audio_packets = [] - if backend.audio_stream and backend.audio_src_container: + if backend and backend.audio_stream and backend.audio_src_container: audio_codec_name = backend.audio_stream.codec_context.name - audio_rate = backend.audio_stream.codec_context.rate + audio_rate = backend.audio_stream.codec_context.rate self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate) for packet in backend.audio_src_container.demux(backend.audio_stream): if packet.dts is not None: @@ -229,7 +271,7 @@ def __init__( except Exception as e: raise RuntimeError(f"Cannot open video writer for file: {filename}") from e - + def __enter__(self): return self @@ -237,6 +279,12 @@ def __exit__(self, exc_type, exc_value, traceback): self.close() def write(self, frame: np.ndarray) -> None: + """ + Write a single frame to the output video. + + Args: + frame (np.ndarray): Frame in BGR format (H, W, 3). + """ frame_rgb = frame[..., ::-1] av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24") @@ -249,6 +297,9 @@ def write(self, frame: np.ndarray) -> None: self.container.mux(packet) def close(self) -> None: + """ + Finalize the video file and close the writer. + """ packets = self.stream.encode() for packet in packets: self.container.mux(packet) diff --git a/supervision/video/core.py b/supervision/video/core.py index cfa57209e1..a6890f7f18 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -15,43 +15,63 @@ class Video: + """ + A high-level interface for reading, processing, and writing video files or streams. + + Attributes: + info (VideoInfo): Metadata about the video, such as resolution, FPS, and frame count. + source (str | int): Path to the video file or index of the camera device. + backend (BackendTypes): Video backend used for I/O operations. + """ + info: VideoInfo source: str | int backend: BackendTypes def __init__( - self, - source: str | int, + self, + source: str | int, backend: BackendLiteral = "opencv" ) -> None: + """ + Initialize the Video object. + + Args: + source (str | int): Path to a video file or index of a camera device. + backend (BackendLiteral, optional): Backend type for video I/O. + Defaults to "opencv". + """ self.backend = getBackend(backend) self.backend.open(source) self.info = self.backend.info() self.source = source def __iter__(self): - """Make the Video class iterable over frames. + """ + Make the Video object iterable over frames. - Returns: - Generator: A generator yielding video frames. + Yields: + np.ndarray: The next frame in the video. """ return self.backend.frames() def sink( self, target_path: str, info: VideoInfo, codec: str | None = None ) -> BaseWriter: - """Create a video writer for saving frames. + """ + Create a video writer for saving frames to a file. Args: - target_path (str): Path where the video will be saved. - info (VideoInfo): Video information containing resolution and FPS. - codec (str, optional): FourCC code for video codec. Defaults to "None". + target_path (str): Output file path for the video. + info (VideoInfo): Video information including resolution and FPS. + codec (str, optional): FourCC video codec code. + If None, the backend's default codec is used. Returns: - Writer: A video writer object for writing frames. + BaseWriter: Video writer instance for writing frames. """ return self.backend.writer( - target_path, info.fps, info.resolution_wh, codec, self.backend + target_path, info.fps, info.resolution_wh, codec, self.backend ) def frames( @@ -61,17 +81,20 @@ def frames( end: int | None = None, resolution_wh: tuple[int, int] | None = None, ): - """Generate frames from the video. + """ + Generate frames from the video with optional skipping, cropping, and resizing. Args: - stride (int, optional): Number of frames to skip. Defaults to 1. - start (int, optional): Starting frame index. Defaults to 0. - end (int | None, optional): Ending frame index. Defaults to None. + stride (int, optional): Number of frames to skip between each yield. + Defaults to 1 (no skipping). + start (int, optional): Index of the first frame to read. Defaults to 0. + end (int | None, optional): Index after the last frame to read. + If None, reads until the end of the video. resolution_wh (tuple[int, int] | None, optional): Target resolution - (width, height). If provided, frames will be resized. Defaults to None. + (width, height) for resizing frames. If None, keeps original size. - Returns: - Generator: A generator yielding video frames. + Yields: + np.ndarray: The next frame in the video. """ if self.backend.cap is None: raise RuntimeError("Video not opened yet.") @@ -114,17 +137,29 @@ def save( show_progress: bool = False, codec: str | None = None, ): - """Save processed video frames to a file. + """ + Process and save video frames to a file. Args: - target_path (str): Path where the processed video will be saved. - callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes - each frame. Takes frame and index as input, returns processed frame. - fps (int | None, optional): Output video FPS. - progress_message (str, optional): Message to show in progress bar. + target_path (str): Output file path for the processed video. + callback (Callable[[np.ndarray, int], np.ndarray]): Function applied to each frame. + Takes the frame (np.ndarray) and frame index (int) as input, + returns the processed frame (np.ndarray). + fps (int | None, optional): Frames per second of the output video. + If None, uses the original FPS. + progress_message (str, optional): Message displayed in the progress bar. Defaults to "Processing video". - show_progress (bool, optional): Whether to show progress bar. + show_progress (bool, optional): If True, displays a tqdm progress bar. Defaults to False. + codec (str | None, optional): FourCC video codec code. + If None, uses the backend's default codec. + + Raises: + RuntimeError: If the video has not been opened. + ValueError: If the video source is not a file. + + Returns: + None """ if self.backend.cap is None: raise RuntimeError("Video not opened yet.") @@ -139,7 +174,6 @@ def save( target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend ) total_frames = self.backend.video_info.total_frames - print(self.backend.video_info) frames_generator = self.frames() for index, frame in enumerate( tqdm( diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 629ce7b262..fa1d7854a2 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -7,6 +7,14 @@ class SOURCE_TYPE(Enum): + """ + Enumeration of supported video source types. + + Attributes: + VIDEO_FILE: A standard video file on disk. + WEBCAM: A webcam or other direct camera device. + RTSP: A network RTSP video stream. + """ VIDEO_FILE = "VIDEO_FILE" WEBCAM = "WEBCAM" RTSP = "RTSP" @@ -15,28 +23,25 @@ class SOURCE_TYPE(Enum): @dataclass class VideoInfo: """ - A class to store video information, including width, height, fps and - total number of frames. + Stores metadata about a video, such as dimensions, frame rate, and source type. Attributes: - width (int): width of the video in pixels - height (int): height of the video in pixels - fps (int): frames per second of the video - total_frames (Optional[int]): total number of frames in the video, - default is None - source_type (Optional[SOURCE_TYPE]): source type of the video, - default is None + width (int): Width of the video in pixels. + height (int): Height of the video in pixels. + fps (int): Frames per second of the video. + total_frames (int | None): Total number of frames, or None if unknown. + source_type (SOURCE_TYPE | None): The source type of the video (file, webcam, RTSP), or None. Examples: ```python import supervision as sv - video_info = sv.VideoInfo.from_video_path(video_path=) + video_info = sv.VideoInfo.from_video_path("video.mp4") - video_info + print(video_info) # VideoInfo(width=3840, height=2160, fps=25, total_frames=538) - video_info.resolution_wh + print(video_info.resolution_wh) # (3840, 2160) ``` """ @@ -49,16 +54,17 @@ class VideoInfo: @classmethod def from_video_path(cls, video_path: str) -> VideoInfo: - """Create VideoInfo from a video file path. + """ + Create a VideoInfo instance from a video file. Args: video_path (str): Path to the video file. Returns: - VideoInfo: Video info containing width, height, fps, and total frames. + VideoInfo: Metadata including width, height, FPS, and total frames. Raises: - ValueError: If video cannot be opened or has invalid properties. + ValueError: If the video cannot be opened or has invalid properties. """ video = cv2.VideoCapture(video_path) if not video.isOpened(): @@ -77,7 +83,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) if total_frames < 0: - total_frames = None # Some video formats may not report frame count + total_frames = None # Some formats may not report frame count finally: video.release() @@ -85,9 +91,10 @@ def from_video_path(cls, video_path: str) -> VideoInfo: @property def resolution_wh(self) -> tuple[int, int]: - """Get the video resolution as (width, height). + """ + Get the video resolution as a (width, height) tuple. Returns: - Tuple[int, int]: Video dimensions as (width, height). + tuple[int, int]: The video dimensions in pixels. """ - return self.width, self.height + return self.width, self.height \ No newline at end of file From 706e85a191bb947923765834b61482fa1f7f8a1f Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 21:57:20 -0400 Subject: [PATCH 072/128] UPDATE: Negate audio stream --- supervision/video/backend/pyAV.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index e3d80373ed..11904cd815 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -52,6 +52,8 @@ def open(self, path: str) -> None: # If audio exists if len(self.container.streams.audio) > 0: self.audio_stream = self.container.streams.audio[0] + else: + self.audio_stream = None if isinstance(path, int): self.video_info.source_type = SOURCE_TYPE.WEBCAM @@ -254,6 +256,7 @@ def __init__( self.stream.pix_fmt = "yuv420p" # Set time_base explicitly for correct timing + print(fps) self.stream.codec_context.time_base = Fraction(1, fps) # Frame index for PTS @@ -310,3 +313,4 @@ def close(self) -> None: self.container.mux(packet) self.container.close() + From 8eb0774e355b7f5f8c17ff0d899825b9a1ecb7fd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 01:57:37 +0000 Subject: [PATCH 073/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/__init__.py | 10 ++++++---- supervision/video/backend/base.py | 3 ++- supervision/video/backend/openCV.py | 2 +- supervision/video/backend/pyAV.py | 28 ++++++++++++++++----------- supervision/video/core.py | 17 ++++++++-------- supervision/video/utils.py | 3 ++- 6 files changed, 36 insertions(+), 27 deletions(-) diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index 805a050968..dea20aece2 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -15,20 +15,22 @@ "pyav": pyAVBackend, } + def getBackend(backend: str) -> BaseBackend: if backend.lower() in _backends: return _backends[backend.lower()]() else: raise ValueError(f"Unsupported backend: {backend}") + __all__ = [ + "BackendLiteral", + "BackendType", "BaseBackend", "BaseWriter", "OpenCVBackend", "OpenCVWriter", + "getBackend", "pyAVBackend", "pyAVWriter", - "getBackend", - "BackendLiteral", - "BackendType" -] \ No newline at end of file +] diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index aac1a83b7a..4d7e3a12fd 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -46,6 +46,7 @@ def seek(self, frame_idx: int) -> None: def release(self) -> None: pass + class BaseWriter(ABC): @abstractmethod def __init__( @@ -57,7 +58,7 @@ def __init__( backend: BaseBackend | None = None, ): pass - + @abstractmethod def __enter__(self): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index fd7a026c27..894745dbdf 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -204,4 +204,4 @@ def write(self, frame: np.ndarray) -> None: def close(self) -> None: """Release writer resources.""" - self.writer.release() \ No newline at end of file + self.writer.release() diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 11904cd815..fca0782d18 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,9 +1,10 @@ +from fractions import Fraction + import av import numpy as np -from fractions import Fraction from supervision.video.backend import BaseBackend, BaseWriter -from supervision.video.utils import VideoInfo, SOURCE_TYPE +from supervision.video.utils import SOURCE_TYPE, VideoInfo class pyAVBackend(BaseBackend): @@ -22,7 +23,7 @@ def __init__(self): self.writer = pyAVWriter self.frame_generator = None self.video_info = None - self.current_frame_idx = 0 + self.current_frame_idx = 0 def open(self, path: str) -> None: """ @@ -117,8 +118,8 @@ def read(self) -> tuple[bool, np.ndarray]: Read and decode the next frame from the video source. Returns: - tuple[bool, np.ndarray]: - - `bool`: True if a frame was read successfully, False if end of stream. + tuple[bool, np.ndarray]: + - `bool`: True if a frame was read successfully, False if end of stream. - `np.ndarray`: Frame data in BGR format (H, W, 3). Empty array if unsuccessful. Raises: @@ -152,7 +153,7 @@ def grab(self) -> bool: try: for packet in self.container.demux(video=0): - if packet.stream.type == 'video': + if packet.stream.type == "video": return True return False except (StopIteration, av.error.EOFError): @@ -181,7 +182,9 @@ def seek(self, frame_idx: int) -> None: time_base = float(self.stream.time_base) timestamp = int((frame_idx / framerate) / time_base) - self.container.seek(timestamp, stream=self.stream, any_frame=False, backward=True) + self.container.seek( + timestamp, stream=self.stream, any_frame=False, backward=True + ) self.frame_generator = self.container.decode(video=0) self.current_frame_idx = 0 @@ -199,9 +202,11 @@ def seek(self, frame_idx: int) -> None: self.current_frame_idx += 1 if self.current_frame_idx >= frame_idx: + def _prepend_frame(first_frame, gen): yield first_frame yield from gen + self.frame_generator = _prepend_frame(frame, self.frame_generator) break @@ -247,7 +252,7 @@ def __init__( try: self.container = av.open(filename, mode="w") self.backend = backend - + if codec is None: codec = "h264" self.stream = self.container.add_stream(codec, rate=fps) @@ -261,13 +266,15 @@ def __init__( # Frame index for PTS self.frame_idx = 0 - + self.audio_stream_out = None self.audio_packets = [] if backend and backend.audio_stream and backend.audio_src_container: audio_codec_name = backend.audio_stream.codec_context.name audio_rate = backend.audio_stream.codec_context.rate - self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate) + self.audio_stream_out = self.container.add_stream( + audio_codec_name, rate=audio_rate + ) for packet in backend.audio_src_container.demux(backend.audio_stream): if packet.dts is not None: self.audio_packets.append(packet) @@ -313,4 +320,3 @@ def close(self) -> None: self.container.mux(packet) self.container.close() - diff --git a/supervision/video/core.py b/supervision/video/core.py index a6890f7f18..3b03111ea9 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,17 +1,18 @@ from __future__ import annotations from collections.abc import Callable -import numpy as np + import cv2 +import numpy as np from tqdm.auto import tqdm from supervision.video.backend import ( - BackendTypes, BackendLiteral, + BackendTypes, BaseWriter, getBackend, ) -from supervision.video.utils import VideoInfo, SOURCE_TYPE +from supervision.video.utils import SOURCE_TYPE, VideoInfo class Video: @@ -28,11 +29,7 @@ class Video: source: str | int backend: BackendTypes - def __init__( - self, - source: str | int, - backend: BackendLiteral = "opencv" - ) -> None: + def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None: """ Initialize the Video object. @@ -99,7 +96,9 @@ def frames( if self.backend.cap is None: raise RuntimeError("Video not opened yet.") - total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0 + total_frames = ( + self.backend.video_info.total_frames if self.backend.video_info else 0 + ) is_live_stream = total_frames <= 0 if is_live_stream: diff --git a/supervision/video/utils.py b/supervision/video/utils.py index fa1d7854a2..c2d7bb9702 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -15,6 +15,7 @@ class SOURCE_TYPE(Enum): WEBCAM: A webcam or other direct camera device. RTSP: A network RTSP video stream. """ + VIDEO_FILE = "VIDEO_FILE" WEBCAM = "WEBCAM" RTSP = "RTSP" @@ -97,4 +98,4 @@ def resolution_wh(self) -> tuple[int, int]: Returns: tuple[int, int]: The video dimensions in pixels. """ - return self.width, self.height \ No newline at end of file + return self.width, self.height From 9ea0f2bd305392942909bceec38103dfac156ae7 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 22:21:05 -0400 Subject: [PATCH 074/128] FIX: Fixed av import error and formatting --- supervision/video/backend/openCV.py | 2 +- supervision/video/backend/pyAV.py | 15 +++++++++++---- supervision/video/core.py | 9 +++++---- supervision/video/utils.py | 5 ++++- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 894745dbdf..b0a394ffbf 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -171,7 +171,7 @@ def __init__( fps (int): Output frames per second. frame_size (tuple[int, int]): Frame dimensions (width, height). codec (str, optional): FourCC codec code. Defaults to "mp4v". - backend (OpenCVBackend | None, optional): Backend instance. Defaults to None. + backend (OpenCVBackend | None, optional): Backend instance. Defaults to None Raises: RuntimeError: If the writer cannot be opened. diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index fca0782d18..ed75864f0b 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,6 +1,9 @@ from fractions import Fraction -import av +try: + import av +except ImportError: + av = None import numpy as np from supervision.video.backend import BaseBackend, BaseWriter @@ -18,6 +21,10 @@ class pyAVBackend(BaseBackend): def __init__(self): super().__init__() + + if av is None: + raise RuntimeError("PyAV (`av` module) is not installed. Please install it to use this feature.") + self.container = None self.stream = None self.writer = pyAVWriter @@ -120,7 +127,7 @@ def read(self) -> tuple[bool, np.ndarray]: Returns: tuple[bool, np.ndarray]: - `bool`: True if a frame was read successfully, False if end of stream. - - `np.ndarray`: Frame data in BGR format (H, W, 3). Empty array if unsuccessful. + - `np.ndarray`: Frame data in BGR format (H, W, 3). Raises: RuntimeError: If the video source is not opened. @@ -195,9 +202,9 @@ def seek(self, frame_idx: int) -> None: break if getattr(frame, "time", None) is not None: - self.current_frame_idx = int(round(frame.time * framerate)) + self.current_frame_idx = (round(frame.time * framerate)) elif getattr(frame, "pts", None) is not None: - self.current_frame_idx = int(round((frame.pts * time_base) * framerate)) + self.current_frame_idx = (round((frame.pts * time_base) * framerate)) else: self.current_frame_idx += 1 diff --git a/supervision/video/core.py b/supervision/video/core.py index 3b03111ea9..e0a72b5e02 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -20,7 +20,7 @@ class Video: A high-level interface for reading, processing, and writing video files or streams. Attributes: - info (VideoInfo): Metadata about the video, such as resolution, FPS, and frame count. + info (VideoInfo): Metadata about the video. source (str | int): Path to the video file or index of the camera device. backend (BackendTypes): Video backend used for I/O operations. """ @@ -141,9 +141,10 @@ def save( Args: target_path (str): Output file path for the processed video. - callback (Callable[[np.ndarray, int], np.ndarray]): Function applied to each frame. - Takes the frame (np.ndarray) and frame index (int) as input, - returns the processed frame (np.ndarray). + callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in + a numpy ndarray representation of a video frame and an + int index of the frame and returns a processed numpy ndarray + representation of the frame. fps (int | None, optional): Frames per second of the output video. If None, uses the original FPS. progress_message (str, optional): Message displayed in the progress bar. diff --git a/supervision/video/utils.py b/supervision/video/utils.py index c2d7bb9702..ccc7643c5b 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -31,7 +31,10 @@ class VideoInfo: height (int): Height of the video in pixels. fps (int): Frames per second of the video. total_frames (int | None): Total number of frames, or None if unknown. - source_type (SOURCE_TYPE | None): The source type of the video (file, webcam, RTSP), or None. + source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP, or None. + + Methods: + from_video_path(video file, webcam, RTSP, or None). Examples: ```python From 40c2a7c5085447380cf691db7f9bb599efda59b2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 02:21:24 +0000 Subject: [PATCH 075/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/pyAV.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index ed75864f0b..9ad0b1b694 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -23,8 +23,10 @@ def __init__(self): super().__init__() if av is None: - raise RuntimeError("PyAV (`av` module) is not installed. Please install it to use this feature.") - + raise RuntimeError( + "PyAV (`av` module) is not installed. Please install it to use this feature." + ) + self.container = None self.stream = None self.writer = pyAVWriter @@ -202,9 +204,9 @@ def seek(self, frame_idx: int) -> None: break if getattr(frame, "time", None) is not None: - self.current_frame_idx = (round(frame.time * framerate)) + self.current_frame_idx = round(frame.time * framerate) elif getattr(frame, "pts", None) is not None: - self.current_frame_idx = (round((frame.pts * time_base) * framerate)) + self.current_frame_idx = round((frame.pts * time_base) * framerate) else: self.current_frame_idx += 1 From 10482f4c413539df3ddb2ac5fe41157131e02be5 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sat, 9 Aug 2025 22:23:49 -0400 Subject: [PATCH 076/128] FIX: Fixed av error message and formatting --- supervision/video/backend/pyAV.py | 3 ++- supervision/video/utils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 9ad0b1b694..96857c387f 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,3 +1,4 @@ +from __future__ import annotations from fractions import Fraction try: @@ -24,7 +25,7 @@ def __init__(self): if av is None: raise RuntimeError( - "PyAV (`av` module) is not installed. Please install it to use this feature." + "PyAV (`av` module) is not installed. Run `pip install av`." ) self.container = None diff --git a/supervision/video/utils.py b/supervision/video/utils.py index ccc7643c5b..7e0ceed245 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -31,7 +31,7 @@ class VideoInfo: height (int): Height of the video in pixels. fps (int): Frames per second of the video. total_frames (int | None): Total number of frames, or None if unknown. - source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP, or None. + source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP. Methods: from_video_path(video file, webcam, RTSP, or None). From 337f65e457c0af84c005c25d4d07a9046ac963f4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 02:24:04 +0000 Subject: [PATCH 077/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/pyAV.py | 1 + 1 file changed, 1 insertion(+) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 96857c387f..c3c9fc5e4c 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,4 +1,5 @@ from __future__ import annotations + from fractions import Fraction try: From 1e404206b54d737d5cb4ce496b7ab36fe294ef43 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 10 Aug 2025 00:23:21 -0400 Subject: [PATCH 078/128] FIX: Fixed issue with audio sync with fps --- supervision/video/backend/pyAV.py | 35 ++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index c3c9fc5e4c..01cdd2aea5 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,5 +1,4 @@ from __future__ import annotations - from fractions import Fraction try: @@ -286,6 +285,7 @@ def __init__( self.audio_stream_out = self.container.add_stream( audio_codec_name, rate=audio_rate ) + for packet in backend.audio_src_container.demux(backend.audio_stream): if packet.dts is not None: self.audio_packets.append(packet) @@ -320,14 +320,43 @@ def write(self, frame: np.ndarray) -> None: def close(self) -> None: """ Finalize the video file and close the writer. + Automatically calculate speed factor based on original audio length + and output video length, then speed up audio accordingly. """ + # Flush video encoder packets = self.stream.encode() for packet in packets: self.container.mux(packet) - if self.audio_stream_out: + speed_factor = 1.0 + try: + if self.backend and self.backend.audio_stream and self.backend.audio_stream.duration: + orig_audio_duration = float(self.backend.audio_stream.duration * self.backend.audio_stream.time_base) + elif self.backend and self.backend.audio_src_container and self.backend.audio_src_container.duration: + orig_audio_duration = self.backend.audio_src_container.duration / 1_000_000 # us to s + else: + orig_audio_duration = None + + fps = float(1 / self.stream.codec_context.time_base) + new_video_duration = self.frame_idx / fps + + if orig_audio_duration and new_video_duration > 0: + speed_factor = orig_audio_duration / new_video_duration + except Exception: + speed_factor = 1.0 + + if self.audio_stream_out and speed_factor != 1.0: + for packet in self.audio_packets: + if packet.pts is not None: + packet.pts = int(packet.pts / speed_factor) + if packet.dts is not None: + packet.dts = int(packet.dts / speed_factor) + packet.stream = self.audio_stream_out + packet.time_base = self.audio_stream_out.time_base + self.container.mux(packet) + elif self.audio_stream_out: for packet in self.audio_packets: packet.stream = self.audio_stream_out self.container.mux(packet) - self.container.close() + self.container.close() \ No newline at end of file From 62b1fc61577da209b8c96898e9247ca29cf9275f Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 10 Aug 2025 00:42:41 -0400 Subject: [PATCH 079/128] UPDATE: Finer audio mux --- supervision/video/backend/pyAV.py | 70 +++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 01cdd2aea5..3885d6dfc7 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -236,6 +236,7 @@ class pyAVWriter(BaseWriter): PyAV-based video writer. Writes frames to a video file with optional audio from a backend source. + Uses finer timestamp granularity (milliseconds) for smoother video playback. """ def __init__( @@ -244,16 +245,16 @@ def __init__( fps: int, frame_size: tuple[int, int], codec: str = "h264", - backend: pyAVBackend | None = None, + backend: "pyAVBackend" | None = None, ): """ - Initialize a video writer. + Initialize the video writer. Args: - filename (str): Output video file path. + filename (str): Path to the output video file. fps (int): Frames per second for the output video. - frame_size (tuple[int, int]): Frame dimensions as (width, height). - codec (str, optional): Video codec (default: "h264"). + frame_size (tuple[int, int]): Width and height of the video frames. + codec (str, optional): Video codec name (default "h264"). backend (pyAVBackend, optional): Backend providing audio stream. Raises: @@ -270,12 +271,11 @@ def __init__( self.stream.height = frame_size[1] self.stream.pix_fmt = "yuv420p" - # Set time_base explicitly for correct timing - print(fps) - self.stream.codec_context.time_base = Fraction(1, fps) + # Use finer time_base (1/1000) for millisecond precision timestamps + self.stream.codec_context.time_base = Fraction(1, 1000) - # Frame index for PTS self.frame_idx = 0 + self.fps = fps # Store FPS for timestamp calculations self.audio_stream_out = None self.audio_packets = [] @@ -285,7 +285,8 @@ def __init__( self.audio_stream_out = self.container.add_stream( audio_codec_name, rate=audio_rate ) - + + # Buffer all audio packets from backend for muxing later for packet in backend.audio_src_container.demux(backend.audio_stream): if packet.dts is not None: self.audio_packets.append(packet) @@ -294,22 +295,27 @@ def __init__( raise RuntimeError(f"Cannot open video writer for file: {filename}") from e def __enter__(self): + """Enable use as a context manager.""" return self def __exit__(self, exc_type, exc_value, traceback): + """Close the writer on context exit.""" self.close() def write(self, frame: np.ndarray) -> None: """ - Write a single frame to the output video. + Write a single video frame. Args: - frame (np.ndarray): Frame in BGR format (H, W, 3). + frame (np.ndarray): Frame data in BGR format (H, W, 3). """ - frame_rgb = frame[..., ::-1] + # Calculate PTS as milliseconds: frame_index * (1000 ms / fps) + pts = int(self.frame_idx * (1000 / self.fps)) + + frame_rgb = frame[..., ::-1] # Convert BGR to RGB av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24") - av_frame.pts = self.frame_idx + av_frame.pts = pts av_frame.time_base = self.stream.codec_context.time_base self.frame_idx += 1 @@ -319,26 +325,38 @@ def write(self, frame: np.ndarray) -> None: def close(self) -> None: """ - Finalize the video file and close the writer. - Automatically calculate speed factor based on original audio length - and output video length, then speed up audio accordingly. + Finalize the video file, mux audio with adjusted timestamps to sync with video, + and close the container. """ - # Flush video encoder + def rescale_timestamp(value, src_tb, dst_tb): + """ + Rescale timestamp value from source timebase to destination timebase. + + Args: + value (int): Timestamp value (PTS or DTS). + src_tb (Fraction): Source time base. + dst_tb (Fraction): Destination time base. + + Returns: + int: Rescaled timestamp. + """ + return int(value * src_tb / dst_tb) + packets = self.stream.encode() for packet in packets: self.container.mux(packet) speed_factor = 1.0 + try: if self.backend and self.backend.audio_stream and self.backend.audio_stream.duration: orig_audio_duration = float(self.backend.audio_stream.duration * self.backend.audio_stream.time_base) elif self.backend and self.backend.audio_src_container and self.backend.audio_src_container.duration: - orig_audio_duration = self.backend.audio_src_container.duration / 1_000_000 # us to s + orig_audio_duration = self.backend.audio_src_container.duration / 1000 else: orig_audio_duration = None - fps = float(1 / self.stream.codec_context.time_base) - new_video_duration = self.frame_idx / fps + new_video_duration = (self.frame_idx * (1 / self.fps)) if orig_audio_duration and new_video_duration > 0: speed_factor = orig_audio_duration / new_video_duration @@ -348,15 +366,21 @@ def close(self) -> None: if self.audio_stream_out and speed_factor != 1.0: for packet in self.audio_packets: if packet.pts is not None: + packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base) packet.pts = int(packet.pts / speed_factor) if packet.dts is not None: + packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base) packet.dts = int(packet.dts / speed_factor) packet.stream = self.audio_stream_out - packet.time_base = self.audio_stream_out.time_base self.container.mux(packet) elif self.audio_stream_out: for packet in self.audio_packets: + if packet.pts is not None: + packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base) + if packet.dts is not None: + packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base) packet.stream = self.audio_stream_out self.container.mux(packet) - self.container.close() \ No newline at end of file + self.container.close() + From e3e21abcba524c82b11b3f7cc5f007619aeef476 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 04:43:05 +0000 Subject: [PATCH 080/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/pyAV.py | 40 +++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 3885d6dfc7..e6a5c74e8c 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,4 +1,5 @@ from __future__ import annotations + from fractions import Fraction try: @@ -245,7 +246,7 @@ def __init__( fps: int, frame_size: tuple[int, int], codec: str = "h264", - backend: "pyAVBackend" | None = None, + backend: pyAVBackend | None = None, ): """ Initialize the video writer. @@ -328,6 +329,7 @@ def close(self) -> None: Finalize the video file, mux audio with adjusted timestamps to sync with video, and close the container. """ + def rescale_timestamp(value, src_tb, dst_tb): """ Rescale timestamp value from source timebase to destination timebase. @@ -349,14 +351,25 @@ def rescale_timestamp(value, src_tb, dst_tb): speed_factor = 1.0 try: - if self.backend and self.backend.audio_stream and self.backend.audio_stream.duration: - orig_audio_duration = float(self.backend.audio_stream.duration * self.backend.audio_stream.time_base) - elif self.backend and self.backend.audio_src_container and self.backend.audio_src_container.duration: + if ( + self.backend + and self.backend.audio_stream + and self.backend.audio_stream.duration + ): + orig_audio_duration = float( + self.backend.audio_stream.duration + * self.backend.audio_stream.time_base + ) + elif ( + self.backend + and self.backend.audio_src_container + and self.backend.audio_src_container.duration + ): orig_audio_duration = self.backend.audio_src_container.duration / 1000 else: orig_audio_duration = None - new_video_duration = (self.frame_idx * (1 / self.fps)) + new_video_duration = self.frame_idx * (1 / self.fps) if orig_audio_duration and new_video_duration > 0: speed_factor = orig_audio_duration / new_video_duration @@ -366,21 +379,28 @@ def rescale_timestamp(value, src_tb, dst_tb): if self.audio_stream_out and speed_factor != 1.0: for packet in self.audio_packets: if packet.pts is not None: - packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base) + packet.pts = rescale_timestamp( + packet.pts, packet.time_base, self.audio_stream_out.time_base + ) packet.pts = int(packet.pts / speed_factor) if packet.dts is not None: - packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base) + packet.dts = rescale_timestamp( + packet.dts, packet.time_base, self.audio_stream_out.time_base + ) packet.dts = int(packet.dts / speed_factor) packet.stream = self.audio_stream_out self.container.mux(packet) elif self.audio_stream_out: for packet in self.audio_packets: if packet.pts is not None: - packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base) + packet.pts = rescale_timestamp( + packet.pts, packet.time_base, self.audio_stream_out.time_base + ) if packet.dts is not None: - packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base) + packet.dts = rescale_timestamp( + packet.dts, packet.time_base, self.audio_stream_out.time_base + ) packet.stream = self.audio_stream_out self.container.mux(packet) self.container.close() - From 055c4fce0c76093831ab5f1147bcd493f858dba9 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 10 Aug 2025 00:49:32 -0400 Subject: [PATCH 081/128] UPDATE: Param config to render audio --- supervision/video/backend/__init__.py | 2 +- supervision/video/backend/base.py | 3 ++- supervision/video/backend/openCV.py | 8 ++++++-- supervision/video/backend/pyAV.py | 6 +++--- supervision/video/core.py | 4 ++-- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index dea20aece2..97065c8217 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -18,7 +18,7 @@ def getBackend(backend: str) -> BaseBackend: if backend.lower() in _backends: - return _backends[backend.lower()]() + return _backends[backend.lower()] else: raise ValueError(f"Unsupported backend: {backend}") diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 4d7e3a12fd..a1247abf6f 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -8,11 +8,12 @@ class BaseBackend(ABC): - def __init__(self): + def __init__(self, render_audio=False): self.cap = None self.video_info = None self.writer = None self.path = None + self.render_audio = render_audio @abstractmethod def open(self, path: str) -> None: diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index b0a394ffbf..b80545e43e 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -15,13 +15,17 @@ class OpenCVBackend(BaseBackend): grabbing, and retrieving metadata using OpenCV. """ - def __init__(self): + def __init__(self, render_audio=False): """Initialize with no active capture, writer, or path.""" - super().__init__() + if render_audio: + raise ValueError("OpenCV backend does not support audio. " \ + "Please use `pyAV` backend instead or set `render_audio=False`") + self.cap = None self.video_info = None self.writer = OpenCVWriter self.path = None + self.render_audio = render_audio def open(self, path: str | int) -> None: """ diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 3885d6dfc7..dd6b24dbc3 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -20,7 +20,7 @@ class pyAVBackend(BaseBackend): and RTSP streams. """ - def __init__(self): + def __init__(self, render_audio=False): super().__init__() if av is None: @@ -34,6 +34,7 @@ def __init__(self): self.frame_generator = None self.video_info = None self.current_frame_idx = 0 + self.render_audio = render_audio def open(self, path: str) -> None: """ @@ -61,7 +62,7 @@ def open(self, path: str) -> None: self.current_frame_idx = 0 # If audio exists - if len(self.container.streams.audio) > 0: + if self.render_audio and len(self.container.streams.audio) > 0: self.audio_stream = self.container.streams.audio[0] else: self.audio_stream = None @@ -230,7 +231,6 @@ def release(self) -> None: self.stream = None self.frame_generator = None - class pyAVWriter(BaseWriter): """ PyAV-based video writer. diff --git a/supervision/video/core.py b/supervision/video/core.py index e0a72b5e02..da4f4bd820 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -29,7 +29,7 @@ class Video: source: str | int backend: BackendTypes - def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None: + def __init__(self, source: str | int, backend: BackendLiteral = "opencv", render_audio=False) -> None: """ Initialize the Video object. @@ -38,7 +38,7 @@ def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> Non backend (BackendLiteral, optional): Backend type for video I/O. Defaults to "opencv". """ - self.backend = getBackend(backend) + self.backend = getBackend(backend)(render_audio=render_audio) self.backend.open(source) self.info = self.backend.info() self.source = source From 7450573d6ee7b832c65dde479022f174e124a762 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 10 Aug 2025 01:06:33 -0400 Subject: [PATCH 082/128] UPDATE: Changed config to render audio to be isolated --- supervision/video/backend/base.py | 4 ++-- supervision/video/backend/openCV.py | 12 ++++++------ supervision/video/backend/pyAV.py | 9 +++++---- supervision/video/core.py | 11 ++++++----- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index a1247abf6f..806230ca63 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -8,12 +8,11 @@ class BaseBackend(ABC): - def __init__(self, render_audio=False): + def __init__(self): self.cap = None self.video_info = None self.writer = None self.path = None - self.render_audio = render_audio @abstractmethod def open(self, path: str) -> None: @@ -57,6 +56,7 @@ def __init__( frame_size: tuple[int, int], codec: str | None = None, backend: BaseBackend | None = None, + render_audio: bool = False, ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index b80545e43e..fd9ee5515f 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -15,17 +15,12 @@ class OpenCVBackend(BaseBackend): grabbing, and retrieving metadata using OpenCV. """ - def __init__(self, render_audio=False): + def __init__(self): """Initialize with no active capture, writer, or path.""" - if render_audio: - raise ValueError("OpenCV backend does not support audio. " \ - "Please use `pyAV` backend instead or set `render_audio=False`") - self.cap = None self.video_info = None self.writer = OpenCVWriter self.path = None - self.render_audio = render_audio def open(self, path: str | int) -> None: """ @@ -166,6 +161,7 @@ def __init__( frame_size: tuple[int, int], codec: str = "mp4v", backend: OpenCVBackend | None = None, + render_audio: bool = False, ): """ Initialize the writer. @@ -180,6 +176,10 @@ def __init__( Raises: RuntimeError: If the writer cannot be opened. """ + if render_audio: + raise ValueError("OpenCV backend does not support audio. " \ + "Please use `pyav` backend instead or set `render_audio=False`") + self.backend = backend try: fourcc_int = cv2.VideoWriter_fourcc(*codec) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 1a0b686440..36fff00f45 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -21,7 +21,7 @@ class pyAVBackend(BaseBackend): and RTSP streams. """ - def __init__(self, render_audio=False): + def __init__(self): super().__init__() if av is None: @@ -35,7 +35,6 @@ def __init__(self, render_audio=False): self.frame_generator = None self.video_info = None self.current_frame_idx = 0 - self.render_audio = render_audio def open(self, path: str) -> None: """ @@ -63,7 +62,7 @@ def open(self, path: str) -> None: self.current_frame_idx = 0 # If audio exists - if self.render_audio and len(self.container.streams.audio) > 0: + if len(self.container.streams.audio) > 0: self.audio_stream = self.container.streams.audio[0] else: self.audio_stream = None @@ -247,6 +246,7 @@ def __init__( frame_size: tuple[int, int], codec: str = "h264", backend: pyAVBackend | None = None, + render_audio: bool = False, ): """ Initialize the video writer. @@ -280,7 +280,8 @@ def __init__( self.audio_stream_out = None self.audio_packets = [] - if backend and backend.audio_stream and backend.audio_src_container: + + if render_audio and backend and backend.audio_stream and backend.audio_src_container: audio_codec_name = backend.audio_stream.codec_context.name audio_rate = backend.audio_stream.codec_context.rate self.audio_stream_out = self.container.add_stream( diff --git a/supervision/video/core.py b/supervision/video/core.py index da4f4bd820..970a4e7a25 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -29,7 +29,7 @@ class Video: source: str | int backend: BackendTypes - def __init__(self, source: str | int, backend: BackendLiteral = "opencv", render_audio=False) -> None: + def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None: """ Initialize the Video object. @@ -38,7 +38,7 @@ def __init__(self, source: str | int, backend: BackendLiteral = "opencv", render backend (BackendLiteral, optional): Backend type for video I/O. Defaults to "opencv". """ - self.backend = getBackend(backend)(render_audio=render_audio) + self.backend = getBackend(backend)() self.backend.open(source) self.info = self.backend.info() self.source = source @@ -53,7 +53,7 @@ def __iter__(self): return self.backend.frames() def sink( - self, target_path: str, info: VideoInfo, codec: str | None = None + self, target_path: str, info: VideoInfo, codec: str | None = None, render_audio: bool = False ) -> BaseWriter: """ Create a video writer for saving frames to a file. @@ -68,7 +68,7 @@ def sink( BaseWriter: Video writer instance for writing frames. """ return self.backend.writer( - target_path, info.fps, info.resolution_wh, codec, self.backend + target_path, info.fps, info.resolution_wh, codec, self.backend, render_audio ) def frames( @@ -135,6 +135,7 @@ def save( progress_message: str = "Processing video", show_progress: bool = False, codec: str | None = None, + render_audio: bool = False, ): """ Process and save video frames to a file. @@ -171,7 +172,7 @@ def save( fps = self.backend.video_info.fps writer = self.backend.writer( - target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend + target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend, render_audio ) total_frames = self.backend.video_info.total_frames frames_generator = self.frames() From 92238044185a920417f2feba4925a4e5985506c2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 05:06:52 +0000 Subject: [PATCH 083/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/openCV.py | 8 +++++--- supervision/video/backend/pyAV.py | 10 ++++++++-- supervision/video/core.py | 13 +++++++++++-- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index fd9ee5515f..1398b19ea6 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -177,9 +177,11 @@ def __init__( RuntimeError: If the writer cannot be opened. """ if render_audio: - raise ValueError("OpenCV backend does not support audio. " \ - "Please use `pyav` backend instead or set `render_audio=False`") - + raise ValueError( + "OpenCV backend does not support audio. " + "Please use `pyav` backend instead or set `render_audio=False`" + ) + self.backend = backend try: fourcc_int = cv2.VideoWriter_fourcc(*codec) diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 36fff00f45..89005a2cbb 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -231,6 +231,7 @@ def release(self) -> None: self.stream = None self.frame_generator = None + class pyAVWriter(BaseWriter): """ PyAV-based video writer. @@ -280,8 +281,13 @@ def __init__( self.audio_stream_out = None self.audio_packets = [] - - if render_audio and backend and backend.audio_stream and backend.audio_src_container: + + if ( + render_audio + and backend + and backend.audio_stream + and backend.audio_src_container + ): audio_codec_name = backend.audio_stream.codec_context.name audio_rate = backend.audio_stream.codec_context.rate self.audio_stream_out = self.container.add_stream( diff --git a/supervision/video/core.py b/supervision/video/core.py index 970a4e7a25..c28bfa4fad 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -53,7 +53,11 @@ def __iter__(self): return self.backend.frames() def sink( - self, target_path: str, info: VideoInfo, codec: str | None = None, render_audio: bool = False + self, + target_path: str, + info: VideoInfo, + codec: str | None = None, + render_audio: bool = False, ) -> BaseWriter: """ Create a video writer for saving frames to a file. @@ -172,7 +176,12 @@ def save( fps = self.backend.video_info.fps writer = self.backend.writer( - target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend, render_audio + target_path, + fps, + self.backend.video_info.resolution_wh, + codec, + self.backend, + render_audio, ) total_frames = self.backend.video_info.total_frames frames_generator = self.frames() From fcee1a1ec0b7af1844d4e4430a5e41ddfe436b3a Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 10 Aug 2025 01:23:11 -0400 Subject: [PATCH 084/128] UPDATE: Updated deprecated warnings --- supervision/utils/video.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 00eb9b4906..da77e8d68d 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -5,12 +5,13 @@ from collections.abc import Callable, Generator from dataclasses import dataclass +from supervision.utils.internal import deprecated + import cv2 import numpy as np from tqdm.auto import tqdm - -@DeprecationWarning +@deprecated("Use `sv.VideoInfo` for video metadata.") @dataclass class VideoInfo: """ @@ -60,8 +61,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: def resolution_wh(self) -> tuple[int, int]: return self.width, self.height - -@DeprecationWarning +@deprecated("Please use `sv.Video` for video writing and processing.") class VideoSink: """ Context manager that saves video frames to a file using OpenCV. @@ -118,8 +118,6 @@ def write_frame(self, frame: np.ndarray): def __exit__(self, exc_type, exc_value, exc_traceback): self.__writer.release() - -@DeprecationWarning def _validate_and_setup_video( source_path: str, start: int, end: int | None, iterative_seek: bool = False ): @@ -143,8 +141,7 @@ def _validate_and_setup_video( return video, start, end - -@DeprecationWarning +@deprecated("Use `sv.Video().frames()` or `sv.Video()` for frame iteration.") def get_video_frames_generator( source_path: str, stride: int = 1, @@ -195,8 +192,7 @@ def get_video_frames_generator( frame_position += stride video.release() - -@DeprecationWarning +@deprecated("Use `sv.Video.save()` with a callback for processing and saving videos.") def process_video( source_path: str, target_path: str, From 42a0f244c5ca154297ba6ac556d808ca203609f9 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 10 Aug 2025 01:24:07 -0400 Subject: [PATCH 085/128] ADD: Added unit tests from PR #1941 --- test/video/test_video.py | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 test/video/test_video.py diff --git a/test/video/test_video.py b/test/video/test_video.py new file mode 100644 index 0000000000..c6c8e1f407 --- /dev/null +++ b/test/video/test_video.py @@ -0,0 +1,78 @@ +import os + +import cv2 +import numpy as np + +import supervision as sv + + +def _create_temp_video(path: str, width=320, height=240, fps=30, frames=10): + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + writer = cv2.VideoWriter(path, fourcc, fps, (width, height)) + for _ in range(frames): + frame = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8) + writer.write(frame) + writer.release() + + +def test_video_info_and_iteration(tmp_path): + vid_path = tmp_path / "test.mp4" + _create_temp_video(str(vid_path)) + + video = sv.Video(str(vid_path)) + info = video.info + + assert info.width == 320 + assert info.height == 240 + assert info.total_frames == 10 + + frames = list(video.frames()) + assert len(frames) == 10 + + +def test_frames_stride(tmp_path): + vid_path = tmp_path / "test_stride.mp4" + _create_temp_video(str(vid_path), frames=9) + + video = sv.Video(str(vid_path)) + frames = list(video.frames(stride=2)) + assert len(frames) == 5 # ceil(9/2) + + +def test_save_with_callback(tmp_path): + src = tmp_path / "src.mp4" + dst = tmp_path / "dst.mp4" + _create_temp_video(str(src)) + + def identity(frame, i): + return frame + + sv.Video(str(src)).save(str(dst), callback=identity, show_progress=False) + + # confirm destination exists and metadata matches + dst_video = sv.Video(str(dst)) + assert dst_video.info.total_frames == 10 + + +def test_legacy_get_video_frames_generator(tmp_path): + vid_path = tmp_path / "legacy.mp4" + _create_temp_video(str(vid_path), frames=6) + + frames = list(sv.get_video_frames_generator(str(vid_path))) + assert len(frames) == 6 + + +def test_legacy_process_video(tmp_path): + src = tmp_path / "legacy_src.mp4" + dst = tmp_path / "legacy_dst.mp4" + _create_temp_video(str(src), frames=4) + + sv.process_video( + source_path=str(src), + target_path=str(dst), + callback=lambda f, i: f, + show_progress=False, + ) + + assert os.path.exists(dst) + assert sv.Video(str(dst)).info.total_frames == 4 \ No newline at end of file From c004d240bbeb5cf3f99d8d97b2638f51b93a0ac8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Aug 2025 05:24:27 +0000 Subject: [PATCH 086/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 9 +++++++-- test/video/test_video.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index da77e8d68d..c485bd96e6 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -5,12 +5,13 @@ from collections.abc import Callable, Generator from dataclasses import dataclass -from supervision.utils.internal import deprecated - import cv2 import numpy as np from tqdm.auto import tqdm +from supervision.utils.internal import deprecated + + @deprecated("Use `sv.VideoInfo` for video metadata.") @dataclass class VideoInfo: @@ -61,6 +62,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo: def resolution_wh(self) -> tuple[int, int]: return self.width, self.height + @deprecated("Please use `sv.Video` for video writing and processing.") class VideoSink: """ @@ -118,6 +120,7 @@ def write_frame(self, frame: np.ndarray): def __exit__(self, exc_type, exc_value, exc_traceback): self.__writer.release() + def _validate_and_setup_video( source_path: str, start: int, end: int | None, iterative_seek: bool = False ): @@ -141,6 +144,7 @@ def _validate_and_setup_video( return video, start, end + @deprecated("Use `sv.Video().frames()` or `sv.Video()` for frame iteration.") def get_video_frames_generator( source_path: str, @@ -192,6 +196,7 @@ def get_video_frames_generator( frame_position += stride video.release() + @deprecated("Use `sv.Video.save()` with a callback for processing and saving videos.") def process_video( source_path: str, diff --git a/test/video/test_video.py b/test/video/test_video.py index c6c8e1f407..785b0c704b 100644 --- a/test/video/test_video.py +++ b/test/video/test_video.py @@ -75,4 +75,4 @@ def test_legacy_process_video(tmp_path): ) assert os.path.exists(dst) - assert sv.Video(str(dst)).info.total_frames == 4 \ No newline at end of file + assert sv.Video(str(dst)).info.total_frames == 4 From c5e1d2ebe58db4662ce810d1b83fe2366b4516ac Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 16:21:58 -0400 Subject: [PATCH 087/128] FIX: Updated the pyproject.toml's extra requirements for ffmpeg --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 554dd16553..9fb58c765d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ Documentation = "https://supervision.roboflow.com/latest/" metrics = [ "pandas>=2.0.0", ] -video = ["av (>=15.0.0,<16.0.0)"] +ffmpeg = ["av (>=15.0.0)"] [dependency-groups] dev = [ From b258d1ba26a4dc12dea5acabe61ee60e43d70b2a Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 19:05:59 -0400 Subject: [PATCH 088/128] FIX: Fixes of formatting from code review --- supervision/utils/video.py | 20 +++++++++++++++---- supervision/video/__init__.py | 4 ++-- supervision/video/backend/openCV.py | 10 +++++----- supervision/video/backend/pyAV.py | 10 +++++----- supervision/video/core.py | 4 ++-- supervision/video/utils.py | 31 +++++++++++++++++++++++------ 6 files changed, 55 insertions(+), 24 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index c485bd96e6..b7fbf87ccc 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -12,7 +12,10 @@ from supervision.utils.internal import deprecated -@deprecated("Use `sv.VideoInfo` for video metadata.") +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.VideoInfo` instead." +) @dataclass class VideoInfo: """ @@ -63,7 +66,10 @@ def resolution_wh(self) -> tuple[int, int]: return self.width, self.height -@deprecated("Please use `sv.Video` for video writing and processing.") +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.Video().save` instead." +) class VideoSink: """ Context manager that saves video frames to a file using OpenCV. @@ -145,7 +151,10 @@ def _validate_and_setup_video( return video, start, end -@deprecated("Use `sv.Video().frames()` or `sv.Video()` for frame iteration.") +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.Video().frame()` or `sv.Video()` instead." +) def get_video_frames_generator( source_path: str, stride: int = 1, @@ -197,7 +206,10 @@ def get_video_frames_generator( video.release() -@deprecated("Use `sv.Video.save()` with a callback for processing and saving videos.") +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.Video().save` instead." +) def process_video( source_path: str, target_path: str, diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py index d5d5559ba8..f4dc506445 100644 --- a/supervision/video/__init__.py +++ b/supervision/video/__init__.py @@ -1,5 +1,5 @@ from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.core import Video -from supervision.video.utils import SOURCE_TYPE, VideoInfo +from supervision.video.utils import SourceType, VideoInfo -__all__ = ["SOURCE_TYPE", "BaseBackend", "BaseWriter", "Video", "VideoInfo"] +__all__ = ["SourceType", "BaseBackend", "BaseWriter", "Video", "VideoInfo"] diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 1398b19ea6..0b595d388b 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -4,7 +4,7 @@ import numpy as np from supervision.video.backend import BaseBackend, BaseWriter -from supervision.video.utils import SOURCE_TYPE, VideoInfo +from supervision.video.utils import SourceType, VideoInfo class OpenCVBackend(BaseBackend): @@ -42,12 +42,12 @@ def open(self, path: str | int) -> None: self.video_info = self._set_video_info() if isinstance(path, int): - self.video_info.source_type = SOURCE_TYPE.WEBCAM + self.video_info.SourceType = SourceType.WEBCAM elif isinstance(path, str): - self.video_info.source_type = ( - SOURCE_TYPE.RTSP + self.video_info.SourceType = ( + SourceType.RTSP if path.lower().startswith("rtsp://") - else SOURCE_TYPE.VIDEO_FILE + else SourceType.VIDEO_FILE ) else: raise ValueError("Unsupported source type") diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 89005a2cbb..bb4f745202 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -9,7 +9,7 @@ import numpy as np from supervision.video.backend import BaseBackend, BaseWriter -from supervision.video.utils import SOURCE_TYPE, VideoInfo +from supervision.video.utils import SourceType, VideoInfo class pyAVBackend(BaseBackend): @@ -68,12 +68,12 @@ def open(self, path: str) -> None: self.audio_stream = None if isinstance(path, int): - self.video_info.source_type = SOURCE_TYPE.WEBCAM + self.video_info.SourceType = SourceType.WEBCAM elif isinstance(path, str): - self.video_info.source_type = ( - SOURCE_TYPE.RTSP + self.video_info.SourceType = ( + SourceType.RTSP if path.lower().startswith("rtsp://") - else SOURCE_TYPE.VIDEO_FILE + else SourceType.VIDEO_FILE ) else: raise ValueError("Unsupported source type") diff --git a/supervision/video/core.py b/supervision/video/core.py index c28bfa4fad..714cf35334 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -12,7 +12,7 @@ BaseWriter, getBackend, ) -from supervision.video.utils import SOURCE_TYPE, VideoInfo +from supervision.video.utils import SourceType, VideoInfo class Video: @@ -169,7 +169,7 @@ def save( if self.backend.cap is None: raise RuntimeError("Video not opened yet.") - if self.backend.video_info.source_type != SOURCE_TYPE.VIDEO_FILE: + if self.backend.video_info.SourceType != SourceType.VIDEO_FILE: raise ValueError("Only video files can be saved.") if fps is None: diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 7e0ceed245..caaaf8fcb5 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -6,7 +6,7 @@ import cv2 -class SOURCE_TYPE(Enum): +class SourceType(Enum): """ Enumeration of supported video source types. @@ -16,9 +16,28 @@ class SOURCE_TYPE(Enum): RTSP: A network RTSP video stream. """ - VIDEO_FILE = "VIDEO_FILE" - WEBCAM = "WEBCAM" - RTSP = "RTSP" + VIDEO_FILE = "video_file" + WEBCAM = "webcam" + RTSP = "rtsp" + + @classmethod + def list(cls): + return list(map(lambda c: c.value, cls)) + + @classmethod + def from_value(cls, value: SourceType | str) -> SourceType: + if isinstance(value, cls): + return value + if isinstance(value, str): + value = value.lower() + try: + return cls(value) + except ValueError: + raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") + raise ValueError( + f"Invalid value type: {type(value)}. Must be an instance of " + f"{cls.__name__} or str." + ) @dataclass @@ -31,7 +50,7 @@ class VideoInfo: height (int): Height of the video in pixels. fps (int): Frames per second of the video. total_frames (int | None): Total number of frames, or None if unknown. - source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP. + SourceType (SourceType | None): Source type: VIDEO_FILE, WEBCAM, RTSP. Methods: from_video_path(video file, webcam, RTSP, or None). @@ -54,7 +73,7 @@ class VideoInfo: height: int fps: int total_frames: int | None = None - source_type: SOURCE_TYPE | None = None + SourceType: SourceType | None = None @classmethod def from_video_path(cls, video_path: str) -> VideoInfo: From 79e003d4cb22eff53e29bb5b6c6ba8bfddba3afd Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 19:16:56 -0400 Subject: [PATCH 089/128] UPDATE: Removed video info from path as it is dependant on the backend --- supervision/utils/video.py | 27 ++++++++++++++++----------- supervision/video/utils.py | 37 ------------------------------------- 2 files changed, 16 insertions(+), 48 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index b7fbf87ccc..261033eeb0 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -10,6 +10,7 @@ from tqdm.auto import tqdm from supervision.utils.internal import deprecated +from supervision.video.backend import BackendTypes @deprecated( @@ -49,17 +50,21 @@ class VideoInfo: total_frames: int | None = None @classmethod - def from_video_path(cls, video_path: str) -> VideoInfo: - video = cv2.VideoCapture(video_path) - if not video.isOpened(): - raise Exception(f"Could not open video at {video_path}") - - width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(video.get(cv2.CAP_PROP_FPS)) - total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - video.release() - return VideoInfo(width, height, fps, total_frames) + def from_video_path(cls, backend: BackendTypes) -> VideoInfo: + if not backend.isOpened(): + raise RuntimeError("Video not opened yet.") + + width = backend.stream.width + height = backend.stream.height + fps = float(backend.stream.average_rate or backend.stream.guessed_rate) + if fps <= 0: + fps = 30 + + total_frames = backend.stream.frames + if total_frames == 0: + total_frames = None + + return VideoInfo(width, height, round(fps), total_frames) @property def resolution_wh(self) -> tuple[int, int]: diff --git a/supervision/video/utils.py b/supervision/video/utils.py index caaaf8fcb5..ae4125d085 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -75,43 +75,6 @@ class VideoInfo: total_frames: int | None = None SourceType: SourceType | None = None - @classmethod - def from_video_path(cls, video_path: str) -> VideoInfo: - """ - Create a VideoInfo instance from a video file. - - Args: - video_path (str): Path to the video file. - - Returns: - VideoInfo: Metadata including width, height, FPS, and total frames. - - Raises: - ValueError: If the video cannot be opened or has invalid properties. - """ - video = cv2.VideoCapture(video_path) - if not video.isOpened(): - raise ValueError(f"Could not open video at {video_path}") - - try: - width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - if width <= 0 or height <= 0: - raise ValueError(f"Invalid video dimensions: {width}x{height}") - - fps = video.get(cv2.CAP_PROP_FPS) - if fps <= 0: - fps = 30 # Default to 30fps if invalid - fps = round(fps) - - total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - if total_frames < 0: - total_frames = None # Some formats may not report frame count - finally: - video.release() - - return VideoInfo(width, height, fps, total_frames) - @property def resolution_wh(self) -> tuple[int, int]: """ From c6e6e6cd629c930566c3f6946ac98ccba9f7ccfc Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 19:31:16 -0400 Subject: [PATCH 090/128] UPDATE: Updated backend to Enum --- supervision/video/__init__.py | 3 +- supervision/video/backend/__init__.py | 63 +++++++++++++++++---------- supervision/video/backend/base.py | 7 +-- supervision/video/backend/openCV.py | 2 +- supervision/video/backend/pyAV.py | 2 +- supervision/video/core.py | 15 ++++--- 6 files changed, 57 insertions(+), 35 deletions(-) diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py index f4dc506445..54393f5d03 100644 --- a/supervision/video/__init__.py +++ b/supervision/video/__init__.py @@ -1,5 +1,4 @@ -from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.core import Video from supervision.video.utils import SourceType, VideoInfo -__all__ = ["SourceType", "BaseBackend", "BaseWriter", "Video", "VideoInfo"] +__all__ = ["SourceType", "Video", "VideoInfo"] diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index 97065c8217..d4e3674ba7 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -1,36 +1,55 @@ from __future__ import annotations from typing import Literal, Union +from enum import Enum -from supervision.video.backend.base import BaseBackend, BaseWriter -from supervision.video.backend.openCV import OpenCVBackend, OpenCVWriter -from supervision.video.backend.pyAV import pyAVBackend, pyAVWriter +from supervision.video.backend.opencv import OpenCVBackend, OpenCVWriter +from supervision.video.backend.pyav import pyAVBackend, pyAVWriter -BackendLiteral = Literal["opencv", "pyav"] BackendTypes = Union[OpenCVBackend, pyAVBackend] WriterTypes = Union[OpenCVWriter, pyAVWriter] -_backends = { - "opencv": OpenCVBackend, - "pyav": pyAVBackend, +class Backend(Enum): + """ + Enumeration of Backends. + """ + + PYAV = "pyav" + OPENCV = "opencv" + + @classmethod + def list(cls): + return list(map(lambda c: c.value, cls)) + + @classmethod + def from_value(cls, value: Backend | str) -> Backend: + if isinstance(value, cls): + return value + if isinstance(value, str): + value = value.lower() + try: + return cls(value) + except ValueError: + raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") + raise ValueError( + f"Invalid value type: {type(value)}. Must be an instance of " + f"{cls.__name__} or str." + ) + +BackendDict = { + Backend.PYAV: pyAVBackend, + Backend.OPENCV: OpenCVBackend, } - -def getBackend(backend: str) -> BaseBackend: - if backend.lower() in _backends: - return _backends[backend.lower()] - else: - raise ValueError(f"Unsupported backend: {backend}") - +WriterDict = { + Backend.PYAV: pyAVWriter, + Backend.OPENCV: OpenCVWriter, +} __all__ = [ - "BackendLiteral", "BackendType", - "BaseBackend", - "BaseWriter", - "OpenCVBackend", - "OpenCVWriter", - "getBackend", - "pyAVBackend", - "pyAVWriter", + "WriterType", + "Backend", + "BackendDict", + "WriterDict", ] diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 806230ca63..7eb2e1899d 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -5,13 +5,14 @@ import numpy as np from supervision.video.utils import VideoInfo +from supervision.video.backend import BackendTypes, WriterTypes class BaseBackend(ABC): def __init__(self): self.cap = None - self.video_info = None - self.writer = None + self.video_info: VideoInfo | None = None + self.writer: WriterTypes | None = None self.path = None @abstractmethod @@ -55,7 +56,7 @@ def __init__( fps: int, frame_size: tuple[int, int], codec: str | None = None, - backend: BaseBackend | None = None, + backend: BackendTypes | None = None, render_audio: bool = False, ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 0b595d388b..8e531e0d79 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -3,7 +3,7 @@ import cv2 import numpy as np -from supervision.video.backend import BaseBackend, BaseWriter +from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.utils import SourceType, VideoInfo diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index bb4f745202..26a2278bb5 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -8,7 +8,7 @@ av = None import numpy as np -from supervision.video.backend import BaseBackend, BaseWriter +from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.utils import SourceType, VideoInfo diff --git a/supervision/video/core.py b/supervision/video/core.py index 714cf35334..f522cc61d1 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -7,10 +7,10 @@ from tqdm.auto import tqdm from supervision.video.backend import ( - BackendLiteral, BackendTypes, - BaseWriter, - getBackend, + Backend, + BackendDict, + WriterTypes ) from supervision.video.utils import SourceType, VideoInfo @@ -29,7 +29,7 @@ class Video: source: str | int backend: BackendTypes - def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None: + def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) -> None: """ Initialize the Video object. @@ -38,7 +38,10 @@ def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> Non backend (BackendLiteral, optional): Backend type for video I/O. Defaults to "opencv". """ - self.backend = getBackend(backend)() + self.backend = BackendDict.get(Backend.from_value(backend)) + if self.backend is None: + raise ValueError(f"Unsupported backend: {backend}") + self.backend.open(source) self.info = self.backend.info() self.source = source @@ -58,7 +61,7 @@ def sink( info: VideoInfo, codec: str | None = None, render_audio: bool = False, - ) -> BaseWriter: + ) -> WriterTypes: """ Create a video writer for saving frames to a file. From 4f5ac5865b4671e46c1e965bef1f33d6bce6f0aa Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 19:35:07 -0400 Subject: [PATCH 091/128] FIX: Reverted video utils --- supervision/utils/video.py | 27 +++++++++++---------------- supervision/video/backend/__init__.py | 4 ++-- supervision/video/core.py | 2 +- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 261033eeb0..b7fbf87ccc 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -10,7 +10,6 @@ from tqdm.auto import tqdm from supervision.utils.internal import deprecated -from supervision.video.backend import BackendTypes @deprecated( @@ -50,21 +49,17 @@ class VideoInfo: total_frames: int | None = None @classmethod - def from_video_path(cls, backend: BackendTypes) -> VideoInfo: - if not backend.isOpened(): - raise RuntimeError("Video not opened yet.") - - width = backend.stream.width - height = backend.stream.height - fps = float(backend.stream.average_rate or backend.stream.guessed_rate) - if fps <= 0: - fps = 30 - - total_frames = backend.stream.frames - if total_frames == 0: - total_frames = None - - return VideoInfo(width, height, round(fps), total_frames) + def from_video_path(cls, video_path: str) -> VideoInfo: + video = cv2.VideoCapture(video_path) + if not video.isOpened(): + raise Exception(f"Could not open video at {video_path}") + + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(video.get(cv2.CAP_PROP_FPS)) + total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + video.release() + return VideoInfo(width, height, fps, total_frames) @property def resolution_wh(self) -> tuple[int, int]: diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index d4e3674ba7..b32e9921d9 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -47,8 +47,8 @@ def from_value(cls, value: Backend | str) -> Backend: } __all__ = [ - "BackendType", - "WriterType", + "BackendTypes", + "WriterTypes", "Backend", "BackendDict", "WriterDict", diff --git a/supervision/video/core.py b/supervision/video/core.py index f522cc61d1..0b92463f20 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -7,8 +7,8 @@ from tqdm.auto import tqdm from supervision.video.backend import ( - BackendTypes, Backend, + BackendTypes, BackendDict, WriterTypes ) From 29a10458d7a80f81c237fd011466181e013641d3 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 19:44:15 -0400 Subject: [PATCH 092/128] FIX: Fixed backend bug issues --- supervision/video/backend/base.py | 5 ++--- supervision/video/core.py | 3 +++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 7eb2e1899d..107276d16c 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -5,14 +5,13 @@ import numpy as np from supervision.video.utils import VideoInfo -from supervision.video.backend import BackendTypes, WriterTypes class BaseBackend(ABC): def __init__(self): self.cap = None self.video_info: VideoInfo | None = None - self.writer: WriterTypes | None = None + self.writer: BaseWriter | None = None self.path = None @abstractmethod @@ -56,7 +55,7 @@ def __init__( fps: int, frame_size: tuple[int, int], codec: str | None = None, - backend: BackendTypes | None = None, + backend: BaseBackend | None = None, render_audio: bool = False, ): pass diff --git a/supervision/video/core.py b/supervision/video/core.py index 0b92463f20..2aeaca6b4b 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -42,6 +42,9 @@ def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) - if self.backend is None: raise ValueError(f"Unsupported backend: {backend}") + # Instantiate the backend class once sanity check is done + self.backend = self.backend() + self.backend.open(source) self.info = self.backend.info() self.source = source From 7e401ca2bdab6152c107b5bffc02eba231c686cd Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 19:50:56 -0400 Subject: [PATCH 093/128] UPDATE: Updated render_audio param default value config --- supervision/video/backend/base.py | 2 +- supervision/video/backend/openCV.py | 6 +++--- supervision/video/backend/pyAV.py | 5 ++++- supervision/video/core.py | 4 ++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index 107276d16c..eda2f7d7f9 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -56,7 +56,7 @@ def __init__( frame_size: tuple[int, int], codec: str | None = None, backend: BaseBackend | None = None, - render_audio: bool = False, + render_audio: bool | None = None, ): pass diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py index 8e531e0d79..8ec8ee80bc 100644 --- a/supervision/video/backend/openCV.py +++ b/supervision/video/backend/openCV.py @@ -161,7 +161,7 @@ def __init__( frame_size: tuple[int, int], codec: str = "mp4v", backend: OpenCVBackend | None = None, - render_audio: bool = False, + render_audio: bool | None = None, ): """ Initialize the writer. @@ -176,10 +176,10 @@ def __init__( Raises: RuntimeError: If the writer cannot be opened. """ - if render_audio: + if render_audio or render_audio == False: raise ValueError( "OpenCV backend does not support audio. " - "Please use `pyav` backend instead or set `render_audio=False`" + "Please use `pyav` backend instead or set `render_audio=None`" ) self.backend = backend diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 26a2278bb5..83e14e8e11 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -247,7 +247,7 @@ def __init__( frame_size: tuple[int, int], codec: str = "h264", backend: pyAVBackend | None = None, - render_audio: bool = False, + render_audio: bool | None = None, ): """ Initialize the video writer. @@ -266,6 +266,9 @@ def __init__( self.container = av.open(filename, mode="w") self.backend = backend + if render_audio is None: + render_audio = True + if codec is None: codec = "h264" self.stream = self.container.add_stream(codec, rate=fps) diff --git a/supervision/video/core.py b/supervision/video/core.py index 2aeaca6b4b..0bea072a19 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -63,7 +63,7 @@ def sink( target_path: str, info: VideoInfo, codec: str | None = None, - render_audio: bool = False, + render_audio: bool | None = None, ) -> WriterTypes: """ Create a video writer for saving frames to a file. @@ -145,7 +145,7 @@ def save( progress_message: str = "Processing video", show_progress: bool = False, codec: str | None = None, - render_audio: bool = False, + render_audio: bool | None = None, ): """ Process and save video frames to a file. From 232ace57873c4d1de9741c9526dd0d1a62bd2bd3 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 20:19:42 -0400 Subject: [PATCH 094/128] BUG: Fixed pyav not resolving webcam source path --- supervision/video/backend/base.py | 2 +- supervision/video/backend/pyAV.py | 49 +++++++++++++++++++++++-------- supervision/video/core.py | 2 +- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py index eda2f7d7f9..5ba634708a 100644 --- a/supervision/video/backend/base.py +++ b/supervision/video/backend/base.py @@ -15,7 +15,7 @@ def __init__(self): self.path = None @abstractmethod - def open(self, path: str) -> None: + def open(self, path: str | int) -> None: pass @abstractmethod diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 83e14e8e11..3a33dfe166 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,6 +1,8 @@ from __future__ import annotations from fractions import Fraction +import platform +import re try: import av @@ -36,7 +38,7 @@ def __init__(self): self.video_info = None self.current_frame_idx = 0 - def open(self, path: str) -> None: + def open(self, path: str | int) -> None: """ Open and initialize a video source. @@ -50,8 +52,40 @@ def open(self, path: str) -> None: RuntimeError: If the video source cannot be opened. ValueError: If the source type is unsupported. """ + _source_type = None + _format = None + + def is_webcam_path(path: str) -> tuple[bool, str]: + if not isinstance(path, str): + return False + + system = platform.system() + path_lower = path.lower() + + if system == "Windows": + return path_lower.startswith("video="), "dshow" + elif system == "Linux": + return bool(re.match(r"^/dev/video\d+$", path_lower)), "v4l2" + elif system == "Darwin": + return path_lower.isdigit(), "avfoundation" + else: + return False + + isWebcam, ffmpeg_os_format = is_webcam_path(path=path) + if isWebcam: + _source_type = SourceType.WEBCAM + _format = ffmpeg_os_format + elif isinstance(path, str): + _source_type = ( + SourceType.RTSP + if path.lower().startswith("rtsp://") + else SourceType.VIDEO_FILE + ) + else: + raise ValueError("Unsupported source type") + try: - self.container = av.open(path) + self.container = av.open(path, format=_format) self.audio_src_container = self.container self.stream = self.container.streams.video[0] self.stream.thread_type = "AUTO" @@ -67,16 +101,7 @@ def open(self, path: str) -> None: else: self.audio_stream = None - if isinstance(path, int): - self.video_info.SourceType = SourceType.WEBCAM - elif isinstance(path, str): - self.video_info.SourceType = ( - SourceType.RTSP - if path.lower().startswith("rtsp://") - else SourceType.VIDEO_FILE - ) - else: - raise ValueError("Unsupported source type") + self.video_info.SourceType = _source_type except Exception as e: raise RuntimeError(f"Cannot open video source: {path}") from e diff --git a/supervision/video/core.py b/supervision/video/core.py index 0bea072a19..800591fc90 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -109,7 +109,7 @@ def frames( total_frames = ( self.backend.video_info.total_frames if self.backend.video_info else 0 ) - is_live_stream = total_frames <= 0 + is_live_stream = total_frames is None or total_frames <= 0 if is_live_stream: while True: From 84f90ea8882d2b32232cab826215d8d29475861c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 00:20:10 +0000 Subject: [PATCH 095/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/video.py | 32 +++++++++---------- supervision/video/backend/__init__.py | 44 ++++++++++++++------------- supervision/video/backend/pyAV.py | 10 +++--- supervision/video/core.py | 13 +++----- supervision/video/utils.py | 38 +++++++++++------------ 5 files changed, 67 insertions(+), 70 deletions(-) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index b7fbf87ccc..d3408b90e9 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -12,10 +12,10 @@ from supervision.utils.internal import deprecated -@deprecated( - "`process_video` is deprecated and will be removed in " - "`supervision-0.32.0`. Use `sv.VideoInfo` instead." -) +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.VideoInfo` instead." +) @dataclass class VideoInfo: """ @@ -66,10 +66,10 @@ def resolution_wh(self) -> tuple[int, int]: return self.width, self.height -@deprecated( - "`process_video` is deprecated and will be removed in " - "`supervision-0.32.0`. Use `sv.Video().save` instead." -) +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.Video().save` instead." +) class VideoSink: """ Context manager that saves video frames to a file using OpenCV. @@ -151,10 +151,10 @@ def _validate_and_setup_video( return video, start, end -@deprecated( - "`process_video` is deprecated and will be removed in " - "`supervision-0.32.0`. Use `sv.Video().frame()` or `sv.Video()` instead." -) +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.Video().frame()` or `sv.Video()` instead." +) def get_video_frames_generator( source_path: str, stride: int = 1, @@ -206,10 +206,10 @@ def get_video_frames_generator( video.release() -@deprecated( - "`process_video` is deprecated and will be removed in " - "`supervision-0.32.0`. Use `sv.Video().save` instead." -) +@deprecated( + "`process_video` is deprecated and will be removed in " + "`supervision-0.32.0`. Use `sv.Video().save` instead." +) def process_video( source_path: str, target_path: str, diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index b32e9921d9..0ec704dbc1 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -1,7 +1,7 @@ from __future__ import annotations -from typing import Literal, Union from enum import Enum +from typing import Literal, Union from supervision.video.backend.opencv import OpenCVBackend, OpenCVWriter from supervision.video.backend.pyav import pyAVBackend, pyAVWriter @@ -9,6 +9,7 @@ BackendTypes = Union[OpenCVBackend, pyAVBackend] WriterTypes = Union[OpenCVWriter, pyAVWriter] + class Backend(Enum): """ Enumeration of Backends. @@ -17,24 +18,25 @@ class Backend(Enum): PYAV = "pyav" OPENCV = "opencv" - @classmethod - def list(cls): - return list(map(lambda c: c.value, cls)) - - @classmethod - def from_value(cls, value: Backend | str) -> Backend: - if isinstance(value, cls): - return value - if isinstance(value, str): - value = value.lower() - try: - return cls(value) - except ValueError: - raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") - raise ValueError( - f"Invalid value type: {type(value)}. Must be an instance of " - f"{cls.__name__} or str." - ) + @classmethod + def list(cls): + return list(map(lambda c: c.value, cls)) + + @classmethod + def from_value(cls, value: Backend | str) -> Backend: + if isinstance(value, cls): + return value + if isinstance(value, str): + value = value.lower() + try: + return cls(value) + except ValueError: + raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") + raise ValueError( + f"Invalid value type: {type(value)}. Must be an instance of " + f"{cls.__name__} or str." + ) + BackendDict = { Backend.PYAV: pyAVBackend, @@ -47,9 +49,9 @@ def from_value(cls, value: Backend | str) -> Backend: } __all__ = [ - "BackendTypes", - "WriterTypes", "Backend", "BackendDict", + "BackendTypes", "WriterDict", + "WriterTypes", ] diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py index 3a33dfe166..26c47dedbd 100644 --- a/supervision/video/backend/pyAV.py +++ b/supervision/video/backend/pyAV.py @@ -1,8 +1,8 @@ from __future__ import annotations -from fractions import Fraction import platform import re +from fractions import Fraction try: import av @@ -54,11 +54,11 @@ def open(self, path: str | int) -> None: """ _source_type = None _format = None - + def is_webcam_path(path: str) -> tuple[bool, str]: if not isinstance(path, str): return False - + system = platform.system() path_lower = path.lower() @@ -70,7 +70,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]: return path_lower.isdigit(), "avfoundation" else: return False - + isWebcam, ffmpeg_os_format = is_webcam_path(path=path) if isWebcam: _source_type = SourceType.WEBCAM @@ -83,7 +83,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]: ) else: raise ValueError("Unsupported source type") - + try: self.container = av.open(path, format=_format) self.audio_src_container = self.container diff --git a/supervision/video/core.py b/supervision/video/core.py index 800591fc90..e366e5be4e 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -6,12 +6,7 @@ import numpy as np from tqdm.auto import tqdm -from supervision.video.backend import ( - Backend, - BackendTypes, - BackendDict, - WriterTypes -) +from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes from supervision.video.utils import SourceType, VideoInfo @@ -29,7 +24,9 @@ class Video: source: str | int backend: BackendTypes - def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) -> None: + def __init__( + self, source: str | int, backend: Backend | str = Backend.OPENCV + ) -> None: """ Initialize the Video object. @@ -41,7 +38,7 @@ def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) - self.backend = BackendDict.get(Backend.from_value(backend)) if self.backend is None: raise ValueError(f"Unsupported backend: {backend}") - + # Instantiate the backend class once sanity check is done self.backend = self.backend() diff --git a/supervision/video/utils.py b/supervision/video/utils.py index ae4125d085..9b90c8dec8 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -3,8 +3,6 @@ from dataclasses import dataclass from enum import Enum -import cv2 - class SourceType(Enum): """ @@ -20,24 +18,24 @@ class SourceType(Enum): WEBCAM = "webcam" RTSP = "rtsp" - @classmethod - def list(cls): - return list(map(lambda c: c.value, cls)) - - @classmethod - def from_value(cls, value: SourceType | str) -> SourceType: - if isinstance(value, cls): - return value - if isinstance(value, str): - value = value.lower() - try: - return cls(value) - except ValueError: - raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") - raise ValueError( - f"Invalid value type: {type(value)}. Must be an instance of " - f"{cls.__name__} or str." - ) + @classmethod + def list(cls): + return list(map(lambda c: c.value, cls)) + + @classmethod + def from_value(cls, value: SourceType | str) -> SourceType: + if isinstance(value, cls): + return value + if isinstance(value, str): + value = value.lower() + try: + return cls(value) + except ValueError: + raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}") + raise ValueError( + f"Invalid value type: {type(value)}. Must be an instance of " + f"{cls.__name__} or str." + ) @dataclass From f6cea28f1f74b6ff438696b79f3a539518183287 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 20:45:19 -0400 Subject: [PATCH 096/128] UPDATE: Updated file names --- supervision/video/backend/{openCV.py => opencv.py} | 0 supervision/video/backend/{pyAV.py => pyav.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename supervision/video/backend/{openCV.py => opencv.py} (100%) rename supervision/video/backend/{pyAV.py => pyav.py} (100%) diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/opencv.py similarity index 100% rename from supervision/video/backend/openCV.py rename to supervision/video/backend/opencv.py diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyav.py similarity index 100% rename from supervision/video/backend/pyAV.py rename to supervision/video/backend/pyav.py From 048ebc0cb253008f67850d19694e2d1f574e23b5 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 20:47:33 -0400 Subject: [PATCH 097/128] Precommit error fix --- supervision/video/backend/opencv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py index 8ec8ee80bc..3011925b62 100644 --- a/supervision/video/backend/opencv.py +++ b/supervision/video/backend/opencv.py @@ -176,7 +176,7 @@ def __init__( Raises: RuntimeError: If the writer cannot be opened. """ - if render_audio or render_audio == False: + if render_audio or not render_audio: raise ValueError( "OpenCV backend does not support audio. " "Please use `pyav` backend instead or set `render_audio=None`" From de62eb8f4d70bf4c0e4290f19ebcb7c1c6d726ab Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 20:50:29 -0400 Subject: [PATCH 098/128] FIX: render_audio opencv param fix --- supervision/video/backend/opencv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py index 3011925b62..2b5527f413 100644 --- a/supervision/video/backend/opencv.py +++ b/supervision/video/backend/opencv.py @@ -176,7 +176,7 @@ def __init__( Raises: RuntimeError: If the writer cannot be opened. """ - if render_audio or not render_audio: + if render_audio or render_audio is False: raise ValueError( "OpenCV backend does not support audio. " "Please use `pyav` backend instead or set `render_audio=None`" From afea7c045056773011f5eabee85d13ce614cb4f1 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 21:06:22 -0400 Subject: [PATCH 099/128] UPDATE: Updated docstrings --- supervision/video/backend/__init__.py | 5 ++ supervision/video/backend/opencv.py | 94 +++++++++++++-------- supervision/video/backend/pyav.py | 113 +++++++++++++++++--------- supervision/video/core.py | 35 +++++--- supervision/video/utils.py | 61 ++++++++++---- 5 files changed, 206 insertions(+), 102 deletions(-) diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index 0ec704dbc1..7fa432ad25 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -13,6 +13,11 @@ class Backend(Enum): """ Enumeration of Backends. + + Attributes: + PYAV (str): PyAV backend (powered by FFmpeg, supports audio rendering) + OPENCV (str): OpenCV backend + """ PYAV = "pyav" diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py index 2b5527f413..69d87b37f1 100644 --- a/supervision/video/backend/opencv.py +++ b/supervision/video/backend/opencv.py @@ -9,14 +9,24 @@ class OpenCVBackend(BaseBackend): """ - OpenCV-based implementation of the video backend interface. + OpenCV-based video backend implementation for video capture and processing. + + This backend provides video reading capabilities using OpenCV's VideoCapture. + It supports: + - Local video files + - Webcam streams + - RTSP network streams + + Attributes: + cap (cv2.VideoCapture): OpenCV video capture instance. + video_info (VideoInfo): Metadata about the video source. + writer (class): Reference to the OpenCVWriter class for video writing. + path (str | int): Path to the video source or webcam index. - Provides methods for opening video sources, reading frames, seeking, - grabbing, and retrieving metadata using OpenCV. """ def __init__(self): - """Initialize with no active capture, writer, or path.""" + """Initialize the OpenCV backend with no active capture.""" self.cap = None self.video_info = None self.writer = OpenCVWriter @@ -24,10 +34,11 @@ def __init__(self): def open(self, path: str | int) -> None: """ - Open a video source and initialize capture. + Open a video source for reading. Args: - path (str | int): Path to a video file, RTSP URL, or webcam index. + path (str | int): Path to video file, RTSP URL, or webcam index. + Webcam indices are typically 0 for default camera. Raises: RuntimeError: If the source cannot be opened. @@ -54,19 +65,23 @@ def open(self, path: str | int) -> None: def isOpened(self) -> bool: """ - Check if the video source is currently open. + Check if the video source is currently open and available. Returns: - bool: True if the source is open, False otherwise. + bool: True if source is open and ready for reading, False otherwise. """ return self.cap.isOpened() def _set_video_info(self) -> VideoInfo: """ - Extract and store video metadata from the open capture. + Extract and store video metadata from the opened source. Returns: - VideoInfo: Video properties such as width, height, FPS, and frame count. + VideoInfo: Object containing: + - width (int): Frame width in pixels + - height (int): Frame height in pixels + - fps (int): Frames per second + - total_frames (int): Total frame count (0 for streams) Raises: RuntimeError: If no source is open. @@ -83,10 +98,10 @@ def _set_video_info(self) -> VideoInfo: def info(self) -> VideoInfo: """ - Get the stored video metadata. + Retrieve stored video metadata. Returns: - VideoInfo: Metadata for the open source. + VideoInfo: Video properties including dimensions, FPS, and frame count. Raises: RuntimeError: If no source is open. @@ -97,12 +112,12 @@ def info(self) -> VideoInfo: def read(self) -> tuple[bool, np.ndarray]: """ - Read the next frame from the source. + Read and decode the next frame from the video source. Returns: tuple[bool, np.ndarray]: - - bool: True if a frame was read successfully. - - np.ndarray: The frame in BGR format. + - bool: True if frame was read successfully, False at end of stream + - np.ndarray: Frame data in BGR format (height, width, 3) Raises: RuntimeError: If no source is open. @@ -113,10 +128,12 @@ def read(self) -> tuple[bool, np.ndarray]: def grab(self) -> bool: """ - Grab the next frame without decoding. + Advance to the next frame without decoding. + + Useful for quickly skipping frames when pixel data isn't needed. Returns: - bool: True if the frame pointer advanced successfully. + bool: True if frame was advanced successfully, False otherwise Raises: RuntimeError: If no source is open. @@ -127,10 +144,12 @@ def grab(self) -> bool: def seek(self, frame_idx: int) -> None: """ - Jump to a specific frame. + Seek to a specific frame index. + + Note: Seeking may be imprecise with compressed video formats. Args: - frame_idx (int): Zero-based frame index to seek to. + frame_idx (int): Zero-based index of target frame. Raises: RuntimeError: If no source is open. @@ -140,7 +159,7 @@ def seek(self, frame_idx: int) -> None: self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx) def release(self) -> None: - """Release capture resources.""" + """Release the video capture resources.""" if self.cap is not None and self.cap.isOpened(): self.cap.release() self.cap = None @@ -148,10 +167,10 @@ def release(self) -> None: class OpenCVWriter(BaseWriter): """ - Video writer implementation using OpenCV's VideoWriter. + OpenCV-based video writer for creating video files. - Supports configurable codecs, frame sizes, and FPS, with a fallback - to "mp4v" if the specified codec fails. + This writer provides basic video encoding capabilities using OpenCV's VideoWriter. + Note: Does not support audio writing - use pyAVWriter for audio support. """ def __init__( @@ -164,17 +183,22 @@ def __init__( render_audio: bool | None = None, ): """ - Initialize the writer. + Initialize the video writer. Args: - filename (str): Output video file path. - fps (int): Output frames per second. - frame_size (tuple[int, int]): Frame dimensions (width, height). - codec (str, optional): FourCC codec code. Defaults to "mp4v". - backend (OpenCVBackend | None, optional): Backend instance. Defaults to None + filename (str): Output video file path (e.g., "output.mp4"). + fps (int): Target frames per second for output video. + frame_size (tuple[int, int]): (width, height) of output frames. + codec (str, optional): FourCC codec code (default "mp4v"). + backend (OpenCVBackend, optional): Unused (for API compatibility). + render_audio (bool, optional): Must be None (OpenCV doesn't support audio). Raises: - RuntimeError: If the writer cannot be opened. + ValueError: If render_audio is specified (not supported). + RuntimeError: If writer cannot be initialized. + + Note: + Falls back to "mp4v" codec if specified codec fails. """ if render_audio or render_audio is False: raise ValueError( @@ -194,20 +218,22 @@ def __init__( raise RuntimeError(f"Cannot open video writer for file: {filename}") def __enter__(self): + """Enable context manager support (with statement).""" return self def __exit__(self, exc_type, exc_value, traceback): + """Ensure proper cleanup when exiting context.""" self.close() def write(self, frame: np.ndarray) -> None: """ - Write a frame to the output. + Write a single frame to the output video. Args: - frame (np.ndarray): Frame in BGR format. + frame (np.ndarray): Frame data in BGR format (height, width, 3). """ self.writer.write(frame) def close(self) -> None: - """Release writer resources.""" - self.writer.release() + """Finalize and close the output video file.""" + self.writer.release() \ No newline at end of file diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 26c47dedbd..888038075f 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -16,14 +16,22 @@ class pyAVBackend(BaseBackend): """ - PyAV-based implementation of the `BaseBackend` interface. + PyAV-based implementation of the `BaseBackend` interface for video processing. - This backend handles video capture, frame reading, seeking, and writing - operations using the PyAV library. Supports local video files, webcams, - and RTSP streams. + This backend provides video capture and frame reading capabilities using the PyAV + library, which is a Pythonic binding for FFmpeg. It supports: + - Local video files + - Webcam streams (platform-specific) + - RTSP network streams """ def __init__(self): + """ + Initialize the pyAVBackend instance. + + Raises: + RuntimeError: If PyAV (`av` module) is not installed. + """ super().__init__() if av is None: @@ -46,7 +54,7 @@ def open(self, path: str | int) -> None: the necessary components for decoding and reading frames. Args: - path (str | int): Path to the video file, RTSP URL, or webcam index. + path (str | int): Path to the video file, RTSP URL, or webcam path. Raises: RuntimeError: If the video source cannot be opened. @@ -56,8 +64,17 @@ def open(self, path: str | int) -> None: _format = None def is_webcam_path(path: str) -> tuple[bool, str]: + """ + Determine if the path refers to a webcam and get platform-specific format. + + Args: + path (str): The path to check. + + Returns: + tuple[bool, str]: (True if webcam, FFmpeg format string) + """ if not isinstance(path, str): - return False + return False, None system = platform.system() path_lower = path.lower() @@ -69,7 +86,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]: elif system == "Darwin": return path_lower.isdigit(), "avfoundation" else: - return False + return False, None isWebcam, ffmpeg_os_format = is_webcam_path(path=path) if isWebcam: @@ -107,15 +124,24 @@ def is_webcam_path(path: str) -> tuple[bool, str]: raise RuntimeError(f"Cannot open video source: {path}") from e def isOpened(self) -> bool: - """Check if the video source has been successfully opened.""" + """ + Check if the video source has been successfully opened. + + Returns: + bool: True if video source is opened and ready, False otherwise. + """ return self.container is not None and self.stream is not None def _set_video_info(self) -> VideoInfo: """ - Extract video information from the opened source. + Extract and calculate video information from the opened source. Returns: - VideoInfo: Object containing width, height, fps, and frame count. + VideoInfo: Object containing: + - width (int): Frame width in pixels + - height (int): Frame height in pixels + - fps (int): Frames per second (estimated if not available) + - total_frames (int | None): Total frame count if available Raises: RuntimeError: If the video source is not opened. @@ -127,20 +153,20 @@ def _set_video_info(self) -> VideoInfo: height = self.stream.height fps = float(self.stream.average_rate or self.stream.guessed_rate) if fps <= 0: - fps = 30 + fps = 30 # Default FPS if cannot be determined total_frames = self.stream.frames if total_frames == 0: - total_frames = None + total_frames = None # Unknown frame count return VideoInfo(width, height, round(fps), total_frames) def info(self) -> VideoInfo: """ - Retrieve video information. + Retrieve video information for the opened source. Returns: - VideoInfo: Video properties for the opened source. + VideoInfo: Video properties including dimensions, FPS, and frame count. Raises: RuntimeError: If the video source is not opened. @@ -155,8 +181,8 @@ def read(self) -> tuple[bool, np.ndarray]: Returns: tuple[bool, np.ndarray]: - - `bool`: True if a frame was read successfully, False if end of stream. - - `np.ndarray`: Frame data in BGR format (H, W, 3). + - bool: True if frame was read successfully, False at end of stream + - np.ndarray: Frame data in BGR format with shape (height, width, 3) Raises: RuntimeError: If the video source is not opened. @@ -174,12 +200,12 @@ def read(self) -> tuple[bool, np.ndarray]: def grab(self) -> bool: """ - Grab the next frame packet without decoding it. + Advance to the next frame packet without decoding it. - Useful for skipping frames quickly without the overhead of decoding. + This is useful for quickly skipping frames when decoding isn't needed. Returns: - bool: True if a frame packet was grabbed successfully, False otherwise. + bool: True if frame packet was advanced, False at end of stream Raises: RuntimeError: If the video source is not opened. @@ -199,8 +225,9 @@ def seek(self, frame_idx: int) -> None: """ Seek to a specific frame index in the video. - This uses keyframe-based seeking, then decodes forward to the exact - requested frame. + Uses keyframe-based seeking followed by sequential decoding to reach + the exact requested frame. This is more efficient than sequential seeking + but may be slower for very large jumps. Args: frame_idx (int): Zero-based index of the target frame. @@ -249,6 +276,9 @@ def _prepend_frame(first_frame, gen): def release(self) -> None: """ Release the video source and free all associated resources. + + This closes the video container and resets all internal state. + Should be called when finished with the video source. """ if self.container: self.container.close() @@ -259,10 +289,14 @@ def release(self) -> None: class pyAVWriter(BaseWriter): """ - PyAV-based video writer. + PyAV-based video writer for creating video files with optional audio. - Writes frames to a video file with optional audio from a backend source. - Uses finer timestamp granularity (milliseconds) for smoother video playback. + This writer provides high-quality video encoding with precise frame timing + (millisecond accuracy) and supports audio muxing from a source video. + + Methods: + write(frame): Write a video frame. + close(): Finalize and close the video file. """ def __init__( @@ -279,10 +313,11 @@ def __init__( Args: filename (str): Path to the output video file. - fps (int): Frames per second for the output video. - frame_size (tuple[int, int]): Width and height of the video frames. + fps (int): Target frames per second for the output video. + frame_size (tuple[int, int]): (width, height) of output frames. codec (str, optional): Video codec name (default "h264"). - backend (pyAVBackend, optional): Backend providing audio stream. + backend (pyAVBackend, optional): Source backend for audio muxing. + render_audio (bool, optional): Whether to include audio (default True if available). Raises: RuntimeError: If the output file cannot be created. @@ -331,19 +366,19 @@ def __init__( raise RuntimeError(f"Cannot open video writer for file: {filename}") from e def __enter__(self): - """Enable use as a context manager.""" + """Enable use as a context manager (with statement).""" return self def __exit__(self, exc_type, exc_value, traceback): - """Close the writer on context exit.""" + """Ensure proper cleanup when exiting context.""" self.close() def write(self, frame: np.ndarray) -> None: """ - Write a single video frame. + Write a single video frame to the output file. Args: - frame (np.ndarray): Frame data in BGR format (H, W, 3). + frame (np.ndarray): Frame data in BGR format (height, width, 3). """ # Calculate PTS as milliseconds: frame_index * (1000 ms / fps) pts = int(self.frame_idx * (1000 / self.fps)) @@ -364,25 +399,26 @@ def close(self) -> None: Finalize the video file, mux audio with adjusted timestamps to sync with video, and close the container. """ - def rescale_timestamp(value, src_tb, dst_tb): """ - Rescale timestamp value from source timebase to destination timebase. + Rescale timestamp between timebases. Args: - value (int): Timestamp value (PTS or DTS). - src_tb (Fraction): Source time base. - dst_tb (Fraction): Destination time base. + value (int): Original timestamp value + src_tb (Fraction): Source timebase + dst_tb (Fraction): Destination timebase Returns: - int: Rescaled timestamp. + int: Rescaled timestamp """ return int(value * src_tb / dst_tb) + # Flush any remaining video packets packets = self.stream.encode() for packet in packets: self.container.mux(packet) + # Calculate audio speed adjustment factor if needed speed_factor = 1.0 try: @@ -411,6 +447,7 @@ def rescale_timestamp(value, src_tb, dst_tb): except Exception: speed_factor = 1.0 + # Process and mux audio packets with timestamp adjustments if self.audio_stream_out and speed_factor != 1.0: for packet in self.audio_packets: if packet.pts is not None: @@ -438,4 +475,4 @@ def rescale_timestamp(value, src_tb, dst_tb): packet.stream = self.audio_stream_out self.container.mux(packet) - self.container.close() + self.container.close() \ No newline at end of file diff --git a/supervision/video/core.py b/supervision/video/core.py index e366e5be4e..13647263c4 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -15,7 +15,7 @@ class Video: A high-level interface for reading, processing, and writing video files or streams. Attributes: - info (VideoInfo): Metadata about the video. + info (VideoInfo): Metadata about the opened video (e.g., FPS, resolution, duration). source (str | int): Path to the video file or index of the camera device. backend (BackendTypes): Video backend used for I/O operations. """ @@ -28,12 +28,15 @@ def __init__( self, source: str | int, backend: Backend | str = Backend.OPENCV ) -> None: """ - Initialize the Video object. + Initialize the Video object and open the source. Args: source (str | int): Path to a video file or index of a camera device. - backend (BackendLiteral, optional): Backend type for video I/O. - Defaults to "opencv". + backend (Backend | str, optional): Backend type or name for video I/O. + Defaults to Backend.OPENCV. + + Raises: + ValueError: If the specified backend is not supported. """ self.backend = BackendDict.get(Backend.from_value(backend)) if self.backend is None: @@ -48,10 +51,10 @@ def __init__( def __iter__(self): """ - Make the Video object iterable over frames. + Make the Video object directly iterable over frames. Yields: - np.ndarray: The next frame in the video. + np.ndarray: The next frame in the video stream. """ return self.backend.frames() @@ -67,12 +70,13 @@ def sink( Args: target_path (str): Output file path for the video. - info (VideoInfo): Video information including resolution and FPS. + info (VideoInfo): Video metadata including resolution and FPS. codec (str, optional): FourCC video codec code. If None, the backend's default codec is used. + render_audio (bool | None, optional): Whether to include audio if supported. Returns: - BaseWriter: Video writer instance for writing frames. + WriterTypes: Video writer instance for writing frames. """ return self.backend.writer( target_path, info.fps, info.resolution_wh, codec, self.backend, render_audio @@ -86,7 +90,7 @@ def frames( resolution_wh: tuple[int, int] | None = None, ): """ - Generate frames from the video with optional skipping, cropping, and resizing. + Generate frames from the video with optional skipping, seeking, and resizing. Args: stride (int, optional): Number of frames to skip between each yield. @@ -99,6 +103,9 @@ def frames( Yields: np.ndarray: The next frame in the video. + + Raises: + RuntimeError: If the video has not been opened. """ if self.backend.cap is None: raise RuntimeError("Video not opened yet.") @@ -109,6 +116,7 @@ def frames( is_live_stream = total_frames is None or total_frames <= 0 if is_live_stream: + # Live stream handling while True: for _ in range(stride - 1): if not self.backend.grab(): @@ -120,6 +128,7 @@ def frames( frame = cv2.resize(frame, resolution_wh) yield frame else: + # Video file handling if end is None or end > total_frames: end = total_frames @@ -147,12 +156,13 @@ def save( """ Process and save video frames to a file. + Reads frames from the source, applies the given `callback` function to each + frame, and writes the processed frames to the specified output file. + Args: target_path (str): Output file path for the processed video. callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in - a numpy ndarray representation of a video frame and an - int index of the frame and returns a processed numpy ndarray - representation of the frame. + a video frame (numpy array) and its frame index, and returns a processed frame. fps (int | None, optional): Frames per second of the output video. If None, uses the original FPS. progress_message (str, optional): Message displayed in the progress bar. @@ -161,6 +171,7 @@ def save( Defaults to False. codec (str | None, optional): FourCC video codec code. If None, uses the backend's default codec. + render_audio (bool | None, optional): Whether to include audio if supported. Raises: RuntimeError: If the video has not been opened. diff --git a/supervision/video/utils.py b/supervision/video/utils.py index 9b90c8dec8..cb9cdbc10e 100644 --- a/supervision/video/utils.py +++ b/supervision/video/utils.py @@ -19,11 +19,37 @@ class SourceType(Enum): RTSP = "rtsp" @classmethod - def list(cls): + def list(cls) -> list[str]: + """ + Get a list of all supported source type values. + + Returns: + list[str]: List of enum values as lowercase strings. + + Example: + >>> SourceType.list() + ['video_file', 'webcam', 'rtsp'] + """ return list(map(lambda c: c.value, cls)) @classmethod def from_value(cls, value: SourceType | str) -> SourceType: + """ + Convert a string or SourceType instance to a SourceType enum member. + + Args: + value (SourceType | str): The value to convert. + + Returns: + SourceType: Corresponding SourceType enum member. + + Raises: + ValueError: If the value is invalid or not a supported type. + + Example: + >>> SourceType.from_value("webcam") + + """ if isinstance(value, cls): return value if isinstance(value, str): @@ -48,23 +74,18 @@ class VideoInfo: height (int): Height of the video in pixels. fps (int): Frames per second of the video. total_frames (int | None): Total number of frames, or None if unknown. - SourceType (SourceType | None): Source type: VIDEO_FILE, WEBCAM, RTSP. - - Methods: - from_video_path(video file, webcam, RTSP, or None). - - Examples: - ```python - import supervision as sv - - video_info = sv.VideoInfo.from_video_path("video.mp4") - - print(video_info) - # VideoInfo(width=3840, height=2160, fps=25, total_frames=538) - - print(video_info.resolution_wh) - # (3840, 2160) - ``` + SourceType (SourceType | None): Source type (VIDEO_FILE, WEBCAM, or RTSP). + + Properties: + resolution_wh (tuple[int, int]): The (width, height) tuple for the video. + + Example: + >>> import supervision as sv + >>> video_info = sv.VideoInfo.from_video_path("video.mp4") + >>> print(video_info) + VideoInfo(width=3840, height=2160, fps=25, total_frames=538) + >>> video_info.resolution_wh + (3840, 2160) """ width: int @@ -80,5 +101,9 @@ def resolution_wh(self) -> tuple[int, int]: Returns: tuple[int, int]: The video dimensions in pixels. + + Example: + >>> VideoInfo(width=1920, height=1080, fps=30).resolution_wh + (1920, 1080) """ return self.width, self.height From 6208f3dbf88d83121b832d70bf7b1a5187d07bf5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 01:07:02 +0000 Subject: [PATCH 100/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/__init__.py | 2 +- supervision/video/backend/opencv.py | 2 +- supervision/video/backend/pyav.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index 7fa432ad25..20f90998bd 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -13,7 +13,7 @@ class Backend(Enum): """ Enumeration of Backends. - + Attributes: PYAV (str): PyAV backend (powered by FFmpeg, supports audio rendering) OPENCV (str): OpenCV backend diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py index 69d87b37f1..6d49b668a2 100644 --- a/supervision/video/backend/opencv.py +++ b/supervision/video/backend/opencv.py @@ -236,4 +236,4 @@ def write(self, frame: np.ndarray) -> None: def close(self) -> None: """Finalize and close the output video file.""" - self.writer.release() \ No newline at end of file + self.writer.release() diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 888038075f..d856d00888 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -399,6 +399,7 @@ def close(self) -> None: Finalize the video file, mux audio with adjusted timestamps to sync with video, and close the container. """ + def rescale_timestamp(value, src_tb, dst_tb): """ Rescale timestamp between timebases. @@ -475,4 +476,4 @@ def rescale_timestamp(value, src_tb, dst_tb): packet.stream = self.audio_stream_out self.container.mux(packet) - self.container.close() \ No newline at end of file + self.container.close() From 9cb2170a42b2a78d3fffaae2bbde9a2b4c31a3e4 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 21:11:43 -0400 Subject: [PATCH 101/128] UPDATE: Updated docstrings --- supervision/video/backend/pyav.py | 2 +- supervision/video/core.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index d856d00888..3fe0ead6b8 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -317,7 +317,7 @@ def __init__( frame_size (tuple[int, int]): (width, height) of output frames. codec (str, optional): Video codec name (default "h264"). backend (pyAVBackend, optional): Source backend for audio muxing. - render_audio (bool, optional): Whether to include audio (default True if available). + render_audio (bool, optional): Include audio (default True if available). Raises: RuntimeError: If the output file cannot be created. diff --git a/supervision/video/core.py b/supervision/video/core.py index 13647263c4..04afb7d887 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -15,7 +15,7 @@ class Video: A high-level interface for reading, processing, and writing video files or streams. Attributes: - info (VideoInfo): Metadata about the opened video (e.g., FPS, resolution, duration). + info (VideoInfo): Metadata about the opened video. source (str | int): Path to the video file or index of the camera device. backend (BackendTypes): Video backend used for I/O operations. """ @@ -162,7 +162,7 @@ def save( Args: target_path (str): Output file path for the processed video. callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in - a video frame (numpy array) and its frame index, and returns a processed frame. + a video frame (numpy array) and its frame index, and returns a frame. fps (int | None, optional): Frames per second of the output video. If None, uses the original FPS. progress_message (str, optional): Message displayed in the progress bar. From 70b73d8c5c6f912d79e6614ab21990b54b278037 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Mon, 11 Aug 2025 21:32:07 -0400 Subject: [PATCH 102/128] UPDATE: Added .show() --- supervision/video/core.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/supervision/video/core.py b/supervision/video/core.py index 04afb7d887..eaca4f6930 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -211,3 +211,35 @@ def save( writer.write(frame=result_frame) writer.close() + + def show(self, resolution_wh: tuple[int, int] | None = None): + """ + Display video frames in a window with interactive playback controls. + + This method streams video frames to an OpenCV window, allowing real-time + visualization. Press 'q' to quit playback. The method handles various + display-related exceptions gracefully. + + Args: + resolution_wh (tuple[int, int] | None): Optional target resolution as + (width, height) tuple. If None, uses native video resolution. + Note: Aspect ratio may not be preserved. + """ + try: + for frame in self.frames(resolution_wh=resolution_wh): + cv2.imshow(str(self.source), frame) + key = cv2.waitKey(1) & 0xFF + + if key == ord('q'): + break + + cv2.destroyAllWindows() + except cv2.error as e: + if "The function is not implemented" in str(e) or "could not connect to display" in str(e).lower(): + print("Error: No display found or GUI support not available.") + else: + print("OpenCV error:", e) + except Exception as e: + print("Error:", e) + finally: + cv2.destroyAllWindows() \ No newline at end of file From 01f8b2c40c524a5daa455cc19c6f0e0f111096d3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Aug 2025 01:32:58 +0000 Subject: [PATCH 103/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/core.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index eaca4f6930..ed570c51fb 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -221,7 +221,7 @@ def show(self, resolution_wh: tuple[int, int] | None = None): display-related exceptions gracefully. Args: - resolution_wh (tuple[int, int] | None): Optional target resolution as + resolution_wh (tuple[int, int] | None): Optional target resolution as (width, height) tuple. If None, uses native video resolution. Note: Aspect ratio may not be preserved. """ @@ -230,16 +230,19 @@ def show(self, resolution_wh: tuple[int, int] | None = None): cv2.imshow(str(self.source), frame) key = cv2.waitKey(1) & 0xFF - if key == ord('q'): + if key == ord("q"): break cv2.destroyAllWindows() except cv2.error as e: - if "The function is not implemented" in str(e) or "could not connect to display" in str(e).lower(): + if ( + "The function is not implemented" in str(e) + or "could not connect to display" in str(e).lower() + ): print("Error: No display found or GUI support not available.") else: print("OpenCV error:", e) except Exception as e: print("Error:", e) finally: - cv2.destroyAllWindows() \ No newline at end of file + cv2.destroyAllWindows() From 11fc8a542d7345560344bb33fefe6a9afdaa7fb8 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 13 Aug 2025 02:07:26 -0400 Subject: [PATCH 104/128] UPDATE: Add support for IPython display --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 9fb58c765d..a7c46fcc23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ metrics = [ "pandas>=2.0.0", ] ffmpeg = ["av (>=15.0.0)"] +rich_display = ["ipython (>=8.15,<9.0)"] [dependency-groups] dev = [ From 1dac635d72143679b9e8dcb9ab7008cfbc87e05d Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 13 Aug 2025 02:55:10 -0400 Subject: [PATCH 105/128] UPDATE: Added support for headless machines and notebook for sv.Video().show() --- supervision/video/core.py | 75 ++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 13 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index ed570c51fb..0871d8af46 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -3,12 +3,19 @@ from collections.abc import Callable import cv2 +import os +import sys import numpy as np from tqdm.auto import tqdm from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes from supervision.video.utils import SourceType, VideoInfo +try: + import IPython.display as iPyDisplay +except ImportError: + iPyDisplay = None + class Video: """ @@ -225,7 +232,38 @@ def show(self, resolution_wh: tuple[int, int] | None = None): (width, height) tuple. If None, uses native video resolution. Note: Aspect ratio may not be preserved. """ - try: + # On Jupyter Notebook + def in_notebook(): + argv = getattr(sys, "argv", []) + return any("jupyter" in arg or "ipykernel_launcher" in arg for arg in argv) + + def is_Headless(): + if sys.platform.startswith("linux"): + return not bool(os.environ.get("DISPLAY", "")) + if sys.platform == "darwin": + return not bool(os.environ.get("TERM_PROGRAM") or os.environ.get("DISPLAY")) + if sys.platform.startswith("win"): + try: + import ctypes + user32 = ctypes.windll.user32 + return user32.GetDesktopWindow() == 0 + except Exception: + return True + return True + + # On a notebook + if in_notebook(): + if iPyDisplay is None: + raise ValueError("IPython is not installed") + + self.save("temp.mp4", lambda frame, _: frame, show_progress=False) + + width = resolution_wh[0] if resolution_wh is not None else None + height = resolution_wh[1] if resolution_wh is not None else None + iPyDisplay.display(iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height)) + os.remove("temp.mp4") + # On a computer + elif not is_Headless(): for frame in self.frames(resolution_wh=resolution_wh): cv2.imshow(str(self.source), frame) key = cv2.waitKey(1) & 0xFF @@ -233,16 +271,27 @@ def show(self, resolution_wh: tuple[int, int] | None = None): if key == ord("q"): break + while True: + if cv2.getWindowProperty(str(self.source), cv2.WND_PROP_VISIBLE) < 1: + break + cv2.waitKey(100) cv2.destroyAllWindows() - except cv2.error as e: - if ( - "The function is not implemented" in str(e) - or "could not connect to display" in str(e).lower() - ): - print("Error: No display found or GUI support not available.") - else: - print("OpenCV error:", e) - except Exception as e: - print("Error:", e) - finally: - cv2.destroyAllWindows() + # On a headless system + else: + if iPyDisplay is None: + raise ValueError("IPython is not installed") + + self.save("temp.mp4", lambda frame, _: frame, show_progress=False) + + width = resolution_wh[0] if resolution_wh is not None else None + height = resolution_wh[1] if resolution_wh is not None else None + + display_video = (iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height)) + html_code = display_video._repr_html_() + export_path = "video_display.html" + + with open(export_path, "w") as f: + f.write(html_code) + print(f"Video exported as HTML to {export_path}") + + os.remove("temp.mp4") From dedb68aa263e88ae84faef912aa58d90a1c21260 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Aug 2025 06:57:31 +0000 Subject: [PATCH 106/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/core.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 0871d8af46..4e204123f4 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -1,10 +1,10 @@ from __future__ import annotations +import os +import sys from collections.abc import Callable import cv2 -import os -import sys import numpy as np from tqdm.auto import tqdm @@ -232,35 +232,41 @@ def show(self, resolution_wh: tuple[int, int] | None = None): (width, height) tuple. If None, uses native video resolution. Note: Aspect ratio may not be preserved. """ + # On Jupyter Notebook def in_notebook(): argv = getattr(sys, "argv", []) return any("jupyter" in arg or "ipykernel_launcher" in arg for arg in argv) - + def is_Headless(): if sys.platform.startswith("linux"): return not bool(os.environ.get("DISPLAY", "")) if sys.platform == "darwin": - return not bool(os.environ.get("TERM_PROGRAM") or os.environ.get("DISPLAY")) + return not bool( + os.environ.get("TERM_PROGRAM") or os.environ.get("DISPLAY") + ) if sys.platform.startswith("win"): try: import ctypes + user32 = ctypes.windll.user32 return user32.GetDesktopWindow() == 0 except Exception: return True return True - + # On a notebook if in_notebook(): if iPyDisplay is None: raise ValueError("IPython is not installed") - + self.save("temp.mp4", lambda frame, _: frame, show_progress=False) width = resolution_wh[0] if resolution_wh is not None else None height = resolution_wh[1] if resolution_wh is not None else None - iPyDisplay.display(iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height)) + iPyDisplay.display( + iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height) + ) os.remove("temp.mp4") # On a computer elif not is_Headless(): @@ -280,13 +286,15 @@ def is_Headless(): else: if iPyDisplay is None: raise ValueError("IPython is not installed") - + self.save("temp.mp4", lambda frame, _: frame, show_progress=False) - + width = resolution_wh[0] if resolution_wh is not None else None height = resolution_wh[1] if resolution_wh is not None else None - display_video = (iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height)) + display_video = iPyDisplay.Video( + "temp.mp4", embed=True, width=width, height=height + ) html_code = display_video._repr_html_() export_path = "video_display.html" From 035196a1821ab0905e60160cd50c8cf26ae20e39 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 13 Aug 2025 03:06:23 -0400 Subject: [PATCH 107/128] UPDATE: Updated error msg for IPython --- supervision/video/core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 4e204123f4..36999bcb64 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -258,7 +258,9 @@ def is_Headless(): # On a notebook if in_notebook(): if iPyDisplay is None: - raise ValueError("IPython is not installed") + raise RuntimeError( + "IPython (`IPython` module) is not installed. Run `pip install IPython`." + ) self.save("temp.mp4", lambda frame, _: frame, show_progress=False) @@ -285,7 +287,9 @@ def is_Headless(): # On a headless system else: if iPyDisplay is None: - raise ValueError("IPython is not installed") + raise RuntimeError( + "IPython (`IPython` module) is not installed. Run `pip install IPython`." + ) self.save("temp.mp4", lambda frame, _: frame, show_progress=False) From a1218e8b98eb45d4262b37baab841f08817a2638 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Aug 2025 07:06:44 +0000 Subject: [PATCH 108/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 36999bcb64..df7c27fe85 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -259,8 +259,8 @@ def is_Headless(): if in_notebook(): if iPyDisplay is None: raise RuntimeError( - "IPython (`IPython` module) is not installed. Run `pip install IPython`." - ) + "IPython (`IPython` module) is not installed. Run `pip install IPython`." + ) self.save("temp.mp4", lambda frame, _: frame, show_progress=False) @@ -288,8 +288,8 @@ def is_Headless(): else: if iPyDisplay is None: raise RuntimeError( - "IPython (`IPython` module) is not installed. Run `pip install IPython`." - ) + "IPython (`IPython` module) is not installed. Run `pip install IPython`." + ) self.save("temp.mp4", lambda frame, _: frame, show_progress=False) From 76d814501bfc81fb61bbba6cfdb6abe4c0f6869a Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 13 Aug 2025 03:07:36 -0400 Subject: [PATCH 109/128] Precommit --- supervision/video/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index df7c27fe85..9794d8e726 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -259,7 +259,8 @@ def is_Headless(): if in_notebook(): if iPyDisplay is None: raise RuntimeError( - "IPython (`IPython` module) is not installed. Run `pip install IPython`." + "IPython (`IPython` module) is not installed. " \ + "Run `pip install IPython`." ) self.save("temp.mp4", lambda frame, _: frame, show_progress=False) @@ -288,7 +289,8 @@ def is_Headless(): else: if iPyDisplay is None: raise RuntimeError( - "IPython (`IPython` module) is not installed. Run `pip install IPython`." + "IPython (`IPython` module) is not installed. " \ + "Run `pip install IPython`." ) self.save("temp.mp4", lambda frame, _: frame, show_progress=False) From 32a5e2c137680327916aa9e4a1251086ce03cda2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Aug 2025 07:08:18 +0000 Subject: [PATCH 110/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 9794d8e726..140eb9e153 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -259,7 +259,7 @@ def is_Headless(): if in_notebook(): if iPyDisplay is None: raise RuntimeError( - "IPython (`IPython` module) is not installed. " \ + "IPython (`IPython` module) is not installed. " "Run `pip install IPython`." ) @@ -289,7 +289,7 @@ def is_Headless(): else: if iPyDisplay is None: raise RuntimeError( - "IPython (`IPython` module) is not installed. " \ + "IPython (`IPython` module) is not installed. " "Run `pip install IPython`." ) From 7680cae6e7ac7be37790e1f70dd1e872b1853048 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Thu, 14 Aug 2025 14:42:04 -0400 Subject: [PATCH 111/128] UPDATE: Fixed av module install --- supervision/video/backend/pyav.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 3fe0ead6b8..ade9583a36 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -35,9 +35,13 @@ def __init__(self): super().__init__() if av is None: - raise RuntimeError( - "PyAV (`av` module) is not installed. Run `pip install av`." - ) + try: + import av + except ImportError: + av = None + raise RuntimeError( + "PyAV (`av` module) is not installed. Run `pip install av`." + ) self.container = None self.stream = None From 68fb727d9c506da476347da0daf6048e5f812135 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Thu, 14 Aug 2025 14:47:42 -0400 Subject: [PATCH 112/128] UPDATE: Revert av error --- supervision/video/backend/pyav.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index ade9583a36..3fe0ead6b8 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -35,13 +35,9 @@ def __init__(self): super().__init__() if av is None: - try: - import av - except ImportError: - av = None - raise RuntimeError( - "PyAV (`av` module) is not installed. Run `pip install av`." - ) + raise RuntimeError( + "PyAV (`av` module) is not installed. Run `pip install av`." + ) self.container = None self.stream = None From 3f403fe0e8ab6af1286a996be1b04a36c9ab251c Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Thu, 14 Aug 2025 15:02:58 -0400 Subject: [PATCH 113/128] UPDATE: updated av module getter --- supervision/video/backend/pyav.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 3fe0ead6b8..b3822180a5 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -2,17 +2,27 @@ import platform import re +import sys from fractions import Fraction -try: - import av -except ImportError: - av = None import numpy as np from supervision.video.backend.base import BaseBackend, BaseWriter from supervision.video.utils import SourceType, VideoInfo +av = None + +def get_av(): + if 'av' in sys.modules and sys.modules['av'] is None: + del sys.modules['av'] + + try: + import av + return av + except ImportError: + raise RuntimeError( + "PyAV (`av` module) is not installed. Run `pip install av`." + ) class pyAVBackend(BaseBackend): """ @@ -34,10 +44,8 @@ def __init__(self): """ super().__init__() - if av is None: - raise RuntimeError( - "PyAV (`av` module) is not installed. Run `pip install av`." - ) + global av + av = get_av() self.container = None self.stream = None From c9badc1ccea9d5fa6757cd56351e0f64bafd4950 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 14 Aug 2025 19:03:20 +0000 Subject: [PATCH 114/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/pyav.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index b3822180a5..80152c18fa 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -12,17 +12,18 @@ av = None + def get_av(): - if 'av' in sys.modules and sys.modules['av'] is None: - del sys.modules['av'] + if "av" in sys.modules and sys.modules["av"] is None: + del sys.modules["av"] try: import av + return av except ImportError: - raise RuntimeError( - "PyAV (`av` module) is not installed. Run `pip install av`." - ) + raise RuntimeError("PyAV (`av` module) is not installed. Run `pip install av`.") + class pyAVBackend(BaseBackend): """ From 7a507e589bf6df6c8dced18379b99ffb90c616a2 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Thu, 14 Aug 2025 15:18:47 -0400 Subject: [PATCH 115/128] UPDATE: Updated .show() with more configuration params --- supervision/video/core.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 140eb9e153..4e07b7872b 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -219,7 +219,12 @@ def save( writer.close() - def show(self, resolution_wh: tuple[int, int] | None = None): + def show(self, resolution_wh: tuple[int, int] | None = None, + callback: Callable[[np.ndarray, int], np.ndarray] = lambda f, i: f, + fps: int | None = None, + progress_message: str = "Processing video", + show_progress: bool = False, + render_audio: bool | None = None): """ Display video frames in a window with interactive playback controls. @@ -263,7 +268,13 @@ def is_Headless(): "Run `pip install IPython`." ) - self.save("temp.mp4", lambda frame, _: frame, show_progress=False) + self.save("temp.mp4", + callback=callback, + fps=fps, + progress_message=progress_message, + show_progress=show_progress, + render_audio=render_audio + ) width = resolution_wh[0] if resolution_wh is not None else None height = resolution_wh[1] if resolution_wh is not None else None @@ -293,7 +304,13 @@ def is_Headless(): "Run `pip install IPython`." ) - self.save("temp.mp4", lambda frame, _: frame, show_progress=False) + self.save("temp.mp4", + callback=callback, + fps=fps, + progress_message=progress_message, + show_progress=show_progress, + render_audio=render_audio + ) width = resolution_wh[0] if resolution_wh is not None else None height = resolution_wh[1] if resolution_wh is not None else None From 824fa9833da6be3620c25315b6d693c90f7f1c2c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 14 Aug 2025 19:19:12 +0000 Subject: [PATCH 116/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/core.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 4e07b7872b..b3148f0736 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -219,12 +219,15 @@ def save( writer.close() - def show(self, resolution_wh: tuple[int, int] | None = None, + def show( + self, + resolution_wh: tuple[int, int] | None = None, callback: Callable[[np.ndarray, int], np.ndarray] = lambda f, i: f, fps: int | None = None, progress_message: str = "Processing video", show_progress: bool = False, - render_audio: bool | None = None): + render_audio: bool | None = None, + ): """ Display video frames in a window with interactive playback controls. @@ -268,13 +271,14 @@ def is_Headless(): "Run `pip install IPython`." ) - self.save("temp.mp4", - callback=callback, - fps=fps, - progress_message=progress_message, - show_progress=show_progress, - render_audio=render_audio - ) + self.save( + "temp.mp4", + callback=callback, + fps=fps, + progress_message=progress_message, + show_progress=show_progress, + render_audio=render_audio, + ) width = resolution_wh[0] if resolution_wh is not None else None height = resolution_wh[1] if resolution_wh is not None else None @@ -304,13 +308,14 @@ def is_Headless(): "Run `pip install IPython`." ) - self.save("temp.mp4", - callback=callback, - fps=fps, - progress_message=progress_message, - show_progress=show_progress, - render_audio=render_audio - ) + self.save( + "temp.mp4", + callback=callback, + fps=fps, + progress_message=progress_message, + show_progress=show_progress, + render_audio=render_audio, + ) width = resolution_wh[0] if resolution_wh is not None else None height = resolution_wh[1] if resolution_wh is not None else None From 8aa364ced34493038f106c5e1a91402fa7193cc5 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Thu, 14 Aug 2025 21:49:38 -0400 Subject: [PATCH 117/128] UPDATE: Updated IPython import --- supervision/video/core.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index b3148f0736..e05fda97e8 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -11,11 +11,18 @@ from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes from supervision.video.utils import SourceType, VideoInfo -try: - import IPython.display as iPyDisplay -except ImportError: - iPyDisplay = None - +def get_iPython(): + if "IPython" in sys.modules and sys.modules["IPython"] is None: + del sys.modules["IPython"] + + try: + import IPython + return IPython + except ImportError: + raise RuntimeError( + "IPython (`IPython` module) is not installed. " + "Run `pip install IPython`." + ) class Video: """ @@ -265,11 +272,7 @@ def is_Headless(): # On a notebook if in_notebook(): - if iPyDisplay is None: - raise RuntimeError( - "IPython (`IPython` module) is not installed. " - "Run `pip install IPython`." - ) + iPyDisplay = get_iPython().display self.save( "temp.mp4", From 2bec9916f9bb87a3648ba9a2c142aa9b4b0af874 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 15 Aug 2025 01:49:58 +0000 Subject: [PATCH 118/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/core.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index e05fda97e8..2b3cceb1bf 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -11,18 +11,20 @@ from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes from supervision.video.utils import SourceType, VideoInfo + def get_iPython(): if "IPython" in sys.modules and sys.modules["IPython"] is None: del sys.modules["IPython"] try: import IPython + return IPython except ImportError: raise RuntimeError( - "IPython (`IPython` module) is not installed. " - "Run `pip install IPython`." - ) + "IPython (`IPython` module) is not installed. Run `pip install IPython`." + ) + class Video: """ From 43830f7dfff2c6f99d130ce46a8c14c7022d4f80 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Thu, 14 Aug 2025 22:02:08 -0400 Subject: [PATCH 119/128] BUG: Frame iteration fix --- supervision/video/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 2b3cceb1bf..ded78a99e5 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -72,7 +72,7 @@ def __iter__(self): Yields: np.ndarray: The next frame in the video stream. """ - return self.backend.frames() + return self.frames() def sink( self, From f82745561948ba44ed9bda5c90e1f8986d291a0b Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 31 Aug 2025 00:45:17 -0400 Subject: [PATCH 120/128] UPDATE: Updated audio stream to use atempo reflecting changes in fps --- supervision/video/backend/pyav.py | 130 ++++++++++-------------------- 1 file changed, 43 insertions(+), 87 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 80152c18fa..f5766a66f4 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -112,7 +112,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]: try: self.container = av.open(path, format=_format) - self.audio_src_container = self.container + self.path = path self.stream = self.container.streams.video[0] self.stream.thread_type = "AUTO" self.cap = self.container @@ -333,6 +333,7 @@ def __init__( """ try: self.container = av.open(filename, mode="w") + self.path = filename self.backend = backend if render_audio is None: @@ -358,7 +359,6 @@ def __init__( render_audio and backend and backend.audio_stream - and backend.audio_src_container ): audio_codec_name = backend.audio_stream.codec_context.name audio_rate = backend.audio_stream.codec_context.rate @@ -366,11 +366,6 @@ def __init__( audio_codec_name, rate=audio_rate ) - # Buffer all audio packets from backend for muxing later - for packet in backend.audio_src_container.demux(backend.audio_stream): - if packet.dts is not None: - self.audio_packets.append(packet) - except Exception as e: raise RuntimeError(f"Cannot open video writer for file: {filename}") from e @@ -404,85 +399,46 @@ def write(self, frame: np.ndarray) -> None: self.container.mux(packet) def close(self) -> None: - """ - Finalize the video file, mux audio with adjusted timestamps to sync with video, - and close the container. - """ - - def rescale_timestamp(value, src_tb, dst_tb): - """ - Rescale timestamp between timebases. - - Args: - value (int): Original timestamp value - src_tb (Fraction): Source timebase - dst_tb (Fraction): Destination timebase - - Returns: - int: Rescaled timestamp - """ - return int(value * src_tb / dst_tb) - - # Flush any remaining video packets - packets = self.stream.encode() - for packet in packets: - self.container.mux(packet) - - # Calculate audio speed adjustment factor if needed - speed_factor = 1.0 - - try: - if ( - self.backend - and self.backend.audio_stream - and self.backend.audio_stream.duration - ): - orig_audio_duration = float( - self.backend.audio_stream.duration - * self.backend.audio_stream.time_base - ) - elif ( - self.backend - and self.backend.audio_src_container - and self.backend.audio_src_container.duration - ): - orig_audio_duration = self.backend.audio_src_container.duration / 1000 - else: - orig_audio_duration = None - - new_video_duration = self.frame_idx * (1 / self.fps) - - if orig_audio_duration and new_video_duration > 0: - speed_factor = orig_audio_duration / new_video_duration - except Exception: - speed_factor = 1.0 - - # Process and mux audio packets with timestamp adjustments - if self.audio_stream_out and speed_factor != 1.0: - for packet in self.audio_packets: - if packet.pts is not None: - packet.pts = rescale_timestamp( - packet.pts, packet.time_base, self.audio_stream_out.time_base - ) - packet.pts = int(packet.pts / speed_factor) - if packet.dts is not None: - packet.dts = rescale_timestamp( - packet.dts, packet.time_base, self.audio_stream_out.time_base - ) - packet.dts = int(packet.dts / speed_factor) - packet.stream = self.audio_stream_out - self.container.mux(packet) - elif self.audio_stream_out: - for packet in self.audio_packets: - if packet.pts is not None: - packet.pts = rescale_timestamp( - packet.pts, packet.time_base, self.audio_stream_out.time_base - ) - if packet.dts is not None: - packet.dts = rescale_timestamp( - packet.dts, packet.time_base, self.audio_stream_out.time_base - ) - packet.stream = self.audio_stream_out - self.container.mux(packet) + if (self.audio_stream_out is not None): + src = av.open(self.backend.path) + src_fps = src.streams.video[0].average_rate or src.streams.video[0].guessed_rate + audio_stream = src.streams.audio[0] + + graph = av.filter.Graph() + graph.link_nodes( + graph.add_abuffer(template=audio_stream), + graph.add("atempo", str(self.fps/src_fps)), + graph.add("abuffersink"), + ).configure() + + for packet in src.demux(audio_stream): + for frame in packet.decode(): + graph.push(frame) + + while True: + try: + f = graph.pull() + except Exception: + break + for pkt in self.audio_stream_out.encode(f): + self.container.mux(pkt) + + graph.push(None) + while True: + try: + f = graph.pull() + except Exception: + break + for pkt in self.audio_stream_out.encode(f): + self.container.mux(pkt) + + for pkt in self.audio_stream_out.encode(None): + self.container.mux(pkt) + + src.close() + + # flush video + for pkt in self.stream.encode(): + self.container.mux(pkt) self.container.close() From 2d909151e751573d345abb6149a0698a8c6754be Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 31 Aug 2025 00:47:41 -0400 Subject: [PATCH 121/128] UPDATE: Updated docstrings --- supervision/video/backend/pyav.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index f5766a66f4..4d3a62a5d1 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -399,6 +399,24 @@ def write(self, frame: np.ndarray) -> None: self.container.mux(packet) def close(self) -> None: + """ + Finalize and close the video file, including audio processing if enabled. + + This method performs several critical operations: + 1. If audio is enabled, processes and muxes the audio stream from the source + 2. Applies tempo adjustment to match the output video FPS + 3. Flushes all remaining video frames from the encoder + 4. Properly closes the output container + + The audio processing uses FFmpeg filters to: + - Read audio from the original source + - Apply tempo scaling based on FPS differences between source and output + - Encode and mux the processed audio into the output file + + Note: + This method should always be called when finished writing frames. + It ensures proper file finalization and resource cleanup. + """ if (self.audio_stream_out is not None): src = av.open(self.backend.path) src_fps = src.streams.video[0].average_rate or src.streams.video[0].guessed_rate From e9ccca21c2d6a19ac4264ec7b8d90529c6670485 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 31 Aug 2025 04:48:10 +0000 Subject: [PATCH 122/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/pyav.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 4d3a62a5d1..81a3fd055d 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -355,11 +355,7 @@ def __init__( self.audio_stream_out = None self.audio_packets = [] - if ( - render_audio - and backend - and backend.audio_stream - ): + if render_audio and backend and backend.audio_stream: audio_codec_name = backend.audio_stream.codec_context.name audio_rate = backend.audio_stream.codec_context.rate self.audio_stream_out = self.container.add_stream( @@ -417,15 +413,17 @@ def close(self) -> None: This method should always be called when finished writing frames. It ensures proper file finalization and resource cleanup. """ - if (self.audio_stream_out is not None): + if self.audio_stream_out is not None: src = av.open(self.backend.path) - src_fps = src.streams.video[0].average_rate or src.streams.video[0].guessed_rate + src_fps = ( + src.streams.video[0].average_rate or src.streams.video[0].guessed_rate + ) audio_stream = src.streams.audio[0] graph = av.filter.Graph() graph.link_nodes( graph.add_abuffer(template=audio_stream), - graph.add("atempo", str(self.fps/src_fps)), + graph.add("atempo", str(self.fps / src_fps)), graph.add("abuffersink"), ).configure() From 9f115c48f299979d51001fad8499ad4b2330c8f6 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 31 Aug 2025 01:07:59 -0400 Subject: [PATCH 123/128] UPDATE: Changed backend type class and added ref to root --- supervision/__init__.py | 2 ++ supervision/video/backend/__init__.py | 30 +++++++++++++-------------- supervision/video/core.py | 12 +++++------ 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 6e2e329b75..7ae921ee3d 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -135,6 +135,7 @@ process_video, ) from supervision.video import Video, VideoInfo +from supervision.video.backend import VideoBackendType __all__ = [ "LMM", @@ -193,6 +194,7 @@ "VertexAnnotator", "VertexLabelAnnotator", "Video", + "VideoBackendType", "VideoInfo", "VideoSink", "approximate_polygon", diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index 20f90998bd..9509e916e8 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -6,11 +6,11 @@ from supervision.video.backend.opencv import OpenCVBackend, OpenCVWriter from supervision.video.backend.pyav import pyAVBackend, pyAVWriter -BackendTypes = Union[OpenCVBackend, pyAVBackend] -WriterTypes = Union[OpenCVWriter, pyAVWriter] +VideoBackendTypes = Union[OpenCVBackend, pyAVBackend] +VideoWriterTypes = Union[OpenCVWriter, pyAVWriter] -class Backend(Enum): +class VideoBackendType(Enum): """ Enumeration of Backends. @@ -28,7 +28,7 @@ def list(cls): return list(map(lambda c: c.value, cls)) @classmethod - def from_value(cls, value: Backend | str) -> Backend: + def from_value(cls, value: VideoBackendType | str) -> VideoBackendType: if isinstance(value, cls): return value if isinstance(value, str): @@ -43,20 +43,20 @@ def from_value(cls, value: Backend | str) -> Backend: ) -BackendDict = { - Backend.PYAV: pyAVBackend, - Backend.OPENCV: OpenCVBackend, +VideoBackendDict = { + VideoBackendType.PYAV: pyAVBackend, + VideoBackendType.OPENCV: OpenCVBackend, } -WriterDict = { - Backend.PYAV: pyAVWriter, - Backend.OPENCV: OpenCVWriter, +VideoWriterDict = { + VideoBackendType.PYAV: pyAVWriter, + VideoBackendType.OPENCV: OpenCVWriter, } __all__ = [ - "Backend", - "BackendDict", - "BackendTypes", - "WriterDict", - "WriterTypes", + "VideoBackendType", + "VideoBackendDict", + "VideoBackendTypes", + "VideoWriterDict", + "VideoWriterTypes", ] diff --git a/supervision/video/core.py b/supervision/video/core.py index ded78a99e5..ebbcee3eba 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -8,7 +8,7 @@ import numpy as np from tqdm.auto import tqdm -from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes +from supervision.video.backend import VideoBackend, VideoBackendDict, VideoBackendTypes, VideoWriterTypes from supervision.video.utils import SourceType, VideoInfo @@ -33,15 +33,15 @@ class Video: Attributes: info (VideoInfo): Metadata about the opened video. source (str | int): Path to the video file or index of the camera device. - backend (BackendTypes): Video backend used for I/O operations. + backend (VideoBackendTypes): Video backend used for I/O operations. """ info: VideoInfo source: str | int - backend: BackendTypes + backend: VideoBackendTypes def __init__( - self, source: str | int, backend: Backend | str = Backend.OPENCV + self, source: str | int, backend: VideoBackend | str = VideoBackend.OPENCV ) -> None: """ Initialize the Video object and open the source. @@ -54,7 +54,7 @@ def __init__( Raises: ValueError: If the specified backend is not supported. """ - self.backend = BackendDict.get(Backend.from_value(backend)) + self.backend = VideoBackendDict.get(VideoBackend.from_value(backend)) if self.backend is None: raise ValueError(f"Unsupported backend: {backend}") @@ -80,7 +80,7 @@ def sink( info: VideoInfo, codec: str | None = None, render_audio: bool | None = None, - ) -> WriterTypes: + ) -> VideoWriterTypes: """ Create a video writer for saving frames to a file. From c67aad3b9bea89fec63dbaf4f37df41fd51ba13f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 31 Aug 2025 05:08:20 +0000 Subject: [PATCH 124/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/__init__.py | 2 +- supervision/video/core.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py index 9509e916e8..5f5a83c04a 100644 --- a/supervision/video/backend/__init__.py +++ b/supervision/video/backend/__init__.py @@ -54,8 +54,8 @@ def from_value(cls, value: VideoBackendType | str) -> VideoBackendType: } __all__ = [ - "VideoBackendType", "VideoBackendDict", + "VideoBackendType", "VideoBackendTypes", "VideoWriterDict", "VideoWriterTypes", diff --git a/supervision/video/core.py b/supervision/video/core.py index ebbcee3eba..4ff60d1c00 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -8,7 +8,12 @@ import numpy as np from tqdm.auto import tqdm -from supervision.video.backend import VideoBackend, VideoBackendDict, VideoBackendTypes, VideoWriterTypes +from supervision.video.backend import ( + VideoBackend, + VideoBackendDict, + VideoBackendTypes, + VideoWriterTypes, +) from supervision.video.utils import SourceType, VideoInfo From ba9efc2c6a79b3afcf709e039da773c12dbaefc1 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 31 Aug 2025 01:10:31 -0400 Subject: [PATCH 125/128] BUG: Appending fixes for VideoBackend error --- supervision/video/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index ebbcee3eba..16e8c35a17 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -8,7 +8,7 @@ import numpy as np from tqdm.auto import tqdm -from supervision.video.backend import VideoBackend, VideoBackendDict, VideoBackendTypes, VideoWriterTypes +from supervision.video.backend import VideoBackendType, VideoBackendDict, VideoBackendTypes, VideoWriterTypes from supervision.video.utils import SourceType, VideoInfo @@ -41,7 +41,7 @@ class Video: backend: VideoBackendTypes def __init__( - self, source: str | int, backend: VideoBackend | str = VideoBackend.OPENCV + self, source: str | int, backend: VideoBackendType | str = VideoBackendType.OPENCV ) -> None: """ Initialize the Video object and open the source. @@ -54,7 +54,7 @@ def __init__( Raises: ValueError: If the specified backend is not supported. """ - self.backend = VideoBackendDict.get(VideoBackend.from_value(backend)) + self.backend = VideoBackendDict.get(VideoBackendType.from_value(backend)) if self.backend is None: raise ValueError(f"Unsupported backend: {backend}") From 9c7a9ecdff7a860315445daed5ae785d0dc2f0d7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 31 Aug 2025 05:12:01 +0000 Subject: [PATCH 126/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/core.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/supervision/video/core.py b/supervision/video/core.py index 16e8c35a17..c866ba2759 100644 --- a/supervision/video/core.py +++ b/supervision/video/core.py @@ -8,7 +8,12 @@ import numpy as np from tqdm.auto import tqdm -from supervision.video.backend import VideoBackendType, VideoBackendDict, VideoBackendTypes, VideoWriterTypes +from supervision.video.backend import ( + VideoBackendDict, + VideoBackendType, + VideoBackendTypes, + VideoWriterTypes, +) from supervision.video.utils import SourceType, VideoInfo @@ -41,7 +46,9 @@ class Video: backend: VideoBackendTypes def __init__( - self, source: str | int, backend: VideoBackendType | str = VideoBackendType.OPENCV + self, + source: str | int, + backend: VideoBackendType | str = VideoBackendType.OPENCV, ) -> None: """ Initialize the Video object and open the source. From fb2171ce835a555f9b41ae6d04135fb5e220056d Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Sun, 31 Aug 2025 17:52:49 -0400 Subject: [PATCH 127/128] UPDATE: Decompose playback speed into valid atempo chain --- supervision/video/backend/pyav.py | 32 ++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 81a3fd055d..0cdbbd16be 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -414,6 +414,25 @@ def close(self) -> None: It ensures proper file finalization and resource cleanup. """ if self.audio_stream_out is not None: + def atempo_chain(speed: float) -> list[str]: + if speed <= 0: + raise ValueError("Speed factor must be > 0") + + chain = [] + + while speed > 2.0: + chain.append("2.0") + speed /= 2.0 + + while speed < 0.5: + chain.append("0.5") + speed /= 0.5 + + if abs(speed - 1.0) > 1e-6: + chain.append(f"{speed:.6f}") + + return chain + src = av.open(self.backend.path) src_fps = ( src.streams.video[0].average_rate or src.streams.video[0].guessed_rate @@ -421,11 +440,14 @@ def close(self) -> None: audio_stream = src.streams.audio[0] graph = av.filter.Graph() - graph.link_nodes( - graph.add_abuffer(template=audio_stream), - graph.add("atempo", str(self.fps / src_fps)), - graph.add("abuffersink"), - ).configure() + filters = atempo_chain(self.fps / src_fps) + nodes = [graph.add_abuffer(template=audio_stream)] + for f in filters: + nodes.append(graph.add("atempo", f)) + + nodes.append(graph.add("abuffersink")) + graph.link_nodes(*nodes) + graph.configure() for packet in src.demux(audio_stream): for frame in packet.decode(): From c78e4f7021cab99e6f9bd1629075a421ce013145 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 31 Aug 2025 21:53:25 +0000 Subject: [PATCH 128/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/video/backend/pyav.py | 1 + 1 file changed, 1 insertion(+) diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py index 0cdbbd16be..4db1e0a9ba 100644 --- a/supervision/video/backend/pyav.py +++ b/supervision/video/backend/pyav.py @@ -414,6 +414,7 @@ def close(self) -> None: It ensures proper file finalization and resource cleanup. """ if self.audio_stream_out is not None: + def atempo_chain(speed: float) -> list[str]: if speed <= 0: raise ValueError("Speed factor must be > 0")