From 7fba113f11e5dfcdc2a4237faa1571b571bfba0c Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 30 Jul 2025 15:20:10 -0400
Subject: [PATCH 001/128] ADD: Added audio stream for process_video

---
 pyproject.toml             |  3 ++-
 supervision/utils/video.py | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index cae78492ac..787fa93ade 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,8 @@ dependencies = [
     "pillow>=9.4",
     "requests>=2.26.0",
     "tqdm>=4.62.3",
-    "opencv-python>=4.5.5.64"
+    "opencv-python>=4.5.5.64",
+    "imageio-ffmpeg (>=0.6.0,<0.7.0)"
 ]
 
 [project.urls]
diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 3b281b4e22..029de694c9 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -9,6 +9,12 @@
 import numpy as np
 from tqdm.auto import tqdm
 
+import subprocess
+import imageio_ffmpeg
+import os
+
+ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
+ffprobe_path = ffmpeg_path.replace("ffmpeg", "ffprobe")
 
 @dataclass
 class VideoInfo:
@@ -254,6 +260,34 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray:
                 result_frame = callback(frame, index)
                 sink.write_frame(frame=result_frame)
 
+    
+    def has_audio_stream(video_path):
+        result = subprocess.run(
+            [ffmpeg_path, "-i", video_path],
+            stderr=subprocess.PIPE,
+            stdout=subprocess.DEVNULL,
+            text=True
+        )
+
+        return "Audio:" in result.stderr
+    
+    if has_audio_stream(source_path):
+        video_input = target_path
+        audio_source = source_path
+        temp_output = "temp_output.mp4"
+        subprocess.run([
+            ffmpeg_path,
+            "-i", video_input,
+            "-i", audio_source,
+            "-map", "0:v",
+            "-map", "1:a",
+            "-c:v", "copy",
+            "-c:a", "aac",
+            "-shortest",
+            temp_output
+        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+        os.replace(temp_output, video_input)
 
 class FPSMonitor:
     """

From 8947f770b8b22867ce8066237a5064e508ff4cc7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 30 Jul 2025 19:30:13 +0000
Subject: [PATCH 002/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 47 +++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 029de694c9..499c7aed8a 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -1,21 +1,21 @@
 from __future__ import annotations
 
+import os
+import subprocess
 import time
 from collections import deque
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 
 import cv2
+import imageio_ffmpeg
 import numpy as np
 from tqdm.auto import tqdm
 
-import subprocess
-import imageio_ffmpeg
-import os
-
 ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
 ffprobe_path = ffmpeg_path.replace("ffmpeg", "ffprobe")
 
+
 @dataclass
 class VideoInfo:
     """
@@ -260,35 +260,46 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray:
                 result_frame = callback(frame, index)
                 sink.write_frame(frame=result_frame)
 
-    
     def has_audio_stream(video_path):
         result = subprocess.run(
             [ffmpeg_path, "-i", video_path],
             stderr=subprocess.PIPE,
             stdout=subprocess.DEVNULL,
-            text=True
+            text=True,
         )
 
         return "Audio:" in result.stderr
-    
+
     if has_audio_stream(source_path):
         video_input = target_path
         audio_source = source_path
         temp_output = "temp_output.mp4"
-        subprocess.run([
-            ffmpeg_path,
-            "-i", video_input,
-            "-i", audio_source,
-            "-map", "0:v",
-            "-map", "1:a",
-            "-c:v", "copy",
-            "-c:a", "aac",
-            "-shortest",
-            temp_output
-        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        subprocess.run(
+            [
+                ffmpeg_path,
+                "-i",
+                video_input,
+                "-i",
+                audio_source,
+                "-map",
+                "0:v",
+                "-map",
+                "1:a",
+                "-c:v",
+                "copy",
+                "-c:a",
+                "aac",
+                "-shortest",
+                temp_output,
+            ],
+            check=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
 
         os.replace(temp_output, video_input)
 
+
 class FPSMonitor:
     """
     A class for monitoring frames per second (FPS) to benchmark latency.

From 73b583684d710bdd9e629a0cc659ce8de99b0c82 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 30 Jul 2025 15:30:45 -0400
Subject: [PATCH 003/128] REMOVE: Removed ffprobe

---
 supervision/utils/video.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 029de694c9..ee057b69f3 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -14,7 +14,6 @@
 import os
 
 ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
-ffprobe_path = ffmpeg_path.replace("ffmpeg", "ffprobe")
 
 @dataclass
 class VideoInfo:

From 5e07794b4616264094d7b6c504255228d159a9d7 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Fri, 1 Aug 2025 22:51:24 -0400
Subject: [PATCH 004/128] UPDATE: Added a new Video class with OpenCV writer
 and backend

---
 supervision/__init__.py    |   2 +
 supervision/utils/video.py | 211 +++++++++++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index ab45651ac9..48dcfc49e5 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -134,6 +134,7 @@
     VideoSink,
     get_video_frames_generator,
     process_video,
+    Video
 )
 
 __all__ = [
@@ -194,6 +195,7 @@
     "VertexLabelAnnotator",
     "VideoInfo",
     "VideoSink",
+    "Video",
     "approximate_polygon",
     "box_iou",
     "box_iou_batch",
diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 93b62f12ee..b30bacde0c 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -1,11 +1,13 @@
 from __future__ import annotations
 
+from typing import Protocol, Any, Tuple
 import os
 import subprocess
 import time
 from collections import deque
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
+from enum import Enum, auto
 
 import cv2
 import imageio_ffmpeg
@@ -14,6 +16,11 @@
 
 ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
 
+class SOURCE_TYPE(Enum):
+    VIDEO_FILE = "VIDEO_FILE"
+    WEBCAM = "WEBCAM"
+    RTSP = "RTSP"
+
 
 @dataclass
 class VideoInfo:
@@ -46,6 +53,7 @@ class VideoInfo:
     height: int
     fps: int
     total_frames: int | None = None
+    source_type: SOURCE_TYPE | None = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> VideoInfo:
@@ -65,6 +73,209 @@ def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
 
+class OpenCVBackend(Protocol):
+    def __init__(self):
+        self.cap = None
+        self.video_info = None
+        self.writer = None
+        self.path = None
+
+    def open(self, path: str) -> None:
+        self.cap = cv2.VideoCapture(path)
+        self.path = path
+
+        if not self.cap.isOpened():
+            raise RuntimeError(f"Cannot open video source: {path}")
+        self.video_info = self._set_video_info()
+        
+        if isinstance(path, int):
+            self.video_info.source_type = SOURCE_TYPE.WEBCAM
+        elif isinstance(path, str):
+            self.video_info.source_type = SOURCE_TYPE.RTSP if path.lower().startswith("rtsp://") else SOURCE_TYPE.VIDEO_FILE
+        else:
+            raise ValueError("Unsupported source type")
+
+    def isOpened(self):
+        return self.cap.isOpened()
+
+    def _set_video_info(self) -> VideoInfo:
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+        width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = self.cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        return VideoInfo(width, height, int(fps), total_frames)
+
+    def info(self) -> VideoInfo:
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+        return self.video_info
+
+    def read(self) -> Tuple[bool, np.ndarray]:
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        ret, frame = self.cap.read()
+        return ret, frame
+
+    def grab(self) -> bool:
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        return self.cap.grab()
+
+    def seek(self, frame_idx: int) -> None:
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+
+    def release(self) -> None:
+        if self.cap is not None and self.cap.isOpened():
+            self.cap.release()
+            self.cap = None
+    
+    def frames(self, *, start=0, end=None, stride=1, resolution_wh=None):
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        total_frames = self.video_info.total_frames if self.video_info else 0
+        is_live_stream = (total_frames <= 0)
+
+        if is_live_stream:
+            while True:
+                for _ in range(stride - 1):
+                    if not self.grab():
+                        return
+                ret, frame = self.read()
+                if not ret:
+                    return
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+        else:
+            if end is None or end > total_frames:
+                end = total_frames
+
+            frame_idx = start
+            while frame_idx < end:
+                self.seek(frame_idx)
+                ret, frame = self.read()
+                if not ret:
+                    break
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+                frame_idx += stride
+
+    def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False):
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
+            raise ValueError("Only video files can be saved.")  
+        
+        if self.writer is not None:
+            self.writer.close()
+            self.writer = None
+
+        source_codec = self.cap.get(cv2.CAP_PROP_FOURCC)
+
+        if fps is None:
+            fps = self.video_info.fps
+
+        self.writer = OpenCVWriter(target_path, fps, self.video_info.resolution_wh, source_codec)
+        total_frames = min(self.video_info.total_frames, fps)
+        frames_generator = self.frames()
+        for index, frame in enumerate(
+            tqdm(
+                frames_generator,
+                total=total_frames,
+                disable=not show_progress,
+                desc=progress_message,
+            )
+        ):
+            result_frame = callback(frame, index)
+            self.writer.write(frame=result_frame)
+
+        def has_audio_stream(video_path):
+            result = subprocess.run(
+                [ffmpeg_path, "-i", video_path],
+                stderr=subprocess.PIPE,
+                stdout=subprocess.DEVNULL,
+                text=True,
+            )
+
+            return "Audio:" in result.stderr
+
+        if has_audio_stream(self.path):
+            video_input = target_path
+            audio_source = self.path
+            temp_output = "temp_output.mp4"
+            subprocess.run(
+                [
+                    ffmpeg_path,
+                    "-i",
+                    video_input,
+                    "-i",
+                    audio_source,
+                    "-map",
+                    "0:v",
+                    "-map",
+                    "1:a",
+                    "-c:v",
+                    "copy",
+                    "-c:a",
+                    "aac",
+                    "-shortest",
+                    temp_output,
+                ],
+                check=True,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+
+            os.replace(temp_output, video_input)
+
+
+class OpenCVWriter:
+    def __init__(self, filename: str, fps: float, frame_size: tuple[int, int], codec: str = "mp4v"):
+        try:
+            fourcc_int = cv2.VideoWriter_fourcc(*codec)
+            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
+        except:
+            fourcc_int = cv2.VideoWriter_fourcc(*"mp4v")
+            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
+        if not self.writer.isOpened():
+            raise RuntimeError(f"Cannot open video writer for file: {filename}")
+
+    def write(self, frame: np.ndarray) -> None:
+        self.writer.write(frame)
+
+    def close(self) -> None:
+        self.writer.release()
+
+class Video:
+    info: VideoInfo
+    source: str | int
+    backend: OpenCVBackend
+
+    def __init__(self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"):
+        if backend == "opencv":
+            self.backend = OpenCVBackend()
+        
+        self.backend.open(source)
+        self.info = self.backend.video_info
+        self.source = source
+
+    def __iter__(self):
+        return self.backend.frames()
+    
+    def frames(self, stride=1, start=0, end=None, resolution_wh=None):
+        return self.backend.frames(stride=stride, start=start, end=end, resolution_wh=resolution_wh)
+
+    def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False):
+        self.backend.save(target_path=target_path, callback=callback, fps=fps, progress_message=progress_message, show_progress=show_progress)
+        
+
 class VideoSink:
     """
     Context manager that saves video frames to a file using OpenCV.

From b2096d06111860bbd1559338b807d482d3b4ba6f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 2 Aug 2025 02:52:43 +0000
Subject: [PATCH 005/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/__init__.py    |  4 +-
 supervision/utils/video.py | 76 ++++++++++++++++++++++++++++----------
 2 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 48dcfc49e5..57c48050cf 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -130,11 +130,11 @@
 from supervision.utils.notebook import plot_image, plot_images_grid
 from supervision.utils.video import (
     FPSMonitor,
+    Video,
     VideoInfo,
     VideoSink,
     get_video_frames_generator,
     process_video,
-    Video
 )
 
 __all__ = [
@@ -193,9 +193,9 @@
     "TriangleAnnotator",
     "VertexAnnotator",
     "VertexLabelAnnotator",
+    "Video",
     "VideoInfo",
     "VideoSink",
-    "Video",
     "approximate_polygon",
     "box_iou",
     "box_iou_batch",
diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index b30bacde0c..186b22fb99 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -1,13 +1,13 @@
 from __future__ import annotations
 
-from typing import Protocol, Any, Tuple
 import os
 import subprocess
 import time
 from collections import deque
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
-from enum import Enum, auto
+from enum import Enum
+from typing import Protocol, Tuple
 
 import cv2
 import imageio_ffmpeg
@@ -16,6 +16,7 @@
 
 ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
 
+
 class SOURCE_TYPE(Enum):
     VIDEO_FILE = "VIDEO_FILE"
     WEBCAM = "WEBCAM"
@@ -87,11 +88,15 @@ def open(self, path: str) -> None:
         if not self.cap.isOpened():
             raise RuntimeError(f"Cannot open video source: {path}")
         self.video_info = self._set_video_info()
-        
+
         if isinstance(path, int):
             self.video_info.source_type = SOURCE_TYPE.WEBCAM
         elif isinstance(path, str):
-            self.video_info.source_type = SOURCE_TYPE.RTSP if path.lower().startswith("rtsp://") else SOURCE_TYPE.VIDEO_FILE
+            self.video_info.source_type = (
+                SOURCE_TYPE.RTSP
+                if path.lower().startswith("rtsp://")
+                else SOURCE_TYPE.VIDEO_FILE
+            )
         else:
             raise ValueError("Unsupported source type")
 
@@ -112,7 +117,7 @@ def info(self) -> VideoInfo:
             raise RuntimeError("Video not opened yet.")
         return self.video_info
 
-    def read(self) -> Tuple[bool, np.ndarray]:
+    def read(self) -> tuple[bool, np.ndarray]:
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
         ret, frame = self.cap.read()
@@ -132,13 +137,13 @@ def release(self) -> None:
         if self.cap is not None and self.cap.isOpened():
             self.cap.release()
             self.cap = None
-    
+
     def frames(self, *, start=0, end=None, stride=1, resolution_wh=None):
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
 
         total_frames = self.video_info.total_frames if self.video_info else 0
-        is_live_stream = (total_frames <= 0)
+        is_live_stream = total_frames <= 0
 
         if is_live_stream:
             while True:
@@ -166,13 +171,20 @@ def frames(self, *, start=0, end=None, stride=1, resolution_wh=None):
                 yield frame
                 frame_idx += stride
 
-    def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False):
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+    ):
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
 
         if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
-            raise ValueError("Only video files can be saved.")  
-        
+            raise ValueError("Only video files can be saved.")
+
         if self.writer is not None:
             self.writer.close()
             self.writer = None
@@ -182,7 +194,9 @@ def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarra
         if fps is None:
             fps = self.video_info.fps
 
-        self.writer = OpenCVWriter(target_path, fps, self.video_info.resolution_wh, source_codec)
+        self.writer = OpenCVWriter(
+            target_path, fps, self.video_info.resolution_wh, source_codec
+        )
         total_frames = min(self.video_info.total_frames, fps)
         frames_generator = self.frames()
         for index, frame in enumerate(
@@ -237,7 +251,13 @@ def has_audio_stream(video_path):
 
 
 class OpenCVWriter:
-    def __init__(self, filename: str, fps: float, frame_size: tuple[int, int], codec: str = "mp4v"):
+    def __init__(
+        self,
+        filename: str,
+        fps: float,
+        frame_size: tuple[int, int],
+        codec: str = "mp4v",
+    ):
         try:
             fourcc_int = cv2.VideoWriter_fourcc(*codec)
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
@@ -253,28 +273,46 @@ def write(self, frame: np.ndarray) -> None:
     def close(self) -> None:
         self.writer.release()
 
+
 class Video:
     info: VideoInfo
     source: str | int
     backend: OpenCVBackend
 
-    def __init__(self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"):
+    def __init__(
+        self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"
+    ):
         if backend == "opencv":
             self.backend = OpenCVBackend()
-        
+
         self.backend.open(source)
         self.info = self.backend.video_info
         self.source = source
 
     def __iter__(self):
         return self.backend.frames()
-    
+
     def frames(self, stride=1, start=0, end=None, resolution_wh=None):
-        return self.backend.frames(stride=stride, start=start, end=end, resolution_wh=resolution_wh)
+        return self.backend.frames(
+            stride=stride, start=start, end=end, resolution_wh=resolution_wh
+        )
+
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+    ):
+        self.backend.save(
+            target_path=target_path,
+            callback=callback,
+            fps=fps,
+            progress_message=progress_message,
+            show_progress=show_progress,
+        )
 
-    def save(self, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], fps: int = None, progress_message: str = "Processing video", show_progress: bool = False):
-        self.backend.save(target_path=target_path, callback=callback, fps=fps, progress_message=progress_message, show_progress=show_progress)
-        
 
 class VideoSink:
     """

From 9fb709818cce7b8ff820ab162d1fc26f9a334521 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Fri, 1 Aug 2025 22:56:47 -0400
Subject: [PATCH 006/128] Precommit

---
 supervision/utils/video.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 186b22fb99..434217544b 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -7,7 +7,7 @@
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 from enum import Enum
-from typing import Protocol, Tuple
+from typing import Optional, Protocol, Tuple
 
 import cv2
 import imageio_ffmpeg
@@ -175,7 +175,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
+        fps: Optional[int] = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):
@@ -261,7 +261,7 @@ def __init__(
         try:
             fourcc_int = cv2.VideoWriter_fourcc(*codec)
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
-        except:
+        except Exception:
             fourcc_int = cv2.VideoWriter_fourcc(*"mp4v")
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
         if not self.writer.isOpened():
@@ -301,7 +301,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
+        fps: Optional[int] = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):

From 850a2c6d3ceb948f1e970344da7854c190b02a59 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 2 Aug 2025 02:57:56 +0000
Subject: [PATCH 007/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 434217544b..1ebcb085af 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -7,7 +7,7 @@
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Protocol, Tuple
+from typing import Optional, Protocol
 
 import cv2
 import imageio_ffmpeg
@@ -175,7 +175,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: Optional[int] = None,
+        fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):
@@ -301,7 +301,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: Optional[int] = None,
+        fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):

From 46900f81232eafe5362d690b8f183cd41bc261ca Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Fri, 1 Aug 2025 22:59:24 -0400
Subject: [PATCH 008/128] Precommit

---
 supervision/utils/video.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 434217544b..275cfe0ef2 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -7,7 +7,7 @@
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Protocol, Tuple
+from typing import Protocol, Tuple
 
 import cv2
 import imageio_ffmpeg
@@ -175,7 +175,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: Optional[int] = None,
+        fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):
@@ -301,7 +301,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: Optional[int] = None,
+        fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):

From c70039471b68c3e1903cd06e60719a88e8cec489 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 2 Aug 2025 03:00:27 +0000
Subject: [PATCH 009/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index c37d01021b..d7ae0e5bcd 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -7,7 +7,7 @@
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Protocol, Tuple
+from typing import Protocol
 
 import cv2
 import imageio_ffmpeg

From fce8ade8cbb6a480024a98a7d5a50c927b6b341e Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 2 Aug 2025 02:19:45 -0400
Subject: [PATCH 010/128] UPDATE: Fixed incomplete write closing

---
 supervision/utils/video.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index d7ae0e5bcd..65b886eedc 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -197,7 +197,7 @@ def save(
         self.writer = OpenCVWriter(
             target_path, fps, self.video_info.resolution_wh, source_codec
         )
-        total_frames = min(self.video_info.total_frames, fps)
+        total_frames = self.video_info.total_frames
         frames_generator = self.frames()
         for index, frame in enumerate(
             tqdm(
@@ -210,6 +210,8 @@ def save(
             result_frame = callback(frame, index)
             self.writer.write(frame=result_frame)
 
+        self.writer.close()
+
         def has_audio_stream(video_path):
             result = subprocess.run(
                 [ffmpeg_path, "-i", video_path],

From f86f4f2ed53a504b3384db628b173c241ebe2813 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 2 Aug 2025 02:25:03 -0400
Subject: [PATCH 011/128] ADD: Docstrings

---
 supervision/utils/video.py | 145 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 142 insertions(+), 3 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 65b886eedc..0f1ae12a9e 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -75,13 +75,29 @@ def resolution_wh(self) -> tuple[int, int]:
 
 
 class OpenCVBackend(Protocol):
+    """
+    Protocol class defining the interface for video backend implementations using OpenCV.
+    Handles video capture, frame reading, seeking, and writing operations.
+    """
+
     def __init__(self):
+        """Initialize the OpenCV backend with empty video capture and writer objects."""
         self.cap = None
         self.video_info = None
         self.writer = None
         self.path = None
 
     def open(self, path: str) -> None:
+        """
+        Open a video source and initialize the video capture object.
+
+        Args:
+            path (str): Path to the video file, RTSP URL, or camera index.
+        
+        Raises:
+            RuntimeError: If unable to open the video source.
+            ValueError: If the source type is not supported.
+        """
         self.cap = cv2.VideoCapture(path)
         self.path = path
 
@@ -100,10 +116,23 @@ def open(self, path: str) -> None:
         else:
             raise ValueError("Unsupported source type")
 
-    def isOpened(self):
+    def isOpened(self) -> bool:
+        """Check if the video source is opened successfully.
+
+        Returns:
+            bool: True if the video source is opened, False otherwise.
+        """
         return self.cap.isOpened()
 
     def _set_video_info(self) -> VideoInfo:
+        """Set up video information from the opened video source.
+
+        Returns:
+            VideoInfo: Object containing video properties like width, height, fps, etc.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
         width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -113,32 +142,82 @@ def _set_video_info(self) -> VideoInfo:
         return VideoInfo(width, height, int(fps), total_frames)
 
     def info(self) -> VideoInfo:
+        """Get video information.
+
+        Returns:
+            VideoInfo: Object containing video properties.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
         return self.video_info
 
     def read(self) -> tuple[bool, np.ndarray]:
+        """Read a frame from the video source.
+
+        Returns:
+            tuple[bool, np.ndarray]: A tuple containing:
+                - bool: True if frame was successfully read
+                - np.ndarray: The video frame in BGR format
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
         ret, frame = self.cap.read()
         return ret, frame
 
     def grab(self) -> bool:
+        """Grab a frame from video source without decoding.
+
+        Returns:
+            bool: True if frame was successfully grabbed.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
         return self.cap.grab()
 
     def seek(self, frame_idx: int) -> None:
+        """Seek to a specific frame in the video.
+
+        Args:
+            frame_idx (int): Index of the frame to seek to (0-based).
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
         self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
 
     def release(self) -> None:
+        """Release the video capture resources."""
         if self.cap is not None and self.cap.isOpened():
             self.cap.release()
             self.cap = None
 
-    def frames(self, *, start=0, end=None, stride=1, resolution_wh=None):
+    def frames(self, *, start: int = 0, end: int | None = None, stride: int = 1, resolution_wh: tuple[int, int] | None = None):
+        """Generate frames from the video source.
+
+        Args:
+            start (int, optional): Starting frame index. Defaults to 0.
+            end (int | None, optional): Ending frame index. Defaults to None.
+            stride (int, optional): Number of frames to skip. Defaults to 1.
+            resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height).
+                If provided, frames will be resized. Defaults to None.
+
+        Yields:
+            np.ndarray: Video frames in BGR format.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
 
@@ -253,6 +332,13 @@ def has_audio_stream(video_path):
 
 
 class OpenCVWriter:
+    """A class to handle video writing operations using OpenCV's VideoWriter.
+
+    This class provides an interface to write frames to a video file using OpenCV,
+    with support for different codecs and automatic fallback to mp4v if the specified
+    codec fails.
+    """
+
     def __init__(
         self,
         filename: str,
@@ -260,6 +346,17 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str = "mp4v",
     ):
+        """Initialize the video writer.
+
+        Args:
+            filename (str): Path to the output video file.
+            fps (float): Frames per second for the output video.
+            frame_size (tuple[int, int]): Width and height of the output video frames.
+            codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
+
+        Raises:
+            RuntimeError: If the video writer cannot be initialized.
+        """
         try:
             fourcc_int = cv2.VideoWriter_fourcc(*codec)
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
@@ -270,13 +367,25 @@ def __init__(
             raise RuntimeError(f"Cannot open video writer for file: {filename}")
 
     def write(self, frame: np.ndarray) -> None:
+        """Write a frame to the video file.
+
+        Args:
+            frame (np.ndarray): The frame to write, in BGR format.
+        """
         self.writer.write(frame)
 
     def close(self) -> None:
+        """Release the video writer resources."""
         self.writer.release()
 
 
 class Video:
+    """High-level interface for video operations.
+
+    This class provides a convenient interface for video operations including
+    reading frames, saving processed videos, and video information access.
+    It uses OpenCVBackend as the default backend for video operations.
+    """
     info: VideoInfo
     source: str | int
     backend: OpenCVBackend
@@ -292,9 +401,26 @@ def __init__(
         self.source = source
 
     def __iter__(self):
+        """Make the Video class iterable over frames.
+
+        Returns:
+            Generator: A generator yielding video frames.
+        """
         return self.backend.frames()
 
-    def frames(self, stride=1, start=0, end=None, resolution_wh=None):
+    def frames(self, stride: int = 1, start: int = 0, end: int | None = None, resolution_wh: tuple[int, int] | None = None):
+        """Generate frames from the video.
+
+        Args:
+            stride (int, optional): Number of frames to skip. Defaults to 1.
+            start (int, optional): Starting frame index. Defaults to 0.
+            end (int | None, optional): Ending frame index. Defaults to None.
+            resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height).
+                If provided, frames will be resized. Defaults to None.
+
+        Returns:
+            Generator: A generator yielding video frames.
+        """
         return self.backend.frames(
             stride=stride, start=start, end=end, resolution_wh=resolution_wh
         )
@@ -307,6 +433,19 @@ def save(
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):
+        """Save processed video frames to a file.
+
+        Args:
+            target_path (str): Path where the processed video will be saved.
+            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
+                each frame. Takes frame and frame index as input, returns processed frame.
+            fps (int | None, optional): Output video FPS. If None, uses source video FPS.
+                Defaults to None.
+            progress_message (str, optional): Message to show in progress bar.
+                Defaults to "Processing video".
+            show_progress (bool, optional): Whether to show progress bar.
+                Defaults to False.
+        """
         self.backend.save(
             target_path=target_path,
             callback=callback,

From 22659774755d26519bc43691832db586cb05dbe3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 2 Aug 2025 06:25:22 +0000
Subject: [PATCH 012/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 0f1ae12a9e..a685504428 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -93,7 +93,7 @@ def open(self, path: str) -> None:
 
         Args:
             path (str): Path to the video file, RTSP URL, or camera index.
-        
+
         Raises:
             RuntimeError: If unable to open the video source.
             ValueError: If the source type is not supported.
@@ -202,7 +202,14 @@ def release(self) -> None:
             self.cap.release()
             self.cap = None
 
-    def frames(self, *, start: int = 0, end: int | None = None, stride: int = 1, resolution_wh: tuple[int, int] | None = None):
+    def frames(
+        self,
+        *,
+        start: int = 0,
+        end: int | None = None,
+        stride: int = 1,
+        resolution_wh: tuple[int, int] | None = None,
+    ):
         """Generate frames from the video source.
 
         Args:
@@ -386,6 +393,7 @@ class Video:
     reading frames, saving processed videos, and video information access.
     It uses OpenCVBackend as the default backend for video operations.
     """
+
     info: VideoInfo
     source: str | int
     backend: OpenCVBackend
@@ -408,7 +416,13 @@ def __iter__(self):
         """
         return self.backend.frames()
 
-    def frames(self, stride: int = 1, start: int = 0, end: int | None = None, resolution_wh: tuple[int, int] | None = None):
+    def frames(
+        self,
+        stride: int = 1,
+        start: int = 0,
+        end: int | None = None,
+        resolution_wh: tuple[int, int] | None = None,
+    ):
         """Generate frames from the video.
 
         Args:

From bf67bfaea1f254bd44bab277a04a4e64ec94f67b Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 2 Aug 2025 02:28:01 -0400
Subject: [PATCH 013/128] UPDATE: Allow for ffmpeg error passthrough

---
 supervision/utils/video.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index a685504428..2b22658279 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -301,7 +301,6 @@ def save(
         def has_audio_stream(video_path):
             result = subprocess.run(
                 [ffmpeg_path, "-i", video_path],
-                stderr=subprocess.PIPE,
                 stdout=subprocess.DEVNULL,
                 text=True,
             )
@@ -332,7 +331,6 @@ def has_audio_stream(video_path):
                 ],
                 check=True,
                 stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
             )
 
             os.replace(temp_output, video_input)

From ec4bd012faef2dc3a5c10a62d8872ea52aaf5bca Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 2 Aug 2025 02:36:08 -0400
Subject: [PATCH 014/128] UPDATE: Writer and Backend abstract class

---
 supervision/utils/video.py | 84 ++++++++++++++++++++++++++++++++++----
 1 file changed, 77 insertions(+), 7 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 2b22658279..4a8f19287d 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -7,6 +7,7 @@
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 from enum import Enum
+from abc import ABC, abstractmethod
 from typing import Protocol
 
 import cv2
@@ -74,14 +75,85 @@ def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
 
-class OpenCVBackend(Protocol):
+class Backend(ABC):
+    def __init__(self):
+        self.cap = None
+        self.video_info = None
+        self.writer = None
+        self.path = None
+
+    @abstractmethod
+    def open(self, path: str) -> None:
+        pass
+
+    @abstractmethod
+    def isOpened(self) -> bool:
+        pass
+
+    @abstractmethod
+    def _set_video_info(self) -> VideoInfo:
+        pass
+
+    @abstractmethod
+    def info(self) -> VideoInfo:
+        pass
+
+    @abstractmethod
+    def read(self) -> tuple[bool, np.ndarray]:
+        pass
+
+    @abstractmethod
+    def grab(self) -> bool:
+        pass
+
+    @abstractmethod
+    def seek(self, frame_idx: int) -> None:
+        pass
+
+    @abstractmethod
+    def release(self) -> None:
+        pass
+
+    @abstractmethod
+    def frames(
+        self,
+        *,
+        start: int = 0,
+        end: int | None = None,
+        stride: int = 1,
+        resolution_wh: tuple[int, int] | None = None,
+    ):
+        pass
+
+    @abstractmethod
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int | None = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+    ):
+        pass
+
+class Writer(ABC):
+    @abstractmethod
+    def write(self, frame: np.ndarray) -> None:
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        pass
+
+class OpenCVBackend(Backend):
     """
-    Protocol class defining the interface for video backend implementations using OpenCV.
-    Handles video capture, frame reading, seeking, and writing operations.
+    OpenCV implementation of the Backend interface.
+    Handles video capture, frame reading, seeking, and writing operations using OpenCV.
     """
 
     def __init__(self):
         """Initialize the OpenCV backend with empty video capture and writer objects."""
+        super().__init__()
         self.cap = None
         self.video_info = None
         self.writer = None
@@ -335,8 +407,7 @@ def has_audio_stream(video_path):
 
             os.replace(temp_output, video_input)
 
-
-class OpenCVWriter:
+class OpenCVWriter(Writer):
     """A class to handle video writing operations using OpenCV's VideoWriter.
 
     This class provides an interface to write frames to a video file using OpenCV,
@@ -389,12 +460,11 @@ class Video:
 
     This class provides a convenient interface for video operations including
     reading frames, saving processed videos, and video information access.
-    It uses OpenCVBackend as the default backend for video operations.
     """
 
     info: VideoInfo
     source: str | int
-    backend: OpenCVBackend
+    backend: Backend
 
     def __init__(
         self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"

From b9e79685b8c70835c782ba3c961981eda9a977db Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 2 Aug 2025 02:38:02 -0400
Subject: [PATCH 015/128] Precommit

---
 supervision/utils/video.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 4a8f19287d..7f379f7b67 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -288,8 +288,8 @@ def frames(
             start (int, optional): Starting frame index. Defaults to 0.
             end (int | None, optional): Ending frame index. Defaults to None.
             stride (int, optional): Number of frames to skip. Defaults to 1.
-            resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height).
-                If provided, frames will be resized. Defaults to None.
+            resolution_wh (tuple[int, int] | None, optional): Target resolution
+                (width, height). If provided, frames will be resized. Defaults to None.
 
         Yields:
             np.ndarray: Video frames in BGR format.
@@ -497,8 +497,8 @@ def frames(
             stride (int, optional): Number of frames to skip. Defaults to 1.
             start (int, optional): Starting frame index. Defaults to 0.
             end (int | None, optional): Ending frame index. Defaults to None.
-            resolution_wh (tuple[int, int] | None, optional): Target resolution (width, height).
-                If provided, frames will be resized. Defaults to None.
+            resolution_wh (tuple[int, int] | None, optional): Target resolution
+                (width, height). If provided, frames will be resized. Defaults to None.
 
         Returns:
             Generator: A generator yielding video frames.
@@ -520,8 +520,8 @@ def save(
         Args:
             target_path (str): Path where the processed video will be saved.
             callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
-                each frame. Takes frame and frame index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS. If None, uses source video FPS.
+                each frame. Takes frame and index as input, returns processed frame.
+            fps (int | None, optional): Output video FPS. If None, uses source video FPS. 
                 Defaults to None.
             progress_message (str, optional): Message to show in progress bar.
                 Defaults to "Processing video".

From a96c3f08ebcc53ef5cd6d3a6573d86a0ff253c80 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 2 Aug 2025 06:38:23 +0000
Subject: [PATCH 016/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 7f379f7b67..0bf5bc4da9 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -3,12 +3,11 @@
 import os
 import subprocess
 import time
+from abc import ABC, abstractmethod
 from collections import deque
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 from enum import Enum
-from abc import ABC, abstractmethod
-from typing import Protocol
 
 import cv2
 import imageio_ffmpeg
@@ -136,6 +135,7 @@ def save(
     ):
         pass
 
+
 class Writer(ABC):
     @abstractmethod
     def write(self, frame: np.ndarray) -> None:
@@ -145,6 +145,7 @@ def write(self, frame: np.ndarray) -> None:
     def close(self) -> None:
         pass
 
+
 class OpenCVBackend(Backend):
     """
     OpenCV implementation of the Backend interface.
@@ -405,6 +406,7 @@ def has_audio_stream(video_path):
 
             os.replace(temp_output, video_input)
 
+
 class OpenCVWriter(Writer):
     """A class to handle video writing operations using OpenCV's VideoWriter.
 
@@ -519,7 +521,7 @@ def save(
             target_path (str): Path where the processed video will be saved.
             callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
                 each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS. If None, uses source video FPS. 
+            fps (int | None, optional): Output video FPS. If None, uses source video FPS.
                 Defaults to None.
             progress_message (str, optional): Message to show in progress bar.
                 Defaults to "Processing video".

From a6c91bc387447e0676cdacf9bd3614ca3fa5e277 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 2 Aug 2025 02:39:40 -0400
Subject: [PATCH 017/128] Precommit

---
 supervision/utils/video.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 0bf5bc4da9..ec918a7a6b 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -521,8 +521,7 @@ def save(
             target_path (str): Path where the processed video will be saved.
             callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
                 each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS. If None, uses source video FPS.
-                Defaults to None.
+            fps (int | None, optional): Output video FPS.
             progress_message (str, optional): Message to show in progress bar.
                 Defaults to "Processing video".
             show_progress (bool, optional): Whether to show progress bar.

From d075e03b7ff87f6a431769e7dfb6dd4a7237bedd Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 6 Aug 2025 16:21:35 -0400
Subject: [PATCH 018/128] UPDATE: Added manual control

---
 supervision/utils/video.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index ec918a7a6b..57ada81442 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -81,6 +81,10 @@ def __init__(self):
         self.writer = None
         self.path = None
 
+    @abstractmethod
+    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v") -> Writer:
+        pass
+
     @abstractmethod
     def open(self, path: str) -> None:
         pass
@@ -160,6 +164,9 @@ def __init__(self):
         self.writer = None
         self.path = None
 
+    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
+        return OpenCVWriter(target_path, video_info.fps, video_info.resolution_wh, codec)
+
     def open(self, path: str) -> None:
         """
         Open a video source and initialize the video capture object.
@@ -441,6 +448,12 @@ def __init__(
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
         if not self.writer.isOpened():
             raise RuntimeError(f"Cannot open video writer for file: {filename}")
+        
+    def __enter__(self):
+        return self
+    
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
 
     def write(self, frame: np.ndarray) -> None:
         """Write a frame to the video file.
@@ -484,6 +497,9 @@ def __iter__(self):
         """
         return self.backend.frames()
 
+    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v"):
+        return self.backend.get_sink(target_path, info, codec)
+
     def frames(
         self,
         stride: int = 1,

From 7f078ffea5f0896ea2e4c42882e5710c6a7a2000 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 6 Aug 2025 16:27:34 -0400
Subject: [PATCH 019/128] ADD: Added docstrings

---
 supervision/utils/video.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 57ada81442..0bfaeecbd0 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -165,6 +165,16 @@ def __init__(self):
         self.path = None
 
     def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
+        """Create a video writer for saving frames using OpenCV.
+
+        Args:
+            target_path (str): Path where the video will be saved.
+            video_info (VideoInfo): Video information containing resolution and FPS.
+            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
+
+        Returns:
+            OpenCVWriter: A video writer object for writing frames.
+        """
         return OpenCVWriter(target_path, video_info.fps, video_info.resolution_wh, codec)
 
     def open(self, path: str) -> None:
@@ -343,6 +353,20 @@ def save(
         progress_message: str = "Processing video",
         show_progress: bool = False,
     ):
+        """Save processed video frames to a file with audio preservation.
+
+        Args:
+            target_path (str): Path where the processed video will be saved.
+            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
+                each frame. Takes frame and index as input, returns processed frame.
+            fps (int | None, optional): Output video FPS. If None, uses source FPS.
+            progress_message (str, optional): Message to show in progress bar.
+            show_progress (bool, optional): Whether to show progress bar.
+
+        Raises:
+            RuntimeError: If video source is not opened.
+            ValueError: If source is not a video file.
+        """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
 
@@ -497,7 +521,17 @@ def __iter__(self):
         """
         return self.backend.frames()
 
-    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v"):
+    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> Writer:
+        """Create a video writer for saving frames.
+
+        Args:
+            target_path (str): Path where the video will be saved.
+            info (VideoInfo): Video information containing resolution and FPS.
+            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
+
+        Returns:
+            Writer: A video writer object for writing frames.
+        """
         return self.backend.get_sink(target_path, info, codec)
 
     def frames(

From af49e9a9dfe42eecbe6c28f2a44dea5f622a7c73 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 6 Aug 2025 20:28:05 +0000
Subject: [PATCH 020/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 0bfaeecbd0..3af7db62a0 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -82,7 +82,9 @@ def __init__(self):
         self.path = None
 
     @abstractmethod
-    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v") -> Writer:
+    def get_sink(
+        self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"
+    ) -> Writer:
         pass
 
     @abstractmethod
@@ -175,7 +177,9 @@ def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v")
         Returns:
             OpenCVWriter: A video writer object for writing frames.
         """
-        return OpenCVWriter(target_path, video_info.fps, video_info.resolution_wh, codec)
+        return OpenCVWriter(
+            target_path, video_info.fps, video_info.resolution_wh, codec
+        )
 
     def open(self, path: str) -> None:
         """
@@ -472,10 +476,10 @@ def __init__(
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
         if not self.writer.isOpened():
             raise RuntimeError(f"Cannot open video writer for file: {filename}")
-        
+
     def __enter__(self):
         return self
-    
+
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 

From 320d817757a7906164f70e591116047e4ebc4c9e Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 6 Aug 2025 16:38:44 -0400
Subject: [PATCH 021/128] UPDATE: Deprecate warning old Video API

---
 supervision/utils/video.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 3af7db62a0..38d00c464d 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -52,7 +52,7 @@ class VideoInfo:
 
     width: int
     height: int
-    fps: int
+    fps: float
     total_frames: int | None = None
     source_type: SOURCE_TYPE | None = None
 
@@ -590,6 +590,7 @@ def save(
         )
 
 
+@DeprecationWarning
 class VideoSink:
     """
     Context manager that saves video frames to a file using OpenCV.
@@ -646,7 +647,7 @@ def write_frame(self, frame: np.ndarray):
     def __exit__(self, exc_type, exc_value, exc_traceback):
         self.__writer.release()
 
-
+@DeprecationWarning
 def _validate_and_setup_video(
     source_path: str, start: int, end: int | None, iterative_seek: bool = False
 ):
@@ -670,7 +671,7 @@ def _validate_and_setup_video(
 
     return video, start, end
 
-
+@DeprecationWarning
 def get_video_frames_generator(
     source_path: str,
     stride: int = 1,
@@ -721,7 +722,7 @@ def get_video_frames_generator(
         frame_position += stride
     video.release()
 
-
+@DeprecationWarning
 def process_video(
     source_path: str,
     target_path: str,

From cb8d2f8b116638482635d312573ffb3f359928d7 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 6 Aug 2025 22:09:47 -0400
Subject: [PATCH 022/128] FIX: Prototype resolution for #1687

---
 supervision/utils/video.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 38d00c464d..f940568742 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -52,7 +52,7 @@ class VideoInfo:
 
     width: int
     height: int
-    fps: float
+    fps: int
     total_frames: int | None = None
     source_type: SOURCE_TYPE | None = None
 
@@ -64,7 +64,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
 
         width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = int(video.get(cv2.CAP_PROP_FPS))
+        fps = int(round(video.get(cv2.CAP_PROP_FPS)))
         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
         video.release()
         return VideoInfo(width, height, fps, total_frames)
@@ -73,7 +73,6 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
     def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
-
 class Backend(ABC):
     def __init__(self):
         self.cap = None
@@ -231,9 +230,9 @@ def _set_video_info(self) -> VideoInfo:
             raise RuntimeError("Video not opened yet.")
         width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = self.cap.get(cv2.CAP_PROP_FPS)
+        fps = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
         total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        return VideoInfo(width, height, int(fps), total_frames)
+        return VideoInfo(width, height, fps, total_frames)
 
     def info(self) -> VideoInfo:
         """Get video information.

From a3a3a9ebdb8998f32ae231331e89057c54fc70df Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 Aug 2025 02:10:09 +0000
Subject: [PATCH 023/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index f940568742..b1d0db352c 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -73,6 +73,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
     def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
+
 class Backend(ABC):
     def __init__(self):
         self.cap = None
@@ -646,6 +647,7 @@ def write_frame(self, frame: np.ndarray):
     def __exit__(self, exc_type, exc_value, exc_traceback):
         self.__writer.release()
 
+
 @DeprecationWarning
 def _validate_and_setup_video(
     source_path: str, start: int, end: int | None, iterative_seek: bool = False
@@ -670,6 +672,7 @@ def _validate_and_setup_video(
 
     return video, start, end
 
+
 @DeprecationWarning
 def get_video_frames_generator(
     source_path: str,
@@ -721,6 +724,7 @@ def get_video_frames_generator(
         frame_position += stride
     video.release()
 
+
 @DeprecationWarning
 def process_video(
     source_path: str,

From ecbf5afef92df60c4a59d7558c40a8836962c060 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 6 Aug 2025 22:16:22 -0400
Subject: [PATCH 024/128] FIX: Rounding type

---
 supervision/utils/video.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index b1d0db352c..1f08e98bb4 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -64,7 +64,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
 
         width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = int(round(video.get(cv2.CAP_PROP_FPS)))
+        fps = (round(video.get(cv2.CAP_PROP_FPS)))
         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
         video.release()
         return VideoInfo(width, height, fps, total_frames)
@@ -231,7 +231,7 @@ def _set_video_info(self) -> VideoInfo:
             raise RuntimeError("Video not opened yet.")
         width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
+        fps = (round(self.cap.get(cv2.CAP_PROP_FPS)))
         total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
         return VideoInfo(width, height, fps, total_frames)
 
@@ -453,7 +453,7 @@ class OpenCVWriter(Writer):
     def __init__(
         self,
         filename: str,
-        fps: float,
+        fps: int,
         frame_size: tuple[int, int],
         codec: str = "mp4v",
     ):
@@ -461,7 +461,7 @@ def __init__(
 
         Args:
             filename (str): Path to the output video file.
-            fps (float): Frames per second for the output video.
+            fps (int): Frames per second for the output video.
             frame_size (tuple[int, int]): Width and height of the output video frames.
             codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
 

From 1da4466dcb138dff996ba4163fc363a36042a975 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 7 Aug 2025 02:16:44 +0000
Subject: [PATCH 025/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 1f08e98bb4..aec907dc06 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -64,7 +64,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
 
         width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = (round(video.get(cv2.CAP_PROP_FPS)))
+        fps = round(video.get(cv2.CAP_PROP_FPS))
         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
         video.release()
         return VideoInfo(width, height, fps, total_frames)
@@ -231,7 +231,7 @@ def _set_video_info(self) -> VideoInfo:
             raise RuntimeError("Video not opened yet.")
         width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = (round(self.cap.get(cv2.CAP_PROP_FPS)))
+        fps = round(self.cap.get(cv2.CAP_PROP_FPS))
         total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
         return VideoInfo(width, height, fps, total_frames)
 

From 3b4c68fcee66893b6d002bc1b111759b06829a02 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Fri, 8 Aug 2025 16:29:07 -0400
Subject: [PATCH 026/128] UPDATE: Updated the file structure for the video API

---
 supervision/__init__.py             |  10 +-
 supervision/utils/video.py          | 572 +---------------------------
 supervision/video/__init__.py       |   0
 supervision/video/backend/base.py   |  71 ++++
 supervision/video/backend/openCV.py | 261 +++++++++++++
 supervision/video/backend/pyAV.py   |   0
 supervision/video/core.py           | 134 +++++++
 supervision/video/utils.py          |  58 +++
 8 files changed, 534 insertions(+), 572 deletions(-)
 create mode 100644 supervision/video/__init__.py
 create mode 100644 supervision/video/backend/base.py
 create mode 100644 supervision/video/backend/openCV.py
 create mode 100644 supervision/video/backend/pyAV.py
 create mode 100644 supervision/video/core.py
 create mode 100644 supervision/video/utils.py

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 57c48050cf..067b346a97 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -130,13 +130,19 @@
 from supervision.utils.notebook import plot_image, plot_images_grid
 from supervision.utils.video import (
     FPSMonitor,
-    Video,
-    VideoInfo,
     VideoSink,
     get_video_frames_generator,
     process_video,
 )
 
+from supervision.video.utils import (
+    VideoInfo
+)
+
+from supervision.video.core import (
+    Video
+)
+
 __all__ = [
     "LMM",
     "BackgroundOverlayAnnotator",
diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index aec907dc06..a493850e88 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import os
-import subprocess
 import time
 from abc import ABC, abstractmethod
 from collections import deque
@@ -10,19 +8,10 @@
 from enum import Enum
 
 import cv2
-import imageio_ffmpeg
 import numpy as np
 from tqdm.auto import tqdm
 
-ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
-
-
-class SOURCE_TYPE(Enum):
-    VIDEO_FILE = "VIDEO_FILE"
-    WEBCAM = "WEBCAM"
-    RTSP = "RTSP"
-
-
+# Deprecated: superseded by supervision.video.utils.VideoInfo.
 @dataclass
 class VideoInfo:
     """
@@ -54,7 +43,6 @@ class VideoInfo:
     height: int
     fps: int
     total_frames: int | None = None
-    source_type: SOURCE_TYPE | None = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> VideoInfo:
@@ -64,7 +52,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
 
         width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = round(video.get(cv2.CAP_PROP_FPS))
+        fps = int(video.get(cv2.CAP_PROP_FPS))
         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
         video.release()
         return VideoInfo(width, height, fps, total_frames)
@@ -74,522 +62,6 @@ def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
 
-class Backend(ABC):
-    def __init__(self):
-        self.cap = None
-        self.video_info = None
-        self.writer = None
-        self.path = None
-
-    @abstractmethod
-    def get_sink(
-        self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"
-    ) -> Writer:
-        pass
-
-    @abstractmethod
-    def open(self, path: str) -> None:
-        pass
-
-    @abstractmethod
-    def isOpened(self) -> bool:
-        pass
-
-    @abstractmethod
-    def _set_video_info(self) -> VideoInfo:
-        pass
-
-    @abstractmethod
-    def info(self) -> VideoInfo:
-        pass
-
-    @abstractmethod
-    def read(self) -> tuple[bool, np.ndarray]:
-        pass
-
-    @abstractmethod
-    def grab(self) -> bool:
-        pass
-
-    @abstractmethod
-    def seek(self, frame_idx: int) -> None:
-        pass
-
-    @abstractmethod
-    def release(self) -> None:
-        pass
-
-    @abstractmethod
-    def frames(
-        self,
-        *,
-        start: int = 0,
-        end: int | None = None,
-        stride: int = 1,
-        resolution_wh: tuple[int, int] | None = None,
-    ):
-        pass
-
-    @abstractmethod
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-    ):
-        pass
-
-
-class Writer(ABC):
-    @abstractmethod
-    def write(self, frame: np.ndarray) -> None:
-        pass
-
-    @abstractmethod
-    def close(self) -> None:
-        pass
-
-
-class OpenCVBackend(Backend):
-    """
-    OpenCV implementation of the Backend interface.
-    Handles video capture, frame reading, seeking, and writing operations using OpenCV.
-    """
-
-    def __init__(self):
-        """Initialize the OpenCV backend with empty video capture and writer objects."""
-        super().__init__()
-        self.cap = None
-        self.video_info = None
-        self.writer = None
-        self.path = None
-
-    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
-        """Create a video writer for saving frames using OpenCV.
-
-        Args:
-            target_path (str): Path where the video will be saved.
-            video_info (VideoInfo): Video information containing resolution and FPS.
-            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
-
-        Returns:
-            OpenCVWriter: A video writer object for writing frames.
-        """
-        return OpenCVWriter(
-            target_path, video_info.fps, video_info.resolution_wh, codec
-        )
-
-    def open(self, path: str) -> None:
-        """
-        Open a video source and initialize the video capture object.
-
-        Args:
-            path (str): Path to the video file, RTSP URL, or camera index.
-
-        Raises:
-            RuntimeError: If unable to open the video source.
-            ValueError: If the source type is not supported.
-        """
-        self.cap = cv2.VideoCapture(path)
-        self.path = path
-
-        if not self.cap.isOpened():
-            raise RuntimeError(f"Cannot open video source: {path}")
-        self.video_info = self._set_video_info()
-
-        if isinstance(path, int):
-            self.video_info.source_type = SOURCE_TYPE.WEBCAM
-        elif isinstance(path, str):
-            self.video_info.source_type = (
-                SOURCE_TYPE.RTSP
-                if path.lower().startswith("rtsp://")
-                else SOURCE_TYPE.VIDEO_FILE
-            )
-        else:
-            raise ValueError("Unsupported source type")
-
-    def isOpened(self) -> bool:
-        """Check if the video source is opened successfully.
-
-        Returns:
-            bool: True if the video source is opened, False otherwise.
-        """
-        return self.cap.isOpened()
-
-    def _set_video_info(self) -> VideoInfo:
-        """Set up video information from the opened video source.
-
-        Returns:
-            VideoInfo: Object containing video properties like width, height, fps, etc.
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if not self.isOpened():
-            raise RuntimeError("Video not opened yet.")
-        width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = round(self.cap.get(cv2.CAP_PROP_FPS))
-        total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        return VideoInfo(width, height, fps, total_frames)
-
-    def info(self) -> VideoInfo:
-        """Get video information.
-
-        Returns:
-            VideoInfo: Object containing video properties.
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if not self.isOpened():
-            raise RuntimeError("Video not opened yet.")
-        return self.video_info
-
-    def read(self) -> tuple[bool, np.ndarray]:
-        """Read a frame from the video source.
-
-        Returns:
-            tuple[bool, np.ndarray]: A tuple containing:
-                - bool: True if frame was successfully read
-                - np.ndarray: The video frame in BGR format
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-        ret, frame = self.cap.read()
-        return ret, frame
-
-    def grab(self) -> bool:
-        """Grab a frame from video source without decoding.
-
-        Returns:
-            bool: True if frame was successfully grabbed.
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-        return self.cap.grab()
-
-    def seek(self, frame_idx: int) -> None:
-        """Seek to a specific frame in the video.
-
-        Args:
-            frame_idx (int): Index of the frame to seek to (0-based).
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
-
-    def release(self) -> None:
-        """Release the video capture resources."""
-        if self.cap is not None and self.cap.isOpened():
-            self.cap.release()
-            self.cap = None
-
-    def frames(
-        self,
-        *,
-        start: int = 0,
-        end: int | None = None,
-        stride: int = 1,
-        resolution_wh: tuple[int, int] | None = None,
-    ):
-        """Generate frames from the video source.
-
-        Args:
-            start (int, optional): Starting frame index. Defaults to 0.
-            end (int | None, optional): Ending frame index. Defaults to None.
-        stride (int, optional): Number of frames to skip. Defaults to 1.
-            resolution_wh (tuple[int, int] | None, optional): Target resolution
-                (width, height). If provided, frames will be resized. Defaults to None.
-
-            Yields:
-                np.ndarray: Video frames in BGR format.        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-
-        total_frames = self.video_info.total_frames if self.video_info else 0
-        is_live_stream = total_frames <= 0
-
-        if is_live_stream:
-            while True:
-                for _ in range(stride - 1):
-                    if not self.grab():
-                        return
-                ret, frame = self.read()
-                if not ret:
-                    return
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-        else:
-            if end is None or end > total_frames:
-                end = total_frames
-
-            frame_idx = start
-            while frame_idx < end:
-                self.seek(frame_idx)
-                ret, frame = self.read()
-                if not ret:
-                    break
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-                frame_idx += stride
-
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-    ):
-        """Save processed video frames to a file with audio preservation.
-
-        Args:
-            target_path (str): Path where the processed video will be saved.
-            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
-                each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS. If None, uses source FPS.
-            progress_message (str, optional): Message to show in progress bar.
-            show_progress (bool, optional): Whether to show progress bar.
-
-        Raises:
-            RuntimeError: If video source is not opened.
-            ValueError: If source is not a video file.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-
-        if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
-            raise ValueError("Only video files can be saved.")
-
-        if self.writer is not None:
-            self.writer.close()
-            self.writer = None
-
-        source_codec = self.cap.get(cv2.CAP_PROP_FOURCC)
-
-        if fps is None:
-            fps = self.video_info.fps
-
-        self.writer = OpenCVWriter(
-            target_path, fps, self.video_info.resolution_wh, source_codec
-        )
-        total_frames = self.video_info.total_frames
-        frames_generator = self.frames()
-        for index, frame in enumerate(
-            tqdm(
-                frames_generator,
-                total=total_frames,
-                disable=not show_progress,
-                desc=progress_message,
-            )
-        ):
-            result_frame = callback(frame, index)
-            self.writer.write(frame=result_frame)
-
-        self.writer.close()
-
-        def has_audio_stream(video_path):
-            result = subprocess.run(
-                [ffmpeg_path, "-i", video_path],
-                stdout=subprocess.DEVNULL,
-                text=True,
-            )
-
-            return "Audio:" in result.stderr
-
-        if has_audio_stream(self.path):
-            video_input = target_path
-            audio_source = self.path
-            temp_output = "temp_output.mp4"
-            subprocess.run(
-                [
-                    ffmpeg_path,
-                    "-i",
-                    video_input,
-                    "-i",
-                    audio_source,
-                    "-map",
-                    "0:v",
-                    "-map",
-                    "1:a",
-                    "-c:v",
-                    "copy",
-                    "-c:a",
-                    "aac",
-                    "-shortest",
-                    temp_output,
-                ],
-                check=True,
-                stdout=subprocess.DEVNULL,
-            )
-
-            os.replace(temp_output, video_input)
-
-
-class OpenCVWriter(Writer):
-    """A class to handle video writing operations using OpenCV's VideoWriter.
-
-    This class provides an interface to write frames to a video file using OpenCV,
-    with support for different codecs and automatic fallback to mp4v if the specified
-    codec fails.
-    """
-
-    def __init__(
-        self,
-        filename: str,
-        fps: int,
-        frame_size: tuple[int, int],
-        codec: str = "mp4v",
-    ):
-        """Initialize the video writer.
-
-        Args:
-            filename (str): Path to the output video file.
-            fps (int): Frames per second for the output video.
-            frame_size (tuple[int, int]): Width and height of the output video frames.
-            codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
-
-        Raises:
-            RuntimeError: If the video writer cannot be initialized.
-        """
-        try:
-            fourcc_int = cv2.VideoWriter_fourcc(*codec)
-            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
-        except Exception:
-            fourcc_int = cv2.VideoWriter_fourcc(*"mp4v")
-            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
-        if not self.writer.isOpened():
-            raise RuntimeError(f"Cannot open video writer for file: {filename}")
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.close()
-
-    def write(self, frame: np.ndarray) -> None:
-        """Write a frame to the video file.
-
-        Args:
-            frame (np.ndarray): The frame to write, in BGR format.
-        """
-        self.writer.write(frame)
-
-    def close(self) -> None:
-        """Release the video writer resources."""
-        self.writer.release()
-
-
-class Video:
-    """High-level interface for video operations.
-
-    This class provides a convenient interface for video operations including
-    reading frames, saving processed videos, and video information access.
-    """
-
-    info: VideoInfo
-    source: str | int
-    backend: Backend
-
-    def __init__(
-        self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"
-    ):
-        if backend == "opencv":
-            self.backend = OpenCVBackend()
-
-        self.backend.open(source)
-        self.info = self.backend.video_info
-        self.source = source
-
-    def __iter__(self):
-        """Make the Video class iterable over frames.
-
-        Returns:
-            Generator: A generator yielding video frames.
-        """
-        return self.backend.frames()
-
-    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> Writer:
-        """Create a video writer for saving frames.
-
-        Args:
-            target_path (str): Path where the video will be saved.
-            info (VideoInfo): Video information containing resolution and FPS.
-            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
-
-        Returns:
-            Writer: A video writer object for writing frames.
-        """
-        return self.backend.get_sink(target_path, info, codec)
-
-    def frames(
-        self,
-        stride: int = 1,
-        start: int = 0,
-        end: int | None = None,
-        resolution_wh: tuple[int, int] | None = None,
-    ):
-        """Generate frames from the video.
-
-        Args:
-            stride (int, optional): Number of frames to skip. Defaults to 1.
-            start (int, optional): Starting frame index. Defaults to 0.
-            end (int | None, optional): Ending frame index. Defaults to None.
-            resolution_wh (tuple[int, int] | None, optional): Target resolution
-                (width, height). If provided, frames will be resized. Defaults to None.
-
-        Returns:
-            Generator: A generator yielding video frames.
-        """
-        return self.backend.frames(
-            stride=stride, start=start, end=end, resolution_wh=resolution_wh
-        )
-
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-    ):
-        """Save processed video frames to a file.
-
-        Args:
-            target_path (str): Path where the processed video will be saved.
-            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
-                each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS.
-            progress_message (str, optional): Message to show in progress bar.
-                Defaults to "Processing video".
-            show_progress (bool, optional): Whether to show progress bar.
-                Defaults to False.
-        """
-        self.backend.save(
-            target_path=target_path,
-            callback=callback,
-            fps=fps,
-            progress_message=progress_message,
-            show_progress=show_progress,
-        )
-
-
 @DeprecationWarning
 class VideoSink:
     """
@@ -788,46 +260,6 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray:
                 result_frame = callback(frame, index)
                 sink.write_frame(frame=result_frame)
 
-    def has_audio_stream(video_path):
-        result = subprocess.run(
-            [ffmpeg_path, "-i", video_path],
-            stderr=subprocess.PIPE,
-            stdout=subprocess.DEVNULL,
-            text=True,
-        )
-
-        return "Audio:" in result.stderr
-
-    if has_audio_stream(source_path):
-        video_input = target_path
-        audio_source = source_path
-        temp_output = "temp_output.mp4"
-        subprocess.run(
-            [
-                ffmpeg_path,
-                "-i",
-                video_input,
-                "-i",
-                audio_source,
-                "-map",
-                "0:v",
-                "-map",
-                "1:a",
-                "-c:v",
-                "copy",
-                "-c:a",
-                "aac",
-                "-shortest",
-                temp_output,
-            ],
-            check=True,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-        )
-
-        os.replace(temp_output, video_input)
-
-
 class FPSMonitor:
     """
     A class for monitoring frames per second (FPS) to benchmark latency.
diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
new file mode 100644
index 0000000000..2642534bc3
--- /dev/null
+++ b/supervision/video/backend/base.py
@@ -0,0 +1,71 @@
+from abc import ABC, abstractmethod
+import numpy as np
+from collections.abc import Callable, Generator
+
+from supervision.video.utils import VideoInfo
+
+class BaseBackend(ABC):
+    def __init__(self):
+        self.cap = None
+        self.video_info = None
+        self.writer = None
+        self.path = None
+
+    @abstractmethod
+    def get_sink(
+        self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"
+    ) -> "BaseWriter":
+        pass
+
+    @abstractmethod
+    def open(self, path: str) -> None:
+        pass
+
+    @abstractmethod
+    def isOpened(self) -> bool:
+        pass
+
+    @abstractmethod
+    def _set_video_info(self) -> VideoInfo:
+        pass
+
+    @abstractmethod
+    def info(self) -> VideoInfo:
+        pass
+
+    @abstractmethod
+    def read(self) -> tuple[bool, np.ndarray]:
+        pass
+
+    @abstractmethod
+    def grab(self) -> bool:
+        pass
+
+    @abstractmethod
+    def seek(self, frame_idx: int) -> None:
+        pass
+
+    @abstractmethod
+    def release(self) -> None:
+        pass
+
+    @abstractmethod
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int | None = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+    ):
+        pass
+
+
+class BaseWriter(ABC):
+    @abstractmethod
+    def write(self, frame: np.ndarray) -> None:
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        pass
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
new file mode 100644
index 0000000000..9cadc5c212
--- /dev/null
+++ b/supervision/video/backend/openCV.py
@@ -0,0 +1,261 @@
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
+
+import cv2
+import numpy as np
+from tqdm.auto import tqdm
+from typing import Callable
+
+class OpenCVBackend(BaseBackend):
+    """
+    OpenCV implementation of the Backend interface.
+    Handles video capture, frame reading, seeking, and writing operations using OpenCV.
+    """
+
+    def __init__(self):
+        """Initialize the OpenCV backend with empty video capture and writer objects."""
+        super().__init__()
+        self.cap = None
+        self.video_info = None
+        self.writer = None
+        self.path = None
+
+    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
+        """Create a video writer for saving frames using OpenCV.
+
+        Args:
+            target_path (str): Path where the video will be saved.
+            video_info (VideoInfo): Video information containing resolution and FPS.
+            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
+
+        Returns:
+            OpenCVWriter: A video writer object for writing frames.
+        """
+        return OpenCVWriter(
+            target_path, video_info.fps, video_info.resolution_wh, codec
+        )
+
+    def open(self, path: str | int) -> None:
+        """
+        Open a video source and initialize the video capture object.
+
+        Args:
+            path (str | int): Path to the video file, RTSP URL, or camera index.
+
+        Raises:
+            RuntimeError: If unable to open the video source.
+            ValueError: If the source type is not supported.
+        """
+        # Classify first so an unsupported type never reaches cv2.VideoCapture.
+        if isinstance(path, int):
+            source_type = SOURCE_TYPE.WEBCAM
+        elif isinstance(path, str):
+            is_rtsp = path.lower().startswith("rtsp://")
+            source_type = SOURCE_TYPE.RTSP if is_rtsp else SOURCE_TYPE.VIDEO_FILE
+        else:
+            raise ValueError("Unsupported source type")
+
+        self.cap = cv2.VideoCapture(path)
+        self.path = path
+
+        if not self.cap.isOpened():
+            raise RuntimeError(f"Cannot open video source: {path}")
+
+        self.video_info = self._set_video_info()
+        self.video_info.source_type = source_type
+
+    def isOpened(self) -> bool:
+        """Check if the video source is opened successfully.
+
+        Returns:
+            bool: True if a capture exists and is open, False otherwise.
+        """
+        return self.cap is not None and self.cap.isOpened()
+
+    def _set_video_info(self) -> VideoInfo:
+        """Set up video information from the opened video source.
+
+        Returns:
+            VideoInfo: Object containing video properties like width, height, fps, etc.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+        width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = round(self.cap.get(cv2.CAP_PROP_FPS))
+        total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        return VideoInfo(width, height, fps, total_frames)
+
+    def info(self) -> VideoInfo:
+        """Get video information.
+
+        Returns:
+            VideoInfo: Object containing video properties.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+        return self.video_info
+
+    def read(self) -> tuple[bool, np.ndarray]:
+        """Read a frame from the video source.
+
+        Returns:
+            tuple[bool, np.ndarray]: A tuple containing:
+                - bool: True if frame was successfully read
+                - np.ndarray: The video frame in BGR format
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        ret, frame = self.cap.read()
+        return ret, frame
+
+    def grab(self) -> bool:
+        """Grab a frame from video source without decoding.
+
+        Returns:
+            bool: True if frame was successfully grabbed.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        return self.cap.grab()
+
+    def seek(self, frame_idx: int) -> None:
+        """Seek to a specific frame in the video.
+
+        Args:
+            frame_idx (int): Index of the frame to seek to (0-based).
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+
+    def release(self) -> None:
+        """Release the capture (if any) and drop the reference to it."""
+        if self.cap is not None:
+            self.cap.release()
+            self.cap = None
+
+
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int | None = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+    ):
+        """Run ``callback`` on every frame and write the results to a video file.
+
+        Args:
+            target_path (str): Path where the processed video will be saved.
+            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
+                each frame. Takes frame and index as input, returns processed frame.
+            fps (int | None, optional): Output video FPS. If None, uses source FPS.
+            progress_message (str, optional): Message to show in progress bar.
+            show_progress (bool, optional): Whether to show progress bar.
+
+        Raises:
+            RuntimeError: If video source is not opened.
+            ValueError: If source is not a video file.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
+            raise ValueError("Only video files can be saved.")
+
+        if self.writer is not None:
+            self.writer.close()
+            self.writer = None
+
+        # NOTE(review): CAP_PROP_FOURCC is numeric, but OpenCVWriter unpacks a str.
+        source_codec = self.cap.get(cv2.CAP_PROP_FOURCC)
+
+        if fps is None:
+            fps = self.video_info.fps
+
+        self.writer = OpenCVWriter(
+            target_path, fps, self.video_info.resolution_wh, source_codec
+        )
+        # NOTE(review): no frames() method is visible on this backend — confirm.
+        progress = tqdm(
+            self.frames(),
+            total=self.video_info.total_frames,
+            disable=not show_progress,
+            desc=progress_message,
+        )
+        try:
+            for index, frame in enumerate(progress):
+                self.writer.write(frame=callback(frame, index))
+        finally:
+            # Finalize the file even when the callback or a read fails midway.
+            self.writer.close()
+
+
+class OpenCVWriter(BaseWriter):
+    """A class to handle video writing operations using OpenCV's VideoWriter.
+
+    This class provides an interface to write frames to a video file using OpenCV,
+    with support for different codecs and automatic fallback to mp4v if the specified
+    codec fails.
+    """
+
+    def __init__(
+        self,
+        filename: str,
+        fps: int,
+        frame_size: tuple[int, int],
+        codec: str = "mp4v",
+    ):
+        """Initialize the video writer.
+
+        Args:
+            filename (str): Path to the output video file.
+            fps (int): Frames per second for the output video.
+            frame_size (tuple[int, int]): Width and height of the output video frames.
+            codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
+
+        Raises:
+            RuntimeError: If the video writer cannot be initialized.
+        """
+        try:
+            fourcc_int = cv2.VideoWriter_fourcc(*codec)
+            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
+        except Exception:
+            fourcc_int = cv2.VideoWriter_fourcc(*"mp4v")
+            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
+        if not self.writer.isOpened():
+            raise RuntimeError(f"Cannot open video writer for file: {filename}")
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def write(self, frame: np.ndarray) -> None:
+        """Write a frame to the video file.
+
+        Args:
+            frame (np.ndarray): The frame to write, in BGR format.
+        """
+        self.writer.write(frame)
+
+    def close(self) -> None:
+        """Release the video writer resources."""
+        self.writer.release()
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/supervision/video/core.py b/supervision/video/core.py
new file mode 100644
index 0000000000..05d9d5d7ab
--- /dev/null
+++ b/supervision/video/core.py
@@ -0,0 +1,134 @@
+
+from collections.abc import Callable, Generator
+
+import cv2
+import numpy as np
+from tqdm.auto import tqdm
+
+from supervision.video.utils import VideoInfo
+from supervision.video.backend.base import BaseBackend, BaseWriter
+
+from supervision.video.backend.openCV import OpenCVBackend
+
+
+class Video:
+    """High-level interface for video operations.
+
+    This class provides a convenient interface for video operations including
+    reading frames, saving processed videos, and video information access.
+    """
+
+    info: VideoInfo
+    source: str | int
+    backend: BaseBackend
+
+    def __init__(
+        self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"
+    ):
+        if backend == "opencv":
+            self.backend = OpenCVBackend()
+
+        self.backend.open(source)
+        self.info = self.backend.video_info
+        self.source = source
+
+    def __iter__(self):
+        """Make the Video class iterable over frames.
+
+        Returns:
+            Generator: A generator yielding video frames.
+        """
+        return self.backend.frames()
+
+    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter:
+        """Create a video writer for saving frames.
+
+        Args:
+            target_path (str): Path where the video will be saved.
+            info (VideoInfo): Video information containing resolution and FPS.
+            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
+
+        Returns:
+            BaseWriter: A video writer object for writing frames.
+        """
+        return self.backend.get_sink(target_path, info, codec)
+
+    def frames(
+        self,
+        stride: int = 1,
+        start: int = 0,
+        end: int | None = None,
+        resolution_wh: tuple[int, int] | None = None,
+    ):
+        """Generate frames from the video.
+
+        Args:
+            stride (int, optional): Step between yielded frames. Defaults to 1.
+            start (int, optional): Starting frame index. Defaults to 0.
+            end (int | None, optional): Ending frame index. Defaults to None.
+            resolution_wh (tuple[int, int] | None, optional): Target resolution
+                (width, height). If provided, frames will be resized. Defaults to None.
+
+        Returns:
+            Generator: A generator yielding video frames.
+        """
+        if self.backend.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0
+        is_live_stream = total_frames <= 0
+
+        if is_live_stream:
+            while True:
+                for _ in range(stride - 1):
+                    if not self.backend.grab():
+                        return
+                ret, frame = self.backend.read()
+                if not ret:
+                    return
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+        else:
+            if end is None or end > total_frames:
+                end = total_frames
+
+            frame_idx = start
+            while frame_idx < end:
+                self.backend.seek(frame_idx)
+                ret, frame = self.backend.read()
+                if not ret:
+                    break
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+                frame_idx += stride
+
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int | None = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+    ):
+        """Save processed video frames to a file.
+
+        Args:
+            target_path (str): Path where the processed video will be saved.
+            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
+                each frame. Takes frame and index as input, returns processed frame.
+            fps (int | None, optional): Output video FPS.
+            progress_message (str, optional): Message to show in progress bar.
+                Defaults to "Processing video".
+            show_progress (bool, optional): Whether to show progress bar.
+                Defaults to False.
+        """
+        self.backend.save(
+            target_path=target_path,
+            callback=callback,
+            fps=fps,
+            progress_message=progress_message,
+            show_progress=show_progress,
+        )
+
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
new file mode 100644
index 0000000000..a8667cf919
--- /dev/null
+++ b/supervision/video/utils.py
@@ -0,0 +1,58 @@
+from dataclasses import dataclass
+from enum import Enum
+import cv2
+
+class SOURCE_TYPE(Enum):
+    VIDEO_FILE = "VIDEO_FILE"
+    WEBCAM = "WEBCAM"
+    RTSP = "RTSP"
+
+@dataclass
+class VideoInfo:
+    """
+    A class to store video information, including width, height, fps and
+        total number of frames.
+
+    Attributes:
+        width (int): width of the video in pixels
+        height (int): height of the video in pixels
+        fps (int): frames per second of the video
+        total_frames (Optional[int]): total number of frames in the video,
+            default is None
+
+    Examples:
+        ```python
+        import supervision as sv
+
+        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)
+
+        video_info
+        # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
+
+        video_info.resolution_wh
+        # (3840, 2160)
+        ```
+    """
+
+    width: int
+    height: int
+    fps: int
+    total_frames: int | None = None
+    source_type: SOURCE_TYPE | None = None
+
+    @classmethod
+    def from_video_path(cls, video_path: str) -> "VideoInfo":
+        video = cv2.VideoCapture(video_path)
+        if not video.isOpened():
+            raise Exception(f"Could not open video at {video_path}")
+
+        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = round(video.get(cv2.CAP_PROP_FPS))
+        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        video.release()
+        return VideoInfo(width, height, fps, total_frames)
+
+    @property
+    def resolution_wh(self) -> tuple[int, int]:
+        return self.width, self.height

From 385639b32ac59fce9f2635df9aae96fb7993e289 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Fri, 8 Aug 2025 16:34:36 -0400
Subject: [PATCH 027/128] UPDATE: Added frame generator within backend

---
 supervision/video/backend/base.py   | 11 ++++++
 supervision/video/backend/openCV.py | 52 +++++++++++++++++++++++++++++
 supervision/video/core.py           | 34 ++-----------------
 3 files changed, 66 insertions(+), 31 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 2642534bc3..4e047dd789 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -44,6 +44,17 @@ def grab(self) -> bool:
     @abstractmethod
     def seek(self, frame_idx: int) -> None:
         pass
+    
+    @abstractmethod
+    def frames(
+        self,
+        *,
+        start: int = 0,
+        end: int | None = None,
+        stride: int = 1,
+        resolution_wh: tuple[int, int] | None = None,
+    ):
+        pass
 
     @abstractmethod
     def release(self) -> None:
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 9cadc5c212..a591b597cd 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -150,6 +150,60 @@ def release(self) -> None:
             self.cap.release()
             self.cap = None
 
+    def frames(
+        self,
+        *,
+        start: int = 0,
+        end: int | None = None,
+        stride: int = 1,
+        resolution_wh: tuple[int, int] | None = None,
+    ):
+        """Generate frames from the video source.
+
+        Args:
+            start (int, optional): Starting frame index. Defaults to 0.
+            end (int | None, optional): Ending frame index. Defaults to None.
+            stride (int, optional): Step between yielded frames. Defaults to 1.
+            resolution_wh (tuple[int, int] | None, optional): Target resolution
+                (width, height). If provided, frames will be resized. Defaults to None.
+
+        Yields:
+            np.ndarray: Video frames in BGR format.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        total_frames = self.video_info.total_frames if self.video_info else 0
+        is_live_stream = total_frames <= 0
+
+        if is_live_stream:
+            while True:
+                for _ in range(stride - 1):
+                    if not self.grab():
+                        return
+                ret, frame = self.read()
+                if not ret:
+                    return
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+        else:
+            if end is None or end > total_frames:
+                end = total_frames
+
+            frame_idx = start
+            while frame_idx < end:
+                self.seek(frame_idx)
+                ret, frame = self.read()
+                if not ret:
+                    break
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+                frame_idx += stride
 
     def save(
         self,
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 05d9d5d7ab..43add0ee77 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -72,37 +72,9 @@ def frames(
         Returns:
             Generator: A generator yielding video frames.
         """
-        if self.backend.cap is None:
-            raise RuntimeError("Video not opened yet.")
-
-        total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0
-        is_live_stream = total_frames <= 0
-
-        if is_live_stream:
-            while True:
-                for _ in range(stride - 1):
-                    if not self.backend.grab():
-                        return
-                ret, frame = self.backend.read()
-                if not ret:
-                    return
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-        else:
-            if end is None or end > total_frames:
-                end = total_frames
-
-            frame_idx = start
-            while frame_idx < end:
-                self.backend.seek(frame_idx)
-                ret, frame = self.backend.read()
-                if not ret:
-                    break
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-                frame_idx += stride
+        return self.backend.frames(
+            stride=stride, start=start, end=end, resolution_wh=resolution_wh
+        )
 
     def save(
         self,

From 61dd0169b297a2bc406be875b1f2d757b3104d82 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Fri, 8 Aug 2025 16:37:58 -0400
Subject: [PATCH 028/128] UPDATE: Added .save codec support

---
 supervision/video/backend/base.py   | 1 +
 supervision/video/backend/openCV.py | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 4e047dd789..1e2f578d7b 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -68,6 +68,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
+        codec = "mp4v"
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index a591b597cd..8a2e860586 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -210,6 +210,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
+        codec: str = None
     ):
         """Save processed video frames to a file with audio preservation.
 
@@ -235,7 +236,7 @@ def save(
             self.writer.close()
             self.writer = None
 
-        source_codec = self.cap.get(cv2.CAP_PROP_FOURCC)
+        source_codec = codec if codec is None else self.cap.get(cv2.CAP_PROP_FOURCC)
 
         if fps is None:
             fps = self.video_info.fps

From d2bb428e1c88662e7f28a1016dd484ca5a0fa76e Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Fri, 8 Aug 2025 16:43:00 -0400
Subject: [PATCH 029/128] UPDATE: Added default mp4v codec

---
 supervision/video/backend/base.py   | 2 +-
 supervision/video/backend/openCV.py | 8 +++-----
 supervision/video/core.py           | 2 ++
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 1e2f578d7b..27a5797a89 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -68,7 +68,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec = "mp4v"
+        codec: str = "mp4v"
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 8a2e860586..f31716b24a 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -210,7 +210,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = None
+        codec: str = "mp4v"
     ):
         """Save processed video frames to a file with audio preservation.
 
@@ -235,14 +235,12 @@ def save(
         if self.writer is not None:
             self.writer.close()
             self.writer = None
-
-        source_codec = codec if codec is None else self.cap.get(cv2.CAP_PROP_FOURCC)
-
+        
         if fps is None:
             fps = self.video_info.fps
 
         self.writer = OpenCVWriter(
-            target_path, fps, self.video_info.resolution_wh, source_codec
+            target_path, fps, self.video_info.resolution_wh, codec
         )
         total_frames = self.video_info.total_frames
         frames_generator = self.frames()
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 43add0ee77..9a2921a8b9 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -83,6 +83,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
+        codec: str = "mp4v"
     ):
         """Save processed video frames to a file.
 
@@ -102,5 +103,6 @@ def save(
             fps=fps,
             progress_message=progress_message,
             show_progress=show_progress,
+            codec=codec
         )
 

From f3a3133b2ebd1dd5743683d5915c2678b5e70023 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 8 Aug 2025 20:43:53 +0000
Subject: [PATCH 030/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/__init__.py             | 10 ++--------
 supervision/utils/video.py          |  4 ++--
 supervision/video/backend/base.py   |  8 +++++---
 supervision/video/backend/openCV.py | 12 +++++++-----
 supervision/video/core.py           | 17 +++++++----------
 supervision/video/utils.py          |  3 +++
 6 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 067b346a97..dff62b5c95 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -134,14 +134,8 @@
     get_video_frames_generator,
     process_video,
 )
-
-from supervision.video.utils import (
-    VideoInfo
-)
-
-from supervision.video.core import (
-    Video
-)
+from supervision.video.core import Video
+from supervision.video.utils import VideoInfo
 
 __all__ = [
     "LMM",
diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index a493850e88..00eb9b4906 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -1,16 +1,15 @@
 from __future__ import annotations
 
 import time
-from abc import ABC, abstractmethod
 from collections import deque
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
-from enum import Enum
 
 import cv2
 import numpy as np
 from tqdm.auto import tqdm
 
+
 @DeprecationWarning
 @dataclass
 class VideoInfo:
@@ -260,6 +259,7 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray:
                 result_frame = callback(frame, index)
                 sink.write_frame(frame=result_frame)
 
+
 class FPSMonitor:
     """
     A class for monitoring frames per second (FPS) to benchmark latency.
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 27a5797a89..5ee7553396 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,9 +1,11 @@
 from abc import ABC, abstractmethod
+from collections.abc import Callable
+
 import numpy as np
-from collections.abc import Callable, Generator
 
 from supervision.video.utils import VideoInfo
 
+
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
@@ -44,7 +46,7 @@ def grab(self) -> bool:
     @abstractmethod
     def seek(self, frame_idx: int) -> None:
         pass
-    
+
     @abstractmethod
     def frames(
         self,
@@ -68,7 +70,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v"
+        codec: str = "mp4v",
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index f31716b24a..88dfedf6f7 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -1,10 +1,12 @@
-from supervision.video.backend.base import BaseBackend, BaseWriter
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from collections.abc import Callable
 
 import cv2
 import numpy as np
 from tqdm.auto import tqdm
-from typing import Callable
+
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
+
 
 class OpenCVBackend(BaseBackend):
     """
@@ -210,7 +212,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v"
+        codec: str = "mp4v",
     ):
         """Save processed video frames to a file with audio preservation.
 
@@ -235,7 +237,7 @@ def save(
         if self.writer is not None:
             self.writer.close()
             self.writer = None
-        
+
         if fps is None:
             fps = self.video_info.fps
 
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 9a2921a8b9..61d1b2bd38 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,14 +1,10 @@
+from collections.abc import Callable
 
-from collections.abc import Callable, Generator
-
-import cv2
 import numpy as np
-from tqdm.auto import tqdm
 
-from supervision.video.utils import VideoInfo
 from supervision.video.backend.base import BaseBackend, BaseWriter
-
 from supervision.video.backend.openCV import OpenCVBackend
+from supervision.video.utils import VideoInfo
 
 
 class Video:
@@ -40,7 +36,9 @@ def __iter__(self):
         """
         return self.backend.frames()
 
-    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter:
+    def sink(
+        self, target_path: str, info: VideoInfo, codec: str = "mp4v"
+    ) -> BaseWriter:
         """Create a video writer for saving frames.
 
         Args:
@@ -83,7 +81,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v"
+        codec: str = "mp4v",
     ):
         """Save processed video frames to a file.
 
@@ -103,6 +101,5 @@ def save(
             fps=fps,
             progress_message=progress_message,
             show_progress=show_progress,
-            codec=codec
+            codec=codec,
         )
-
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index a8667cf919..d79069e536 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,12 +1,15 @@
 from dataclasses import dataclass
 from enum import Enum
+
 import cv2
 
+
 class SOURCE_TYPE(Enum):
     VIDEO_FILE = "VIDEO_FILE"
     WEBCAM = "WEBCAM"
     RTSP = "RTSP"
 
+
 @dataclass
 class VideoInfo:
     """

From 421cd5d4ea9efc21f9ac32aaf6b187e0391360de Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:06:05 -0400
Subject: [PATCH 031/128] UPDATE: Trying to pass checks

---
 supervision/video/utils.py | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index a8667cf919..ce1c84230b 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -42,15 +42,38 @@ class VideoInfo:
 
     @classmethod
     def from_video_path(cls, video_path: str) -> "VideoInfo":
+        """Create VideoInfo from a video file path.
+
+        Args:
+            video_path (str): Path to the video file.
+
+        Returns:
+            VideoInfo: Video information containing width, height, fps, and total frames.
+
+        Raises:
+            ValueError: If video cannot be opened or has invalid properties.
+        """
         video = cv2.VideoCapture(video_path)
         if not video.isOpened():
-            raise Exception(f"Could not open video at {video_path}")
+            raise ValueError(f"Could not open video at {video_path}")
+
+        try:
+            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            if width <= 0 or height <= 0:
+                raise ValueError(f"Invalid video dimensions: {width}x{height}")
+
+            fps = video.get(cv2.CAP_PROP_FPS)
+            if fps <= 0:
+                fps = 30  # Default to 30fps if invalid
+            fps = round(fps)
+
+            total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+            if total_frames < 0:
+                total_frames = None  # Some video formats may not report frame count
+        finally:
+            video.release()
 
-        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = round(video.get(cv2.CAP_PROP_FPS))
-        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-        video.release()
         return VideoInfo(width, height, fps, total_frames)
 
     @property

From b9da794c77b9415145987c9a29dd3c2fdbf7dfae Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:08:41 -0400
Subject: [PATCH 032/128] UPDATE: Typings for optional params

---
 supervision/video/utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 44d7c0b838..608de4ad0a 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
+from typing import Optional, Tuple, Union
 
 import cv2
 
@@ -40,8 +41,8 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: int | None = None
-    source_type: SOURCE_TYPE | None = None
+    total_frames: Optional[int] = None
+    source_type: Optional[SOURCE_TYPE] = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> "VideoInfo":
@@ -51,7 +52,7 @@ def from_video_path(cls, video_path: str) -> "VideoInfo":
             video_path (str): Path to the video file.
 
         Returns:
-            VideoInfo: Video information containing width, height, fps, and total frames.
+            VideoInfo: Video info containing width, height, fps, and total frames.
 
         Raises:
             ValueError: If video cannot be opened or has invalid properties.
@@ -80,5 +81,5 @@ def from_video_path(cls, video_path: str) -> "VideoInfo":
         return VideoInfo(width, height, fps, total_frames)
 
     @property
-    def resolution_wh(self) -> tuple[int, int]:
+    def resolution_wh(self) -> Tuple[int, int]:
         return self.width, self.height

From e8b39a10d8f200dac592ac846835012eb6bfe354 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:09:00 +0000
Subject: [PATCH 033/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 608de4ad0a..e20e447b30 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Tuple, Union
+from typing import Optional, Tuple
 
 import cv2
 
@@ -41,8 +41,8 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: Optional[int] = None
-    source_type: Optional[SOURCE_TYPE] = None
+    total_frames: int | None = None
+    source_type: SOURCE_TYPE | None = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> "VideoInfo":
@@ -81,5 +81,5 @@ def from_video_path(cls, video_path: str) -> "VideoInfo":
         return VideoInfo(width, height, fps, total_frames)
 
     @property
-    def resolution_wh(self) -> Tuple[int, int]:
+    def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height

From 90885b0dabc1ad997008c11df3d41f43e3e8e303 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:15:59 -0400
Subject: [PATCH 034/128] Import check fix

---
 supervision/video/backend/base.py | 1 -
 supervision/video/utils.py        | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 5ee7553396..073131b9b2 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -5,7 +5,6 @@
 
 from supervision.video.utils import VideoInfo
 
-
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index e20e447b30..9e33315a26 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -23,6 +23,8 @@ class VideoInfo:
         fps (int): frames per second of the video
         total_frames (Optional[int]): total number of frames in the video,
             default is None
+        source_type (Optional[SOURCE_TYPE]): source type of the video,
+            default is None
 
     Examples:
         ```python
@@ -41,7 +43,7 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: int | None = None
+    total_frames: Optional[int] = None
     source_type: SOURCE_TYPE | None = None
 
     @classmethod

From 9d0b5ed0b2d801d5270f8d3d9a11a0ebba0aa777 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:16:18 +0000
Subject: [PATCH 035/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/base.py | 1 +
 supervision/video/utils.py        | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 073131b9b2..5ee7553396 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -5,6 +5,7 @@
 
 from supervision.video.utils import VideoInfo
 
+
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 9e33315a26..15bf7f6e66 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Tuple
+from typing import Optional
 
 import cv2
 
@@ -43,7 +43,7 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: Optional[int] = None
+    total_frames: int | None = None
     source_type: SOURCE_TYPE | None = None
 
     @classmethod

From f983113ce8fc5e4e050755b28c30b28c12531c26 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:19:10 -0400
Subject: [PATCH 036/128] Import check fix

---
 supervision/video/utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 15bf7f6e66..fc165b6c1c 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional
+from typing import Optional, Tuple
 
 import cv2
 
@@ -43,8 +43,8 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: int | None = None
-    source_type: SOURCE_TYPE | None = None
+    total_frames: Optional[int] = None
+    source_type: Optional[SOURCE_TYPE] = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> "VideoInfo":
@@ -83,5 +83,10 @@ def from_video_path(cls, video_path: str) -> "VideoInfo":
         return VideoInfo(width, height, fps, total_frames)
 
     @property
-    def resolution_wh(self) -> tuple[int, int]:
+    def resolution_wh(self) -> Tuple[int, int]:
+        """Get the video resolution as (width, height).
+
+        Returns:
+            Tuple[int, int]: Video dimensions as (width, height).
+        """
         return self.width, self.height

From c1544f0549c9695e0f29f66230ce8426bb9ab72f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:19:29 +0000
Subject: [PATCH 037/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index fc165b6c1c..64ec2fdbe5 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -43,8 +43,8 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: Optional[int] = None
-    source_type: Optional[SOURCE_TYPE] = None
+    total_frames: int | None = None
+    source_type: SOURCE_TYPE | None = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> "VideoInfo":
@@ -83,7 +83,7 @@ def from_video_path(cls, video_path: str) -> "VideoInfo":
         return VideoInfo(width, height, fps, total_frames)
 
     @property
-    def resolution_wh(self) -> Tuple[int, int]:
+    def resolution_wh(self) -> tuple[int, int]:
         """Get the video resolution as (width, height).
 
         Returns:

From 3a339823222d21ed19c33f8997b4c39b9dcf4433 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:22:02 -0400
Subject: [PATCH 038/128] Import check fix

---
 supervision/video/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 64ec2fdbe5..df7203fbe8 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -43,8 +43,8 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: int | None = None
-    source_type: SOURCE_TYPE | None = None
+    total_frames: int = None
+    source_type: SOURCE_TYPE = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> "VideoInfo":

From 343600da59db1019449c27a9c63b11770e5ac322 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:22:20 +0000
Subject: [PATCH 039/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index df7203fbe8..fa5cbe8685 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,6 +1,5 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Tuple
 
 import cv2
 

From bf0f8841e68d9ccf7cee72a895c5ed63cbf7dc43 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:23:43 -0400
Subject: [PATCH 040/128] Import check fix

---
 supervision/video/backend/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 5ee7553396..929dd03324 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -52,7 +52,7 @@ def frames(
         self,
         *,
         start: int = 0,
-        end: int | None = None,
+        end: int = None,
         stride: int = 1,
         resolution_wh: tuple[int, int] | None = None,
     ):
@@ -67,7 +67,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
+        fps: int = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
         codec: str = "mp4v",

From 881c9f4e70db79fef7608c45149086eb8c9787a8 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:25:41 -0400
Subject: [PATCH 041/128] Import check fix

---
 supervision/video/backend/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 929dd03324..14854ed65a 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -54,7 +54,7 @@ def frames(
         start: int = 0,
         end: int = None,
         stride: int = 1,
-        resolution_wh: tuple[int, int] | None = None,
+        resolution_wh: tuple[int, int] = None,
     ):
         pass
 

From 5fed10baa9c3b26176c64d05633b0d08049f6fac Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:27:43 -0400
Subject: [PATCH 042/128] Import check fix

---
 supervision/video/backend/openCV.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 88dfedf6f7..ac8452d5ab 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -156,17 +156,17 @@ def frames(
         self,
         *,
         start: int = 0,
-        end: int | None = None,
+        end: int = None,
         stride: int = 1,
-        resolution_wh: tuple[int, int] | None = None,
+        resolution_wh: tuple[int, int] = None,
     ):
         """Generate frames from the video source.
 
         Args:
             start (int, optional): Starting frame index. Defaults to 0.
-            end (int | None, optional): Ending frame index. Defaults to None.
+            end (int, optional): Ending frame index. Defaults to None.
         stride (int, optional): Number of frames to skip. Defaults to 1.
-            resolution_wh (tuple[int, int] | None, optional): Target resolution
+            resolution_wh (tuple[int, int], optional): Target resolution
                 (width, height). If provided, frames will be resized. Defaults to None.
 
             Yields:
@@ -209,7 +209,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
+        fps: int = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
         codec: str = "mp4v",
@@ -220,7 +220,7 @@ def save(
             target_path (str): Path where the processed video will be saved.
             callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
                 each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS. If None, uses source FPS.
+            fps (int, optional): Output video FPS. If None, uses source FPS.
             progress_message (str, optional): Message to show in progress bar.
             show_progress (bool, optional): Whether to show progress bar.
 

From a84dde837481c643f2fe565c41646c57f0dd6430 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:28:49 -0400
Subject: [PATCH 043/128] Import check fix

---
 supervision/video/core.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 61d1b2bd38..410890207e 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -19,7 +19,7 @@ class Video:
     backend: BaseBackend
 
     def __init__(
-        self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"
+        self, source: str | int, info: VideoInfo = None, backend: str = "opencv"
     ):
         if backend == "opencv":
             self.backend = OpenCVBackend()
@@ -55,16 +55,16 @@ def frames(
         self,
         stride: int = 1,
         start: int = 0,
-        end: int | None = None,
-        resolution_wh: tuple[int, int] | None = None,
+        end: int = None,
+        resolution_wh: tuple[int, int] = None,
     ):
         """Generate frames from the video.
 
         Args:
             stride (int, optional): Number of frames to skip. Defaults to 1.
             start (int, optional): Starting frame index. Defaults to 0.
-            end (int | None, optional): Ending frame index. Defaults to None.
-            resolution_wh (tuple[int, int] | None, optional): Target resolution
+            end (int, optional): Ending frame index. Defaults to None.
+            resolution_wh (tuple[int, int], optional): Target resolution
                 (width, height). If provided, frames will be resized. Defaults to None.
 
         Returns:
@@ -78,7 +78,7 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
+        fps: int = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
         codec: str = "mp4v",
@@ -89,7 +89,7 @@ def save(
             target_path (str): Path where the processed video will be saved.
             callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
                 each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS.
+            fps (int, optional): Output video FPS.
             progress_message (str, optional): Message to show in progress bar.
                 Defaults to "Processing video".
             show_progress (bool, optional): Whether to show progress bar.

From 012b95060a99f794c3f282c720c3d25a0a7c0bb4 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:32:05 -0400
Subject: [PATCH 044/128] RM: Remove video folder

---
 supervision/__init__.py             |   4 +-
 supervision/video/__init__.py       |   0
 supervision/video/backend/base.py   |  85 --------
 supervision/video/backend/openCV.py | 314 ----------------------------
 supervision/video/backend/pyAV.py   |   0
 supervision/video/core.py           | 105 ----------
 supervision/video/utils.py          |  91 --------
 7 files changed, 1 insertion(+), 598 deletions(-)
 delete mode 100644 supervision/video/__init__.py
 delete mode 100644 supervision/video/backend/base.py
 delete mode 100644 supervision/video/backend/openCV.py
 delete mode 100644 supervision/video/backend/pyAV.py
 delete mode 100644 supervision/video/core.py
 delete mode 100644 supervision/video/utils.py

diff --git a/supervision/__init__.py b/supervision/__init__.py
index dff62b5c95..53f98a8136 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -131,11 +131,10 @@
 from supervision.utils.video import (
     FPSMonitor,
     VideoSink,
+    VideoInfo,
     get_video_frames_generator,
     process_video,
 )
-from supervision.video.core import Video
-from supervision.video.utils import VideoInfo
 
 __all__ = [
     "LMM",
@@ -193,7 +192,6 @@
     "TriangleAnnotator",
     "VertexAnnotator",
     "VertexLabelAnnotator",
-    "Video",
     "VideoInfo",
     "VideoSink",
     "approximate_polygon",
diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
deleted file mode 100644
index 14854ed65a..0000000000
--- a/supervision/video/backend/base.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from abc import ABC, abstractmethod
-from collections.abc import Callable
-
-import numpy as np
-
-from supervision.video.utils import VideoInfo
-
-
-class BaseBackend(ABC):
-    def __init__(self):
-        self.cap = None
-        self.video_info = None
-        self.writer = None
-        self.path = None
-
-    @abstractmethod
-    def get_sink(
-        self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"
-    ) -> "BaseWriter":
-        pass
-
-    @abstractmethod
-    def open(self, path: str) -> None:
-        pass
-
-    @abstractmethod
-    def isOpened(self) -> bool:
-        pass
-
-    @abstractmethod
-    def _set_video_info(self) -> VideoInfo:
-        pass
-
-    @abstractmethod
-    def info(self) -> VideoInfo:
-        pass
-
-    @abstractmethod
-    def read(self) -> tuple[bool, np.ndarray]:
-        pass
-
-    @abstractmethod
-    def grab(self) -> bool:
-        pass
-
-    @abstractmethod
-    def seek(self, frame_idx: int) -> None:
-        pass
-
-    @abstractmethod
-    def frames(
-        self,
-        *,
-        start: int = 0,
-        end: int = None,
-        stride: int = 1,
-        resolution_wh: tuple[int, int] = None,
-    ):
-        pass
-
-    @abstractmethod
-    def release(self) -> None:
-        pass
-
-    @abstractmethod
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-        codec: str = "mp4v",
-    ):
-        pass
-
-
-class BaseWriter(ABC):
-    @abstractmethod
-    def write(self, frame: np.ndarray) -> None:
-        pass
-
-    @abstractmethod
-    def close(self) -> None:
-        pass
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
deleted file mode 100644
index ac8452d5ab..0000000000
--- a/supervision/video/backend/openCV.py
+++ /dev/null
@@ -1,314 +0,0 @@
-from collections.abc import Callable
-
-import cv2
-import numpy as np
-from tqdm.auto import tqdm
-
-from supervision.video.backend.base import BaseBackend, BaseWriter
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
-
-
-class OpenCVBackend(BaseBackend):
-    """
-    OpenCV implementation of the Backend interface.
-    Handles video capture, frame reading, seeking, and writing operations using OpenCV.
-    """
-
-    def __init__(self):
-        """Initialize the OpenCV backend with empty video capture and writer objects."""
-        super().__init__()
-        self.cap = None
-        self.video_info = None
-        self.writer = None
-        self.path = None
-
-    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
-        """Create a video writer for saving frames using OpenCV.
-
-        Args:
-            target_path (str): Path where the video will be saved.
-            video_info (VideoInfo): Video information containing resolution and FPS.
-            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
-
-        Returns:
-            OpenCVWriter: A video writer object for writing frames.
-        """
-        return OpenCVWriter(
-            target_path, video_info.fps, video_info.resolution_wh, codec
-        )
-
-    def open(self, path: str) -> None:
-        """
-        Open a video source and initialize the video capture object.
-
-        Args:
-            path (str): Path to the video file, RTSP URL, or camera index.
-
-        Raises:
-            RuntimeError: If unable to open the video source.
-            ValueError: If the source type is not supported.
-        """
-        self.cap = cv2.VideoCapture(path)
-        self.path = path
-
-        if not self.cap.isOpened():
-            raise RuntimeError(f"Cannot open video source: {path}")
-        self.video_info = self._set_video_info()
-
-        if isinstance(path, int):
-            self.video_info.source_type = SOURCE_TYPE.WEBCAM
-        elif isinstance(path, str):
-            self.video_info.source_type = (
-                SOURCE_TYPE.RTSP
-                if path.lower().startswith("rtsp://")
-                else SOURCE_TYPE.VIDEO_FILE
-            )
-        else:
-            raise ValueError("Unsupported source type")
-
-    def isOpened(self) -> bool:
-        """Check if the video source is opened successfully.
-
-        Returns:
-            bool: True if the video source is opened, False otherwise.
-        """
-        return self.cap.isOpened()
-
-    def _set_video_info(self) -> VideoInfo:
-        """Set up video information from the opened video source.
-
-        Returns:
-            VideoInfo: Object containing video properties like width, height, fps, etc.
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if not self.isOpened():
-            raise RuntimeError("Video not opened yet.")
-        width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = round(self.cap.get(cv2.CAP_PROP_FPS))
-        total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        return VideoInfo(width, height, fps, total_frames)
-
-    def info(self) -> VideoInfo:
-        """Get video information.
-
-        Returns:
-            VideoInfo: Object containing video properties.
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if not self.isOpened():
-            raise RuntimeError("Video not opened yet.")
-        return self.video_info
-
-    def read(self) -> tuple[bool, np.ndarray]:
-        """Read a frame from the video source.
-
-        Returns:
-            tuple[bool, np.ndarray]: A tuple containing:
-                - bool: True if frame was successfully read
-                - np.ndarray: The video frame in BGR format
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-        ret, frame = self.cap.read()
-        return ret, frame
-
-    def grab(self) -> bool:
-        """Grab a frame from video source without decoding.
-
-        Returns:
-            bool: True if frame was successfully grabbed.
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-        return self.cap.grab()
-
-    def seek(self, frame_idx: int) -> None:
-        """Seek to a specific frame in the video.
-
-        Args:
-            frame_idx (int): Index of the frame to seek to (0-based).
-
-        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
-
-    def release(self) -> None:
-        """Release the video capture resources."""
-        if self.cap is not None and self.cap.isOpened():
-            self.cap.release()
-            self.cap = None
-
-    def frames(
-        self,
-        *,
-        start: int = 0,
-        end: int = None,
-        stride: int = 1,
-        resolution_wh: tuple[int, int] = None,
-    ):
-        """Generate frames from the video source.
-
-        Args:
-            start (int, optional): Starting frame index. Defaults to 0.
-            end (int, optional): Ending frame index. Defaults to None.
-        stride (int, optional): Number of frames to skip. Defaults to 1.
-            resolution_wh (tuple[int, int], optional): Target resolution
-                (width, height). If provided, frames will be resized. Defaults to None.
-
-            Yields:
-                np.ndarray: Video frames in BGR format.        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-
-        total_frames = self.video_info.total_frames if self.video_info else 0
-        is_live_stream = total_frames <= 0
-
-        if is_live_stream:
-            while True:
-                for _ in range(stride - 1):
-                    if not self.grab():
-                        return
-                ret, frame = self.read()
-                if not ret:
-                    return
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-        else:
-            if end is None or end > total_frames:
-                end = total_frames
-
-            frame_idx = start
-            while frame_idx < end:
-                self.seek(frame_idx)
-                ret, frame = self.read()
-                if not ret:
-                    break
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-                frame_idx += stride
-
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-        codec: str = "mp4v",
-    ):
-        """Save processed video frames to a file with audio preservation.
-
-        Args:
-            target_path (str): Path where the processed video will be saved.
-            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
-                each frame. Takes frame and index as input, returns processed frame.
-            fps (int, optional): Output video FPS. If None, uses source FPS.
-            progress_message (str, optional): Message to show in progress bar.
-            show_progress (bool, optional): Whether to show progress bar.
-
-        Raises:
-            RuntimeError: If video source is not opened.
-            ValueError: If source is not a video file.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-
-        if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
-            raise ValueError("Only video files can be saved.")
-
-        if self.writer is not None:
-            self.writer.close()
-            self.writer = None
-
-        if fps is None:
-            fps = self.video_info.fps
-
-        self.writer = OpenCVWriter(
-            target_path, fps, self.video_info.resolution_wh, codec
-        )
-        total_frames = self.video_info.total_frames
-        frames_generator = self.frames()
-        for index, frame in enumerate(
-            tqdm(
-                frames_generator,
-                total=total_frames,
-                disable=not show_progress,
-                desc=progress_message,
-            )
-        ):
-            result_frame = callback(frame, index)
-            self.writer.write(frame=result_frame)
-
-        self.writer.close()
-
-
-class OpenCVWriter(BaseWriter):
-    """A class to handle video writing operations using OpenCV's VideoWriter.
-
-    This class provides an interface to write frames to a video file using OpenCV,
-    with support for different codecs and automatic fallback to mp4v if the specified
-    codec fails.
-    """
-
-    def __init__(
-        self,
-        filename: str,
-        fps: int,
-        frame_size: tuple[int, int],
-        codec: str = "mp4v",
-    ):
-        """Initialize the video writer.
-
-        Args:
-            filename (str): Path to the output video file.
-            fps (int): Frames per second for the output video.
-            frame_size (tuple[int, int]): Width and height of the output video frames.
-            codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
-
-        Raises:
-            RuntimeError: If the video writer cannot be initialized.
-        """
-        try:
-            fourcc_int = cv2.VideoWriter_fourcc(*codec)
-            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
-        except Exception:
-            fourcc_int = cv2.VideoWriter_fourcc(*"mp4v")
-            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
-        if not self.writer.isOpened():
-            raise RuntimeError(f"Cannot open video writer for file: {filename}")
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.close()
-
-    def write(self, frame: np.ndarray) -> None:
-        """Write a frame to the video file.
-
-        Args:
-            frame (np.ndarray): The frame to write, in BGR format.
-        """
-        self.writer.write(frame)
-
-    def close(self) -> None:
-        """Release the video writer resources."""
-        self.writer.release()
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/supervision/video/core.py b/supervision/video/core.py
deleted file mode 100644
index 410890207e..0000000000
--- a/supervision/video/core.py
+++ /dev/null
@@ -1,105 +0,0 @@
-from collections.abc import Callable
-
-import numpy as np
-
-from supervision.video.backend.base import BaseBackend, BaseWriter
-from supervision.video.backend.openCV import OpenCVBackend
-from supervision.video.utils import VideoInfo
-
-
-class Video:
-    """High-level interface for video operations.
-
-    This class provides a convenient interface for video operations including
-    reading frames, saving processed videos, and video information access.
-    """
-
-    info: VideoInfo
-    source: str | int
-    backend: BaseBackend
-
-    def __init__(
-        self, source: str | int, info: VideoInfo = None, backend: str = "opencv"
-    ):
-        if backend == "opencv":
-            self.backend = OpenCVBackend()
-
-        self.backend.open(source)
-        self.info = self.backend.video_info
-        self.source = source
-
-    def __iter__(self):
-        """Make the Video class iterable over frames.
-
-        Returns:
-            Generator: A generator yielding video frames.
-        """
-        return self.backend.frames()
-
-    def sink(
-        self, target_path: str, info: VideoInfo, codec: str = "mp4v"
-    ) -> BaseWriter:
-        """Create a video writer for saving frames.
-
-        Args:
-            target_path (str): Path where the video will be saved.
-            info (VideoInfo): Video information containing resolution and FPS.
-            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
-
-        Returns:
-            Writer: A video writer object for writing frames.
-        """
-        return self.backend.get_sink(target_path, info, codec)
-
-    def frames(
-        self,
-        stride: int = 1,
-        start: int = 0,
-        end: int = None,
-        resolution_wh: tuple[int, int] = None,
-    ):
-        """Generate frames from the video.
-
-        Args:
-            stride (int, optional): Number of frames to skip. Defaults to 1.
-            start (int, optional): Starting frame index. Defaults to 0.
-            end (int, optional): Ending frame index. Defaults to None.
-            resolution_wh (tuple[int, int], optional): Target resolution
-                (width, height). If provided, frames will be resized. Defaults to None.
-
-        Returns:
-            Generator: A generator yielding video frames.
-        """
-        return self.backend.frames(
-            stride=stride, start=start, end=end, resolution_wh=resolution_wh
-        )
-
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-        codec: str = "mp4v",
-    ):
-        """Save processed video frames to a file.
-
-        Args:
-            target_path (str): Path where the processed video will be saved.
-            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
-                each frame. Takes frame and index as input, returns processed frame.
-            fps (int, optional): Output video FPS.
-            progress_message (str, optional): Message to show in progress bar.
-                Defaults to "Processing video".
-            show_progress (bool, optional): Whether to show progress bar.
-                Defaults to False.
-        """
-        self.backend.save(
-            target_path=target_path,
-            callback=callback,
-            fps=fps,
-            progress_message=progress_message,
-            show_progress=show_progress,
-            codec=codec,
-        )
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
deleted file mode 100644
index fa5cbe8685..0000000000
--- a/supervision/video/utils.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from dataclasses import dataclass
-from enum import Enum
-
-import cv2
-
-
-class SOURCE_TYPE(Enum):
-    VIDEO_FILE = "VIDEO_FILE"
-    WEBCAM = "WEBCAM"
-    RTSP = "RTSP"
-
-
-@dataclass
-class VideoInfo:
-    """
-    A class to store video information, including width, height, fps and
-        total number of frames.
-
-    Attributes:
-        width (int): width of the video in pixels
-        height (int): height of the video in pixels
-        fps (int): frames per second of the video
-        total_frames (Optional[int]): total number of frames in the video,
-            default is None
-        source_type (Optional[SOURCE_TYPE]): source type of the video,
-            default is None
-
-    Examples:
-        ```python
-        import supervision as sv
-
-        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)
-
-        video_info
-        # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
-
-        video_info.resolution_wh
-        # (3840, 2160)
-        ```
-    """
-
-    width: int
-    height: int
-    fps: int
-    total_frames: int = None
-    source_type: SOURCE_TYPE = None
-
-    @classmethod
-    def from_video_path(cls, video_path: str) -> "VideoInfo":
-        """Create VideoInfo from a video file path.
-
-        Args:
-            video_path (str): Path to the video file.
-
-        Returns:
-            VideoInfo: Video info containing width, height, fps, and total frames.
-
-        Raises:
-            ValueError: If video cannot be opened or has invalid properties.
-        """
-        video = cv2.VideoCapture(video_path)
-        if not video.isOpened():
-            raise ValueError(f"Could not open video at {video_path}")
-
-        try:
-            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            if width <= 0 or height <= 0:
-                raise ValueError(f"Invalid video dimensions: {width}x{height}")
-
-            fps = video.get(cv2.CAP_PROP_FPS)
-            if fps <= 0:
-                fps = 30  # Default to 30fps if invalid
-            fps = round(fps)
-
-            total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-            if total_frames < 0:
-                total_frames = None  # Some video formats may not report frame count
-        finally:
-            video.release()
-
-        return VideoInfo(width, height, fps, total_frames)
-
-    @property
-    def resolution_wh(self) -> tuple[int, int]:
-        """Get the video resolution as (width, height).
-
-        Returns:
-            Tuple[int, int]: Video dimensions as (width, height).
-        """
-        return self.width, self.height

From af3db7f7d4a43c7d8c9b9ffbebb6f141276e8b60 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:32:19 +0000
Subject: [PATCH 045/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 53f98a8136..ab45651ac9 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -130,8 +130,8 @@
 from supervision.utils.notebook import plot_image, plot_images_grid
 from supervision.utils.video import (
     FPSMonitor,
-    VideoSink,
     VideoInfo,
+    VideoSink,
     get_video_frames_generator,
     process_video,
 )

From e2edb9603dfd5a8c1352349bbd0fc67911f82937 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:34:58 -0400
Subject: [PATCH 046/128] ADD: Added video folder

---
 supervision/video/__init__.py       |   0
 supervision/video/backend/base.py   |  85 ++++++++
 supervision/video/backend/openCV.py | 314 ++++++++++++++++++++++++++++
 supervision/video/backend/pyAV.py   |   0
 supervision/video/core.py           | 105 ++++++++++
 supervision/video/utils.py          |  91 ++++++++
 6 files changed, 595 insertions(+)
 create mode 100644 supervision/video/__init__.py
 create mode 100644 supervision/video/backend/base.py
 create mode 100644 supervision/video/backend/openCV.py
 create mode 100644 supervision/video/backend/pyAV.py
 create mode 100644 supervision/video/core.py
 create mode 100644 supervision/video/utils.py

diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
new file mode 100644
index 0000000000..14854ed65a
--- /dev/null
+++ b/supervision/video/backend/base.py
@@ -0,0 +1,85 @@
+from abc import ABC, abstractmethod
+from collections.abc import Callable
+
+import numpy as np
+
+from supervision.video.utils import VideoInfo
+
+
+class BaseBackend(ABC):
+    def __init__(self):
+        self.cap = None
+        self.video_info = None
+        self.writer = None
+        self.path = None
+
+    @abstractmethod
+    def get_sink(
+        self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"
+    ) -> "BaseWriter":
+        pass
+
+    @abstractmethod
+    def open(self, path: str) -> None:
+        pass
+
+    @abstractmethod
+    def isOpened(self) -> bool:
+        pass
+
+    @abstractmethod
+    def _set_video_info(self) -> VideoInfo:
+        pass
+
+    @abstractmethod
+    def info(self) -> VideoInfo:
+        pass
+
+    @abstractmethod
+    def read(self) -> tuple[bool, np.ndarray]:
+        pass
+
+    @abstractmethod
+    def grab(self) -> bool:
+        pass
+
+    @abstractmethod
+    def seek(self, frame_idx: int) -> None:
+        pass
+
+    @abstractmethod
+    def frames(
+        self,
+        *,
+        start: int = 0,
+        end: int = None,
+        stride: int = 1,
+        resolution_wh: tuple[int, int] = None,
+    ):
+        pass
+
+    @abstractmethod
+    def release(self) -> None:
+        pass
+
+    @abstractmethod
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+        codec: str = "mp4v",
+    ):
+        pass
+
+
+class BaseWriter(ABC):
+    @abstractmethod
+    def write(self, frame: np.ndarray) -> None:
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        pass
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
new file mode 100644
index 0000000000..ac8452d5ab
--- /dev/null
+++ b/supervision/video/backend/openCV.py
@@ -0,0 +1,314 @@
+from collections.abc import Callable
+
+import cv2
+import numpy as np
+from tqdm.auto import tqdm
+
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
+
+
+class OpenCVBackend(BaseBackend):
+    """
+    OpenCV implementation of the Backend interface.
+    Handles video capture, frame reading, seeking, and writing operations using OpenCV.
+    """
+
+    def __init__(self):
+        """Initialize the OpenCV backend with empty video capture and writer objects."""
+        super().__init__()
+        self.cap = None
+        self.video_info = None
+        self.writer = None
+        self.path = None
+
+    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
+        """Create a video writer for saving frames using OpenCV.
+
+        Args:
+            target_path (str): Path where the video will be saved.
+            video_info (VideoInfo): Video information containing resolution and FPS.
+            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
+
+        Returns:
+            OpenCVWriter: A video writer object for writing frames.
+        """
+        return OpenCVWriter(
+            target_path, video_info.fps, video_info.resolution_wh, codec
+        )
+
+    def open(self, path: str) -> None:
+        """
+        Open a video source and initialize the video capture object.
+
+        Args:
+            path (str): Path to the video file, RTSP URL, or camera index.
+
+        Raises:
+            RuntimeError: If unable to open the video source.
+            ValueError: If the source type is not supported.
+        """
+        self.cap = cv2.VideoCapture(path)
+        self.path = path
+
+        if not self.cap.isOpened():
+            raise RuntimeError(f"Cannot open video source: {path}")
+        self.video_info = self._set_video_info()
+
+        if isinstance(path, int):
+            self.video_info.source_type = SOURCE_TYPE.WEBCAM
+        elif isinstance(path, str):
+            self.video_info.source_type = (
+                SOURCE_TYPE.RTSP
+                if path.lower().startswith("rtsp://")
+                else SOURCE_TYPE.VIDEO_FILE
+            )
+        else:
+            raise ValueError("Unsupported source type")
+
+    def isOpened(self) -> bool:
+        """Check if the video source is opened successfully.
+
+        Returns:
+            bool: True if the video source is opened, False otherwise.
+        """
+        return self.cap.isOpened()
+
+    def _set_video_info(self) -> VideoInfo:
+        """Set up video information from the opened video source.
+
+        Returns:
+            VideoInfo: Object containing video properties like width, height, fps, etc.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+        width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = round(self.cap.get(cv2.CAP_PROP_FPS))
+        total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        return VideoInfo(width, height, fps, total_frames)
+
+    def info(self) -> VideoInfo:
+        """Get video information.
+
+        Returns:
+            VideoInfo: Object containing video properties.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+        return self.video_info
+
+    def read(self) -> tuple[bool, np.ndarray]:
+        """Read a frame from the video source.
+
+        Returns:
+            tuple[bool, np.ndarray]: A tuple containing:
+                - bool: True if frame was successfully read
+                - np.ndarray: The video frame in BGR format
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        ret, frame = self.cap.read()
+        return ret, frame
+
+    def grab(self) -> bool:
+        """Grab a frame from video source without decoding.
+
+        Returns:
+            bool: True if frame was successfully grabbed.
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        return self.cap.grab()
+
+    def seek(self, frame_idx: int) -> None:
+        """Seek to a specific frame in the video.
+
+        Args:
+            frame_idx (int): Index of the frame to seek to (0-based).
+
+        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+
+    def release(self) -> None:
+        """Release the video capture resources."""
+        if self.cap is not None and self.cap.isOpened():
+            self.cap.release()
+            self.cap = None
+
+    def frames(
+        self,
+        *,
+        start: int = 0,
+        end: int = None,
+        stride: int = 1,
+        resolution_wh: tuple[int, int] = None,
+    ):
+        """Generate frames from the video source.
+
+        Args:
+            start (int, optional): Starting frame index. Defaults to 0.
+            end (int, optional): Ending frame index. Defaults to None.
+        stride (int, optional): Number of frames to skip. Defaults to 1.
+            resolution_wh (tuple[int, int], optional): Target resolution
+                (width, height). If provided, frames will be resized. Defaults to None.
+
+            Yields:
+                np.ndarray: Video frames in BGR format.        Raises:
+            RuntimeError: If the video source is not opened yet.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        total_frames = self.video_info.total_frames if self.video_info else 0
+        is_live_stream = total_frames <= 0
+
+        if is_live_stream:
+            while True:
+                for _ in range(stride - 1):
+                    if not self.grab():
+                        return
+                ret, frame = self.read()
+                if not ret:
+                    return
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+        else:
+            if end is None or end > total_frames:
+                end = total_frames
+
+            frame_idx = start
+            while frame_idx < end:
+                self.seek(frame_idx)
+                ret, frame = self.read()
+                if not ret:
+                    break
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+                frame_idx += stride
+
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+        codec: str = "mp4v",
+    ):
+        """Save processed video frames to a file with audio preservation.
+
+        Args:
+            target_path (str): Path where the processed video will be saved.
+            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
+                each frame. Takes frame and index as input, returns processed frame.
+            fps (int, optional): Output video FPS. If None, uses source FPS.
+            progress_message (str, optional): Message to show in progress bar.
+            show_progress (bool, optional): Whether to show progress bar.
+
+        Raises:
+            RuntimeError: If video source is not opened.
+            ValueError: If source is not a video file.
+        """
+        if self.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
+            raise ValueError("Only video files can be saved.")
+
+        if self.writer is not None:
+            self.writer.close()
+            self.writer = None
+
+        if fps is None:
+            fps = self.video_info.fps
+
+        self.writer = OpenCVWriter(
+            target_path, fps, self.video_info.resolution_wh, codec
+        )
+        total_frames = self.video_info.total_frames
+        frames_generator = self.frames()
+        for index, frame in enumerate(
+            tqdm(
+                frames_generator,
+                total=total_frames,
+                disable=not show_progress,
+                desc=progress_message,
+            )
+        ):
+            result_frame = callback(frame, index)
+            self.writer.write(frame=result_frame)
+
+        self.writer.close()
+
+
+class OpenCVWriter(BaseWriter):
+    """A class to handle video writing operations using OpenCV's VideoWriter.
+
+    This class provides an interface to write frames to a video file using OpenCV,
+    with support for different codecs and automatic fallback to mp4v if the specified
+    codec fails.
+    """
+
+    def __init__(
+        self,
+        filename: str,
+        fps: int,
+        frame_size: tuple[int, int],
+        codec: str = "mp4v",
+    ):
+        """Initialize the video writer.
+
+        Args:
+            filename (str): Path to the output video file.
+            fps (int): Frames per second for the output video.
+            frame_size (tuple[int, int]): Width and height of the output video frames.
+            codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
+
+        Raises:
+            RuntimeError: If the video writer cannot be initialized.
+        """
+        try:
+            fourcc_int = cv2.VideoWriter_fourcc(*codec)
+            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
+        except Exception:
+            fourcc_int = cv2.VideoWriter_fourcc(*"mp4v")
+            self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
+        if not self.writer.isOpened():
+            raise RuntimeError(f"Cannot open video writer for file: {filename}")
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def write(self, frame: np.ndarray) -> None:
+        """Write a frame to the video file.
+
+        Args:
+            frame (np.ndarray): The frame to write, in BGR format.
+        """
+        self.writer.write(frame)
+
+    def close(self) -> None:
+        """Release the video writer resources."""
+        self.writer.release()
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/supervision/video/core.py b/supervision/video/core.py
new file mode 100644
index 0000000000..410890207e
--- /dev/null
+++ b/supervision/video/core.py
@@ -0,0 +1,105 @@
+from collections.abc import Callable
+
+import numpy as np
+
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.backend.openCV import OpenCVBackend
+from supervision.video.utils import VideoInfo
+
+
+class Video:
+    """High-level interface for video operations.
+
+    This class provides a convenient interface for video operations including
+    reading frames, saving processed videos, and video information access.
+    """
+
+    info: VideoInfo
+    source: str | int
+    backend: BaseBackend
+
+    def __init__(
+        self, source: str | int, info: VideoInfo = None, backend: str = "opencv"
+    ):
+        if backend == "opencv":
+            self.backend = OpenCVBackend()
+
+        self.backend.open(source)
+        self.info = self.backend.video_info
+        self.source = source
+
+    def __iter__(self):
+        """Make the Video class iterable over frames.
+
+        Returns:
+            Generator: A generator yielding video frames.
+        """
+        return self.backend.frames()
+
+    def sink(
+        self, target_path: str, info: VideoInfo, codec: str = "mp4v"
+    ) -> BaseWriter:
+        """Create a video writer for saving frames.
+
+        Args:
+            target_path (str): Path where the video will be saved.
+            info (VideoInfo): Video information containing resolution and FPS.
+            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
+
+        Returns:
+            Writer: A video writer object for writing frames.
+        """
+        return self.backend.get_sink(target_path, info, codec)
+
+    def frames(
+        self,
+        stride: int = 1,
+        start: int = 0,
+        end: int = None,
+        resolution_wh: tuple[int, int] = None,
+    ):
+        """Generate frames from the video.
+
+        Args:
+            stride (int, optional): Number of frames to skip. Defaults to 1.
+            start (int, optional): Starting frame index. Defaults to 0.
+            end (int, optional): Ending frame index. Defaults to None.
+            resolution_wh (tuple[int, int], optional): Target resolution
+                (width, height). If provided, frames will be resized. Defaults to None.
+
+        Returns:
+            Generator: A generator yielding video frames.
+        """
+        return self.backend.frames(
+            stride=stride, start=start, end=end, resolution_wh=resolution_wh
+        )
+
+    def save(
+        self,
+        target_path: str,
+        callback: Callable[[np.ndarray, int], np.ndarray],
+        fps: int = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+        codec: str = "mp4v",
+    ):
+        """Save processed video frames to a file.
+
+        Args:
+            target_path (str): Path where the processed video will be saved.
+            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
+                each frame. Takes frame and index as input, returns processed frame.
+            fps (int, optional): Output video FPS.
+            progress_message (str, optional): Message to show in progress bar.
+                Defaults to "Processing video".
+            show_progress (bool, optional): Whether to show progress bar.
+                Defaults to False.
+        """
+        self.backend.save(
+            target_path=target_path,
+            callback=callback,
+            fps=fps,
+            progress_message=progress_message,
+            show_progress=show_progress,
+            codec=codec,
+        )
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
new file mode 100644
index 0000000000..fa5cbe8685
--- /dev/null
+++ b/supervision/video/utils.py
@@ -0,0 +1,91 @@
+from dataclasses import dataclass
+from enum import Enum
+
+import cv2
+
+
+class SOURCE_TYPE(Enum):
+    VIDEO_FILE = "VIDEO_FILE"
+    WEBCAM = "WEBCAM"
+    RTSP = "RTSP"
+
+
+@dataclass
+class VideoInfo:
+    """
+    A class to store video information, including width, height, fps and
+        total number of frames.
+
+    Attributes:
+        width (int): width of the video in pixels
+        height (int): height of the video in pixels
+        fps (int): frames per second of the video
+        total_frames (Optional[int]): total number of frames in the video,
+            default is None
+        source_type (Optional[SOURCE_TYPE]): source type of the video,
+            default is None
+
+    Examples:
+        ```python
+        import supervision as sv
+
+        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)
+
+        video_info
+        # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
+
+        video_info.resolution_wh
+        # (3840, 2160)
+        ```
+    """
+
+    width: int
+    height: int
+    fps: int
+    total_frames: int = None
+    source_type: SOURCE_TYPE = None
+
+    @classmethod
+    def from_video_path(cls, video_path: str) -> "VideoInfo":
+        """Create VideoInfo from a video file path.
+
+        Args:
+            video_path (str): Path to the video file.
+
+        Returns:
+            VideoInfo: Video info containing width, height, fps, and total frames.
+
+        Raises:
+            ValueError: If video cannot be opened or has invalid properties.
+        """
+        video = cv2.VideoCapture(video_path)
+        if not video.isOpened():
+            raise ValueError(f"Could not open video at {video_path}")
+
+        try:
+            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            if width <= 0 or height <= 0:
+                raise ValueError(f"Invalid video dimensions: {width}x{height}")
+
+            fps = video.get(cv2.CAP_PROP_FPS)
+            if fps <= 0:
+                fps = 30  # Default to 30fps if invalid
+            fps = round(fps)
+
+            total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+            if total_frames < 0:
+                total_frames = None  # Some video formats may not report frame count
+        finally:
+            video.release()
+
+        return VideoInfo(width, height, fps, total_frames)
+
+    @property
+    def resolution_wh(self) -> tuple[int, int]:
+        """Get the video resolution as (width, height).
+
+        Returns:
+            Tuple[int, int]: Video dimensions as (width, height).
+        """
+        return self.width, self.height

From ed5e15b254eaf72251106f260cba0497cb6a2d90 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:37:43 -0400
Subject: [PATCH 047/128] UPDATE: Revert typings

---
 supervision/video/backend/base.py   | 14 ++++++-------
 supervision/video/backend/openCV.py | 24 ++++++++++------------
 supervision/video/core.py           | 31 ++++++++++++++++-------------
 supervision/video/utils.py          |  5 +++--
 4 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 14854ed65a..27a5797a89 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,11 +1,9 @@
 from abc import ABC, abstractmethod
-from collections.abc import Callable
-
 import numpy as np
+from collections.abc import Callable, Generator
 
 from supervision.video.utils import VideoInfo
 
-
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
@@ -46,15 +44,15 @@ def grab(self) -> bool:
     @abstractmethod
     def seek(self, frame_idx: int) -> None:
         pass
-
+    
     @abstractmethod
     def frames(
         self,
         *,
         start: int = 0,
-        end: int = None,
+        end: int | None = None,
         stride: int = 1,
-        resolution_wh: tuple[int, int] = None,
+        resolution_wh: tuple[int, int] | None = None,
     ):
         pass
 
@@ -67,10 +65,10 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
+        fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v",
+        codec: str = "mp4v"
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index ac8452d5ab..f31716b24a 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -1,12 +1,10 @@
-from collections.abc import Callable
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
 
 import cv2
 import numpy as np
 from tqdm.auto import tqdm
-
-from supervision.video.backend.base import BaseBackend, BaseWriter
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
-
+from typing import Callable
 
 class OpenCVBackend(BaseBackend):
     """
@@ -156,17 +154,17 @@ def frames(
         self,
         *,
         start: int = 0,
-        end: int = None,
+        end: int | None = None,
         stride: int = 1,
-        resolution_wh: tuple[int, int] = None,
+        resolution_wh: tuple[int, int] | None = None,
     ):
         """Generate frames from the video source.
 
         Args:
             start (int, optional): Starting frame index. Defaults to 0.
-            end (int, optional): Ending frame index. Defaults to None.
+            end (int | None, optional): Ending frame index. Defaults to None.
         stride (int, optional): Number of frames to skip. Defaults to 1.
-            resolution_wh (tuple[int, int], optional): Target resolution
+            resolution_wh (tuple[int, int] | None, optional): Target resolution
                 (width, height). If provided, frames will be resized. Defaults to None.
 
             Yields:
@@ -209,10 +207,10 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
+        fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v",
+        codec: str = "mp4v"
     ):
         """Save processed video frames to a file with audio preservation.
 
@@ -220,7 +218,7 @@ def save(
             target_path (str): Path where the processed video will be saved.
             callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
                 each frame. Takes frame and index as input, returns processed frame.
-            fps (int, optional): Output video FPS. If None, uses source FPS.
+            fps (int | None, optional): Output video FPS. If None, uses source FPS.
             progress_message (str, optional): Message to show in progress bar.
             show_progress (bool, optional): Whether to show progress bar.
 
@@ -237,7 +235,7 @@ def save(
         if self.writer is not None:
             self.writer.close()
             self.writer = None
-
+        
         if fps is None:
             fps = self.video_info.fps
 
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 410890207e..9a2921a8b9 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,10 +1,14 @@
-from collections.abc import Callable
 
+from collections.abc import Callable, Generator
+
+import cv2
 import numpy as np
+from tqdm.auto import tqdm
 
+from supervision.video.utils import VideoInfo
 from supervision.video.backend.base import BaseBackend, BaseWriter
+
 from supervision.video.backend.openCV import OpenCVBackend
-from supervision.video.utils import VideoInfo
 
 
 class Video:
@@ -19,7 +23,7 @@ class Video:
     backend: BaseBackend
 
     def __init__(
-        self, source: str | int, info: VideoInfo = None, backend: str = "opencv"
+        self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"
     ):
         if backend == "opencv":
             self.backend = OpenCVBackend()
@@ -36,9 +40,7 @@ def __iter__(self):
         """
         return self.backend.frames()
 
-    def sink(
-        self, target_path: str, info: VideoInfo, codec: str = "mp4v"
-    ) -> BaseWriter:
+    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter:
         """Create a video writer for saving frames.
 
         Args:
@@ -55,16 +57,16 @@ def frames(
         self,
         stride: int = 1,
         start: int = 0,
-        end: int = None,
-        resolution_wh: tuple[int, int] = None,
+        end: int | None = None,
+        resolution_wh: tuple[int, int] | None = None,
     ):
         """Generate frames from the video.
 
         Args:
             stride (int, optional): Number of frames to skip. Defaults to 1.
             start (int, optional): Starting frame index. Defaults to 0.
-            end (int, optional): Ending frame index. Defaults to None.
-            resolution_wh (tuple[int, int], optional): Target resolution
+            end (int | None, optional): Ending frame index. Defaults to None.
+            resolution_wh (tuple[int, int] | None, optional): Target resolution
                 (width, height). If provided, frames will be resized. Defaults to None.
 
         Returns:
@@ -78,10 +80,10 @@ def save(
         self,
         target_path: str,
         callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int = None,
+        fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v",
+        codec: str = "mp4v"
     ):
         """Save processed video frames to a file.
 
@@ -89,7 +91,7 @@ def save(
             target_path (str): Path where the processed video will be saved.
             callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
                 each frame. Takes frame and index as input, returns processed frame.
-            fps (int, optional): Output video FPS.
+            fps (int | None, optional): Output video FPS.
             progress_message (str, optional): Message to show in progress bar.
                 Defaults to "Processing video".
             show_progress (bool, optional): Whether to show progress bar.
@@ -101,5 +103,6 @@ def save(
             fps=fps,
             progress_message=progress_message,
             show_progress=show_progress,
-            codec=codec,
+            codec=codec
         )
+
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index fa5cbe8685..64ec2fdbe5 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 from enum import Enum
+from typing import Optional, Tuple
 
 import cv2
 
@@ -42,8 +43,8 @@ class VideoInfo:
     width: int
     height: int
     fps: int
-    total_frames: int = None
-    source_type: SOURCE_TYPE = None
+    total_frames: int | None = None
+    source_type: SOURCE_TYPE | None = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> "VideoInfo":

From fd669252b007a047ba8ebfbfe96c2493ead0003f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:38:08 +0000
Subject: [PATCH 048/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/base.py   |  8 +++++---
 supervision/video/backend/openCV.py | 12 +++++++-----
 supervision/video/core.py           | 17 +++++++----------
 supervision/video/utils.py          |  1 -
 4 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 27a5797a89..5ee7553396 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,9 +1,11 @@
 from abc import ABC, abstractmethod
+from collections.abc import Callable
+
 import numpy as np
-from collections.abc import Callable, Generator
 
 from supervision.video.utils import VideoInfo
 
+
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
@@ -44,7 +46,7 @@ def grab(self) -> bool:
     @abstractmethod
     def seek(self, frame_idx: int) -> None:
         pass
-    
+
     @abstractmethod
     def frames(
         self,
@@ -68,7 +70,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v"
+        codec: str = "mp4v",
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index f31716b24a..88dfedf6f7 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -1,10 +1,12 @@
-from supervision.video.backend.base import BaseBackend, BaseWriter
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from collections.abc import Callable
 
 import cv2
 import numpy as np
 from tqdm.auto import tqdm
-from typing import Callable
+
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
+
 
 class OpenCVBackend(BaseBackend):
     """
@@ -210,7 +212,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v"
+        codec: str = "mp4v",
     ):
         """Save processed video frames to a file with audio preservation.
 
@@ -235,7 +237,7 @@ def save(
         if self.writer is not None:
             self.writer.close()
             self.writer = None
-        
+
         if fps is None:
             fps = self.video_info.fps
 
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 9a2921a8b9..61d1b2bd38 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,14 +1,10 @@
+from collections.abc import Callable
 
-from collections.abc import Callable, Generator
-
-import cv2
 import numpy as np
-from tqdm.auto import tqdm
 
-from supervision.video.utils import VideoInfo
 from supervision.video.backend.base import BaseBackend, BaseWriter
-
 from supervision.video.backend.openCV import OpenCVBackend
+from supervision.video.utils import VideoInfo
 
 
 class Video:
@@ -40,7 +36,9 @@ def __iter__(self):
         """
         return self.backend.frames()
 
-    def sink(self, target_path: str, info: VideoInfo, codec: str = "mp4v") -> BaseWriter:
+    def sink(
+        self, target_path: str, info: VideoInfo, codec: str = "mp4v"
+    ) -> BaseWriter:
         """Create a video writer for saving frames.
 
         Args:
@@ -83,7 +81,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v"
+        codec: str = "mp4v",
     ):
         """Save processed video frames to a file.
 
@@ -103,6 +101,5 @@ def save(
             fps=fps,
             progress_message=progress_message,
             show_progress=show_progress,
-            codec=codec
+            codec=codec,
         )
-
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 64ec2fdbe5..aa0d430f91 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,6 +1,5 @@
 from dataclasses import dataclass
 from enum import Enum
-from typing import Optional, Tuple
 
 import cv2
 

From 13010e02e4e31be6531ae622c1568a1e5f488d38 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:39:43 -0400
Subject: [PATCH 049/128] UPDATE: sv imports

---
 supervision/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index ab45651ac9..dff62b5c95 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -130,11 +130,12 @@
 from supervision.utils.notebook import plot_image, plot_images_grid
 from supervision.utils.video import (
     FPSMonitor,
-    VideoInfo,
     VideoSink,
     get_video_frames_generator,
     process_video,
 )
+from supervision.video.core import Video
+from supervision.video.utils import VideoInfo
 
 __all__ = [
     "LMM",
@@ -192,6 +193,7 @@
     "TriangleAnnotator",
     "VertexAnnotator",
     "VertexLabelAnnotator",
+    "Video",
     "VideoInfo",
     "VideoSink",
     "approximate_polygon",

From 25e3b5fe357763a65ab358d136f3ea12b4fff4a8 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:45:00 -0400
Subject: [PATCH 050/128] UPDATE: default imports using __init__

---
 supervision/__init__.py           |  3 +--
 supervision/video/__init__.py     | 11 +++++++++++
 supervision/video/backend/base.py |  4 +++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index dff62b5c95..6e2e329b75 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -134,8 +134,7 @@
     get_video_frames_generator,
     process_video,
 )
-from supervision.video.core import Video
-from supervision.video.utils import VideoInfo
+from supervision.video import Video, VideoInfo
 
 __all__ = [
     "LMM",
diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py
index e69de29bb2..4102ee5e87 100644
--- a/supervision/video/__init__.py
+++ b/supervision/video/__init__.py
@@ -0,0 +1,11 @@
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from supervision.video.core import Video
+from supervision.video.backend.base import BaseBackend, BaseWriter
+
+__all__ = [
+    'Video',
+    'VideoInfo',
+    'SOURCE_TYPE',
+    'BaseBackend',
+    'BaseWriter'
+]
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 5ee7553396..5c8e890dbb 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,9 +1,11 @@
 from abc import ABC, abstractmethod
 from collections.abc import Callable
+from typing import Optional, Tuple, TYPE_CHECKING
 
 import numpy as np
 
-from supervision.video.utils import VideoInfo
+if TYPE_CHECKING:
+    from supervision.video.utils import VideoInfo
 
 
 class BaseBackend(ABC):

From 0ad6cd161dd970b4f093902bc20be68f76716010 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:45:17 +0000
Subject: [PATCH 051/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/__init__.py     | 12 +++---------
 supervision/video/backend/base.py |  2 +-
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py
index 4102ee5e87..d5d5559ba8 100644
--- a/supervision/video/__init__.py
+++ b/supervision/video/__init__.py
@@ -1,11 +1,5 @@
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
-from supervision.video.core import Video
 from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.core import Video
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
 
-__all__ = [
-    'Video',
-    'VideoInfo',
-    'SOURCE_TYPE',
-    'BaseBackend',
-    'BaseWriter'
-]
+__all__ = ["SOURCE_TYPE", "BaseBackend", "BaseWriter", "Video", "VideoInfo"]
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 5c8e890dbb..48d2f05966 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from collections.abc import Callable
-from typing import Optional, Tuple, TYPE_CHECKING
+from typing import TYPE_CHECKING
 
 import numpy as np
 

From 3e7b247daf4477ea504178768c65a677714e0c28 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:47:02 -0400
Subject: [PATCH 052/128] FIX: Fixed VideoInfo definition

---
 supervision/video/backend/base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 5c8e890dbb..8e323df069 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -4,8 +4,7 @@
 
 import numpy as np
 
-if TYPE_CHECKING:
-    from supervision.video.utils import VideoInfo
+from supervision.video.utils import VideoInfo
 
 
 class BaseBackend(ABC):

From e855b1456ccf7dae2582b7da737cabb83915c2b7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:47:50 +0000
Subject: [PATCH 053/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 2b03d27e8a..5ee7553396 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from collections.abc import Callable
-from typing import TYPE_CHECKING
 
 import numpy as np
 

From 29fb6665dc593cc05b894bb7d3fec443a6b3e77e Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 01:51:08 -0400
Subject: [PATCH 054/128] FIX: Fixed future annotations

---
 supervision/video/backend/base.py   | 2 ++
 supervision/video/backend/openCV.py | 1 +
 supervision/video/backend/pyAV.py   | 1 +
 supervision/video/core.py           | 1 +
 supervision/video/utils.py          | 1 +
 5 files changed, 6 insertions(+)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 5ee7553396..47208c5ce6 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
 from collections.abc import Callable
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 88dfedf6f7..3843c555d1 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from collections.abc import Callable
 
 import cv2
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index e69de29bb2..6c43ea250f 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -0,0 +1 @@
+from __future__ import annotations
\ No newline at end of file
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 61d1b2bd38..7c487a2900 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from collections.abc import Callable
 
 import numpy as np
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index aa0d430f91..cb8be96a3a 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from dataclasses import dataclass
 from enum import Enum
 

From d9ab61caedbe6608360d0cd3d5a69e6198889b3e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 05:51:37 +0000
Subject: [PATCH 055/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/base.py   | 2 +-
 supervision/video/backend/openCV.py | 1 +
 supervision/video/backend/pyAV.py   | 2 +-
 supervision/video/core.py           | 1 +
 supervision/video/utils.py          | 3 ++-
 5 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 47208c5ce6..943fdb3d76 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -18,7 +18,7 @@ def __init__(self):
     @abstractmethod
     def get_sink(
         self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"
-    ) -> "BaseWriter":
+    ) -> BaseWriter:
         pass
 
     @abstractmethod
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 3843c555d1..44d5121308 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 from collections.abc import Callable
 
 import cv2
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 6c43ea250f..9d48db4f9f 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1 +1 @@
-from __future__ import annotations
\ No newline at end of file
+from __future__ import annotations
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 7c487a2900..a358814c4f 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 from collections.abc import Callable
 
 import numpy as np
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index cb8be96a3a..629ce7b262 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 from dataclasses import dataclass
 from enum import Enum
 
@@ -47,7 +48,7 @@ class VideoInfo:
     source_type: SOURCE_TYPE | None = None
 
     @classmethod
-    def from_video_path(cls, video_path: str) -> "VideoInfo":
+    def from_video_path(cls, video_path: str) -> VideoInfo:
         """Create VideoInfo from a video file path.
 
         Args:

From 941d48777b247c39b9723e190cd6dda29ca70203 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 15:50:40 -0400
Subject: [PATCH 056/128] UPDATE: Restructure video saves

---
 supervision/video/backend/base.py   | 13 -------
 supervision/video/backend/openCV.py | 58 +----------------------------
 supervision/video/core.py           | 35 +++++++++++++----
 3 files changed, 29 insertions(+), 77 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 47208c5ce6..13d557fae4 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -64,19 +64,6 @@ def frames(
     def release(self) -> None:
         pass
 
-    @abstractmethod
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-        codec: str = "mp4v",
-    ):
-        pass
-
-
 class BaseWriter(ABC):
     @abstractmethod
     def write(self, frame: np.ndarray) -> None:
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 3843c555d1..b81e6c0730 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -3,7 +3,6 @@
 
 import cv2
 import numpy as np
-from tqdm.auto import tqdm
 
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.utils import SOURCE_TYPE, VideoInfo
@@ -20,7 +19,7 @@ def __init__(self):
         super().__init__()
         self.cap = None
         self.video_info = None
-        self.writer = None
+        self.writer = OpenCVWriter
         self.path = None
 
     def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
@@ -205,60 +204,7 @@ def frames(
                     frame = cv2.resize(frame, resolution_wh)
                 yield frame
                 frame_idx += stride
-
-    def save(
-        self,
-        target_path: str,
-        callback: Callable[[np.ndarray, int], np.ndarray],
-        fps: int | None = None,
-        progress_message: str = "Processing video",
-        show_progress: bool = False,
-        codec: str = "mp4v",
-    ):
-        """Save processed video frames to a file with audio preservation.
-
-        Args:
-            target_path (str): Path where the processed video will be saved.
-            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
-                each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS. If None, uses source FPS.
-            progress_message (str, optional): Message to show in progress bar.
-            show_progress (bool, optional): Whether to show progress bar.
-
-        Raises:
-            RuntimeError: If video source is not opened.
-            ValueError: If source is not a video file.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-
-        if self.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
-            raise ValueError("Only video files can be saved.")
-
-        if self.writer is not None:
-            self.writer.close()
-            self.writer = None
-
-        if fps is None:
-            fps = self.video_info.fps
-
-        self.writer = OpenCVWriter(
-            target_path, fps, self.video_info.resolution_wh, codec
-        )
-        total_frames = self.video_info.total_frames
-        frames_generator = self.frames()
-        for index, frame in enumerate(
-            tqdm(
-                frames_generator,
-                total=total_frames,
-                disable=not show_progress,
-                desc=progress_message,
-            )
-        ):
-            result_frame = callback(frame, index)
-            self.writer.write(frame=result_frame)
-
-        self.writer.close()
+        
 
 
 class OpenCVWriter(BaseWriter):
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 7c487a2900..b7e7d6b719 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -2,10 +2,11 @@
 from collections.abc import Callable
 
 import numpy as np
+from tqdm.auto import tqdm
 
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.backend.openCV import OpenCVBackend
-from supervision.video.utils import VideoInfo
+from supervision.video.utils import VideoInfo, SOURCE_TYPE
 
 
 class Video:
@@ -96,11 +97,29 @@ def save(
             show_progress (bool, optional): Whether to show progress bar.
                 Defaults to False.
         """
-        self.backend.save(
-            target_path=target_path,
-            callback=callback,
-            fps=fps,
-            progress_message=progress_message,
-            show_progress=show_progress,
-            codec=codec,
+        if self.backend.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        if self.backend.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
+            raise ValueError("Only video files can be saved.")
+
+        if fps is None:
+            fps = self.backend.video_info.fps
+
+        writer = self.backend.writer(
+            target_path, fps, self.backend.video_info.resolution_wh, codec
         )
+        total_frames = self.backend.video_info.total_frames
+        frames_generator = self.frames()
+        for index, frame in enumerate(
+            tqdm(
+                frames_generator,
+                total=total_frames,
+                disable=not show_progress,
+                desc=progress_message,
+            )
+        ):
+            result_frame = callback(frame, index)
+            writer.write(frame=result_frame)
+
+        writer.close()

From 2175d2554db89ac5cc5183548fcf5b82d9a173d0 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 15:53:36 -0400
Subject: [PATCH 057/128] UPDATE: Restructure manual writer

---
 supervision/video/backend/base.py   |  6 ------
 supervision/video/backend/openCV.py | 15 ---------------
 supervision/video/core.py           |  4 +++-
 3 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 38b8012691..f8e8aa492f 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -15,12 +15,6 @@ def __init__(self):
         self.writer = None
         self.path = None
 
-    @abstractmethod
-    def get_sink(
-        self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"
-    ) -> BaseWriter:
-        pass
-
     @abstractmethod
     def open(self, path: str) -> None:
         pass
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index f1cc4e4a82..42a0961aad 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -23,21 +23,6 @@ def __init__(self):
         self.writer = OpenCVWriter
         self.path = None
 
-    def get_sink(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
-        """Create a video writer for saving frames using OpenCV.
-
-        Args:
-            target_path (str): Path where the video will be saved.
-            video_info (VideoInfo): Video information containing resolution and FPS.
-            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
-
-        Returns:
-            OpenCVWriter: A video writer object for writing frames.
-        """
-        return OpenCVWriter(
-            target_path, video_info.fps, video_info.resolution_wh, codec
-        )
-
     def open(self, path: str) -> None:
         """
         Open a video source and initialize the video capture object.
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 284ae1d475..6dd0e4942d 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -52,7 +52,9 @@ def sink(
         Returns:
             Writer: A video writer object for writing frames.
         """
-        return self.backend.get_sink(target_path, info, codec)
+        return self.backend.writer(
+             target_path, info.fps, info.resolution_wh, codec
+        )
 
     def frames(
         self,

From 27f126605aa57f322f1d97ac31951218755cd2a0 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 16:02:23 -0400
Subject: [PATCH 058/128] FORMAT: Formatting changes

---
 supervision/video/backend/openCV.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 42a0961aad..4bc7e37c10 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -191,8 +191,6 @@ def frames(
                 yield frame
                 frame_idx += stride
         
-
-
 class OpenCVWriter(BaseWriter):
     """A class to handle video writing operations using OpenCV's VideoWriter.
 

From cf31e29587ec994ddc74acfb88d3c56fb3ab693c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 9 Aug 2025 20:02:51 +0000
Subject: [PATCH 059/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/base.py   | 2 +-
 supervision/video/backend/openCV.py | 5 ++---
 supervision/video/core.py           | 6 ++----
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index f8e8aa492f..78b3875d8e 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from collections.abc import Callable
 
 import numpy as np
 
@@ -58,6 +57,7 @@ def frames(
     def release(self) -> None:
         pass
 
+
 class BaseWriter(ABC):
     @abstractmethod
     def write(self, frame: np.ndarray) -> None:
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 4bc7e37c10..00f5b6ade2 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-from collections.abc import Callable
-
 import cv2
 import numpy as np
 
@@ -190,7 +188,8 @@ def frames(
                     frame = cv2.resize(frame, resolution_wh)
                 yield frame
                 frame_idx += stride
-        
+
+
 class OpenCVWriter(BaseWriter):
     """A class to handle video writing operations using OpenCV's VideoWriter.
 
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 6dd0e4942d..0cd404bd57 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -7,7 +7,7 @@
 
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.backend.openCV import OpenCVBackend
-from supervision.video.utils import VideoInfo, SOURCE_TYPE
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
 
 
 class Video:
@@ -52,9 +52,7 @@ def sink(
         Returns:
             Writer: A video writer object for writing frames.
         """
-        return self.backend.writer(
-             target_path, info.fps, info.resolution_wh, codec
-        )
+        return self.backend.writer(target_path, info.fps, info.resolution_wh, codec)
 
     def frames(
         self,

From abba880145a69ad2c52d353883c231adde9615d0 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 16:18:22 -0400
Subject: [PATCH 060/128] UPDATE: Restructured frame generator

---
 pyproject.toml                      |  2 +-
 supervision/video/backend/openCV.py | 53 -----------------------------
 supervision/video/core.py           | 41 +++++++++++++++++++---
 3 files changed, 37 insertions(+), 59 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 787fa93ade..554dd16553 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,7 +47,6 @@ dependencies = [
     "requests>=2.26.0",
     "tqdm>=4.62.3",
     "opencv-python>=4.5.5.64",
-    "imageio-ffmpeg (>=0.6.0,<0.7.0)"
 ]
 
 [project.urls]
@@ -59,6 +58,7 @@ Documentation = "https://supervision.roboflow.com/latest/"
 metrics = [
     "pandas>=2.0.0",
 ]
+video = ["av (>=15.0.0,<16.0.0)"]
 
 [dependency-groups]
 dev = [
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 00f5b6ade2..7e0bd37f9b 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -136,59 +136,6 @@ def release(self) -> None:
             self.cap.release()
             self.cap = None
 
-    def frames(
-        self,
-        *,
-        start: int = 0,
-        end: int | None = None,
-        stride: int = 1,
-        resolution_wh: tuple[int, int] | None = None,
-    ):
-        """Generate frames from the video source.
-
-        Args:
-            start (int, optional): Starting frame index. Defaults to 0.
-            end (int | None, optional): Ending frame index. Defaults to None.
-        stride (int, optional): Number of frames to skip. Defaults to 1.
-            resolution_wh (tuple[int, int] | None, optional): Target resolution
-                (width, height). If provided, frames will be resized. Defaults to None.
-
-            Yields:
-                np.ndarray: Video frames in BGR format.        Raises:
-            RuntimeError: If the video source is not opened yet.
-        """
-        if self.cap is None:
-            raise RuntimeError("Video not opened yet.")
-
-        total_frames = self.video_info.total_frames if self.video_info else 0
-        is_live_stream = total_frames <= 0
-
-        if is_live_stream:
-            while True:
-                for _ in range(stride - 1):
-                    if not self.grab():
-                        return
-                ret, frame = self.read()
-                if not ret:
-                    return
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-        else:
-            if end is None or end > total_frames:
-                end = total_frames
-
-            frame_idx = start
-            while frame_idx < end:
-                self.seek(frame_idx)
-                ret, frame = self.read()
-                if not ret:
-                    break
-                if resolution_wh is not None:
-                    frame = cv2.resize(frame, resolution_wh)
-                yield frame
-                frame_idx += stride
-
 
 class OpenCVWriter(BaseWriter):
     """A class to handle video writing operations using OpenCV's VideoWriter.
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 0cd404bd57..6e13fc9625 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -3,11 +3,12 @@
 from collections.abc import Callable
 
 import numpy as np
+import cv2
 from tqdm.auto import tqdm
 
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.backend.openCV import OpenCVBackend
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from supervision.video.utils import VideoInfo, SOURCE_TYPE
 
 
 class Video:
@@ -52,7 +53,9 @@ def sink(
         Returns:
             Writer: A video writer object for writing frames.
         """
-        return self.backend.writer(target_path, info.fps, info.resolution_wh, codec)
+        return self.backend.writer(
+             target_path, info.fps, info.resolution_wh, codec
+        )
 
     def frames(
         self,
@@ -73,9 +76,37 @@ def frames(
         Returns:
             Generator: A generator yielding video frames.
         """
-        return self.backend.frames(
-            stride=stride, start=start, end=end, resolution_wh=resolution_wh
-        )
+        if self.backend.cap is None:
+            raise RuntimeError("Video not opened yet.")
+
+        total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0
+        is_live_stream = total_frames <= 0
+
+        if is_live_stream:
+            while True:
+                for _ in range(stride - 1):
+                    if not self.backend.grab():
+                        return
+                ret, frame = self.backend.read()
+                if not ret:
+                    return
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+        else:
+            if end is None or end > total_frames:
+                end = total_frames
+
+            frame_idx = start
+            while frame_idx < end:
+                self.backend.seek(frame_idx)
+                ret, frame = self.backend.read()
+                if not ret:
+                    break
+                if resolution_wh is not None:
+                    frame = cv2.resize(frame, resolution_wh)
+                yield frame
+                frame_idx += stride
 
     def save(
         self,

From 3374544d4613a1b81df91f61247695e973f81067 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 16:18:49 -0400
Subject: [PATCH 061/128] FIX: Removed frames from BaseBackend

---
 supervision/video/backend/base.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 78b3875d8e..623d3c74f8 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -42,17 +42,6 @@ def grab(self) -> bool:
     def seek(self, frame_idx: int) -> None:
         pass
 
-    @abstractmethod
-    def frames(
-        self,
-        *,
-        start: int = 0,
-        end: int | None = None,
-        stride: int = 1,
-        resolution_wh: tuple[int, int] | None = None,
-    ):
-        pass
-
     @abstractmethod
     def release(self) -> None:
         pass

From 2e78c483ffd609c8501ec16a597a839794d506cf Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 18:23:23 -0400
Subject: [PATCH 062/128] ADD: Added pyAV backend support

---
 supervision/video/backend/pyAV.py | 191 +++++++++++++++++++++++++++++-
 supervision/video/core.py         |  10 +-
 2 files changed, 196 insertions(+), 5 deletions(-)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 9d48db4f9f..85e52a89e0 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1 +1,190 @@
-from __future__ import annotations
+import av
+import numpy as np
+
+from fractions import Fraction
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.utils import VideoInfo, SOURCE_TYPE
+
+
+class pyAVWriter(BaseWriter):
+    def __init__(
+        self,
+        filename: str,
+        fps: int,
+        frame_size: tuple[int, int],
+        codec: str = "h264",
+    ):
+        try:
+            self.container = av.open(filename, mode="w")
+
+            if codec is None:
+                codec = "h264"
+            self.stream = self.container.add_stream(codec, rate=fps)
+            self.stream.width = frame_size[0]
+            self.stream.height = frame_size[1]
+            self.stream.pix_fmt = "yuv420p"
+
+            # Set time_base explicitly for correct timing
+            self.stream.codec_context.time_base = Fraction(1, fps)
+
+            # Frame index for PTS
+            self.frame_idx = 0
+
+        except Exception as e:
+            raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
+
+    def write(self, frame: np.ndarray) -> None:
+        # Convert BGR (OpenCV) to RGB for PyAV
+        frame_rgb = frame[..., ::-1]
+        av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
+
+        av_frame.pts = self.frame_idx
+        av_frame.time_base = self.stream.codec_context.time_base
+        self.frame_idx += 1
+
+        # Encode frame and mux packets immediately
+        packets = self.stream.encode(av_frame)
+        for packet in packets:
+            self.container.mux(packet)
+
+    def close(self) -> None:
+        # Flush encoder by calling encode() with no frame, mux all packets
+        packets = self.stream.encode()
+        for packet in packets:
+            self.container.mux(packet)
+
+        self.container.close()
+
+class pyAVBackend(BaseBackend):
+
+
+    def __init__(self):
+        super().__init__()
+        self.container = None
+        self.stream = None
+        self.writer = pyAVWriter
+        self.frame_generator = None
+        self.video_info = None
+        self.current_frame_idx = 0 
+
+    def open(self, path: str) -> None:
+    
+        try:
+            self.container = av.open(path)
+            self.stream = self.container.streams.video[0]
+            self.stream.thread_type = "AUTO"
+
+            # cap is used for internals
+            self.cap = self.container
+
+            self.frame_generator = self.container.decode(video=0)
+            self.video_info = self._set_video_info()
+            self.current_frame_idx = 0
+
+            if isinstance(path, int):
+                self.video_info.source_type = SOURCE_TYPE.WEBCAM
+            elif isinstance(path, str):
+                self.video_info.source_type = (
+                    SOURCE_TYPE.RTSP
+                    if path.lower().startswith("rtsp://")
+                    else SOURCE_TYPE.VIDEO_FILE
+                )
+            else:
+                raise ValueError("Unsupported source type")
+
+        except Exception as e:
+            raise RuntimeError(f"Cannot open video source: {path}") from e
+
+    def isOpened(self) -> bool:
+        return self.container is not None and self.stream is not None
+
+    def _set_video_info(self) -> VideoInfo:
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+
+        width = self.stream.width
+        height = self.stream.height
+        fps = float(self.stream.average_rate or self.stream.guessed_rate)
+        if fps <= 0:
+            fps = 30  # Default to 30fps if invalid
+
+        total_frames = self.stream.frames
+        if total_frames == 0:
+            total_frames = None
+
+        return VideoInfo(width, height, round(fps), total_frames)
+
+    def info(self) -> VideoInfo:
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+        return self.video_info
+
+    def read(self) -> tuple[bool, np.ndarray]:
+    
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+
+        try:
+            frame = next(self.frame_generator)
+            self.current_frame_idx += 1
+            frame_bgr = frame.to_ndarray(format="bgr24")
+            return True, frame_bgr
+        except (StopIteration, av.error.EOFError):
+            return False, np.array([])
+
+    def grab(self) -> bool:
+    
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+
+        try:
+            for packet in self.container.demux(video=0):
+                if packet.stream.type == 'video':
+                    return True
+            return False
+        except (StopIteration, av.error.EOFError):
+            return False
+
+    def seek(self, frame_idx: int) -> None:
+    
+        if not self.isOpened():
+            raise RuntimeError("Video not opened yet.")
+
+        framerate = float(self.stream.average_rate or self.stream.guessed_rate or 30.0)
+        if framerate <= 0:
+            framerate = 30.0
+
+        time_base = float(self.stream.time_base)
+        timestamp = int((frame_idx / framerate) / time_base)
+
+        self.container.seek(timestamp, stream=self.stream, any_frame=False, backward=True)
+        self.frame_generator = self.container.decode(video=0)
+
+        self.current_frame_idx = 0
+        while True:
+            try:
+                frame = next(self.frame_generator)
+            except (StopIteration, av.error.EOFError):
+                break
+
+            if getattr(frame, "time", None) is not None:
+                self.current_frame_idx = int(round(frame.time * framerate))
+            elif getattr(frame, "pts", None) is not None:
+                self.current_frame_idx = int(round((frame.pts * time_base) * framerate))
+            else:
+                self.current_frame_idx += 1
+
+            if self.current_frame_idx >= frame_idx:
+                def _prepend_frame(first_frame, gen):
+                    yield first_frame
+                    yield from gen
+                self.frame_generator = _prepend_frame(frame, self.frame_generator)
+                break
+
+    def release(self) -> None:
+    
+        if self.container:
+            self.container.close()
+            self.container = None
+            self.stream = None
+            self.frame_generator = None
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 6e13fc9625..a24c9a8044 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -9,6 +9,7 @@
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.backend.openCV import OpenCVBackend
 from supervision.video.utils import VideoInfo, SOURCE_TYPE
+from supervision.video.backend.pyAV import pyAVBackend
 
 
 class Video:
@@ -26,7 +27,7 @@ def __init__(
         self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"
     ):
         if backend == "opencv":
-            self.backend = OpenCVBackend()
+            self.backend = pyAVBackend()
 
         self.backend.open(source)
         self.info = self.backend.video_info
@@ -41,14 +42,14 @@ def __iter__(self):
         return self.backend.frames()
 
     def sink(
-        self, target_path: str, info: VideoInfo, codec: str = "mp4v"
+        self, target_path: str, info: VideoInfo, codec: str | None = None
     ) -> BaseWriter:
         """Create a video writer for saving frames.
 
         Args:
             target_path (str): Path where the video will be saved.
             info (VideoInfo): Video information containing resolution and FPS.
-            codec (str, optional): FourCC code for video codec. Defaults to "mp4v".
+            codec (str, optional): FourCC code for video codec. Defaults to None.
 
         Returns:
             Writer: A video writer object for writing frames.
@@ -115,7 +116,7 @@ def save(
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        codec: str = "mp4v",
+        codec: str | None = None,
     ):
         """Save processed video frames to a file.
 
@@ -142,6 +143,7 @@ def save(
             target_path, fps, self.backend.video_info.resolution_wh, codec
         )
         total_frames = self.backend.video_info.total_frames
+        print(self.backend.video_info)
         frames_generator = self.frames()
         for index, frame in enumerate(
             tqdm(

From a3aca42e2cdca010b743df2020ffc8ab3c0ffc5c Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 18:23:39 -0400
Subject: [PATCH 063/128] ADD: Added pyAV docstrings

---
 supervision/video/backend/pyAV.py | 64 ++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 85e52a89e0..db99af9b4a 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -56,7 +56,10 @@ def close(self) -> None:
         self.container.close()
 
 class pyAVBackend(BaseBackend):
-
+    """
+    PyAV implementation of the Backend interface.
+    Handles video capture, frame reading, seeking, and writing operations using PyAV.
+    """
 
     def __init__(self):
         super().__init__()
@@ -65,16 +68,25 @@ def __init__(self):
         self.writer = pyAVWriter
         self.frame_generator = None
         self.video_info = None
-        self.current_frame_idx = 0 
+        self.current_frame_idx = 0  # Track current frame number in decoding
 
     def open(self, path: str) -> None:
-    
+        """Open and initialize a video source.
+
+        Opens a video file, RTSP stream, or webcam and initializes all necessary
+        components for video processing.
+
+        Args:
+            path (str): Path to video file, RTSP URL, or camera index.
+
+        Raises:
+            RuntimeError: If unable to open the video source.
+            ValueError: If the source type is not supported.
+        """
         try:
             self.container = av.open(path)
             self.stream = self.container.streams.video[0]
             self.stream.thread_type = "AUTO"
-
-            # cap is used for internals
             self.cap = self.container
 
             self.frame_generator = self.container.decode(video=0)
@@ -120,7 +132,16 @@ def info(self) -> VideoInfo:
         return self.video_info
 
     def read(self) -> tuple[bool, np.ndarray]:
-    
+        """Read the next frame from the video stream.
+
+        Returns:
+            tuple[bool, np.ndarray]: A tuple containing:
+                - bool: True if frame was successfully read
+                - np.ndarray: The video frame in BGR format (H, W, 3)
+
+        Raises:
+            RuntimeError: If the video source is not opened.
+        """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
 
@@ -133,7 +154,17 @@ def read(self) -> tuple[bool, np.ndarray]:
             return False, np.array([])
 
     def grab(self) -> bool:
-    
+        """Grab the next frame packet without decoding.
+
+        A lightweight operation that skips frame decoding, useful for
+        quick frame navigation. Returns success status of the grab operation.
+
+        Returns:
+            bool: True if a frame was successfully grabbed, False otherwise.
+
+        Raises:
+            RuntimeError: If the video source is not opened.
+        """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
 
@@ -146,7 +177,18 @@ def grab(self) -> bool:
             return False
 
     def seek(self, frame_idx: int) -> None:
-    
+        """Seek to a specific frame in the video.
+
+        Performs frame-accurate seeking by navigating to the nearest keyframe and
+        decoding forward to the exact target frame. The next read() call will
+        return the target frame.
+
+        Args:
+            frame_idx (int): Target frame index (0-based) to seek to.
+
+        Raises:
+            RuntimeError: If the video source is not opened.
+        """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
 
@@ -182,7 +224,11 @@ def _prepend_frame(first_frame, gen):
                 break
 
     def release(self) -> None:
-    
+        """Release all resources associated with the video stream.
+
+        Closes the video container and resets all internal state variables
+        to ensure proper cleanup of resources.
+        """
         if self.container:
             self.container.close()
             self.container = None

From 819c70be3875731a5d5f6d7df55e0fc51025ceb8 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 19:02:02 -0400
Subject: [PATCH 064/128] ADD: Added pyAV audio support

---
 supervision/video/backend/base.py | 11 +++++++++++
 supervision/video/backend/pyAV.py | 30 ++++++++++++++++++++++++++----
 supervision/video/core.py         |  2 +-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 623d3c74f8..231cb7578e 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -48,6 +48,17 @@ def release(self) -> None:
 
 
 class BaseWriter(ABC):
+    @abstractmethod
+    def __init__(
+        self,
+        filename: str,
+        fps: int,
+        frame_size: tuple[int, int],
+        codec: str | None = None,
+        backend: BaseBackend = None,
+    ):
+        pass
+
     @abstractmethod
     def write(self, frame: np.ndarray) -> None:
         pass
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index db99af9b4a..cfd9ba3f75 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -13,10 +13,12 @@ def __init__(
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "h264",
-    ):
+        backend: "pyAVBackend" = None,
+    ):        
         try:
             self.container = av.open(filename, mode="w")
-
+            self.backend = backend
+            
             if codec is None:
                 codec = "h264"
             self.stream = self.container.add_stream(codec, rate=fps)
@@ -29,6 +31,16 @@ def __init__(
 
             # Frame index for PTS
             self.frame_idx = 0
+            
+            self.audio_stream_out = None
+            self.audio_packets = []
+            if backend.audio_stream and backend.audio_src_container:
+                audio_codec_name = backend.audio_stream.codec_context.name
+                audio_rate = backend.audio_stream.codec_context.rate  # Can be None for some codecs
+                self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate)
+                for packet in backend.audio_src_container.demux(backend.audio_stream):
+                    if packet.dts is not None:
+                        self.audio_packets.append(packet)
 
         except Exception as e:
             raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
@@ -53,6 +65,11 @@ def close(self) -> None:
         for packet in packets:
             self.container.mux(packet)
 
+        if self.audio_stream_out:
+            for packet in self.audio_packets:
+                packet.stream = self.audio_stream_out
+                self.container.mux(packet)
+
         self.container.close()
 
 class pyAVBackend(BaseBackend):
@@ -69,7 +86,7 @@ def __init__(self):
         self.frame_generator = None
         self.video_info = None
         self.current_frame_idx = 0  # Track current frame number in decoding
-
+    
     def open(self, path: str) -> None:
         """Open and initialize a video source.
 
@@ -85,6 +102,7 @@ def open(self, path: str) -> None:
         """
         try:
             self.container = av.open(path)
+            self.audio_src_container = self.container
             self.stream = self.container.streams.video[0]
             self.stream.thread_type = "AUTO"
             self.cap = self.container
@@ -93,6 +111,10 @@ def open(self, path: str) -> None:
             self.video_info = self._set_video_info()
             self.current_frame_idx = 0
 
+            # Remember the first audio stream, if the source has one
+            if len(self.container.streams.audio) > 0:
+                self.audio_stream = self.container.streams.audio[0]
+
             if isinstance(path, int):
                 self.video_info.source_type = SOURCE_TYPE.WEBCAM
             elif isinstance(path, str):
@@ -106,7 +128,7 @@ def open(self, path: str) -> None:
 
         except Exception as e:
             raise RuntimeError(f"Cannot open video source: {path}") from e
-
+    
     def isOpened(self) -> bool:
         return self.container is not None and self.stream is not None
 
diff --git a/supervision/video/core.py b/supervision/video/core.py
index a24c9a8044..31d73b1831 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -140,7 +140,7 @@ def save(
             fps = self.backend.video_info.fps
 
         writer = self.backend.writer(
-            target_path, fps, self.backend.video_info.resolution_wh, codec
+            target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend
         )
         total_frames = self.backend.video_info.total_frames
         print(self.backend.video_info)

From dfd9407e0ac9b4e5fc61024af61d9c64c8474c52 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 19:02:31 -0400
Subject: [PATCH 065/128] UPDATE: Backend support needed for all writers

---
 supervision/video/backend/openCV.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 7e0bd37f9b..c3cb823e08 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -151,6 +151,7 @@ def __init__(
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "mp4v",
+        backend: OpenCVBackend = None,
     ):
         """Initialize the video writer.
 
@@ -163,6 +164,7 @@ def __init__(
         Raises:
             RuntimeError: If the video writer cannot be initialized.
         """
+        self.backend = backend
         try:
             fourcc_int = cv2.VideoWriter_fourcc(*codec)
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)

From d29e50e5adf47464154ff8a4c76eb6bf35055cfa Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 19:04:08 -0400
Subject: [PATCH 066/128] FIX: Make backend a required positional writer argument

---
 supervision/video/backend/base.py   | 2 +-
 supervision/video/backend/openCV.py | 2 +-
 supervision/video/backend/pyAV.py   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 231cb7578e..2e3f68fc75 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -52,10 +52,10 @@ class BaseWriter(ABC):
     def __init__(
         self,
         filename: str,
+        backend: BaseBackend,
         fps: int,
         frame_size: tuple[int, int],
         codec: str | None = None,
-        backend: BaseBackend = None,
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index c3cb823e08..382f694031 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -148,10 +148,10 @@ class OpenCVWriter(BaseWriter):
     def __init__(
         self,
         filename: str,
+        backend: OpenCVBackend,
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "mp4v",
-        backend: OpenCVBackend = None,
     ):
         """Initialize the video writer.
 
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index cfd9ba3f75..81bb760e09 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -10,10 +10,10 @@ class pyAVWriter(BaseWriter):
     def __init__(
         self,
         filename: str,
+        backend: "pyAVBackend",
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "h264",
-        backend: "pyAVBackend" = None,
     ):        
         try:
             self.container = av.open(filename, mode="w")

From 3ff86442e3893241236d584c073985757696ff76 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 19:14:58 -0400
Subject: [PATCH 067/128] ADD: Added backend utils and type safe support

---
 supervision/video/backend/__init__.py | 40 +++++++++++++++++++++++++++
 supervision/video/core.py             | 34 ++++++++++++++---------
 2 files changed, 61 insertions(+), 13 deletions(-)
 create mode 100644 supervision/video/backend/__init__.py

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
new file mode 100644
index 0000000000..e8e154e730
--- /dev/null
+++ b/supervision/video/backend/__init__.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from typing import Literal, overload, TypeVar, Union
+
+from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.backend.openCV import OpenCVBackend, OpenCVWriter
+from supervision.video.backend.pyAV import pyAVBackend, pyAVWriter
+
+BackendT = TypeVar('BackendT', bound=BaseBackend)
+BackendLiteral = Literal["opencv", "pyav"]
+BackendType = Union[OpenCVBackend, pyAVBackend]
+
+@overload
+def getBackend(backend: Literal["opencv"]) -> OpenCVBackend:
+    ...
+
+@overload
+def getBackend(backend: Literal["pyav"]) -> pyAVBackend:
+    ...
+
+def getBackend(backend: str) -> BaseBackend:
+    if backend == "opencv":
+        return OpenCVBackend()
+    elif backend == "pyav":
+        return pyAVBackend()
+    else:
+        raise ValueError(f"Unsupported backend: {backend}")
+
+__all__ = [
+    "BaseBackend",
+    "BaseWriter",
+    "OpenCVBackend",
+    "OpenCVWriter",
+    "pyAVBackend",
+    "pyAVWriter",
+    "getBackend",
+    "BackendT",
+    "BackendLiteral",
+    "BackendType"
+]
\ No newline at end of file
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 31d73b1831..f2eb96ad41 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,34 +1,42 @@
 from __future__ import annotations
 
 from collections.abc import Callable
+from typing import overload
 
 import numpy as np
 import cv2
 from tqdm.auto import tqdm
 
-from supervision.video.backend.base import BaseBackend, BaseWriter
-from supervision.video.backend.openCV import OpenCVBackend
+from supervision.video.backend import (
+    BaseBackend,
+    BaseWriter,
+    getBackend,
+    BackendLiteral,
+)
 from supervision.video.utils import VideoInfo, SOURCE_TYPE
-from supervision.video.backend.pyAV import pyAVBackend
 
 
 class Video:
-    """High-level interface for video operations.
-
-    This class provides a convenient interface for video operations including
-    reading frames, saving processed videos, and video information access.
-    """
-
     info: VideoInfo
     source: str | int
     backend: BaseBackend
 
+    @overload
     def __init__(
-        self, source: str | int, info: VideoInfo | None = None, backend: str = "opencv"
-    ):
-        if backend == "opencv":
-            self.backend = pyAVBackend()
+        self, 
+        source: str | int, 
+        info: VideoInfo | None = None, 
+        backend: BackendLiteral = "opencv"
+    ) -> None:
+        ...
 
+    def __init__(
+        self, 
+        source: str | int, 
+        info: VideoInfo | None = None, 
+        backend: BackendLiteral = "opencv"
+    ) -> None:
+        self.backend = getBackend(backend)
         self.backend.open(source)
         self.info = self.backend.video_info
         self.source = source

From d006c478efc3a39868866632c9718f141c1bf8a4 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 20:53:04 -0400
Subject: [PATCH 068/128] UPDATE: Type safe syntax

---
 supervision/video/backend/__init__.py |  23 ++---
 supervision/video/backend/base.py     |   5 +-
 supervision/video/backend/openCV.py   |   2 +-
 supervision/video/backend/pyAV.py     | 134 +++++++++++++-------------
 supervision/video/core.py             |  20 +---
 5 files changed, 84 insertions(+), 100 deletions(-)

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index e8e154e730..5e8ae0b61e 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -1,28 +1,23 @@
 from __future__ import annotations
 
-from typing import Literal, overload, TypeVar, Union
+from typing import Literal, Union
 
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.backend.openCV import OpenCVBackend, OpenCVWriter
 from supervision.video.backend.pyAV import pyAVBackend, pyAVWriter
 
-BackendT = TypeVar('BackendT', bound=BaseBackend)
 BackendLiteral = Literal["opencv", "pyav"]
-BackendType = Union[OpenCVBackend, pyAVBackend]
+BackendTypes = Union[OpenCVBackend, pyAVBackend]
+WriterTypes = Union[OpenCVWriter, pyAVWriter]
 
-@overload
-def getBackend(backend: Literal["opencv"]) -> OpenCVBackend:
-    ...
-
-@overload
-def getBackend(backend: Literal["pyav"]) -> pyAVBackend:
-    ...
+_backends = {
+    "opencv": OpenCVBackend,
+    "pyav": pyAVBackend,
+}
 
 def getBackend(backend: str) -> BaseBackend:
-    if backend == "opencv":
-        return OpenCVBackend()
-    elif backend == "pyav":
-        return pyAVBackend()
+    if backend.lower() in _backends:
+        return _backends[backend.lower()]()
     else:
         raise ValueError(f"Unsupported backend: {backend}")
 
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 2e3f68fc75..6a26429249 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -5,13 +5,14 @@
 import numpy as np
 
 from supervision.video.utils import VideoInfo
+from supervision.video.backend import BackendTypes, WriterTypes
 
 
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
         self.video_info = None
-        self.writer = None
+        self.writer: WriterTypes = None
         self.path = None
 
     @abstractmethod
@@ -52,7 +53,7 @@ class BaseWriter(ABC):
     def __init__(
         self,
         filename: str,
-        backend: BaseBackend,
+        backend: BackendTypes,
         fps: int,
         frame_size: tuple[int, int],
         codec: str | None = None,
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 382f694031..95dc4ccda8 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -3,7 +3,7 @@
 import cv2
 import numpy as np
 
-from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.backend import BaseBackend, BaseWriter
 from supervision.video.utils import SOURCE_TYPE, VideoInfo
 
 
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 81bb760e09..7d4c25b3eb 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -2,76 +2,10 @@
 import numpy as np
 
 from fractions import Fraction
-from supervision.video.backend.base import BaseBackend, BaseWriter
+from supervision.video.backend import BaseBackend, BaseWriter
 from supervision.video.utils import VideoInfo, SOURCE_TYPE
 
 
-class pyAVWriter(BaseWriter):
-    def __init__(
-        self,
-        filename: str,
-        backend: "pyAVBackend",
-        fps: int,
-        frame_size: tuple[int, int],
-        codec: str = "h264",
-    ):        
-        try:
-            self.container = av.open(filename, mode="w")
-            self.backend = backend
-            
-            if codec is None:
-                codec = "h264"
-            self.stream = self.container.add_stream(codec, rate=fps)
-            self.stream.width = frame_size[0]
-            self.stream.height = frame_size[1]
-            self.stream.pix_fmt = "yuv420p"
-
-            # Set time_base explicitly for correct timing
-            self.stream.codec_context.time_base = Fraction(1, fps)
-
-            # Frame index for PTS
-            self.frame_idx = 0
-            
-            self.audio_stream_out = None
-            self.audio_packets = []
-            if backend.audio_stream and backend.audio_src_container:
-                audio_codec_name = backend.audio_stream.codec_context.name
-                audio_rate = backend.audio_stream.codec_context.rate  # Can be None for some codecs
-                self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate)
-                for packet in backend.audio_src_container.demux(backend.audio_stream):
-                    if packet.dts is not None:
-                        self.audio_packets.append(packet)
-
-        except Exception as e:
-            raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
-
-    def write(self, frame: np.ndarray) -> None:
-        # Convert BGR (OpenCV) to RGB for PyAV
-        frame_rgb = frame[..., ::-1]
-        av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
-
-        av_frame.pts = self.frame_idx
-        av_frame.time_base = self.stream.codec_context.time_base
-        self.frame_idx += 1
-
-        # Encode frame and mux packets immediately
-        packets = self.stream.encode(av_frame)
-        for packet in packets:
-            self.container.mux(packet)
-
-    def close(self) -> None:
-        # Flush encoder by calling encode() with no frame, mux all packets
-        packets = self.stream.encode()
-        for packet in packets:
-            self.container.mux(packet)
-
-        if self.audio_stream_out:
-            for packet in self.audio_packets:
-                packet.stream = self.audio_stream_out
-                self.container.mux(packet)
-
-        self.container.close()
-
 class pyAVBackend(BaseBackend):
     """
     PyAV implementation of the Backend interface.
@@ -256,3 +190,69 @@ def release(self) -> None:
             self.container = None
             self.stream = None
             self.frame_generator = None
+
+class pyAVWriter(BaseWriter):
+    def __init__(
+        self,
+        filename: str,
+        backend: pyAVBackend,
+        fps: int,
+        frame_size: tuple[int, int],
+        codec: str = "h264",
+    ):        
+        try:
+            self.container = av.open(filename, mode="w")
+            self.backend = backend
+            
+            if codec is None:
+                codec = "h264"
+            self.stream = self.container.add_stream(codec, rate=fps)
+            self.stream.width = frame_size[0]
+            self.stream.height = frame_size[1]
+            self.stream.pix_fmt = "yuv420p"
+
+            # Set time_base explicitly for correct timing
+            self.stream.codec_context.time_base = Fraction(1, fps)
+
+            # Frame index for PTS
+            self.frame_idx = 0
+            
+            self.audio_stream_out = None
+            self.audio_packets = []
+            if backend.audio_stream and backend.audio_src_container:
+                audio_codec_name = backend.audio_stream.codec_context.name
+                audio_rate = backend.audio_stream.codec_context.rate  # Can be None for some codecs
+                self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate)
+                for packet in backend.audio_src_container.demux(backend.audio_stream):
+                    if packet.dts is not None:
+                        self.audio_packets.append(packet)
+
+        except Exception as e:
+            raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
+
+    def write(self, frame: np.ndarray) -> None:
+        # Convert BGR (OpenCV) to RGB for PyAV
+        frame_rgb = frame[..., ::-1]
+        av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
+
+        av_frame.pts = self.frame_idx
+        av_frame.time_base = self.stream.codec_context.time_base
+        self.frame_idx += 1
+
+        # Encode frame and mux packets immediately
+        packets = self.stream.encode(av_frame)
+        for packet in packets:
+            self.container.mux(packet)
+
+    def close(self) -> None:
+        # Flush encoder by calling encode() with no frame, mux all packets
+        packets = self.stream.encode()
+        for packet in packets:
+            self.container.mux(packet)
+
+        if self.audio_stream_out:
+            for packet in self.audio_packets:
+                packet.stream = self.audio_stream_out
+                self.container.mux(packet)
+
+        self.container.close()
diff --git a/supervision/video/core.py b/supervision/video/core.py
index f2eb96ad41..4587f83fe0 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,17 +1,15 @@
 from __future__ import annotations
 
 from collections.abc import Callable
-from typing import overload
-
 import numpy as np
 import cv2
 from tqdm.auto import tqdm
 
 from supervision.video.backend import (
-    BaseBackend,
+    BackendTypes,
+    BackendLiteral,
     BaseWriter,
     getBackend,
-    BackendLiteral,
 )
 from supervision.video.utils import VideoInfo, SOURCE_TYPE
 
@@ -19,26 +17,16 @@
 class Video:
     info: VideoInfo
     source: str | int
-    backend: BaseBackend
-
-    @overload
-    def __init__(
-        self, 
-        source: str | int, 
-        info: VideoInfo | None = None, 
-        backend: BackendLiteral = "opencv"
-    ) -> None:
-        ...
+    backend: BackendTypes
 
     def __init__(
         self, 
         source: str | int, 
-        info: VideoInfo | None = None, 
         backend: BackendLiteral = "opencv"
     ) -> None:
         self.backend = getBackend(backend)
         self.backend.open(source)
-        self.info = self.backend.video_info
+        self.info = self.backend.info()
         self.source = source
 
     def __iter__(self):

From 8894819819243f40301ca65f1b07b3a89ba59bde Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 21:12:51 -0400
Subject: [PATCH 069/128] FORMAT: Moved writer backend param to last position (default None)

---
 supervision/video/backend/__init__.py | 1 -
 supervision/video/backend/base.py     | 5 ++---
 supervision/video/backend/openCV.py   | 3 ++-
 supervision/video/backend/pyAV.py     | 2 +-
 supervision/video/core.py             | 2 +-
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index 5e8ae0b61e..805a050968 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -29,7 +29,6 @@ def getBackend(backend: str) -> BaseBackend:
     "pyAVBackend",
     "pyAVWriter",
     "getBackend",
-    "BackendT",
     "BackendLiteral",
     "BackendType"
 ]
\ No newline at end of file
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 6a26429249..6a9bf97cb2 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -5,14 +5,13 @@
 import numpy as np
 
 from supervision.video.utils import VideoInfo
-from supervision.video.backend import BackendTypes, WriterTypes
 
 
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
         self.video_info = None
-        self.writer: WriterTypes = None
+        self.writer = None
         self.path = None
 
     @abstractmethod
@@ -53,10 +52,10 @@ class BaseWriter(ABC):
     def __init__(
         self,
         filename: str,
-        backend: BackendTypes,
         fps: int,
         frame_size: tuple[int, int],
         codec: str | None = None,
+        backend: BaseBackend | None = None,
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 95dc4ccda8..68a41e0d72 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -148,10 +148,10 @@ class OpenCVWriter(BaseWriter):
     def __init__(
         self,
         filename: str,
-        backend: OpenCVBackend,
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "mp4v",
+        backend: OpenCVBackend | None = None,
     ):
         """Initialize the video writer.
 
@@ -160,6 +160,7 @@ def __init__(
             fps (int): Frames per second for the output video.
             frame_size (tuple[int, int]): Width and height of the output video frames.
             codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
+            backend (OpenCVBackend | None, optional): Backend instance. Defaults to None.
 
         Raises:
             RuntimeError: If the video writer cannot be initialized.
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 7d4c25b3eb..4ac9830990 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -195,10 +195,10 @@ class pyAVWriter(BaseWriter):
     def __init__(
         self,
         filename: str,
-        backend: pyAVBackend,
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "h264",
+        backend: pyAVBackend | None = None,
     ):        
         try:
             self.container = av.open(filename, mode="w")
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 4587f83fe0..cfa57209e1 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -51,7 +51,7 @@ def sink(
             Writer: A video writer object for writing frames.
         """
         return self.backend.writer(
-             target_path, info.fps, info.resolution_wh, codec
+             target_path, info.fps, info.resolution_wh, codec, self.backend
         )
 
     def frames(

From 55b5ea0f361fa60566460251d8e906cabf44a356 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 21:16:19 -0400
Subject: [PATCH 070/128] UPDATE: Added context-manager methods to BaseWriter and pyAVWriter

---
 supervision/video/backend/base.py |  8 ++++++++
 supervision/video/backend/pyAV.py | 15 +++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 6a9bf97cb2..9a552053f0 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -58,6 +58,14 @@ def __init__(
         backend: BaseBackend | None = None,
     ):
         pass
+    
+    @abstractmethod
+    def __enter__(self):
+        pass
+
+    @abstractmethod
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
 
     @abstractmethod
     def write(self, frame: np.ndarray) -> None:
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 4ac9830990..629095d7f0 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -19,7 +19,7 @@ def __init__(self):
         self.writer = pyAVWriter
         self.frame_generator = None
         self.video_info = None
-        self.current_frame_idx = 0  # Track current frame number in decoding
+        self.current_frame_idx = 0  
     
     def open(self, path: str) -> None:
         """Open and initialize a video source.
@@ -74,7 +74,7 @@ def _set_video_info(self) -> VideoInfo:
         height = self.stream.height
         fps = float(self.stream.average_rate or self.stream.guessed_rate)
         if fps <= 0:
-            fps = 30  # Default to 30fps if invalid
+            fps = 30 
 
         total_frames = self.stream.frames
         if total_frames == 0:
@@ -221,7 +221,7 @@ def __init__(
             self.audio_packets = []
             if backend.audio_stream and backend.audio_src_container:
                 audio_codec_name = backend.audio_stream.codec_context.name
-                audio_rate = backend.audio_stream.codec_context.rate  # Can be None for some codecs
+                audio_rate = backend.audio_stream.codec_context.rate 
                 self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate)
                 for packet in backend.audio_src_container.demux(backend.audio_stream):
                     if packet.dts is not None:
@@ -229,9 +229,14 @@ def __init__(
 
         except Exception as e:
             raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
+        
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
 
     def write(self, frame: np.ndarray) -> None:
-        # Convert BGR (OpenCV) to RGB for PyAV
         frame_rgb = frame[..., ::-1]
         av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
 
@@ -239,13 +244,11 @@ def write(self, frame: np.ndarray) -> None:
         av_frame.time_base = self.stream.codec_context.time_base
         self.frame_idx += 1
 
-        # Encode frame and mux packets immediately
         packets = self.stream.encode(av_frame)
         for packet in packets:
             self.container.mux(packet)
 
     def close(self) -> None:
-        # Flush encoder by calling encode() with no frame, mux all packets
         packets = self.stream.encode()
         for packet in packets:
             self.container.mux(packet)

From 4b1c9bcc1a00c399e97b9a55c44f35345176e866 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 21:26:27 -0400
Subject: [PATCH 071/128] UPDATE: Updated docstrings

---
 supervision/video/backend/base.py   |   1 -
 supervision/video/backend/openCV.py | 101 +++++++++++++-----------
 supervision/video/backend/pyAV.py   | 117 ++++++++++++++++++++--------
 supervision/video/core.py           |  86 +++++++++++++-------
 supervision/video/utils.py          |  45 ++++++-----
 5 files changed, 227 insertions(+), 123 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 9a552053f0..aac1a83b7a 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -46,7 +46,6 @@ def seek(self, frame_idx: int) -> None:
     def release(self) -> None:
         pass
 
-
 class BaseWriter(ABC):
     @abstractmethod
     def __init__(
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 68a41e0d72..fd7a026c27 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -9,34 +9,37 @@
 
 class OpenCVBackend(BaseBackend):
     """
-    OpenCV implementation of the Backend interface.
-    Handles video capture, frame reading, seeking, and writing operations using OpenCV.
+    OpenCV-based implementation of the video backend interface.
+
+    Provides methods for opening video sources, reading frames, seeking,
+    grabbing, and retrieving metadata using OpenCV.
     """
 
     def __init__(self):
-        """Initialize the OpenCV backend with empty video capture and writer objects."""
+        """Initialize with no active capture, writer, or path."""
         super().__init__()
         self.cap = None
         self.video_info = None
         self.writer = OpenCVWriter
         self.path = None
 
-    def open(self, path: str) -> None:
+    def open(self, path: str | int) -> None:
         """
-        Open a video source and initialize the video capture object.
+        Open a video source and initialize capture.
 
         Args:
-            path (str): Path to the video file, RTSP URL, or camera index.
+            path (str | int): Path to a video file, RTSP URL, or webcam index.
 
         Raises:
-            RuntimeError: If unable to open the video source.
-            ValueError: If the source type is not supported.
+            RuntimeError: If the source cannot be opened.
+            ValueError: If the source type is unsupported.
         """
         self.cap = cv2.VideoCapture(path)
         self.path = path
 
         if not self.cap.isOpened():
             raise RuntimeError(f"Cannot open video source: {path}")
+
         self.video_info = self._set_video_info()
 
         if isinstance(path, int):
@@ -51,98 +54,105 @@ def open(self, path: str) -> None:
             raise ValueError("Unsupported source type")
 
     def isOpened(self) -> bool:
-        """Check if the video source is opened successfully.
+        """
+        Check if the video source is currently open.
 
         Returns:
-            bool: True if the video source is opened, False otherwise.
+            bool: True if the source is open, False otherwise.
         """
         return self.cap.isOpened()
 
     def _set_video_info(self) -> VideoInfo:
-        """Set up video information from the opened video source.
+        """
+        Extract and store video metadata from the open capture.
 
         Returns:
-            VideoInfo: Object containing video properties like width, height, fps, etc.
+            VideoInfo: Video properties such as width, height, FPS, and frame count.
 
         Raises:
-            RuntimeError: If the video source is not opened yet.
+            RuntimeError: If no source is open.
         """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
+
         width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         fps = round(self.cap.get(cv2.CAP_PROP_FPS))
         total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
         return VideoInfo(width, height, fps, total_frames)
 
     def info(self) -> VideoInfo:
-        """Get video information.
+        """
+        Get the stored video metadata.
 
         Returns:
-            VideoInfo: Object containing video properties.
+            VideoInfo: Metadata for the open source.
 
         Raises:
-            RuntimeError: If the video source is not opened yet.
+            RuntimeError: If no source is open.
         """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
         return self.video_info
 
     def read(self) -> tuple[bool, np.ndarray]:
-        """Read a frame from the video source.
+        """
+        Read the next frame from the source.
 
         Returns:
-            tuple[bool, np.ndarray]: A tuple containing:
-                - bool: True if frame was successfully read
-                - np.ndarray: The video frame in BGR format
+            tuple[bool, np.ndarray]:
+                - bool: True if a frame was read successfully.
+                - np.ndarray: The frame in BGR format.
 
         Raises:
-            RuntimeError: If the video source is not opened yet.
+            RuntimeError: If no source is open.
         """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
-        ret, frame = self.cap.read()
-        return ret, frame
+        return self.cap.read()
 
     def grab(self) -> bool:
-        """Grab a frame from video source without decoding.
+        """
+        Grab the next frame without decoding.
 
         Returns:
-            bool: True if frame was successfully grabbed.
+            bool: True if the frame pointer advanced successfully.
 
         Raises:
-            RuntimeError: If the video source is not opened yet.
+            RuntimeError: If no source is open.
         """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
         return self.cap.grab()
 
     def seek(self, frame_idx: int) -> None:
-        """Seek to a specific frame in the video.
+        """
+        Jump to a specific frame.
 
         Args:
-            frame_idx (int): Index of the frame to seek to (0-based).
+            frame_idx (int): Zero-based frame index to seek to.
 
         Raises:
-            RuntimeError: If the video source is not opened yet.
+            RuntimeError: If no source is open.
         """
         if self.cap is None:
             raise RuntimeError("Video not opened yet.")
         self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
 
     def release(self) -> None:
-        """Release the video capture resources."""
+        """Release capture resources."""
         if self.cap is not None and self.cap.isOpened():
             self.cap.release()
             self.cap = None
 
 
 class OpenCVWriter(BaseWriter):
-    """A class to handle video writing operations using OpenCV's VideoWriter.
+    """
+    Video writer implementation using OpenCV's VideoWriter.
 
-    This class provides an interface to write frames to a video file using OpenCV,
-    with support for different codecs and automatic fallback to mp4v if the specified
-    codec fails.
+    Supports configurable codecs, frame sizes, and FPS, with a fallback
+    to "mp4v" if the specified codec fails.
     """
 
     def __init__(
@@ -153,17 +163,18 @@ def __init__(
         codec: str = "mp4v",
         backend: OpenCVBackend | None = None,
     ):
-        """Initialize the video writer.
+        """
+        Initialize the writer.
 
         Args:
-            filename (str): Path to the output video file.
-            fps (int): Frames per second for the output video.
-            frame_size (tuple[int, int]): Width and height of the output video frames.
-            codec (str, optional): FourCC code for the video codec. Defaults to "mp4v".
+            filename (str): Output video file path.
+            fps (int): Output frames per second.
+            frame_size (tuple[int, int]): Frame dimensions (width, height).
+            codec (str, optional): FourCC codec code. Defaults to "mp4v".
             backend (OpenCVBackend | None, optional): Backend instance. Defaults to None.
 
         Raises:
-            RuntimeError: If the video writer cannot be initialized.
+            RuntimeError: If the writer cannot be opened.
         """
         self.backend = backend
         try:
@@ -172,6 +183,7 @@ def __init__(
         except Exception:
             fourcc_int = cv2.VideoWriter_fourcc(*"mp4v")
             self.writer = cv2.VideoWriter(filename, fourcc_int, fps, frame_size)
+
         if not self.writer.isOpened():
             raise RuntimeError(f"Cannot open video writer for file: {filename}")
 
@@ -182,13 +194,14 @@ def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
     def write(self, frame: np.ndarray) -> None:
-        """Write a frame to the video file.
+        """
+        Write a frame to the output.
 
         Args:
-            frame (np.ndarray): The frame to write, in BGR format.
+            frame (np.ndarray): Frame in BGR format.
         """
         self.writer.write(frame)
 
     def close(self) -> None:
-        """Release the video writer resources."""
-        self.writer.release()
+        """Release writer resources."""
+        self.writer.release()
\ No newline at end of file
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 629095d7f0..e3d80373ed 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -8,8 +8,11 @@
 
 class pyAVBackend(BaseBackend):
     """
-    PyAV implementation of the Backend interface.
-    Handles video capture, frame reading, seeking, and writing operations using PyAV.
+    PyAV-based implementation of the `BaseBackend` interface.
+
+    This backend handles video capture, frame reading, seeking, and writing
+    operations using the PyAV library. Supports local video files, webcams,
+    and RTSP streams.
     """
 
     def __init__(self):
@@ -20,19 +23,20 @@ def __init__(self):
         self.frame_generator = None
         self.video_info = None
         self.current_frame_idx = 0  
-    
+
     def open(self, path: str) -> None:
-        """Open and initialize a video source.
+        """
+        Open and initialize a video source.
 
-        Opens a video file, RTSP stream, or webcam and initializes all necessary
-        components for video processing.
+        This method opens a video file, RTSP stream, or webcam, and sets up
+        the necessary components for decoding and reading frames.
 
         Args:
-            path (str): Path to video file, RTSP URL, or camera index.
+            path (str | int): Path to the video file, RTSP URL, or webcam index.
 
         Raises:
-            RuntimeError: If unable to open the video source.
-            ValueError: If the source type is not supported.
+            RuntimeError: If the video source cannot be opened.
+            ValueError: If the source type is unsupported.
         """
         try:
             self.container = av.open(path)
@@ -45,7 +49,7 @@ def open(self, path: str) -> None:
             self.video_info = self._set_video_info()
             self.current_frame_idx = 0
 
-             # If audio exists
+            # If audio exists
             if len(self.container.streams.audio) > 0:
                 self.audio_stream = self.container.streams.audio[0]
 
@@ -62,11 +66,21 @@ def open(self, path: str) -> None:
 
         except Exception as e:
             raise RuntimeError(f"Cannot open video source: {path}") from e
-    
+
     def isOpened(self) -> bool:
+        """Check if the video source has been successfully opened."""
         return self.container is not None and self.stream is not None
 
     def _set_video_info(self) -> VideoInfo:
+        """
+        Extract video information from the opened source.
+
+        Returns:
+            VideoInfo: Object containing width, height, fps, and frame count.
+
+        Raises:
+            RuntimeError: If the video source is not opened.
+        """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
 
@@ -74,7 +88,7 @@ def _set_video_info(self) -> VideoInfo:
         height = self.stream.height
         fps = float(self.stream.average_rate or self.stream.guessed_rate)
         if fps <= 0:
-            fps = 30 
+            fps = 30
 
         total_frames = self.stream.frames
         if total_frames == 0:
@@ -83,17 +97,27 @@ def _set_video_info(self) -> VideoInfo:
         return VideoInfo(width, height, round(fps), total_frames)
 
     def info(self) -> VideoInfo:
+        """
+        Retrieve video information.
+
+        Returns:
+            VideoInfo: Video properties for the opened source.
+
+        Raises:
+            RuntimeError: If the video source is not opened.
+        """
         if not self.isOpened():
             raise RuntimeError("Video not opened yet.")
         return self.video_info
 
     def read(self) -> tuple[bool, np.ndarray]:
-        """Read the next frame from the video stream.
+        """
+        Read and decode the next frame from the video source.
 
         Returns:
-            tuple[bool, np.ndarray]: A tuple containing:
-                - bool: True if frame was successfully read
-                - np.ndarray: The video frame in BGR format (H, W, 3)
+            tuple[bool, np.ndarray]:  
+                - `bool`: True if a frame was read successfully, False if end of stream.  
+                - `np.ndarray`: Frame data in BGR format (H, W, 3). Empty array if unsuccessful.
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -110,13 +134,13 @@ def read(self) -> tuple[bool, np.ndarray]:
             return False, np.array([])
 
     def grab(self) -> bool:
-        """Grab the next frame packet without decoding.
+        """
+        Grab the next frame packet without decoding it.
 
-        A lightweight operation that skips frame decoding, useful for
-        quick frame navigation. Returns success status of the grab operation.
+        Useful for skipping frames quickly without the overhead of decoding.
 
         Returns:
-            bool: True if a frame was successfully grabbed, False otherwise.
+            bool: True if a frame packet was grabbed successfully, False otherwise.
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -133,14 +157,14 @@ def grab(self) -> bool:
             return False
 
     def seek(self, frame_idx: int) -> None:
-        """Seek to a specific frame in the video.
+        """
+        Seek to a specific frame index in the video.
 
-        Performs frame-accurate seeking by navigating to the nearest keyframe and
-        decoding forward to the exact target frame. The next read() call will
-        return the target frame.
+        This uses keyframe-based seeking, then decodes forward to the exact
+        requested frame.
 
         Args:
-            frame_idx (int): Target frame index (0-based) to seek to.
+            frame_idx (int): Zero-based index of the target frame.
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -180,10 +204,8 @@ def _prepend_frame(first_frame, gen):
                 break
 
     def release(self) -> None:
-        """Release all resources associated with the video stream.
-
-        Closes the video container and resets all internal state variables
-        to ensure proper cleanup of resources.
+        """
+        Release the video source and free all associated resources.
         """
         if self.container:
             self.container.close()
@@ -191,7 +213,14 @@ def release(self) -> None:
             self.stream = None
             self.frame_generator = None
 
+
 class pyAVWriter(BaseWriter):
+    """
+    PyAV-based video writer.
+
+    Writes frames to a video file with optional audio from a backend source.
+    """
+
     def __init__(
         self,
         filename: str,
@@ -199,7 +228,20 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str = "h264",
         backend: pyAVBackend | None = None,
-    ):        
+    ):
+        """
+        Initialize a video writer.
+
+        Args:
+            filename (str): Output video file path.
+            fps (int): Frames per second for the output video.
+            frame_size (tuple[int, int]): Frame dimensions as (width, height).
+            codec (str, optional): Video codec (default: "h264").
+            backend (pyAVBackend, optional): Backend providing audio stream.
+
+        Raises:
+            RuntimeError: If the output file cannot be created.
+        """
         try:
             self.container = av.open(filename, mode="w")
             self.backend = backend
@@ -219,9 +261,9 @@ def __init__(
             
             self.audio_stream_out = None
             self.audio_packets = []
-            if backend.audio_stream and backend.audio_src_container:
+            if backend and backend.audio_stream and backend.audio_src_container:
                 audio_codec_name = backend.audio_stream.codec_context.name
-                audio_rate = backend.audio_stream.codec_context.rate 
+                audio_rate = backend.audio_stream.codec_context.rate
                 self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate)
                 for packet in backend.audio_src_container.demux(backend.audio_stream):
                     if packet.dts is not None:
@@ -229,7 +271,7 @@ def __init__(
 
         except Exception as e:
             raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
-        
+
     def __enter__(self):
         return self
 
@@ -237,6 +279,12 @@ def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
     def write(self, frame: np.ndarray) -> None:
+        """
+        Write a single frame to the output video.
+
+        Args:
+            frame (np.ndarray): Frame in BGR format (H, W, 3).
+        """
         frame_rgb = frame[..., ::-1]
         av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
 
@@ -249,6 +297,9 @@ def write(self, frame: np.ndarray) -> None:
             self.container.mux(packet)
 
     def close(self) -> None:
+        """
+        Finalize the video file and close the writer.
+        """
         packets = self.stream.encode()
         for packet in packets:
             self.container.mux(packet)
diff --git a/supervision/video/core.py b/supervision/video/core.py
index cfa57209e1..a6890f7f18 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -15,43 +15,63 @@
 
 
 class Video:
+    """
+    A high-level interface for reading, processing, and writing video files or streams.
+
+    Attributes:
+        info (VideoInfo): Metadata about the video, such as resolution, FPS, and frame count.
+        source (str | int): Path to the video file or index of the camera device.
+        backend (BackendTypes): Video backend used for I/O operations.
+    """
+
     info: VideoInfo
     source: str | int
     backend: BackendTypes
 
     def __init__(
-        self, 
-        source: str | int, 
+        self,
+        source: str | int,
         backend: BackendLiteral = "opencv"
     ) -> None:
+        """
+        Initialize the Video object.
+
+        Args:
+            source (str | int): Path to a video file or index of a camera device.
+            backend (BackendLiteral, optional): Backend type for video I/O.
+                Defaults to "opencv".
+        """
         self.backend = getBackend(backend)
         self.backend.open(source)
         self.info = self.backend.info()
         self.source = source
 
     def __iter__(self):
-        """Make the Video class iterable over frames.
+        """
+        Make the Video object iterable over frames.
 
-        Returns:
-            Generator: A generator yielding video frames.
+        Yields:
+            np.ndarray: The next frame in the video.
         """
         return self.backend.frames()
 
     def sink(
         self, target_path: str, info: VideoInfo, codec: str | None = None
     ) -> BaseWriter:
-        """Create a video writer for saving frames.
+        """
+        Create a video writer for saving frames to a file.
 
         Args:
-            target_path (str): Path where the video will be saved.
-            info (VideoInfo): Video information containing resolution and FPS.
-            codec (str, optional): FourCC code for video codec. Defaults to "None".
+            target_path (str): Output file path for the video.
+            info (VideoInfo): Video information including resolution and FPS.
+            codec (str, optional): FourCC video codec code.
+                If None, the backend's default codec is used.
 
         Returns:
-            Writer: A video writer object for writing frames.
+            BaseWriter: Video writer instance for writing frames.
         """
         return self.backend.writer(
-             target_path, info.fps, info.resolution_wh, codec, self.backend
+            target_path, info.fps, info.resolution_wh, codec, self.backend
         )
 
     def frames(
@@ -61,17 +81,20 @@ def frames(
         end: int | None = None,
         resolution_wh: tuple[int, int] | None = None,
     ):
-        """Generate frames from the video.
+        """
+        Generate frames from the video with optional skipping, cropping, and resizing.
 
         Args:
-            stride (int, optional): Number of frames to skip. Defaults to 1.
-            start (int, optional): Starting frame index. Defaults to 0.
-            end (int | None, optional): Ending frame index. Defaults to None.
+            stride (int, optional): Number of frames to skip between each yield.
+                Defaults to 1 (no skipping).
+            start (int, optional): Index of the first frame to read. Defaults to 0.
+            end (int | None, optional): Index after the last frame to read.
+                If None, reads until the end of the video.
             resolution_wh (tuple[int, int] | None, optional): Target resolution
-                (width, height). If provided, frames will be resized. Defaults to None.
+                (width, height) for resizing frames. If None, keeps original size.
 
-        Returns:
-            Generator: A generator yielding video frames.
+        Yields:
+            np.ndarray: The next frame in the video.
         """
         if self.backend.cap is None:
             raise RuntimeError("Video not opened yet.")
@@ -114,17 +137,29 @@ def save(
         show_progress: bool = False,
         codec: str | None = None,
     ):
-        """Save processed video frames to a file.
+        """
+        Process and save video frames to a file.
 
         Args:
-            target_path (str): Path where the processed video will be saved.
-            callback (Callable[[np.ndarray, int], np.ndarray]): Function that processes
-                each frame. Takes frame and index as input, returns processed frame.
-            fps (int | None, optional): Output video FPS.
-            progress_message (str, optional): Message to show in progress bar.
+            target_path (str): Output file path for the processed video.
+            callback (Callable[[np.ndarray, int], np.ndarray]): Function applied to each frame.
+                Takes the frame (np.ndarray) and frame index (int) as input,
+                returns the processed frame (np.ndarray).
+            fps (int | None, optional): Frames per second of the output video.
+                If None, uses the original FPS.
+            progress_message (str, optional): Message displayed in the progress bar.
                 Defaults to "Processing video".
-            show_progress (bool, optional): Whether to show progress bar.
+            show_progress (bool, optional): If True, displays a tqdm progress bar.
                 Defaults to False.
+            codec (str | None, optional): FourCC video codec code.
+                If None, uses the backend's default codec.
+
+        Raises:
+            RuntimeError: If the video has not been opened.
+            ValueError: If the video source is not a file.
+
+        Returns:
+            None
         """
         if self.backend.cap is None:
             raise RuntimeError("Video not opened yet.")
@@ -139,7 +174,6 @@ def save(
             target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend
         )
         total_frames = self.backend.video_info.total_frames
-        print(self.backend.video_info)
         frames_generator = self.frames()
         for index, frame in enumerate(
             tqdm(
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 629ce7b262..fa1d7854a2 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -7,6 +7,14 @@
 
 
 class SOURCE_TYPE(Enum):
+    """
+    Enumeration of supported video source types.
+
+    Attributes:
+        VIDEO_FILE: A standard video file on disk.
+        WEBCAM: A webcam or other direct camera device.
+        RTSP: A network RTSP video stream.
+    """
     VIDEO_FILE = "VIDEO_FILE"
     WEBCAM = "WEBCAM"
     RTSP = "RTSP"
@@ -15,28 +23,25 @@ class SOURCE_TYPE(Enum):
 @dataclass
 class VideoInfo:
     """
-    A class to store video information, including width, height, fps and
-        total number of frames.
+    Stores metadata about a video, such as dimensions, frame rate, and source type.
 
     Attributes:
-        width (int): width of the video in pixels
-        height (int): height of the video in pixels
-        fps (int): frames per second of the video
-        total_frames (Optional[int]): total number of frames in the video,
-            default is None
-        source_type (Optional[SOURCE_TYPE]): source type of the video,
-            default is None
+        width (int): Width of the video in pixels.
+        height (int): Height of the video in pixels.
+        fps (int): Frames per second of the video.
+        total_frames (int | None): Total number of frames, or None if unknown.
+        source_type (SOURCE_TYPE | None): The source type of the video (file, webcam, RTSP), or None.
 
     Examples:
         ```python
         import supervision as sv
 
-        video_info = sv.VideoInfo.from_video_path(video_path=<SOURCE_VIDEO_FILE>)
+        video_info = sv.VideoInfo.from_video_path("video.mp4")
 
-        video_info
+        print(video_info)
         # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
 
-        video_info.resolution_wh
+        print(video_info.resolution_wh)
         # (3840, 2160)
         ```
     """
@@ -49,16 +54,17 @@ class VideoInfo:
 
     @classmethod
     def from_video_path(cls, video_path: str) -> VideoInfo:
-        """Create VideoInfo from a video file path.
+        """
+        Create a VideoInfo instance from a video file.
 
         Args:
             video_path (str): Path to the video file.
 
         Returns:
-            VideoInfo: Video info containing width, height, fps, and total frames.
+            VideoInfo: Metadata including width, height, FPS, and total frames.
 
         Raises:
-            ValueError: If video cannot be opened or has invalid properties.
+            ValueError: If the video cannot be opened or has invalid properties.
         """
         video = cv2.VideoCapture(video_path)
         if not video.isOpened():
@@ -77,7 +83,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
 
             total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
             if total_frames < 0:
-                total_frames = None  # Some video formats may not report frame count
+                total_frames = None  # Some formats may not report frame count
         finally:
             video.release()
 
@@ -85,9 +91,10 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
 
     @property
     def resolution_wh(self) -> tuple[int, int]:
-        """Get the video resolution as (width, height).
+        """
+        Get the video resolution as a (width, height) tuple.
 
         Returns:
-            Tuple[int, int]: Video dimensions as (width, height).
+            tuple[int, int]: The video dimensions in pixels.
         """
-        return self.width, self.height
+        return self.width, self.height
\ No newline at end of file

From 706e85a191bb947923765834b61482fa1f7f8a1f Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 21:57:20 -0400
Subject: [PATCH 072/128] UPDATE: Negate audio stream

---
 supervision/video/backend/pyAV.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index e3d80373ed..11904cd815 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -52,6 +52,8 @@ def open(self, path: str) -> None:
             # If audio exists
             if len(self.container.streams.audio) > 0:
                 self.audio_stream = self.container.streams.audio[0]
+            else:
+                self.audio_stream = None
 
             if isinstance(path, int):
                 self.video_info.source_type = SOURCE_TYPE.WEBCAM
@@ -254,6 +256,7 @@ def __init__(
             self.stream.pix_fmt = "yuv420p"
 
             # Set time_base explicitly for correct timing
+            print(fps)
             self.stream.codec_context.time_base = Fraction(1, fps)
 
             # Frame index for PTS
@@ -310,3 +313,4 @@ def close(self) -> None:
                 self.container.mux(packet)
 
         self.container.close()
+

From 8eb0774e355b7f5f8c17ff0d899825b9a1ecb7fd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 10 Aug 2025 01:57:37 +0000
Subject: [PATCH 073/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/__init__.py | 10 ++++++----
 supervision/video/backend/base.py     |  3 ++-
 supervision/video/backend/openCV.py   |  2 +-
 supervision/video/backend/pyAV.py     | 28 ++++++++++++++++-----------
 supervision/video/core.py             | 17 ++++++++--------
 supervision/video/utils.py            |  3 ++-
 6 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index 805a050968..dea20aece2 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -15,20 +15,22 @@
     "pyav": pyAVBackend,
 }
 
+
 def getBackend(backend: str) -> BaseBackend:
     if backend.lower() in _backends:
         return _backends[backend.lower()]()
     else:
         raise ValueError(f"Unsupported backend: {backend}")
 
+
 __all__ = [
+    "BackendLiteral",
+    "BackendType",
     "BaseBackend",
     "BaseWriter",
     "OpenCVBackend",
     "OpenCVWriter",
+    "getBackend",
     "pyAVBackend",
     "pyAVWriter",
-    "getBackend",
-    "BackendLiteral",
-    "BackendType"
-]
\ No newline at end of file
+]
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index aac1a83b7a..4d7e3a12fd 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -46,6 +46,7 @@ def seek(self, frame_idx: int) -> None:
     def release(self) -> None:
         pass
 
+
 class BaseWriter(ABC):
     @abstractmethod
     def __init__(
@@ -57,7 +58,7 @@ def __init__(
         backend: BaseBackend | None = None,
     ):
         pass
-    
+
     @abstractmethod
     def __enter__(self):
         pass
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index fd7a026c27..894745dbdf 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -204,4 +204,4 @@ def write(self, frame: np.ndarray) -> None:
 
     def close(self) -> None:
         """Release writer resources."""
-        self.writer.release()
\ No newline at end of file
+        self.writer.release()
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 11904cd815..fca0782d18 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,9 +1,10 @@
+from fractions import Fraction
+
 import av
 import numpy as np
 
-from fractions import Fraction
 from supervision.video.backend import BaseBackend, BaseWriter
-from supervision.video.utils import VideoInfo, SOURCE_TYPE
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
 
 
 class pyAVBackend(BaseBackend):
@@ -22,7 +23,7 @@ def __init__(self):
         self.writer = pyAVWriter
         self.frame_generator = None
         self.video_info = None
-        self.current_frame_idx = 0  
+        self.current_frame_idx = 0
 
     def open(self, path: str) -> None:
         """
@@ -117,8 +118,8 @@ def read(self) -> tuple[bool, np.ndarray]:
         Read and decode the next frame from the video source.
 
         Returns:
-            tuple[bool, np.ndarray]:  
-                - `bool`: True if a frame was read successfully, False if end of stream.  
+            tuple[bool, np.ndarray]:
+                - `bool`: True if a frame was read successfully, False if end of stream.
                 - `np.ndarray`: Frame data in BGR format (H, W, 3). Empty array if unsuccessful.
 
         Raises:
@@ -152,7 +153,7 @@ def grab(self) -> bool:
 
         try:
             for packet in self.container.demux(video=0):
-                if packet.stream.type == 'video':
+                if packet.stream.type == "video":
                     return True
             return False
         except (StopIteration, av.error.EOFError):
@@ -181,7 +182,9 @@ def seek(self, frame_idx: int) -> None:
         time_base = float(self.stream.time_base)
         timestamp = int((frame_idx / framerate) / time_base)
 
-        self.container.seek(timestamp, stream=self.stream, any_frame=False, backward=True)
+        self.container.seek(
+            timestamp, stream=self.stream, any_frame=False, backward=True
+        )
         self.frame_generator = self.container.decode(video=0)
 
         self.current_frame_idx = 0
@@ -199,9 +202,11 @@ def seek(self, frame_idx: int) -> None:
                 self.current_frame_idx += 1
 
             if self.current_frame_idx >= frame_idx:
+
                 def _prepend_frame(first_frame, gen):
                     yield first_frame
                     yield from gen
+
                 self.frame_generator = _prepend_frame(frame, self.frame_generator)
                 break
 
@@ -247,7 +252,7 @@ def __init__(
         try:
             self.container = av.open(filename, mode="w")
             self.backend = backend
-            
+
             if codec is None:
                 codec = "h264"
             self.stream = self.container.add_stream(codec, rate=fps)
@@ -261,13 +266,15 @@ def __init__(
 
             # Frame index for PTS
             self.frame_idx = 0
-            
+
             self.audio_stream_out = None
             self.audio_packets = []
             if backend and backend.audio_stream and backend.audio_src_container:
                 audio_codec_name = backend.audio_stream.codec_context.name
                 audio_rate = backend.audio_stream.codec_context.rate
-                self.audio_stream_out = self.container.add_stream(audio_codec_name, rate=audio_rate)
+                self.audio_stream_out = self.container.add_stream(
+                    audio_codec_name, rate=audio_rate
+                )
                 for packet in backend.audio_src_container.demux(backend.audio_stream):
                     if packet.dts is not None:
                         self.audio_packets.append(packet)
@@ -313,4 +320,3 @@ def close(self) -> None:
                 self.container.mux(packet)
 
         self.container.close()
-
diff --git a/supervision/video/core.py b/supervision/video/core.py
index a6890f7f18..3b03111ea9 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,17 +1,18 @@
 from __future__ import annotations
 
 from collections.abc import Callable
-import numpy as np
+
 import cv2
+import numpy as np
 from tqdm.auto import tqdm
 
 from supervision.video.backend import (
-    BackendTypes,
     BackendLiteral,
+    BackendTypes,
     BaseWriter,
     getBackend,
 )
-from supervision.video.utils import VideoInfo, SOURCE_TYPE
+from supervision.video.utils import SOURCE_TYPE, VideoInfo
 
 
 class Video:
@@ -28,11 +29,7 @@ class Video:
     source: str | int
     backend: BackendTypes
 
-    def __init__(
-        self,
-        source: str | int,
-        backend: BackendLiteral = "opencv"
-    ) -> None:
+    def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None:
         """
         Initialize the Video object.
 
@@ -99,7 +96,9 @@ def frames(
         if self.backend.cap is None:
             raise RuntimeError("Video not opened yet.")
 
-        total_frames = self.backend.video_info.total_frames if self.backend.video_info else 0
+        total_frames = (
+            self.backend.video_info.total_frames if self.backend.video_info else 0
+        )
         is_live_stream = total_frames <= 0
 
         if is_live_stream:
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index fa1d7854a2..c2d7bb9702 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -15,6 +15,7 @@ class SOURCE_TYPE(Enum):
         WEBCAM: A webcam or other direct camera device.
         RTSP: A network RTSP video stream.
     """
+
     VIDEO_FILE = "VIDEO_FILE"
     WEBCAM = "WEBCAM"
     RTSP = "RTSP"
@@ -97,4 +98,4 @@ def resolution_wh(self) -> tuple[int, int]:
         Returns:
             tuple[int, int]: The video dimensions in pixels.
         """
-        return self.width, self.height
\ No newline at end of file
+        return self.width, self.height

From 9ea0f2bd305392942909bceec38103dfac156ae7 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 22:21:05 -0400
Subject: [PATCH 074/128] FIX: Fixed av import error and formatting

---
 supervision/video/backend/openCV.py |  2 +-
 supervision/video/backend/pyAV.py   | 15 +++++++++++----
 supervision/video/core.py           |  9 +++++----
 supervision/video/utils.py          |  5 ++++-
 4 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 894745dbdf..b0a394ffbf 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -171,7 +171,7 @@ def __init__(
             fps (int): Output frames per second.
             frame_size (tuple[int, int]): Frame dimensions (width, height).
             codec (str, optional): FourCC codec code. Defaults to "mp4v".
-            backend (OpenCVBackend | None, optional): Backend instance. Defaults to None.
+            backend (OpenCVBackend | None, optional): Backend instance. Defaults to None
 
         Raises:
             RuntimeError: If the writer cannot be opened.
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index fca0782d18..ed75864f0b 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,6 +1,9 @@
 from fractions import Fraction
 
-import av
+try:
+    import av
+except ImportError:
+    av = None
 import numpy as np
 
 from supervision.video.backend import BaseBackend, BaseWriter
@@ -18,6 +21,10 @@ class pyAVBackend(BaseBackend):
 
     def __init__(self):
         super().__init__()
+
+        if av is None:
+            raise RuntimeError("PyAV (`av` module) is not installed. Please install it to use this feature.")
+        
         self.container = None
         self.stream = None
         self.writer = pyAVWriter
@@ -120,7 +127,7 @@ def read(self) -> tuple[bool, np.ndarray]:
         Returns:
             tuple[bool, np.ndarray]:
                 - `bool`: True if a frame was read successfully, False if end of stream.
-                - `np.ndarray`: Frame data in BGR format (H, W, 3). Empty array if unsuccessful.
+                - `np.ndarray`: Frame data in BGR format (H, W, 3).
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -195,9 +202,9 @@ def seek(self, frame_idx: int) -> None:
                 break
 
             if getattr(frame, "time", None) is not None:
-                self.current_frame_idx = int(round(frame.time * framerate))
+                self.current_frame_idx = (round(frame.time * framerate))
             elif getattr(frame, "pts", None) is not None:
-                self.current_frame_idx = int(round((frame.pts * time_base) * framerate))
+                self.current_frame_idx = (round((frame.pts * time_base) * framerate))
             else:
                 self.current_frame_idx += 1
 
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 3b03111ea9..e0a72b5e02 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -20,7 +20,7 @@ class Video:
     A high-level interface for reading, processing, and writing video files or streams.
 
     Attributes:
-        info (VideoInfo): Metadata about the video, such as resolution, FPS, and frame count.
+        info (VideoInfo): Metadata about the video.
         source (str | int): Path to the video file or index of the camera device.
         backend (BackendTypes): Video backend used for I/O operations.
     """
@@ -141,9 +141,10 @@ def save(
 
         Args:
             target_path (str): Output file path for the processed video.
-            callback (Callable[[np.ndarray, int], np.ndarray]): Function applied to each frame.
-                Takes the frame (np.ndarray) and frame index (int) as input,
-                returns the processed frame (np.ndarray).
+            callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in
+                a numpy ndarray representation of a video frame and an
+                int index of the frame and returns a processed numpy ndarray
+                representation of the frame.
             fps (int | None, optional): Frames per second of the output video.
                 If None, uses the original FPS.
             progress_message (str, optional): Message displayed in the progress bar.
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index c2d7bb9702..ccc7643c5b 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -31,7 +31,10 @@ class VideoInfo:
         height (int): Height of the video in pixels.
         fps (int): Frames per second of the video.
         total_frames (int | None): Total number of frames, or None if unknown.
-        source_type (SOURCE_TYPE | None): The source type of the video (file, webcam, RTSP), or None.
+        source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP, or None.
+
+    Methods:
+        from_video_path(video file, webcam, RTSP, or None).
 
     Examples:
         ```python

From 40c2a7c5085447380cf691db7f9bb599efda59b2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 10 Aug 2025 02:21:24 +0000
Subject: [PATCH 075/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/pyAV.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index ed75864f0b..9ad0b1b694 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -23,8 +23,10 @@ def __init__(self):
         super().__init__()
 
         if av is None:
-            raise RuntimeError("PyAV (`av` module) is not installed. Please install it to use this feature.")
-        
+            raise RuntimeError(
+                "PyAV (`av` module) is not installed. Please install it to use this feature."
+            )
+
         self.container = None
         self.stream = None
         self.writer = pyAVWriter
@@ -202,9 +204,9 @@ def seek(self, frame_idx: int) -> None:
                 break
 
             if getattr(frame, "time", None) is not None:
-                self.current_frame_idx = (round(frame.time * framerate))
+                self.current_frame_idx = round(frame.time * framerate)
             elif getattr(frame, "pts", None) is not None:
-                self.current_frame_idx = (round((frame.pts * time_base) * framerate))
+                self.current_frame_idx = round((frame.pts * time_base) * framerate)
             else:
                 self.current_frame_idx += 1
 

From 10482f4c413539df3ddb2ac5fe41157131e02be5 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sat, 9 Aug 2025 22:23:49 -0400
Subject: [PATCH 076/128] FIX: Fixed av error message and formatting

---
 supervision/video/backend/pyAV.py | 3 ++-
 supervision/video/utils.py        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 9ad0b1b694..96857c387f 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 from fractions import Fraction
 
 try:
@@ -24,7 +25,7 @@ def __init__(self):
 
         if av is None:
             raise RuntimeError(
-                "PyAV (`av` module) is not installed. Please install it to use this feature."
+                "PyAV (`av` module) is not installed. Run `pip install av`."
             )
 
         self.container = None
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index ccc7643c5b..7e0ceed245 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -31,7 +31,7 @@ class VideoInfo:
         height (int): Height of the video in pixels.
         fps (int): Frames per second of the video.
         total_frames (int | None): Total number of frames, or None if unknown.
-        source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP, or None.
+        source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP.
 
     Methods:
         from_video_path(video file, webcam, RTSP, or None).

From 337f65e457c0af84c005c25d4d07a9046ac963f4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 10 Aug 2025 02:24:04 +0000
Subject: [PATCH 077/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/pyAV.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 96857c387f..c3c9fc5e4c 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 from fractions import Fraction
 
 try:

From 1e404206b54d737d5cb4ce496b7ab36fe294ef43 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 10 Aug 2025 00:23:21 -0400
Subject: [PATCH 078/128] FIX: Fixed issue with audio sync with fps

---
 supervision/video/backend/pyAV.py | 35 ++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index c3c9fc5e4c..01cdd2aea5 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,5 +1,4 @@
 from __future__ import annotations
-
 from fractions import Fraction
 
 try:
@@ -286,6 +285,7 @@ def __init__(
                 self.audio_stream_out = self.container.add_stream(
                     audio_codec_name, rate=audio_rate
                 )
+                
                 for packet in backend.audio_src_container.demux(backend.audio_stream):
                     if packet.dts is not None:
                         self.audio_packets.append(packet)
@@ -320,14 +320,43 @@ def write(self, frame: np.ndarray) -> None:
     def close(self) -> None:
         """
         Finalize the video file and close the writer.
+        Automatically calculate speed factor based on original audio length
+        and output video length, then speed up audio accordingly.
         """
+        # Flush video encoder
         packets = self.stream.encode()
         for packet in packets:
             self.container.mux(packet)
 
-        if self.audio_stream_out:
+        speed_factor = 1.0
+        try:
+            if self.backend and self.backend.audio_stream and self.backend.audio_stream.duration:
+                orig_audio_duration = float(self.backend.audio_stream.duration * self.backend.audio_stream.time_base)
+            elif self.backend and self.backend.audio_src_container and self.backend.audio_src_container.duration:
+                orig_audio_duration = self.backend.audio_src_container.duration / 1_000_000  # us to s
+            else:
+                orig_audio_duration = None
+
+            fps = float(1 / self.stream.codec_context.time_base)
+            new_video_duration = self.frame_idx / fps
+
+            if orig_audio_duration and new_video_duration > 0:
+                speed_factor = orig_audio_duration / new_video_duration
+        except Exception:
+            speed_factor = 1.0
+
+        if self.audio_stream_out and speed_factor != 1.0:
+            for packet in self.audio_packets:
+                if packet.pts is not None:
+                    packet.pts = int(packet.pts / speed_factor)
+                if packet.dts is not None:
+                    packet.dts = int(packet.dts / speed_factor)
+                packet.stream = self.audio_stream_out
+                packet.time_base = self.audio_stream_out.time_base
+                self.container.mux(packet)
+        elif self.audio_stream_out:
             for packet in self.audio_packets:
                 packet.stream = self.audio_stream_out
                 self.container.mux(packet)
 
-        self.container.close()
+        self.container.close()
\ No newline at end of file

From 62b1fc61577da209b8c96898e9247ca29cf9275f Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 10 Aug 2025 00:42:41 -0400
Subject: [PATCH 079/128] UPDATE: Finer audio mux

---
 supervision/video/backend/pyAV.py | 70 +++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 23 deletions(-)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 01cdd2aea5..3885d6dfc7 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -236,6 +236,7 @@ class pyAVWriter(BaseWriter):
     PyAV-based video writer.
 
     Writes frames to a video file with optional audio from a backend source.
+    Uses finer timestamp granularity (milliseconds) for smoother video playback.
     """
 
     def __init__(
@@ -244,16 +245,16 @@ def __init__(
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "h264",
-        backend: pyAVBackend | None = None,
+        backend: "pyAVBackend" | None = None,
     ):
         """
-        Initialize a video writer.
+        Initialize the video writer.
 
         Args:
-            filename (str): Output video file path.
+            filename (str): Path to the output video file.
             fps (int): Frames per second for the output video.
-            frame_size (tuple[int, int]): Frame dimensions as (width, height).
-            codec (str, optional): Video codec (default: "h264").
+            frame_size (tuple[int, int]): Width and height of the video frames.
+            codec (str, optional): Video codec name (default "h264").
             backend (pyAVBackend, optional): Backend providing audio stream.
 
         Raises:
@@ -270,12 +271,11 @@ def __init__(
             self.stream.height = frame_size[1]
             self.stream.pix_fmt = "yuv420p"
 
-            # Set time_base explicitly for correct timing
-            print(fps)
-            self.stream.codec_context.time_base = Fraction(1, fps)
+            # Use finer time_base (1/1000) for millisecond precision timestamps
+            self.stream.codec_context.time_base = Fraction(1, 1000)
 
-            # Frame index for PTS
             self.frame_idx = 0
+            self.fps = fps  # Store FPS for timestamp calculations
 
             self.audio_stream_out = None
             self.audio_packets = []
@@ -285,7 +285,8 @@ def __init__(
                 self.audio_stream_out = self.container.add_stream(
                     audio_codec_name, rate=audio_rate
                 )
-                
+
+                # Buffer all audio packets from backend for muxing later
                 for packet in backend.audio_src_container.demux(backend.audio_stream):
                     if packet.dts is not None:
                         self.audio_packets.append(packet)
@@ -294,22 +295,27 @@ def __init__(
             raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
 
     def __enter__(self):
+        """Enable use as a context manager."""
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
+        """Close the writer on context exit."""
         self.close()
 
     def write(self, frame: np.ndarray) -> None:
         """
-        Write a single frame to the output video.
+        Write a single video frame.
 
         Args:
-            frame (np.ndarray): Frame in BGR format (H, W, 3).
+            frame (np.ndarray): Frame data in BGR format (H, W, 3).
         """
-        frame_rgb = frame[..., ::-1]
+        # Calculate PTS as milliseconds: frame_index * (1000 ms / fps)
+        pts = int(self.frame_idx * (1000 / self.fps))
+
+        frame_rgb = frame[..., ::-1]  # Convert BGR to RGB
         av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
 
-        av_frame.pts = self.frame_idx
+        av_frame.pts = pts
         av_frame.time_base = self.stream.codec_context.time_base
         self.frame_idx += 1
 
@@ -319,26 +325,38 @@ def write(self, frame: np.ndarray) -> None:
 
     def close(self) -> None:
         """
-        Finalize the video file and close the writer.
-        Automatically calculate speed factor based on original audio length
-        and output video length, then speed up audio accordingly.
+        Finalize the video file, mux audio with adjusted timestamps to sync with video,
+        and close the container.
         """
-        # Flush video encoder
+        def rescale_timestamp(value, src_tb, dst_tb):
+            """
+            Rescale timestamp value from source timebase to destination timebase.
+
+            Args:
+                value (int): Timestamp value (PTS or DTS).
+                src_tb (Fraction): Source time base.
+                dst_tb (Fraction): Destination time base.
+
+            Returns:
+                int: Rescaled timestamp.
+            """
+            return int(value * src_tb / dst_tb)
+
         packets = self.stream.encode()
         for packet in packets:
             self.container.mux(packet)
 
         speed_factor = 1.0
+
         try:
             if self.backend and self.backend.audio_stream and self.backend.audio_stream.duration:
                 orig_audio_duration = float(self.backend.audio_stream.duration * self.backend.audio_stream.time_base)
             elif self.backend and self.backend.audio_src_container and self.backend.audio_src_container.duration:
-                orig_audio_duration = self.backend.audio_src_container.duration / 1_000_000  # us to s
+                orig_audio_duration = self.backend.audio_src_container.duration / 1000
             else:
                 orig_audio_duration = None
 
-            fps = float(1 / self.stream.codec_context.time_base)
-            new_video_duration = self.frame_idx / fps
+            new_video_duration = (self.frame_idx * (1 / self.fps))
 
             if orig_audio_duration and new_video_duration > 0:
                 speed_factor = orig_audio_duration / new_video_duration
@@ -348,15 +366,21 @@ def close(self) -> None:
         if self.audio_stream_out and speed_factor != 1.0:
             for packet in self.audio_packets:
                 if packet.pts is not None:
+                    packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base)
                     packet.pts = int(packet.pts / speed_factor)
                 if packet.dts is not None:
+                    packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base)
                     packet.dts = int(packet.dts / speed_factor)
                 packet.stream = self.audio_stream_out
-                packet.time_base = self.audio_stream_out.time_base
                 self.container.mux(packet)
         elif self.audio_stream_out:
             for packet in self.audio_packets:
+                if packet.pts is not None:
+                    packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base)
+                if packet.dts is not None:
+                    packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base)
                 packet.stream = self.audio_stream_out
                 self.container.mux(packet)
 
-        self.container.close()
\ No newline at end of file
+        self.container.close()
+

From e3e21abcba524c82b11b3f7cc5f007619aeef476 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 10 Aug 2025 04:43:05 +0000
Subject: [PATCH 080/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/pyAV.py | 40 +++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 3885d6dfc7..e6a5c74e8c 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 from fractions import Fraction
 
 try:
@@ -245,7 +246,7 @@ def __init__(
         fps: int,
         frame_size: tuple[int, int],
         codec: str = "h264",
-        backend: "pyAVBackend" | None = None,
+        backend: pyAVBackend | None = None,
     ):
         """
         Initialize the video writer.
@@ -328,6 +329,7 @@ def close(self) -> None:
         Finalize the video file, mux audio with adjusted timestamps to sync with video,
         and close the container.
         """
+
         def rescale_timestamp(value, src_tb, dst_tb):
             """
             Rescale timestamp value from source timebase to destination timebase.
@@ -349,14 +351,25 @@ def rescale_timestamp(value, src_tb, dst_tb):
         speed_factor = 1.0
 
         try:
-            if self.backend and self.backend.audio_stream and self.backend.audio_stream.duration:
-                orig_audio_duration = float(self.backend.audio_stream.duration * self.backend.audio_stream.time_base)
-            elif self.backend and self.backend.audio_src_container and self.backend.audio_src_container.duration:
+            if (
+                self.backend
+                and self.backend.audio_stream
+                and self.backend.audio_stream.duration
+            ):
+                orig_audio_duration = float(
+                    self.backend.audio_stream.duration
+                    * self.backend.audio_stream.time_base
+                )
+            elif (
+                self.backend
+                and self.backend.audio_src_container
+                and self.backend.audio_src_container.duration
+            ):
                 orig_audio_duration = self.backend.audio_src_container.duration / 1000
             else:
                 orig_audio_duration = None
 
-            new_video_duration = (self.frame_idx * (1 / self.fps))
+            new_video_duration = self.frame_idx * (1 / self.fps)
 
             if orig_audio_duration and new_video_duration > 0:
                 speed_factor = orig_audio_duration / new_video_duration
@@ -366,21 +379,28 @@ def rescale_timestamp(value, src_tb, dst_tb):
         if self.audio_stream_out and speed_factor != 1.0:
             for packet in self.audio_packets:
                 if packet.pts is not None:
-                    packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base)
+                    packet.pts = rescale_timestamp(
+                        packet.pts, packet.time_base, self.audio_stream_out.time_base
+                    )
                     packet.pts = int(packet.pts / speed_factor)
                 if packet.dts is not None:
-                    packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base)
+                    packet.dts = rescale_timestamp(
+                        packet.dts, packet.time_base, self.audio_stream_out.time_base
+                    )
                     packet.dts = int(packet.dts / speed_factor)
                 packet.stream = self.audio_stream_out
                 self.container.mux(packet)
         elif self.audio_stream_out:
             for packet in self.audio_packets:
                 if packet.pts is not None:
-                    packet.pts = rescale_timestamp(packet.pts, packet.time_base, self.audio_stream_out.time_base)
+                    packet.pts = rescale_timestamp(
+                        packet.pts, packet.time_base, self.audio_stream_out.time_base
+                    )
                 if packet.dts is not None:
-                    packet.dts = rescale_timestamp(packet.dts, packet.time_base, self.audio_stream_out.time_base)
+                    packet.dts = rescale_timestamp(
+                        packet.dts, packet.time_base, self.audio_stream_out.time_base
+                    )
                 packet.stream = self.audio_stream_out
                 self.container.mux(packet)
 
         self.container.close()
-

From 055c4fce0c76093831ab5f1147bcd493f858dba9 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 10 Aug 2025 00:49:32 -0400
Subject: [PATCH 081/128] UPDATE: Param config to render audio

---
 supervision/video/backend/__init__.py | 2 +-
 supervision/video/backend/base.py     | 3 ++-
 supervision/video/backend/openCV.py   | 8 ++++++--
 supervision/video/backend/pyAV.py     | 6 +++---
 supervision/video/core.py             | 4 ++--
 5 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index dea20aece2..97065c8217 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -18,7 +18,7 @@
 
 def getBackend(backend: str) -> BaseBackend:
     if backend.lower() in _backends:
-        return _backends[backend.lower()]()
+        return _backends[backend.lower()]
     else:
         raise ValueError(f"Unsupported backend: {backend}")
 
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 4d7e3a12fd..a1247abf6f 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -8,11 +8,12 @@
 
 
 class BaseBackend(ABC):
-    def __init__(self):
+    def __init__(self, render_audio=False):
         self.cap = None
         self.video_info = None
         self.writer = None
         self.path = None
+        self.render_audio = render_audio
 
     @abstractmethod
     def open(self, path: str) -> None:
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index b0a394ffbf..b80545e43e 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -15,13 +15,17 @@ class OpenCVBackend(BaseBackend):
     grabbing, and retrieving metadata using OpenCV.
     """
 
-    def __init__(self):
+    def __init__(self, render_audio=False):
         """Initialize with no active capture, writer, or path."""
-        super().__init__()
+        if render_audio:
+            raise ValueError("OpenCV backend does not support audio. " \
+            "Please use `pyAV` backend instead or set `render_audio=False`")
+        
         self.cap = None
         self.video_info = None
         self.writer = OpenCVWriter
         self.path = None
+        self.render_audio = render_audio
 
     def open(self, path: str | int) -> None:
         """
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 3885d6dfc7..dd6b24dbc3 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -20,7 +20,7 @@ class pyAVBackend(BaseBackend):
     and RTSP streams.
     """
 
-    def __init__(self):
+    def __init__(self, render_audio=False):
         super().__init__()
 
         if av is None:
@@ -34,6 +34,7 @@ def __init__(self):
         self.frame_generator = None
         self.video_info = None
         self.current_frame_idx = 0
+        self.render_audio = render_audio
 
     def open(self, path: str) -> None:
         """
@@ -61,7 +62,7 @@ def open(self, path: str) -> None:
             self.current_frame_idx = 0
 
             # If audio exists
-            if len(self.container.streams.audio) > 0:
+            if self.render_audio and len(self.container.streams.audio) > 0:
                 self.audio_stream = self.container.streams.audio[0]
             else:
                 self.audio_stream = None
@@ -230,7 +231,6 @@ def release(self) -> None:
             self.stream = None
             self.frame_generator = None
 
-
 class pyAVWriter(BaseWriter):
     """
     PyAV-based video writer.
diff --git a/supervision/video/core.py b/supervision/video/core.py
index e0a72b5e02..da4f4bd820 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -29,7 +29,7 @@ class Video:
     source: str | int
     backend: BackendTypes
 
-    def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None:
+    def __init__(self, source: str | int, backend: BackendLiteral = "opencv", render_audio=False) -> None:
         """
         Initialize the Video object.
 
@@ -38,7 +38,7 @@ def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> Non
             backend (BackendLiteral, optional): Backend type for video I/O.
                 Defaults to "opencv".
         """
-        self.backend = getBackend(backend)
+        self.backend = getBackend(backend)(render_audio=render_audio)
         self.backend.open(source)
         self.info = self.backend.info()
         self.source = source

From 7450573d6ee7b832c65dde479022f174e124a762 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 10 Aug 2025 01:06:33 -0400
Subject: [PATCH 082/128] UPDATE: Changed config to render audio to be isolated

---
 supervision/video/backend/base.py   |  4 ++--
 supervision/video/backend/openCV.py | 12 ++++++------
 supervision/video/backend/pyAV.py   |  9 +++++----
 supervision/video/core.py           | 11 ++++++-----
 4 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index a1247abf6f..806230ca63 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -8,12 +8,11 @@
 
 
 class BaseBackend(ABC):
-    def __init__(self, render_audio=False):
+    def __init__(self):
         self.cap = None
         self.video_info = None
         self.writer = None
         self.path = None
-        self.render_audio = render_audio
 
     @abstractmethod
     def open(self, path: str) -> None:
@@ -57,6 +56,7 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str | None = None,
         backend: BaseBackend | None = None,
+        render_audio: bool = False,
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index b80545e43e..fd9ee5515f 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -15,17 +15,12 @@ class OpenCVBackend(BaseBackend):
     grabbing, and retrieving metadata using OpenCV.
     """
 
-    def __init__(self, render_audio=False):
+    def __init__(self):
         """Initialize with no active capture, writer, or path."""
-        if render_audio:
-            raise ValueError("OpenCV backend does not support audio. " \
-            "Please use `pyAV` backend instead or set `render_audio=False`")
-        
         self.cap = None
         self.video_info = None
         self.writer = OpenCVWriter
         self.path = None
-        self.render_audio = render_audio
 
     def open(self, path: str | int) -> None:
         """
@@ -166,6 +161,7 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str = "mp4v",
         backend: OpenCVBackend | None = None,
+        render_audio: bool = False,
     ):
         """
         Initialize the writer.
@@ -180,6 +176,10 @@ def __init__(
         Raises:
             RuntimeError: If the writer cannot be opened.
         """
+        if render_audio:
+            raise ValueError("OpenCV backend does not support audio. " \
+            "Please use `pyav` backend instead or set `render_audio=False`")
+        
         self.backend = backend
         try:
             fourcc_int = cv2.VideoWriter_fourcc(*codec)
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 1a0b686440..36fff00f45 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -21,7 +21,7 @@ class pyAVBackend(BaseBackend):
     and RTSP streams.
     """
 
-    def __init__(self, render_audio=False):
+    def __init__(self):
         super().__init__()
 
         if av is None:
@@ -35,7 +35,6 @@ def __init__(self, render_audio=False):
         self.frame_generator = None
         self.video_info = None
         self.current_frame_idx = 0
-        self.render_audio = render_audio
 
     def open(self, path: str) -> None:
         """
@@ -63,7 +62,7 @@ def open(self, path: str) -> None:
             self.current_frame_idx = 0
 
             # If audio exists
-            if self.render_audio and len(self.container.streams.audio) > 0:
+            if len(self.container.streams.audio) > 0:
                 self.audio_stream = self.container.streams.audio[0]
             else:
                 self.audio_stream = None
@@ -247,6 +246,7 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str = "h264",
         backend: pyAVBackend | None = None,
+        render_audio: bool = False,
     ):
         """
         Initialize the video writer.
@@ -280,7 +280,8 @@ def __init__(
 
             self.audio_stream_out = None
             self.audio_packets = []
-            if backend and backend.audio_stream and backend.audio_src_container:
+            
+            if render_audio and backend and backend.audio_stream and backend.audio_src_container:
                 audio_codec_name = backend.audio_stream.codec_context.name
                 audio_rate = backend.audio_stream.codec_context.rate
                 self.audio_stream_out = self.container.add_stream(
diff --git a/supervision/video/core.py b/supervision/video/core.py
index da4f4bd820..970a4e7a25 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -29,7 +29,7 @@ class Video:
     source: str | int
     backend: BackendTypes
 
-    def __init__(self, source: str | int, backend: BackendLiteral = "opencv", render_audio=False) -> None:
+    def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None:
         """
         Initialize the Video object.
 
@@ -38,7 +38,7 @@ def __init__(self, source: str | int, backend: BackendLiteral = "opencv", render
             backend (BackendLiteral, optional): Backend type for video I/O.
                 Defaults to "opencv".
         """
-        self.backend = getBackend(backend)(render_audio=render_audio)
+        self.backend = getBackend(backend)()
         self.backend.open(source)
         self.info = self.backend.info()
         self.source = source
@@ -53,7 +53,7 @@ def __iter__(self):
         return self.backend.frames()
 
     def sink(
-        self, target_path: str, info: VideoInfo, codec: str | None = None
+        self, target_path: str, info: VideoInfo, codec: str | None = None, render_audio: bool = False
     ) -> BaseWriter:
         """
         Create a video writer for saving frames to a file.
@@ -68,7 +68,7 @@ def sink(
             BaseWriter: Video writer instance for writing frames.
         """
         return self.backend.writer(
-            target_path, info.fps, info.resolution_wh, codec, self.backend
+            target_path, info.fps, info.resolution_wh, codec, self.backend, render_audio
         )
 
     def frames(
@@ -135,6 +135,7 @@ def save(
         progress_message: str = "Processing video",
         show_progress: bool = False,
         codec: str | None = None,
+        render_audio: bool = False,
     ):
         """
         Process and save video frames to a file.
@@ -171,7 +172,7 @@ def save(
             fps = self.backend.video_info.fps
 
         writer = self.backend.writer(
-            target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend
+            target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend, render_audio
         )
         total_frames = self.backend.video_info.total_frames
         frames_generator = self.frames()

From 92238044185a920417f2feba4925a4e5985506c2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 10 Aug 2025 05:06:52 +0000
Subject: [PATCH 083/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/openCV.py |  8 +++++---
 supervision/video/backend/pyAV.py   | 10 ++++++++--
 supervision/video/core.py           | 13 +++++++++++--
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index fd9ee5515f..1398b19ea6 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -177,9 +177,11 @@ def __init__(
             RuntimeError: If the writer cannot be opened.
         """
         if render_audio:
-            raise ValueError("OpenCV backend does not support audio. " \
-            "Please use `pyav` backend instead or set `render_audio=False`")
-        
+            raise ValueError(
+                "OpenCV backend does not support audio. "
+                "Please use `pyav` backend instead or set `render_audio=False`"
+            )
+
         self.backend = backend
         try:
             fourcc_int = cv2.VideoWriter_fourcc(*codec)
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 36fff00f45..89005a2cbb 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -231,6 +231,7 @@ def release(self) -> None:
             self.stream = None
             self.frame_generator = None
 
+
 class pyAVWriter(BaseWriter):
     """
     PyAV-based video writer.
@@ -280,8 +281,13 @@ def __init__(
 
             self.audio_stream_out = None
             self.audio_packets = []
-            
-            if render_audio and backend and backend.audio_stream and backend.audio_src_container:
+
+            if (
+                render_audio
+                and backend
+                and backend.audio_stream
+                and backend.audio_src_container
+            ):
                 audio_codec_name = backend.audio_stream.codec_context.name
                 audio_rate = backend.audio_stream.codec_context.rate
                 self.audio_stream_out = self.container.add_stream(
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 970a4e7a25..c28bfa4fad 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -53,7 +53,11 @@ def __iter__(self):
         return self.backend.frames()
 
     def sink(
-        self, target_path: str, info: VideoInfo, codec: str | None = None, render_audio: bool = False
+        self,
+        target_path: str,
+        info: VideoInfo,
+        codec: str | None = None,
+        render_audio: bool = False,
     ) -> BaseWriter:
         """
         Create a video writer for saving frames to a file.
@@ -172,7 +176,12 @@ def save(
             fps = self.backend.video_info.fps
 
         writer = self.backend.writer(
-            target_path, fps, self.backend.video_info.resolution_wh, codec, self.backend, render_audio
+            target_path,
+            fps,
+            self.backend.video_info.resolution_wh,
+            codec,
+            self.backend,
+            render_audio,
         )
         total_frames = self.backend.video_info.total_frames
         frames_generator = self.frames()

From fcee1a1ec0b7af1844d4e4430a5e41ddfe436b3a Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 10 Aug 2025 01:23:11 -0400
Subject: [PATCH 084/128] UPDATE: Updated deprecation warnings

---
 supervision/utils/video.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 00eb9b4906..da77e8d68d 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -5,12 +5,13 @@
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 
+from supervision.utils.internal import deprecated
+
 import cv2
 import numpy as np
 from tqdm.auto import tqdm
 
-
-@DeprecationWarning
+@deprecated("Use `sv.VideoInfo` for video metadata.")
 @dataclass
 class VideoInfo:
     """
@@ -60,8 +61,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
     def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
-
-@DeprecationWarning
+@deprecated("Please use `sv.Video` for video writing and processing.")
 class VideoSink:
     """
     Context manager that saves video frames to a file using OpenCV.
@@ -118,8 +118,6 @@ def write_frame(self, frame: np.ndarray):
     def __exit__(self, exc_type, exc_value, exc_traceback):
         self.__writer.release()
 
-
-@DeprecationWarning
 def _validate_and_setup_video(
     source_path: str, start: int, end: int | None, iterative_seek: bool = False
 ):
@@ -143,8 +141,7 @@ def _validate_and_setup_video(
 
     return video, start, end
 
-
-@DeprecationWarning
+@deprecated("Use `sv.Video().frames()` or `sv.Video()` for frame iteration.")
 def get_video_frames_generator(
     source_path: str,
     stride: int = 1,
@@ -195,8 +192,7 @@ def get_video_frames_generator(
         frame_position += stride
     video.release()
 
-
-@DeprecationWarning
+@deprecated("Use `sv.Video.save()` with a callback for processing and saving videos.")
 def process_video(
     source_path: str,
     target_path: str,

From 42a0f244c5ca154297ba6ac556d808ca203609f9 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 10 Aug 2025 01:24:07 -0400
Subject: [PATCH 085/128] ADD: Added unit tests from PR #1941

---
 test/video/test_video.py | 78 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 test/video/test_video.py

diff --git a/test/video/test_video.py b/test/video/test_video.py
new file mode 100644
index 0000000000..c6c8e1f407
--- /dev/null
+++ b/test/video/test_video.py
@@ -0,0 +1,78 @@
+import os
+
+import cv2
+import numpy as np
+
+import supervision as sv
+
+
+def _create_temp_video(path: str, width=320, height=240, fps=30, frames=10):
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    writer = cv2.VideoWriter(path, fourcc, fps, (width, height))
+    for _ in range(frames):
+        frame = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)
+        writer.write(frame)
+    writer.release()
+
+
+def test_video_info_and_iteration(tmp_path):
+    vid_path = tmp_path / "test.mp4"
+    _create_temp_video(str(vid_path))
+
+    video = sv.Video(str(vid_path))
+    info = video.info
+
+    assert info.width == 320
+    assert info.height == 240
+    assert info.total_frames == 10
+
+    frames = list(video.frames())
+    assert len(frames) == 10
+
+
+def test_frames_stride(tmp_path):
+    vid_path = tmp_path / "test_stride.mp4"
+    _create_temp_video(str(vid_path), frames=9)
+
+    video = sv.Video(str(vid_path))
+    frames = list(video.frames(stride=2))
+    assert len(frames) == 5  # ceil(9/2)
+
+
+def test_save_with_callback(tmp_path):
+    src = tmp_path / "src.mp4"
+    dst = tmp_path / "dst.mp4"
+    _create_temp_video(str(src))
+
+    def identity(frame, i):
+        return frame
+
+    sv.Video(str(src)).save(str(dst), callback=identity, show_progress=False)
+
+    # confirm destination exists and metadata matches
+    dst_video = sv.Video(str(dst))
+    assert dst_video.info.total_frames == 10
+
+
+def test_legacy_get_video_frames_generator(tmp_path):
+    vid_path = tmp_path / "legacy.mp4"
+    _create_temp_video(str(vid_path), frames=6)
+
+    frames = list(sv.get_video_frames_generator(str(vid_path)))
+    assert len(frames) == 6
+
+
+def test_legacy_process_video(tmp_path):
+    src = tmp_path / "legacy_src.mp4"
+    dst = tmp_path / "legacy_dst.mp4"
+    _create_temp_video(str(src), frames=4)
+
+    sv.process_video(
+        source_path=str(src),
+        target_path=str(dst),
+        callback=lambda f, i: f,
+        show_progress=False,
+    )
+
+    assert os.path.exists(dst)
+    assert sv.Video(str(dst)).info.total_frames == 4
\ No newline at end of file

From c004d240bbeb5cf3f99d8d97b2638f51b93a0ac8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 10 Aug 2025 05:24:27 +0000
Subject: [PATCH 086/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py | 9 +++++++--
 test/video/test_video.py   | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index da77e8d68d..c485bd96e6 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -5,12 +5,13 @@
 from collections.abc import Callable, Generator
 from dataclasses import dataclass
 
-from supervision.utils.internal import deprecated
-
 import cv2
 import numpy as np
 from tqdm.auto import tqdm
 
+from supervision.utils.internal import deprecated
+
+
 @deprecated("Use `sv.VideoInfo` for video metadata.")
 @dataclass
 class VideoInfo:
@@ -61,6 +62,7 @@ def from_video_path(cls, video_path: str) -> VideoInfo:
     def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
+
 @deprecated("Please use `sv.Video` for video writing and processing.")
 class VideoSink:
     """
@@ -118,6 +120,7 @@ def write_frame(self, frame: np.ndarray):
     def __exit__(self, exc_type, exc_value, exc_traceback):
         self.__writer.release()
 
+
 def _validate_and_setup_video(
     source_path: str, start: int, end: int | None, iterative_seek: bool = False
 ):
@@ -141,6 +144,7 @@ def _validate_and_setup_video(
 
     return video, start, end
 
+
 @deprecated("Use `sv.Video().frames()` or `sv.Video()` for frame iteration.")
 def get_video_frames_generator(
     source_path: str,
@@ -192,6 +196,7 @@ def get_video_frames_generator(
         frame_position += stride
     video.release()
 
+
 @deprecated("Use `sv.Video.save()` with a callback for processing and saving videos.")
 def process_video(
     source_path: str,
diff --git a/test/video/test_video.py b/test/video/test_video.py
index c6c8e1f407..785b0c704b 100644
--- a/test/video/test_video.py
+++ b/test/video/test_video.py
@@ -75,4 +75,4 @@ def test_legacy_process_video(tmp_path):
     )
 
     assert os.path.exists(dst)
-    assert sv.Video(str(dst)).info.total_frames == 4
\ No newline at end of file
+    assert sv.Video(str(dst)).info.total_frames == 4

From c5e1d2ebe58db4662ce810d1b83fe2366b4516ac Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 16:21:58 -0400
Subject: [PATCH 087/128] FIX: Updated the pyproject.toml's extra requirements
 for ffmpeg

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 554dd16553..9fb58c765d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,7 +58,7 @@ Documentation = "https://supervision.roboflow.com/latest/"
 metrics = [
     "pandas>=2.0.0",
 ]
-video = ["av (>=15.0.0,<16.0.0)"]
+ffmpeg = ["av (>=15.0.0)"]
 
 [dependency-groups]
 dev = [

From b258d1ba26a4dc12dea5acabe61ee60e43d70b2a Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 19:05:59 -0400
Subject: [PATCH 088/128] FIX: Formatting fixes from code review

---
 supervision/utils/video.py          | 20 +++++++++++++++----
 supervision/video/__init__.py       |  4 ++--
 supervision/video/backend/openCV.py | 10 +++++-----
 supervision/video/backend/pyAV.py   | 10 +++++-----
 supervision/video/core.py           |  4 ++--
 supervision/video/utils.py          | 31 +++++++++++++++++++++++------
 6 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index c485bd96e6..b7fbf87ccc 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -12,7 +12,10 @@
 from supervision.utils.internal import deprecated
 
 
-@deprecated("Use `sv.VideoInfo` for video metadata.")
+@deprecated(  
+    "`process_video` is deprecated and will be removed in "  
+    "`supervision-0.32.0`. Use `sv.VideoInfo` instead."  
+) 
 @dataclass
 class VideoInfo:
     """
@@ -63,7 +66,10 @@ def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
 
-@deprecated("Please use `sv.Video` for video writing and processing.")
+@deprecated(  
+    "`process_video` is deprecated and will be removed in "  
+    "`supervision-0.32.0`. Use `sv.Video().save` instead."  
+) 
 class VideoSink:
     """
     Context manager that saves video frames to a file using OpenCV.
@@ -145,7 +151,10 @@ def _validate_and_setup_video(
     return video, start, end
 
 
-@deprecated("Use `sv.Video().frames()` or `sv.Video()` for frame iteration.")
+@deprecated(  
+    "`process_video` is deprecated and will be removed in "  
+    "`supervision-0.32.0`. Use `sv.Video().frame()` or `sv.Video()` instead."  
+) 
 def get_video_frames_generator(
     source_path: str,
     stride: int = 1,
@@ -197,7 +206,10 @@ def get_video_frames_generator(
     video.release()
 
 
-@deprecated("Use `sv.Video.save()` with a callback for processing and saving videos.")
+@deprecated(  
+    "`process_video` is deprecated and will be removed in "  
+    "`supervision-0.32.0`. Use `sv.Video().save` instead."  
+)  
 def process_video(
     source_path: str,
     target_path: str,
diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py
index d5d5559ba8..f4dc506445 100644
--- a/supervision/video/__init__.py
+++ b/supervision/video/__init__.py
@@ -1,5 +1,5 @@
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.core import Video
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from supervision.video.utils import SourceType, VideoInfo
 
-__all__ = ["SOURCE_TYPE", "BaseBackend", "BaseWriter", "Video", "VideoInfo"]
+__all__ = ["SourceType", "BaseBackend", "BaseWriter", "Video", "VideoInfo"]
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 1398b19ea6..0b595d388b 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -4,7 +4,7 @@
 import numpy as np
 
 from supervision.video.backend import BaseBackend, BaseWriter
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from supervision.video.utils import SourceType, VideoInfo
 
 
 class OpenCVBackend(BaseBackend):
@@ -42,12 +42,12 @@ def open(self, path: str | int) -> None:
         self.video_info = self._set_video_info()
 
         if isinstance(path, int):
-            self.video_info.source_type = SOURCE_TYPE.WEBCAM
+            self.video_info.SourceType = SourceType.WEBCAM
         elif isinstance(path, str):
-            self.video_info.source_type = (
-                SOURCE_TYPE.RTSP
+            self.video_info.SourceType = (
+                SourceType.RTSP
                 if path.lower().startswith("rtsp://")
-                else SOURCE_TYPE.VIDEO_FILE
+                else SourceType.VIDEO_FILE
             )
         else:
             raise ValueError("Unsupported source type")
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 89005a2cbb..bb4f745202 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -9,7 +9,7 @@
 import numpy as np
 
 from supervision.video.backend import BaseBackend, BaseWriter
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from supervision.video.utils import SourceType, VideoInfo
 
 
 class pyAVBackend(BaseBackend):
@@ -68,12 +68,12 @@ def open(self, path: str) -> None:
                 self.audio_stream = None
 
             if isinstance(path, int):
-                self.video_info.source_type = SOURCE_TYPE.WEBCAM
+                self.video_info.SourceType = SourceType.WEBCAM
             elif isinstance(path, str):
-                self.video_info.source_type = (
-                    SOURCE_TYPE.RTSP
+                self.video_info.SourceType = (
+                    SourceType.RTSP
                     if path.lower().startswith("rtsp://")
-                    else SOURCE_TYPE.VIDEO_FILE
+                    else SourceType.VIDEO_FILE
                 )
             else:
                 raise ValueError("Unsupported source type")
diff --git a/supervision/video/core.py b/supervision/video/core.py
index c28bfa4fad..714cf35334 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -12,7 +12,7 @@
     BaseWriter,
     getBackend,
 )
-from supervision.video.utils import SOURCE_TYPE, VideoInfo
+from supervision.video.utils import SourceType, VideoInfo
 
 
 class Video:
@@ -169,7 +169,7 @@ def save(
         if self.backend.cap is None:
             raise RuntimeError("Video not opened yet.")
 
-        if self.backend.video_info.source_type != SOURCE_TYPE.VIDEO_FILE:
+        if self.backend.video_info.SourceType != SourceType.VIDEO_FILE:
             raise ValueError("Only video files can be saved.")
 
         if fps is None:
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 7e0ceed245..caaaf8fcb5 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -6,7 +6,7 @@
 import cv2
 
 
-class SOURCE_TYPE(Enum):
+class SourceType(Enum):
     """
     Enumeration of supported video source types.
 
@@ -16,9 +16,28 @@ class SOURCE_TYPE(Enum):
         RTSP: A network RTSP video stream.
     """
 
-    VIDEO_FILE = "VIDEO_FILE"
-    WEBCAM = "WEBCAM"
-    RTSP = "RTSP"
+    VIDEO_FILE = "video_file"
+    WEBCAM = "webcam"
+    RTSP = "rtsp"
+
+    @classmethod  
+    def list(cls):  
+        return list(map(lambda c: c.value, cls))  
+
+    @classmethod  
+    def from_value(cls, value: SourceType | str) -> SourceType:  
+        if isinstance(value, cls):  
+            return value  
+        if isinstance(value, str):  
+            value = value.lower()  
+            try:  
+                return cls(value)  
+            except ValueError:  
+                raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}")  
+        raise ValueError(  
+            f"Invalid value type: {type(value)}. Must be an instance of "  
+            f"{cls.__name__} or str."  
+        )  
 
 
 @dataclass
@@ -31,7 +50,7 @@ class VideoInfo:
         height (int): Height of the video in pixels.
         fps (int): Frames per second of the video.
         total_frames (int | None): Total number of frames, or None if unknown.
-        source_type (SOURCE_TYPE | None): Source type: VIDEO_FILE, WEBCAM, RTSP.
+        SourceType (SourceType | None): Source type: VIDEO_FILE, WEBCAM, RTSP.
 
     Methods:
         from_video_path(video file, webcam, RTSP, or None).
@@ -54,7 +73,7 @@ class VideoInfo:
     height: int
     fps: int
     total_frames: int | None = None
-    source_type: SOURCE_TYPE | None = None
+    SourceType: SourceType | None = None
 
     @classmethod
     def from_video_path(cls, video_path: str) -> VideoInfo:

From 79e003d4cb22eff53e29bb5b6c6ba8bfddba3afd Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 19:16:56 -0400
Subject: [PATCH 089/128] UPDATE: Removed video info from path as it is
 dependent on the backend

---
 supervision/utils/video.py | 27 ++++++++++++++++-----------
 supervision/video/utils.py | 37 -------------------------------------
 2 files changed, 16 insertions(+), 48 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index b7fbf87ccc..261033eeb0 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -10,6 +10,7 @@
 from tqdm.auto import tqdm
 
 from supervision.utils.internal import deprecated
+from supervision.video.backend import BackendTypes
 
 
 @deprecated(  
@@ -49,17 +50,21 @@ class VideoInfo:
     total_frames: int | None = None
 
     @classmethod
-    def from_video_path(cls, video_path: str) -> VideoInfo:
-        video = cv2.VideoCapture(video_path)
-        if not video.isOpened():
-            raise Exception(f"Could not open video at {video_path}")
-
-        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = int(video.get(cv2.CAP_PROP_FPS))
-        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-        video.release()
-        return VideoInfo(width, height, fps, total_frames)
+    def from_video_path(cls, backend: BackendTypes) -> VideoInfo:
+        if not backend.isOpened():
+            raise RuntimeError("Video not opened yet.")
+
+        width = backend.stream.width
+        height = backend.stream.height
+        fps = float(backend.stream.average_rate or backend.stream.guessed_rate)
+        if fps <= 0:
+            fps = 30
+
+        total_frames = backend.stream.frames
+        if total_frames == 0:
+            total_frames = None
+
+        return VideoInfo(width, height, round(fps), total_frames)
 
     @property
     def resolution_wh(self) -> tuple[int, int]:
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index caaaf8fcb5..ae4125d085 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -75,43 +75,6 @@ class VideoInfo:
     total_frames: int | None = None
     SourceType: SourceType | None = None
 
-    @classmethod
-    def from_video_path(cls, video_path: str) -> VideoInfo:
-        """
-        Create a VideoInfo instance from a video file.
-
-        Args:
-            video_path (str): Path to the video file.
-
-        Returns:
-            VideoInfo: Metadata including width, height, FPS, and total frames.
-
-        Raises:
-            ValueError: If the video cannot be opened or has invalid properties.
-        """
-        video = cv2.VideoCapture(video_path)
-        if not video.isOpened():
-            raise ValueError(f"Could not open video at {video_path}")
-
-        try:
-            width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-            height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            if width <= 0 or height <= 0:
-                raise ValueError(f"Invalid video dimensions: {width}x{height}")
-
-            fps = video.get(cv2.CAP_PROP_FPS)
-            if fps <= 0:
-                fps = 30  # Default to 30fps if invalid
-            fps = round(fps)
-
-            total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-            if total_frames < 0:
-                total_frames = None  # Some formats may not report frame count
-        finally:
-            video.release()
-
-        return VideoInfo(width, height, fps, total_frames)
-
     @property
     def resolution_wh(self) -> tuple[int, int]:
         """

From c6e6e6cd629c930566c3f6946ac98ccba9f7ccfc Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 19:31:16 -0400
Subject: [PATCH 090/128] UPDATE: Updated backend to Enum

---
 supervision/video/__init__.py         |  3 +-
 supervision/video/backend/__init__.py | 63 +++++++++++++++++----------
 supervision/video/backend/base.py     |  7 +--
 supervision/video/backend/openCV.py   |  2 +-
 supervision/video/backend/pyAV.py     |  2 +-
 supervision/video/core.py             | 15 ++++---
 6 files changed, 57 insertions(+), 35 deletions(-)

diff --git a/supervision/video/__init__.py b/supervision/video/__init__.py
index f4dc506445..54393f5d03 100644
--- a/supervision/video/__init__.py
+++ b/supervision/video/__init__.py
@@ -1,5 +1,4 @@
-from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.core import Video
 from supervision.video.utils import SourceType, VideoInfo
 
-__all__ = ["SourceType", "BaseBackend", "BaseWriter", "Video", "VideoInfo"]
+__all__ = ["SourceType", "Video", "VideoInfo"]
diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index 97065c8217..d4e3674ba7 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -1,36 +1,55 @@
 from __future__ import annotations
 
 from typing import Literal, Union
+from enum import Enum
 
-from supervision.video.backend.base import BaseBackend, BaseWriter
-from supervision.video.backend.openCV import OpenCVBackend, OpenCVWriter
-from supervision.video.backend.pyAV import pyAVBackend, pyAVWriter
+from supervision.video.backend.opencv import OpenCVBackend, OpenCVWriter
+from supervision.video.backend.pyav import pyAVBackend, pyAVWriter
 
-BackendLiteral = Literal["opencv", "pyav"]
 BackendTypes = Union[OpenCVBackend, pyAVBackend]
 WriterTypes = Union[OpenCVWriter, pyAVWriter]
 
-_backends = {
-    "opencv": OpenCVBackend,
-    "pyav": pyAVBackend,
+class Backend(Enum):
+    """
+    Enumeration of Backends.
+    """
+
+    PYAV = "pyav"
+    OPENCV = "opencv"
+
+    @classmethod  
+    def list(cls):  
+        return list(map(lambda c: c.value, cls))  
+
+    @classmethod  
+    def from_value(cls, value: Backend | str) -> Backend:  
+        if isinstance(value, cls):  
+            return value  
+        if isinstance(value, str):  
+            value = value.lower()  
+            try:  
+                return cls(value)  
+            except ValueError:  
+                raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}")  
+        raise ValueError(  
+            f"Invalid value type: {type(value)}. Must be an instance of "  
+            f"{cls.__name__} or str."  
+        )  
+
+BackendDict = {
+    Backend.PYAV: pyAVBackend,
+    Backend.OPENCV: OpenCVBackend,
 }
 
-
-def getBackend(backend: str) -> BaseBackend:
-    if backend.lower() in _backends:
-        return _backends[backend.lower()]
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
+WriterDict = {
+    Backend.PYAV: pyAVWriter,
+    Backend.OPENCV: OpenCVWriter,
+}
 
 __all__ = [
-    "BackendLiteral",
     "BackendType",
-    "BaseBackend",
-    "BaseWriter",
-    "OpenCVBackend",
-    "OpenCVWriter",
-    "getBackend",
-    "pyAVBackend",
-    "pyAVWriter",
+    "WriterType",
+    "Backend",
+    "BackendDict",
+    "WriterDict",
 ]
diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 806230ca63..7eb2e1899d 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -5,13 +5,14 @@
 import numpy as np
 
 from supervision.video.utils import VideoInfo
+from supervision.video.backend import BackendTypes, WriterTypes
 
 
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
-        self.video_info = None
-        self.writer = None
+        self.video_info: VideoInfo | None = None
+        self.writer: WriterTypes | None = None
         self.path = None
 
     @abstractmethod
@@ -55,7 +56,7 @@ def __init__(
         fps: int,
         frame_size: tuple[int, int],
         codec: str | None = None,
-        backend: BaseBackend | None = None,
+        backend: BackendTypes | None = None,
         render_audio: bool = False,
     ):
         pass
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 0b595d388b..8e531e0d79 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -3,7 +3,7 @@
 import cv2
 import numpy as np
 
-from supervision.video.backend import BaseBackend, BaseWriter
+from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.utils import SourceType, VideoInfo
 
 
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index bb4f745202..26a2278bb5 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -8,7 +8,7 @@
     av = None
 import numpy as np
 
-from supervision.video.backend import BaseBackend, BaseWriter
+from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.utils import SourceType, VideoInfo
 
 
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 714cf35334..f522cc61d1 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -7,10 +7,10 @@
 from tqdm.auto import tqdm
 
 from supervision.video.backend import (
-    BackendLiteral,
     BackendTypes,
-    BaseWriter,
-    getBackend,
+    Backend,
+    BackendDict,
+    WriterTypes
 )
 from supervision.video.utils import SourceType, VideoInfo
 
@@ -29,7 +29,7 @@ class Video:
     source: str | int
     backend: BackendTypes
 
-    def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> None:
+    def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) -> None:
         """
         Initialize the Video object.
 
@@ -38,7 +38,10 @@ def __init__(self, source: str | int, backend: BackendLiteral = "opencv") -> Non
             backend (BackendLiteral, optional): Backend type for video I/O.
                 Defaults to "opencv".
         """
-        self.backend = getBackend(backend)()
+        self.backend = BackendDict.get(Backend.from_value(backend))
+        if self.backend is None:
+            raise ValueError(f"Unsupported backend: {backend}")
+        
         self.backend.open(source)
         self.info = self.backend.info()
         self.source = source
@@ -58,7 +61,7 @@ def sink(
         info: VideoInfo,
         codec: str | None = None,
         render_audio: bool = False,
-    ) -> BaseWriter:
+    ) -> WriterTypes:
         """
         Create a video writer for saving frames to a file.
 

From 4f5ac5865b4671e46c1e965bef1f33d6bce6f0aa Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 19:35:07 -0400
Subject: [PATCH 091/128] FIX: Reverted video utils

---
 supervision/utils/video.py            | 27 +++++++++++----------------
 supervision/video/backend/__init__.py |  4 ++--
 supervision/video/core.py             |  2 +-
 3 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index 261033eeb0..b7fbf87ccc 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -10,7 +10,6 @@
 from tqdm.auto import tqdm
 
 from supervision.utils.internal import deprecated
-from supervision.video.backend import BackendTypes
 
 
 @deprecated(  
@@ -50,21 +49,17 @@ class VideoInfo:
     total_frames: int | None = None
 
     @classmethod
-    def from_video_path(cls, backend: BackendTypes) -> VideoInfo:
-        if not backend.isOpened():
-            raise RuntimeError("Video not opened yet.")
-
-        width = backend.stream.width
-        height = backend.stream.height
-        fps = float(backend.stream.average_rate or backend.stream.guessed_rate)
-        if fps <= 0:
-            fps = 30
-
-        total_frames = backend.stream.frames
-        if total_frames == 0:
-            total_frames = None
-
-        return VideoInfo(width, height, round(fps), total_frames)
+    def from_video_path(cls, video_path: str) -> VideoInfo:
+        video = cv2.VideoCapture(video_path)
+        if not video.isOpened():
+            raise Exception(f"Could not open video at {video_path}")
+
+        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = int(video.get(cv2.CAP_PROP_FPS))
+        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        video.release()
+        return VideoInfo(width, height, fps, total_frames)
 
     @property
     def resolution_wh(self) -> tuple[int, int]:
diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index d4e3674ba7..b32e9921d9 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -47,8 +47,8 @@ def from_value(cls, value: Backend | str) -> Backend:
 }
 
 __all__ = [
-    "BackendType",
-    "WriterType",
+    "BackendTypes",
+    "WriterTypes",
     "Backend",
     "BackendDict",
     "WriterDict",
diff --git a/supervision/video/core.py b/supervision/video/core.py
index f522cc61d1..0b92463f20 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -7,8 +7,8 @@
 from tqdm.auto import tqdm
 
 from supervision.video.backend import (
-    BackendTypes,
     Backend,
+    BackendTypes,
     BackendDict,
     WriterTypes
 )

From 29a10458d7a80f81c237fd011466181e013641d3 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 19:44:15 -0400
Subject: [PATCH 092/128] FIX: Fixed backend bug issues

---
 supervision/video/backend/base.py | 5 ++---
 supervision/video/core.py         | 3 +++
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 7eb2e1899d..107276d16c 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -5,14 +5,13 @@
 import numpy as np
 
 from supervision.video.utils import VideoInfo
-from supervision.video.backend import BackendTypes, WriterTypes
 
 
 class BaseBackend(ABC):
     def __init__(self):
         self.cap = None
         self.video_info: VideoInfo | None = None
-        self.writer: WriterTypes | None = None
+        self.writer: BaseWriter | None = None
         self.path = None
 
     @abstractmethod
@@ -56,7 +55,7 @@ def __init__(
         fps: int,
         frame_size: tuple[int, int],
         codec: str | None = None,
-        backend: BackendTypes | None = None,
+        backend: BaseBackend | None = None,
         render_audio: bool = False,
     ):
         pass
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 0b92463f20..2aeaca6b4b 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -42,6 +42,9 @@ def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) -
         if self.backend is None:
             raise ValueError(f"Unsupported backend: {backend}")
         
+        # Instantiate the backend class once sanity check is done
+        self.backend = self.backend()
+
         self.backend.open(source)
         self.info = self.backend.info()
         self.source = source

From 7e401ca2bdab6152c107b5bffc02eba231c686cd Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 19:50:56 -0400
Subject: [PATCH 093/128] UPDATE: Updated render_audio param default value
 config

---
 supervision/video/backend/base.py   | 2 +-
 supervision/video/backend/openCV.py | 6 +++---
 supervision/video/backend/pyAV.py   | 5 ++++-
 supervision/video/core.py           | 4 ++--
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index 107276d16c..eda2f7d7f9 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -56,7 +56,7 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str | None = None,
         backend: BaseBackend | None = None,
-        render_audio: bool = False,
+        render_audio: bool | None = None,
     ):
         pass
 
diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/openCV.py
index 8e531e0d79..8ec8ee80bc 100644
--- a/supervision/video/backend/openCV.py
+++ b/supervision/video/backend/openCV.py
@@ -161,7 +161,7 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str = "mp4v",
         backend: OpenCVBackend | None = None,
-        render_audio: bool = False,
+        render_audio: bool | None = None,
     ):
         """
         Initialize the writer.
@@ -176,10 +176,10 @@ def __init__(
         Raises:
             RuntimeError: If the writer cannot be opened.
         """
-        if render_audio:
+        if render_audio or render_audio == False:
             raise ValueError(
                 "OpenCV backend does not support audio. "
-                "Please use `pyav` backend instead or set `render_audio=False`"
+                "Please use `pyav` backend instead or set `render_audio=None`"
             )
 
         self.backend = backend
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 26a2278bb5..83e14e8e11 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -247,7 +247,7 @@ def __init__(
         frame_size: tuple[int, int],
         codec: str = "h264",
         backend: pyAVBackend | None = None,
-        render_audio: bool = False,
+        render_audio: bool | None = None,
     ):
         """
         Initialize the video writer.
@@ -266,6 +266,9 @@ def __init__(
             self.container = av.open(filename, mode="w")
             self.backend = backend
 
+            if render_audio is None:
+                render_audio = True
+
             if codec is None:
                 codec = "h264"
             self.stream = self.container.add_stream(codec, rate=fps)
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 2aeaca6b4b..0bea072a19 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -63,7 +63,7 @@ def sink(
         target_path: str,
         info: VideoInfo,
         codec: str | None = None,
-        render_audio: bool = False,
+        render_audio: bool | None = None,
     ) -> WriterTypes:
         """
         Create a video writer for saving frames to a file.
@@ -145,7 +145,7 @@ def save(
         progress_message: str = "Processing video",
         show_progress: bool = False,
         codec: str | None = None,
-        render_audio: bool = False,
+        render_audio: bool | None = None,
     ):
         """
         Process and save video frames to a file.

From 232ace57873c4d1de9741c9526dd0d1a62bd2bd3 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 20:19:42 -0400
Subject: [PATCH 094/128] BUG: Fixed pyav not resolving webcam source path

---
 supervision/video/backend/base.py |  2 +-
 supervision/video/backend/pyAV.py | 49 +++++++++++++++++++++++--------
 supervision/video/core.py         |  2 +-
 3 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/supervision/video/backend/base.py b/supervision/video/backend/base.py
index eda2f7d7f9..5ba634708a 100644
--- a/supervision/video/backend/base.py
+++ b/supervision/video/backend/base.py
@@ -15,7 +15,7 @@ def __init__(self):
         self.path = None
 
     @abstractmethod
-    def open(self, path: str) -> None:
+    def open(self, path: str | int) -> None:
         pass
 
     @abstractmethod
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 83e14e8e11..3a33dfe166 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 from fractions import Fraction
+import platform
+import re
 
 try:
     import av
@@ -36,7 +38,7 @@ def __init__(self):
         self.video_info = None
         self.current_frame_idx = 0
 
-    def open(self, path: str) -> None:
+    def open(self, path: str | int) -> None:
         """
         Open and initialize a video source.
 
@@ -50,8 +52,40 @@ def open(self, path: str) -> None:
             RuntimeError: If the video source cannot be opened.
             ValueError: If the source type is unsupported.
         """
+        _source_type = None
+        _format = None
+        
+        def is_webcam_path(path: str) -> tuple[bool, str]:
+            if not isinstance(path, str):
+                return False
+            
+            system = platform.system()
+            path_lower = path.lower()
+
+            if system == "Windows":
+                return path_lower.startswith("video="), "dshow"
+            elif system == "Linux":
+                return bool(re.match(r"^/dev/video\d+$", path_lower)), "v4l2"
+            elif system == "Darwin":
+                return path_lower.isdigit(), "avfoundation"
+            else:
+                return False
+        
+        isWebcam, ffmpeg_os_format = is_webcam_path(path=path)
+        if isWebcam:
+            _source_type = SourceType.WEBCAM
+            _format = ffmpeg_os_format
+        elif isinstance(path, str):
+            _source_type = (
+                SourceType.RTSP
+                if path.lower().startswith("rtsp://")
+                else SourceType.VIDEO_FILE
+            )
+        else:
+            raise ValueError("Unsupported source type")
+        
         try:
-            self.container = av.open(path)
+            self.container = av.open(path, format=_format)
             self.audio_src_container = self.container
             self.stream = self.container.streams.video[0]
             self.stream.thread_type = "AUTO"
@@ -67,16 +101,7 @@ def open(self, path: str) -> None:
             else:
                 self.audio_stream = None
 
-            if isinstance(path, int):
-                self.video_info.SourceType = SourceType.WEBCAM
-            elif isinstance(path, str):
-                self.video_info.SourceType = (
-                    SourceType.RTSP
-                    if path.lower().startswith("rtsp://")
-                    else SourceType.VIDEO_FILE
-                )
-            else:
-                raise ValueError("Unsupported source type")
+            self.video_info.SourceType = _source_type
 
         except Exception as e:
             raise RuntimeError(f"Cannot open video source: {path}") from e
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 0bea072a19..800591fc90 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -109,7 +109,7 @@ def frames(
         total_frames = (
             self.backend.video_info.total_frames if self.backend.video_info else 0
         )
-        is_live_stream = total_frames <= 0
+        is_live_stream = total_frames is None or total_frames <= 0
 
         if is_live_stream:
             while True:

From 84f90ea8882d2b32232cab826215d8d29475861c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 12 Aug 2025 00:20:10 +0000
Subject: [PATCH 095/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/utils/video.py            | 32 +++++++++----------
 supervision/video/backend/__init__.py | 44 ++++++++++++++-------------
 supervision/video/backend/pyAV.py     | 10 +++---
 supervision/video/core.py             | 13 +++-----
 supervision/video/utils.py            | 38 +++++++++++------------
 5 files changed, 67 insertions(+), 70 deletions(-)

diff --git a/supervision/utils/video.py b/supervision/utils/video.py
index b7fbf87ccc..d3408b90e9 100644
--- a/supervision/utils/video.py
+++ b/supervision/utils/video.py
@@ -12,10 +12,10 @@
 from supervision.utils.internal import deprecated
 
 
-@deprecated(  
-    "`process_video` is deprecated and will be removed in "  
-    "`supervision-0.32.0`. Use `sv.VideoInfo` instead."  
-) 
+@deprecated(
+    "`process_video` is deprecated and will be removed in "
+    "`supervision-0.32.0`. Use `sv.VideoInfo` instead."
+)
 @dataclass
 class VideoInfo:
     """
@@ -66,10 +66,10 @@ def resolution_wh(self) -> tuple[int, int]:
         return self.width, self.height
 
 
-@deprecated(  
-    "`process_video` is deprecated and will be removed in "  
-    "`supervision-0.32.0`. Use `sv.Video().save` instead."  
-) 
+@deprecated(
+    "`process_video` is deprecated and will be removed in "
+    "`supervision-0.32.0`. Use `sv.Video().save` instead."
+)
 class VideoSink:
     """
     Context manager that saves video frames to a file using OpenCV.
@@ -151,10 +151,10 @@ def _validate_and_setup_video(
     return video, start, end
 
 
-@deprecated(  
-    "`process_video` is deprecated and will be removed in "  
-    "`supervision-0.32.0`. Use `sv.Video().frame()` or `sv.Video()` instead."  
-) 
+@deprecated(
+    "`process_video` is deprecated and will be removed in "
+    "`supervision-0.32.0`. Use `sv.Video().frame()` or `sv.Video()` instead."
+)
 def get_video_frames_generator(
     source_path: str,
     stride: int = 1,
@@ -206,10 +206,10 @@ def get_video_frames_generator(
     video.release()
 
 
-@deprecated(  
-    "`process_video` is deprecated and will be removed in "  
-    "`supervision-0.32.0`. Use `sv.Video().save` instead."  
-)  
+@deprecated(
+    "`process_video` is deprecated and will be removed in "
+    "`supervision-0.32.0`. Use `sv.Video().save` instead."
+)
 def process_video(
     source_path: str,
     target_path: str,
diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index b32e9921d9..0ec704dbc1 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
-from typing import Literal, Union
 from enum import Enum
+from typing import Literal, Union
 
 from supervision.video.backend.opencv import OpenCVBackend, OpenCVWriter
 from supervision.video.backend.pyav import pyAVBackend, pyAVWriter
@@ -9,6 +9,7 @@
 BackendTypes = Union[OpenCVBackend, pyAVBackend]
 WriterTypes = Union[OpenCVWriter, pyAVWriter]
 
+
 class Backend(Enum):
     """
     Enumeration of Backends.
@@ -17,24 +18,25 @@ class Backend(Enum):
     PYAV = "pyav"
     OPENCV = "opencv"
 
-    @classmethod  
-    def list(cls):  
-        return list(map(lambda c: c.value, cls))  
-
-    @classmethod  
-    def from_value(cls, value: Backend | str) -> Backend:  
-        if isinstance(value, cls):  
-            return value  
-        if isinstance(value, str):  
-            value = value.lower()  
-            try:  
-                return cls(value)  
-            except ValueError:  
-                raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}")  
-        raise ValueError(  
-            f"Invalid value type: {type(value)}. Must be an instance of "  
-            f"{cls.__name__} or str."  
-        )  
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+    @classmethod
+    def from_value(cls, value: Backend | str) -> Backend:
+        if isinstance(value, cls):
+            return value
+        if isinstance(value, str):
+            value = value.lower()
+            try:
+                return cls(value)
+            except ValueError:
+                raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}")
+        raise ValueError(
+            f"Invalid value type: {type(value)}. Must be an instance of "
+            f"{cls.__name__} or str."
+        )
+
 
 BackendDict = {
     Backend.PYAV: pyAVBackend,
@@ -47,9 +49,9 @@ def from_value(cls, value: Backend | str) -> Backend:
 }
 
 __all__ = [
-    "BackendTypes",
-    "WriterTypes",
     "Backend",
     "BackendDict",
+    "BackendTypes",
     "WriterDict",
+    "WriterTypes",
 ]
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyAV.py
index 3a33dfe166..26c47dedbd 100644
--- a/supervision/video/backend/pyAV.py
+++ b/supervision/video/backend/pyAV.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
-from fractions import Fraction
 import platform
 import re
+from fractions import Fraction
 
 try:
     import av
@@ -54,11 +54,11 @@ def open(self, path: str | int) -> None:
         """
         _source_type = None
         _format = None
-        
+
         def is_webcam_path(path: str) -> tuple[bool, str]:
             if not isinstance(path, str):
                 return False
-            
+
             system = platform.system()
             path_lower = path.lower()
 
@@ -70,7 +70,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]:
                 return path_lower.isdigit(), "avfoundation"
             else:
                 return False
-        
+
         isWebcam, ffmpeg_os_format = is_webcam_path(path=path)
         if isWebcam:
             _source_type = SourceType.WEBCAM
@@ -83,7 +83,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]:
             )
         else:
             raise ValueError("Unsupported source type")
-        
+
         try:
             self.container = av.open(path, format=_format)
             self.audio_src_container = self.container
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 800591fc90..e366e5be4e 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -6,12 +6,7 @@
 import numpy as np
 from tqdm.auto import tqdm
 
-from supervision.video.backend import (
-    Backend,
-    BackendTypes,
-    BackendDict,
-    WriterTypes
-)
+from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes
 from supervision.video.utils import SourceType, VideoInfo
 
 
@@ -29,7 +24,9 @@ class Video:
     source: str | int
     backend: BackendTypes
 
-    def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) -> None:
+    def __init__(
+        self, source: str | int, backend: Backend | str = Backend.OPENCV
+    ) -> None:
         """
         Initialize the Video object.
 
@@ -41,7 +38,7 @@ def __init__(self, source: str | int, backend: Backend | str = Backend.OPENCV) -
         self.backend = BackendDict.get(Backend.from_value(backend))
         if self.backend is None:
             raise ValueError(f"Unsupported backend: {backend}")
-        
+
         # Instantiate the backend class once sanity check is done
         self.backend = self.backend()
 
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index ae4125d085..9b90c8dec8 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -3,8 +3,6 @@
 from dataclasses import dataclass
 from enum import Enum
 
-import cv2
-
 
 class SourceType(Enum):
     """
@@ -20,24 +18,24 @@ class SourceType(Enum):
     WEBCAM = "webcam"
     RTSP = "rtsp"
 
-    @classmethod  
-    def list(cls):  
-        return list(map(lambda c: c.value, cls))  
-
-    @classmethod  
-    def from_value(cls, value: SourceType | str) -> SourceType:  
-        if isinstance(value, cls):  
-            return value  
-        if isinstance(value, str):  
-            value = value.lower()  
-            try:  
-                return cls(value)  
-            except ValueError:  
-                raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}")  
-        raise ValueError(  
-            f"Invalid value type: {type(value)}. Must be an instance of "  
-            f"{cls.__name__} or str."  
-        )  
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+    @classmethod
+    def from_value(cls, value: SourceType | str) -> SourceType:
+        if isinstance(value, cls):
+            return value
+        if isinstance(value, str):
+            value = value.lower()
+            try:
+                return cls(value)
+            except ValueError:
+                raise ValueError(f"Invalid value: {value}. Must be one of {cls.list()}")
+        raise ValueError(
+            f"Invalid value type: {type(value)}. Must be an instance of "
+            f"{cls.__name__} or str."
+        )
 
 
 @dataclass

From f6cea28f1f74b6ff438696b79f3a539518183287 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 20:45:19 -0400
Subject: [PATCH 096/128] UPDATE: Updated file names

---
 supervision/video/backend/{openCV.py => opencv.py} | 0
 supervision/video/backend/{pyAV.py => pyav.py}     | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename supervision/video/backend/{openCV.py => opencv.py} (100%)
 rename supervision/video/backend/{pyAV.py => pyav.py} (100%)

diff --git a/supervision/video/backend/openCV.py b/supervision/video/backend/opencv.py
similarity index 100%
rename from supervision/video/backend/openCV.py
rename to supervision/video/backend/opencv.py
diff --git a/supervision/video/backend/pyAV.py b/supervision/video/backend/pyav.py
similarity index 100%
rename from supervision/video/backend/pyAV.py
rename to supervision/video/backend/pyav.py

From 048ebc0cb253008f67850d19694e2d1f574e23b5 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 20:47:33 -0400
Subject: [PATCH 097/128] Precommit error fix

---
 supervision/video/backend/opencv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py
index 8ec8ee80bc..3011925b62 100644
--- a/supervision/video/backend/opencv.py
+++ b/supervision/video/backend/opencv.py
@@ -176,7 +176,7 @@ def __init__(
         Raises:
             RuntimeError: If the writer cannot be opened.
         """
-        if render_audio or render_audio == False:
+        if render_audio or not render_audio:
             raise ValueError(
                 "OpenCV backend does not support audio. "
                 "Please use `pyav` backend instead or set `render_audio=None`"

From de62eb8f4d70bf4c0e4290f19ebcb7c1c6d726ab Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 20:50:29 -0400
Subject: [PATCH 098/128] FIX: render_audio opencv param fix

---
 supervision/video/backend/opencv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py
index 3011925b62..2b5527f413 100644
--- a/supervision/video/backend/opencv.py
+++ b/supervision/video/backend/opencv.py
@@ -176,7 +176,7 @@ def __init__(
         Raises:
             RuntimeError: If the writer cannot be opened.
         """
-        if render_audio or not render_audio:
+        if render_audio or render_audio is False:
             raise ValueError(
                 "OpenCV backend does not support audio. "
                 "Please use `pyav` backend instead or set `render_audio=None`"

From afea7c045056773011f5eabee85d13ce614cb4f1 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 21:06:22 -0400
Subject: [PATCH 099/128] UPDATE: Updated docstrings

---
 supervision/video/backend/__init__.py |   5 ++
 supervision/video/backend/opencv.py   |  94 +++++++++++++--------
 supervision/video/backend/pyav.py     | 113 +++++++++++++++++---------
 supervision/video/core.py             |  35 +++++---
 supervision/video/utils.py            |  61 ++++++++++----
 5 files changed, 206 insertions(+), 102 deletions(-)

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index 0ec704dbc1..7fa432ad25 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -13,6 +13,11 @@
 class Backend(Enum):
     """
     Enumeration of Backends.
+    
+    Attributes:
+        PYAV (str): PyAV backend (powered by FFmpeg, supports audio rendering)
+        OPENCV (str): OpenCV backend
+
     """
 
     PYAV = "pyav"
diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py
index 2b5527f413..69d87b37f1 100644
--- a/supervision/video/backend/opencv.py
+++ b/supervision/video/backend/opencv.py
@@ -9,14 +9,24 @@
 
 class OpenCVBackend(BaseBackend):
     """
-    OpenCV-based implementation of the video backend interface.
+    OpenCV-based video backend implementation for video capture and processing.
+
+    This backend provides video reading capabilities using OpenCV's VideoCapture.
+    It supports:
+    - Local video files
+    - Webcam streams
+    - RTSP network streams
+
+    Attributes:
+        cap (cv2.VideoCapture): OpenCV video capture instance.
+        video_info (VideoInfo): Metadata about the video source.
+        writer (class): Reference to the OpenCVWriter class for video writing.
+        path (str | int): Path to the video source or webcam index.
 
-    Provides methods for opening video sources, reading frames, seeking,
-    grabbing, and retrieving metadata using OpenCV.
     """
 
     def __init__(self):
-        """Initialize with no active capture, writer, or path."""
+        """Initialize the OpenCV backend with no active capture."""
         self.cap = None
         self.video_info = None
         self.writer = OpenCVWriter
@@ -24,10 +34,11 @@ def __init__(self):
 
     def open(self, path: str | int) -> None:
         """
-        Open a video source and initialize capture.
+        Open a video source for reading.
 
         Args:
-            path (str | int): Path to a video file, RTSP URL, or webcam index.
+            path (str | int): Path to video file, RTSP URL, or webcam index.
+                Webcam indices are typically 0 for default camera.
 
         Raises:
             RuntimeError: If the source cannot be opened.
@@ -54,19 +65,23 @@ def open(self, path: str | int) -> None:
 
     def isOpened(self) -> bool:
         """
-        Check if the video source is currently open.
+        Check if the video source is currently open and available.
 
         Returns:
-            bool: True if the source is open, False otherwise.
+            bool: True if source is open and ready for reading, False otherwise.
         """
         return self.cap.isOpened()
 
     def _set_video_info(self) -> VideoInfo:
         """
-        Extract and store video metadata from the open capture.
+        Extract and store video metadata from the opened source.
 
         Returns:
-            VideoInfo: Video properties such as width, height, FPS, and frame count.
+            VideoInfo: Object containing:
+                - width (int): Frame width in pixels
+                - height (int): Frame height in pixels
+                - fps (int): Frames per second
+                - total_frames (int): Total frame count (0 for streams)
 
         Raises:
             RuntimeError: If no source is open.
@@ -83,10 +98,10 @@ def _set_video_info(self) -> VideoInfo:
 
     def info(self) -> VideoInfo:
         """
-        Get the stored video metadata.
+        Retrieve stored video metadata.
 
         Returns:
-            VideoInfo: Metadata for the open source.
+            VideoInfo: Video properties including dimensions, FPS, and frame count.
 
         Raises:
             RuntimeError: If no source is open.
@@ -97,12 +112,12 @@ def info(self) -> VideoInfo:
 
     def read(self) -> tuple[bool, np.ndarray]:
         """
-        Read the next frame from the source.
+        Read and decode the next frame from the video source.
 
         Returns:
             tuple[bool, np.ndarray]:
-                - bool: True if a frame was read successfully.
-                - np.ndarray: The frame in BGR format.
+                - bool: True if frame was read successfully, False at end of stream
+                - np.ndarray: Frame data in BGR format (height, width, 3)
 
         Raises:
             RuntimeError: If no source is open.
@@ -113,10 +128,12 @@ def read(self) -> tuple[bool, np.ndarray]:
 
     def grab(self) -> bool:
         """
-        Grab the next frame without decoding.
+        Advance to the next frame without decoding.
+
+        Useful for quickly skipping frames when pixel data isn't needed.
 
         Returns:
-            bool: True if the frame pointer advanced successfully.
+            bool: True if frame was advanced successfully, False otherwise
 
         Raises:
             RuntimeError: If no source is open.
@@ -127,10 +144,12 @@ def grab(self) -> bool:
 
     def seek(self, frame_idx: int) -> None:
         """
-        Jump to a specific frame.
+        Seek to a specific frame index.
+
+        Note: Seeking may be imprecise with compressed video formats.
 
         Args:
-            frame_idx (int): Zero-based frame index to seek to.
+            frame_idx (int): Zero-based index of target frame.
 
         Raises:
             RuntimeError: If no source is open.
@@ -140,7 +159,7 @@ def seek(self, frame_idx: int) -> None:
         self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
 
     def release(self) -> None:
-        """Release capture resources."""
+        """Release the video capture resources."""
         if self.cap is not None and self.cap.isOpened():
             self.cap.release()
             self.cap = None
@@ -148,10 +167,10 @@ def release(self) -> None:
 
 class OpenCVWriter(BaseWriter):
     """
-    Video writer implementation using OpenCV's VideoWriter.
+    OpenCV-based video writer for creating video files.
 
-    Supports configurable codecs, frame sizes, and FPS, with a fallback
-    to "mp4v" if the specified codec fails.
+    This writer provides basic video encoding capabilities using OpenCV's VideoWriter.
+    Note: Does not support audio writing - use pyAVWriter for audio support.
     """
 
     def __init__(
@@ -164,17 +183,22 @@ def __init__(
         render_audio: bool | None = None,
     ):
         """
-        Initialize the writer.
+        Initialize the video writer.
 
         Args:
-            filename (str): Output video file path.
-            fps (int): Output frames per second.
-            frame_size (tuple[int, int]): Frame dimensions (width, height).
-            codec (str, optional): FourCC codec code. Defaults to "mp4v".
-            backend (OpenCVBackend | None, optional): Backend instance. Defaults to None
+            filename (str): Output video file path (e.g., "output.mp4").
+            fps (int): Target frames per second for output video.
+            frame_size (tuple[int, int]): (width, height) of output frames.
+            codec (str, optional): FourCC codec code (default "mp4v").
+            backend (OpenCVBackend, optional): Unused (for API compatibility).
+            render_audio (bool, optional): Must be None (OpenCV doesn't support audio).
 
         Raises:
-            RuntimeError: If the writer cannot be opened.
+            ValueError: If render_audio is specified (not supported).
+            RuntimeError: If writer cannot be initialized.
+
+        Note:
+            Falls back to "mp4v" codec if specified codec fails.
         """
         if render_audio or render_audio is False:
             raise ValueError(
@@ -194,20 +218,22 @@ def __init__(
             raise RuntimeError(f"Cannot open video writer for file: {filename}")
 
     def __enter__(self):
+        """Enable context manager support (with statement)."""
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
+        """Ensure proper cleanup when exiting context."""
         self.close()
 
     def write(self, frame: np.ndarray) -> None:
         """
-        Write a frame to the output.
+        Write a single frame to the output video.
 
         Args:
-            frame (np.ndarray): Frame in BGR format.
+            frame (np.ndarray): Frame data in BGR format (height, width, 3).
         """
         self.writer.write(frame)
 
     def close(self) -> None:
-        """Release writer resources."""
-        self.writer.release()
+        """Finalize and close the output video file."""
+        self.writer.release()
\ No newline at end of file
diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 26c47dedbd..888038075f 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -16,14 +16,22 @@
 
 class pyAVBackend(BaseBackend):
     """
-    PyAV-based implementation of the `BaseBackend` interface.
+    PyAV-based implementation of the `BaseBackend` interface for video processing.
 
-    This backend handles video capture, frame reading, seeking, and writing
-    operations using the PyAV library. Supports local video files, webcams,
-    and RTSP streams.
+    This backend provides video capture and frame reading capabilities using the PyAV
+    library, which is a Pythonic binding for FFmpeg. It supports:
+    - Local video files
+    - Webcam streams (platform-specific)
+    - RTSP network streams
     """
 
     def __init__(self):
+        """
+        Initialize the pyAVBackend instance.
+
+        Raises:
+            RuntimeError: If PyAV (`av` module) is not installed.
+        """
         super().__init__()
 
         if av is None:
@@ -46,7 +54,7 @@ def open(self, path: str | int) -> None:
         the necessary components for decoding and reading frames.
 
         Args:
-            path (str | int): Path to the video file, RTSP URL, or webcam index.
+            path (str | int): Path to the video file, RTSP URL, or webcam path.
 
         Raises:
             RuntimeError: If the video source cannot be opened.
@@ -56,8 +64,17 @@ def open(self, path: str | int) -> None:
         _format = None
 
         def is_webcam_path(path: str) -> tuple[bool, str]:
+            """
+            Determine if the path refers to a webcam and get platform-specific format.
+
+            Args:
+                path (str): The path to check.
+
+            Returns:
+                tuple[bool, str | None]: (True if webcam, FFmpeg format or None)
+            """
             if not isinstance(path, str):
-                return False
+                return False, None
 
             system = platform.system()
             path_lower = path.lower()
@@ -69,7 +86,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]:
             elif system == "Darwin":
                 return path_lower.isdigit(), "avfoundation"
             else:
-                return False
+                return False, None
 
         isWebcam, ffmpeg_os_format = is_webcam_path(path=path)
         if isWebcam:
@@ -107,15 +124,24 @@ def is_webcam_path(path: str) -> tuple[bool, str]:
             raise RuntimeError(f"Cannot open video source: {path}") from e
 
     def isOpened(self) -> bool:
-        """Check if the video source has been successfully opened."""
+        """
+        Check if the video source has been successfully opened.
+
+        Returns:
+            bool: True if video source is opened and ready, False otherwise.
+        """
         return self.container is not None and self.stream is not None
 
     def _set_video_info(self) -> VideoInfo:
         """
-        Extract video information from the opened source.
+        Extract and calculate video information from the opened source.
 
         Returns:
-            VideoInfo: Object containing width, height, fps, and frame count.
+            VideoInfo: Object containing:
+                - width (int): Frame width in pixels
+                - height (int): Frame height in pixels
+                - fps (int): Frames per second, rounded (30 if not positive)
+                - total_frames (int | None): Total frame count if available
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -127,20 +153,20 @@ def _set_video_info(self) -> VideoInfo:
         height = self.stream.height
         fps = float(self.stream.average_rate or self.stream.guessed_rate)
         if fps <= 0:
-            fps = 30
+            fps = 30  # Default FPS if cannot be determined
 
         total_frames = self.stream.frames
         if total_frames == 0:
-            total_frames = None
+            total_frames = None  # Unknown frame count
 
         return VideoInfo(width, height, round(fps), total_frames)
 
     def info(self) -> VideoInfo:
         """
-        Retrieve video information.
+        Retrieve video information for the opened source.
 
         Returns:
-            VideoInfo: Video properties for the opened source.
+            VideoInfo: Video properties including dimensions, FPS, and frame count.
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -155,8 +181,8 @@ def read(self) -> tuple[bool, np.ndarray]:
 
         Returns:
             tuple[bool, np.ndarray]:
-                - `bool`: True if a frame was read successfully, False if end of stream.
-                - `np.ndarray`: Frame data in BGR format (H, W, 3).
+                - bool: True if frame was read successfully, False at end of stream
+                - np.ndarray: Frame data in BGR format with shape (height, width, 3)
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -174,12 +200,12 @@ def read(self) -> tuple[bool, np.ndarray]:
 
     def grab(self) -> bool:
         """
-        Grab the next frame packet without decoding it.
+        Advance to the next frame packet without decoding it.
 
-        Useful for skipping frames quickly without the overhead of decoding.
+        This is useful for quickly skipping frames when decoding isn't needed.
 
         Returns:
-            bool: True if a frame packet was grabbed successfully, False otherwise.
+            bool: True if frame packet was advanced, False at end of stream
 
         Raises:
             RuntimeError: If the video source is not opened.
@@ -199,8 +225,9 @@ def seek(self, frame_idx: int) -> None:
         """
         Seek to a specific frame index in the video.
 
-        This uses keyframe-based seeking, then decodes forward to the exact
-        requested frame.
+        Uses keyframe-based seeking followed by sequential decoding to reach
+        the exact requested frame: it jumps to a keyframe first and then
+        decodes forward from there until the target frame is reached.
 
         Args:
             frame_idx (int): Zero-based index of the target frame.
@@ -249,6 +276,9 @@ def _prepend_frame(first_frame, gen):
     def release(self) -> None:
         """
         Release the video source and free all associated resources.
+
+        This closes the video container and resets all internal state.
+        Should be called when finished with the video source.
         """
         if self.container:
             self.container.close()
@@ -259,10 +289,14 @@ def release(self) -> None:
 
 class pyAVWriter(BaseWriter):
     """
-    PyAV-based video writer.
+    PyAV-based video writer for creating video files with optional audio.
 
-    Writes frames to a video file with optional audio from a backend source.
-    Uses finer timestamp granularity (milliseconds) for smoother video playback.
+    This writer provides high-quality video encoding with precise frame timing
+    (millisecond accuracy) and supports audio muxing from a source video.
+
+    Methods:
+        write(frame): Write a video frame.
+        close(): Finalize and close the video file.
     """
 
     def __init__(
@@ -279,10 +313,11 @@ def __init__(
 
         Args:
             filename (str): Path to the output video file.
-            fps (int): Frames per second for the output video.
-            frame_size (tuple[int, int]): Width and height of the video frames.
+            fps (int): Target frames per second for the output video.
+            frame_size (tuple[int, int]): (width, height) of output frames.
             codec (str, optional): Video codec name (default "h264").
-            backend (pyAVBackend, optional): Backend providing audio stream.
+            backend (pyAVBackend, optional): Source backend for audio muxing.
+            render_audio (bool, optional): Whether to include audio (default True if available).
 
         Raises:
             RuntimeError: If the output file cannot be created.
@@ -331,19 +366,19 @@ def __init__(
             raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
 
     def __enter__(self):
-        """Enable use as a context manager."""
+        """Enable use as a context manager (with statement)."""
         return self
 
     def __exit__(self, exc_type, exc_value, traceback):
-        """Close the writer on context exit."""
+        """Ensure proper cleanup when exiting context."""
         self.close()
 
     def write(self, frame: np.ndarray) -> None:
         """
-        Write a single video frame.
+        Write a single video frame to the output file.
 
         Args:
-            frame (np.ndarray): Frame data in BGR format (H, W, 3).
+            frame (np.ndarray): Frame data in BGR format (height, width, 3).
         """
         # Calculate PTS as milliseconds: frame_index * (1000 ms / fps)
         pts = int(self.frame_idx * (1000 / self.fps))
@@ -364,25 +399,26 @@ def close(self) -> None:
         Finalize the video file, mux audio with adjusted timestamps to sync with video,
         and close the container.
         """
-
         def rescale_timestamp(value, src_tb, dst_tb):
             """
-            Rescale timestamp value from source timebase to destination timebase.
+            Rescale timestamp between timebases.
 
             Args:
-                value (int): Timestamp value (PTS or DTS).
-                src_tb (Fraction): Source time base.
-                dst_tb (Fraction): Destination time base.
+                value (int): Original timestamp value
+                src_tb (Fraction): Source timebase
+                dst_tb (Fraction): Destination timebase
 
             Returns:
-                int: Rescaled timestamp.
+                int: Rescaled timestamp
             """
             return int(value * src_tb / dst_tb)
 
+        # Flush any remaining video packets
         packets = self.stream.encode()
         for packet in packets:
             self.container.mux(packet)
 
+        # Calculate audio speed adjustment factor if needed
         speed_factor = 1.0
 
         try:
@@ -411,6 +447,7 @@ def rescale_timestamp(value, src_tb, dst_tb):
         except Exception:
             speed_factor = 1.0
 
+        # Process and mux audio packets with timestamp adjustments
         if self.audio_stream_out and speed_factor != 1.0:
             for packet in self.audio_packets:
                 if packet.pts is not None:
@@ -438,4 +475,4 @@ def rescale_timestamp(value, src_tb, dst_tb):
                 packet.stream = self.audio_stream_out
                 self.container.mux(packet)
 
-        self.container.close()
+        self.container.close()
\ No newline at end of file
diff --git a/supervision/video/core.py b/supervision/video/core.py
index e366e5be4e..13647263c4 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -15,7 +15,7 @@ class Video:
     A high-level interface for reading, processing, and writing video files or streams.
 
     Attributes:
-        info (VideoInfo): Metadata about the video.
+        info (VideoInfo): Metadata of the opened video (FPS, resolution, frame count).
         source (str | int): Path to the video file or index of the camera device.
         backend (BackendTypes): Video backend used for I/O operations.
     """
@@ -28,12 +28,15 @@ def __init__(
         self, source: str | int, backend: Backend | str = Backend.OPENCV
     ) -> None:
         """
-        Initialize the Video object.
+        Initialize the Video object and open the source.
 
         Args:
             source (str | int): Path to a video file or index of a camera device.
-            backend (BackendLiteral, optional): Backend type for video I/O.
-                Defaults to "opencv".
+            backend (Backend | str, optional): Backend type or name for video I/O.
+                Defaults to Backend.OPENCV.
+
+        Raises:
+            ValueError: If the specified backend is not supported.
         """
         self.backend = BackendDict.get(Backend.from_value(backend))
         if self.backend is None:
@@ -48,10 +51,10 @@ def __init__(
 
     def __iter__(self):
         """
-        Make the Video object iterable over frames.
+        Make the Video object directly iterable over frames.
 
         Yields:
-            np.ndarray: The next frame in the video.
+            np.ndarray: The next frame in the video stream.
         """
         return self.backend.frames()
 
@@ -67,12 +70,13 @@ def sink(
 
         Args:
             target_path (str): Output file path for the video.
-            info (VideoInfo): Video information including resolution and FPS.
+            info (VideoInfo): Video metadata including resolution and FPS.
             codec (str, optional): FourCC video codec code.
                 If None, the backend's default codec is used.
+            render_audio (bool | None, optional): Whether to include audio if supported.
 
         Returns:
-            BaseWriter: Video writer instance for writing frames.
+            WriterTypes: Video writer instance for writing frames.
         """
         return self.backend.writer(
             target_path, info.fps, info.resolution_wh, codec, self.backend, render_audio
@@ -86,7 +90,7 @@ def frames(
         resolution_wh: tuple[int, int] | None = None,
     ):
         """
-        Generate frames from the video with optional skipping, cropping, and resizing.
+        Generate frames from the video with optional skipping, seeking, and resizing.
 
         Args:
             stride (int, optional): Number of frames to skip between each yield.
@@ -99,6 +103,9 @@ def frames(
 
         Yields:
             np.ndarray: The next frame in the video.
+
+        Raises:
+            RuntimeError: If the video has not been opened.
         """
         if self.backend.cap is None:
             raise RuntimeError("Video not opened yet.")
@@ -109,6 +116,7 @@ def frames(
         is_live_stream = total_frames is None or total_frames <= 0
 
         if is_live_stream:
+            # Live stream handling
             while True:
                 for _ in range(stride - 1):
                     if not self.backend.grab():
@@ -120,6 +128,7 @@ def frames(
                     frame = cv2.resize(frame, resolution_wh)
                 yield frame
         else:
+            # Video file handling
             if end is None or end > total_frames:
                 end = total_frames
 
@@ -147,12 +156,13 @@ def save(
         """
         Process and save video frames to a file.
 
+        Reads frames from the source, applies the given `callback` function to each
+        frame, and writes the processed frames to the specified output file.
+
         Args:
             target_path (str): Output file path for the processed video.
             callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in
-                a numpy ndarray representation of a video frame and an
-                int index of the frame and returns a processed numpy ndarray
-                representation of the frame.
+                a video frame (numpy array) and its frame index, and returns a processed frame.
             fps (int | None, optional): Frames per second of the output video.
                 If None, uses the original FPS.
             progress_message (str, optional): Message displayed in the progress bar.
@@ -161,6 +171,7 @@ def save(
                 Defaults to False.
             codec (str | None, optional): FourCC video codec code.
                 If None, uses the backend's default codec.
+            render_audio (bool | None, optional): Whether to include audio if supported.
 
         Raises:
             RuntimeError: If the video has not been opened.
diff --git a/supervision/video/utils.py b/supervision/video/utils.py
index 9b90c8dec8..cb9cdbc10e 100644
--- a/supervision/video/utils.py
+++ b/supervision/video/utils.py
@@ -19,11 +19,37 @@ class SourceType(Enum):
     RTSP = "rtsp"
 
     @classmethod
-    def list(cls):
+    def list(cls) -> list[str]:
+        """
+        Get a list of all supported source type values.
+
+        Returns:
+            list[str]: List of enum values as lowercase strings.
+
+        Example:
+            >>> SourceType.list()
+            ['video_file', 'webcam', 'rtsp']
+        """
         return list(map(lambda c: c.value, cls))
 
     @classmethod
     def from_value(cls, value: SourceType | str) -> SourceType:
+        """
+        Convert a string or SourceType instance to a SourceType enum member.
+
+        Args:
+            value (SourceType | str): The value to convert.
+
+        Returns:
+            SourceType: Corresponding SourceType enum member.
+
+        Raises:
+            ValueError: If the value is invalid or not a supported type.
+
+        Example:
+            >>> SourceType.from_value("webcam")
+            <SourceType.WEBCAM: 'webcam'>
+        """
         if isinstance(value, cls):
             return value
         if isinstance(value, str):
@@ -48,23 +74,18 @@ class VideoInfo:
         height (int): Height of the video in pixels.
         fps (int): Frames per second of the video.
         total_frames (int | None): Total number of frames, or None if unknown.
-        SourceType (SourceType | None): Source type: VIDEO_FILE, WEBCAM, RTSP.
-
-    Methods:
-        from_video_path(video file, webcam, RTSP, or None).
-
-    Examples:
-        ```python
-        import supervision as sv
-
-        video_info = sv.VideoInfo.from_video_path("video.mp4")
-
-        print(video_info)
-        # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
-
-        print(video_info.resolution_wh)
-        # (3840, 2160)
-        ```
+        SourceType (SourceType | None): Source type (VIDEO_FILE, WEBCAM, or RTSP).
+
+    Properties:
+        resolution_wh (tuple[int, int]): The (width, height) tuple for the video.
+
+    Example:
+        >>> import supervision as sv
+        >>> video_info = sv.VideoInfo.from_video_path("video.mp4")
+        >>> print(video_info)
+        VideoInfo(width=3840, height=2160, fps=25, total_frames=538)
+        >>> video_info.resolution_wh
+        (3840, 2160)
     """
 
     width: int
@@ -80,5 +101,9 @@ def resolution_wh(self) -> tuple[int, int]:
 
         Returns:
             tuple[int, int]: The video dimensions in pixels.
+
+        Example:
+            >>> VideoInfo(width=1920, height=1080, fps=30).resolution_wh
+            (1920, 1080)
         """
         return self.width, self.height

From 6208f3dbf88d83121b832d70bf7b1a5187d07bf5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 12 Aug 2025 01:07:02 +0000
Subject: [PATCH 100/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/__init__.py | 2 +-
 supervision/video/backend/opencv.py   | 2 +-
 supervision/video/backend/pyav.py     | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index 7fa432ad25..20f90998bd 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -13,7 +13,7 @@
 class Backend(Enum):
     """
     Enumeration of Backends.
-    
+
     Attributes:
         PYAV (str): PyAV backend (powered by FFmpeg, supports audio rendering)
         OPENCV (str): OpenCV backend
diff --git a/supervision/video/backend/opencv.py b/supervision/video/backend/opencv.py
index 69d87b37f1..6d49b668a2 100644
--- a/supervision/video/backend/opencv.py
+++ b/supervision/video/backend/opencv.py
@@ -236,4 +236,4 @@ def write(self, frame: np.ndarray) -> None:
 
     def close(self) -> None:
         """Finalize and close the output video file."""
-        self.writer.release()
\ No newline at end of file
+        self.writer.release()
diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 888038075f..d856d00888 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -399,6 +399,7 @@ def close(self) -> None:
         Finalize the video file, mux audio with adjusted timestamps to sync with video,
         and close the container.
         """
+
         def rescale_timestamp(value, src_tb, dst_tb):
             """
             Rescale timestamp between timebases.
@@ -475,4 +476,4 @@ def rescale_timestamp(value, src_tb, dst_tb):
                 packet.stream = self.audio_stream_out
                 self.container.mux(packet)
 
-        self.container.close()
\ No newline at end of file
+        self.container.close()

From 9cb2170a42b2a78d3fffaae2bbde9a2b4c31a3e4 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 21:11:43 -0400
Subject: [PATCH 101/128] UPDATE: Updated docstrings

---
 supervision/video/backend/pyav.py | 2 +-
 supervision/video/core.py         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index d856d00888..3fe0ead6b8 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -317,7 +317,7 @@ def __init__(
             frame_size (tuple[int, int]): (width, height) of output frames.
             codec (str, optional): Video codec name (default "h264").
             backend (pyAVBackend, optional): Source backend for audio muxing.
-            render_audio (bool, optional): Whether to include audio (default True if available).
+            render_audio (bool, optional): Include audio (default True if available).
 
         Raises:
             RuntimeError: If the output file cannot be created.
diff --git a/supervision/video/core.py b/supervision/video/core.py
index 13647263c4..04afb7d887 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -15,7 +15,7 @@ class Video:
     A high-level interface for reading, processing, and writing video files or streams.
 
     Attributes:
-        info (VideoInfo): Metadata about the opened video (e.g., FPS, resolution, duration).
+        info (VideoInfo): Metadata about the opened video.
         source (str | int): Path to the video file or index of the camera device.
         backend (BackendTypes): Video backend used for I/O operations.
     """
@@ -162,7 +162,7 @@ def save(
         Args:
             target_path (str): Output file path for the processed video.
             callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in
-                a video frame (numpy array) and its frame index, and returns a processed frame.
+                a video frame (numpy array) and its frame index, and returns a frame.
             fps (int | None, optional): Frames per second of the output video.
                 If None, uses the original FPS.
             progress_message (str, optional): Message displayed in the progress bar.

From 70b73d8c5c6f912d79e6614ab21990b54b278037 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Mon, 11 Aug 2025 21:32:07 -0400
Subject: [PATCH 102/128] UPDATE: Added .show()

---
 supervision/video/core.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 04afb7d887..eaca4f6930 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -211,3 +211,35 @@ def save(
             writer.write(frame=result_frame)
 
         writer.close()
+
+    def show(self, resolution_wh: tuple[int, int] | None = None):
+        """
+        Display video frames in a window with interactive playback controls.
+
+        This method streams video frames to an OpenCV window, allowing real-time
+        visualization. Press 'q' to quit playback. The method handles various
+        display-related exceptions gracefully.
+
+        Args:
+            resolution_wh (tuple[int, int] | None): Optional target resolution as 
+                (width, height) tuple. If None, uses native video resolution.
+                Note: Aspect ratio may not be preserved.
+        """
+        try:
+            for frame in self.frames(resolution_wh=resolution_wh):
+                cv2.imshow(str(self.source), frame)
+                key = cv2.waitKey(1) & 0xFF
+
+                if key == ord('q'):
+                    break
+
+            cv2.destroyAllWindows()
+        except cv2.error as e:
+            if "The function is not implemented" in str(e) or "could not connect to display" in str(e).lower():
+                print("Error: No display found or GUI support not available.")
+            else:
+                print("OpenCV error:", e)
+        except Exception as e:
+            print("Error:", e)
+        finally:
+            cv2.destroyAllWindows()
\ No newline at end of file

From 01f8b2c40c524a5daa455cc19c6f0e0f111096d3 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 12 Aug 2025 01:32:58 +0000
Subject: [PATCH 103/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/core.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index eaca4f6930..ed570c51fb 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -221,7 +221,7 @@ def show(self, resolution_wh: tuple[int, int] | None = None):
         display-related exceptions gracefully.
 
         Args:
-            resolution_wh (tuple[int, int] | None): Optional target resolution as 
+            resolution_wh (tuple[int, int] | None): Optional target resolution as
                 (width, height) tuple. If None, uses native video resolution.
                 Note: Aspect ratio may not be preserved.
         """
@@ -230,16 +230,19 @@ def show(self, resolution_wh: tuple[int, int] | None = None):
                 cv2.imshow(str(self.source), frame)
                 key = cv2.waitKey(1) & 0xFF
 
-                if key == ord('q'):
+                if key == ord("q"):
                     break
 
             cv2.destroyAllWindows()
         except cv2.error as e:
-            if "The function is not implemented" in str(e) or "could not connect to display" in str(e).lower():
+            if (
+                "The function is not implemented" in str(e)
+                or "could not connect to display" in str(e).lower()
+            ):
                 print("Error: No display found or GUI support not available.")
             else:
                 print("OpenCV error:", e)
         except Exception as e:
             print("Error:", e)
         finally:
-            cv2.destroyAllWindows()
\ No newline at end of file
+            cv2.destroyAllWindows()

From 11fc8a542d7345560344bb33fefe6a9afdaa7fb8 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 13 Aug 2025 02:07:26 -0400
Subject: [PATCH 104/128] UPDATE: Add support for IPython display

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 9fb58c765d..a7c46fcc23 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -59,6 +59,7 @@ metrics = [
     "pandas>=2.0.0",
 ]
 ffmpeg = ["av (>=15.0.0)"]
+rich_display = ["ipython (>=8.15,<9.0)"]
 
 [dependency-groups]
 dev = [

From 1dac635d72143679b9e8dcb9ab7008cfbc87e05d Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 13 Aug 2025 02:55:10 -0400
Subject: [PATCH 105/128] UPDATE: Added support for headless machines and
 notebook for sv.Video().show()

---
 supervision/video/core.py | 75 ++++++++++++++++++++++++++++++++-------
 1 file changed, 62 insertions(+), 13 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index ed570c51fb..0871d8af46 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -3,12 +3,19 @@
 from collections.abc import Callable
 
 import cv2
+import os
+import sys
 import numpy as np
 from tqdm.auto import tqdm
 
 from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes
 from supervision.video.utils import SourceType, VideoInfo
 
+try:
+    import IPython.display as iPyDisplay
+except ImportError:
+    iPyDisplay = None
+
 
 class Video:
     """
@@ -225,7 +232,38 @@ def show(self, resolution_wh: tuple[int, int] | None = None):
                 (width, height) tuple. If None, uses native video resolution.
                 Note: Aspect ratio may not be preserved.
         """
-        try:
+        # On Jupyter Notebook
+        def in_notebook():
+            argv = getattr(sys, "argv", [])
+            return any("jupyter" in arg or "ipykernel_launcher" in arg for arg in argv)
+        
+        def is_Headless():
+            if sys.platform.startswith("linux"):
+                return not bool(os.environ.get("DISPLAY", ""))
+            if sys.platform == "darwin":
+                return not bool(os.environ.get("TERM_PROGRAM") or os.environ.get("DISPLAY"))
+            if sys.platform.startswith("win"):
+                try:
+                    import ctypes
+                    user32 = ctypes.windll.user32
+                    return user32.GetDesktopWindow() == 0
+                except Exception:
+                    return True
+            return True
+        
+        # On a notebook
+        if in_notebook():
+            if iPyDisplay is None:
+                raise ValueError("IPython is not installed")
+            
+            self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
+
+            width = resolution_wh[0] if resolution_wh is not None else None
+            height = resolution_wh[1] if resolution_wh is not None else None
+            iPyDisplay.display(iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height))
+            os.remove("temp.mp4")
+        # On a computer
+        elif not is_Headless():
             for frame in self.frames(resolution_wh=resolution_wh):
                 cv2.imshow(str(self.source), frame)
                 key = cv2.waitKey(1) & 0xFF
@@ -233,16 +271,27 @@ def show(self, resolution_wh: tuple[int, int] | None = None):
                 if key == ord("q"):
                     break
 
+            while True:
+                if cv2.getWindowProperty(str(self.source), cv2.WND_PROP_VISIBLE) < 1:
+                    break
+                cv2.waitKey(100)
             cv2.destroyAllWindows()
-        except cv2.error as e:
-            if (
-                "The function is not implemented" in str(e)
-                or "could not connect to display" in str(e).lower()
-            ):
-                print("Error: No display found or GUI support not available.")
-            else:
-                print("OpenCV error:", e)
-        except Exception as e:
-            print("Error:", e)
-        finally:
-            cv2.destroyAllWindows()
+        # On a headless system
+        else:
+            if iPyDisplay is None:
+                raise ValueError("IPython is not installed")
+            
+            self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
+            
+            width = resolution_wh[0] if resolution_wh is not None else None
+            height = resolution_wh[1] if resolution_wh is not None else None
+
+            display_video = (iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height))
+            html_code = display_video._repr_html_()
+            export_path = "video_display.html"
+
+            with open(export_path, "w") as f:
+                f.write(html_code)
+            print(f"Video exported as HTML to {export_path}")
+
+            os.remove("temp.mp4")

From dedb68aa263e88ae84faef912aa58d90a1c21260 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 13 Aug 2025 06:57:31 +0000
Subject: [PATCH 106/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/core.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 0871d8af46..4e204123f4 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
+import os
+import sys
 from collections.abc import Callable
 
 import cv2
-import os
-import sys
 import numpy as np
 from tqdm.auto import tqdm
 
@@ -232,35 +232,41 @@ def show(self, resolution_wh: tuple[int, int] | None = None):
                 (width, height) tuple. If None, uses native video resolution.
                 Note: Aspect ratio may not be preserved.
         """
+
         # On Jupyter Notebook
         def in_notebook():
             argv = getattr(sys, "argv", [])
             return any("jupyter" in arg or "ipykernel_launcher" in arg for arg in argv)
-        
+
         def is_Headless():
             if sys.platform.startswith("linux"):
                 return not bool(os.environ.get("DISPLAY", ""))
             if sys.platform == "darwin":
-                return not bool(os.environ.get("TERM_PROGRAM") or os.environ.get("DISPLAY"))
+                return not bool(
+                    os.environ.get("TERM_PROGRAM") or os.environ.get("DISPLAY")
+                )
             if sys.platform.startswith("win"):
                 try:
                     import ctypes
+
                     user32 = ctypes.windll.user32
                     return user32.GetDesktopWindow() == 0
                 except Exception:
                     return True
             return True
-        
+
         # On a notebook
         if in_notebook():
             if iPyDisplay is None:
                 raise ValueError("IPython is not installed")
-            
+
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
 
             width = resolution_wh[0] if resolution_wh is not None else None
             height = resolution_wh[1] if resolution_wh is not None else None
-            iPyDisplay.display(iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height))
+            iPyDisplay.display(
+                iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height)
+            )
             os.remove("temp.mp4")
         # On a computer
         elif not is_Headless():
@@ -280,13 +286,15 @@ def is_Headless():
         else:
             if iPyDisplay is None:
                 raise ValueError("IPython is not installed")
-            
+
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
-            
+
             width = resolution_wh[0] if resolution_wh is not None else None
             height = resolution_wh[1] if resolution_wh is not None else None
 
-            display_video = (iPyDisplay.Video("temp.mp4", embed=True, width=width, height=height))
+            display_video = iPyDisplay.Video(
+                "temp.mp4", embed=True, width=width, height=height
+            )
             html_code = display_video._repr_html_()
             export_path = "video_display.html"
 

From 035196a1821ab0905e60160cd50c8cf26ae20e39 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 13 Aug 2025 03:06:23 -0400
Subject: [PATCH 107/128] UPDATE: Updated error msg for IPython

---
 supervision/video/core.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 4e204123f4..36999bcb64 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -258,7 +258,9 @@ def is_Headless():
         # On a notebook
         if in_notebook():
             if iPyDisplay is None:
-                raise ValueError("IPython is not installed")
+                raise RuntimeError(
+                "IPython (`IPython` module) is not installed. Run `pip install IPython`."
+            )
 
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
 
@@ -285,7 +287,9 @@ def is_Headless():
         # On a headless system
         else:
             if iPyDisplay is None:
-                raise ValueError("IPython is not installed")
+                raise RuntimeError(
+                "IPython (`IPython` module) is not installed. Run `pip install IPython`."
+            )
 
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
 

From a1218e8b98eb45d4262b37baab841f08817a2638 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 13 Aug 2025 07:06:44 +0000
Subject: [PATCH 108/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/core.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 36999bcb64..df7c27fe85 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -259,8 +259,8 @@ def is_Headless():
         if in_notebook():
             if iPyDisplay is None:
                 raise RuntimeError(
-                "IPython (`IPython` module) is not installed. Run `pip install IPython`."
-            )
+                    "IPython (`IPython` module) is not installed. Run `pip install IPython`."
+                )
 
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
 
@@ -288,8 +288,8 @@ def is_Headless():
         else:
             if iPyDisplay is None:
                 raise RuntimeError(
-                "IPython (`IPython` module) is not installed. Run `pip install IPython`."
-            )
+                    "IPython (`IPython` module) is not installed. Run `pip install IPython`."
+                )
 
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
 

From 76d814501bfc81fb61bbba6cfdb6abe4c0f6869a Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Wed, 13 Aug 2025 03:07:36 -0400
Subject: [PATCH 109/128] Precommit

---
 supervision/video/core.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index df7c27fe85..9794d8e726 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -259,7 +259,8 @@ def is_Headless():
         if in_notebook():
             if iPyDisplay is None:
                 raise RuntimeError(
-                    "IPython (`IPython` module) is not installed. Run `pip install IPython`."
+                    "IPython (`IPython` module) is not installed. " \
+                    "Run `pip install IPython`."
                 )
 
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
@@ -288,7 +289,8 @@ def is_Headless():
         else:
             if iPyDisplay is None:
                 raise RuntimeError(
-                    "IPython (`IPython` module) is not installed. Run `pip install IPython`."
+                    "IPython (`IPython` module) is not installed. " \
+                    "Run `pip install IPython`."
                 )
 
             self.save("temp.mp4", lambda frame, _: frame, show_progress=False)

From 32a5e2c137680327916aa9e4a1251086ce03cda2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 13 Aug 2025 07:08:18 +0000
Subject: [PATCH 110/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 9794d8e726..140eb9e153 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -259,7 +259,7 @@ def is_Headless():
         if in_notebook():
             if iPyDisplay is None:
                 raise RuntimeError(
-                    "IPython (`IPython` module) is not installed. " \
+                    "IPython (`IPython` module) is not installed. "
                     "Run `pip install IPython`."
                 )
 
@@ -289,7 +289,7 @@ def is_Headless():
         else:
             if iPyDisplay is None:
                 raise RuntimeError(
-                    "IPython (`IPython` module) is not installed. " \
+                    "IPython (`IPython` module) is not installed. "
                     "Run `pip install IPython`."
                 )
 

From 7680cae6e7ac7be37790e1f70dd1e872b1853048 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Thu, 14 Aug 2025 14:42:04 -0400
Subject: [PATCH 111/128] UPDATE: Fixed av module install

---
 supervision/video/backend/pyav.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 3fe0ead6b8..ade9583a36 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -35,9 +35,13 @@ def __init__(self):
         super().__init__()
 
         if av is None:
-            raise RuntimeError(
-                "PyAV (`av` module) is not installed. Run `pip install av`."
-            )
+            try:
+                import av
+            except ImportError:
+                av = None
+                raise RuntimeError(
+                    "PyAV (`av` module) is not installed. Run `pip install av`."
+                )
 
         self.container = None
         self.stream = None

From 68fb727d9c506da476347da0daf6048e5f812135 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Thu, 14 Aug 2025 14:47:42 -0400
Subject: [PATCH 112/128] UPDATE: Revert av error

---
 supervision/video/backend/pyav.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index ade9583a36..3fe0ead6b8 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -35,13 +35,9 @@ def __init__(self):
         super().__init__()
 
         if av is None:
-            try:
-                import av
-            except ImportError:
-                av = None
-                raise RuntimeError(
-                    "PyAV (`av` module) is not installed. Run `pip install av`."
-                )
+            raise RuntimeError(
+                "PyAV (`av` module) is not installed. Run `pip install av`."
+            )
 
         self.container = None
         self.stream = None

From 3f403fe0e8ab6af1286a996be1b04a36c9ab251c Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Thu, 14 Aug 2025 15:02:58 -0400
Subject: [PATCH 113/128] UPDATE: updated av module getter

---
 supervision/video/backend/pyav.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 3fe0ead6b8..b3822180a5 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -2,17 +2,27 @@
 
 import platform
 import re
+import sys
 from fractions import Fraction
 
-try:
-    import av
-except ImportError:
-    av = None
 import numpy as np
 
 from supervision.video.backend.base import BaseBackend, BaseWriter
 from supervision.video.utils import SourceType, VideoInfo
 
+av = None
+
+def get_av():
+    if 'av' in sys.modules and sys.modules['av'] is None:
+        del sys.modules['av']
+
+    try:
+        import av
+        return av
+    except ImportError:
+        raise RuntimeError(
+            "PyAV (`av` module) is not installed. Run `pip install av`."
+        )
 
 class pyAVBackend(BaseBackend):
     """
@@ -34,10 +44,8 @@ def __init__(self):
         """
         super().__init__()
 
-        if av is None:
-            raise RuntimeError(
-                "PyAV (`av` module) is not installed. Run `pip install av`."
-            )
+        global av
+        av = get_av()
 
         self.container = None
         self.stream = None

From c9badc1ccea9d5fa6757cd56351e0f64bafd4950 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 14 Aug 2025 19:03:20 +0000
Subject: [PATCH 114/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/pyav.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index b3822180a5..80152c18fa 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -12,17 +12,18 @@
 
 av = None
 
+
 def get_av():
-    if 'av' in sys.modules and sys.modules['av'] is None:
-        del sys.modules['av']
+    if "av" in sys.modules and sys.modules["av"] is None:
+        del sys.modules["av"]
 
     try:
         import av
+
         return av
     except ImportError:
-        raise RuntimeError(
-            "PyAV (`av` module) is not installed. Run `pip install av`."
-        )
+        raise RuntimeError("PyAV (`av` module) is not installed. Run `pip install av`.")
+
 
 class pyAVBackend(BaseBackend):
     """

From 7a507e589bf6df6c8dced18379b99ffb90c616a2 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Thu, 14 Aug 2025 15:18:47 -0400
Subject: [PATCH 115/128] UPDATE: Updated .show() with more configuration
 params

---
 supervision/video/core.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 140eb9e153..4e07b7872b 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -219,7 +219,12 @@ def save(
 
         writer.close()
 
-    def show(self, resolution_wh: tuple[int, int] | None = None):
+    def show(self, resolution_wh: tuple[int, int] | None = None, 
+        callback: Callable[[np.ndarray, int], np.ndarray] = lambda f, i: f,
+        fps: int | None = None,
+        progress_message: str = "Processing video",
+        show_progress: bool = False,
+        render_audio: bool | None = None):
         """
         Display video frames in a window with interactive playback controls.
 
@@ -263,7 +268,13 @@ def is_Headless():
                     "Run `pip install IPython`."
                 )
 
-            self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
+            self.save("temp.mp4",
+                      callback=callback,
+                      fps=fps,
+                      progress_message=progress_message,
+                      show_progress=show_progress,
+                      render_audio=render_audio
+                      )
 
             width = resolution_wh[0] if resolution_wh is not None else None
             height = resolution_wh[1] if resolution_wh is not None else None
@@ -293,7 +304,13 @@ def is_Headless():
                     "Run `pip install IPython`."
                 )
 
-            self.save("temp.mp4", lambda frame, _: frame, show_progress=False)
+            self.save("temp.mp4",
+                      callback=callback,
+                      fps=fps,
+                      progress_message=progress_message,
+                      show_progress=show_progress,
+                      render_audio=render_audio
+                      )
 
             width = resolution_wh[0] if resolution_wh is not None else None
             height = resolution_wh[1] if resolution_wh is not None else None

From 824fa9833da6be3620c25315b6d693c90f7f1c2c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 14 Aug 2025 19:19:12 +0000
Subject: [PATCH 116/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/core.py | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 4e07b7872b..b3148f0736 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -219,12 +219,15 @@ def save(
 
         writer.close()
 
-    def show(self, resolution_wh: tuple[int, int] | None = None, 
+    def show(
+        self,
+        resolution_wh: tuple[int, int] | None = None,
         callback: Callable[[np.ndarray, int], np.ndarray] = lambda f, i: f,
         fps: int | None = None,
         progress_message: str = "Processing video",
         show_progress: bool = False,
-        render_audio: bool | None = None):
+        render_audio: bool | None = None,
+    ):
         """
         Display video frames in a window with interactive playback controls.
 
@@ -268,13 +271,14 @@ def is_Headless():
                     "Run `pip install IPython`."
                 )
 
-            self.save("temp.mp4",
-                      callback=callback,
-                      fps=fps,
-                      progress_message=progress_message,
-                      show_progress=show_progress,
-                      render_audio=render_audio
-                      )
+            self.save(
+                "temp.mp4",
+                callback=callback,
+                fps=fps,
+                progress_message=progress_message,
+                show_progress=show_progress,
+                render_audio=render_audio,
+            )
 
             width = resolution_wh[0] if resolution_wh is not None else None
             height = resolution_wh[1] if resolution_wh is not None else None
@@ -304,13 +308,14 @@ def is_Headless():
                     "Run `pip install IPython`."
                 )
 
-            self.save("temp.mp4",
-                      callback=callback,
-                      fps=fps,
-                      progress_message=progress_message,
-                      show_progress=show_progress,
-                      render_audio=render_audio
-                      )
+            self.save(
+                "temp.mp4",
+                callback=callback,
+                fps=fps,
+                progress_message=progress_message,
+                show_progress=show_progress,
+                render_audio=render_audio,
+            )
 
             width = resolution_wh[0] if resolution_wh is not None else None
             height = resolution_wh[1] if resolution_wh is not None else None

From 8aa364ced34493038f106c5e1a91402fa7193cc5 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Thu, 14 Aug 2025 21:49:38 -0400
Subject: [PATCH 117/128] UPDATE: Updated IPython import

---
 supervision/video/core.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index b3148f0736..e05fda97e8 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -11,11 +11,18 @@
 from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes
 from supervision.video.utils import SourceType, VideoInfo
 
-try:
-    import IPython.display as iPyDisplay
-except ImportError:
-    iPyDisplay = None
-
+def get_iPython():
+    if "IPython" in sys.modules and sys.modules["IPython"] is None:
+        del sys.modules["IPython"]
+
+    try:
+        import IPython
+        return IPython
+    except ImportError:
+        raise RuntimeError(
+                    "IPython (`IPython` module) is not installed. "
+                    "Run `pip install IPython`."
+                )
 
 class Video:
     """
@@ -265,11 +272,7 @@ def is_Headless():
 
         # On a notebook
         if in_notebook():
-            if iPyDisplay is None:
-                raise RuntimeError(
-                    "IPython (`IPython` module) is not installed. "
-                    "Run `pip install IPython`."
-                )
+            iPyDisplay = get_iPython().display
 
             self.save(
                 "temp.mp4",

From 2bec9916f9bb87a3648ba9a2c142aa9b4b0af874 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 15 Aug 2025 01:49:58 +0000
Subject: [PATCH 118/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/core.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index e05fda97e8..2b3cceb1bf 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -11,18 +11,20 @@
 from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes
 from supervision.video.utils import SourceType, VideoInfo
 
+
 def get_iPython():
     if "IPython" in sys.modules and sys.modules["IPython"] is None:
         del sys.modules["IPython"]
 
     try:
         import IPython
+
         return IPython
     except ImportError:
         raise RuntimeError(
-                    "IPython (`IPython` module) is not installed. "
-                    "Run `pip install IPython`."
-                )
+            "IPython (`IPython` module) is not installed. Run `pip install IPython`."
+        )
+
 
 class Video:
     """

From 43830f7dfff2c6f99d130ce46a8c14c7022d4f80 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Thu, 14 Aug 2025 22:02:08 -0400
Subject: [PATCH 119/128] BUG: Frame iteration fix

---
 supervision/video/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 2b3cceb1bf..ded78a99e5 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -72,7 +72,7 @@ def __iter__(self):
         Yields:
             np.ndarray: The next frame in the video stream.
         """
-        return self.backend.frames()
+        return self.frames()
 
     def sink(
         self,

From f82745561948ba44ed9bda5c90e1f8986d291a0b Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 31 Aug 2025 00:45:17 -0400
Subject: [PATCH 120/128] UPDATE: Updated audio stream to use atempo reflecting
 changes in fps

---
 supervision/video/backend/pyav.py | 130 ++++++++++--------------------
 1 file changed, 43 insertions(+), 87 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 80152c18fa..f5766a66f4 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -112,7 +112,7 @@ def is_webcam_path(path: str) -> tuple[bool, str]:
 
         try:
             self.container = av.open(path, format=_format)
-            self.audio_src_container = self.container
+            self.path = path
             self.stream = self.container.streams.video[0]
             self.stream.thread_type = "AUTO"
             self.cap = self.container
@@ -333,6 +333,7 @@ def __init__(
         """
         try:
             self.container = av.open(filename, mode="w")
+            self.path = filename
             self.backend = backend
 
             if render_audio is None:
@@ -358,7 +359,6 @@ def __init__(
                 render_audio
                 and backend
                 and backend.audio_stream
-                and backend.audio_src_container
             ):
                 audio_codec_name = backend.audio_stream.codec_context.name
                 audio_rate = backend.audio_stream.codec_context.rate
@@ -366,11 +366,6 @@ def __init__(
                     audio_codec_name, rate=audio_rate
                 )
 
-                # Buffer all audio packets from backend for muxing later
-                for packet in backend.audio_src_container.demux(backend.audio_stream):
-                    if packet.dts is not None:
-                        self.audio_packets.append(packet)
-
         except Exception as e:
             raise RuntimeError(f"Cannot open video writer for file: {filename}") from e
 
@@ -404,85 +399,46 @@ def write(self, frame: np.ndarray) -> None:
             self.container.mux(packet)
 
     def close(self) -> None:
-        """
-        Finalize the video file, mux audio with adjusted timestamps to sync with video,
-        and close the container.
-        """
-
-        def rescale_timestamp(value, src_tb, dst_tb):
-            """
-            Rescale timestamp between timebases.
-
-            Args:
-                value (int): Original timestamp value
-                src_tb (Fraction): Source timebase
-                dst_tb (Fraction): Destination timebase
-
-            Returns:
-                int: Rescaled timestamp
-            """
-            return int(value * src_tb / dst_tb)
-
-        # Flush any remaining video packets
-        packets = self.stream.encode()
-        for packet in packets:
-            self.container.mux(packet)
-
-        # Calculate audio speed adjustment factor if needed
-        speed_factor = 1.0
-
-        try:
-            if (
-                self.backend
-                and self.backend.audio_stream
-                and self.backend.audio_stream.duration
-            ):
-                orig_audio_duration = float(
-                    self.backend.audio_stream.duration
-                    * self.backend.audio_stream.time_base
-                )
-            elif (
-                self.backend
-                and self.backend.audio_src_container
-                and self.backend.audio_src_container.duration
-            ):
-                orig_audio_duration = self.backend.audio_src_container.duration / 1000
-            else:
-                orig_audio_duration = None
-
-            new_video_duration = self.frame_idx * (1 / self.fps)
-
-            if orig_audio_duration and new_video_duration > 0:
-                speed_factor = orig_audio_duration / new_video_duration
-        except Exception:
-            speed_factor = 1.0
-
-        # Process and mux audio packets with timestamp adjustments
-        if self.audio_stream_out and speed_factor != 1.0:
-            for packet in self.audio_packets:
-                if packet.pts is not None:
-                    packet.pts = rescale_timestamp(
-                        packet.pts, packet.time_base, self.audio_stream_out.time_base
-                    )
-                    packet.pts = int(packet.pts / speed_factor)
-                if packet.dts is not None:
-                    packet.dts = rescale_timestamp(
-                        packet.dts, packet.time_base, self.audio_stream_out.time_base
-                    )
-                    packet.dts = int(packet.dts / speed_factor)
-                packet.stream = self.audio_stream_out
-                self.container.mux(packet)
-        elif self.audio_stream_out:
-            for packet in self.audio_packets:
-                if packet.pts is not None:
-                    packet.pts = rescale_timestamp(
-                        packet.pts, packet.time_base, self.audio_stream_out.time_base
-                    )
-                if packet.dts is not None:
-                    packet.dts = rescale_timestamp(
-                        packet.dts, packet.time_base, self.audio_stream_out.time_base
-                    )
-                packet.stream = self.audio_stream_out
-                self.container.mux(packet)
+        if (self.audio_stream_out is not None):
+            src = av.open(self.backend.path)
+            src_fps = src.streams.video[0].average_rate or src.streams.video[0].guessed_rate
+            audio_stream = src.streams.audio[0]
+
+            graph = av.filter.Graph()
+            graph.link_nodes(
+                graph.add_abuffer(template=audio_stream),
+                graph.add("atempo", str(self.fps/src_fps)), 
+                graph.add("abuffersink"),
+            ).configure()
+
+            for packet in src.demux(audio_stream):
+                for frame in packet.decode():
+                    graph.push(frame)
+
+                    while True:
+                        try:
+                            f = graph.pull()
+                        except (BlockingIOError, EOFError):  # sink drained for now
+                            break
+                        for pkt in self.audio_stream_out.encode(f):
+                            self.container.mux(pkt)
+
+            graph.push(None)
+            while True:
+                try:
+                    f = graph.pull()
+                except (BlockingIOError, EOFError):  # graph fully flushed
+                    break
+                for pkt in self.audio_stream_out.encode(f):
+                    self.container.mux(pkt)
+
+            for pkt in self.audio_stream_out.encode(None):
+                self.container.mux(pkt)
+
+            src.close()
+
+        # flush video
+        for pkt in self.stream.encode():
+            self.container.mux(pkt)
 
         self.container.close()

From 2d909151e751573d345abb6149a0698a8c6754be Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 31 Aug 2025 00:47:41 -0400
Subject: [PATCH 121/128] UPDATE: Updated docstrings

---
 supervision/video/backend/pyav.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index f5766a66f4..4d3a62a5d1 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -399,6 +399,24 @@ def write(self, frame: np.ndarray) -> None:
             self.container.mux(packet)
 
     def close(self) -> None:
+        """
+        Finalize and close the video file, including audio processing if enabled.
+
+        This method performs several critical operations:
+        1. If audio is enabled, processes and muxes the audio stream from the source
+        2. Applies tempo adjustment to match the output video FPS
+        3. Flushes all remaining video frames from the encoder
+        4. Properly closes the output container
+
+        The audio processing uses FFmpeg filters to:
+        - Read audio from the original source
+        - Apply tempo scaling based on FPS differences between source and output
+        - Encode and mux the processed audio into the output file
+
+        Note:
+            This method should always be called when finished writing frames.
+            It ensures proper file finalization and resource cleanup.
+        """
         if (self.audio_stream_out is not None):
             src = av.open(self.backend.path)
             src_fps = src.streams.video[0].average_rate or src.streams.video[0].guessed_rate

From e9ccca21c2d6a19ac4264ec7b8d90529c6670485 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 04:48:10 +0000
Subject: [PATCH 122/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/pyav.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 4d3a62a5d1..81a3fd055d 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -355,11 +355,7 @@ def __init__(
             self.audio_stream_out = None
             self.audio_packets = []
 
-            if (
-                render_audio
-                and backend
-                and backend.audio_stream
-            ):
+            if render_audio and backend and backend.audio_stream:
                 audio_codec_name = backend.audio_stream.codec_context.name
                 audio_rate = backend.audio_stream.codec_context.rate
                 self.audio_stream_out = self.container.add_stream(
@@ -417,15 +413,17 @@ def close(self) -> None:
             This method should always be called when finished writing frames.
             It ensures proper file finalization and resource cleanup.
         """
-        if (self.audio_stream_out is not None):
+        if self.audio_stream_out is not None:
             src = av.open(self.backend.path)
-            src_fps = src.streams.video[0].average_rate or src.streams.video[0].guessed_rate
+            src_fps = (
+                src.streams.video[0].average_rate or src.streams.video[0].guessed_rate
+            )
             audio_stream = src.streams.audio[0]
 
             graph = av.filter.Graph()
             graph.link_nodes(
                 graph.add_abuffer(template=audio_stream),
-                graph.add("atempo", str(self.fps/src_fps)), 
+                graph.add("atempo", str(self.fps / src_fps)),
                 graph.add("abuffersink"),
             ).configure()
 

From 9f115c48f299979d51001fad8499ad4b2330c8f6 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 31 Aug 2025 01:07:59 -0400
Subject: [PATCH 123/128] UPDATE: Changed backend type class and added ref to
 root

---
 supervision/__init__.py               |  2 ++
 supervision/video/backend/__init__.py | 30 +++++++++++++--------------
 supervision/video/core.py             | 12 +++++------
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/supervision/__init__.py b/supervision/__init__.py
index 6e2e329b75..7ae921ee3d 100644
--- a/supervision/__init__.py
+++ b/supervision/__init__.py
@@ -135,6 +135,7 @@
     process_video,
 )
 from supervision.video import Video, VideoInfo
+from supervision.video.backend import VideoBackendType
 
 __all__ = [
     "LMM",
@@ -193,6 +194,7 @@
     "VertexAnnotator",
     "VertexLabelAnnotator",
     "Video",
+    "VideoBackendType",
     "VideoInfo",
     "VideoSink",
     "approximate_polygon",
diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index 20f90998bd..9509e916e8 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -6,11 +6,11 @@
 from supervision.video.backend.opencv import OpenCVBackend, OpenCVWriter
 from supervision.video.backend.pyav import pyAVBackend, pyAVWriter
 
-BackendTypes = Union[OpenCVBackend, pyAVBackend]
-WriterTypes = Union[OpenCVWriter, pyAVWriter]
+VideoBackendTypes = Union[OpenCVBackend, pyAVBackend]
+VideoWriterTypes = Union[OpenCVWriter, pyAVWriter]
 
 
-class Backend(Enum):
+class VideoBackendType(Enum):
     """
     Enumeration of Backends.
 
@@ -28,7 +28,7 @@ def list(cls):
         return list(map(lambda c: c.value, cls))
 
     @classmethod
-    def from_value(cls, value: Backend | str) -> Backend:
+    def from_value(cls, value: VideoBackendType | str) -> VideoBackendType:
         if isinstance(value, cls):
             return value
         if isinstance(value, str):
@@ -43,20 +43,20 @@ def from_value(cls, value: Backend | str) -> Backend:
         )
 
 
-BackendDict = {
-    Backend.PYAV: pyAVBackend,
-    Backend.OPENCV: OpenCVBackend,
+VideoBackendDict = {
+    VideoBackendType.PYAV: pyAVBackend,
+    VideoBackendType.OPENCV: OpenCVBackend,
 }
 
-WriterDict = {
-    Backend.PYAV: pyAVWriter,
-    Backend.OPENCV: OpenCVWriter,
+VideoWriterDict = {
+    VideoBackendType.PYAV: pyAVWriter,
+    VideoBackendType.OPENCV: OpenCVWriter,
 }
 
 __all__ = [
-    "Backend",
-    "BackendDict",
-    "BackendTypes",
-    "WriterDict",
-    "WriterTypes",
+    "VideoBackendType",
+    "VideoBackendDict",
+    "VideoBackendTypes",
+    "VideoWriterDict",
+    "VideoWriterTypes",
 ]
diff --git a/supervision/video/core.py b/supervision/video/core.py
index ded78a99e5..ebbcee3eba 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -8,7 +8,7 @@
 import numpy as np
 from tqdm.auto import tqdm
 
-from supervision.video.backend import Backend, BackendDict, BackendTypes, WriterTypes
+from supervision.video.backend import VideoBackend, VideoBackendDict, VideoBackendTypes, VideoWriterTypes
 from supervision.video.utils import SourceType, VideoInfo
 
 
@@ -33,15 +33,15 @@ class Video:
     Attributes:
         info (VideoInfo): Metadata about the opened video.
         source (str | int): Path to the video file or index of the camera device.
-        backend (BackendTypes): Video backend used for I/O operations.
+        backend (VideoBackendTypes): Video backend used for I/O operations.
     """
 
     info: VideoInfo
     source: str | int
-    backend: BackendTypes
+    backend: VideoBackendTypes
 
     def __init__(
-        self, source: str | int, backend: Backend | str = Backend.OPENCV
+        self, source: str | int, backend: VideoBackend | str = VideoBackend.OPENCV
     ) -> None:
         """
         Initialize the Video object and open the source.
@@ -54,7 +54,7 @@ def __init__(
         Raises:
             ValueError: If the specified backend is not supported.
         """
-        self.backend = BackendDict.get(Backend.from_value(backend))
+        self.backend = VideoBackendDict.get(VideoBackend.from_value(backend))
         if self.backend is None:
             raise ValueError(f"Unsupported backend: {backend}")
 
@@ -80,7 +80,7 @@ def sink(
         info: VideoInfo,
         codec: str | None = None,
         render_audio: bool | None = None,
-    ) -> WriterTypes:
+    ) -> VideoWriterTypes:
         """
         Create a video writer for saving frames to a file.
 

From c67aad3b9bea89fec63dbaf4f37df41fd51ba13f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 05:08:20 +0000
Subject: [PATCH 124/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/__init__.py | 2 +-
 supervision/video/core.py             | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/supervision/video/backend/__init__.py b/supervision/video/backend/__init__.py
index 9509e916e8..5f5a83c04a 100644
--- a/supervision/video/backend/__init__.py
+++ b/supervision/video/backend/__init__.py
@@ -54,8 +54,8 @@ def from_value(cls, value: VideoBackendType | str) -> VideoBackendType:
 }
 
 __all__ = [
-    "VideoBackendType",
     "VideoBackendDict",
+    "VideoBackendType",
     "VideoBackendTypes",
     "VideoWriterDict",
     "VideoWriterTypes",
diff --git a/supervision/video/core.py b/supervision/video/core.py
index ebbcee3eba..4ff60d1c00 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -8,7 +8,12 @@
 import numpy as np
 from tqdm.auto import tqdm
 
-from supervision.video.backend import VideoBackend, VideoBackendDict, VideoBackendTypes, VideoWriterTypes
+from supervision.video.backend import (
+    VideoBackend,
+    VideoBackendDict,
+    VideoBackendTypes,
+    VideoWriterTypes,
+)
 from supervision.video.utils import SourceType, VideoInfo
 
 

From ba9efc2c6a79b3afcf709e039da773c12dbaefc1 Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 31 Aug 2025 01:10:31 -0400
Subject: [PATCH 125/128] BUG: Appending fixes for VideoBackend error

---
 supervision/video/core.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index ebbcee3eba..16e8c35a17 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -8,7 +8,7 @@
 import numpy as np
 from tqdm.auto import tqdm
 
-from supervision.video.backend import VideoBackend, VideoBackendDict, VideoBackendTypes, VideoWriterTypes
+from supervision.video.backend import VideoBackendType, VideoBackendDict, VideoBackendTypes, VideoWriterTypes
 from supervision.video.utils import SourceType, VideoInfo
 
 
@@ -41,7 +41,7 @@ class Video:
     backend: VideoBackendTypes
 
     def __init__(
-        self, source: str | int, backend: VideoBackend | str = VideoBackend.OPENCV
+        self, source: str | int, backend: VideoBackendType | str = VideoBackendType.OPENCV
     ) -> None:
         """
         Initialize the Video object and open the source.
@@ -54,7 +54,7 @@ def __init__(
         Raises:
             ValueError: If the specified backend is not supported.
         """
-        self.backend = VideoBackendDict.get(VideoBackend.from_value(backend))
+        self.backend = VideoBackendDict.get(VideoBackendType.from_value(backend))
         if self.backend is None:
             raise ValueError(f"Unsupported backend: {backend}")
 

From 9c7a9ecdff7a860315445daed5ae785d0dc2f0d7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 05:12:01 +0000
Subject: [PATCH 126/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/core.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/supervision/video/core.py b/supervision/video/core.py
index 16e8c35a17..c866ba2759 100644
--- a/supervision/video/core.py
+++ b/supervision/video/core.py
@@ -8,7 +8,12 @@
 import numpy as np
 from tqdm.auto import tqdm
 
-from supervision.video.backend import VideoBackendType, VideoBackendDict, VideoBackendTypes, VideoWriterTypes
+from supervision.video.backend import (
+    VideoBackendDict,
+    VideoBackendType,
+    VideoBackendTypes,
+    VideoWriterTypes,
+)
 from supervision.video.utils import SourceType, VideoInfo
 
 
@@ -41,7 +46,9 @@ class Video:
     backend: VideoBackendTypes
 
     def __init__(
-        self, source: str | int, backend: VideoBackendType | str = VideoBackendType.OPENCV
+        self,
+        source: str | int,
+        backend: VideoBackendType | str = VideoBackendType.OPENCV,
     ) -> None:
         """
         Initialize the Video object and open the source.

From fb2171ce835a555f9b41ae6d04135fb5e220056d Mon Sep 17 00:00:00 2001
From: Ashp116 <ashp116@outlook.com>
Date: Sun, 31 Aug 2025 17:52:49 -0400
Subject: [PATCH 127/128] UPDATE: Decompose playback speed into valid atempo
 chain

---
 supervision/video/backend/pyav.py | 32 ++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 81a3fd055d..0cdbbd16be 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -414,6 +414,25 @@ def close(self) -> None:
             It ensures proper file finalization and resource cleanup.
         """
         if self.audio_stream_out is not None:
+            def atempo_chain(speed: float) -> list[str]:
+                if speed <= 0:
+                    raise ValueError("Speed factor must be > 0")
+
+                chain = []
+
+                while speed > 2.0:
+                    chain.append("2.0")
+                    speed /= 2.0
+
+                while speed < 0.5:
+                    chain.append("0.5")
+                    speed /= 0.5
+
+                if abs(speed - 1.0) > 1e-6:
+                    chain.append(f"{speed:.6f}")
+
+                return chain
+
             src = av.open(self.backend.path)
             src_fps = (
                 src.streams.video[0].average_rate or src.streams.video[0].guessed_rate
@@ -421,11 +440,14 @@ def close(self) -> None:
             audio_stream = src.streams.audio[0]
 
             graph = av.filter.Graph()
-            graph.link_nodes(
-                graph.add_abuffer(template=audio_stream),
-                graph.add("atempo", str(self.fps / src_fps)),
-                graph.add("abuffersink"),
-            ).configure()
+            filters = atempo_chain(self.fps / src_fps)
+            nodes = [graph.add_abuffer(template=audio_stream)]
+            for f in filters:
+                nodes.append(graph.add("atempo", f))
+
+            nodes.append(graph.add("abuffersink"))
+            graph.link_nodes(*nodes)
+            graph.configure()
 
             for packet in src.demux(audio_stream):
                 for frame in packet.decode():

From c78e4f7021cab99e6f9bd1629075a421ce013145 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 31 Aug 2025 21:53:25 +0000
Subject: [PATCH 128/128] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?=
 =?UTF-8?q?o=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 supervision/video/backend/pyav.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/supervision/video/backend/pyav.py b/supervision/video/backend/pyav.py
index 0cdbbd16be..4db1e0a9ba 100644
--- a/supervision/video/backend/pyav.py
+++ b/supervision/video/backend/pyav.py
@@ -414,6 +414,7 @@ def close(self) -> None:
             It ensures proper file finalization and resource cleanup.
         """
         if self.audio_stream_out is not None:
+
             def atempo_chain(speed: float) -> list[str]:
                 if speed <= 0:
                     raise ValueError("Speed factor must be > 0")