Skip to content

Video

VideoInfo

A class to store video information, including width, height, fps and total number of frames.

Attributes:

Name Type Description
width int

width of the video in pixels

height int

height of the video in pixels

fps int

frames per second of the video

total_frames int

total number of frames in the video, default is None

Examples:

import supervision as sv

video_info = sv.VideoInfo.from_video_path(video_path='video.mp4')

video_info
# VideoInfo(width=3840, height=2160, fps=25, total_frames=538)

video_info.resolution_wh
# (3840, 2160)
Source code in supervision/utils/video.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
@dataclass
class VideoInfo:
    """
    A class to store video information, including width, height, fps and
        total number of frames.

    Attributes:
        width (int): width of the video in pixels
        height (int): height of the video in pixels
        fps (int): frames per second of the video
        total_frames (int, optional): total number of frames in the video,
            default is None

    Examples:
        ```python
        import supervision as sv

        video_info = sv.VideoInfo.from_video_path(video_path='video.mp4')

        video_info
        # VideoInfo(width=3840, height=2160, fps=25, total_frames=538)

        video_info.resolution_wh
        # (3840, 2160)
        ```
    """

    width: int
    height: int
    fps: int
    total_frames: Optional[int] = None

    @classmethod
    def from_video_path(cls, video_path: str) -> VideoInfo:
        video = cv2.VideoCapture(video_path)
        if not video.isOpened():
            raise Exception(f"Could not open video at {video_path}")

        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(video.get(cv2.CAP_PROP_FPS))
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        video.release()
        return VideoInfo(width, height, fps, total_frames)

    @property
    def resolution_wh(self) -> Tuple[int, int]:
        return self.width, self.height

VideoSink

Context manager that saves video frames to a file using OpenCV.

Attributes:

Name Type Description
target_path str

The path to the output file where the video will be saved.

video_info VideoInfo

Information about the video resolution, fps, and total frame count.

codec str

FOURCC code for video format

Example
import supervision as sv

video_info = sv.VideoInfo.from_video_path('source.mp4')
frames_generator = sv.get_video_frames_generator('source.mp4')

with sv.VideoSink(target_path='target.mp4', video_info=video_info) as sink:
    for frame in frames_generator:
        sink.write_frame(frame=frame)
Source code in supervision/utils/video.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class VideoSink:
    """
    Context manager that saves video frames to a file using OpenCV.

    Attributes:
        target_path (str): The path to the output file where the video will be saved.
        video_info (VideoInfo): Information about the video resolution, fps,
            and total frame count.
        codec (str): FOURCC code for video format

    Example:
        ```python
        import supervision as sv

        video_info = sv.VideoInfo.from_video_path('source.mp4')
        frames_generator = sv.get_video_frames_generator('source.mp4')

        with sv.VideoSink(target_path='target.mp4', video_info=video_info) as sink:
            for frame in frames_generator:
                sink.write_frame(frame=frame)
        ```
    """

    def __init__(self, target_path: str, video_info: VideoInfo, codec: str = "mp4v"):
        self.target_path = target_path
        self.video_info = video_info
        self.__codec = codec
        self.__writer = None

    def __enter__(self):
        try:
            self.__fourcc = cv2.VideoWriter_fourcc(*self.__codec)
        except TypeError as e:
            print(str(e) + ". Defaulting to mp4v...")
            self.__fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        self.__writer = cv2.VideoWriter(
            self.target_path,
            self.__fourcc,
            self.video_info.fps,
            self.video_info.resolution_wh,
        )
        return self

    def write_frame(self, frame: np.ndarray):
        self.__writer.write(frame)

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.__writer.release()

FPSMonitor

A class for monitoring frames per second (FPS) to benchmark latency.

Source code in supervision/utils/video.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
class FPSMonitor:
    """
    A class for monitoring frames per second (FPS) to benchmark latency.
    """

    def __init__(self, sample_size: int = 30):
        """
        Args:
            sample_size (int): The maximum number of observations for latency
                benchmarking.

        Examples:
            ```python
            import supervision as sv

            frames_generator = sv.get_video_frames_generator('source.mp4')
            fps_monitor = sv.FPSMonitor()

            for frame in frames_generator:
                # your processing code here
                fps_monitor.tick()
                fps = fps_monitor()
            ```
        """
        self.all_timestamps = deque(maxlen=sample_size)

    def __call__(self) -> float:
        """
        Computes and returns the average FPS based on the stored time stamps.

        Returns:
            float: The average FPS. Returns 0.0 if no time stamps are stored.
        """

        if not self.all_timestamps:
            return 0.0
        taken_time = self.all_timestamps[-1] - self.all_timestamps[0]
        return (len(self.all_timestamps)) / taken_time if taken_time != 0 else 0.0

    def tick(self) -> None:
        """
        Adds a new time stamp to the deque for FPS calculation.
        """
        self.all_timestamps.append(time.monotonic())

    def reset(self) -> None:
        """
        Clears all the time stamps from the deque.
        """
        self.all_timestamps.clear()

__call__()

Computes and returns the average FPS based on the stored time stamps.

Returns:

Name Type Description
float float

The average FPS. Returns 0.0 if no time stamps are stored.

Source code in supervision/utils/video.py
233
234
235
236
237
238
239
240
241
242
243
244
def __call__(self) -> float:
    """
    Computes and returns the average FPS based on the stored time stamps.

    Returns:
        float: The average FPS. Returns 0.0 if no time stamps are stored.
    """

    if not self.all_timestamps:
        return 0.0
    taken_time = self.all_timestamps[-1] - self.all_timestamps[0]
    return (len(self.all_timestamps)) / taken_time if taken_time != 0 else 0.0

__init__(sample_size=30)

Parameters:

Name Type Description Default
sample_size int

The maximum number of observations for latency benchmarking.

30

Examples:

import supervision as sv

frames_generator = sv.get_video_frames_generator('source.mp4')
fps_monitor = sv.FPSMonitor()

for frame in frames_generator:
    # your processing code here
    fps_monitor.tick()
    fps = fps_monitor()
Source code in supervision/utils/video.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def __init__(self, sample_size: int = 30):
    """
    Args:
        sample_size (int): The maximum number of observations for latency
            benchmarking.

    Examples:
        ```python
        import supervision as sv

        frames_generator = sv.get_video_frames_generator('source.mp4')
        fps_monitor = sv.FPSMonitor()

        for frame in frames_generator:
            # your processing code here
            fps_monitor.tick()
            fps = fps_monitor()
        ```
    """
    self.all_timestamps = deque(maxlen=sample_size)

reset()

Clears all the time stamps from the deque.

Source code in supervision/utils/video.py
252
253
254
255
256
def reset(self) -> None:
    """
    Clears all the time stamps from the deque.
    """
    self.all_timestamps.clear()

tick()

Adds a new time stamp to the deque for FPS calculation.

Source code in supervision/utils/video.py
246
247
248
249
250
def tick(self) -> None:
    """
    Adds a new time stamp to the deque for FPS calculation.
    """
    self.all_timestamps.append(time.monotonic())

get_video_frames_generator

Get a generator that yields the frames of the video.

Parameters:

Name Type Description Default
source_path str

The path of the video file.

required
stride int

Indicates the interval at which frames are returned, skipping stride - 1 frames between each.

1
start int

Indicates the starting position from which video should generate frames

0
end Optional[int]

Indicates the ending position at which video should stop generating frames. If None, video will be read to the end.

None

Returns:

Type Description
Generator[ndarray, None, None]

A generator that yields the frames of the video.

Examples:

import supervision as sv

for frame in sv.get_video_frames_generator(source_path='source_video.mp4'):
    ...
Source code in supervision/utils/video.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def get_video_frames_generator(
    source_path: str, stride: int = 1, start: int = 0, end: Optional[int] = None
) -> Generator[np.ndarray, None, None]:
    """
    Get a generator that yields the frames of the video.

    Args:
        source_path (str): The path of the video file.
        stride (int): Indicates the interval at which frames are returned,
            skipping stride - 1 frames between each.
        start (int): Indicates the starting position from which
            video should generate frames
        end (Optional[int]): Indicates the ending position at which video
            should stop generating frames. If None, video will be read to the end.

    Returns:
        (Generator[np.ndarray, None, None]): A generator that yields the
            frames of the video.

    Examples:
        ```python
        import supervision as sv

        for frame in sv.get_video_frames_generator(source_path='source_video.mp4'):
            ...
        ```
    """
    video, start, end = _validate_and_setup_video(source_path, start, end)
    frame_position = start
    while True:
        success, frame = video.read()
        if not success or frame_position >= end:
            break
        yield frame
        for _ in range(stride - 1):
            success = video.grab()
            if not success:
                break
        frame_position += stride
    video.release()

process_video

Process a video file by applying a callback function on each frame and saving the result to a target video file.

Parameters:

Name Type Description Default
source_path str

The path to the source video file.

required
target_path str

The path to the target video file.

required
callback Callable[[ndarray, int], ndarray]

A function that takes in a numpy ndarray representation of a video frame and an int index of the frame and returns a processed numpy ndarray representation of the frame.

required

Examples:

import supervision as sv

def callback(scene: np.ndarray, index: int) -> np.ndarray:
    ...

process_video(
    source_path='...',
    target_path='...',
    callback=callback
)
Source code in supervision/utils/video.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def process_video(
    source_path: str,
    target_path: str,
    callback: Callable[[np.ndarray, int], np.ndarray],
) -> None:
    """
    Process a video file by applying a callback function on each frame
        and saving the result to a target video file.

    Args:
        source_path (str): The path to the source video file.
        target_path (str): The path to the target video file.
        callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in
            a numpy ndarray representation of a video frame and an
            int index of the frame and returns a processed numpy ndarray
            representation of the frame.

    Examples:
        ```python
        import supervision as sv

        def callback(scene: np.ndarray, index: int) -> np.ndarray:
            ...

        process_video(
            source_path='...',
            target_path='...',
            callback=callback
        )
        ```
    """
    source_video_info = VideoInfo.from_video_path(video_path=source_path)
    with VideoSink(target_path=target_path, video_info=source_video_info) as sink:
        for index, frame in enumerate(
            get_video_frames_generator(source_path=source_path)
        ):
            result_frame = callback(frame, index)
            sink.write_frame(frame=result_frame)

Comments