Skip to content

Commit a2a1a25

Browse files
committed
Add single_animal option to benchmark_videos
Expose a single_animal flag on benchmark_videos (default False) and forward it to the underlying analysis call. This allows benchmarking to run in single-animal mode when using DeepLabCut-live exported models.
1 parent: 5e66526 · commit: a2a1a25

1 file changed

Lines changed: 64 additions & 48 deletions

File tree

dlclive/benchmark.py

Lines changed: 64 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
import sys
1111
import time
1212
import warnings
13+
from typing import TYPE_CHECKING
1314
from pathlib import Path
14-
15+
import argparse
16+
import os
1517
import colorcet as cc
1618
import cv2
1719
import numpy as np
@@ -23,10 +25,15 @@
2325

2426
from dlclive import DLCLive
2527
from dlclive import VERSION
26-
from dlclive import __file__ as dlcfile
2728
from dlclive.engine import Engine
2829
from dlclive.utils import decode_fourcc
2930

31+
if TYPE_CHECKING:
32+
try:
33+
import tensorflow
34+
except ImportError:
35+
tensorflow = None
36+
3037

3138
def download_benchmarking_data(
3239
target_dir=".",
@@ -49,17 +56,20 @@ def download_benchmarking_data(
4956
if os.path.exists(zip_path):
5057
print(f"{zip_path} already exists. Skipping download.")
5158
else:
59+
5260
def show_progress(count, block_size, total_size):
5361
pbar.update(block_size)
5462

5563
print(f"Downloading the benchmarking data from {url} ...")
5664
pbar = tqdm(unit="B", total=0, position=0, desc="Downloading")
5765

58-
filename, _ = urllib.request.urlretrieve(url, filename=zip_path, reporthook=show_progress)
66+
filename, _ = urllib.request.urlretrieve(
67+
url, filename=zip_path, reporthook=show_progress
68+
)
5969
pbar.close()
6070

6171
print(f"Extracting {zip_path} to {target_dir} ...")
62-
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
72+
with zipfile.ZipFile(zip_path, "r") as zip_ref:
6373
zip_ref.extractall(target_dir)
6474

6575

@@ -81,6 +91,7 @@ def benchmark_videos(
8191
cmap="bmy",
8292
save_poses=False,
8393
save_video=False,
94+
single_animal=False,
8495
):
8596
"""Analyze videos using DeepLabCut-live exported models.
8697
Analyze multiple videos and/or multiple options for the size of the video
@@ -168,7 +179,7 @@ def benchmark_videos(
168179
im_size_out = []
169180

170181
for i in range(len(resize)):
171-
print(f"\nRun {i+1} / {len(resize)}\n")
182+
print(f"\nRun {i + 1} / {len(resize)}\n")
172183

173184
this_inf_times, this_im_size, meta = benchmark(
174185
model_path=model_path,
@@ -188,6 +199,7 @@ def benchmark_videos(
188199
save_poses=save_poses,
189200
save_video=save_video,
190201
save_dir=output,
202+
single_animal=single_animal,
191203
)
192204

193205
inf_times.append(this_inf_times)
@@ -257,7 +269,7 @@ def get_system_info() -> dict:
257269
dev_type = "GPU"
258270
dev = [torch.cuda.get_device_name(torch.cuda.current_device())]
259271
else:
260-
from cpuinfo import get_cpu_info
272+
from cpuinfo import get_cpu_info # noqa: F401
261273

262274
dev_type = "CPU"
263275
dev = get_cpu_info()
@@ -275,9 +287,7 @@ def get_system_info() -> dict:
275287
}
276288

277289

278-
def save_inf_times(
279-
sys_info, inf_times, im_size, model=None, meta=None, output=None
280-
):
290+
def save_inf_times(sys_info, inf_times, im_size, model=None, meta=None, output=None):
281291
"""Save inference time data collected using :function:`benchmark` with system information to a pickle file.
282292
This is primarily used through :function:`benchmark_videos`
283293
@@ -346,6 +356,7 @@ def save_inf_times(
346356

347357
return True
348358

359+
349360
def benchmark(
350361
model_path: str,
351362
model_type: str,
@@ -357,8 +368,8 @@ def benchmark(
357368
single_animal: bool = True,
358369
cropping: list[int] | None = None,
359370
dynamic: tuple[bool, float, int] = (False, 0.5, 10),
360-
n_frames: int =1000,
361-
print_rate: bool=False,
371+
n_frames: int = 1000,
372+
print_rate: bool = False,
362373
precision: str = "FP32",
363374
display: bool = True,
364375
pcutoff: float = 0.5,
@@ -434,7 +445,10 @@ def benchmark(
434445
if not cap.isOpened():
435446
print(f"Error: Could not open video file {video_path}")
436447
return
437-
im_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
448+
im_size = (
449+
int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
450+
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
451+
)
438452

439453
if pixels is not None:
440454
resize = np.sqrt(pixels / (im_size[0] * im_size[1]))
@@ -492,9 +506,7 @@ def benchmark(
492506

493507
total_n_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
494508
n_frames = int(
495-
n_frames
496-
if (n_frames > 0) and n_frames < total_n_frames
497-
else total_n_frames
509+
n_frames if (n_frames > 0) and n_frames < total_n_frames else total_n_frames
498510
)
499511
iterator = range(n_frames) if print_rate or display else tqdm(range(n_frames))
500512
for _ in iterator:
@@ -510,7 +522,7 @@ def benchmark(
510522

511523
start_time = time.perf_counter()
512524
if frame_index == 0:
513-
pose = dlc_live.init_inference(frame) # Loads model
525+
pose = dlc_live.init_inference(frame) # Loads model
514526
else:
515527
pose = dlc_live.get_pose(frame)
516528

@@ -519,7 +531,9 @@ def benchmark(
519531
times.append(inf_time)
520532

521533
if print_rate:
522-
print("Inference rate = {:.3f} FPS".format(1 / inf_time), end="\r", flush=True)
534+
print(
535+
"Inference rate = {:.3f} FPS".format(1 / inf_time), end="\r", flush=True
536+
)
523537

524538
if save_video:
525539
draw_pose_and_write(
@@ -531,19 +545,17 @@ def benchmark(
531545
pcutoff=pcutoff,
532546
display_radius=display_radius,
533547
draw_keypoint_names=draw_keypoint_names,
534-
vwriter=vwriter
548+
vwriter=vwriter,
535549
)
536550

537551
frame_index += 1
538552

539553
if print_rate:
540-
print("Mean inference rate: {:.3f} FPS".format(np.mean(1 / np.array(times)[1:])))
554+
print(
555+
"Mean inference rate: {:.3f} FPS".format(np.mean(1 / np.array(times)[1:]))
556+
)
541557

542-
metadata = _get_metadata(
543-
video_path=video_path,
544-
cap=cap,
545-
dlc_live=dlc_live
546-
)
558+
metadata = _get_metadata(video_path=video_path, cap=cap, dlc_live=dlc_live)
547559

548560
cap.release()
549561

@@ -558,19 +570,21 @@ def benchmark(
558570
else:
559571
individuals = []
560572
n_individuals = len(individuals) or 1
561-
save_poses_to_files(video_path, save_dir, n_individuals, bodyparts, poses, timestamp=timestamp)
573+
save_poses_to_files(
574+
video_path, save_dir, n_individuals, bodyparts, poses, timestamp=timestamp
575+
)
562576

563577
return times, im_size, metadata
564578

565579

566580
def setup_video_writer(
567-
video_path:str,
568-
save_dir:str,
569-
timestamp:str,
570-
num_keypoints:int,
571-
cmap:str,
572-
fps:float,
573-
frame_size:tuple[int, int],
581+
video_path: str,
582+
save_dir: str,
583+
timestamp: str,
584+
num_keypoints: int,
585+
cmap: str,
586+
fps: float,
587+
frame_size: tuple[int, int],
574588
):
575589
# Set colors and convert to RGB
576590
cmap_colors = getattr(cc, cmap)
@@ -582,7 +596,9 @@ def setup_video_writer(
582596
# Define output video path
583597
video_path = Path(video_path)
584598
video_name = video_path.stem # filename without extension
585-
output_video_path = Path(save_dir) / f"{video_name}_DLCLIVE_LABELLED_{timestamp}.mp4"
599+
output_video_path = (
600+
Path(save_dir) / f"{video_name}_DLCLIVE_LABELLED_{timestamp}.mp4"
601+
)
586602

587603
# Get video writer setup
588604
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
@@ -595,6 +611,7 @@ def setup_video_writer(
595611

596612
return colors, vwriter
597613

614+
598615
def draw_pose_and_write(
599616
frame: np.ndarray,
600617
pose: np.ndarray,
@@ -611,7 +628,9 @@ def draw_pose_and_write(
611628

612629
if resize is not None and resize != 1.0:
613630
# Resize the frame
614-
frame = cv2.resize(frame, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
631+
frame = cv2.resize(
632+
frame, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR
633+
)
615634

616635
# Scale pose coordinates
617636
pose = pose.copy()
@@ -642,15 +661,10 @@ def draw_pose_and_write(
642661
lineType=cv2.LINE_AA,
643662
)
644663

645-
646664
vwriter.write(image=frame)
647665

648666

649-
def _get_metadata(
650-
video_path: str,
651-
cap: cv2.VideoCapture,
652-
dlc_live: DLCLive
653-
):
667+
def _get_metadata(video_path: str, cap: cv2.VideoCapture, dlc_live: DLCLive):
654668
try:
655669
fourcc = decode_fourcc(cap.get(cv2.CAP_PROP_FOURCC))
656670
except Exception:
@@ -687,7 +701,9 @@ def _get_metadata(
687701
return meta
688702

689703

690-
def save_poses_to_files(video_path, save_dir, n_individuals, bodyparts, poses, timestamp):
704+
def save_poses_to_files(
705+
video_path, save_dir, n_individuals, bodyparts, poses, timestamp
706+
):
691707
"""
692708
Saves the detected keypoint poses from the video to CSV and HDF5 files.
693709
@@ -708,7 +724,7 @@ def save_poses_to_files(video_path, save_dir, n_individuals, bodyparts, poses, t
708724
-------
709725
None
710726
"""
711-
import pandas as pd
727+
import pandas as pd # noqa: F401
712728

713729
base_filename = Path(video_path).stem
714730
save_dir = Path(save_dir)
@@ -725,14 +741,16 @@ def save_poses_to_files(video_path, save_dir, n_individuals, bodyparts, poses, t
725741
else:
726742
individuals = [f"individual_{i}" for i in range(n_individuals)]
727743
pdindex = pd.MultiIndex.from_product(
728-
[individuals, bodyparts, ["x", "y", "likelihood"]], names=["individuals", "bodyparts", "coords"]
744+
[individuals, bodyparts, ["x", "y", "likelihood"]],
745+
names=["individuals", "bodyparts", "coords"],
729746
)
730747

731748
pose_df = pd.DataFrame(flattened_poses, columns=pdindex)
732749

733750
pose_df.to_hdf(h5_save_path, key="df_with_missing", mode="w")
734751
pose_df.to_csv(csv_save_path, index=False)
735752

753+
736754
def _create_poses_np_array(n_individuals: int, bodyparts: list, poses: list):
737755
# Create numpy array with poses:
738756
max_frame = max(p["frame"] for p in poses)
@@ -745,17 +763,15 @@ def _create_poses_np_array(n_individuals: int, bodyparts: list, poses: list):
745763
if pose.ndim == 2:
746764
pose = pose[np.newaxis, :, :]
747765
padded_pose = np.full(pose_target_shape, np.nan)
748-
slices = tuple(slice(0, min(pose.shape[i], pose_target_shape[i])) for i in range(3))
766+
slices = tuple(
767+
slice(0, min(pose.shape[i], pose_target_shape[i])) for i in range(3)
768+
)
749769
padded_pose[slices] = pose[slices]
750770
poses_array[frame] = padded_pose
751771

752772
return poses_array
753773

754774

755-
import argparse
756-
import os
757-
758-
759775
def main():
760776
"""Provides a command line interface to benchmark_videos function."""
761777
parser = argparse.ArgumentParser(

0 commit comments

Comments (0)