Skip to content

Commit e38e3ad

Browse files
committed
add modelzoo utils (download and config)
1 parent 05ac8f0 commit e38e3ad

4 files changed

Lines changed: 374 additions & 0 deletions

File tree

dlclive/modelzoo/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from dlclive.modelzoo.utils import (
2+
_MODELZOO_PATH,
3+
list_available_models,
4+
list_available_projects,
5+
list_available_combinations,
6+
load_super_animal_config,
7+
download_super_animal_snapshot,
8+
)
9+
from dlclive.modelzoo.pytorch_model_zoo_export import export_modelzoo_model
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import warnings
2+
from pathlib import Path
3+
from collections import OrderedDict
4+
5+
import torch
6+
7+
from dlclive.modelzoo.utils import load_super_animal_config, download_super_animal_snapshot
8+
9+
10+
def export_modelzoo_model(
    export_path: str | Path,
    super_animal: str,
    model_name: str,
    detector_name: str | None = None,
) -> None:
    """Export a SuperAnimal pose model (and optional detector) to one ``.pt`` file.

    Downloads the requested snapshot(s) from HuggingFace, resolves the model
    configuration, and saves a dict with keys ``config``, ``pose`` and
    ``detector`` via ``torch.save``. If a file already exists at
    ``export_path``, the export is skipped with a warning.

    Args:
        export_path: Destination file for the exported bundle. Missing parent
            directories are created.
        super_animal: Name of the SuperAnimal project (e.g.
            "superanimal_quadruped").
        model_name: Name of the pose model (e.g. "resnet_50").
        detector_name: Optional name of the detector model; when None, the
            exported "detector" entry is None.
    """
    # Normalize once; the original rebuilt Path(export_path) on every use.
    export_path = Path(export_path)
    export_path.parent.mkdir(parents=True, exist_ok=True)
    if export_path.exists():
        warnings.warn(f"Export path {export_path} already exists, skipping export", UserWarning)
        return

    model_cfg = load_super_animal_config(
        super_animal=super_animal,
        model_name=model_name,
        detector_name=detector_name,
    )

    def _load_model_weights(model_name: str, super_animal: str = super_animal) -> OrderedDict:
        """Download the model weights from huggingface and load them in torch state dict"""
        checkpoint: Path = download_super_animal_snapshot(dataset=super_animal, model_name=model_name)
        # weights_only=True avoids unpickling arbitrary objects from the checkpoint.
        return torch.load(checkpoint, map_location="cpu", weights_only=True)["model"]

    export_dict = {
        "config": model_cfg,
        "pose": _load_model_weights(model_name),
        "detector": _load_model_weights(detector_name) if detector_name is not None else None,
    }
    torch.save(export_dict, export_path)
41+
42+
43+
if __name__ == "__main__":
    """Example usage: export the quadruped ResNet-50 bundle into the package dir."""
    # Use the package-absolute import for consistency with the module-level
    # imports above; the original `from utils import _MODELZOO_PATH` only
    # resolves when the script is run from inside the package directory.
    from dlclive.modelzoo.utils import _MODELZOO_PATH

    model_name = "resnet_50"
    super_animal = "superanimal_quadruped"

    export_modelzoo_model(
        export_path=_MODELZOO_PATH / 'exported_models' / f'exported_{super_animal}_{model_name}.pt',
        super_animal=super_animal,
        model_name=model_name,
    )

dlclive/modelzoo/resolve_config.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
"""
2+
Helper function to deal with default values in the model configuration.
3+
For instance, "num_bodyparts x 2" is replaced with the number of bodyparts multiplied by 2.
4+
"""
5+
# NOTE JR 2026-23-01: This is duplicate code, copied from the original DeepLabCut-Live codebase.
6+
7+
import copy
8+
9+
10+
def update_config(config: dict, max_individuals: int, device: str | None):
    """Fills in placeholder values in a SuperAnimal model configuration.

    Replaces "num_bodyparts"-style placeholders with concrete values, sets the
    generated individual names, and assigns the inference device to the model
    (and its detector, when present). Note: the only visible caller passes
    ``device=None`` by default, so the parameter is annotated as optional.

    Args:
        config: The default model configuration file.
        max_individuals: The maximum number of detections to make in an image
        device: The device to use to train/run inference on the model

    Returns:
        The model configuration for a SuperAnimal-pretrained model.
    """
    config = replace_default_values(
        config,
        num_bodyparts=len(config["metadata"]["bodyparts"]),
        num_individuals=max_individuals,
        backbone_output_channels=config["model"]["backbone_output_channels"],
    )
    # Individuals are synthetic labels: animal0, animal1, ...
    config["metadata"]["individuals"] = [f"animal{i}" for i in range(max_individuals)]

    config["device"] = device
    if config.get("detector", None) is not None:
        config["detector"]["device"] = device

    return config
34+
35+
36+
def replace_default_values(
37+
config: dict | list,
38+
num_bodyparts: int | None = None,
39+
num_individuals: int | None = None,
40+
backbone_output_channels: int | None = None,
41+
**kwargs,
42+
) -> dict:
43+
"""Replaces placeholder values in a model configuration with their actual values.
44+
45+
This method allows to create template PyTorch configurations for models with values
46+
such as "num_bodyparts", which are replaced with the number of bodyparts for a
47+
project when making its Pytorch configuration.
48+
49+
This code can also do some basic arithmetic. You can write "num_bodyparts x 2" (or
50+
any factor other than 2) for location refinement channels, and the number of
51+
channels will be twice the number of bodyparts. You can write
52+
"backbone_output_channels // 2" for the number of channels in a layer, and it will
53+
be half the number of channels output by the backbone. You can write
54+
"num_bodyparts + 1" (such as for DEKR heatmaps, where a "center" bodypart is added).
55+
56+
The three base placeholder values that can be computed are "num_bodyparts",
57+
"num_individuals" and "backbone_output_channels". You can add more through the
58+
keyword arguments (such as "paf_graph": list[tuple[int, int]] or
59+
"paf_edges_to_keep": list[int] for DLCRNet models).
60+
61+
Args:
62+
config: the configuration in which to replace default values
63+
num_bodyparts: the number of bodyparts
64+
num_individuals: the number of individuals
65+
backbone_output_channels: the number of backbone output channels
66+
kwargs: other placeholder values to fill in
67+
68+
Returns:
69+
the configuration with placeholder values replaced
70+
71+
Raises:
72+
ValueError: if there is a placeholder value who's "updated" value was not
73+
given to the method
74+
"""
75+
76+
def get_updated_value(variable: str) -> int | list[int]:
77+
var_parts = variable.strip().split(" ")
78+
var_name = var_parts[0]
79+
if updated_values[var_name] is None:
80+
raise ValueError(
81+
f"Found {variable} in the configuration file, but there is no default "
82+
f"value for this variable."
83+
)
84+
85+
if len(var_parts) == 1:
86+
return updated_values[var_name]
87+
elif len(var_parts) == 3:
88+
operator, factor = var_parts[1], var_parts[2]
89+
if not factor.isdigit():
90+
raise ValueError(f"F must be an integer in variable: {variable}")
91+
92+
factor = int(factor)
93+
if operator == "+":
94+
return updated_values[var_name] + factor
95+
elif operator == "x":
96+
return updated_values[var_name] * factor
97+
elif operator == "//":
98+
return updated_values[var_name] // factor
99+
else:
100+
raise ValueError(f"Unknown operator for variable: {variable}")
101+
102+
raise ValueError(
103+
f"Found {variable} in the configuration file, but cannot parse it."
104+
)
105+
106+
updated_values = {
107+
"num_bodyparts": num_bodyparts,
108+
"num_individuals": num_individuals,
109+
"backbone_output_channels": backbone_output_channels,
110+
**kwargs,
111+
}
112+
113+
config = copy.deepcopy(config)
114+
if isinstance(config, dict):
115+
keys_to_update = list(config.keys())
116+
elif isinstance(config, list):
117+
keys_to_update = range(len(config))
118+
else:
119+
raise ValueError(f"Config to update must be dict or list, found {type(config)}")
120+
121+
for k in keys_to_update:
122+
if isinstance(config[k], (list, dict)):
123+
config[k] = replace_default_values(
124+
config[k],
125+
num_bodyparts,
126+
num_individuals,
127+
backbone_output_channels,
128+
**kwargs,
129+
)
130+
elif (
131+
isinstance(config[k], str)
132+
and config[k].strip().split(" ")[0] in updated_values.keys()
133+
):
134+
config[k] = get_updated_value(config[k])
135+
136+
return config

dlclive/modelzoo/utils.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
"""
2+
Utils for the DLC-Live Model Zoo
3+
"""
4+
# NOTE JR 2026-23-01: This file contains duplicated code from the DeepLabCut main repository.
5+
# This should be removed once a solution is found to address duplicate code.
6+
7+
import copy
8+
from pathlib import Path
9+
import logging
10+
11+
from ruamel.yaml import YAML
12+
13+
from dlclibrary.dlcmodelzoo.modelzoo_download import download_huggingface_model
14+
from dlclive.modelzoo.resolve_config import update_config
15+
16+
_MODELZOO_PATH = Path(__file__).parent
17+
18+
19+
def get_super_animal_model_config_path(model_name: str) -> Path:
    """Get the path to the model configuration file for a model and validate choice of model"""
    cfg_path = _MODELZOO_PATH / 'model_configs' / f"{model_name}.yaml"
    if cfg_path.exists():
        return cfg_path
    # Unknown model: fail with the list of valid choices.
    raise FileNotFoundError(
        f"Modelzoo model configuration file not found: {cfg_path} "
        f"Available models: {list_available_models()}"
    )
28+
29+
30+
def get_super_animal_project_config_path(super_animal: str) -> Path:
    """Get the path to the project configuration file for a project and validate choice of project"""
    cfg_path = _MODELZOO_PATH / 'project_configs' / f"{super_animal}.yaml"
    if not cfg_path.exists():
        # Keep a separating space before "Available" — the original f-strings
        # concatenated the path and the hint with no space between them.
        raise FileNotFoundError(
            f"Modelzoo project configuration file not found: {cfg_path} "
            f"Available projects: {list_available_projects()}"
        )
    return cfg_path
39+
40+
41+
def get_snapshot_folder_path() -> Path:
    """Return the directory into which model snapshots are downloaded."""
    snapshot_dir = _MODELZOO_PATH.joinpath("snapshots")
    return snapshot_dir
43+
44+
45+
def list_available_models() -> list[str]:
    """Names of the bundled model configs (one per YAML file in model_configs/)."""
    config_dir = _MODELZOO_PATH / "model_configs"
    return [cfg_file.stem for cfg_file in config_dir.glob("*.yaml")]
47+
48+
49+
def list_available_projects() -> list[str]:
    """Names of the bundled project configs (one per YAML file in project_configs/)."""
    config_dir = _MODELZOO_PATH / "project_configs"
    return [cfg_file.stem for cfg_file in config_dir.glob("*.yaml")]
51+
52+
53+
def list_available_combinations() -> list[str]:
    """All "<project>_<model>" identifiers formed from the available configs."""
    return [
        f"{project}_{model}"
        for project in list_available_projects()
        for model in list_available_models()
    ]
58+
59+
60+
def read_config_as_dict(config_path: str | Path) -> dict:
    """
    Args:
        config_path: the path to the configuration file to load

    Returns:
        The configuration file with pure Python classes
    """
    # "safe, pure" loading: plain Python dicts/lists/scalars, no object tags.
    yaml_reader = YAML(typ="safe", pure=True)
    with open(config_path, "r") as config_file:
        return yaml_reader.load(config_file)
72+
73+
74+
# NOTE JR 2026-23-01: This is duplicate code, copied from the original DeepLabCut-Live codebase.
75+
def add_metadata(project_config: dict, config: dict) -> dict:
    """Adds metadata to a pytorch pose configuration

    Args:
        project_config: the project configuration
        config: the pytorch pose configuration

    Returns:
        a deep copy of ``config`` with a "metadata" key added, populated from
        the project configuration (bodyparts, individuals, identity flag, ...)
    """
    config = copy.deepcopy(config)
    config["metadata"] = {
        "project_path": project_config["project_path"],
        "pose_config_path": "",
        # Multi-animal projects store bodyparts under "multianimalbodyparts";
        # fall back to the single-animal "bodyparts" key otherwise.
        "bodyparts": project_config.get("multianimalbodyparts") or project_config["bodyparts"],
        "unique_bodyparts": project_config.get("uniquebodyparts", []),
        "individuals": project_config.get("individuals", ["animal"]),
        "with_identity": project_config.get("identity", False),
    }
    return config
96+
97+
98+
# NOTE JR 2026-23-01: This is duplicate code, copied from the original DeepLabCut-Live codebase.
99+
def load_super_animal_config(
    super_animal: str,
    model_name: str,
    detector_name: str | None = None,
    max_individuals: int = 30,
    device: str | None = None,
) -> dict:
    """Loads the model configuration file for a model, detector and SuperAnimal

    Reads the bundled project and model YAML configs, attaches project metadata,
    resolves placeholder values, and sets the pose-estimation "method" key:
    "BU" (bottom-up, no detector) or "TD" (top-down, with detector).

    Args:
        super_animal: The name of the SuperAnimal for which to create the model config.
        model_name: The name of the model for which to create the model config.
        detector_name: The name of the detector for which to create the model config.
        max_individuals: The maximum number of detections to make in an image
        device: The device to use to train/run inference on the model

    Returns:
        The model configuration for a SuperAnimal-pretrained model.
    """
    project_cfg_path = get_super_animal_project_config_path(super_animal=super_animal)
    project_config = read_config_as_dict(project_cfg_path)

    model_cfg_path = get_super_animal_model_config_path(model_name=model_name)
    model_config = read_config_as_dict(model_cfg_path)
    # Attach project metadata first, then fill placeholders / device settings.
    model_config = add_metadata(project_config, model_config)
    model_config = update_config(model_config, max_individuals, device)

    if detector_name is None and super_animal != "superanimal_humanbody":
        # No detector requested: run the pose model bottom-up.
        model_config["method"] = "BU"
    else:
        # "superanimal_humanbody" is always treated as top-down, even without a
        # detector_name — presumably its detector config is handled elsewhere;
        # TODO(review): confirm this is intentional and not a missing detector.
        model_config["method"] = "TD"
        if super_animal != "superanimal_humanbody":
            # Detector configs live alongside the model configs, so the same
            # lookup helper is reused here.
            detector_cfg_path = get_super_animal_model_config_path(
                model_name=detector_name
            )
            detector_cfg = read_config_as_dict(detector_cfg_path)
            model_config["detector"] = detector_cfg
    return model_config
137+
138+
139+
def download_super_animal_snapshot(dataset: str, model_name: str) -> Path:
    """Downloads a SuperAnimal snapshot

    The snapshot is stored as "<dataset>_<model_name>.pt" in the modelzoo
    snapshot folder; an existing file is reused without re-downloading.

    Args:
        dataset: The name of the SuperAnimal dataset for which to download a snapshot.
        model_name: The name of the model for which to download a snapshot.

    Returns:
        The path to the downloaded snapshot.

    Raises:
        RuntimeError if the model fails to download.
    """
    snapshot_dir = get_snapshot_folder_path()
    model_name = f"{dataset}_{model_name}"
    model_filename = f"{model_name}.pt"
    model_path = snapshot_dir / model_filename

    if model_path.exists():
        # Lazy %-style args: the message is only formatted if the record is emitted.
        logging.info("Snapshot %s already exists, skipping download", model_path)
        return model_path

    try:
        # NOTE(review): confirm `rename_mapping` accepts a bare filename here —
        # some dlclibrary versions expect a {original: renamed} dict.
        download_huggingface_model(
            model_name, target_dir=str(snapshot_dir), rename_mapping=model_filename
        )

        # The downloader can return without producing the expected file;
        # surface that as an explicit failure.
        if not model_path.exists():
            raise RuntimeError(f"Failed to download {model_name} to {model_path}")

    except Exception as e:
        logging.error(
            "Failed to download superanimal snapshot %s to %s: %s",
            model_name, model_path, e,
        )
        # Bare `raise` re-raises with the original traceback intact.
        raise

    return model_path
174+
175+

0 commit comments

Comments
 (0)