| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
| import os |
| import shutil |
| from pathlib import Path |
| from typing import Optional, Union |
|
|
| import numpy as np |
| from huggingface_hub import hf_hub_download |
| from huggingface_hub.utils import validate_hf_hub_args |
|
|
| from ..utils import ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, is_onnx_available, logging |
|
|
|
|
| if is_onnx_available(): |
| import onnxruntime as ort |
|
|
|
|
| logger = logging.get_logger(__name__) |
|
|
| ORT_TO_NP_TYPE = { |
| "tensor(bool)": np.bool_, |
| "tensor(int8)": np.int8, |
| "tensor(uint8)": np.uint8, |
| "tensor(int16)": np.int16, |
| "tensor(uint16)": np.uint16, |
| "tensor(int32)": np.int32, |
| "tensor(uint32)": np.uint32, |
| "tensor(int64)": np.int64, |
| "tensor(uint64)": np.uint64, |
| "tensor(float16)": np.float16, |
| "tensor(float)": np.float32, |
| "tensor(double)": np.float64, |
| } |
|
|
|
|
| class OnnxRuntimeModel: |
| def __init__(self, model=None, **kwargs): |
| logger.info("`diffusers.OnnxRuntimeModel` is experimental and might change in the future.") |
| self.model = model |
| self.model_save_dir = kwargs.get("model_save_dir", None) |
| self.latest_model_name = kwargs.get("latest_model_name", ONNX_WEIGHTS_NAME) |
|
|
| def __call__(self, **kwargs): |
| inputs = {k: np.array(v) for k, v in kwargs.items()} |
| return self.model.run(None, inputs) |
|
|
| @staticmethod |
| def load_model(path: Union[str, Path], provider=None, sess_options=None): |
| """ |
| Loads an ONNX Inference session with an ExecutionProvider. Default provider is `CPUExecutionProvider` |
| |
| Arguments: |
| path (`str` or `Path`): |
| Directory from which to load |
| provider(`str`, *optional*): |
| Onnxruntime execution provider to use for loading the model, defaults to `CPUExecutionProvider` |
| """ |
| if provider is None: |
| logger.info("No onnxruntime provider specified, using CPUExecutionProvider") |
| provider = "CPUExecutionProvider" |
|
|
| return ort.InferenceSession(path, providers=[provider], sess_options=sess_options) |
|
|
| def _save_pretrained(self, save_directory: Union[str, Path], file_name: Optional[str] = None, **kwargs): |
| """ |
| Save a model and its configuration file to a directory, so that it can be re-loaded using the |
| [`~optimum.onnxruntime.modeling_ort.ORTModel.from_pretrained`] class method. It will always save the |
| latest_model_name. |
| |
| Arguments: |
| save_directory (`str` or `Path`): |
| Directory where to save the model file. |
| file_name(`str`, *optional*): |
| Overwrites the default model file name from `"model.onnx"` to `file_name`. This allows you to save the |
| model with a different name. |
| """ |
| model_file_name = file_name if file_name is not None else ONNX_WEIGHTS_NAME |
|
|
| src_path = self.model_save_dir.joinpath(self.latest_model_name) |
| dst_path = Path(save_directory).joinpath(model_file_name) |
| try: |
| shutil.copyfile(src_path, dst_path) |
| except shutil.SameFileError: |
| pass |
|
|
| |
| src_path = self.model_save_dir.joinpath(ONNX_EXTERNAL_WEIGHTS_NAME) |
| if src_path.exists(): |
| dst_path = Path(save_directory).joinpath(ONNX_EXTERNAL_WEIGHTS_NAME) |
| try: |
| shutil.copyfile(src_path, dst_path) |
| except shutil.SameFileError: |
| pass |
|
|
| def save_pretrained( |
| self, |
| save_directory: Union[str, os.PathLike], |
| **kwargs, |
| ): |
| """ |
| Save a model to a directory, so that it can be re-loaded using the [`~OnnxModel.from_pretrained`] class |
| method.: |
| |
| Arguments: |
| save_directory (`str` or `os.PathLike`): |
| Directory to which to save. Will be created if it doesn't exist. |
| """ |
| if os.path.isfile(save_directory): |
| logger.error(f"Provided path ({save_directory}) should be a directory, not a file") |
| return |
|
|
| os.makedirs(save_directory, exist_ok=True) |
|
|
| |
| self._save_pretrained(save_directory, **kwargs) |
|
|
| @classmethod |
| @validate_hf_hub_args |
| def _from_pretrained( |
| cls, |
| model_id: Union[str, Path], |
| token: Optional[Union[bool, str, None]] = None, |
| revision: Optional[Union[str, None]] = None, |
| force_download: bool = False, |
| cache_dir: Optional[str] = None, |
| file_name: Optional[str] = None, |
| provider: Optional[str] = None, |
| sess_options: Optional["ort.SessionOptions"] = None, |
| **kwargs, |
| ): |
| """ |
| Load a model from a directory or the HF Hub. |
| |
| Arguments: |
| model_id (`str` or `Path`): |
| Directory from which to load |
| token (`str` or `bool`): |
| Is needed to load models from a private or gated repository |
| revision (`str`): |
| Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id |
| cache_dir (`Union[str, Path]`, *optional*): |
| Path to a directory in which a downloaded pretrained model configuration should be cached if the |
| standard cache should not be used. |
| force_download (`bool`, *optional*, defaults to `False`): |
| Whether or not to force the (re-)download of the model weights and configuration files, overriding the |
| cached versions if they exist. |
| file_name(`str`): |
| Overwrites the default model file name from `"model.onnx"` to `file_name`. This allows you to load |
| different model files from the same repository or directory. |
| provider(`str`): |
| The ONNX runtime provider, e.g. `CPUExecutionProvider` or `CUDAExecutionProvider`. |
| kwargs (`Dict`, *optional*): |
| kwargs will be passed to the model during initialization |
| """ |
| model_file_name = file_name if file_name is not None else ONNX_WEIGHTS_NAME |
| |
| if os.path.isdir(model_id): |
| model = OnnxRuntimeModel.load_model( |
| os.path.join(model_id, model_file_name), provider=provider, sess_options=sess_options |
| ) |
| kwargs["model_save_dir"] = Path(model_id) |
| |
| else: |
| |
| model_cache_path = hf_hub_download( |
| repo_id=model_id, |
| filename=model_file_name, |
| token=token, |
| revision=revision, |
| cache_dir=cache_dir, |
| force_download=force_download, |
| ) |
| kwargs["model_save_dir"] = Path(model_cache_path).parent |
| kwargs["latest_model_name"] = Path(model_cache_path).name |
| model = OnnxRuntimeModel.load_model(model_cache_path, provider=provider, sess_options=sess_options) |
| return cls(model=model, **kwargs) |
|
|
| @classmethod |
| @validate_hf_hub_args |
| def from_pretrained( |
| cls, |
| model_id: Union[str, Path], |
| force_download: bool = True, |
| token: Optional[str] = None, |
| cache_dir: Optional[str] = None, |
| **model_kwargs, |
| ): |
| revision = None |
| if len(str(model_id).split("@")) == 2: |
| model_id, revision = model_id.split("@") |
|
|
| return cls._from_pretrained( |
| model_id=model_id, |
| revision=revision, |
| cache_dir=cache_dir, |
| force_download=force_download, |
| token=token, |
| **model_kwargs, |
| ) |
|
|