| from typing import Dict, List, Any |
| from optimum.onnxruntime import ORTModelForSequenceClassification |
| from transformers import AutoTokenizer |
| from optimum.pipelines import pipeline |
|
|
|
|
| import torch |
|
|
# Startup diagnostic: when cuDNN is usable, print its version so it shows up
# in the process output. Nothing is printed otherwise.
if torch.backends.cudnn.is_available():
    print("cudnn:", torch.backends.cudnn.version())
|
|
class EndpointHandler():
    """Callable wrapper around an ONNX Runtime text-classification pipeline.

    The model and tokenizer are loaded once in ``__init__``; each call then
    forwards the request's inputs and parameters to the pipeline.
    """

    def __init__(self, path=""):
        """Load the ONNX model and tokenizer from *path* and build the pipeline.

        Args:
            path: Directory (or identifier) handed to both
                ``ORTModelForSequenceClassification.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``.
        """
        on_cuda = torch.cuda.is_available()

        # Use the CUDA execution provider only when torch reports a usable GPU.
        provider = "CUDAExecutionProvider" if on_cuda else "CPUExecutionProvider"

        model = ORTModelForSequenceClassification.from_pretrained(
            path,
            # Load ONNX weights as-is; no conversion is requested at load time.
            export=False,
            provider=provider,
        )
        tokenizer = AutoTokenizer.from_pretrained(path)

        # Pipeline device convention: -1 selects the CPU, 0 the first GPU.
        device = 0 if on_cuda else -1

        self.pipeline = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            device=device,
        )

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        """Run the pipeline on one request payload.

        Args:
            data: Usually a dict with an ``"inputs"`` entry and an optional
                ``"parameters"`` dict of keyword arguments for the pipeline.
                If ``"inputs"`` is absent, the payload itself (without
                ``"parameters"``) is used as the inputs. A non-dict payload
                is passed to the pipeline unchanged, with no parameters.

        Returns:
            The pipeline's prediction, returned unmodified. Its entries
            describe a predicted class with a ``"label"`` (the class name)
            and a ``"score"`` (a confidence between 0 and 1).
        """
        if isinstance(data, dict):
            # Work on a shallow copy so the caller's payload is not mutated.
            payload = dict(data)
            inputs = payload.pop("inputs", payload)
            # An explicit ``"parameters": null`` must behave like "no
            # parameters"; unpacking None with ** would raise TypeError.
            parameters = payload.pop("parameters", None) or {}
        else:
            inputs = data
            parameters = {}

        return self.pipeline(inputs, **parameters)