| from typing import Dict, List, Any |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| import torch |
|
|
| class EndpointHandler: |
| def __init__(self, path=""): |
| self.tokenizer = AutoTokenizer.from_pretrained(path) |
| self.model = AutoModelForCausalLM.from_pretrained( |
| path, |
| torch_dtype=torch.float16, |
| device_map="auto" |
| ) |
| |
| self.inference_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. |
| Write a response that appropriately completes the request. |
| Identify the most suitable meme template based on the provided example situations. |
| |
| ### Instruction: |
| You are a meme expert who knows how to map real-life situations to the correct meme name. |
| Please identify the meme name that best fits the given examples_list. |
| |
| ### Input (examples_list): |
| {} |
| |
| ### Response:""" |
| |
| def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: |
| inputs = data.get("inputs", "") |
| parameters = data.get("parameters", {}) |
| |
| |
| formatted_input = self.inference_prompt_style.format(inputs) |
| |
| encoded = self.tokenizer(formatted_input, return_tensors="pt") |
| outputs = self.model.generate( |
| **encoded, |
| max_length=parameters.get("max_length", 1200), |
| temperature=parameters.get("temperature", 0.7), |
| do_sample=True |
| ) |
| |
| response = self.tokenizer.decode(outputs[0], skip_special_tokens=True) |
| |
| if "### Response:" in response: |
| response = response.split("### Response:")[-1].strip() |
| |
| return [{"generated_text": response}] |