| import torch |
| from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
class PythiaChatHandler:
    """Callable wrapper that generates text with a Hugging Face causal LM.

    The tokenizer and model are loaded once in ``__init__`` and placed on
    CUDA when ``torch.cuda.is_available()`` is true, otherwise on the CPU.
    Calling the instance with a prompt string returns the decoded text of
    the first sequence produced by ``model.generate``.
    """

    def __init__(self, model_name: str = "togethercomputer/Pythia-Chat-Base-7B"):
        """Load the tokenizer and model identified by *model_name*.

        Args:
            model_name: Value handed to ``AutoTokenizer.from_pretrained`` and
                ``AutoModelForCausalLM.from_pretrained``.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)

    def __call__(self, input_text: str, **generate_kwargs) -> str:
        """Generate text for *input_text* and return it decoded.

        Args:
            input_text: Prompt to encode and feed to the model.
            **generate_kwargs: Optional keyword arguments forwarded unchanged
                to ``self.model.generate`` (for example ``max_new_tokens`` or
                ``do_sample``). With no extra arguments the model's own
                generation defaults apply, exactly as before.

        Returns:
            The first generated sequence decoded with special tokens skipped.
            NOTE(review): for decoder-only models ``generate`` normally
            returns the prompt tokens followed by the continuation, so the
            result is expected to start with the prompt -- confirm whether
            callers rely on that.
        """
        input_ids = self.tokenizer.encode(input_text, return_tensors="pt").to(self.device)

        # A single, unpadded prompt attends to every token. Passing the mask
        # explicitly avoids relying on generate() to infer it from the
        # pad/eos token ids. A caller-supplied mask takes precedence.
        generate_kwargs.setdefault("attention_mask", torch.ones_like(input_ids))

        output_ids = self.model.generate(input_ids, **generate_kwargs).to("cpu")
        return self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|