# app.py — Hugging Face Space by SatyamSinghal (commit 2eb97c5, verified)
import os
import gradio as gr
import torch

# LoRA adapter repository on the Hugging Face Hub; peft resolves and loads
# the underlying base model from the adapter's config.
MODEL_ID = "SatyamSinghal/taskmind-1.1b-chat-lora"
# Hub access token from the environment; None when HF_TOKEN is unset
# (public repositories still download fine).
HF_TOKEN = os.getenv("HF_TOKEN")
# Lazily-initialized text-generation pipeline singleton; populated by
# load_model() on the first chat request.
pipe = None
def load_model():
    """Build the text-generation pipeline on first use (lazy singleton).

    Populates the module-level ``pipe``; subsequent calls return
    immediately. The heavy peft/transformers imports are deferred so the
    Space's UI can start before the model is downloaded.
    """
    global pipe
    if pipe is not None:
        return

    from peft import AutoPeftModelForCausalLM
    from transformers import AutoTokenizer, pipeline

    print("Loading tokenizer...")
    tok = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)

    print("Loading model...")
    # Half precision only on GPU; CPU inference stays in fp32.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    lora_model = AutoPeftModelForCausalLM.from_pretrained(
        MODEL_ID,
        token=HF_TOKEN,
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
    )

    pipe = pipeline("text-generation", model=lora_model, tokenizer=tok)
    print("Model loaded successfully.")
def respond(message, history):
    """Chat callback for gr.ChatInterface (tuple-style history).

    Converts the (user, assistant) pair history plus the new message into
    a chat-template message list, runs the pipeline, and returns the
    assistant's latest reply as a string. Load failures are surfaced to
    the UI as an error message rather than a crash.
    """
    try:
        load_model()
    except Exception as e:
        return f"❌ Model failed to load: {str(e)}"

    # Flatten the paired history into role-tagged messages, skipping
    # empty turns (e.g. the pending assistant slot).
    messages = []
    for user_turn, bot_turn in history:
        messages.extend(
            {"role": role, "content": content}
            for role, content in (("user", user_turn), ("assistant", bot_turn))
            if content
        )
    messages.append({"role": "user", "content": message})

    output = pipe(
        messages,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    text = output[0]["generated_text"]
    # Chat-style pipelines echo the whole conversation as a message list;
    # the last entry is the newly generated assistant turn.
    if isinstance(text, list):
        return text[-1]["content"]
    return str(text)
# Chat UI wiring: `respond` is called with (message, history) per turn.
demo = gr.ChatInterface(
    fn=respond,
    title="TaskMind Interface",
    description="Chat with the TaskMind LoRA model.",
    # Clickable sample prompts shown beneath the chat box.
    examples=[
        "Who are you?",
        "@Satyam fix the growstreams deck ASAP NO Delay",
        "done bhai, merged the PR",
        "login page 60% ho gaya",
        "getting 500 error on registration",
    ],
)

if __name__ == "__main__":
    demo.launch()