"""Gradio chat demo backed by a Hugging Face text-generation pipeline."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Open instruct model. NOTE: 7B parameters is not lightweight — expects a GPU
# (device_map="auto") or a machine with plenty of RAM.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    # Return only the newly generated tokens, not the echoed prompt.
    # This avoids the fragile split("Assistant:")[-1] post-processing, which
    # over-trimmed whenever the model's reply itself contained "Assistant:".
    return_full_text=False,
)


def chat(history, message):
    """Generate a reply for *message* given the running *history*.

    Args:
        history: list of (user, assistant) message tuples; mutated in place.
        message: the new user utterance.

    Returns:
        (updated_history, "") — the empty string clears the input textbox.
    """
    # Serialize the conversation into the plain-text prompt format.
    prompt = ""
    for user, bot in history:
        prompt += f"User: {user}\nAssistant: {bot}\n"
    prompt += f"User: {message}\nAssistant:"

    reply = pipe(prompt)[0]["generated_text"].strip()
    # If the model hallucinates the next user turn, cut it off.
    reply = reply.split("User:")[0].strip()

    history.append((message, reply))
    return history, ""


with gr.Blocks() as demo:
    gr.Markdown("# 🔥 My Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Say something")
    clear = gr.Button("Clear chat")

    def respond(message, history):
        # The Chatbot component may pass None before any turn; normalize.
        if history is None:
            history = []
        return chat(history, message)

    msg.submit(respond, [msg, chatbot], [chatbot, msg])
    clear.click(lambda: ([], ""), None, [chatbot, msg])

# Launch only when executed as a script, not when imported (e.g. by tests).
if __name__ == "__main__":
    demo.launch()