import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Checkpoint identifiers: the base model and the adapter applied on top of it.
base_model = "unsloth/Llama-3.2-1B-Instruct"
adapter = "jinesh90/llama-3.2-ft-html-generator"

# The tokenizer is loaded from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load the base weights in float32 on the CPU, wrap them with the adapter,
# and switch the combined model to evaluation mode.
_base_weights = AutoModelForCausalLM.from_pretrained(
    base_model,
    dtype=torch.float32,
    device_map="cpu",
)
model = PeftModel.from_pretrained(_base_weights, adapter)
model.eval()

def generate(instruction, input_text=""):
    """Generate HTML for *instruction* using the module-level model.

    Builds a single-turn chat prompt from the instruction and the optional
    existing code, runs greedy decoding, and returns only the newly
    generated text (the prompt tokens are sliced off before decoding).

    Args:
        instruction: Natural-language description of the HTML to produce.
        input_text: Optional existing HTML to include in the prompt; a
            falsy value is rendered as ``(none)``.

    Returns:
        The decoded completion with special tokens removed, or an empty
        string when *instruction* is missing or blank.
    """
    # Nothing to do for a blank instruction; skip the costly generation call.
    if not instruction or not instruction.strip():
        return ""

    prompt = f"""Generate valid HTML code based on the following instructions. Return only the HTML code.

### Instruction
{instruction}

### Input
{input_text if input_text else "(none)"}"""

    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # The rendered chat template already contains its special tokens, so the
    # tokenizer must not add them a second time (avoids a duplicated BOS).
    # NOTE(review): if the tokenizer truncates on the right, prompts longer
    # than 2048 tokens lose the trailing generation prompt - confirm whether
    # left-side truncation or an explicit length check is wanted here.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        add_special_tokens=False,
        truncation=True,
        max_length=2048,
    )

    with torch.no_grad():
        # Greedy decoding: do_sample=False alone is sufficient. Temperature
        # only applies when sampling, so it is not passed.
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
        )

    # The output sequence starts with the prompt; keep only what follows it.
    input_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0, input_len:], skip_special_tokens=True)

# Input widgets: the required instruction and an optional snippet of
# existing HTML, passed to generate() in this order.
_instruction_box = gr.Textbox(
    label="Instruction",
    placeholder="Create an HTML login form...",
)
_input_box = gr.Textbox(
    label="Input code (optional)",
    placeholder="Paste existing HTML...",
)

# Output widget: shows the returned text as HTML code.
_html_output = gr.Code(label="Generated HTML", language="html")

# Wire the widgets to generate() and start the app.
demo = gr.Interface(
    fn=generate,
    inputs=[_instruction_box, _input_box],
    outputs=_html_output,
    title="💻 HTML Code Generator",
    description="Fine-tuned Llama 3.2 for HTML generation",
)

demo.launch()