"""Gradio demo: generate HTML with a LoRA-adapted Llama 3.2 1B Instruct model."""

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model
base_model = "unsloth/Llama-3.2-1B-Instruct"
adapter = "jinesh90/llama-3.2-ft-html-generator"

tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    dtype=torch.float32,
    device_map="cpu",
)
# Attach the fine-tuned adapter weights on top of the base model.
model = PeftModel.from_pretrained(model, adapter)
model.eval()


def generate(instruction: str, input_text: str = "") -> str:
    """Generate HTML for *instruction*, optionally based on existing code.

    Args:
        instruction: Description of the HTML to produce.
        input_text: Optional existing HTML supplied as context; an empty
            value is rendered as "(none)" in the prompt.

    Returns:
        The decoded model completion with the prompt tokens and special
        tokens removed.
    """
    # NOTE(review): the line breaks in this prompt were reconstructed from a
    # whitespace-collapsed source; confirm they match the prompt layout used
    # when the adapter was fine-tuned.
    prompt = f"""Generate valid HTML code based on the following instructions. Return only the HTML code.

### Instruction
{instruction}

### Input
{input_text if input_text else "(none)"}"""

    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The chat template has already inserted the special tokens (including
    # BOS), so tokenizing with the default add_special_tokens=True would
    # prepend a second BOS token.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
        add_special_tokens=False,
    )

    with torch.no_grad():
        # Greedy decoding. `temperature` is a sampling-only parameter, so it
        # is not passed here: with do_sample=False it is ignored, and a value
        # of 0 fails or warns in generation-config validation.
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    input_len = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0, input_len:], skip_special_tokens=True)


demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Instruction", placeholder="Create an HTML login form..."),
        gr.Textbox(label="Input code (optional)", placeholder="Paste existing HTML..."),
    ],
    outputs=gr.Code(label="Generated HTML", language="html"),
    title="💻 HTML Code Generator",
    description="Fine-tuned Llama 3.2 for HTML generation",
)

demo.launch()