# Hugging Face Spaces page header (extraction residue, not source code):
# Spaces: Running
| import os | |
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
| import torch | |
# --- Token resolution (open-source friendly) ---------------------------------
# Accept the token from any of the commonly used env-var names, first hit wins;
# an unset or empty variable is skipped. Without a token the app still runs,
# just unauthenticated (HF Hub rate limits apply).
_TOKEN_ENV_VARS = ("HF_TOKEN", "TEST_TOKEN", "HUGGINGFACE_TOKEN", "HF_API_TOKEN")
token = next(
    (os.environ[name] for name in _TOKEN_ENV_VARS if os.environ.get(name)),
    None,
)
if token:
    print("β HF token loaded")
else:
    print("β οΈ No HF token found β running unauthenticated (rate limits apply)")
# --- Model --------------------------------------------------------------------
# Small instruct model so the Space also works on CPU.
MODEL = "HuggingFaceTB/SmolLM2-135M-Instruct"

# Prefer GPU when one is visible; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"π§ Device: {device}")

tokenizer = AutoTokenizer.from_pretrained(MODEL, token=token)
model = AutoModelForCausalLM.from_pretrained(MODEL, token=token).to(device)
print(f"β Model loaded: {MODEL}")
# --- Inference ------------------------------------------------------------------
def generate(prompt: str, max_new_tokens: int, temperature: float, system_prompt: str):
    """Run one chat-style generation with the loaded SmolLM2 model.

    Args:
        prompt: User message; a blank/whitespace-only prompt short-circuits
            with a warning instead of calling the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: 0 means greedy decoding; > 0 enables sampling with
            top_p=0.9.
        system_prompt: Optional system message prepended to the chat.

    Returns:
        Tuple of (generated_text, stats_string) for the two Gradio outputs.
    """
    if not prompt.strip():
        return "β οΈ Empty prompt", ""

    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Use the full tokenizer call (not .encode) so an attention_mask is produced;
    # generate() warns and may mishandle padding when only input_ids are passed.
    inputs = tokenizer(text, return_tensors="pt").to(device)
    input_tokens = inputs["input_ids"].shape[-1]

    # Only pass sampling parameters when sampling is enabled: temperature=None /
    # top_p=None alongside do_sample=False triggers generation-config validation
    # warnings in recent transformers releases.
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": temperature > 0,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        gen_kwargs["temperature"] = temperature
        gen_kwargs["top_p"] = 0.9

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    # Strip the prompt tokens; decode only what the model appended.
    new_tokens = outputs[0][input_tokens:]
    result = tokenizer.decode(new_tokens, skip_special_tokens=True)
    stats = f"Input tokens: {input_tokens} | Output tokens: {len(new_tokens)} | Device: {device}"
    return result, stats
# --- UI -------------------------------------------------------------------------
# Two-column layout: prompt controls on the left, model output on the right.
with gr.Blocks(title="SmolLM2 Pipeline Test", theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("""
    # π§ͺ SmolLM2-135M Pipeline Test
    `HuggingFaceTB/SmolLM2-135M-Instruct` β CPU/ZeroGPU fallback
    """)
    with gr.Row():
        # Left column: inputs and generation controls.
        with gr.Column(scale=2):
            system_prompt = gr.Textbox(
                label="System Prompt (optional)",
                placeholder="You are a helpful assistant.",
                lines=2,
            )
            prompt = gr.Textbox(
                label="User Prompt",
                placeholder="Was ist die Hauptstadt von Deutschland?",
                lines=4,
            )
            with gr.Row():
                max_tokens = gr.Slider(10, 300, value=150, step=10, label="Max New Tokens")
                temperature = gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="Temperature (0 = greedy)")
            btn = gr.Button("βΆ Generate", variant="primary")
        # Right column: generated text plus token/device stats.
        with gr.Column(scale=2):
            output = gr.Textbox(label="Output", lines=10, interactive=False)
            stats = gr.Textbox(label="Stats", lines=1, interactive=False)
    # Quick test examples: each row is [system_prompt, prompt, max_tokens, temperature]
    # and only pre-fills the input components (it does not auto-run generate).
    gr.Examples(
        examples=[
            ["You are a helpful assistant.", "What is 2+2? Answer in one sentence.", 50, 0.0],
            ["", "Summarize in one sentence: The Eiffel Tower is a wrought-iron lattice tower in Paris, built in 1889.", 80, 0.2],
            ["You are a JSON API. Respond only with valid JSON.", 'Extract name and age from: "I am Klaus, 34 years old."', 100, 0.0],
            ["", "Write a Python function that reverses a string.", 150, 0.3],
        ],
        inputs=[system_prompt, prompt, max_tokens, temperature],
        label="Quick Tests",
    )
    # Button click and Enter-in-prompt both run the same handler; the inputs
    # order must match generate(prompt, max_new_tokens, temperature, system_prompt).
    btn.click(fn=generate, inputs=[prompt, max_tokens, temperature, system_prompt], outputs=[output, stats])
    prompt.submit(fn=generate, inputs=[prompt, max_tokens, temperature, system_prompt], outputs=[output, stats])
    # Status footer rendered once at startup (f-string evaluated at build time).
    gr.Markdown(f"""
    ---
    **Token:** `{'β loaded' if token else 'β οΈ not set'}` |
    **Model:** `{MODEL}` |
    **Device:** `{device}`
    """)
    gr.Markdown("""
    ### π Links & Ressourcen
    [WoS](https://www.github.com/wall-of-shames) | [CodeyLab@HF](https://hf.co/codey-lab) | **BadTin & VolkanSah**
    """)
demo.launch()