import os
from flask import Flask, request, jsonify
from transformers import pipeline
import torch
|
|
# Hugging Face token (only needed for gated or private models)
hf_token = os.getenv("HF_TOKEN")

MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"

print("Loading pipeline (model + tokenizer)...")

# Load the model and tokenizer once at startup; run on GPU when available.
generator = pipeline(
    "text-generation",
    model=MODEL_ID,
    token=hf_token,
    device=0 if torch.cuda.is_available() else -1,
)
|
|
| print("Pipeline loaded successfully") |
|
|
app = Flask(__name__)
|
|
| @app.route("/generate", methods=["POST"]) |
| def generate(): |
| data = request.json |
| prompt = data.get("prompt", "") |
| max_tokens = int(data.get("max_tokens", 256)) |
|
|
| if not prompt: |
| return jsonify({"error": "Prompt required"}), 400 |
|
|
| |
| result = generator( |
| prompt, |
| max_new_tokens=max_tokens, |
| truncation=True |
| ) |
| |
| return jsonify({"response": result[0]['generated_text']}) |
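
# Note: with the pipeline defaults, result[0]["generated_text"] contains the prompt
# followed by the completion. If only the newly generated text is wanted, pass
# return_full_text=False in the generator call above.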
|
|
| @app.route("/", methods=["GET"]) |
| def health(): |
| return jsonify({"status": "ok", "model": MODEL_ID}) |
|
|
if __name__ == "__main__":
    # Listen on all interfaces; 7860 is the port commonly expected by Hugging Face Spaces.
    app.run(host="0.0.0.0", port=7860)
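
# Example usage (a minimal sketch; assumes the server is running locally on port 7860):
#
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Write a Python function that reverses a string.", "max_tokens": 128}'
#
#   # Health check:
#   curl http://localhost:7860/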
|
|