| |
| import spaces |
| import torch |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
| |
# Lazily-initialized module-level singletons, shared by the functions below.
_model = None  # AutoModelForCausalLM; loaded on first call to generate_text_gpu()
_tokenizer = None  # AutoTokenizer; loaded by initialize_tokenizer()
_model_name = "microsoft/DialoGPT-small"  # Hugging Face hub id of the checkpoint
|
|
def initialize_tokenizer():
    """Load the tokenizer once and cache it; return the cached instance.

    Ensures a pad token exists (DialoGPT ships without one) by reusing
    the eos token, which generate() later relies on for padding.
    """
    global _tokenizer
    if _tokenizer is not None:
        return _tokenizer

    print("[MinimalService] Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(_model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    _tokenizer = tokenizer
    print("[MinimalService] Tokenizer loaded successfully.")
    return _tokenizer
|
|
@spaces.GPU
def generate_text_gpu(prompt: str, max_tokens: int = 50):
    """GPU function for text generation.

    Lazily loads the tokenizer and model on first call, then samples a
    continuation of ``prompt``.

    Args:
        prompt: Input text to continue.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded text (prompt plus continuation), special tokens stripped.
    """
    global _model, _tokenizer

    print("[MinimalService] GPU function called")

    if _tokenizer is None:
        initialize_tokenizer()

    if _model is None:
        print("[MinimalService] Loading model...")
        _model = AutoModelForCausalLM.from_pretrained(
            _model_name,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("[MinimalService] Model loaded.")

    # Use the tokenizer's __call__ (not .encode) so we also get an
    # attention_mask. Without it, generate() emits a warning and — because
    # pad_token was aliased to eos_token — cannot distinguish padding from
    # real eos tokens, which makes generation unreliable.
    encoded = _tokenizer(prompt, return_tensors="pt")
    device = next(_model.parameters()).device
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    with torch.no_grad():
        outputs = _model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            pad_token_id=_tokenizer.eos_token_id,
        )

    response = _tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
|
|
class MinimalService:
    """Thin facade over the module-level GPU generation function.

    Constructing it eagerly loads the tokenizer so the first generate()
    call only has to load the model.
    """

    def __init__(self):
        print("[MinimalService] Service initialized")
        initialize_tokenizer()

    def generate(self, prompt: str):
        """Delegate text generation for ``prompt`` to the GPU function."""
        return generate_text_gpu(prompt)
|
|
| |
| service = MinimalService() |
|
|
| |
| print(f"[MinimalService] GPU function available: {generate_text_gpu.__name__}") |
|
|
| |
|
|
| |
| import gradio as gr |
| import spaces |
|
|
| |
| from minimal_service import service, generate_text_gpu |
|
|
| |
@spaces.GPU
def app_gpu_test():
    """Smoke-test: verify a @spaces.GPU-decorated function defined at the
    app level (not inside the service module) is detected and callable."""
    message = "App GPU function works"
    return message
|
|
# Log that both GPU entry points (imported and locally defined) exist,
# to diagnose whether Spaces' GPU detection sees them.
print("[App] GPU functions imported successfully")
print(f"[App] Service GPU function: {generate_text_gpu.__name__}")
print(f"[App] App GPU function: {app_gpu_test.__name__}")
|
|
| |
| from fastapi import FastAPI |
| from fastapi.responses import RedirectResponse |
|
|
def generate_response(user_input):
    """Gradio callback: run the service on ``user_input`` and format the result.

    Blank / whitespace-only input short-circuits with a prompt to the user;
    any service failure is reported as text rather than raised into the UI.
    """
    if not user_input.strip():
        return "Please enter some text!"

    try:
        generated = service.generate(user_input)
    except Exception as e:
        return f"Error: {str(e)}"
    return f"Generated: {generated}"
|
|
| |
# Build the Gradio UI. The resulting Blocks object (`demo`) is mounted
# onto the FastAPI app further down.
with gr.Blocks(title="Step 2: FastAPI Test") as demo:
    gr.Markdown("# Step 2: Testing FastAPI + GPU")
    gr.Markdown("Testing if adding FastAPI breaks GPU detection.")

    with gr.Row():
        prompt_box = gr.Textbox(
            label="Enter text",
            placeholder="Type something...",
            value="Hello, how are you?",
        )
        result_box = gr.Textbox(label="Generated response", interactive=False)

    run_button = gr.Button("Generate", variant="primary")
    run_button.click(fn=generate_response, inputs=[prompt_box], outputs=[result_box])
|
|
| |
app = FastAPI()


@app.get("/")
async def root():
    """Send visitors of the bare root URL to the mounted Gradio UI."""
    return RedirectResponse("/gradio")
|
|
| |
# Mount the Gradio Blocks UI under /gradio on the FastAPI instance;
# mount_gradio_app returns the (same) app with the routes attached.
app = gr.mount_gradio_app(app, demo, path="/gradio")


print("[App] FastAPI + Gradio setup completed")
|
|
if __name__ == "__main__":
    # Serve the combined FastAPI + Gradio app; 7860 is the standard
    # Hugging Face Spaces port.
    print("[App] Starting application...")
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)