"""
Test Client for Gemma 3n GGUF Backend

Demonstrates the complete integration working: health check, root info,
single-turn and multi-turn chat completions against the local backend.
"""
|
|
| import requests |
| import json |
| import time |
|
|
def test_gemma_backend():
    """Run smoke tests against the Gemma 3n GGUF backend's OpenAI-compatible API.

    Performs four checks against a backend assumed to be listening on
    http://localhost:8000:

      1. GET  /health                -- health/status metadata
      2. GET  /                      -- root service info and endpoint list
      3. POST /v1/chat/completions   -- single-turn chat completion
      4. POST /v1/chat/completions   -- multi-turn conversation (best-effort;
                                        a failure here does not fail the run)

    Returns:
        bool: True when checks 1-3 succeed, False otherwise.
    """
    base_url = "http://localhost:8000"
    # Local LLM inference can be slow, so be generous — but still bound every
    # request so the script fails instead of hanging forever if the backend
    # is unreachable or wedged.
    timeout = 120

    print("🧪 Testing Gemma 3n GGUF Backend Integration")
    print("=" * 50)

    # --- 1. Health endpoint ----------------------------------------------
    print("\n1. 🏥 Testing Health Endpoint")
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        health_data = response.json()
        print(f"✅ Health Status: {health_data['status']}")
        print(f"🤖 Model: {health_data['model']}")
        print(f"🛠️ Backend: {health_data['backend']}")
        print(f"📋 Version: {health_data['version']}")
    except Exception as e:
        print(f"❌ Health check failed: {e}")
        return False

    # --- 2. Root info endpoint -------------------------------------------
    print("\n2. 📍 Testing Root Info Endpoint")
    try:
        response = requests.get(f"{base_url}/", timeout=timeout)
        root_data = response.json()
        print(f"✅ Service: {root_data['message']}")
        # 'model_loaded' may be absent in older backend versions; don't crash.
        print(f"📦 Model Loaded: {root_data.get('model_loaded', 'unknown')}")
        print(f"🎯 Available Endpoints: {', '.join(root_data['endpoints'].keys())}")
    except Exception as e:
        print(f"❌ Root info failed: {e}")
        return False

    # --- 3. Single-turn chat completion ----------------------------------
    print("\n3. 💬 Testing Chat Completion")
    chat_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "Hello! What is your name and what can you help me with?"}
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }

    try:
        start_time = time.time()
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=chat_request,
            timeout=timeout,
        )
        end_time = time.time()

        if response.status_code == 200:
            chat_data = response.json()
            print("✅ Chat completion successful!")
            print(f"⚡ Response time: {end_time - start_time:.2f}s")
            print(f"🎯 Model: {chat_data['model']}")
            print(f"🔢 Completion ID: {chat_data['id']}")

            assistant_message = chat_data['choices'][0]['message']['content']
            print("\n🤖 Assistant Response:")
            print(f"   {assistant_message}")
            print(f"📝 Finish Reason: {chat_data['choices'][0]['finish_reason']}")
        else:
            print(f"❌ Chat completion failed with status: {response.status_code}")
            print(f"📄 Response: {response.text}")
            return False

    except Exception as e:
        print(f"❌ Chat completion failed: {e}")
        return False

    # --- 4. Multi-turn conversation (best-effort, does not gate success) --
    print("\n4. 🔄 Testing Multi-turn Conversation")
    multi_turn_request = {
        "model": "gemma-3n-e4b-it",
        "messages": [
            {"role": "user", "content": "What is AI?"},
            {"role": "assistant", "content": "AI stands for Artificial Intelligence. It refers to the simulation of human intelligence in machines."},
            {"role": "user", "content": "What are some practical applications?"}
        ],
        "max_tokens": 100,
        "temperature": 0.5,
    }

    try:
        response = requests.post(
            f"{base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=multi_turn_request,
            timeout=timeout,
        )

        if response.status_code == 200:
            chat_data = response.json()
            print("✅ Multi-turn conversation successful!")
            assistant_response = chat_data['choices'][0]['message']['content']
            # Truncate: follow-up replies can be long; we only need a preview.
            print(f"🤖 Follow-up Response: {assistant_response[:100]}...")
        else:
            print(f"❌ Multi-turn failed with status: {response.status_code}")

    except Exception as e:
        print(f"❌ Multi-turn conversation failed: {e}")

    print("\n" + "=" * 50)
    print("🎉 Gemma 3n GGUF Backend Integration Test Complete!")
    print("✅ Your app is successfully using the Gemma-3n-E4B-it-GGUF model!")

    return True
|
|
# Script entry point: give the operator a moment to start the backend,
# then run the smoke tests and print a pass/fail summary.
if __name__ == "__main__":
    print("🚀 Starting Gemma 3n Integration Test...")
    print("📌 Make sure the backend is running: python3 gemma_gguf_backend.py")
    print("⏳ Waiting 2 seconds for you to start the backend if needed...")
    time.sleep(2)

    success = test_gemma_backend()

    if success:
        print("\n🎯 Integration Summary:")
        print("   ✅ Backend is running correctly")
        print("   ✅ OpenAI-compatible API working")
        print("   ✅ Gemma 3n model integration successful")
        print("   ✅ Ready for production use!")
    else:
        print("\n❌ Some tests failed. Check the backend logs.")
        print("💡 Make sure to run: python3 gemma_gguf_backend.py")
|
|