"""
Monitor the Mistral Nemo service startup and run tests when ready.
"""
import json
import sys
import time

import requests
|
|
def check_service_health(base_url="http://localhost:8001", timeout=5):
    """Report whether the service's health endpoint says it is healthy.

    Sends a GET request to ``{base_url}/health`` and treats the service as
    ready only when the reply is HTTP 200 with a JSON object whose
    ``"status"`` field equals ``"healthy"``.

    Args:
        base_url: Root URL of the service; a trailing slash is tolerated.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if the service reported itself healthy; False for any other
        status code, any other payload, or any request failure.
    """
    url = base_url.rstrip("/") + "/health"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Refused connections and timeouts are expected while the service
        # is still starting, so they simply mean "not ready yet".
        return False

    if response.status_code != 200:
        return False

    try:
        data = response.json()
    except ValueError:
        # A 200 reply with a non-JSON body is not a healthy service. Older
        # requests releases raise a plain ValueError here, which is not a
        # RequestException, so it has to be caught separately.
        return False

    # Valid JSON that is not an object (list, string, ...) has no .get().
    return isinstance(data, dict) and data.get("status") == "healthy"
|
|
def test_chat_completion(
    base_url="http://localhost:8001",
    model="unsloth/Mistral-Nemo-Instruct-2407",
    timeout=30,
):
    """Send one small chat request and report whether it succeeded.

    Posts a single user message to ``{base_url}/v1/chat/completions`` and
    prints either the text of the first returned choice or a description of
    what went wrong.

    Args:
        base_url: Root URL of the service; a trailing slash is tolerated.
        model: Model identifier placed in the request body.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if the service answered HTTP 200 with a body containing
        ``choices[0].message.content``; False otherwise. No exception from
        the request or from reading the response body is propagated.
    """
    url = base_url.rstrip("/") + "/v1/chat/completions"
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": "Hello! Please say 'Service is working correctly' if you can read this."}
        ],
        "max_tokens": 50,
        "temperature": 0.7,
    }

    try:
        response = requests.post(
            url,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        print(f"❌ Chat completion error: {e}")
        return False

    if response.status_code != 200:
        print(f"❌ Chat completion failed: {response.status_code} - {response.text}")
        return False

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 reply whose body is not JSON, or lacks the expected fields,
        # is a failed test rather than a reason to crash the monitor.
        print(f"❌ Chat completion returned an unexpected response: {e!r}")
        return False

    print(f"✅ Chat completion successful: {content}")
    return True
|
|
def monitor_service(max_checks=300, interval=5):
    """Poll the health endpoint until the service is ready, then test it.

    Calls ``check_service_health()`` up to ``max_checks`` times, sleeping
    ``interval`` seconds after each unsuccessful check. As soon as a check
    succeeds, ``test_chat_completion()`` is run once and its outcome becomes
    the result; the health check is not retried after that.

    Args:
        max_checks: Maximum number of health checks before giving up.
        interval: Seconds to sleep between unsuccessful health checks.

    Returns:
        True if the service became healthy and the chat completion test
        passed; False if the test failed or the service never became healthy.
    """
    print("🔍 Monitoring Mistral Nemo service startup...")
    print("📥 Waiting for model download and loading to complete...")

    for check_count in range(1, max_checks + 1):
        if check_service_health():
            print("\n🎉 Service is healthy! Running tests...")

            if test_chat_completion():
                print("\n✅ All tests passed! Mistral Nemo service is fully operational.")
                return True

            print("\n⚠️ Service health check passed but chat completion failed.")
            return False

        # Animate 0-3 trailing dots; pad to width 3 so the carriage return
        # always overwrites the previous, possibly longer, line.
        dots = "." * (check_count % 4)
        print(
            f"\r⏳ Waiting for service to be ready{dots:<3} ({check_count}/{max_checks})",
            end="",
            # Without a newline the text stays in the output buffer unless
            # it is flushed explicitly, so the progress would not be shown.
            flush=True,
        )
        time.sleep(interval)

    print(f"\n❌ Service didn't become ready after {max_checks * interval} seconds")
    return False
|
|
if __name__ == "__main__":
    # Exit status 0 when monitor_service() returns a truthy value, else 1.
    exit_code = 0 if monitor_service() else 1
    sys.exit(exit_code)
|
|