"""Stream a text generation from a local HTTP endpoint and print it.

The script POSTs a short chat history to ``url`` and reads the response
line by line.  Lines prefixed with ``data: `` are parsed as JSON and the
``text`` field of the first entry in ``choices`` is printed as it arrives;
a line starting with ``data: [DONE]`` ends the stream.
"""
import json

import requests


# Chat history sent to the endpoint; the last entry is the prompt to answer.
messages = [
    {"role": "user", "content": "helo"},
    {"role": "assistant", "content": "Hello! How can I assist you today?"},
    {"role": "user", "content": "who are you and give me a breif description of who created you "},
]

model = "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo"

url = "http://127.0.0.1:8000/api/v1/text/generate"

payload = {
    "messages": messages,
    "model": model,
    "api_key": "",
}

# Prefix carried by every payload line, and the sentinel that ends the stream.
DATA_PREFIX = "data: "
DONE_MARKER = "data: [DONE]"

# (connect, read) timeouts in seconds.  Without a timeout ``requests`` waits
# forever; the read timeout bounds the gap between two received chunks, not
# the total duration of the stream.
REQUEST_TIMEOUT = (5, 120)


# A streamed response keeps its connection open until it is fully consumed
# or closed, so use it as a context manager to release the connection even
# when the loop exits early on the [DONE] marker.
with requests.post(
    url, json=payload, stream=True, timeout=REQUEST_TIMEOUT
) as response:
    if response.status_code == 200:
        for line in response.iter_lines():
            if not line:
                # Blank keep-alive / separator line.
                continue

            # Raw echo of the received bytes, kept from the original script.
            print(line)

            decoded_line = line.decode("utf-8")
            if decoded_line.startswith(DONE_MARKER):
                break
            if not decoded_line.startswith(DATA_PREFIX):
                continue

            try:
                json_data = json.loads(decoded_line[len(DATA_PREFIX):])
            except json.JSONDecodeError:
                # Best effort: skip chunks that are not valid JSON.
                continue

            # Tolerate chunks without a usable "choices" list instead of
            # crashing with KeyError/TypeError in the middle of the stream.
            choices = json_data.get("choices") if isinstance(json_data, dict) else None
            if not isinstance(choices, list) or not choices:
                continue
            first_choice = choices[0]
            if isinstance(first_choice, dict) and "text" in first_choice:
                # Flush so the text shows up as it is streamed.
                print(first_choice["text"], end="", flush=True)
    else:
        print(f"Request failed with status code {response.status_code}")