| |
import os

from openai import OpenAI

# Pin any local CUDA usage to GPU 0. NOTE(review): this script is a pure HTTP
# client and does not load a model itself — this only matters if the serving
# process inherits this environment; confirm it is actually needed here.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
|
|
|
|
def get_infer_request():
    """Build the demo request: one user question plus one tool schema.

    Returns:
        A ``(messages, tools)`` pair — a single-element message list asking
        about the weather in Beijing, and a single-element tool list with a
        JSON-schema description of ``get_current_weather``.
    """
    weather_tool = {
        'name': 'get_current_weather',
        'description': 'Get the current weather in a given location',
        'parameters': {
            'type': 'object',
            'properties': {
                'location': {
                    'type': 'string',
                    'description': 'The city and state, e.g. San Francisco, CA'
                },
                'unit': {
                    'type': 'string',
                    'enum': ['celsius', 'fahrenheit']
                }
            },
            'required': ['location']
        }
    }
    user_turn = {'role': 'user', 'content': "How's the weather in Beijing today?"}
    return [user_turn], [weather_tool]
|
|
|
|
def infer(client, model: str, messages, tools):
    """Run a two-turn tool-calling round trip (non-streaming).

    First turn: send the user query with the tool schema and print the model's
    reply and any tool calls. Second turn: append a hard-coded fake tool result
    and ask the model to produce the final answer.

    Args:
        client: An OpenAI-compatible client pointed at the deployed server.
        model: Model id to query.
        messages: Initial chat messages; the caller's list is not mutated.
        tools: Tool schemas forwarded to the server.
    """
    messages = messages.copy()
    query = messages[0]['content']
    resp = client.chat.completions.create(model=model, messages=messages, tools=tools, max_tokens=512, temperature=0)
    message = resp.choices[0].message
    # content is None when the model answers with a tool call only (the
    # expected case here); fall back to '' so the assistant message we send
    # back in turn two always carries a valid string.
    response = message.content or ''
    print(f'query: {query}')
    print(f'response: {response}')
    print(f'tool_calls: {message.tool_calls}')

    # Hard-coded stand-in for the tool's real output.
    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
    print(f'tool_response: {tool}')
    messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
    resp = client.chat.completions.create(model=model, messages=messages, tools=tools, max_tokens=512, temperature=0)
    response2 = resp.choices[0].message.content
    print(f'response2: {response2}')
|
|
|
|
| |
def infer_stream(client, model: str, messages, tools):
    """Run the same two-turn tool-calling round trip, streaming both replies.

    Tokens are printed as they arrive. After the first stream finishes, a
    hard-coded fake tool result is appended and the final answer is streamed.

    Args:
        client: An OpenAI-compatible client pointed at the deployed server.
        model: Model id to query.
        messages: Initial chat messages; the caller's list is not mutated.
        tools: Tool schemas forwarded to the server.
    """
    messages = messages.copy()
    query = messages[0]['content']
    gen = client.chat.completions.create(
        model=model, messages=messages, tools=tools, max_tokens=512, temperature=0, stream=True)
    response = ''
    print(f'query: {query}\nresponse: ', end='')
    for chunk in gen:
        # delta.content is None on tool-call and finish chunks; '' keeps the
        # accumulation from raising TypeError.
        delta = chunk.choices[0].delta.content or ''
        response += delta
        print(delta, end='', flush=True)
    print()
    # Only the last chunk is inspected here; tool-call fragments from earlier
    # chunks are not accumulated in this demo.
    print(f'tool_calls: {chunk.choices[0].delta.tool_calls}')

    # Hard-coded stand-in for the tool's real output.
    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
    print(f'tool_response: {tool}')
    messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
    gen = client.chat.completions.create(
        model=model, messages=messages, tools=tools, max_tokens=512, temperature=0, stream=True)
    print(f'query: {query}\nresponse2: ', end='')
    for chunk in gen:
        print(chunk.choices[0].delta.content or '', end='', flush=True)
    print()
|
|
|
|
if __name__ == '__main__':
    # Connect to the locally deployed OpenAI-compatible server and use the
    # first (and presumably only) model it advertises.
    host: str = '127.0.0.1'
    port: int = 8000
    base_url = f'http://{host}:{port}/v1'
    client = OpenAI(api_key='EMPTY', base_url=base_url)
    deployed_models = client.models.list().data
    model = deployed_models[0].id
    print(f'model: {model}')

    messages, tools = get_infer_request()
    infer(client, model, messages, tools)
    infer_stream(client, model, messages, tools)
|
|