| from flask import Flask |
| import ollama |
|
|
|
|
def ps() -> str:
    """Render the models currently loaded in Ollama as an HTML fragment.

    Asks the Ollama client for its running models and emits one ``<p>``
    element per reported field, with a blank paragraph after each model.

    Returns:
        The ``<p>`` elements joined with newlines; an empty string when no
        models are reported.
    """
    # Imported under an alias so the library call is not confused with this
    # function, which shares the name ``ps``.
    from ollama import ProcessResponse
    from ollama import ps as list_running_models

    response: ProcessResponse = list_running_models()

    lines = []
    for model in response.models:
        lines.extend((
            # The heading tag is closed properly; the original emitted a
            # second opening <h3>, leaving the element unterminated.
            f'<h3>Mode</h3>: {model.model}',
            f'\t Digest: {model.digest}',
            f'\t Expires at: {model.expires_at}',
            f'\t Size: {model.size}',
            f'\t Size vram: {model.size_vram}',
            f'\t Details: {model.details}',
            f'\t Context length: {model.context_length}',
            '\n',  # visual separator between models
        ))

    return '\n'.join(f'<p>{line}</p>' for line in lines)
|
|
def time_model(model_name='qwen3:0.6b') -> str:
    """Send a fixed prompt to an Ollama model and report timing as HTML.

    Args:
        model_name: Name of the Ollama model to query.

    Returns:
        Newline-joined ``<p>`` elements holding the model name, the start
        time, the wall-clock duration of the chat call, the reply length in
        characters, the throughput, and the reply text itself.
    """
    from datetime import datetime

    from ollama import chat

    messages = [
        {
            'role': 'user',
            'content': 'Расскажи о себе подробно',
        },
    ]

    t_start = datetime.now()
    response = chat(model_name, messages=messages)
    # Measure immediately after the call so only the request is timed.
    response_time = datetime.now() - t_start

    content = response['message']['content']
    response_len = len(content)

    # total_seconds() keeps the fractional part, so a reply that arrives in
    # under a second no longer divides by zero (``.seconds`` truncated to 0).
    elapsed_seconds = response_time.total_seconds()
    # NOTE: this is characters per second; the "token/sek" label is kept
    # unchanged for output compatibility.
    response_speed = response_len / elapsed_seconds if elapsed_seconds > 0 else 0.0

    report = [
        f' <h3> {model_name}</h3>',
        f' start {t_start} ',
        f' duration {response_time} ',
        f' lehgth {response_len}',
        f' token/sek {response_speed}',
        content,
    ]
    return '\n'.join(f'<p>{line}</p>' for line in report)
|
|
|
|