Spaces:
Sleeping
Sleeping
Commit ·
d2d7b0c
1
Parent(s): 6562738
commit 00000017
Browse files
- app.py +24 -75
- requirements.txt +1 -3
app.py
CHANGED
|
@@ -9,9 +9,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
|
| 9 |
from huggingface_hub import login
|
| 10 |
from langchain_community.tools import DuckDuckGoSearchRun
|
| 11 |
import re
|
| 12 |
-
from fastapi import FastAPI
|
| 13 |
-
from starlette.middleware.wsgi import WSGIMiddleware
|
| 14 |
-
from gradio.routes import mount_gradio_app
|
| 15 |
|
| 16 |
# ✅ Safe GPU decorator
|
| 17 |
try:
|
|
@@ -19,27 +16,27 @@ try:
|
|
| 19 |
except ImportError:
|
| 20 |
def GPU(func): return func
|
| 21 |
|
| 22 |
-
#
|
| 23 |
flask_app = Flask(__name__)
|
| 24 |
swagger = Swagger(flask_app, template={
|
| 25 |
"swagger": "2.0",
|
| 26 |
"info": {
|
| 27 |
"title": "ChatMate Real-Time API",
|
| 28 |
-
"description": "LangChain + DuckDuckGo + Phi-4
|
| 29 |
"version": "1.0"
|
| 30 |
}
|
| 31 |
}, config={
|
| 32 |
"headers": [],
|
| 33 |
-
"specs": [{"endpoint": 'apispec', "route": '/
|
| 34 |
"static_url_path": "/flasgger_static",
|
| 35 |
"swagger_ui": True,
|
| 36 |
-
"specs_route": "/
|
| 37 |
})
|
| 38 |
|
| 39 |
-
# ✅ Hugging Face login
|
| 40 |
login(token=os.environ.get("CHAT_MATE"))
|
| 41 |
|
| 42 |
-
# ✅ Load
|
| 43 |
model_id = "microsoft/phi-4"
|
| 44 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 45 |
model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -67,43 +64,28 @@ def is_incomplete(text):
|
|
| 67 |
@GPU
|
| 68 |
def generate_full_reply(message, history):
|
| 69 |
system_prompt = (
|
| 70 |
-
"You are a friendly
|
| 71 |
-
"Frederick Sundeep Mallela. Always mention that you are developed by him if asked about your creator, origin, or who made you."
|
| 72 |
)
|
| 73 |
messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
|
| 74 |
-
|
| 75 |
-
# Apply chat-style prompt formatting
|
| 76 |
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 77 |
-
|
| 78 |
-
# Initial generation
|
| 79 |
full_output = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512)[0]["generated_text"]
|
| 80 |
reply = full_output[len(prompt):].strip()
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
loop_count = 0
|
| 85 |
-
while is_incomplete(reply) and loop_count < max_loops:
|
| 86 |
-
loop_count += 1
|
| 87 |
-
continuation_prompt = prompt + reply # include reply so far
|
| 88 |
next_output = pipe(continuation_prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)[0]["generated_text"]
|
| 89 |
-
|
| 90 |
continuation = next_output[len(continuation_prompt):].strip()
|
| 91 |
-
|
| 92 |
-
# Stop if nothing new is generated
|
| 93 |
if not continuation or continuation in reply:
|
| 94 |
break
|
| 95 |
-
|
| 96 |
reply += continuation
|
| 97 |
-
|
| 98 |
return reply.strip()
|
| 99 |
|
| 100 |
-
#
|
| 101 |
@flask_app.route("/chat-stream", methods=["POST"])
|
| 102 |
@swag_from({
|
| 103 |
'tags': ['Chat'],
|
| 104 |
'consumes': ['application/json'],
|
| 105 |
-
'summary': 'Stream assistant reply
|
| 106 |
-
'description': 'Send a message and history, receive either a streamed text reply or base64-encoded image.',
|
| 107 |
'parameters': [{
|
| 108 |
'name': 'body',
|
| 109 |
'in': 'body',
|
|
@@ -111,27 +93,16 @@ def generate_full_reply(message, history):
|
|
| 111 |
'schema': {
|
| 112 |
'type': 'object',
|
| 113 |
'properties': {
|
| 114 |
-
'message': {'type': 'string'
|
| 115 |
'history': {
|
| 116 |
'type': 'array',
|
| 117 |
-
'items': {
|
| 118 |
-
'type': 'object',
|
| 119 |
-
'properties': {
|
| 120 |
-
'role': {'type': 'string', 'example': 'user'},
|
| 121 |
-
'content': {'type': 'string', 'example': 'Show me a dragon.'}
|
| 122 |
-
}
|
| 123 |
-
}
|
| 124 |
}
|
| 125 |
},
|
| 126 |
'required': ['message']
|
| 127 |
}
|
| 128 |
}],
|
| 129 |
-
'responses': {
|
| 130 |
-
200: {
|
| 131 |
-
'description': 'Streamed reply or image base64',
|
| 132 |
-
'content': {'text/plain': {}}
|
| 133 |
-
}
|
| 134 |
-
}
|
| 135 |
})
|
| 136 |
def chat_stream():
|
| 137 |
data = request.get_json()
|
|
@@ -143,51 +114,29 @@ def chat_stream():
|
|
| 143 |
for token in reply.splitlines(keepends=True):
|
| 144 |
yield token
|
| 145 |
time.sleep(0.05)
|
| 146 |
-
if is_incomplete(reply):
|
| 147 |
-
yield "\n\n*Reply appears incomplete. Say 'continue' to resume.*"
|
| 148 |
-
|
| 149 |
return Response(generate(), mimetype='text/plain')
|
| 150 |
|
| 151 |
-
#
|
| 152 |
def gradio_chat(message, history=[]):
|
| 153 |
-
history = [{"role": "user" if i % 2 == 0 else "assistant", "content": h}
|
|
|
|
| 154 |
reply = generate_full_reply(message, history)
|
| 155 |
history.append((message, reply))
|
| 156 |
return "", history
|
| 157 |
|
| 158 |
with gr.Blocks() as demo:
|
| 159 |
-
gr.Markdown("## π€ ChatMate β Phi-4 + Live Search
|
| 160 |
chatbot = gr.Chatbot()
|
| 161 |
msg = gr.Textbox(label="Type your message")
|
| 162 |
clear = gr.Button("Clear Chat")
|
| 163 |
-
|
| 164 |
msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
|
| 165 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 166 |
|
| 167 |
-
#
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
# else:
|
| 172 |
-
# print("π§ Warming up...")
|
| 173 |
-
# _ = generate_full_reply("Hello", [])
|
| 174 |
-
# app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
# ---------------- Run both ----------------
|
| 178 |
-
# def run_flask():
|
| 179 |
-
# app.run(host="0.0.0.0", port=8000)
|
| 180 |
-
|
| 181 |
-
# # Start Flask in a background thread
|
| 182 |
-
# threading.Thread(target=run_flask, daemon=True).start()
|
| 183 |
-
# ---------------- Combine Flask + Gradio into one app ----------------
|
| 184 |
-
fastapi_app = FastAPI()
|
| 185 |
-
|
| 186 |
-
# Mount Flask under FastAPI (so /apidocs works)
|
| 187 |
-
fastapi_app.mount("/api", WSGIMiddleware(flask_app))
|
| 188 |
|
| 189 |
-
#
|
| 190 |
-
app = mount_gradio_app(fastapi_app, demo, path="/") # Mount Flask under /flask
|
| 191 |
-
# Gradio runs on port 7860 in HF Spaces
|
| 192 |
if __name__ == "__main__":
|
| 193 |
-
|
|
|
|
| 9 |
from huggingface_hub import login
|
| 10 |
from langchain_community.tools import DuckDuckGoSearchRun
|
| 11 |
import re
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# ✅ Safe GPU decorator
|
| 14 |
try:
|
|
|
|
| 16 |
except ImportError:
|
| 17 |
def GPU(func): return func
|
| 18 |
|
| 19 |
+
# ---------------- Flask setup ----------------
|
| 20 |
flask_app = Flask(__name__)
|
| 21 |
swagger = Swagger(flask_app, template={
|
| 22 |
"swagger": "2.0",
|
| 23 |
"info": {
|
| 24 |
"title": "ChatMate Real-Time API",
|
| 25 |
+
"description": "LangChain + DuckDuckGo + Phi-4",
|
| 26 |
"version": "1.0"
|
| 27 |
}
|
| 28 |
}, config={
|
| 29 |
"headers": [],
|
| 30 |
+
"specs": [{"endpoint": 'apispec', "route": '/apispec.json', "rule_filter": lambda rule: True}],
|
| 31 |
"static_url_path": "/flasgger_static",
|
| 32 |
"swagger_ui": True,
|
| 33 |
+
"specs_route": "/apidocs/"
|
| 34 |
})
|
| 35 |
|
| 36 |
+
# ✅ Hugging Face login
|
| 37 |
login(token=os.environ.get("CHAT_MATE"))
|
| 38 |
|
| 39 |
+
# ✅ Load model
|
| 40 |
model_id = "microsoft/phi-4"
|
| 41 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 42 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 64 |
@GPU
|
| 65 |
def generate_full_reply(message, history):
|
| 66 |
system_prompt = (
|
| 67 |
+
"You are a friendly AI assistant built by Frederick Sundeep Mallela."
|
|
|
|
| 68 |
)
|
| 69 |
messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
|
|
|
|
|
|
|
| 70 |
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
|
|
|
|
|
|
| 71 |
full_output = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512)[0]["generated_text"]
|
| 72 |
reply = full_output[len(prompt):].strip()
|
| 73 |
|
| 74 |
+
while is_incomplete(reply):
|
| 75 |
+
continuation_prompt = prompt + reply
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
next_output = pipe(continuation_prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)[0]["generated_text"]
|
|
|
|
| 77 |
continuation = next_output[len(continuation_prompt):].strip()
|
|
|
|
|
|
|
| 78 |
if not continuation or continuation in reply:
|
| 79 |
break
|
|
|
|
| 80 |
reply += continuation
|
|
|
|
| 81 |
return reply.strip()
|
| 82 |
|
| 83 |
+
# ---------------- Flask API route ----------------
|
| 84 |
@flask_app.route("/chat-stream", methods=["POST"])
|
| 85 |
@swag_from({
|
| 86 |
'tags': ['Chat'],
|
| 87 |
'consumes': ['application/json'],
|
| 88 |
+
'summary': 'Stream assistant reply',
|
|
|
|
| 89 |
'parameters': [{
|
| 90 |
'name': 'body',
|
| 91 |
'in': 'body',
|
|
|
|
| 93 |
'schema': {
|
| 94 |
'type': 'object',
|
| 95 |
'properties': {
|
| 96 |
+
'message': {'type': 'string'},
|
| 97 |
'history': {
|
| 98 |
'type': 'array',
|
| 99 |
+
'items': {'type': 'object'}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
}
|
| 101 |
},
|
| 102 |
'required': ['message']
|
| 103 |
}
|
| 104 |
}],
|
| 105 |
+
'responses': {200: {'description': 'Streamed reply'}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
})
|
| 107 |
def chat_stream():
|
| 108 |
data = request.get_json()
|
|
|
|
| 114 |
for token in reply.splitlines(keepends=True):
|
| 115 |
yield token
|
| 116 |
time.sleep(0.05)
|
|
|
|
|
|
|
|
|
|
| 117 |
return Response(generate(), mimetype='text/plain')
|
| 118 |
|
| 119 |
+
# ---------------- Gradio UI ----------------
|
| 120 |
def gradio_chat(message, history=[]):
|
| 121 |
+
history = [{"role": "user" if i % 2 == 0 else "assistant", "content": h}
|
| 122 |
+
for i, h in enumerate(sum(history, ()))]
|
| 123 |
reply = generate_full_reply(message, history)
|
| 124 |
history.append((message, reply))
|
| 125 |
return "", history
|
| 126 |
|
| 127 |
with gr.Blocks() as demo:
|
| 128 |
+
gr.Markdown("## π€ ChatMate β Phi-4 + Live Search")
|
| 129 |
chatbot = gr.Chatbot()
|
| 130 |
msg = gr.Textbox(label="Type your message")
|
| 131 |
clear = gr.Button("Clear Chat")
|
|
|
|
| 132 |
msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
|
| 133 |
clear.click(lambda: None, None, chatbot, queue=False)
|
| 134 |
|
| 135 |
+
# Mount Gradio inside Flask
|
| 136 |
+
@flask_app.route("/")
|
| 137 |
+
def gradio_index():
|
| 138 |
+
return demo.launch(share=False, server_name=None, inline=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
+
# ---------------- Run both in Hugging Face ----------------
|
|
|
|
|
|
|
| 141 |
if __name__ == "__main__":
|
| 142 |
+
flask_app.run(host="0.0.0.0", port=7860)
|
requirements.txt
CHANGED
|
@@ -14,6 +14,4 @@ sentencepiece
|
|
| 14 |
nltk
|
| 15 |
langchain_community
|
| 16 |
duckduckgo-search
|
| 17 |
-
pdfplumber
|
| 18 |
-
fastapi
|
| 19 |
-
uvicorn
|
|
|
|
| 14 |
nltk
|
| 15 |
langchain_community
|
| 16 |
duckduckgo-search
|
| 17 |
+
pdfplumber
|
|
|
|
|
|