Spaces:

FrederickSundeep
/

ChatMateAPI

Sleeping

App Files Files

FrederickSundeep committed on Aug 14, 2025

Commit

d2d7b0c

1 Parent(s): 6562738

commit 00000017

Browse files

Files changed (2) hide show

app.py +24 -75
requirements.txt +1 -3

app.py CHANGED Viewed

@@ -9,9 +9,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from huggingface_hub import login
 from langchain_community.tools import DuckDuckGoSearchRun
 import re
-from fastapi import FastAPI
-from starlette.middleware.wsgi import WSGIMiddleware
-from gradio.routes import mount_gradio_app
 # ✅ Safe GPU decorator
 try:
@@ -19,27 +16,27 @@ try:
 except ImportError:
     def GPU(func): return func
-# ✅ Flask setup
 flask_app = Flask(__name__)
 swagger = Swagger(flask_app, template={
     "swagger": "2.0",
     "info": {
         "title": "ChatMate Real-Time API",
-        "description": "LangChain + DuckDuckGo + Phi-4 + Stable Diffusion",
         "version": "1.0"
     }
 }, config={
         "headers": [],
-        "specs": [{"endpoint": 'apispec', "route": '/api/apispec.json', "rule_filter": lambda rule: True}],
         "static_url_path": "/flasgger_static",
         "swagger_ui": True,
-        "specs_route": "/api/apidocs/"
     })
-# ✅ Hugging Face login (optional)
 login(token=os.environ.get("CHAT_MATE"))
-# ✅ Load Phi-4
 model_id = "microsoft/phi-4"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
@@ -67,43 +64,28 @@ def is_incomplete(text):
 @GPU
 def generate_full_reply(message, history):
     system_prompt = (
-    "You are a friendly, helpful, and conversational AI assistant built by "
-    "Frederick Sundeep Mallela. Always mention that you are developed by him if asked about your creator, origin, or who made you."
     )
     messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
-    # Apply chat-style prompt formatting
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # Initial generation
     full_output = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512)[0]["generated_text"]
     reply = full_output[len(prompt):].strip()
-    # Keep extending the reply until it ends properly
-    max_loops = 5  # prevent infinite loops
-    loop_count = 0
-    while is_incomplete(reply) and loop_count < max_loops:
-        loop_count += 1
-        continuation_prompt = prompt + reply  # include reply so far
         next_output = pipe(continuation_prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)[0]["generated_text"]
         continuation = next_output[len(continuation_prompt):].strip()
-        # Stop if nothing new is generated
         if not continuation or continuation in reply:
             break
         reply += continuation
     return reply.strip()
-# ✅ Flask streaming endpoint
 @flask_app.route("/chat-stream", methods=["POST"])
 @swag_from({
     'tags': ['Chat'],
     'consumes': ['application/json'],
-    'summary': 'Stream assistant reply or image',
-    'description': 'Send a message and history, receive either a streamed text reply or base64-encoded image.',
     'parameters': [{
         'name': 'body',
         'in': 'body',
@@ -111,27 +93,16 @@ def generate_full_reply(message, history):
         'schema': {
             'type': 'object',
             'properties': {
-                'message': {'type': 'string', 'example': 'Draw a futuristic city.'},
                 'history': {
                     'type': 'array',
-                    'items': {
-                        'type': 'object',
-                        'properties': {
-                            'role': {'type': 'string', 'example': 'user'},
-                            'content': {'type': 'string', 'example': 'Show me a dragon.'}
-                        }
-                    }
                 }
             },
             'required': ['message']
         }
     }],
-    'responses': {
-        200: {
-            'description': 'Streamed reply or image base64',
-            'content': {'text/plain': {}}
-        }
-    }
 })
 def chat_stream():
     data = request.get_json()
@@ -143,51 +114,29 @@ def chat_stream():
         for token in reply.splitlines(keepends=True):
             yield token
             time.sleep(0.05)
-        if is_incomplete(reply):
-            yield "\n\n*Reply appears incomplete. Say 'continue' to resume.*"
     return Response(generate(), mimetype='text/plain')
-# ✅ Gradio interface for Hugging Face Space
 def gradio_chat(message, history=[]):
-    history = [{"role": "user" if i % 2 == 0 else "assistant", "content": h} for i, h in enumerate(sum(history, ()))]
     reply = generate_full_reply(message, history)
     history.append((message, reply))
     return "", history
 with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 ChatMate — Phi-4 + Live Search (Hugging Face Space)")
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="Type your message")
     clear = gr.Button("Clear Chat")
     msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
-# ✅ Run Gradio when in HF Spaces, else Flask for local dev
-# if __name__ == "__main__":
-#     if os.environ.get("SPACE_BUILD", "false").lower() == "true":
-#         demo.launch(server_name="0.0.0.0", server_port=7860)
-#     else:
-#         print("🔧 Warming up...")
-#         _ = generate_full_reply("Hello", [])
-#         app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
-# ---------------- Run both ----------------
-# def run_flask():
-#     app.run(host="0.0.0.0", port=8000)
-# # Start Flask in a background thread
-# threading.Thread(target=run_flask, daemon=True).start()
-# ---------------- Combine Flask + Gradio into one app ----------------
-fastapi_app = FastAPI()
-# Mount Flask under FastAPI (so /apidocs works)
-fastapi_app.mount("/api", WSGIMiddleware(flask_app))
-# Mount Gradio at root path (overrides Flask's "/")
-app = mount_gradio_app(fastapi_app, demo, path="/") # Mount Flask under /flask
-# Gradio runs on port 7860 in HF Spaces
 if __name__ == "__main__":
-     demo.launch(server_name="0.0.0.0", server_port=7860)

 from huggingface_hub import login
 from langchain_community.tools import DuckDuckGoSearchRun
 import re
 # ✅ Safe GPU decorator
 try:
 except ImportError:
     def GPU(func): return func
+# ---------------- Flask setup ----------------
 flask_app = Flask(__name__)
 swagger = Swagger(flask_app, template={
     "swagger": "2.0",
     "info": {
         "title": "ChatMate Real-Time API",
+        "description": "LangChain + DuckDuckGo + Phi-4",
         "version": "1.0"
     }
 }, config={
         "headers": [],
+        "specs": [{"endpoint": 'apispec', "route": '/apispec.json', "rule_filter": lambda rule: True}],
         "static_url_path": "/flasgger_static",
         "swagger_ui": True,
+        "specs_route": "/apidocs/"
     })
+# ✅ Hugging Face login
 login(token=os.environ.get("CHAT_MATE"))
+# ✅ Load model
 model_id = "microsoft/phi-4"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
 @GPU
 def generate_full_reply(message, history):
     """Generate an assistant reply, extending it while it still looks cut off.

     Args:
         message: The latest user message.
         history: Earlier turns as a list of chat-template message dicts
             (``{"role": ..., "content": ...}``); they are placed between the
             system prompt and ``message``.

     Returns:
         The generated reply with surrounding whitespace stripped.
     """
     system_prompt = (
         "You are a friendly AI assistant built by Frederick Sundeep Mallela."
     )
     messages = (
         [{"role": "system", "content": system_prompt}]
         + history
         + [{"role": "user", "content": message}]
     )
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     # The pipeline output echoes the prompt, so slice it off to get the reply.
     full_output = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512)[0]["generated_text"]
     reply = full_output[len(prompt):].strip()

     # Cap the number of continuation passes: with sampling enabled the model
     # can keep producing new-but-still-incomplete text, which would otherwise
     # keep this loop (and the request) running indefinitely.
     max_loops = 5
     for _ in range(max_loops):
         if not is_incomplete(reply):
             break
         continuation_prompt = prompt + reply
         next_output = pipe(continuation_prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)[0]["generated_text"]
         # Keep leading whitespace so the continuation does not get glued onto
         # the last word of the reply; only trailing whitespace is dropped.
         continuation = next_output[len(continuation_prompt):].rstrip()
         new_text = continuation.strip()
         # Stop when nothing new was generated or the model repeats itself.
         if not new_text or new_text in reply:
             break
         reply += continuation
     return reply.strip()
+# ---------------- Flask API route ----------------
 @flask_app.route("/chat-stream", methods=["POST"])
 @swag_from({
     'tags': ['Chat'],
     'consumes': ['application/json'],
+    'summary': 'Stream assistant reply',
     'parameters': [{
         'name': 'body',
         'in': 'body',
         'schema': {
             'type': 'object',
             'properties': {
+                'message': {'type': 'string'},
                 'history': {
                     'type': 'array',
+                    'items': {'type': 'object'}
                 }
             },
             'required': ['message']
         }
     }],
+    'responses': {200: {'description': 'Streamed reply'}}
 })
 def chat_stream():
     data = request.get_json()
         for token in reply.splitlines(keepends=True):
             yield token
             time.sleep(0.05)
     return Response(generate(), mimetype='text/plain')
+# ---------------- Gradio UI ----------------
 def gradio_chat(message, history=None):
     """Handle a Gradio textbox submit: generate a reply and extend the chat.

     Args:
         message: The text the user just submitted.
         history: The Chatbot value as a sequence of (user, assistant) pairs;
             ``None`` or empty means a fresh conversation.

     Returns:
         A ``("", pairs)`` tuple: an empty string for the textbox output and
         the pair history with the new ``(message, reply)`` turn appended.
     """
     # Copy instead of mutating the caller's list (or a shared default).
     turns = list(history) if history else []

     # Build role/content messages for the model separately from the pair
     # list, so the value handed back to the Chatbot stays a list of pairs.
     # Unpacking works for both tuple and list pairs.
     model_history = []
     for user_text, assistant_text in turns:
         model_history.append({"role": "user", "content": user_text})
         model_history.append({"role": "assistant", "content": assistant_text})

     reply = generate_full_reply(message, model_history)
     turns.append((message, reply))
     return "", turns
 with gr.Blocks() as demo:
+    gr.Markdown("## 🤖 ChatMate — Phi-4 + Live Search")
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="Type your message")
     clear = gr.Button("Clear Chat")
     msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
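+# Serve the Gradio UI from Flask's root route.
+# NOTE(review): the handler below calls demo.launch() on every request to "/"
+# and returns its result as the HTTP response. launch() appears to start a
+# Gradio server rather than produce a page, so this does not actually mount
+# Gradio inside Flask -- confirm the route works, or mount via an ASGI app.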
+@flask_app.route("/")
+def gradio_index():
+    return demo.launch(share=False, server_name=None, inline=True)
+# ---------------- Run both in Hugging Face ----------------
 if __name__ == "__main__":
+    flask_app.run(host="0.0.0.0", port=7860)

requirements.txt CHANGED Viewed

@@ -14,6 +14,4 @@ sentencepiece
 nltk
 langchain_community
 duckduckgo-search
-pdfplumber
-fastapi
-uvicorn

 nltk
 langchain_community
 duckduckgo-search
+pdfplumber