FrederickSundeep committed on
Commit
d2d7b0c
·
1 Parent(s): 6562738

commit 00000017

Browse files
Files changed (2) hide show
  1. app.py +24 -75
  2. requirements.txt +1 -3
app.py CHANGED
@@ -9,9 +9,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
  from huggingface_hub import login
10
  from langchain_community.tools import DuckDuckGoSearchRun
11
  import re
12
- from fastapi import FastAPI
13
- from starlette.middleware.wsgi import WSGIMiddleware
14
- from gradio.routes import mount_gradio_app
15
 
16
  # ✅ Safe GPU decorator
17
  try:
@@ -19,27 +16,27 @@ try:
19
  except ImportError:
20
  def GPU(func): return func
21
 
22
- # ✅ Flask setup
23
  flask_app = Flask(__name__)
24
  swagger = Swagger(flask_app, template={
25
  "swagger": "2.0",
26
  "info": {
27
  "title": "ChatMate Real-Time API",
28
- "description": "LangChain + DuckDuckGo + Phi-4 + Stable Diffusion",
29
  "version": "1.0"
30
  }
31
  }, config={
32
  "headers": [],
33
- "specs": [{"endpoint": 'apispec', "route": '/api/apispec.json', "rule_filter": lambda rule: True}],
34
  "static_url_path": "/flasgger_static",
35
  "swagger_ui": True,
36
- "specs_route": "/api/apidocs/"
37
  })
38
 
39
- # ✅ Hugging Face login (optional)
40
  login(token=os.environ.get("CHAT_MATE"))
41
 
42
- # ✅ Load Phi-4
43
  model_id = "microsoft/phi-4"
44
  tokenizer = AutoTokenizer.from_pretrained(model_id)
45
  model = AutoModelForCausalLM.from_pretrained(
@@ -67,43 +64,28 @@ def is_incomplete(text):
67
  @GPU
68
  def generate_full_reply(message, history):
69
  system_prompt = (
70
- "You are a friendly, helpful, and conversational AI assistant built by "
71
- "Frederick Sundeep Mallela. Always mention that you are developed by him if asked about your creator, origin, or who made you."
72
  )
73
  messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
74
-
75
- # Apply chat-style prompt formatting
76
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
77
-
78
- # Initial generation
79
  full_output = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512)[0]["generated_text"]
80
  reply = full_output[len(prompt):].strip()
81
 
82
- # Keep extending the reply until it ends properly
83
- max_loops = 5 # prevent infinite loops
84
- loop_count = 0
85
- while is_incomplete(reply) and loop_count < max_loops:
86
- loop_count += 1
87
- continuation_prompt = prompt + reply # include reply so far
88
  next_output = pipe(continuation_prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)[0]["generated_text"]
89
-
90
  continuation = next_output[len(continuation_prompt):].strip()
91
-
92
- # Stop if nothing new is generated
93
  if not continuation or continuation in reply:
94
  break
95
-
96
  reply += continuation
97
-
98
  return reply.strip()
99
 
100
- # ✅ Flask streaming endpoint
101
  @flask_app.route("/chat-stream", methods=["POST"])
102
  @swag_from({
103
  'tags': ['Chat'],
104
  'consumes': ['application/json'],
105
- 'summary': 'Stream assistant reply or image',
106
- 'description': 'Send a message and history, receive either a streamed text reply or base64-encoded image.',
107
  'parameters': [{
108
  'name': 'body',
109
  'in': 'body',
@@ -111,27 +93,16 @@ def generate_full_reply(message, history):
111
  'schema': {
112
  'type': 'object',
113
  'properties': {
114
- 'message': {'type': 'string', 'example': 'Draw a futuristic city.'},
115
  'history': {
116
  'type': 'array',
117
- 'items': {
118
- 'type': 'object',
119
- 'properties': {
120
- 'role': {'type': 'string', 'example': 'user'},
121
- 'content': {'type': 'string', 'example': 'Show me a dragon.'}
122
- }
123
- }
124
  }
125
  },
126
  'required': ['message']
127
  }
128
  }],
129
- 'responses': {
130
- 200: {
131
- 'description': 'Streamed reply or image base64',
132
- 'content': {'text/plain': {}}
133
- }
134
- }
135
  })
136
  def chat_stream():
137
  data = request.get_json()
@@ -143,51 +114,29 @@ def chat_stream():
143
  for token in reply.splitlines(keepends=True):
144
  yield token
145
  time.sleep(0.05)
146
- if is_incomplete(reply):
147
- yield "\n\n*Reply appears incomplete. Say 'continue' to resume.*"
148
-
149
  return Response(generate(), mimetype='text/plain')
150
 
151
- # ✅ Gradio interface for Hugging Face Space
152
  def gradio_chat(message, history=[]):
153
- history = [{"role": "user" if i % 2 == 0 else "assistant", "content": h} for i, h in enumerate(sum(history, ()))]
 
154
  reply = generate_full_reply(message, history)
155
  history.append((message, reply))
156
  return "", history
157
 
158
  with gr.Blocks() as demo:
159
- gr.Markdown("## 🤖 ChatMate — Phi-4 + Live Search (Hugging Face Space)")
160
  chatbot = gr.Chatbot()
161
  msg = gr.Textbox(label="Type your message")
162
  clear = gr.Button("Clear Chat")
163
-
164
  msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
165
  clear.click(lambda: None, None, chatbot, queue=False)
166
 
167
- # ✅ Run Gradio when in HF Spaces, else Flask for local dev
168
- # if __name__ == "__main__":
169
- # if os.environ.get("SPACE_BUILD", "false").lower() == "true":
170
- # demo.launch(server_name="0.0.0.0", server_port=7860)
171
- # else:
172
- # print("🔧 Warming up...")
173
- # _ = generate_full_reply("Hello", [])
174
- # app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
175
-
176
-
177
- # ---------------- Run both ----------------
178
- # def run_flask():
179
- # app.run(host="0.0.0.0", port=8000)
180
-
181
- # # Start Flask in a background thread
182
- # threading.Thread(target=run_flask, daemon=True).start()
183
- # ---------------- Combine Flask + Gradio into one app ----------------
184
- fastapi_app = FastAPI()
185
-
186
- # Mount Flask under FastAPI (so /apidocs works)
187
- fastapi_app.mount("/api", WSGIMiddleware(flask_app))
188
 
189
- # Mount Gradio at root path (overrides Flask's "/")
190
- app = mount_gradio_app(fastapi_app, demo, path="/") # Mount Flask under /flask
191
- # Gradio runs on port 7860 in HF Spaces
192
  if __name__ == "__main__":
193
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
9
  from huggingface_hub import login
10
  from langchain_community.tools import DuckDuckGoSearchRun
11
  import re
 
 
 
12
 
13
  # ✅ Safe GPU decorator
14
  try:
 
16
  except ImportError:
17
  def GPU(func): return func
18
 
19
+ # ---------------- Flask setup ----------------
20
  flask_app = Flask(__name__)
21
  swagger = Swagger(flask_app, template={
22
  "swagger": "2.0",
23
  "info": {
24
  "title": "ChatMate Real-Time API",
25
+ "description": "LangChain + DuckDuckGo + Phi-4",
26
  "version": "1.0"
27
  }
28
  }, config={
29
  "headers": [],
30
+ "specs": [{"endpoint": 'apispec', "route": '/apispec.json', "rule_filter": lambda rule: True}],
31
  "static_url_path": "/flasgger_static",
32
  "swagger_ui": True,
33
+ "specs_route": "/apidocs/"
34
  })
35
 
36
+ # ✅ Hugging Face login
37
  login(token=os.environ.get("CHAT_MATE"))
38
 
39
+ # ✅ Load model
40
  model_id = "microsoft/phi-4"
41
  tokenizer = AutoTokenizer.from_pretrained(model_id)
42
  model = AutoModelForCausalLM.from_pretrained(
 
64
  @GPU
65
  def generate_full_reply(message, history):
66
  system_prompt = (
67
+ "You are a friendly AI assistant built by Frederick Sundeep Mallela."
 
68
  )
69
  messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
 
 
70
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
 
71
  full_output = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512)[0]["generated_text"]
72
  reply = full_output[len(prompt):].strip()
73
 
74
+ while is_incomplete(reply):
75
+ continuation_prompt = prompt + reply
 
 
 
 
76
  next_output = pipe(continuation_prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)[0]["generated_text"]
 
77
  continuation = next_output[len(continuation_prompt):].strip()
 
 
78
  if not continuation or continuation in reply:
79
  break
 
80
  reply += continuation
 
81
  return reply.strip()
82
 
83
+ # ---------------- Flask API route ----------------
84
  @flask_app.route("/chat-stream", methods=["POST"])
85
  @swag_from({
86
  'tags': ['Chat'],
87
  'consumes': ['application/json'],
88
+ 'summary': 'Stream assistant reply',
 
89
  'parameters': [{
90
  'name': 'body',
91
  'in': 'body',
 
93
  'schema': {
94
  'type': 'object',
95
  'properties': {
96
+ 'message': {'type': 'string'},
97
  'history': {
98
  'type': 'array',
99
+ 'items': {'type': 'object'}
 
 
 
 
 
 
100
  }
101
  },
102
  'required': ['message']
103
  }
104
  }],
105
+ 'responses': {200: {'description': 'Streamed reply'}}
 
 
 
 
 
106
  })
107
  def chat_stream():
108
  data = request.get_json()
 
114
  for token in reply.splitlines(keepends=True):
115
  yield token
116
  time.sleep(0.05)
 
 
 
117
  return Response(generate(), mimetype='text/plain')
118
 
119
+ # ---------------- Gradio UI ----------------
120
  def gradio_chat(message, history=[]):
121
+ history = [{"role": "user" if i % 2 == 0 else "assistant", "content": h}
122
+ for i, h in enumerate(sum(history, ()))]
123
  reply = generate_full_reply(message, history)
124
  history.append((message, reply))
125
  return "", history
126
 
127
  with gr.Blocks() as demo:
128
+ gr.Markdown("## 🤖 ChatMate — Phi-4 + Live Search")
129
  chatbot = gr.Chatbot()
130
  msg = gr.Textbox(label="Type your message")
131
  clear = gr.Button("Clear Chat")
 
132
  msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
133
  clear.click(lambda: None, None, chatbot, queue=False)
134
 
135
+ # Mount Gradio inside Flask
136
+ @flask_app.route("/")
137
+ def gradio_index():
138
+ return demo.launch(share=False, server_name=None, inline=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
+ # ---------------- Run both in Hugging Face ----------------
 
 
141
  if __name__ == "__main__":
142
+ flask_app.run(host="0.0.0.0", port=7860)
requirements.txt CHANGED
@@ -14,6 +14,4 @@ sentencepiece
14
  nltk
15
  langchain_community
16
  duckduckgo-search
17
- pdfplumber
18
- fastapi
19
- uvicorn
 
14
  nltk
15
  langchain_community
16
  duckduckgo-search
17
+ pdfplumber