MB-IDK committed on
Commit
205dc04
Β·
verified Β·
1 Parent(s): bb5ae5e

Rename app.py to main.py

Browse files
Files changed (1) hide show
  1. app.py β†’ main.py +294 -258
app.py β†’ main.py RENAMED
@@ -1,10 +1,20 @@
1
- from fastapi import FastAPI, HTTPException, Header
2
- from fastapi.responses import StreamingResponse
3
- from pydantic import BaseModel
4
- import json, uuid, time, asyncio
5
- from typing import Optional, List
 
 
 
 
6
  from datetime import datetime
 
7
 
 
 
 
 
 
8
  try:
9
  from curl_cffi.requests import Session as CurlSession
10
  HAS_CURL_CFFI = True
@@ -17,13 +27,13 @@ try:
17
  except ImportError:
18
  HAS_CLOUDSCRAPER = False
19
 
20
- app = FastAPI(title="Perplexity OpenAI-Compatible API")
21
-
22
  BASE_URL = "https://www.perplexity.ai"
23
  ASK_URL = f"{BASE_URL}/rest/sse/perplexity_ask"
24
- TARGET_USAGE = "ask_text_0_markdown"
25
  MAX_RETRIES = 3
26
  RETRY_DELAY = 2
 
 
27
 
28
  HEADERS = {
29
  "Accept": "text/event-stream",
@@ -41,57 +51,78 @@ HEADERS = {
41
  "Pragma": "no-cache",
42
  }
43
 
44
- # ---------------------------------------------------------------------------
45
- # Session management (module-level singleton)
46
- # ---------------------------------------------------------------------------
47
 
48
- _session = None
49
- _backend = None
50
 
51
- def get_session():
52
- global _session, _backend
 
 
 
 
53
 
54
- if _session is not None:
55
- return _session, _backend
 
 
 
56
 
57
- if HAS_CURL_CFFI:
58
- try:
59
- s = CurlSession(impersonate="chrome120")
60
- r = s.get(BASE_URL, timeout=20)
61
- r.raise_for_status()
62
- _session, _backend = s, "curl_cffi"
63
- return _session, _backend
64
- except Exception:
65
- pass
66
-
67
- if HAS_CLOUDSCRAPER:
68
- try:
69
- s = cloudscraper.create_scraper(
70
- browser={"browser": "chrome", "platform": "windows", "mobile": False}
71
- )
72
- r = s.get(BASE_URL, timeout=20)
73
- r.raise_for_status()
74
- _session, _backend = s, "cloudscraper"
75
- return _session, _backend
76
- except Exception:
77
- pass
 
 
 
 
 
 
 
 
 
 
78
 
79
- raise RuntimeError("Could not initialize any scraping session")
 
 
 
 
 
 
80
 
81
- def reset_session():
82
- global _session, _backend
83
- _session = None
84
- _backend = None
85
 
86
- # ---------------------------------------------------------------------------
87
- # Perplexity core logic
88
- # ---------------------------------------------------------------------------
89
 
90
- def build_payload(query: str) -> dict:
 
 
 
 
91
  return {
92
  "params": {
93
  "attachments": [],
94
- "language": "fr-FR",
95
  "timezone": "Europe/Paris",
96
  "search_focus": "internet",
97
  "sources": ["web"],
@@ -119,28 +150,21 @@ def build_payload(query: str) -> dict:
119
  "query_str": query,
120
  }
121
 
122
- def collect_web_results(block: dict) -> list:
123
- results = []
124
- for wr in block.get("web_result_block", {}).get("web_results", []):
125
- results.append(wr)
126
- for wr in block.get("sources_mode_block", {}).get("web_results", []):
127
- results.append(wr)
128
- for step in block.get("plan_block", {}).get("steps", []):
129
- for wr in step.get("web_results_content", {}).get("web_results", []):
130
- results.append(wr)
131
- return results
132
-
133
- def extract_chunks(patch: dict) -> list:
134
  op = patch.get("op")
135
  path = patch.get("path", "")
136
  if op == "replace" and path == "":
137
  return patch.get("value", {}).get("chunks", [])
138
  if op == "add" and "/chunks/" in path:
139
- return [patch.get("value", "")]
 
140
  return []
141
 
142
- def parse_stream(resp) -> tuple:
143
- full_answer = ""
 
 
144
  sources = []
145
  seen_urls = set()
146
 
@@ -162,15 +186,29 @@ def parse_stream(resp) -> tuple:
162
  for block in event.get("blocks", []):
163
  usage = block.get("intended_usage", "")
164
 
165
- for wr in collect_web_results(block):
166
- url = wr.get("url", "")
167
- if url and url not in seen_urls:
168
- seen_urls.add(url)
169
- sources.append({
170
- "name": wr.get("name", ""),
171
- "url": url,
172
- "snippet": wr.get("snippet", ""),
173
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  if usage != TARGET_USAGE:
176
  continue
@@ -178,19 +216,20 @@ def parse_stream(resp) -> tuple:
178
  diff = block.get("diff_block", {})
179
  if diff.get("field") == "markdown_block":
180
  for patch in diff.get("patches", []):
181
- for chunk in extract_chunks(patch):
182
  if chunk:
183
- full_answer += chunk
184
 
185
  if is_final:
186
  md = block.get("markdown_block", {})
187
  if md.get("answer"):
188
- full_answer = md["answer"]
189
 
190
- return full_answer, sources
191
 
192
- def parse_stream_generator(resp):
193
- """Yields text chunks as they arrive from the SSE stream."""
 
194
  for raw_line in resp.iter_lines():
195
  if isinstance(raw_line, bytes):
196
  raw_line = raw_line.decode("utf-8", errors="replace")
@@ -214,194 +253,226 @@ def parse_stream_generator(resp):
214
  diff = block.get("diff_block", {})
215
  if diff.get("field") == "markdown_block":
216
  for patch in diff.get("patches", []):
217
- for chunk in extract_chunks(patch):
218
  if chunk:
219
  yield chunk
220
 
221
  if is_final:
222
  md = block.get("markdown_block", {})
223
  if md.get("answer"):
224
- # final complete answer β€” we already streamed chunks,
225
- # nothing extra needed here
226
- pass
227
 
228
- def do_perplexity_request(query: str):
229
- session, _ = get_session()
230
- payload = build_payload(query)
 
 
 
231
  headers = {**HEADERS, "X-Request-ID": str(uuid.uuid4())}
232
- last_exc = None
233
 
234
  for attempt in range(1, MAX_RETRIES + 1):
235
  try:
 
236
  resp = session.post(
237
- ASK_URL,
238
- headers=headers,
239
- json=payload,
240
- stream=True,
241
- timeout=60,
242
  )
243
  if resp.status_code in (403, 503):
244
- reset_session()
245
- raise RuntimeError(f"Blocked (HTTP {resp.status_code})")
 
 
 
 
 
 
 
246
  resp.raise_for_status()
247
- return resp
 
 
 
 
248
  except Exception as e:
249
- last_exc = e
 
 
250
  if attempt < MAX_RETRIES:
251
  time.sleep(RETRY_DELAY)
252
- # Try refreshing session on failure
253
- try:
254
- reset_session()
255
- get_session()
256
- except Exception:
257
- pass
258
 
259
- raise RuntimeError(f"All retries failed: {last_exc}")
260
 
261
- # ---------------------------------------------------------------------------
262
- # OpenAI-compatible Pydantic models
263
- # ---------------------------------------------------------------------------
264
 
265
- class Message(BaseModel):
266
- role: str
267
- content: str
 
 
 
268
 
269
  class ChatCompletionRequest(BaseModel):
270
- model: str = "perplexity"
271
- messages: List[Message]
272
- stream: Optional[bool] = False
273
  temperature: Optional[float] = None
274
  max_tokens: Optional[int] = None
275
 
276
- # ---------------------------------------------------------------------------
277
- # Helper: build query string from messages
278
- # ---------------------------------------------------------------------------
279
 
280
- def messages_to_query(messages: List[Message]) -> str:
 
 
 
 
 
 
 
 
281
  """
282
- Converts OpenAI message list to a single query string.
283
- Uses the last user message as the main query,
284
- prepending any system prompt if present.
285
  """
286
- system_parts = [m.content for m in messages if m.role == "system"]
287
- user_parts = [m.content for m in messages if m.role == "user"]
288
-
289
- query = ""
 
 
 
290
  if system_parts:
291
- query += " ".join(system_parts) + "\n\n"
292
- if user_parts:
293
- query += user_parts[-1] # last user turn
294
- else:
295
- # fallback: last message regardless of role
296
- query = messages[-1].content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- return query.strip()
299
 
300
- # ---------------------------------------------------------------------------
301
- # OpenAI-compatible endpoints
302
- # ---------------------------------------------------------------------------
303
 
304
  @app.get("/")
305
- def root():
306
- return {"status": "ok", "message": "Perplexity OpenAI-compatible API"}
 
 
 
 
 
 
 
 
307
 
308
  @app.get("/health")
309
- def health():
310
  return {"status": "ok"}
311
 
 
312
  @app.get("/v1/models")
313
- def list_models():
314
  return {
315
  "object": "list",
316
  "data": [
317
  {
318
- "id": "perplexity",
319
  "object": "model",
320
- "created": int(datetime.now().timestamp()),
321
- "owned_by": "perplexity",
322
  }
323
  ],
324
  }
325
 
326
- @app.post("/v1/chat/completions")
327
- def chat_completions(
328
- request: ChatCompletionRequest,
329
- authorization: Optional[str] = Header(default=None),
330
- ):
331
- query = messages_to_query(request.messages)
332
- if not query:
333
- raise HTTPException(status_code=400, detail="No query found in messages")
334
-
335
- completion_id = f"chatcmpl-{uuid.uuid4().hex}"
336
- created_ts = int(time.time())
337
- model_name = request.model or "perplexity"
338
-
339
- # ── Streaming response ──────────────────────────────────────────────────
340
- if request.stream:
341
- def stream_generator():
342
- try:
343
- resp = do_perplexity_request(query)
344
- except Exception as e:
345
- # Send error as a data chunk then stop
346
- err_chunk = {
347
- "id": completion_id,
348
- "object": "chat.completion.chunk",
349
- "created": created_ts,
350
- "model": model_name,
351
- "choices": [{
352
- "index": 0,
353
- "delta": {"content": f"[ERROR] {e}"},
354
- "finish_reason": "stop",
355
- }],
356
- }
357
- yield f"data: {json.dumps(err_chunk)}\n\ndata: [DONE]\n\n"
358
- return
359
 
360
- # First chunk with role
361
- first = {
362
- "id": completion_id,
363
- "object": "chat.completion.chunk",
364
- "created": created_ts,
365
- "model": model_name,
366
- "choices": [{
367
- "index": 0,
368
- "delta": {"role": "assistant"},
369
- "finish_reason": None,
370
- }],
371
- }
372
- yield f"data: {json.dumps(first)}\n\n"
373
-
374
- for chunk_text in parse_stream_generator(resp):
375
- chunk = {
376
- "id": completion_id,
377
- "object": "chat.completion.chunk",
378
- "created": created_ts,
379
- "model": model_name,
380
- "choices": [{
381
- "index": 0,
382
- "delta": {"content": chunk_text},
383
- "finish_reason": None,
384
- }],
385
- }
386
- yield f"data: {json.dumps(chunk)}\n\n"
387
 
388
- # Final stop chunk
389
- stop_chunk = {
390
- "id": completion_id,
391
- "object": "chat.completion.chunk",
392
- "created": created_ts,
393
- "model": model_name,
394
- "choices": [{
395
- "index": 0,
396
- "delta": {},
397
- "finish_reason": "stop",
398
- }],
399
- }
400
- yield f"data: {json.dumps(stop_chunk)}\n\n"
401
- yield "data: [DONE]\n\n"
402
 
 
 
403
  return StreamingResponse(
404
- stream_generator(),
405
  media_type="text/event-stream",
406
  headers={
407
  "Cache-Control": "no-cache",
@@ -409,54 +480,19 @@ def chat_completions(
409
  },
410
  )
411
 
412
- # ── Non-streaming response ──────────────────────────────────────────────
413
  try:
414
- resp = do_perplexity_request(query)
415
- answer, sources = parse_stream(resp)
416
- except Exception as e:
417
  raise HTTPException(status_code=502, detail=str(e))
418
 
419
  if not answer:
420
- raise HTTPException(status_code=502, detail="Empty response from Perplexity")
421
-
422
- # Append sources as footnotes if any
423
- if sources:
424
- footnotes = "\n\n---\n**Sources:**\n"
425
- for i, src in enumerate(sources, 1):
426
- footnotes += f"{i}. [{src.get('name', src['url'])}]({src['url']})\n"
427
- answer += footnotes
428
 
429
- prompt_tokens = len(query.split())
430
- completion_tokens = len(answer.split())
431
-
432
- return {
433
- "id": completion_id,
434
- "object": "chat.completion",
435
- "created": created_ts,
436
- "model": model_name,
437
- "choices": [{
438
- "index": 0,
439
- "message": {
440
- "role": "assistant",
441
- "content": answer,
442
- },
443
- "finish_reason": "stop",
444
- }],
445
- "usage": {
446
- "prompt_tokens": prompt_tokens,
447
- "completion_tokens": completion_tokens,
448
- "total_tokens": prompt_tokens + completion_tokens,
449
- },
450
- }
451
 
452
- # ---------------------------------------------------------------------------
453
- # Startup: pre-warm session
454
- # ---------------------------------------------------------------------------
455
 
456
- @app.on_event("startup")
457
- def startup_event():
458
- try:
459
- get_session()
460
- print("[startup] Session initialized successfully")
461
- except Exception as e:
462
- print(f"[startup] Session init failed (will retry on first request): {e}")
 
1
+ """
2
+ OpenAI-compatible API wrapping Perplexity Ask (free/anonymous).
3
+ Hosted on Hugging Face Spaces (Docker).
4
+ """
5
+
6
+ import json
7
+ import uuid
8
+ import time
9
+ import threading
10
  from datetime import datetime
11
+ from typing import Optional
12
 
13
+ from fastapi import FastAPI, HTTPException, Request
14
+ from fastapi.responses import StreamingResponse, JSONResponse
15
+ from pydantic import BaseModel, Field
16
+
17
+ # ── Scraping libs ──────────────────────────────────────────────
18
  try:
19
  from curl_cffi.requests import Session as CurlSession
20
  HAS_CURL_CFFI = True
 
27
  except ImportError:
28
  HAS_CLOUDSCRAPER = False
29
 
30
+ # ── Constants ──────────────────────────────────────────────────
 
31
  BASE_URL = "https://www.perplexity.ai"
32
  ASK_URL = f"{BASE_URL}/rest/sse/perplexity_ask"
 
33
  MAX_RETRIES = 3
34
  RETRY_DELAY = 2
35
+ TARGET_USAGE = "ask_text_0_markdown"
36
+ MODEL_NAME = "perplexity-ask"
37
 
38
  HEADERS = {
39
  "Accept": "text/event-stream",
 
51
  "Pragma": "no-cache",
52
  }
53
 
54
+ # ── Session Pool (thread-safe) ────────────────────────────────
 
 
55
 
56
class SessionManager:
    """Thread-safe holder for one scraping session, rebuilt when stale."""

    def __init__(self):
        # Lock guards every read/write of the cached session state.
        self._lock = threading.Lock()
        self._session = None
        self._backend: Optional[str] = None
        self._created_at: float = 0
        self._max_age: float = 300  # seconds before the session is considered stale

    def _check_cloudflare(self, status_code: int, body: str = ""):
        """Raise RuntimeError when the response looks like a Cloudflare block page."""
        if status_code not in (403, 503):
            return
        lowered = body.lower()
        if "cloudflare" in lowered or "cf-ray" in lowered:
            raise RuntimeError(f"Blocked by Cloudflare (HTTP {status_code})")

    def _build_session(self):
        """Create a fresh session, preferring curl_cffi over cloudscraper."""
        if HAS_CURL_CFFI:
            try:
                sess = CurlSession(impersonate="chrome120")
                resp = sess.get(BASE_URL, timeout=20)
                self._check_cloudflare(resp.status_code, resp.text)
                resp.raise_for_status()
                print(f"[session] curl_cffi OK – cookies: {list(sess.cookies.keys())}")
                return sess, "curl_cffi"
            except Exception as exc:
                print(f"[session] curl_cffi failed: {exc}")

        if HAS_CLOUDSCRAPER:
            try:
                sess = cloudscraper.create_scraper(
                    browser={
                        "browser": "chrome",
                        "platform": "windows",
                        "mobile": False,
                    }
                )
                resp = sess.get(BASE_URL, timeout=20)
                self._check_cloudflare(resp.status_code, resp.text)
                resp.raise_for_status()
                print(f"[session] cloudscraper OK – cookies: {list(sess.cookies.keys())}")
                return sess, "cloudscraper"
            except Exception as exc:
                print(f"[session] cloudscraper failed: {exc}")

        raise RuntimeError("No scraping backend available")

    def get(self):
        """Return the cached session, transparently rebuilding it when stale."""
        with self._lock:
            moment = time.time()
            needs_rebuild = (
                self._session is None
                or (moment - self._created_at) > self._max_age
            )
            if needs_rebuild:
                self._session, self._backend = self._build_session()
                self._created_at = moment
            return self._session

    def invalidate(self):
        """Drop the cached session so the next get() builds a new one."""
        with self._lock:
            self._session = None
 
115
 
 
 
 
116
 
117
# Module-level singleton shared by every request handler in this process.
sessions = SessionManager()
118
+
119
+ # ── Perplexity core ───────────────────────────────────────────
120
+
121
+ def _build_payload(query: str) -> dict:
122
  return {
123
  "params": {
124
  "attachments": [],
125
+ "language": "en-US",
126
  "timezone": "Europe/Paris",
127
  "search_focus": "internet",
128
  "sources": ["web"],
 
150
  "query_str": query,
151
  }
152
 
153
+
154
+ def _extract_chunks(patch: dict) -> list[str]:
 
 
 
 
 
 
 
 
 
 
155
  op = patch.get("op")
156
  path = patch.get("path", "")
157
  if op == "replace" and path == "":
158
  return patch.get("value", {}).get("chunks", [])
159
  if op == "add" and "/chunks/" in path:
160
+ v = patch.get("value", "")
161
+ return [v] if v else []
162
  return []
163
 
164
+
165
+ def _parse_stream_full(resp) -> tuple[str, list[dict]]:
166
+ """Parse entire SSE stream, return (answer, sources)."""
167
+ full = ""
168
  sources = []
169
  seen_urls = set()
170
 
 
186
  for block in event.get("blocks", []):
187
  usage = block.get("intended_usage", "")
188
 
189
+ # sources
190
+ for key in ("web_result_block", "sources_mode_block"):
191
+ for wr in block.get(key, {}).get("web_results", []):
192
+ url = wr.get("url", "")
193
+ if url and url not in seen_urls:
194
+ seen_urls.add(url)
195
+ sources.append({
196
+ "name": wr.get("name", ""),
197
+ "url": url,
198
+ "snippet": wr.get("snippet", ""),
199
+ })
200
+
201
+ pb = block.get("plan_block", {})
202
+ for step in pb.get("steps", []):
203
+ for wr in step.get("web_results_content", {}).get("web_results", []):
204
+ url = wr.get("url", "")
205
+ if url and url not in seen_urls:
206
+ seen_urls.add(url)
207
+ sources.append({
208
+ "name": wr.get("name", ""),
209
+ "url": url,
210
+ "snippet": wr.get("snippet", ""),
211
+ })
212
 
213
  if usage != TARGET_USAGE:
214
  continue
 
216
  diff = block.get("diff_block", {})
217
  if diff.get("field") == "markdown_block":
218
  for patch in diff.get("patches", []):
219
+ for chunk in _extract_chunks(patch):
220
  if chunk:
221
+ full += chunk
222
 
223
  if is_final:
224
  md = block.get("markdown_block", {})
225
  if md.get("answer"):
226
+ full = md["answer"]
227
 
228
+ return full, sources
229
 
230
+
231
+ def _iter_stream_chunks(resp):
232
+ """Yield text chunks as they arrive (for SSE streaming)."""
233
  for raw_line in resp.iter_lines():
234
  if isinstance(raw_line, bytes):
235
  raw_line = raw_line.decode("utf-8", errors="replace")
 
253
  diff = block.get("diff_block", {})
254
  if diff.get("field") == "markdown_block":
255
  for patch in diff.get("patches", []):
256
+ for chunk in _extract_chunks(patch):
257
  if chunk:
258
  yield chunk
259
 
260
  if is_final:
261
  md = block.get("markdown_block", {})
262
  if md.get("answer"):
263
+ yield md["answer"]
264
+
 
265
 
266
def _do_request(query: str, stream: bool = False):
    """
    Send *query* to Perplexity with retries.

    Returns the raw response object when ``stream`` is True (the caller
    iterates its SSE lines), otherwise an ``(answer, sources)`` tuple
    from fully parsing the stream.

    Raises:
        RuntimeError: when all MAX_RETRIES attempts fail.
    """
    payload = _build_payload(query)
    headers = {**HEADERS, "X-Request-ID": str(uuid.uuid4())}
    last_err = None

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            session = sessions.get()
            resp = session.post(
                ASK_URL, headers=headers, json=payload, stream=True, timeout=60
            )
            if resp.status_code in (403, 503):
                # Best-effort capture of the block page for diagnostics;
                # reading a streamed body may itself fail.
                body = ""
                try:
                    body = resp.text[:500]
                except Exception:
                    pass
                sessions.invalidate()
                # BUGFIX: `body` was collected but never surfaced in the error.
                detail = f": {body[:200]}" if body else ""
                raise RuntimeError(
                    f"Blocked (HTTP {resp.status_code}){detail}"
                )
            resp.raise_for_status()

            if stream:
                return resp  # caller will iterate the SSE stream
            return _parse_stream_full(resp)

        except Exception as e:
            last_err = e
            print(f"[ask] attempt {attempt}/{MAX_RETRIES} failed: {e}")
            # Any failure may mean a stale or blocked session; force a
            # rebuild before the next attempt.
            sessions.invalidate()
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_DELAY)

    raise RuntimeError(f"All retries failed: {last_err}")
305
 
 
 
 
306
 
307
+ # ── Pydantic models (OpenAI-compatible) ───────────────────────
308
+
309
class ChatMessage(BaseModel):
    """One turn of an OpenAI-style conversation; defaults make both fields optional."""
    # _messages_to_query only distinguishes the "system" and "user" roles.
    role: str = "user"
    content: str = ""
312
+
313
 
314
class ChatCompletionRequest(BaseModel):
    """Subset of the OpenAI /v1/chat/completions request body."""
    model: str = MODEL_NAME
    messages: list[ChatMessage]
    stream: bool = False
    # Accepted for OpenAI-client compatibility; not referenced by the
    # handlers visible in this file, so they do not affect the upstream call.
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
320
 
 
 
 
321
 
322
+ # ── FastAPI app ───────────────────────────────────────────────
323
+
324
# Module-level ASGI application; the route handlers below register on it.
app = FastAPI(
    title="Perplexity Ask – OpenAI Compatible API",
    version="1.0.0",
)
328
+
329
+
330
def _messages_to_query(messages: list[ChatMessage]) -> str:
    """Flatten an OpenAI message list into one Perplexity query string.

    The most recent user message becomes the query; all system messages
    (in order) are joined and prepended, separated by a blank line.
    """
    system_lines = [msg.content for msg in messages if msg.role == "system"]
    latest_user = ""
    for msg in messages:
        if msg.role == "user":
            latest_user = msg.content  # keep overwriting: last user turn wins
    if system_lines:
        return "\n".join(system_lines) + "\n\n" + latest_user
    return latest_user
345
+
346
+
347
def _make_chat_completion(answer: str, sources: list[dict], req_id: str) -> dict:
    """Wrap *answer* (with source footnotes appended) in an OpenAI ChatCompletion dict."""
    if sources:
        footnotes = [
            f"{idx}. [{src.get('name', 'Link')}]({src.get('url', '')})\n"
            for idx, src in enumerate(sources, 1)
        ]
        answer = answer + "\n\n---\n**Sources:**\n" + "".join(footnotes)

    assistant_message = {"role": "assistant", "content": answer}
    return {
        "id": req_id,
        "object": "chat.completion",
        "created": int(time.time()),
        "model": MODEL_NAME,
        "choices": [
            {"index": 0, "message": assistant_message, "finish_reason": "stop"}
        ],
        # Token accounting is not available from the scraped endpoint.
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
373
+
374
+
375
def _stream_openai_chunks(query: str, req_id: str):
    """Generator yielding SSE lines in OpenAI streaming format.

    Emits one ``chat.completion.chunk`` per text chunk from Perplexity,
    then an empty-delta chunk with ``finish_reason="stop"`` and the
    ``[DONE]`` sentinel. Errors are surfaced in-band as a final chunk so
    the client stream still terminates cleanly.
    """

    def sse_chunk(delta: dict, finish_reason) -> str:
        # One OpenAI-style chunk serialized as an SSE `data:` line.
        # (Extracted helper: this dict was previously built three times.)
        payload = {
            "id": req_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": MODEL_NAME,
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": finish_reason,
                }
            ],
        }
        return f"data: {json.dumps(payload)}\n\n"

    try:
        resp = _do_request(query, stream=True)
        for chunk_text in _iter_stream_chunks(resp):
            yield sse_chunk({"content": chunk_text}, None)

        # Final chunk closes the completion.
        yield sse_chunk({}, "stop")
        yield "data: [DONE]\n\n"

    except Exception as e:
        # Report the failure in-band; many OpenAI clients have no other
        # error channel once streaming has begun.
        yield sse_chunk({"content": f"\n\n[ERROR] {e}"}, "stop")
        yield "data: [DONE]\n\n"
428
 
 
429
 
430
+ # ── Endpoints ─────────────────────────────────────────────────
 
 
431
 
432
@app.get("/")
async def root():
    """Landing route advertising the service and its available endpoints."""
    available = [
        "/v1/models",
        "/v1/chat/completions",
        "/health",
    ]
    return {
        "message": "Perplexity Ask API – OpenAI compatible",
        "endpoints": available,
    }
442
+
443
 
444
@app.get("/health")
async def health():
    """Liveness probe for the hosting platform; always reports ok."""
    return {"status": "ok"}
447
 
448
+
449
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing exposing the single wrapped model."""
    model_entry = {
        "id": MODEL_NAME,
        "object": "model",
        # Static placeholder timestamp; the scraped backend has no real one.
        "created": 1700000000,
        "owned_by": "perplexity-community",
    }
    return {"object": "list", "data": [model_entry]}
462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
+ @app.post("/v1/chat/completions")
465
+ async def chat_completions(req: ChatCompletionRequest):
466
+ query = _messages_to_query(req.messages)
467
+ if not query.strip():
468
+ raise HTTPException(status_code=400, detail="Empty query")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
 
470
+ req_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
 
 
 
 
 
 
 
 
 
 
 
 
 
471
 
472
+ # ── Streaming ──
473
+ if req.stream:
474
  return StreamingResponse(
475
+ _stream_openai_chunks(query, req_id),
476
  media_type="text/event-stream",
477
  headers={
478
  "Cache-Control": "no-cache",
 
480
  },
481
  )
482
 
483
+ # ── Non-streaming ──
484
  try:
485
+ answer, sources = _do_request(query, stream=False)
486
+ except RuntimeError as e:
 
487
  raise HTTPException(status_code=502, detail=str(e))
488
 
489
  if not answer:
490
+ raise HTTPException(status_code=502, detail="No answer received from Perplexity")
 
 
 
 
 
 
 
491
 
492
+ return JSONResponse(_make_chat_completion(answer, sources, req_id))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
 
 
 
 
494
 
495
# ── Catch-all for /chat/completions without /v1 prefix ────────
@app.post("/chat/completions")
async def chat_completions_no_prefix(req: ChatCompletionRequest):
    """Alias for clients that omit the /v1 prefix; delegates unchanged."""
    return await chat_completions(req)