MB-IDK committed on
Commit
205dc04
Β·
verified Β·
1 Parent(s): bb5ae5e

Rename app.py to main.py

Browse files
Files changed (1) hide show
  1. app.py β†’ main.py +294 -258
app.py β†’ main.py RENAMED
@@ -1,10 +1,20 @@
1
- from fastapi import FastAPI, HTTPException, Header
2
- from fastapi.responses import StreamingResponse
3
- from pydantic import BaseModel
4
- import json, uuid, time, asyncio
5
- from typing import Optional, List
 
 
 
 
6
  from datetime import datetime
 
7
 
 
 
 
 
 
8
  try:
9
  from curl_cffi.requests import Session as CurlSession
10
  HAS_CURL_CFFI = True
@@ -17,13 +27,13 @@ try:
17
  except ImportError:
18
  HAS_CLOUDSCRAPER = False
19
 
20
- app = FastAPI(title="Perplexity OpenAI-Compatible API")
21
-
22
  BASE_URL = "https://www.perplexity.ai"
23
  ASK_URL = f"{BASE_URL}/rest/sse/perplexity_ask"
24
- TARGET_USAGE = "ask_text_0_markdown"
25
  MAX_RETRIES = 3
26
  RETRY_DELAY = 2
 
 
27
 
28
  HEADERS = {
29
  "Accept": "text/event-stream",
@@ -41,57 +51,78 @@ HEADERS = {
41
  "Pragma": "no-cache",
42
  }
43
 
44
- # ---------------------------------------------------------------------------
45
- # Session management (module-level singleton)
46
- # ---------------------------------------------------------------------------
47
 
48
- _session = None
49
- _backend = None
50
 
51
- def get_session():
52
- global _session, _backend
 
 
 
 
53
 
54
- if _session is not None:
55
- return _session, _backend
 
 
 
56
 
57
- if HAS_CURL_CFFI:
58
- try:
59
- s = CurlSession(impersonate="chrome120")
60
- r = s.get(BASE_URL, timeout=20)
61
- r.raise_for_status()
62
- _session, _backend = s, "curl_cffi"
63
- return _session, _backend
64
- except Exception:
65
- pass
66
-
67
- if HAS_CLOUDSCRAPER:
68
- try:
69
- s = cloudscraper.create_scraper(
70
- browser={"browser": "chrome", "platform": "windows", "mobile": False}
71
- )
72
- r = s.get(BASE_URL, timeout=20)
73
- r.raise_for_status()
74
- _session, _backend = s, "cloudscraper"
75
- return _session, _backend
76
- except Exception:
77
- pass
 
 
 
 
 
 
 
 
 
 
78
 
79
- raise RuntimeError("Could not initialize any scraping session")
 
 
 
 
 
 
80
 
81
- def reset_session():
82
- global _session, _backend
83
- _session = None
84
- _backend = None
85
 
86
- # ---------------------------------------------------------------------------
87
- # Perplexity core logic
88
- # ---------------------------------------------------------------------------
89
 
90
- def build_payload(query: str) -> dict:
 
 
 
 
91
  return {
92
  "params": {
93
  "attachments": [],
94
- "language": "fr-FR",
95
  "timezone": "Europe/Paris",
96
  "search_focus": "internet",
97
  "sources": ["web"],
@@ -119,28 +150,21 @@ def build_payload(query: str) -> dict:
119
  "query_str": query,
120
  }
121
 
122
- def collect_web_results(block: dict) -> list:
123
- results = []
124
- for wr in block.get("web_result_block", {}).get("web_results", []):
125
- results.append(wr)
126
- for wr in block.get("sources_mode_block", {}).get("web_results", []):
127
- results.append(wr)
128
- for step in block.get("plan_block", {}).get("steps", []):
129
- for wr in step.get("web_results_content", {}).get("web_results", []):
130
- results.append(wr)
131
- return results
132
-
133
- def extract_chunks(patch: dict) -> list:
134
  op = patch.get("op")
135
  path = patch.get("path", "")
136
  if op == "replace" and path == "":
137
  return patch.get("value", {}).get("chunks", [])
138
  if op == "add" and "/chunks/" in path:
139
- return [patch.get("value", "")]
 
140
  return []
141
 
142
- def parse_stream(resp) -> tuple:
143
- full_answer = ""
 
 
144
  sources = []
145
  seen_urls = set()
146
 
@@ -162,15 +186,29 @@ def parse_stream(resp) -> tuple:
162
  for block in event.get("blocks", []):
163
  usage = block.get("intended_usage", "")
164
 
165
- for wr in collect_web_results(block):
166
- url = wr.get("url", "")
167
- if url and url not in seen_urls:
168
- seen_urls.add(url)
169
- sources.append({
170
- "name": wr.get("name", ""),
171
- "url": url,
172
- "snippet": wr.get("snippet", ""),
173
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
  if usage != TARGET_USAGE:
176
  continue
@@ -178,19 +216,20 @@ def parse_stream(resp) -> tuple:
178
  diff = block.get("diff_block", {})
179
  if diff.get("field") == "markdown_block":
180
  for patch in diff.get("patches", []):
181
- for chunk in extract_chunks(patch):
182
  if chunk:
183
- full_answer += chunk
184
 
185
  if is_final:
186
  md = block.get("markdown_block", {})
187
  if md.get("answer"):
188
- full_answer = md["answer"]
189
 
190
- return full_answer, sources
191
 
192
- def parse_stream_generator(resp):
193
- """Yields text chunks as they arrive from the SSE stream."""
 
194
  for raw_line in resp.iter_lines():
195
  if isinstance(raw_line, bytes):
196
  raw_line = raw_line.decode("utf-8", errors="replace")
@@ -214,194 +253,226 @@ def parse_stream_generator(resp):
214
  diff = block.get("diff_block", {})
215
  if diff.get("field") == "markdown_block":
216
  for patch in diff.get("patches", []):
217
- for chunk in extract_chunks(patch):
218
  if chunk:
219
  yield chunk
220
 
221
  if is_final:
222
  md = block.get("markdown_block", {})
223
  if md.get("answer"):
224
- # final complete answer β€” we already streamed chunks,
225
- # nothing extra needed here
226
- pass
227
 
228
- def do_perplexity_request(query: str):
229
- session, _ = get_session()
230
- payload = build_payload(query)
 
 
 
231
  headers = {**HEADERS, "X-Request-ID": str(uuid.uuid4())}
232
- last_exc = None
233
 
234
  for attempt in range(1, MAX_RETRIES + 1):
235
  try:
 
236
  resp = session.post(
237
- ASK_URL,
238
- headers=headers,
239
- json=payload,
240
- stream=True,
241
- timeout=60,
242
  )
243
  if resp.status_code in (403, 503):
244
- reset_session()
245
- raise RuntimeError(f"Blocked (HTTP {resp.status_code})")
 
 
 
 
 
 
 
246
  resp.raise_for_status()
247
- return resp
 
 
 
 
248
  except Exception as e:
249
- last_exc = e
 
 
250
  if attempt < MAX_RETRIES:
251
  time.sleep(RETRY_DELAY)
252
- # Try refreshing session on failure
253
- try:
254
- reset_session()
255
- get_session()
256
- except Exception:
257
- pass
258
 
259
- raise RuntimeError(f"All retries failed: {last_exc}")
260
 
261
- # ---------------------------------------------------------------------------
262
- # OpenAI-compatible Pydantic models
263
- # ---------------------------------------------------------------------------
264
 
265
- class Message(BaseModel):
266
- role: str
267
- content: str
 
 
 
268
 
269
  class ChatCompletionRequest(BaseModel):
270
- model: str = "perplexity"
271
- messages: List[Message]
272
- stream: Optional[bool] = False
273
  temperature: Optional[float] = None
274
  max_tokens: Optional[int] = None
275
 
276
- # ---------------------------------------------------------------------------
277
- # Helper: build query string from messages
278
- # ---------------------------------------------------------------------------
279
 
280
- def messages_to_query(messages: List[Message]) -> str:
 
 
 
 
 
 
 
 
281
  """
282
- Converts OpenAI message list to a single query string.
283
- Uses the last user message as the main query,
284
- prepending any system prompt if present.
285
  """
286
- system_parts = [m.content for m in messages if m.role == "system"]
287
- user_parts = [m.content for m in messages if m.role == "user"]
288
-
289
- query = ""
 
 
 
290
  if system_parts:
291
- query += " ".join(system_parts) + "\n\n"
292
- if user_parts:
293
- query += user_parts[-1] # last user turn
294
- else:
295
- # fallback: last message regardless of role
296
- query = messages[-1].content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- return query.strip()
299
 
300
- # ---------------------------------------------------------------------------
301
- # OpenAI-compatible endpoints
302
- # ---------------------------------------------------------------------------
303
 
304
  @app.get("/")
305
- def root():
306
- return {"status": "ok", "message": "Perplexity OpenAI-compatible API"}
 
 
 
 
 
 
 
 
307
 
308
  @app.get("/health")
309
- def health():
310
  return {"status": "ok"}
311
 
 
312
  @app.get("/v1/models")
313
- def list_models():
314
  return {
315
  "object": "list",
316
  "data": [
317
  {
318
- "id": "perplexity",
319
  "object": "model",
320
- "created": int(datetime.now().timestamp()),
321
- "owned_by": "perplexity",
322
  }
323
  ],
324
  }
325
 
326
- @app.post("/v1/chat/completions")
327
- def chat_completions(
328
- request: ChatCompletionRequest,
329
- authorization: Optional[str] = Header(default=None),
330
- ):
331
- query = messages_to_query(request.messages)
332
- if not query:
333
- raise HTTPException(status_code=400, detail="No query found in messages")
334
-
335
- completion_id = f"chatcmpl-{uuid.uuid4().hex}"
336
- created_ts = int(time.time())
337
- model_name = request.model or "perplexity"
338
-
339
- # ── Streaming response ──────────────────────────────────────────────────
340
- if request.stream:
341
- def stream_generator():
342
- try:
343
- resp = do_perplexity_request(query)
344
- except Exception as e:
345
- # Send error as a data chunk then stop
346
- err_chunk = {
347
- "id": completion_id,
348
- "object": "chat.completion.chunk",
349
- "created": created_ts,
350
- "model": model_name,
351
- "choices": [{
352
- "index": 0,
353
- "delta": {"content": f"[ERROR] {e}"},
354
- "finish_reason": "stop",
355
- }],
356
- }
357
- yield f"data: {json.dumps(err_chunk)}\n\ndata: [DONE]\n\n"
358
- return
359
 
360
- # First chunk with role
361
- first = {
362
- "id": completion_id,
363
- "object": "chat.completion.chunk",
364
- "created": created_ts,
365
- "model": model_name,
366
- "choices": [{
367
- "index": 0,
368
- "delta": {"role": "assistant"},
369
- "finish_reason": None,
370
- }],
371
- }
372
- yield f"data: {json.dumps(first)}\n\n"
373
-
374
- for chunk_text in parse_stream_generator(resp):
375
- chunk = {
376
- "id": completion_id,
377
- "object": "chat.completion.chunk",
378
- "created": created_ts,
379
- "model": model_name,
380
- "choices": [{
381
- "index": 0,
382
- "delta": {"content": chunk_text},
383
- "finish_reason": None,
384
- }],
385
- }
386
- yield f"data: {json.dumps(chunk)}\n\n"
387
 
388
- # Final stop chunk
389
- stop_chunk = {
390
- "id": completion_id,
391
- "object": "chat.completion.chunk",
392
- "created": created_ts,
393
- "model": model_name,
394
- "choices": [{
395
- "index": 0,
396
- "delta": {},
397
- "finish_reason": "stop",
398
- }],
399
- }
400
- yield f"data: {json.dumps(stop_chunk)}\n\n"
401
- yield "data: [DONE]\n\n"
402
 
 
 
403
  return StreamingResponse(
404
- stream_generator(),
405
  media_type="text/event-stream",
406
  headers={
407
  "Cache-Control": "no-cache",
@@ -409,54 +480,19 @@ def chat_completions(
409
  },
410
  )
411
 
412
- # ── Non-streaming response ──────────────────────────────────────────────
413
  try:
414
- resp = do_perplexity_request(query)
415
- answer, sources = parse_stream(resp)
416
- except Exception as e:
417
  raise HTTPException(status_code=502, detail=str(e))
418
 
419
  if not answer:
420
- raise HTTPException(status_code=502, detail="Empty response from Perplexity")
421
-
422
- # Append sources as footnotes if any
423
- if sources:
424
- footnotes = "\n\n---\n**Sources:**\n"
425
- for i, src in enumerate(sources, 1):
426
- footnotes += f"{i}. [{src.get('name', src['url'])}]({src['url']})\n"
427
- answer += footnotes
428
 
429
- prompt_tokens = len(query.split())
430
- completion_tokens = len(answer.split())
431
-
432
- return {
433
- "id": completion_id,
434
- "object": "chat.completion",
435
- "created": created_ts,
436
- "model": model_name,
437
- "choices": [{
438
- "index": 0,
439
- "message": {
440
- "role": "assistant",
441
- "content": answer,
442
- },
443
- "finish_reason": "stop",
444
- }],
445
- "usage": {
446
- "prompt_tokens": prompt_tokens,
447
- "completion_tokens": completion_tokens,
448
- "total_tokens": prompt_tokens + completion_tokens,
449
- },
450
- }
451
 
452
- # ---------------------------------------------------------------------------
453
- # Startup: pre-warm session
454
- # ---------------------------------------------------------------------------
455
 
456
- @app.on_event("startup")
457
- def startup_event():
458
- try:
459
- get_session()
460
- print("[startup] Session initialized successfully")
461
- except Exception as e:
462
- print(f"[startup] Session init failed (will retry on first request): {e}")
 
1
+ """
2
+ OpenAI-compatible API wrapping Perplexity Ask (free/anonymous).
3
+ Hosted on Hugging Face Spaces (Docker).
4
+ """
5
+
6
+ import json
7
+ import uuid
8
+ import time
9
+ import threading
10
  from datetime import datetime
11
+ from typing import Optional
12
 
13
+ from fastapi import FastAPI, HTTPException, Request
14
+ from fastapi.responses import StreamingResponse, JSONResponse
15
+ from pydantic import BaseModel, Field
16
+
17
+ # ── Scraping libs ──────────────────────────────────────────────
18
  try:
19
  from curl_cffi.requests import Session as CurlSession
20
  HAS_CURL_CFFI = True
 
27
  except ImportError:
28
  HAS_CLOUDSCRAPER = False
29
 
30
+ # ── Constants ──────────────────────────────────────────────────
 
31
  BASE_URL = "https://www.perplexity.ai"
32
  ASK_URL = f"{BASE_URL}/rest/sse/perplexity_ask"
 
33
  MAX_RETRIES = 3
34
  RETRY_DELAY = 2
35
+ TARGET_USAGE = "ask_text_0_markdown"
36
+ MODEL_NAME = "perplexity-ask"
37
 
38
  HEADERS = {
39
  "Accept": "text/event-stream",
 
51
  "Pragma": "no-cache",
52
  }
53
 
54
+ # ── Session Pool (thread-safe) ────────────────────────────────
 
 
55
 
56
class SessionManager:
    """Thread-safe holder for one scraping session, rebuilt when stale."""

    def __init__(self):
        # Lock guards every read/write of the cached session state.
        self._lock = threading.Lock()
        self._session = None
        self._backend: Optional[str] = None
        self._created_at: float = 0
        self._max_age: float = 300  # seconds before the session is considered stale

    def _check_cloudflare(self, status_code: int, body: str = ""):
        """Raise RuntimeError when the response looks like a Cloudflare block page."""
        if status_code not in (403, 503):
            return
        lowered = body.lower()
        if "cloudflare" in lowered or "cf-ray" in lowered:
            raise RuntimeError(f"Blocked by Cloudflare (HTTP {status_code})")

    def _build_session(self):
        """Create a fresh session, preferring curl_cffi over cloudscraper."""
        if HAS_CURL_CFFI:
            try:
                sess = CurlSession(impersonate="chrome120")
                resp = sess.get(BASE_URL, timeout=20)
                self._check_cloudflare(resp.status_code, resp.text)
                resp.raise_for_status()
                print(f"[session] curl_cffi OK – cookies: {list(sess.cookies.keys())}")
                return sess, "curl_cffi"
            except Exception as exc:
                print(f"[session] curl_cffi failed: {exc}")

        if HAS_CLOUDSCRAPER:
            try:
                sess = cloudscraper.create_scraper(
                    browser={
                        "browser": "chrome",
                        "platform": "windows",
                        "mobile": False,
                    }
                )
                resp = sess.get(BASE_URL, timeout=20)
                self._check_cloudflare(resp.status_code, resp.text)
                resp.raise_for_status()
                print(f"[session] cloudscraper OK – cookies: {list(sess.cookies.keys())}")
                return sess, "cloudscraper"
            except Exception as exc:
                print(f"[session] cloudscraper failed: {exc}")

        raise RuntimeError("No scraping backend available")

    def get(self):
        """Return the cached session, transparently rebuilding it when stale."""
        with self._lock:
            moment = time.time()
            needs_rebuild = (
                self._session is None
                or (moment - self._created_at) > self._max_age
            )
            if needs_rebuild:
                self._session, self._backend = self._build_session()
                self._created_at = moment
            return self._session

    def invalidate(self):
        """Drop the cached session so the next get() builds a new one."""
        with self._lock:
            self._session = None
 
115
 
 
 
 
116
 
117
# Module-level singleton shared by every request handler in this process.
sessions = SessionManager()
118
+
119
+ # ── Perplexity core ───────────────────────────────────────────
120
+
121
+ def _build_payload(query: str) -> dict:
122
  return {
123
  "params": {
124
  "attachments": [],
125
+ "language": "en-US",
126
  "timezone": "Europe/Paris",
127
  "search_focus": "internet",
128
  "sources": ["web"],
 
150
  "query_str": query,
151
  }
152
 
153
+
154
+ def _extract_chunks(patch: dict) -> list[str]:
 
 
 
 
 
 
 
 
 
 
155
  op = patch.get("op")
156
  path = patch.get("path", "")
157
  if op == "replace" and path == "":
158
  return patch.get("value", {}).get("chunks", [])
159
  if op == "add" and "/chunks/" in path:
160
+ v = patch.get("value", "")
161
+ return [v] if v else []
162
  return []
163
 
164
+
165
+ def _parse_stream_full(resp) -> tuple[str, list[dict]]:
166
+ """Parse entire SSE stream, return (answer, sources)."""
167
+ full = ""
168
  sources = []
169
  seen_urls = set()
170
 
 
186
  for block in event.get("blocks", []):
187
  usage = block.get("intended_usage", "")
188
 
189
+ # sources
190
+ for key in ("web_result_block", "sources_mode_block"):
191
+ for wr in block.get(key, {}).get("web_results", []):
192
+ url = wr.get("url", "")
193
+ if url and url not in seen_urls:
194
+ seen_urls.add(url)
195
+ sources.append({
196
+ "name": wr.get("name", ""),
197
+ "url": url,
198
+ "snippet": wr.get("snippet", ""),
199
+ })
200
+
201
+ pb = block.get("plan_block", {})
202
+ for step in pb.get("steps", []):
203
+ for wr in step.get("web_results_content", {}).get("web_results", []):
204
+ url = wr.get("url", "")
205
+ if url and url not in seen_urls:
206
+ seen_urls.add(url)
207
+ sources.append({
208
+ "name": wr.get("name", ""),
209
+ "url": url,
210
+ "snippet": wr.get("snippet", ""),
211
+ })
212
 
213
  if usage != TARGET_USAGE:
214
  continue
 
216
  diff = block.get("diff_block", {})
217
  if diff.get("field") == "markdown_block":
218
  for patch in diff.get("patches", []):
219
+ for chunk in _extract_chunks(patch):
220
  if chunk:
221
+ full += chunk
222
 
223
  if is_final:
224
  md = block.get("markdown_block", {})
225
  if md.get("answer"):
226
+ full = md["answer"]
227
 
228
+ return full, sources
229
 
230
+
231
+ def _iter_stream_chunks(resp):
232
+ """Yield text chunks as they arrive (for SSE streaming)."""
233
  for raw_line in resp.iter_lines():
234
  if isinstance(raw_line, bytes):
235
  raw_line = raw_line.decode("utf-8", errors="replace")
 
253
  diff = block.get("diff_block", {})
254
  if diff.get("field") == "markdown_block":
255
  for patch in diff.get("patches", []):
256
+ for chunk in _extract_chunks(patch):
257
  if chunk:
258
  yield chunk
259
 
260
  if is_final:
261
  md = block.get("markdown_block", {})
262
  if md.get("answer"):
263
+ yield md["answer"]
264
+
 
265
 
266
def _do_request(query: str, stream: bool = False):
    """
    Send *query* to Perplexity with retries.

    Returns the raw response object when ``stream`` is True (the caller
    iterates its SSE lines), otherwise an ``(answer, sources)`` tuple
    from fully parsing the stream.

    Raises:
        RuntimeError: when all MAX_RETRIES attempts fail.
    """
    payload = _build_payload(query)
    headers = {**HEADERS, "X-Request-ID": str(uuid.uuid4())}
    last_err = None

    for attempt in range(1, MAX_RETRIES + 1):
        try:
            session = sessions.get()
            resp = session.post(
                ASK_URL, headers=headers, json=payload, stream=True, timeout=60
            )
            if resp.status_code in (403, 503):
                # Best-effort capture of the block page for diagnostics;
                # reading a streamed body may itself fail.
                body = ""
                try:
                    body = resp.text[:500]
                except Exception:
                    pass
                sessions.invalidate()
                # BUGFIX: `body` was collected but never surfaced in the error.
                detail = f": {body[:200]}" if body else ""
                raise RuntimeError(
                    f"Blocked (HTTP {resp.status_code}){detail}"
                )
            resp.raise_for_status()

            if stream:
                return resp  # caller will iterate the SSE stream
            return _parse_stream_full(resp)

        except Exception as e:
            last_err = e
            print(f"[ask] attempt {attempt}/{MAX_RETRIES} failed: {e}")
            # Any failure may mean a stale or blocked session; force a
            # rebuild before the next attempt.
            sessions.invalidate()
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_DELAY)

    raise RuntimeError(f"All retries failed: {last_err}")
305
 
 
 
 
306
 
307
+ # ── Pydantic models (OpenAI-compatible) ───────────────────────
308
+
309
class ChatMessage(BaseModel):
    """One turn of an OpenAI-style conversation; defaults make both fields optional."""
    # _messages_to_query only distinguishes the "system" and "user" roles.
    role: str = "user"
    content: str = ""
312
+
313
 
314
class ChatCompletionRequest(BaseModel):
    """Subset of the OpenAI /v1/chat/completions request body."""
    model: str = MODEL_NAME
    messages: list[ChatMessage]
    stream: bool = False
    # Accepted for OpenAI-client compatibility; not referenced by the
    # handlers visible in this file, so they do not affect the upstream call.
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
320
 
 
 
 
321
 
322
+ # ── FastAPI app ───────────────────────────────────────────────
323
+
324
# Module-level ASGI application; the route handlers below register on it.
app = FastAPI(
    title="Perplexity Ask – OpenAI Compatible API",
    version="1.0.0",
)
328
+
329
+
330
def _messages_to_query(messages: list[ChatMessage]) -> str:
    """Flatten an OpenAI message list into one Perplexity query string.

    The most recent user message becomes the query; all system messages
    (in order) are joined and prepended, separated by a blank line.
    """
    system_lines = [msg.content for msg in messages if msg.role == "system"]
    latest_user = ""
    for msg in messages:
        if msg.role == "user":
            latest_user = msg.content  # keep overwriting: last user turn wins
    if system_lines:
        return "\n".join(system_lines) + "\n\n" + latest_user
    return latest_user
345
+
346
+
347
def _make_chat_completion(answer: str, sources: list[dict], req_id: str) -> dict:
    """Wrap *answer* (with source footnotes appended) in an OpenAI ChatCompletion dict."""
    if sources:
        footnotes = [
            f"{idx}. [{src.get('name', 'Link')}]({src.get('url', '')})\n"
            for idx, src in enumerate(sources, 1)
        ]
        answer = answer + "\n\n---\n**Sources:**\n" + "".join(footnotes)

    assistant_message = {"role": "assistant", "content": answer}
    return {
        "id": req_id,
        "object": "chat.completion",
        "created": int(time.time()),
        "model": MODEL_NAME,
        "choices": [
            {"index": 0, "message": assistant_message, "finish_reason": "stop"}
        ],
        # Token accounting is not available from the scraped endpoint.
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
373
+
374
+
375
def _stream_openai_chunks(query: str, req_id: str):
    """Generator yielding SSE lines in OpenAI streaming format.

    Emits one ``chat.completion.chunk`` per text chunk from Perplexity,
    then an empty-delta chunk with ``finish_reason="stop"`` and the
    ``[DONE]`` sentinel. Errors are surfaced in-band as a final chunk so
    the client stream still terminates cleanly.
    """

    def sse_chunk(delta: dict, finish_reason) -> str:
        # One OpenAI-style chunk serialized as an SSE `data:` line.
        # (Extracted helper: this dict was previously built three times.)
        payload = {
            "id": req_id,
            "object": "chat.completion.chunk",
            "created": int(time.time()),
            "model": MODEL_NAME,
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": finish_reason,
                }
            ],
        }
        return f"data: {json.dumps(payload)}\n\n"

    try:
        resp = _do_request(query, stream=True)
        for chunk_text in _iter_stream_chunks(resp):
            yield sse_chunk({"content": chunk_text}, None)

        # Final chunk closes the completion.
        yield sse_chunk({}, "stop")
        yield "data: [DONE]\n\n"

    except Exception as e:
        # Report the failure in-band; many OpenAI clients have no other
        # error channel once streaming has begun.
        yield sse_chunk({"content": f"\n\n[ERROR] {e}"}, "stop")
        yield "data: [DONE]\n\n"
428
 
 
429
 
430
+ # ── Endpoints ─────────────────────────────────────────────────
 
 
431
 
432
@app.get("/")
async def root():
    """Landing route advertising the service and its available endpoints."""
    available = [
        "/v1/models",
        "/v1/chat/completions",
        "/health",
    ]
    return {
        "message": "Perplexity Ask API – OpenAI compatible",
        "endpoints": available,
    }
442
+
443
 
444
@app.get("/health")
async def health():
    """Liveness probe for the hosting platform; always reports ok."""
    return {"status": "ok"}
447
 
448
+
449
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing exposing the single wrapped model."""
    model_entry = {
        "id": MODEL_NAME,
        "object": "model",
        # Static placeholder timestamp; the scraped backend has no real one.
        "created": 1700000000,
        "owned_by": "perplexity-community",
    }
    return {"object": "list", "data": [model_entry]}
462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
+ @app.post("/v1/chat/completions")
465
+ async def chat_completions(req: ChatCompletionRequest):
466
+ query = _messages_to_query(req.messages)
467
+ if not query.strip():
468
+ raise HTTPException(status_code=400, detail="Empty query")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
469
 
470
+ req_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
 
 
 
 
 
 
 
 
 
 
 
 
 
471
 
472
+ # ── Streaming ──
473
+ if req.stream:
474
  return StreamingResponse(
475
+ _stream_openai_chunks(query, req_id),
476
  media_type="text/event-stream",
477
  headers={
478
  "Cache-Control": "no-cache",
 
480
  },
481
  )
482
 
483
+ # ── Non-streaming ──
484
  try:
485
+ answer, sources = _do_request(query, stream=False)
486
+ except RuntimeError as e:
 
487
  raise HTTPException(status_code=502, detail=str(e))
488
 
489
  if not answer:
490
+ raise HTTPException(status_code=502, detail="No answer received from Perplexity")
 
 
 
 
 
 
 
491
 
492
+ return JSONResponse(_make_chat_completion(answer, sources, req_id))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
 
 
 
 
494
 
495
# ── Catch-all for /chat/completions without /v1 prefix ────────
@app.post("/chat/completions")
async def chat_completions_no_prefix(req: ChatCompletionRequest):
    """Alias for clients that omit the /v1 prefix; delegates unchanged."""
    return await chat_completions(req)