Spaces:
Sleeping
Sleeping
simonefilosofi commited on
Commit ·
ebdf453
0
Parent(s):
feat: initial commit - upload local project to remote repository
Browse files- .env.example +4 -0
- Dockerfile +24 -0
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/routes/__init__.py +0 -0
- app/api/routes/chat.py +45 -0
- app/api/routes/documents.py +131 -0
- app/api/routes/health.py +8 -0
- app/core/__init__.py +0 -0
- app/core/config.py +17 -0
- app/core/security.py +56 -0
- app/main.py +25 -0
- app/services/__init__.py +0 -0
- app/services/document_processor.py +60 -0
- app/services/embeddings.py +25 -0
- app/services/llm.py +28 -0
- app/services/rag.py +55 -0
- app/services/vector_store.py +60 -0
- requirements.txt +13 -0
.env.example
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SUPABASE_URL=https://your-project.supabase.co
SUPABASE_ANON_KEY=your-anon-key
SUPABASE_JWT_SECRET=your-jwt-secret
ALLOWED_ORIGINS=http://localhost:5173,https://yourusername.github.io
|
Dockerfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Slim Python 3.11 base keeps the final image small.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
# gcc/g++ are needed to build native extensions pulled in by requirements.txt.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the embedding model so first requests are fast
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"

# Copy application code (copied after deps so code edits don't bust the pip cache layer)
COPY app/ ./app/

# HuggingFace Spaces expects port 7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/__init__.py
ADDED
|
File without changes
|
app/api/__init__.py
ADDED
|
File without changes
|
app/api/routes/__init__.py
ADDED
|
File without changes
|
app/api/routes/chat.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Depends, Header
|
| 4 |
+
from fastapi.responses import StreamingResponse
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
|
| 7 |
+
from ...core.security import get_user_id, validate_llm_key
|
| 8 |
+
from ...services.rag import rag_stream
|
| 9 |
+
|
| 10 |
+
router = APIRouter(prefix="/chat", tags=["chat"])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class ChatRequest(BaseModel):
    """Request body for the streaming chat endpoint."""

    question: str
    # Not read by chat_stream in this file — presumably reserved for
    # future multi-turn support; verify against the frontend caller.
    session_id: str | None = None
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
async def _event_stream(user_jwt: str, groq_key: str, question: str):
    """Adapt the RAG token generator into Server-Sent Events frames."""
    try:
        async for token in rag_stream(user_jwt, groq_key, question):
            payload = json.dumps({"token": token})
            yield f"data: {payload}\n\n"
        # Sentinel frame so the client knows the answer is complete.
        yield "data: [DONE]\n\n"
    except Exception as exc:
        # Boundary handler: surface the failure to the client as one
        # final SSE frame instead of silently dropping the connection.
        payload = json.dumps({"error": str(exc)})
        yield f"data: {payload}\n\n"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@router.post("/stream")
|
| 31 |
+
async def chat_stream(
|
| 32 |
+
body: ChatRequest,
|
| 33 |
+
auth: tuple = Depends(get_user_id),
|
| 34 |
+
groq_key: str = Depends(validate_llm_key),
|
| 35 |
+
) -> StreamingResponse:
|
| 36 |
+
user_jwt, _ = auth
|
| 37 |
+
|
| 38 |
+
return StreamingResponse(
|
| 39 |
+
_event_stream(user_jwt, groq_key, body.question),
|
| 40 |
+
media_type="text/event-stream",
|
| 41 |
+
headers={
|
| 42 |
+
"Cache-Control": "no-cache",
|
| 43 |
+
"X-Accel-Buffering": "no",
|
| 44 |
+
},
|
| 45 |
+
)
|
app/api/routes/documents.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, UploadFile, status
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
|
| 6 |
+
from ...core.security import get_user_id
|
| 7 |
+
from ...services.document_processor import process_pdf, process_url
|
| 8 |
+
from ...services.vector_store import get_supabase_client, insert_chunks
|
| 9 |
+
|
| 10 |
+
router = APIRouter(prefix="/documents", tags=["documents"])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class URLIngestRequest(BaseModel):
    """Request body for ingesting a web page by URL."""

    url: str
    # Optional display title; ingest_url falls back to the URL itself.
    title: str | None = None
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
async def _process_and_store(
    document_id: str,
    user_id: str,
    user_jwt: str,
    chunks_coro,
) -> None:
    """Background task: process document, embed chunks, update status."""
    client = get_supabase_client(user_jwt)
    try:
        # chunks_coro is an un-awaited coroutine created by the request
        # handler (process_pdf / process_url); it actually runs here,
        # off the request path.
        chunks = await chunks_coro
        await insert_chunks(client, document_id, user_id, chunks)
        client.table("documents").update({"status": "ready"}).eq("id", document_id).execute()
    except Exception:
        # Mark the row failed so it doesn't sit in "processing" forever,
        # then re-raise so the task runner still sees/logs the error.
        client.table("documents").update({"status": "error"}).eq("id", document_id).execute()
        raise
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@router.post("/upload", status_code=status.HTTP_202_ACCEPTED)
|
| 36 |
+
async def upload_document(
|
| 37 |
+
background_tasks: BackgroundTasks,
|
| 38 |
+
file: UploadFile,
|
| 39 |
+
auth: tuple = Depends(get_user_id),
|
| 40 |
+
) -> dict:
|
| 41 |
+
user_jwt, user_id = auth
|
| 42 |
+
|
| 43 |
+
if file.content_type not in ("application/pdf", "application/octet-stream"):
|
| 44 |
+
raise HTTPException(status_code=400, detail="Only PDF files are supported")
|
| 45 |
+
|
| 46 |
+
file_bytes = await file.read()
|
| 47 |
+
client = get_supabase_client(user_jwt)
|
| 48 |
+
|
| 49 |
+
# Insert document row immediately with status=processing
|
| 50 |
+
result = (
|
| 51 |
+
client.table("documents")
|
| 52 |
+
.insert(
|
| 53 |
+
{
|
| 54 |
+
"user_id": user_id,
|
| 55 |
+
"title": file.filename or "Untitled",
|
| 56 |
+
"source_type": "pdf",
|
| 57 |
+
"status": "processing",
|
| 58 |
+
}
|
| 59 |
+
)
|
| 60 |
+
.execute()
|
| 61 |
+
)
|
| 62 |
+
document_id: str = result.data[0]["id"]
|
| 63 |
+
|
| 64 |
+
background_tasks.add_task(
|
| 65 |
+
_process_and_store,
|
| 66 |
+
document_id,
|
| 67 |
+
user_id,
|
| 68 |
+
user_jwt,
|
| 69 |
+
process_pdf(file_bytes, file.filename or "document.pdf"),
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
+
return {"document_id": document_id, "status": "processing"}
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
@router.post("/ingest-url", status_code=status.HTTP_202_ACCEPTED)
|
| 76 |
+
async def ingest_url(
|
| 77 |
+
body: URLIngestRequest,
|
| 78 |
+
background_tasks: BackgroundTasks,
|
| 79 |
+
auth: tuple = Depends(get_user_id),
|
| 80 |
+
) -> dict:
|
| 81 |
+
user_jwt, user_id = auth
|
| 82 |
+
client = get_supabase_client(user_jwt)
|
| 83 |
+
|
| 84 |
+
result = (
|
| 85 |
+
client.table("documents")
|
| 86 |
+
.insert(
|
| 87 |
+
{
|
| 88 |
+
"user_id": user_id,
|
| 89 |
+
"title": body.title or body.url,
|
| 90 |
+
"source_type": "url",
|
| 91 |
+
"source_url": body.url,
|
| 92 |
+
"status": "processing",
|
| 93 |
+
}
|
| 94 |
+
)
|
| 95 |
+
.execute()
|
| 96 |
+
)
|
| 97 |
+
document_id: str = result.data[0]["id"]
|
| 98 |
+
|
| 99 |
+
background_tasks.add_task(
|
| 100 |
+
_process_and_store,
|
| 101 |
+
document_id,
|
| 102 |
+
user_id,
|
| 103 |
+
user_jwt,
|
| 104 |
+
process_url(body.url),
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
return {"document_id": document_id, "status": "processing"}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@router.get("")
|
| 111 |
+
async def list_documents(auth: tuple = Depends(get_user_id)) -> list[dict]:
|
| 112 |
+
user_jwt, _ = auth
|
| 113 |
+
client = get_supabase_client(user_jwt)
|
| 114 |
+
result = (
|
| 115 |
+
client.table("documents")
|
| 116 |
+
.select("id, title, source_type, source_url, status, created_at")
|
| 117 |
+
.order("created_at", desc=True)
|
| 118 |
+
.execute()
|
| 119 |
+
)
|
| 120 |
+
return result.data or []
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
@router.delete("/{document_id}", status_code=status.HTTP_204_NO_CONTENT)
|
| 124 |
+
async def delete_document(
|
| 125 |
+
document_id: str,
|
| 126 |
+
auth: tuple = Depends(get_user_id),
|
| 127 |
+
) -> None:
|
| 128 |
+
user_jwt, _ = auth
|
| 129 |
+
client = get_supabase_client(user_jwt)
|
| 130 |
+
# RLS ensures only the owner can delete
|
| 131 |
+
client.table("documents").delete().eq("id", document_id).execute()
|
app/api/routes/health.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
|
| 3 |
+
# Unauthenticated liveness router (no prefix).
router = APIRouter()


@router.get("/health")
async def health() -> dict:
    """Liveness probe for the hosting platform / load balancer."""
    return {"status": "ok"}
|
app/core/__init__.py
ADDED
|
File without changes
|
app/core/config.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Settings(BaseSettings):
    """Application configuration loaded from the environment / .env file."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    SUPABASE_URL: str
    SUPABASE_ANON_KEY: str
    SUPABASE_JWT_SECRET: str
    ALLOWED_ORIGINS: str = "http://localhost:5173"

    @property
    def origins_list(self) -> list[str]:
        """ALLOWED_ORIGINS split on commas, trimmed, with empties dropped."""
        pieces = (part.strip() for part in self.ALLOWED_ORIGINS.split(","))
        return [origin for origin in pieces if origin]


# Module-level singleton, read once at import time.
settings = Settings()
|
app/core/security.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
from fastapi import Header, HTTPException, status
|
| 4 |
+
from jose import JWTError, jwt
|
| 5 |
+
|
| 6 |
+
from .config import settings
|
| 7 |
+
|
| 8 |
+
_GROQ_KEY_RE = re.compile(r"^gsk_[a-zA-Z0-9]{50,}$")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def verify_jwt(token: str) -> dict:
    """Decode and validate a Supabase JWT. Raises 401 on failure.

    Verifies the HS256 signature against SUPABASE_JWT_SECRET and requires
    the "authenticated" audience claim; returns the decoded payload.
    """
    try:
        payload = jwt.decode(
            token,
            settings.SUPABASE_JWT_SECRET,
            algorithms=["HS256"],
            audience="authenticated",
        )
        return payload
    except JWTError as exc:
        # Covers bad signature, wrong audience, expiry, malformed token.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
        ) from exc
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_user_id(authorization: str = Header(...)) -> tuple[str, str]:
    """
    Extract bearer token and user_id from the Authorization header.

    Returns (raw_token, user_id). The raw token is returned as well so
    callers can forward it to Supabase for RLS-scoped queries.

    Raises:
        HTTPException 401: malformed header, invalid token, or missing sub.
    """
    if not authorization.startswith("Bearer "):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header must start with 'Bearer '",
        )
    token = authorization.removeprefix("Bearer ").strip()
    payload = verify_jwt(token)
    # Supabase puts the user's UUID in the standard "sub" claim.
    user_id: str | None = payload.get("sub")
    if not user_id:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token missing subject claim",
        )
    return token, user_id
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def validate_llm_key(x_llm_key: str = Header(...)) -> str:
    """Validate the Groq API key format. Never persisted.

    Reads the X-LLM-Key header, checks it against _GROQ_KEY_RE, and
    returns it unchanged for ephemeral use by the request.

    Raises:
        HTTPException 400: if the key does not match the expected format.
    """
    if not _GROQ_KEY_RE.match(x_llm_key):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid LLM key format",
        )
    return x_llm_key
|
app/main.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
|
| 4 |
+
from .api.routes.chat import router as chat_router
|
| 5 |
+
from .api.routes.documents import router as documents_router
|
| 6 |
+
from .api.routes.health import router as health_router
|
| 7 |
+
from .core.config import settings
|
| 8 |
+
|
| 9 |
+
# Application instance with OpenAPI metadata.
app = FastAPI(
    title="RAGnarok API",
    description="Production-ready RAG backend with Supabase + Groq",
    version="1.0.0",
)

# CORS: allowed origins come from the ALLOWED_ORIGINS env var
# (comma-separated, parsed by Settings.origins_list).
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.origins_list,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount routers; documents and chat carry their own /documents and /chat
# prefixes, health is unprefixed.
app.include_router(health_router)
app.include_router(documents_router)
app.include_router(chat_router)
|
app/services/__init__.py
ADDED
|
File without changes
|
app/services/document_processor.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
|
| 4 |
+
import httpx
|
| 5 |
+
from bs4 import BeautifulSoup
|
| 6 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 7 |
+
from pypdf import PdfReader
|
| 8 |
+
|
| 9 |
+
# Chunking parameters, in characters.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Shared splitter: tries paragraph breaks, then newlines, then spaces,
# before falling back to hard character cuts.
_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=["\n\n", "\n", " ", ""],
)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
class TextChunk:
    """A chunk of extracted text plus provenance metadata."""

    content: str
    # e.g. {"source": <filename-or-url>, "source_type": "pdf"|"url",
    #       "chunk_index": <position in the split output>}
    metadata: dict
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _split_text(text: str, base_metadata: dict) -> list[TextChunk]:
    """Split *text* and wrap each non-blank piece as a TextChunk.

    chunk_index is the piece's position in the splitter output (blank
    pieces are skipped but still consume an index).
    """
    pieces = _splitter.split_text(text)
    chunks: list[TextChunk] = []
    for index, piece in enumerate(pieces):
        if not piece.strip():
            continue
        chunks.append(
            TextChunk(content=piece, metadata={**base_metadata, "chunk_index": index})
        )
    return chunks
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
async def process_pdf(file_bytes: bytes, filename: str) -> list[TextChunk]:
    """Extract text from a PDF and split into chunks.

    Pages with no extractable text (e.g. scanned images) are skipped;
    remaining pages are joined with blank lines so the splitter can
    prefer page boundaries.
    """
    reader = PdfReader(io.BytesIO(file_bytes))
    pages_text: list[str] = []
    # Plain iteration: the previous enumerate() bound an unused page_num.
    for page in reader.pages:
        # extract_text() may yield None/empty for image-only pages.
        text = page.extract_text() or ""
        if text.strip():
            pages_text.append(text)

    full_text = "\n\n".join(pages_text)
    return _split_text(full_text, {"source": filename, "source_type": "pdf"})
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
async def process_url(url: str) -> list[TextChunk]:
    """Fetch a URL, strip boilerplate HTML, and split into chunks.

    Raises httpx.HTTPStatusError for non-2xx responses (via
    raise_for_status), which the background task turns into an
    "error" document status.
    """
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        response = await client.get(url, headers={"User-Agent": "RAGnarok/1.0"})
        response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Remove noisy elements
    for tag in soup(["script", "style", "nav", "footer", "header", "aside", "form"]):
        tag.decompose()

    text = soup.get_text(separator="\n", strip=True)
    return _split_text(text, {"source": url, "source_type": "url"})
|
app/services/embeddings.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
|
| 6 |
+
MODEL_NAME = "all-MiniLM-L6-v2"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@lru_cache(maxsize=1)
|
| 10 |
+
def _get_model() -> SentenceTransformer:
|
| 11 |
+
return SentenceTransformer(MODEL_NAME)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def embed_text(text: str) -> list[float]:
    """Embed a single piece of text and return a normalised float list."""
    # normalize_embeddings=True yields unit vectors, so dot product equals
    # cosine similarity downstream.
    vector: np.ndarray = _get_model().encode(text, normalize_embeddings=True)
    return vector.tolist()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed multiple texts in one forward pass."""
    matrix: np.ndarray = _get_model().encode(
        texts, normalize_embeddings=True, batch_size=32
    )
    return matrix.tolist()
|
app/services/llm.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import AsyncGenerator
|
| 2 |
+
|
| 3 |
+
from groq import AsyncGroq
|
| 4 |
+
|
| 5 |
+
MODEL = "llama-3.1-8b-instant"
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
async def stream_completion(
|
| 9 |
+
groq_key: str,
|
| 10 |
+
messages: list[dict],
|
| 11 |
+
) -> AsyncGenerator[str, None]:
|
| 12 |
+
"""
|
| 13 |
+
Stream a chat completion from Groq.
|
| 14 |
+
Yields text delta strings as they arrive.
|
| 15 |
+
The key is used ephemerally and never stored.
|
| 16 |
+
"""
|
| 17 |
+
client = AsyncGroq(api_key=groq_key)
|
| 18 |
+
stream = await client.chat.completions.create(
|
| 19 |
+
model=MODEL,
|
| 20 |
+
messages=messages,
|
| 21 |
+
stream=True,
|
| 22 |
+
temperature=0.2,
|
| 23 |
+
max_tokens=1024,
|
| 24 |
+
)
|
| 25 |
+
async for chunk in stream:
|
| 26 |
+
delta = chunk.choices[0].delta.content
|
| 27 |
+
if delta:
|
| 28 |
+
yield delta
|
app/services/rag.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import AsyncGenerator
|
| 2 |
+
|
| 3 |
+
from .embeddings import embed_text
|
| 4 |
+
from .llm import stream_completion
|
| 5 |
+
from .vector_store import get_supabase_client, match_documents_rpc
|
| 6 |
+
|
| 7 |
+
# System prompt shared by every chat request; forces source-grounded
# answers with numbered citations.
SYSTEM_PROMPT = (
    "You are a helpful AI assistant. Answer the user's question using ONLY the provided "
    "context sources. If the answer is not in the sources, say so clearly. "
    "Cite sources by their number, e.g. [Source 1]."
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
async def rag_stream(
    user_jwt: str,
    groq_key: str,
    question: str,
    match_count: int = 5,
    match_threshold: float = 0.5,
) -> AsyncGenerator[str, None]:
    """
    Full RAG pipeline:
    1. Embed the question
    2. Retrieve matching chunks via RPC (RLS enforced)
    3. Build prompt with sources
    4. Stream Groq response back to the caller

    Args:
        user_jwt: the caller's Supabase JWT (scopes retrieval via RLS).
        groq_key: the caller's Groq API key, used ephemerally.
        question: the natural-language question.
        match_count: max chunks to retrieve.
        match_threshold: minimum similarity passed to the RPC.
    """
    # Step 1 — embed query
    # NOTE(review): embed_text runs a model forward pass synchronously on
    # the event loop — consider run_in_executor if latency matters.
    query_embedding = embed_text(question)

    # Step 2 — vector search (user's data only thanks to RLS)
    client = get_supabase_client(user_jwt)
    chunks = await match_documents_rpc(client, query_embedding, match_count, match_threshold)

    # Step 3 — build prompt
    if chunks:
        # Number sources from 1 so the model can cite them as [Source N].
        context_parts = [
            f"[Source {i + 1}] (similarity: {c['similarity']:.2f})\n{c['content']}"
            for i, c in enumerate(chunks)
        ]
        context_block = "\n\n---\n\n".join(context_parts)
        user_message = f"Context:\n{context_block}\n\nQuestion: {question}"
    else:
        # No hits: tell the model explicitly so it answers accordingly.
        user_message = (
            f"No relevant documents were found in your knowledge base.\n\nQuestion: {question}"
        )

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]

    # Step 4 — stream LLM response
    async for token in stream_completion(groq_key, messages):
        yield token
|
app/services/vector_store.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from supabase import Client, create_client
|
| 2 |
+
|
| 3 |
+
from ..core.config import settings
|
| 4 |
+
from .document_processor import TextChunk
|
| 5 |
+
from .embeddings import embed_batch
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_supabase_client(user_jwt: str) -> Client:
    """
    Create a Supabase client authenticated as the end user.

    Setting the auth header activates RLS policies, so all table and RPC
    access through this client is scoped to the token's user.
    """
    client: Client = create_client(settings.SUPABASE_URL, settings.SUPABASE_ANON_KEY)
    # Attach the user's JWT to PostgREST requests (activates RLS).
    client.postgrest.auth(user_jwt)
    return client
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
async def insert_chunks(
    client: Client,
    document_id: str,
    user_id: str,
    chunks: list[TextChunk],
) -> None:
    """Embed all chunks and bulk-insert them into document_chunks.

    No-op for an empty chunk list.

    NOTE(review): embed_batch is a synchronous model forward pass and the
    insert is a blocking HTTP call — both run on the event loop; confirm
    this is acceptable for the expected document sizes.
    """
    if not chunks:
        return

    # One batched forward pass for all chunk texts.
    texts = [c.content for c in chunks]
    embeddings = embed_batch(texts)

    # embeddings is index-aligned with chunks by construction.
    rows = [
        {
            "document_id": document_id,
            "user_id": user_id,
            "content": chunk.content,
            "metadata": chunk.metadata,
            "embedding": embedding,
        }
        for chunk, embedding in zip(chunks, embeddings)
    ]

    client.table("document_chunks").insert(rows).execute()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
async def match_documents_rpc(
    client: Client,
    query_embedding: list[float],
    match_count: int = 5,
    match_threshold: float = 0.5,
) -> list[dict]:
    """Call the match_documents Postgres function via RPC.

    The client is expected to be JWT-scoped (see get_supabase_client) so
    RLS limits matches to the caller's own chunks. Returns the RPC rows,
    or an empty list when nothing matched.
    """
    result = client.rpc(
        "match_documents",
        {
            "query_embedding": query_embedding,
            "match_count": match_count,
            "match_threshold": match_threshold,
        },
    ).execute()
    return result.data or []
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.111.0
|
| 2 |
+
uvicorn[standard]==0.30.1
|
| 3 |
+
python-multipart==0.0.9
|
| 4 |
+
pydantic-settings==2.3.4
|
| 5 |
+
python-jose[cryptography]==3.3.0
|
| 6 |
+
supabase==2.5.0
|
| 7 |
+
sentence-transformers==3.0.1
|
| 8 |
+
groq==0.9.0
|
| 9 |
+
httpx==0.27.0
|
| 10 |
+
beautifulsoup4==4.12.3
|
| 11 |
+
pypdf==4.2.0
|
| 12 |
+
langchain-text-splitters==0.2.2
|
| 13 |
+
python-dotenv==1.0.1
|