simonefilosofi committed on
Commit
ebdf453
·
0 Parent(s):

feat: initial commit - upload local project to remote repository

Browse files
.env.example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ SUPABASE_URL=https://your-project.supabase.co
2
+ SUPABASE_ANON_KEY=your-anon-key
3
+ SUPABASE_JWT_SECRET=your-jwt-secret
4
+ ALLOWED_ORIGINS=http://localhost:5173,https://yourusername.github.io
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: slim Python 3.11 keeps the final image small.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
# gcc/g++ are required to build native extensions pulled in by requirements.txt.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
# requirements.txt is copied alone first so this layer stays cached until deps change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the embedding model so first requests are fast
# NOTE(review): the model is cached under the build-time user's HOME; if the
# runtime user differs (e.g. Spaces non-root user), confirm the cache is readable.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"

# Copy application code
COPY app/ ./app/

# HuggingFace Spaces expects port 7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/routes/__init__.py ADDED
File without changes
app/api/routes/chat.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ from fastapi import APIRouter, Depends, Header
4
+ from fastapi.responses import StreamingResponse
5
+ from pydantic import BaseModel
6
+
7
+ from ...core.security import get_user_id, validate_llm_key
8
+ from ...services.rag import rag_stream
9
+
10
router = APIRouter(prefix="/chat", tags=["chat"])


class ChatRequest(BaseModel):
    """Payload for POST /chat/stream."""

    # The user's natural-language question to answer via RAG.
    question: str
    # Optional client-side conversation id; not read by this route's handler.
    session_id: str | None = None
16
+
17
+
18
async def _event_stream(user_jwt: str, groq_key: str, question: str):
    """Wrap the RAG token generator as server-sent events.

    Emits one ``data: {"token": ...}`` event per token, then a ``[DONE]``
    sentinel; any failure is reported as a final ``{"error": ...}`` event
    instead of breaking the stream.
    """
    try:
        async for token in rag_stream(user_jwt, groq_key, question):
            yield f"data: {json.dumps({'token': token})}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as exc:
        yield f"data: {json.dumps({'error': str(exc)})}\n\n"
28
+
29
+
30
+ @router.post("/stream")
31
+ async def chat_stream(
32
+ body: ChatRequest,
33
+ auth: tuple = Depends(get_user_id),
34
+ groq_key: str = Depends(validate_llm_key),
35
+ ) -> StreamingResponse:
36
+ user_jwt, _ = auth
37
+
38
+ return StreamingResponse(
39
+ _event_stream(user_jwt, groq_key, body.question),
40
+ media_type="text/event-stream",
41
+ headers={
42
+ "Cache-Control": "no-cache",
43
+ "X-Accel-Buffering": "no",
44
+ },
45
+ )
app/api/routes/documents.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, UploadFile, status
4
+ from pydantic import BaseModel
5
+
6
+ from ...core.security import get_user_id
7
+ from ...services.document_processor import process_pdf, process_url
8
+ from ...services.vector_store import get_supabase_client, insert_chunks
9
+
10
router = APIRouter(prefix="/documents", tags=["documents"])


class URLIngestRequest(BaseModel):
    """Payload for POST /documents/ingest-url."""

    # Web page to fetch and index.
    url: str
    # Optional display title; the URL itself is used when omitted.
    title: str | None = None
16
+
17
+
18
async def _process_and_store(
    document_id: str,
    user_id: str,
    user_jwt: str,
    chunks_coro,
) -> None:
    """Background task: await the processing coroutine, embed and store the
    resulting chunks, then flip the document row to 'ready' ('error' on failure)."""
    client = get_supabase_client(user_jwt)

    def _set_status(value: str) -> None:
        # Status transitions are what the frontend polls on the documents table.
        client.table("documents").update({"status": value}).eq("id", document_id).execute()

    try:
        chunks = await chunks_coro
        await insert_chunks(client, document_id, user_id, chunks)
        _set_status("ready")
    except Exception:
        _set_status("error")
        raise  # re-raise so the failure is still logged by the task runner
33
+
34
+
35
+ @router.post("/upload", status_code=status.HTTP_202_ACCEPTED)
36
+ async def upload_document(
37
+ background_tasks: BackgroundTasks,
38
+ file: UploadFile,
39
+ auth: tuple = Depends(get_user_id),
40
+ ) -> dict:
41
+ user_jwt, user_id = auth
42
+
43
+ if file.content_type not in ("application/pdf", "application/octet-stream"):
44
+ raise HTTPException(status_code=400, detail="Only PDF files are supported")
45
+
46
+ file_bytes = await file.read()
47
+ client = get_supabase_client(user_jwt)
48
+
49
+ # Insert document row immediately with status=processing
50
+ result = (
51
+ client.table("documents")
52
+ .insert(
53
+ {
54
+ "user_id": user_id,
55
+ "title": file.filename or "Untitled",
56
+ "source_type": "pdf",
57
+ "status": "processing",
58
+ }
59
+ )
60
+ .execute()
61
+ )
62
+ document_id: str = result.data[0]["id"]
63
+
64
+ background_tasks.add_task(
65
+ _process_and_store,
66
+ document_id,
67
+ user_id,
68
+ user_jwt,
69
+ process_pdf(file_bytes, file.filename or "document.pdf"),
70
+ )
71
+
72
+ return {"document_id": document_id, "status": "processing"}
73
+
74
+
75
+ @router.post("/ingest-url", status_code=status.HTTP_202_ACCEPTED)
76
+ async def ingest_url(
77
+ body: URLIngestRequest,
78
+ background_tasks: BackgroundTasks,
79
+ auth: tuple = Depends(get_user_id),
80
+ ) -> dict:
81
+ user_jwt, user_id = auth
82
+ client = get_supabase_client(user_jwt)
83
+
84
+ result = (
85
+ client.table("documents")
86
+ .insert(
87
+ {
88
+ "user_id": user_id,
89
+ "title": body.title or body.url,
90
+ "source_type": "url",
91
+ "source_url": body.url,
92
+ "status": "processing",
93
+ }
94
+ )
95
+ .execute()
96
+ )
97
+ document_id: str = result.data[0]["id"]
98
+
99
+ background_tasks.add_task(
100
+ _process_and_store,
101
+ document_id,
102
+ user_id,
103
+ user_jwt,
104
+ process_url(body.url),
105
+ )
106
+
107
+ return {"document_id": document_id, "status": "processing"}
108
+
109
+
110
+ @router.get("")
111
+ async def list_documents(auth: tuple = Depends(get_user_id)) -> list[dict]:
112
+ user_jwt, _ = auth
113
+ client = get_supabase_client(user_jwt)
114
+ result = (
115
+ client.table("documents")
116
+ .select("id, title, source_type, source_url, status, created_at")
117
+ .order("created_at", desc=True)
118
+ .execute()
119
+ )
120
+ return result.data or []
121
+
122
+
123
@router.delete("/{document_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_document(
    document_id: str,
    auth: tuple = Depends(get_user_id),
) -> None:
    """Delete a document row. RLS ensures only the owner can delete."""
    client = get_supabase_client(auth[0])
    client.table("documents").delete().eq("id", document_id).execute()
app/api/routes/health.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+
3
router = APIRouter()


@router.get("/health")
async def health() -> dict:
    """Simple liveness check; always reports ok."""
    return dict(status="ok")
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+
3
+
4
class Settings(BaseSettings):
    """Application configuration, loaded from the environment / .env file."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    SUPABASE_URL: str
    SUPABASE_ANON_KEY: str
    SUPABASE_JWT_SECRET: str
    ALLOWED_ORIGINS: str = "http://localhost:5173"

    @property
    def origins_list(self) -> list[str]:
        """ALLOWED_ORIGINS split on commas, trimmed, empty entries dropped."""
        trimmed = (origin.strip() for origin in self.ALLOWED_ORIGINS.split(","))
        return [origin for origin in trimmed if origin]


# Single shared settings instance for the whole application.
settings = Settings()
app/core/security.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from fastapi import Header, HTTPException, status
4
+ from jose import JWTError, jwt
5
+
6
+ from .config import settings
7
+
8
+ _GROQ_KEY_RE = re.compile(r"^gsk_[a-zA-Z0-9]{50,}$")
9
+
10
+
11
def verify_jwt(token: str) -> dict:
    """Decode and validate a Supabase JWT. Raises HTTP 401 on failure."""
    try:
        # Supabase signs user tokens with HS256 and audience "authenticated".
        return jwt.decode(
            token,
            settings.SUPABASE_JWT_SECRET,
            algorithms=["HS256"],
            audience="authenticated",
        )
    except JWTError as exc:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
        ) from exc
26
+
27
+
28
def get_user_id(authorization: str = Header(...)) -> tuple[str, str]:
    """
    Extract bearer token and user_id from the Authorization header.
    Returns (raw_token, user_id). Raises HTTP 401 on any failure.
    """
    prefix = "Bearer "
    if not authorization.startswith(prefix):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header must start with 'Bearer '",
        )

    token = authorization[len(prefix):].strip()
    payload = verify_jwt(token)

    # Supabase puts the user's UUID in the standard "sub" claim.
    subject = payload.get("sub")
    if not subject:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token missing subject claim",
        )
    return token, subject
47
+
48
+
49
def validate_llm_key(x_llm_key: str = Header(...)) -> str:
    """Validate the Groq API key format. The key is never persisted."""
    if _GROQ_KEY_RE.match(x_llm_key) is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid LLM key format",
        )
    return x_llm_key
app/main.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from .api.routes.chat import router as chat_router
from .api.routes.documents import router as documents_router
from .api.routes.health import router as health_router
from .core.config import settings

# Application entry point: uvicorn serves `app.main:app` (see Dockerfile CMD).
app = FastAPI(
    title="RAGnarok API",
    description="Production-ready RAG backend with Supabase + Groq",
    version="1.0.0",
)

# Only origins listed in ALLOWED_ORIGINS may call the API from a browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.origins_list,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount routes: /health, /documents/*, /chat/*.
app.include_router(health_router)
app.include_router(documents_router)
app.include_router(chat_router)
app/services/__init__.py ADDED
File without changes
app/services/document_processor.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from dataclasses import dataclass
3
+
4
+ import httpx
5
+ from bs4 import BeautifulSoup
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from pypdf import PdfReader
8
+
9
# Character-based chunking parameters: ~1000-char windows with 200-char overlap.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Shared splitter instance; prefers paragraph, then line, then word boundaries.
_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=["\n\n", "\n", " ", ""],
)


@dataclass
class TextChunk:
    """A single chunk of extracted text plus its source metadata."""

    # Chunk text content.
    content: str
    # Source info, e.g. {"source": ..., "source_type": ..., "chunk_index": ...}.
    metadata: dict
23
+
24
+
25
def _split_text(text: str, base_metadata: dict) -> list[TextChunk]:
    """Split *text* into chunks, tagging each with *base_metadata* plus its
    position in the raw split output as ``chunk_index``."""
    chunks: list[TextChunk] = []
    for index, piece in enumerate(_splitter.split_text(text)):
        if not piece.strip():
            continue  # drop whitespace-only fragments (index numbering is kept)
        chunks.append(
            TextChunk(content=piece, metadata={**base_metadata, "chunk_index": index})
        )
    return chunks
32
+
33
+
34
async def process_pdf(file_bytes: bytes, filename: str) -> list[TextChunk]:
    """Extract text from a PDF and split into chunks.

    Args:
        file_bytes: Raw PDF file content.
        filename: Original filename, recorded as each chunk's source.

    Returns:
        Chunks tagged with {"source": filename, "source_type": "pdf"}.
    """
    reader = PdfReader(io.BytesIO(file_bytes))

    # Keep only pages that actually yield text; extract_text() can return
    # None/"" for image-only or empty pages. (The original enumerate() index
    # was unused, so pages are iterated directly.)
    pages_text: list[str] = []
    for page in reader.pages:
        text = page.extract_text() or ""
        if text.strip():
            pages_text.append(text)

    full_text = "\n\n".join(pages_text)
    return _split_text(full_text, {"source": filename, "source_type": "pdf"})
45
+
46
+
47
async def process_url(url: str) -> list[TextChunk]:
    """Fetch a URL, strip boilerplate HTML, and split into chunks."""
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        response = await client.get(url, headers={"User-Agent": "RAGnarok/1.0"})
        response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop non-content elements before extracting text.
    noisy_tags = ["script", "style", "nav", "footer", "header", "aside", "form"]
    for element in soup(noisy_tags):
        element.decompose()

    page_text = soup.get_text(separator="\n", strip=True)
    return _split_text(page_text, {"source": url, "source_type": "url"})
app/services/embeddings.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+
6
# Embedding model name; the Dockerfile pre-downloads it so first use is fast.
MODEL_NAME = "all-MiniLM-L6-v2"


@lru_cache(maxsize=1)
def _get_model() -> SentenceTransformer:
    # Load the model once per process and reuse it for every request.
    return SentenceTransformer(MODEL_NAME)
12
+
13
+
14
def embed_text(text: str) -> list[float]:
    """Embed a single piece of text and return a normalised float list."""
    vector: np.ndarray = _get_model().encode(text, normalize_embeddings=True)
    return vector.tolist()
19
+
20
+
21
def embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed multiple texts in one forward pass."""
    matrix: np.ndarray = _get_model().encode(
        texts, normalize_embeddings=True, batch_size=32
    )
    return matrix.tolist()
app/services/llm.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator
2
+
3
+ from groq import AsyncGroq
4
+
5
+ MODEL = "llama-3.1-8b-instant"
6
+
7
+
8
+ async def stream_completion(
9
+ groq_key: str,
10
+ messages: list[dict],
11
+ ) -> AsyncGenerator[str, None]:
12
+ """
13
+ Stream a chat completion from Groq.
14
+ Yields text delta strings as they arrive.
15
+ The key is used ephemerally and never stored.
16
+ """
17
+ client = AsyncGroq(api_key=groq_key)
18
+ stream = await client.chat.completions.create(
19
+ model=MODEL,
20
+ messages=messages,
21
+ stream=True,
22
+ temperature=0.2,
23
+ max_tokens=1024,
24
+ )
25
+ async for chunk in stream:
26
+ delta = chunk.choices[0].delta.content
27
+ if delta:
28
+ yield delta
app/services/rag.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator
2
+
3
+ from .embeddings import embed_text
4
+ from .llm import stream_completion
5
+ from .vector_store import get_supabase_client, match_documents_rpc
6
+
7
SYSTEM_PROMPT = (
    "You are a helpful AI assistant. Answer the user's question using ONLY the provided "
    "context sources. If the answer is not in the sources, say so clearly. "
    "Cite sources by their number, e.g. [Source 1]."
)


def _build_user_message(question: str, chunks: list[dict]) -> str:
    """Format retrieved chunks as numbered sources ahead of the question, or
    fall back to a 'nothing found' preamble when retrieval came back empty."""
    if not chunks:
        return (
            f"No relevant documents were found in your knowledge base.\n\nQuestion: {question}"
        )

    sources = []
    for number, chunk in enumerate(chunks, start=1):
        sources.append(
            f"[Source {number}] (similarity: {chunk['similarity']:.2f})\n{chunk['content']}"
        )
    context_block = "\n\n---\n\n".join(sources)
    return f"Context:\n{context_block}\n\nQuestion: {question}"


async def rag_stream(
    user_jwt: str,
    groq_key: str,
    question: str,
    match_count: int = 5,
    match_threshold: float = 0.5,
) -> AsyncGenerator[str, None]:
    """
    Full RAG pipeline:
    1. Embed the question
    2. Retrieve matching chunks via RPC (RLS enforced)
    3. Build prompt with sources
    4. Stream Groq response back to the caller
    """
    query_embedding = embed_text(question)

    # Vector search runs as the end user, so RLS limits results to their data.
    supabase = get_supabase_client(user_jwt)
    retrieved = await match_documents_rpc(
        supabase, query_embedding, match_count, match_threshold
    )

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": _build_user_message(question, retrieved)},
    ]

    async for token in stream_completion(groq_key, messages):
        yield token
app/services/vector_store.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from supabase import Client, create_client
2
+
3
+ from ..core.config import settings
4
+ from .document_processor import TextChunk
5
+ from .embeddings import embed_batch
6
+
7
+
8
def get_supabase_client(user_jwt: str) -> Client:
    """
    Create a Supabase client authenticated as the end user.
    Setting the auth header activates RLS policies.
    """
    supabase: Client = create_client(settings.SUPABASE_URL, settings.SUPABASE_ANON_KEY)
    # Forward the user's JWT so PostgREST evaluates row-level security as them.
    supabase.postgrest.auth(user_jwt)
    return supabase
16
+
17
+
18
async def insert_chunks(
    client: Client,
    document_id: str,
    user_id: str,
    chunks: list[TextChunk],
) -> None:
    """Embed all chunks and bulk-insert them into document_chunks."""
    if not chunks:
        return  # nothing to embed or store

    # One batched forward pass is cheaper than per-chunk embedding calls.
    embeddings = embed_batch([chunk.content for chunk in chunks])

    rows = []
    for chunk, vector in zip(chunks, embeddings):
        rows.append(
            {
                "document_id": document_id,
                "user_id": user_id,
                "content": chunk.content,
                "metadata": chunk.metadata,
                "embedding": vector,
            }
        )

    client.table("document_chunks").insert(rows).execute()
43
+
44
+
45
async def match_documents_rpc(
    client: Client,
    query_embedding: list[float],
    match_count: int = 5,
    match_threshold: float = 0.5,
) -> list[dict]:
    """Call the match_documents Postgres function via RPC."""
    params = {
        "query_embedding": query_embedding,
        "match_count": match_count,
        "match_threshold": match_threshold,
    }
    response = client.rpc("match_documents", params).execute()
    return response.data or []
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn[standard]==0.30.1
3
+ python-multipart==0.0.9
4
+ pydantic-settings==2.3.4
5
+ python-jose[cryptography]==3.3.0
6
+ supabase==2.5.0
7
+ sentence-transformers==3.0.1
8
+ groq==0.9.0
9
+ httpx==0.27.0
10
+ beautifulsoup4==4.12.3
11
+ pypdf==4.2.0
12
+ langchain-text-splitters==0.2.2
13
+ python-dotenv==1.0.1