simonefilosofi committed on
Commit
ebdf453
·
0 Parent(s):

feat: initial commit - upload local project to remote repository

Browse files
.env.example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ SUPABASE_URL=https://your-project.supabase.co
2
+ SUPABASE_ANON_KEY=your-anon-key
3
+ SUPABASE_JWT_SECRET=your-jwt-secret
4
+ ALLOWED_ORIGINS=http://localhost:5173,https://yourusername.github.io
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: slim Python 3.11 keeps the final image small.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
# gcc/g++ are required to build native extensions pulled in by requirements.txt.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
# requirements.txt is copied alone first so this layer stays cached until deps change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download the embedding model so first requests are fast
# NOTE(review): the model is cached under the build-time user's HOME; if the
# runtime user differs (e.g. Spaces non-root user), confirm the cache is readable.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"

# Copy application code
COPY app/ ./app/

# HuggingFace Spaces expects port 7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/routes/__init__.py ADDED
File without changes
app/api/routes/chat.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ from fastapi import APIRouter, Depends, Header
4
+ from fastapi.responses import StreamingResponse
5
+ from pydantic import BaseModel
6
+
7
+ from ...core.security import get_user_id, validate_llm_key
8
+ from ...services.rag import rag_stream
9
+
10
router = APIRouter(prefix="/chat", tags=["chat"])


class ChatRequest(BaseModel):
    """Payload for POST /chat/stream."""

    # The user's natural-language question to answer via RAG.
    question: str
    # Optional client-side conversation id; not read by this route's handler.
    session_id: str | None = None
16
+
17
+
18
async def _event_stream(user_jwt: str, groq_key: str, question: str):
    """Wrap the RAG token generator as server-sent events.

    Emits one ``data: {"token": ...}`` event per token, then a ``[DONE]``
    sentinel; any failure is reported as a final ``{"error": ...}`` event
    instead of breaking the stream.
    """
    try:
        async for token in rag_stream(user_jwt, groq_key, question):
            yield f"data: {json.dumps({'token': token})}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as exc:
        yield f"data: {json.dumps({'error': str(exc)})}\n\n"
28
+
29
+
30
+ @router.post("/stream")
31
+ async def chat_stream(
32
+ body: ChatRequest,
33
+ auth: tuple = Depends(get_user_id),
34
+ groq_key: str = Depends(validate_llm_key),
35
+ ) -> StreamingResponse:
36
+ user_jwt, _ = auth
37
+
38
+ return StreamingResponse(
39
+ _event_stream(user_jwt, groq_key, body.question),
40
+ media_type="text/event-stream",
41
+ headers={
42
+ "Cache-Control": "no-cache",
43
+ "X-Accel-Buffering": "no",
44
+ },
45
+ )
app/api/routes/documents.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, UploadFile, status
4
+ from pydantic import BaseModel
5
+
6
+ from ...core.security import get_user_id
7
+ from ...services.document_processor import process_pdf, process_url
8
+ from ...services.vector_store import get_supabase_client, insert_chunks
9
+
10
router = APIRouter(prefix="/documents", tags=["documents"])


class URLIngestRequest(BaseModel):
    """Payload for POST /documents/ingest-url."""

    # Web page to fetch and index.
    url: str
    # Optional display title; the URL itself is used when omitted.
    title: str | None = None
16
+
17
+
18
async def _process_and_store(
    document_id: str,
    user_id: str,
    user_jwt: str,
    chunks_coro,
) -> None:
    """Background task: await the processing coroutine, embed and store the
    resulting chunks, then flip the document row to 'ready' ('error' on failure)."""
    client = get_supabase_client(user_jwt)

    def _set_status(value: str) -> None:
        # Status transitions are what the frontend polls on the documents table.
        client.table("documents").update({"status": value}).eq("id", document_id).execute()

    try:
        chunks = await chunks_coro
        await insert_chunks(client, document_id, user_id, chunks)
        _set_status("ready")
    except Exception:
        _set_status("error")
        raise  # re-raise so the failure is still logged by the task runner
33
+
34
+
35
+ @router.post("/upload", status_code=status.HTTP_202_ACCEPTED)
36
+ async def upload_document(
37
+ background_tasks: BackgroundTasks,
38
+ file: UploadFile,
39
+ auth: tuple = Depends(get_user_id),
40
+ ) -> dict:
41
+ user_jwt, user_id = auth
42
+
43
+ if file.content_type not in ("application/pdf", "application/octet-stream"):
44
+ raise HTTPException(status_code=400, detail="Only PDF files are supported")
45
+
46
+ file_bytes = await file.read()
47
+ client = get_supabase_client(user_jwt)
48
+
49
+ # Insert document row immediately with status=processing
50
+ result = (
51
+ client.table("documents")
52
+ .insert(
53
+ {
54
+ "user_id": user_id,
55
+ "title": file.filename or "Untitled",
56
+ "source_type": "pdf",
57
+ "status": "processing",
58
+ }
59
+ )
60
+ .execute()
61
+ )
62
+ document_id: str = result.data[0]["id"]
63
+
64
+ background_tasks.add_task(
65
+ _process_and_store,
66
+ document_id,
67
+ user_id,
68
+ user_jwt,
69
+ process_pdf(file_bytes, file.filename or "document.pdf"),
70
+ )
71
+
72
+ return {"document_id": document_id, "status": "processing"}
73
+
74
+
75
+ @router.post("/ingest-url", status_code=status.HTTP_202_ACCEPTED)
76
+ async def ingest_url(
77
+ body: URLIngestRequest,
78
+ background_tasks: BackgroundTasks,
79
+ auth: tuple = Depends(get_user_id),
80
+ ) -> dict:
81
+ user_jwt, user_id = auth
82
+ client = get_supabase_client(user_jwt)
83
+
84
+ result = (
85
+ client.table("documents")
86
+ .insert(
87
+ {
88
+ "user_id": user_id,
89
+ "title": body.title or body.url,
90
+ "source_type": "url",
91
+ "source_url": body.url,
92
+ "status": "processing",
93
+ }
94
+ )
95
+ .execute()
96
+ )
97
+ document_id: str = result.data[0]["id"]
98
+
99
+ background_tasks.add_task(
100
+ _process_and_store,
101
+ document_id,
102
+ user_id,
103
+ user_jwt,
104
+ process_url(body.url),
105
+ )
106
+
107
+ return {"document_id": document_id, "status": "processing"}
108
+
109
+
110
+ @router.get("")
111
+ async def list_documents(auth: tuple = Depends(get_user_id)) -> list[dict]:
112
+ user_jwt, _ = auth
113
+ client = get_supabase_client(user_jwt)
114
+ result = (
115
+ client.table("documents")
116
+ .select("id, title, source_type, source_url, status, created_at")
117
+ .order("created_at", desc=True)
118
+ .execute()
119
+ )
120
+ return result.data or []
121
+
122
+
123
@router.delete("/{document_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_document(
    document_id: str,
    auth: tuple = Depends(get_user_id),
) -> None:
    """Delete a document row. RLS ensures only the owner can delete."""
    client = get_supabase_client(auth[0])
    client.table("documents").delete().eq("id", document_id).execute()
app/api/routes/health.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+
3
router = APIRouter()


@router.get("/health")
async def health() -> dict:
    """Simple liveness check; always reports ok."""
    return dict(status="ok")
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+
3
+
4
class Settings(BaseSettings):
    """Application configuration, loaded from the environment / .env file."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    SUPABASE_URL: str
    SUPABASE_ANON_KEY: str
    SUPABASE_JWT_SECRET: str
    ALLOWED_ORIGINS: str = "http://localhost:5173"

    @property
    def origins_list(self) -> list[str]:
        """ALLOWED_ORIGINS split on commas, trimmed, empty entries dropped."""
        trimmed = (origin.strip() for origin in self.ALLOWED_ORIGINS.split(","))
        return [origin for origin in trimmed if origin]


# Single shared settings instance for the whole application.
settings = Settings()
app/core/security.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from fastapi import Header, HTTPException, status
4
+ from jose import JWTError, jwt
5
+
6
+ from .config import settings
7
+
8
+ _GROQ_KEY_RE = re.compile(r"^gsk_[a-zA-Z0-9]{50,}$")
9
+
10
+
11
def verify_jwt(token: str) -> dict:
    """Decode and validate a Supabase JWT. Raises HTTP 401 on failure."""
    try:
        # Supabase signs user tokens with HS256 and audience "authenticated".
        return jwt.decode(
            token,
            settings.SUPABASE_JWT_SECRET,
            algorithms=["HS256"],
            audience="authenticated",
        )
    except JWTError as exc:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
        ) from exc
26
+
27
+
28
def get_user_id(authorization: str = Header(...)) -> tuple[str, str]:
    """
    Extract bearer token and user_id from the Authorization header.
    Returns (raw_token, user_id). Raises HTTP 401 on any failure.
    """
    prefix = "Bearer "
    if not authorization.startswith(prefix):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authorization header must start with 'Bearer '",
        )

    token = authorization[len(prefix):].strip()
    payload = verify_jwt(token)

    # Supabase puts the user's UUID in the standard "sub" claim.
    subject = payload.get("sub")
    if not subject:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token missing subject claim",
        )
    return token, subject
47
+
48
+
49
def validate_llm_key(x_llm_key: str = Header(...)) -> str:
    """Validate the Groq API key format. The key is never persisted."""
    if _GROQ_KEY_RE.match(x_llm_key) is None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid LLM key format",
        )
    return x_llm_key
app/main.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from .api.routes.chat import router as chat_router
from .api.routes.documents import router as documents_router
from .api.routes.health import router as health_router
from .core.config import settings

# Application entry point: uvicorn serves `app.main:app` (see Dockerfile CMD).
app = FastAPI(
    title="RAGnarok API",
    description="Production-ready RAG backend with Supabase + Groq",
    version="1.0.0",
)

# Only origins listed in ALLOWED_ORIGINS may call the API from a browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.origins_list,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount routes: /health, /documents/*, /chat/*.
app.include_router(health_router)
app.include_router(documents_router)
app.include_router(chat_router)
app/services/__init__.py ADDED
File without changes
app/services/document_processor.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from dataclasses import dataclass
3
+
4
+ import httpx
5
+ from bs4 import BeautifulSoup
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from pypdf import PdfReader
8
+
9
# Character-based chunking parameters: ~1000-char windows with 200-char overlap.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Shared splitter instance; prefers paragraph, then line, then word boundaries.
_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=["\n\n", "\n", " ", ""],
)


@dataclass
class TextChunk:
    """A single chunk of extracted text plus its source metadata."""

    # Chunk text content.
    content: str
    # Source info, e.g. {"source": ..., "source_type": ..., "chunk_index": ...}.
    metadata: dict
23
+
24
+
25
def _split_text(text: str, base_metadata: dict) -> list[TextChunk]:
    """Split *text* into chunks, tagging each with *base_metadata* plus its
    position in the raw split output as ``chunk_index``."""
    chunks: list[TextChunk] = []
    for index, piece in enumerate(_splitter.split_text(text)):
        if not piece.strip():
            continue  # drop whitespace-only fragments (index numbering is kept)
        chunks.append(
            TextChunk(content=piece, metadata={**base_metadata, "chunk_index": index})
        )
    return chunks
32
+
33
+
34
async def process_pdf(file_bytes: bytes, filename: str) -> list[TextChunk]:
    """Extract text from a PDF and split into chunks.

    Args:
        file_bytes: Raw PDF file content.
        filename: Original filename, recorded as each chunk's source.

    Returns:
        Chunks tagged with {"source": filename, "source_type": "pdf"}.
    """
    reader = PdfReader(io.BytesIO(file_bytes))

    # Keep only pages that actually yield text; extract_text() can return
    # None/"" for image-only or empty pages. (The original enumerate() index
    # was unused, so pages are iterated directly.)
    pages_text: list[str] = []
    for page in reader.pages:
        text = page.extract_text() or ""
        if text.strip():
            pages_text.append(text)

    full_text = "\n\n".join(pages_text)
    return _split_text(full_text, {"source": filename, "source_type": "pdf"})
45
+
46
+
47
async def process_url(url: str) -> list[TextChunk]:
    """Fetch a URL, strip boilerplate HTML, and split into chunks."""
    async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
        response = await client.get(url, headers={"User-Agent": "RAGnarok/1.0"})
        response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop non-content elements before extracting text.
    noisy_tags = ["script", "style", "nav", "footer", "header", "aside", "form"]
    for element in soup(noisy_tags):
        element.decompose()

    page_text = soup.get_text(separator="\n", strip=True)
    return _split_text(page_text, {"source": url, "source_type": "url"})
app/services/embeddings.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+
6
# Embedding model name; the Dockerfile pre-downloads it so first use is fast.
MODEL_NAME = "all-MiniLM-L6-v2"


@lru_cache(maxsize=1)
def _get_model() -> SentenceTransformer:
    # Load the model once per process and reuse it for every request.
    return SentenceTransformer(MODEL_NAME)
12
+
13
+
14
def embed_text(text: str) -> list[float]:
    """Embed a single piece of text and return a normalised float list."""
    vector: np.ndarray = _get_model().encode(text, normalize_embeddings=True)
    return vector.tolist()
19
+
20
+
21
def embed_batch(texts: list[str]) -> list[list[float]]:
    """Embed multiple texts in one forward pass."""
    matrix: np.ndarray = _get_model().encode(
        texts, normalize_embeddings=True, batch_size=32
    )
    return matrix.tolist()
app/services/llm.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator
2
+
3
+ from groq import AsyncGroq
4
+
5
+ MODEL = "llama-3.1-8b-instant"
6
+
7
+
8
+ async def stream_completion(
9
+ groq_key: str,
10
+ messages: list[dict],
11
+ ) -> AsyncGenerator[str, None]:
12
+ """
13
+ Stream a chat completion from Groq.
14
+ Yields text delta strings as they arrive.
15
+ The key is used ephemerally and never stored.
16
+ """
17
+ client = AsyncGroq(api_key=groq_key)
18
+ stream = await client.chat.completions.create(
19
+ model=MODEL,
20
+ messages=messages,
21
+ stream=True,
22
+ temperature=0.2,
23
+ max_tokens=1024,
24
+ )
25
+ async for chunk in stream:
26
+ delta = chunk.choices[0].delta.content
27
+ if delta:
28
+ yield delta
app/services/rag.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator
2
+
3
+ from .embeddings import embed_text
4
+ from .llm import stream_completion
5
+ from .vector_store import get_supabase_client, match_documents_rpc
6
+
7
SYSTEM_PROMPT = (
    "You are a helpful AI assistant. Answer the user's question using ONLY the provided "
    "context sources. If the answer is not in the sources, say so clearly. "
    "Cite sources by their number, e.g. [Source 1]."
)


def _build_user_message(question: str, chunks: list[dict]) -> str:
    """Format retrieved chunks as numbered sources ahead of the question, or
    fall back to a 'nothing found' preamble when retrieval came back empty."""
    if not chunks:
        return (
            f"No relevant documents were found in your knowledge base.\n\nQuestion: {question}"
        )

    sources = []
    for number, chunk in enumerate(chunks, start=1):
        sources.append(
            f"[Source {number}] (similarity: {chunk['similarity']:.2f})\n{chunk['content']}"
        )
    context_block = "\n\n---\n\n".join(sources)
    return f"Context:\n{context_block}\n\nQuestion: {question}"


async def rag_stream(
    user_jwt: str,
    groq_key: str,
    question: str,
    match_count: int = 5,
    match_threshold: float = 0.5,
) -> AsyncGenerator[str, None]:
    """
    Full RAG pipeline:
    1. Embed the question
    2. Retrieve matching chunks via RPC (RLS enforced)
    3. Build prompt with sources
    4. Stream Groq response back to the caller
    """
    query_embedding = embed_text(question)

    # Vector search runs as the end user, so RLS limits results to their data.
    supabase = get_supabase_client(user_jwt)
    retrieved = await match_documents_rpc(
        supabase, query_embedding, match_count, match_threshold
    )

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": _build_user_message(question, retrieved)},
    ]

    async for token in stream_completion(groq_key, messages):
        yield token
app/services/vector_store.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from supabase import Client, create_client
2
+
3
+ from ..core.config import settings
4
+ from .document_processor import TextChunk
5
+ from .embeddings import embed_batch
6
+
7
+
8
def get_supabase_client(user_jwt: str) -> Client:
    """
    Create a Supabase client authenticated as the end user.
    Setting the auth header activates RLS policies.
    """
    supabase: Client = create_client(settings.SUPABASE_URL, settings.SUPABASE_ANON_KEY)
    # Forward the user's JWT so PostgREST evaluates row-level security as them.
    supabase.postgrest.auth(user_jwt)
    return supabase
16
+
17
+
18
async def insert_chunks(
    client: Client,
    document_id: str,
    user_id: str,
    chunks: list[TextChunk],
) -> None:
    """Embed all chunks and bulk-insert them into document_chunks."""
    if not chunks:
        return  # nothing to embed or store

    # One batched forward pass is cheaper than per-chunk embedding calls.
    embeddings = embed_batch([chunk.content for chunk in chunks])

    rows = []
    for chunk, vector in zip(chunks, embeddings):
        rows.append(
            {
                "document_id": document_id,
                "user_id": user_id,
                "content": chunk.content,
                "metadata": chunk.metadata,
                "embedding": vector,
            }
        )

    client.table("document_chunks").insert(rows).execute()
43
+
44
+
45
async def match_documents_rpc(
    client: Client,
    query_embedding: list[float],
    match_count: int = 5,
    match_threshold: float = 0.5,
) -> list[dict]:
    """Call the match_documents Postgres function via RPC."""
    params = {
        "query_embedding": query_embedding,
        "match_count": match_count,
        "match_threshold": match_threshold,
    }
    response = client.rpc("match_documents", params).execute()
    return response.data or []
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn[standard]==0.30.1
3
+ python-multipart==0.0.9
4
+ pydantic-settings==2.3.4
5
+ python-jose[cryptography]==3.3.0
6
+ supabase==2.5.0
7
+ sentence-transformers==3.0.1
8
+ groq==0.9.0
9
+ httpx==0.27.0
10
+ beautifulsoup4==4.12.3
11
+ pypdf==4.2.0
12
+ langchain-text-splitters==0.2.2
13
+ python-dotenv==1.0.1