# 0-Parth-D
# Fixed API key error
# a1934d7
import os
import uvicorn
from dotenv import load_dotenv
load_dotenv()
from fastapi.security import APIKeyHeader
from fastapi import FastAPI, UploadFile, File, Security, HTTPException, status, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from langchain_pinecone import PineconeVectorStore # Changed from Chroma
from langchain_ollama import ChatOllama
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.tools.retriever import create_retriever_tool
from langchain.agents import create_agent
from langchain_core.messages import HumanMessage, AIMessage, AIMessageChunk
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import fast_tokenizer
from pathlib import Path
from langchain_community.document_loaders import UnstructuredHTMLLoader, UnstructuredMarkdownLoader, TextLoader, BSHTMLLoader
def load_vectorstore():
    """Return the Pinecone vector store that backs retrieval and uploads.

    Text is embedded with the HuggingFace model "all-MiniLM-L6-v2" and stored
    in the Pinecone index named "rag-agent".

    Raises:
        KeyError: if the PINECONE_API_KEY environment variable is not set.
    """
    # Build the embedder first, then read the key, so failures surface in the
    # same order as before.
    embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    store_options = {
        "index_name": "rag-agent",
        "embedding": embedder,
        "pinecone_api_key": os.environ["PINECONE_API_KEY"],
    }
    return PineconeVectorStore(**store_options)
def load_llm():
    """Pick the chat model from the environment.

    A non-empty OLLAMA_BASE_URL selects the local Ollama model "llama3.1".
    Otherwise a non-empty GROQ_API_KEY selects Groq's
    "llama-3.3-70b-versatile". Both run at temperature 0.1.

    Raises:
        ValueError: if neither variable is set (or both are empty).
    """
    env = os.environ

    # Local development: an Ollama endpoint takes priority when configured.
    local_endpoint = env.get("OLLAMA_BASE_URL")
    if local_endpoint:
        print("🔧 Using local Ollama LLM (Development Mode)")
        return ChatOllama(model="llama3.1", temperature=0.1, base_url=local_endpoint)

    # Hosted deployment: fall back to Groq, which needs an API key.
    cloud_key = env.get("GROQ_API_KEY")
    if cloud_key:
        print("☁️ Using Groq Cloud LLM (Production Mode)")
        return ChatGroq(
            api_key=cloud_key,
            model_name="llama-3.3-70b-versatile",
            temperature=0.1,
        )

    raise ValueError(
        "Neither OLLAMA_BASE_URL nor GROQ_API_KEY found! "
        "Please set one in your environment variables."
    )
def load_retriever(vectorstore):
    """Wrap *vectorstore* in a retriever that uses MMR search.

    The search is configured with k=4 and fetch_k=20 and the result of
    ``vectorstore.as_retriever`` is returned unchanged.
    """
    mmr_settings = {"k": 4, "fetch_k": 20}
    return vectorstore.as_retriever(search_type="mmr", search_kwargs=mmr_settings)
def load_retriever_tool(retriever):
    """Expose *retriever* to the agent as a tool named "rag_retriever"."""
    tool_name = "rag_retriever"
    # The description is what the model sees when deciding whether to call the tool.
    tool_description = (
        "Retrieve relevant documents from the RAG database of programming "
        "languages documentations. Don't output raw JSON in your final answer."
    )
    return create_retriever_tool(retriever, tool_name, description=tool_description)
def load_agent(tools, llm):
    """Build the agent that answers chat requests.

    Args:
        tools: Tools the agent may call (here, the retriever tool).
        llm: The chat model returned by ``load_llm``.

    Returns:
        The agent object produced by ``create_agent``.
    """
    system_prompt = (
        "You are an expert all in one assistant. Follow these rules strictly:\n\n"
        "1. PYTHON QUESTIONS: YOU MUST use tools to search for the answer.\n"
        "2. GREETINGS: If the user says 'Hi' or 'Hello', respond warmly and ask how you can help with Python. DO NOT use the tool.\n"
        "3. OFF-TOPIC QUESTIONS: If the user asks a non-coding question (e.g., trivia, history), answer it briefly using your own knowledge, then politely steer the conversation back to Python. DO NOT use the tool.\n\n"
        "STRICT CONSTRAINTS:\n"
        "- NEVER output raw JSON in your final answer.\n"
        "- NEVER explain your internal workings or mention the terms 'tool', 'database', or 'training data' to the user.\n"
        "- NEVER apologize or say 'I am just an AI' or 'I don't have direct access'."
    )
    # Pass the plain chat model: create_agent binds `tools` to the model
    # itself, and the LangChain v1 migration guide states that models already
    # wrapped with bind_tools() are not supported as the `model` argument.
    return create_agent(
        model=llm,
        tools=tools,
        system_prompt=system_prompt,
    )
# --- FASTAPI SETUP & GLOBAL INITIALIZATION ---
# The ASGI application; the route decorators and middleware below attach to it.
app = FastAPI(title="Python RAG Agent API")
# 1. Clients authenticate by sending the shared secret in the "X-API-Key" header.
api_key_header = APIKeyHeader(name="X-API-Key")
# 2. The secret that incoming keys are checked against. It is read with [] rather
#    than .get(), so a missing APP_API_KEY raises KeyError while this module loads.
SECRET_APP_KEY = os.environ["APP_API_KEY"]
# 3. Create the security function
def verify_api_key(api_key: str = Security(api_key_header)):
    """FastAPI dependency that rejects requests carrying the wrong API key.

    Args:
        api_key: Value of the "X-API-Key" header, injected by FastAPI.

    Raises:
        HTTPException: 401 when the supplied key does not match SECRET_APP_KEY.
    """
    # Imported here so this function does not depend on the module's import block.
    import secrets

    # Compare in constant time so response timing does not reveal how much of
    # a guessed key is correct. Bytes are compared because compare_digest()
    # rejects str values containing non-ASCII characters.
    supplied = (api_key or "").encode("utf-8")
    expected = SECRET_APP_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or missing API Key"
        )
# Let browser front ends on any origin call the API with any method and header.
# NOTE(review): allow_origins=["*"] is combined with allow_credentials=True. The
# FastAPI CORS docs say wildcards should not be used together with credentials;
# confirm whether credentials are needed (auth here is the X-API-Key header).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Initialize your agent once when the server starts
# These run at import time; `vectorstore` and `agent` are the module-level
# objects the /upload and /chat endpoints use.
vectorstore = load_vectorstore()
llm = load_llm()
retriever = load_retriever(vectorstore)
retriever_tool = load_retriever_tool(retriever)
tools = [retriever_tool]
agent = load_agent(tools, llm)
# --- API ENDPOINTS ---
# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    # The new user message to answer.
    message: str
    # Earlier turns of the conversation, in the order the client sends them.
    # chat_endpoint reads each entry's "role" and "content" keys; a role of
    # "user" becomes a human message and any other role an AI message.
    history: list[dict] = [] # Allows UI to send previous messages
@app.post("/chat", dependencies=[Depends(verify_api_key)])
async def chat_endpoint(request: ChatRequest):
    """Stream the agent's answer to ``request.message``.

    Args:
        request: The new message plus optional prior turns in ``history``.
            Entries whose role is "user" become human messages; every other
            role is treated as an assistant message.

    Returns:
        A StreamingResponse that emits the answer text chunk by chunk.

    Raises:
        HTTPException: 400 when a history entry lacks "role" or "content".
    """
    # 1. Rebuild the conversation from the client's history. Malformed entries
    #    are rejected up front instead of failing with a KeyError (HTTP 500).
    chat_history = []
    for index, msg in enumerate(request.history):
        role = msg.get("role")
        content = msg.get("content")
        if role is None or content is None:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"history[{index}] must contain 'role' and 'content'",
            )
        message_cls = HumanMessage if role == "user" else AIMessage
        chat_history.append(message_cls(content=content))
    chat_history.append(HumanMessage(content=request.message))

    # 2. agent.stream() is a blocking call. Keeping this a plain (non-async)
    #    generator lets Starlette iterate it in a worker thread, so the event
    #    loop stays free to serve other requests while the model is generating.
    def generate_stream():
        try:
            for chunk, _metadata in agent.stream(
                {"messages": chat_history},
                stream_mode="messages",
            ):
                # Forward only non-empty model output chunks.
                if isinstance(chunk, AIMessageChunk) and chunk.content:
                    yield chunk.content
        except Exception as e:
            # The response has already started, so the failure is reported in-band.
            yield f"\n[Error]: {e}"

    # 3. Stream the output to the frontend.
    # NOTE(review): the body is raw text chunks, not "data: ..." framed events,
    # even though the media type is text/event-stream; confirm what the client
    # expects before changing either side.
    return StreamingResponse(generate_stream(), media_type="text/event-stream")
# 1. Add your custom token length function back
def custom_token_length(text):
    """Return how many tokens ``fast_tokenizer.tokenize`` produces for *text*."""
    return len(fast_tokenizer.tokenize(text))
def _load_documents(path, ext):
    """Parse the file at *path* with the loader that matches extension *ext*."""
    if ext == ".pdf":
        return PyPDFLoader(path).load()
    if ext in (".html", ".htm"):
        try:
            return UnstructuredHTMLLoader(path).load()
        except Exception as e:
            # Best-effort fallback to the BeautifulSoup-based loader.
            print(f"Warning: UnstructuredHTMLLoader failed, trying BSHTMLLoader: {e}")
            return BSHTMLLoader(path).load()
    if ext == ".md":
        return UnstructuredMarkdownLoader(path).load()
    if ext == ".txt":
        return TextLoader(path).load()
    raise ValueError(f"Unsupported file type: {ext}")


@app.post("/upload", dependencies=[Depends(verify_api_key)])
async def upload_document(file: UploadFile = File(...)):
    """Accepts PDF, HTML, MD, and TXT files and uploads them to Pinecone using fast_tokenizer.

    The upload is written to a temporary file, parsed, split into chunks of at
    most 350 tokens with a 50-token overlap (measured by custom_token_length),
    and added to the module-level vector store.

    Returns:
        ``{"status": "success", "message": ...}`` on success, otherwise
        ``{"error": ...}`` describing an unsupported type or processing failure.
    """
    supported_extensions = [".pdf", ".html", ".htm", ".md", ".txt"]

    # The filename comes from the client and may be missing or contain
    # directory parts ("../x.pdf", "C:\\dir\\x.pdf"). Keep only the final
    # component so the temp file is always created in the working directory.
    raw_name = file.filename or ""
    safe_name = raw_name.replace("\\", "/").rsplit("/", 1)[-1]
    ext = Path(safe_name).suffix.lower()
    if ext not in supported_extensions:
        return {"error": f"Unsupported file type. Please upload one of: {', '.join(supported_extensions)}"}

    # Read the body before the temp file is created. No await happens between
    # creating and deleting the file, so two same-named uploads handled by this
    # event loop cannot interleave on the same path.
    payload = await file.read()
    temp_file_path = f"temp_{safe_name}"
    try:
        with open(temp_file_path, "wb") as f:
            f.write(payload)

        docs = _load_documents(temp_file_path, ext)

        # Chunk sizes are counted in tokens via custom_token_length.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=350,  # Max 350 tokens per chunk
            chunk_overlap=50,  # Overlap of 50 tokens
            length_function=custom_token_length,
        )
        splits = text_splitter.split_documents(docs)

        # NOTE(review): parsing, splitting and the Pinecone upload are blocking
        # calls inside an async handler; consider moving them to a worker thread.
        vectorstore.add_documents(splits)
        return {
            "status": "success",
            "message": f"Successfully processed {file.filename} into {len(splits)} chunks and uploaded to Pinecone."
        }
    except Exception as e:
        # Failures are reported in the JSON body; the response shape is kept
        # as-is for existing clients.
        return {"error": f"Failed to process file: {str(e)}"}
    finally:
        # Clean up temp file, including when writing it failed part-way.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
if __name__ == "__main__":
    # Start the API server on port 7860 (the port Hugging Face Spaces requires).
    divider = "=" * 50
    print("\n" + divider)
    print("🐍 Python RAG API Initialized on Port 7860")
    print(divider + "\n")
    # "agent:app" is an import string, so this file must be importable as `agent`.
    # NOTE(review): reload=True is uvicorn's development auto-reloader; confirm
    # it is wanted in the hosted deployment.
    uvicorn.run("agent:app", host="0.0.0.0", port=7860, reload=True)