Spaces:
Sleeping
Sleeping
import hmac
import os
import tempfile
from pathlib import Path

from dotenv import load_dotenv

# Load .env before anything below reads os.environ.
load_dotenv()

import uvicorn
from fastapi import FastAPI, UploadFile, File, Security, HTTPException, status, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.security import APIKeyHeader
from langchain.agents import create_agent
from langchain_community.document_loaders import (
    BSHTMLLoader,
    PyPDFLoader,
    TextLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
)
from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage
from langchain_core.tools.retriever import create_retriever_tool
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
from langchain_pinecone import PineconeVectorStore  # Changed from Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic import BaseModel

import fast_tokenizer  # project-local tokenizer binding
def load_vectorstore(index_name: str = "rag-agent", model_name: str = "all-MiniLM-L6-v2"):
    """Build the Pinecone vector store backed by a local HuggingFace embedding model.

    The index and embedding-model names were hard-coded; they are now
    parameters with the original values as defaults (backward compatible).

    Args:
        index_name: Pinecone index to connect to.
        model_name: sentence-transformers model used to embed queries/documents.

    Raises:
        KeyError: if PINECONE_API_KEY is not set in the environment.
    """
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return PineconeVectorStore(
        index_name=index_name,
        embedding=embeddings,
        pinecone_api_key=os.environ["PINECONE_API_KEY"],
    )
def load_llm():
    """Select the chat model from the environment.

    Prefers a local Ollama server when OLLAMA_BASE_URL is set (development);
    otherwise falls back to Groq's hosted API via GROQ_API_KEY (production).

    Raises:
        ValueError: if neither environment variable is configured.
    """
    base_url = os.environ.get("OLLAMA_BASE_URL")
    if base_url:
        print("🔧 Using local Ollama LLM (Development Mode)")
        return ChatOllama(model="llama3.1", temperature=0.1, base_url=base_url)

    groq_key = os.environ.get("GROQ_API_KEY")
    if groq_key:
        print("☁️ Using Groq Cloud LLM (Production Mode)")
        return ChatGroq(
            api_key=groq_key,
            model_name="llama-3.3-70b-versatile",
            temperature=0.1,
        )

    raise ValueError(
        "Neither OLLAMA_BASE_URL nor GROQ_API_KEY found! "
        "Please set one in your environment variables."
    )
def load_retriever(vectorstore):
    """Wrap the vector store as an MMR retriever.

    Maximal-marginal-relevance search: fetch 20 candidates, return the 4
    most diverse/relevant.
    """
    mmr_kwargs = {"k": 4, "fetch_k": 20}
    return vectorstore.as_retriever(search_type="mmr", search_kwargs=mmr_kwargs)
def load_retriever_tool(retriever):
    """Expose the retriever to the agent as a named LangChain tool."""
    tool_description = (
        "Retrieve relevant documents from the RAG database of programming "
        "languages documentations. Don't output raw JSON in your final answer."
    )
    return create_retriever_tool(retriever, "rag_retriever", description=tool_description)
def load_agent(tools, llm):
    """Assemble the tool-calling agent with the project's behavioral guardrails."""
    prompt_sections = [
        "You are an expert all in one assistant. Follow these rules strictly:\n\n",
        "1. PYTHON QUESTIONS: YOU MUST use tools to search for the answer.\n",
        "2. GREETINGS: If the user says 'Hi' or 'Hello', respond warmly and ask how you can help with Python. DO NOT use the tool.\n",
        "3. OFF-TOPIC QUESTIONS: If the user asks a non-coding question (e.g., trivia, history), answer it briefly using your own knowledge, then politely steer the conversation back to Python. DO NOT use the tool.\n\n",
        "STRICT CONSTRAINTS:\n",
        "- NEVER output raw JSON in your final answer.\n",
        "- NEVER explain your internal workings or mention the terms 'tool', 'database', or 'training data' to the user.\n",
        "- NEVER apologize or say 'I am just an AI' or 'I don't have direct access'.",
    ]
    system_prompt = "".join(prompt_sections)

    # Bind the tool schemas to the model so it can emit tool calls.
    tool_aware_llm = llm.bind_tools(tools)
    return create_agent(
        model=tool_aware_llm,
        tools=tools,
        system_prompt=system_prompt,
    )
# --- FASTAPI SETUP & GLOBAL INITIALIZATION ---
app = FastAPI(title="Python RAG Agent API")

# Name of the header every client must present.
api_key_header = APIKeyHeader(name="X-API-Key")

# Fail fast with an actionable message instead of the opaque KeyError that
# os.environ["APP_API_KEY"] raised when the variable was missing.
SECRET_APP_KEY = os.environ.get("APP_API_KEY")
if not SECRET_APP_KEY:
    raise RuntimeError(
        "APP_API_KEY environment variable is not set; it is required to "
        "authenticate requests to this API."
    )
# Dependency that guards protected routes with the X-API-Key header.
def verify_api_key(api_key: str = Security(api_key_header)):
    """Raise 401 unless the presented API key matches the configured secret.

    Uses hmac.compare_digest (constant-time) instead of `!=` so the
    comparison cannot leak key prefixes through response timing.
    """
    if not hmac.compare_digest(api_key.encode("utf-8"), SECRET_APP_KEY.encode("utf-8")):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or missing API Key",
        )
# Allow the browser frontend (deployed separately, e.g. on Vercel) to call
# this API from any origin.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# disallowed by the CORS spec for credentialed (cookie) requests — browsers
# reject the wildcard. This API authenticates via the X-API-Key header, not
# cookies, so it works today, but pin the frontend origin if credentials are
# ever needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Build the whole agent stack once at server start-up; these globals are
# shared by every request.
llm = load_llm()
vectorstore = load_vectorstore()

# Retrieval pipeline: vector store -> retriever -> tool -> agent.
retriever = load_retriever(vectorstore)
retriever_tool = load_retriever_tool(retriever)
tools = [retriever_tool]
agent = load_agent(tools, llm)
# --- API ENDPOINTS ---
class ChatRequest(BaseModel):
    """Request payload for the chat endpoint."""

    # The new user message to answer.
    message: str
    # Prior turns as [{"role": "user"|"assistant", "content": ...}] so the UI
    # can replay conversation context. (Pydantic deep-copies field defaults,
    # so the mutable [] default is safe here.)
    history: list[dict] = []
@app.post("/chat", dependencies=[Depends(verify_api_key)])
async def chat_endpoint(request: ChatRequest):
    """Stream the agent's answer to the UI as plain-text chunks.

    Fix: the route decorator was missing, so this handler was never
    registered with the app — and `Depends`/`verify_api_key` sat unused.
    NOTE(review): the "/chat" path is assumed; confirm against the frontend.
    """
    # 1. Rebuild LangChain message history from the UI's role/content dicts.
    chat_history = []
    for msg in request.history:
        if msg["role"] == "user":
            chat_history.append(HumanMessage(content=msg["content"]))
        else:
            chat_history.append(AIMessage(content=msg["content"]))
    chat_history.append(HumanMessage(content=request.message))

    # 2. Generator that forwards only the model's text chunks.
    async def generate_stream():
        try:
            # NOTE(review): agent.stream is synchronous, so it blocks the
            # event loop between chunks; consider agent.astream if that
            # becomes a problem under load.
            for chunk, metadata in agent.stream(
                {"messages": chat_history},
                stream_mode="messages",
            ):
                if isinstance(chunk, AIMessageChunk) and chunk.content:
                    yield chunk.content
        except Exception as e:
            # The HTTP status is already 200 by now; surface errors in-stream.
            yield f"\n[Error]: {e}"

    # 3. Stream the output to the frontend.
    return StreamingResponse(generate_stream(), media_type="text/event-stream")
def custom_token_length(text):
    """Return the token count of *text* per the project's fast_tokenizer.

    Passed to RecursiveCharacterTextSplitter as its length function so chunk
    sizes are measured in tokens, not characters.
    """
    return len(fast_tokenizer.tokenize(text))
@app.post("/upload", dependencies=[Depends(verify_api_key)])
async def upload_document(file: UploadFile = File(...)):
    """Accepts PDF, HTML, MD, and TXT files and uploads them to Pinecone using fast_tokenizer.

    Fixes:
    - The route decorator was missing, so the handler was never registered.
      NOTE(review): the "/upload" path is assumed; confirm against the frontend.
    - The temp file was "temp_<client filename>" in the CWD: the
      client-controlled name could contain path separators (traversal) and
      collide across concurrent uploads. A NamedTemporaryFile with the real
      suffix is used instead (the suffix is kept so extension-sniffing
      loaders still work).
    """
    ext = Path(file.filename).suffix.lower()
    supported_extensions = [".pdf", ".html", ".htm", ".md", ".txt"]
    if ext not in supported_extensions:
        return {"error": f"Unsupported file type. Please upload one of: {', '.join(supported_extensions)}"}

    # Persist the upload to a private temp file for the on-disk loaders.
    with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
        tmp.write(await file.read())
        temp_file_path = tmp.name
    try:
        if ext == ".pdf":
            docs = PyPDFLoader(temp_file_path).load()
        elif ext in (".html", ".htm"):
            try:
                docs = UnstructuredHTMLLoader(temp_file_path).load()
            except Exception as e:
                # Best-effort fallback when the unstructured parser chokes.
                print(f"Warning: UnstructuredHTMLLoader failed, trying BSHTMLLoader: {e}")
                docs = BSHTMLLoader(temp_file_path).load()
        elif ext == ".md":
            docs = UnstructuredMarkdownLoader(temp_file_path).load()
        else:  # ".txt" — only remaining supported extension
            docs = TextLoader(temp_file_path).load()

        # Chunking tuned for the embedding model: 350-token chunks with a
        # 50-token overlap, measured by the project's fast_tokenizer.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=350,
            chunk_overlap=50,
            length_function=custom_token_length,
        )
        splits = text_splitter.split_documents(docs)

        # Upload the tokenized chunks to Pinecone.
        vectorstore.add_documents(splits)
        return {
            "status": "success",
            "message": f"Successfully processed {file.filename} into {len(splits)} chunks and uploaded to Pinecone.",
        }
    except Exception as e:
        return {"error": f"Failed to process file: {str(e)}"}
    finally:
        # Clean up temp file
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
if __name__ == "__main__":
    # Hugging Face Spaces requires the server to listen on port 7860.
    banner = "=" * 50
    print(f"\n{banner}\n🐍 Python RAG API Initialized on Port 7860\n{banner}\n")
    uvicorn.run("agent:app", host="0.0.0.0", port=7860, reload=True)