| | import streamlit as st |
| | from langchain_community.document_loaders import PDFPlumberLoader |
| | from langchain_text_splitters import RecursiveCharacterTextSplitter |
| | from langchain_core.prompts import PromptTemplate |
| | import os |
| | import tempfile |
| | from langchain_groq import ChatGroq |
| | from dotenv import load_dotenv |
| |
|
| | |
# Cap on how many characters of extracted PDF text are passed to the LLM
# (keeps the prompt within the model's context window).
MAX_DOC_LENGTH = 4000
| |
|
def process_pdf(uploaded_file):
    """Extract text from an uploaded PDF, truncated to MAX_DOC_LENGTH chars.

    Args:
        uploaded_file: A Streamlit UploadedFile (file-like object) or None.

    Returns:
        The extracted document text on success, or a string starting with
        "Error" on failure (callers check this prefix to detect failure).
    """
    try:
        if not uploaded_file:
            return "Error: No file uploaded."

        # PDFPlumberLoader needs a real file path, so persist the upload
        # to a named temporary file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(uploaded_file.read())
            temp_path = temp_file.name

        try:
            loader = PDFPlumberLoader(temp_path)
            result = loader.load()
        finally:
            # Always delete the temp file, even when loading raises.
            # (Fixes a leak: the original removed it only on success.)
            os.remove(temp_path)

        splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)
        split_docs = splitter.split_documents(result)

        # Re-join the chunks and cap total length for the LLM prompt.
        document_text = "\n".join(doc.page_content for doc in split_docs)
        return document_text[:MAX_DOC_LENGTH]
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
| |
|
def initialize_llm():
    """Initializes the LLM with error handling for unavailable models.

    Returns:
        A configured ChatGroq instance, or None (after surfacing a Streamlit
        error) when the API key is missing or initialization fails.
    """
    load_dotenv()
    # Fix: the original read only "Groq_API_Key" while the error message
    # referred to GROQ_API_KEY. Prefer the conventional upper-case name but
    # keep backward compatibility with the legacy variable.
    groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("Groq_API_Key")
    if not groq_api_key:
        st.error("GROQ_API_KEY environment variable is missing.")
        return None

    try:
        return ChatGroq(
            model="llama3-8b-8192",
            temperature=0.7,
            api_key=groq_api_key,
            verbose=False,
        )
    except Exception as e:
        st.error(f"Error initializing LLM: {str(e)}")
        return None
| |
|
def create_prompt():
    """Builds the prompt template for grounded, document-only Q&A.

    The template takes two variables — ``document`` and ``question`` — and
    instructs the model to answer strictly from the supplied document.
    """
    template_text = (
        "You are an AI assistant that provides precise answers based on the given document. "
        "Use only the information available in the document to respond.\n\n"
        "Document:\n{document}\n\n"
        "Question: {question}\n"
        "Answer:"
    )
    return PromptTemplate(
        input_variables=["document", "question"],
        template=template_text,
    )
| |
|
def generate_answer(chain, document_text, user_input):
    """Generates an answer from the LLM while handling API errors.

    Args:
        chain: A runnable (prompt | llm) exposing ``invoke(dict)`` whose
            result has a ``content`` attribute.
        document_text: The document context to answer from.
        user_input: The user's question.

    Returns:
        The model's answer as a string, or a human-readable "⚠️ Error …"
        message when a known Groq API failure is detected.
    """
    try:
        response = chain.invoke({"document": document_text, "question": user_input})
        return str(response.content)
    except Exception as e:
        # Map well-known Groq error codes to friendly messages.
        # Fixed: the original messages contained mis-encoded emoji ("β οΈ").
        error_message = str(e).lower()
        if "rate_limit_exceeded" in error_message:
            return "⚠️ Error: Rate limit exceeded. Try again later."
        if "context_length_exceeded" in error_message:
            return "⚠️ Error: Input too long. Please shorten your document or question."
        if "model_not_found" in error_message or "model_decommissioned" in error_message:
            return "⚠️ Error: Selected model is unavailable. Please try a different one."
        return f"⚠️ Error generating answer: {str(e)}"
| |
|
def main():
    """Streamlit UI: upload a PDF in the sidebar and ask questions about it."""
    st.set_page_config(page_title="Ask My PDF", layout="wide")

    st.title("📄 Ask My PDF")

    with st.sidebar:
        st.header("📂 Upload PDF")
        uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

        if uploaded_file:
            # Fixed: the original success message contained a mis-encoded
            # emoji that broke the string literal across two lines.
            st.success("✅ File uploaded successfully!")

    user_input = st.text_area("💬 Enter your question:", placeholder="Ask something about the document...")

    if st.button("Get Answer", use_container_width=True):
        if not uploaded_file:
            st.warning("⚠️ Please upload a PDF document.")
        elif not user_input.strip():
            st.warning("⚠️ Please enter a question.")
        else:
            document_text = process_pdf(uploaded_file)
            # process_pdf signals failure with an "Error"-prefixed string.
            if isinstance(document_text, str) and document_text.startswith("Error"):
                st.error(document_text)
            else:
                llm = initialize_llm()
                if llm:
                    prompt = create_prompt()
                    chain = prompt | llm
                    answer = generate_answer(chain, document_text, user_input)
                    st.subheader("📌 Answer:")
                    st.markdown(f"> {answer}")
| |
|
# Run the Streamlit app when this file is executed directly.
if __name__ == "__main__":
    main()
| |
|