Spaces:

Legal-Assistant
/

LawBot

Running

File size: 4,799 Bytes

b82f276

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "356e8efe-9836-42f4-b2b2-38513a0a573a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import streamlit as st\n",
    "from PyPDF2 import PdfReader\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "import os\n",
    "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n",
    "import google.generativeai as genai\n",
    "from langchain.vectorstores import FAISS\n",
    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
    "from langchain.chains.question_answering import load_qa_chain\n",
    "from langchain.prompts import PromptTemplate\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "os.getenv(\"GOOGLE_API_KEY\")\n",
    "genai.configure(api_key=os.getenv(\"GOOGLE_API_KEY\"))\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "def get_pdf_text(pdf_docs):\n",
    "    text=\"\"\n",
    "    for pdf in pdf_docs:\n",
    "        pdf_reader= PdfReader(pdf)\n",
    "        for page in pdf_reader.pages:\n",
    "            text+= page.extract_text()\n",
    "    return  text\n",
    "\n",
    "\n",
    "\n",
    "def get_text_chunks(text):\n",
    "    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)\n",
    "    chunks = text_splitter.split_text(text)\n",
    "    return chunks\n",
    "\n",
    "\n",
    "def get_vector_store(text_chunks):\n",
    "    embeddings = GoogleGenerativeAIEmbeddings(model = \"models/embedding-001\")\n",
    "    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)\n",
    "    vector_store.save_local(\"faiss_index\")\n",
    "\n",
    "\n",
    "def get_conversational_chain():\n",
    "\n",
    "    prompt_template = \"\"\"\n",
    "    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in\n",
    "    provided context just say, \"answer is not available in the context\", don't provide the wrong answer\\n\\n\n",
    "    Context:\\n {context}?\\n\n",
    "    Question: \\n{question}\\n\n",
    "\n",
    "    Answer:\n",
    "    \"\"\"\n",
    "\n",
    "    model = ChatGoogleGenerativeAI(model=\"gemini-pro\",\n",
    "                             temperature=0.3)\n",
    "\n",
    "    prompt = PromptTemplate(template = prompt_template, input_variables = [\"context\", \"question\"])\n",
    "    chain = load_qa_chain(model, chain_type=\"stuff\", prompt=prompt)\n",
    "\n",
    "    return chain\n",
    "\n",
    "\n",
    "\n",
    "def user_input(user_question):\n",
    "    embeddings = GoogleGenerativeAIEmbeddings(model = \"models/embedding-001\")\n",
    "    \n",
    "    new_db = FAISS.load_local(\"faiss_index\", embeddings)\n",
    "    docs = new_db.similarity_search(user_question)\n",
    "\n",
    "    chain = get_conversational_chain()\n",
    "\n",
    "    \n",
    "    response = chain(\n",
    "        {\"input_documents\":docs, \"question\": user_question}\n",
    "        , return_only_outputs=True)\n",
    "\n",
    "    print(response)\n",
    "    st.write(\"Reply: \", response[\"output_text\"])\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "def main():\n",
    "    st.set_page_config(\"Chat PDF\")\n",
    "    st.header(\"Chat with PDF using Gemini💁\")\n",
    "\n",
    "    user_question = st.text_input(\"Ask a Question from the PDF Files\")\n",
    "\n",
    "    if user_question:\n",
    "        user_input(user_question)\n",
    "\n",
    "    with st.sidebar:\n",
    "        st.title(\"Menu:\")\n",
    "        pdf_docs = st.file_uploader(\"Upload your PDF Files and Click on the Submit & Process Button\", accept_multiple_files=True)\n",
    "        if st.button(\"Submit & Process\"):\n",
    "            with st.spinner(\"Processing...\"):\n",
    "                raw_text = get_pdf_text(pdf_docs)\n",
    "                text_chunks = get_text_chunks(raw_text)\n",
    "                get_vector_store(text_chunks)\n",
    "                st.success(\"Done\")\n",
    "\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}