"""Streamlit chatbot that answers questions about an uploaded CSV file.

The CSV is flattened to text, split into overlapping chunks, embedded into a
FAISS vectorstore, and queried through a LangChain ConversationalRetrievalChain
with buffered conversation memory.
"""

import os
from time import sleep

import pandas as pd
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

# SECURITY: the original source hard-coded a live OpenAI API key here.
# Never commit secrets — read the key from the environment (or st.secrets)
# and rotate any key that has already been exposed.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Module-level defaults kept for backward compatibility; main() now keeps its
# working state in st.session_state so it survives Streamlit reruns.
vectorstore = None
conversation_chain = None
chat_history = []


def process_csv(csv_file):
    """Build a conversational retrieval chain from an uploaded CSV file.

    Args:
        csv_file: file-like object from ``st.file_uploader``.

    Returns:
        The ConversationalRetrievalChain, or ``None`` when reading or
        embedding fails (an error is shown in the Streamlit UI in that case).
    """
    try:
        # latin-1 maps every byte to a codepoint, so decoding never raises
        # even on files with unknown/mixed encodings.
        df = pd.read_csv(csv_file, encoding="latin-1")
        text = df.to_string(index=False)
        text_chunks = get_text_chunks(text)
        vectorstore = get_vectorstore(text_chunks)
        if vectorstore is None:
            st.error(
                "Failed to create vectorstore. Rate limit exceeded. "
                "Please try again later."
            )
            return None
        return get_conversation_chain(vectorstore)
    except Exception as e:
        st.error(f"Error processing CSV file: {e}")
        return None


def get_text_chunks(text):
    """Split raw text into overlapping chunks suitable for embedding."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=2000,   # larger chunks for larger datasets
        chunk_overlap=40,  # small overlap preserves context across chunk edges
        length_function=len,
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks, retries=5, wait_seconds=20):
    """Embed text chunks into a FAISS vectorstore, retrying on failures.

    Args:
        text_chunks: list of strings to embed.
        retries: number of attempts before giving up.
        wait_seconds: back-off delay between attempts (embedding failures
            here are typically rate-limit errors).

    Returns:
        A FAISS vectorstore, or ``None`` when every attempt fails.
    """
    # Build the embeddings client once, not on every retry.
    embeddings = OpenAIEmbeddings()
    for attempt in range(1, retries + 1):
        try:
            return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
        except Exception:
            st.warning(
                f"Retry {attempt}/{retries}: Waiting for {wait_seconds} "
                "seconds due to rate limit exceeded."
            )
            sleep(wait_seconds)
    return None


def get_conversation_chain(vectorstore):
    """Wire an LLM, buffer memory, and the vectorstore retriever into a chain."""
    llm = ChatOpenAI()
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )


def _render_message(message):
    """Render one chat-history entry in the UI.

    Handles both plain dicts ({'role': ..., 'content': ...}) and LangChain
    BaseMessage objects — ConversationBufferMemory(return_messages=True)
    returns BaseMessage instances whose ``.type`` is "human"/"ai", so the
    original dict-only check never matched and nothing was displayed.
    """
    if isinstance(message, dict):
        role = message.get("role")
        content = message.get("content", "")
    else:
        role = getattr(message, "type", None)
        content = getattr(message, "content", "")
    if role in ("user", "human"):
        st.write(f"You: {content}")
    elif role in ("assistant", "ai"):
        st.write(f"Assistant: {content}")


def main():
    """Streamlit entry point: upload a CSV, then chat about its contents."""
    st.title("CSV Chatbot")

    st.subheader("Upload CSV File")
    csv_file = st.file_uploader("Upload CSV", type=["csv"])
    if not csv_file:
        return

    # Streamlit reruns this whole script on every widget interaction. Cache
    # the chain in session_state so the CSV is not re-read and re-embedded
    # (and the conversation memory not wiped) on each button click.
    cache_key = (csv_file.name, getattr(csv_file, "size", None))
    if st.session_state.get("csv_cache_key") != cache_key:
        st.session_state["conversation_chain"] = process_csv(csv_file)
        st.session_state["csv_cache_key"] = cache_key
    conversation_chain = st.session_state.get("conversation_chain")

    if conversation_chain is None:
        st.error("Failed to process CSV file. Please try again.")
        return

    st.subheader("Chat Interface")
    user_question = st.text_input("Ask a question:")
    if st.button("Ask"):
        # Ask each question separately; drop the empty fragments that
        # splitting on '?' produces (e.g. after a trailing question mark).
        questions = [q.strip() for q in user_question.split("?") if q.strip()]
        for question in questions:
            response = conversation_chain.invoke({"question": question})
            if "chat_history" in response:
                for message in response["chat_history"]:
                    _render_message(message)
            else:
                st.error("Failed to get response. Please try again.")


if __name__ == "__main__":
    main()