"""Streamlit chatbot that answers questions about an uploaded CSV file.

The CSV is flattened to text, split into overlapping chunks, embedded into a
FAISS vectorstore, and queried through a LangChain ConversationalRetrievalChain
with buffered conversation memory.
"""

import os
from time import sleep

import pandas as pd
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

# SECURITY: the original source hard-coded a live OpenAI API key here.
# Never commit secrets — read the key from the environment (or st.secrets)
# and rotate any key that has already been exposed.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Module-level defaults kept for backward compatibility; main() now keeps its
# working state in st.session_state so it survives Streamlit reruns.
vectorstore = None
conversation_chain = None
chat_history = []


def process_csv(csv_file):
    """Build a conversational retrieval chain from an uploaded CSV file.

    Args:
        csv_file: file-like object from ``st.file_uploader``.

    Returns:
        The ConversationalRetrievalChain, or ``None`` when reading or
        embedding fails (an error is shown in the Streamlit UI in that case).
    """
    try:
        # latin-1 maps every byte to a codepoint, so decoding never raises
        # even on files with unknown/mixed encodings.
        df = pd.read_csv(csv_file, encoding="latin-1")
        text = df.to_string(index=False)
        text_chunks = get_text_chunks(text)
        vectorstore = get_vectorstore(text_chunks)
        if vectorstore is None:
            st.error(
                "Failed to create vectorstore. Rate limit exceeded. "
                "Please try again later."
            )
            return None
        return get_conversation_chain(vectorstore)
    except Exception as e:
        st.error(f"Error processing CSV file: {e}")
        return None


def get_text_chunks(text):
    """Split raw text into overlapping chunks suitable for embedding."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=2000,   # larger chunks for larger datasets
        chunk_overlap=40,  # small overlap preserves context across chunk edges
        length_function=len,
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks, retries=5, wait_seconds=20):
    """Embed text chunks into a FAISS vectorstore, retrying on failures.

    Args:
        text_chunks: list of strings to embed.
        retries: number of attempts before giving up.
        wait_seconds: back-off delay between attempts (embedding failures
            here are typically rate-limit errors).

    Returns:
        A FAISS vectorstore, or ``None`` when every attempt fails.
    """
    # Build the embeddings client once, not on every retry.
    embeddings = OpenAIEmbeddings()
    for attempt in range(1, retries + 1):
        try:
            return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
        except Exception:
            st.warning(
                f"Retry {attempt}/{retries}: Waiting for {wait_seconds} "
                "seconds due to rate limit exceeded."
            )
            sleep(wait_seconds)
    return None


def get_conversation_chain(vectorstore):
    """Wire an LLM, buffer memory, and the vectorstore retriever into a chain."""
    llm = ChatOpenAI()
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )


def _render_message(message):
    """Render one chat-history entry in the UI.

    Handles both plain dicts ({'role': ..., 'content': ...}) and LangChain
    BaseMessage objects — ConversationBufferMemory(return_messages=True)
    returns BaseMessage instances whose ``.type`` is "human"/"ai", so the
    original dict-only check never matched and nothing was displayed.
    """
    if isinstance(message, dict):
        role = message.get("role")
        content = message.get("content", "")
    else:
        role = getattr(message, "type", None)
        content = getattr(message, "content", "")
    if role in ("user", "human"):
        st.write(f"You: {content}")
    elif role in ("assistant", "ai"):
        st.write(f"Assistant: {content}")


def main():
    """Streamlit entry point: upload a CSV, then chat about its contents."""
    st.title("CSV Chatbot")

    st.subheader("Upload CSV File")
    csv_file = st.file_uploader("Upload CSV", type=["csv"])
    if not csv_file:
        return

    # Streamlit reruns this whole script on every widget interaction. Cache
    # the chain in session_state so the CSV is not re-read and re-embedded
    # (and the conversation memory not wiped) on each button click.
    cache_key = (csv_file.name, getattr(csv_file, "size", None))
    if st.session_state.get("csv_cache_key") != cache_key:
        st.session_state["conversation_chain"] = process_csv(csv_file)
        st.session_state["csv_cache_key"] = cache_key
    conversation_chain = st.session_state.get("conversation_chain")

    if conversation_chain is None:
        st.error("Failed to process CSV file. Please try again.")
        return

    st.subheader("Chat Interface")
    user_question = st.text_input("Ask a question:")
    if st.button("Ask"):
        # Ask each question separately; drop the empty fragments that
        # splitting on '?' produces (e.g. after a trailing question mark).
        questions = [q.strip() for q in user_question.split("?") if q.strip()]
        for question in questions:
            response = conversation_chain.invoke({"question": question})
            if "chat_history" in response:
                for message in response["chat_history"]:
                    _render_message(message)
            else:
                st.error("Failed to get response. Please try again.")


if __name__ == "__main__":
    main()