|
|
| import os |
| from langchain.text_splitter import CharacterTextSplitter |
| from langchain.document_loaders import TextLoader, DirectoryLoader |
| from langchain.embeddings import CohereEmbeddings |
| from langchain.embeddings import OpenAIEmbeddings |
| from langchain.vectorstores import Chroma |
| from langchain.llms import OpenAI |
| from langchain.llms import Cohere |
| from langchain.chains import RetrievalQA |
| from langchain import PromptTemplate |
|
|
| import streamlit as st |
|
|
def ingest(file_path, embeddings):
    """Split a text file into chunks and persist them in a Chroma store.

    The file is loaded with ``TextLoader``, split with a
    ``CharacterTextSplitter`` (``chunk_size=1000``), embedded with
    ``embeddings`` and written to a Chroma store that is persisted to disk.

    Args:
        file_path: Path of the text file to load.
        embeddings: Embedding object handed to ``Chroma.from_documents``.

    Returns:
        None. The store is persisted in a directory named after
        ``file_path`` with its extension removed (``notes.txt`` -> ``notes``).
    """
    documents = TextLoader(file_path).load()
    text_splitter = CharacterTextSplitter(chunk_size=1000)
    docs = text_splitter.split_documents(documents)

    # Strip the real extension instead of blindly dropping the last four
    # characters, which only worked for three-letter extensions.
    persist_directory = os.path.splitext(file_path)[0]
    if persist_directory == file_path:
        # No extension to strip: the directory name must still differ from
        # the source file, otherwise it would collide with the file itself.
        persist_directory = file_path + "_db"
    print('persist dict: ')
    print(persist_directory)

    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    vectordb.persist()
|
|
# Sidebar form: choose the provider and enter the matching API key.
with st.sidebar:
    with st.form('Cohere/OpenAI'):
        mod = st.radio('Choose OpenAI/Cohere', ('OpenAI', 'Cohere'))
        api_key = st.text_input('Enter API key', type="password")

        submitted = st.form_submit_button("Submit")


# Bound up front so the code below can tell "no key entered yet" apart from
# a configured provider instead of failing with a NameError.
llm = None
embeddings = None

if api_key:
    if mod == 'OpenAI':
        os.environ["OPENAI_API_KEY"] = api_key
        llm = OpenAI(temperature=0.7, verbose=True)
        embeddings = OpenAIEmbeddings()
    elif mod == 'Cohere':
        os.environ["COHERE_API_KEY"] = api_key
        llm = Cohere(temperature=0.7, verbose=True)
        embeddings = CohereEmbeddings()


uploaded_file = st.file_uploader("Upload a file to ingest", type=["txt"])


if uploaded_file is not None:
    if embeddings is None:
        # Ingestion needs an embedding model, which requires an API key.
        st.warning("Enter an API key in the sidebar before uploading a file.")
    else:
        file_contents = uploaded_file.read()
        # Keep only the file name so the upload cannot point outside the
        # working directory.
        file_path = os.path.basename(uploaded_file.name)

        # The upload only exists in memory; ingest() reads from disk, so the
        # contents have to be written out first.
        with open(file_path, "wb") as out_file:
            out_file.write(file_contents)

        print(file_path)
        ingest(file_path, embeddings)