import logging
import os
import time

import gradio as gr
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
|
|
| |
# Surface INFO-level messages from llama-index / pinecone during indexing.
logging.basicConfig(level=logging.INFO)

# Fail fast with an actionable message when the key is missing;
# os.environ["..."] would only raise a bare KeyError.
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    raise RuntimeError(
        "PINECONE_API_KEY environment variable is not set; "
        "export it before running this script."
    )

pc = Pinecone(api_key=api_key)
index_name = "quickstart"
# Vector size of the embedding model; 1536 presumably targets OpenAI's
# 1536-dim embeddings — confirm against the embed model actually used.
dimension = 1536
|
|
| |
# Recreate the demo index from scratch. WARNING: this DELETES any existing
# index named "quickstart" and every vector in it — fine for a quickstart,
# destructive anywhere else.
if index_name in [idx["name"] for idx in pc.list_indexes()]:
    pc.delete_index(index_name)
    # Deletion is asynchronous on Pinecone's side; creating an index with
    # the same name immediately can fail, so poll until it is gone.
    while index_name in [idx["name"] for idx in pc.list_indexes()]:
        time.sleep(1)

pc.create_index(
    name=index_name,
    dimension=dimension,
    metric="euclidean",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

# Creation is also asynchronous; wait until the index reports ready
# before handing it to the vector store for upserts.
while not pc.describe_index(index_name).status["ready"]:
    time.sleep(1)

pinecone_index = pc.Index(index_name)
|
|
| |
# Download the sample essay once; later runs reuse the cached local copy.
os.makedirs("data/paul_graham", exist_ok=True)
file_path = "data/paul_graham/paul_graham_essay.txt"
if not os.path.exists(file_path):
    import urllib.request

    try:
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt",
            file_path,
        )
    except Exception:
        # An interrupted download leaves a partial file that the
        # os.path.exists() guard above would treat as complete on the
        # next run. Remove it before propagating the error.
        if os.path.exists(file_path):
            os.remove(file_path)
        raise
|
|
| |
# --- Ingestion: read the essay and index it into Pinecone -----------------
# Load every file under data/paul_graham/ as llama-index Document objects.
documents = SimpleDirectoryReader("data/paul_graham/").load_data()

# Back the index's storage with the Pinecone index created above, then
# build the vector index over the loaded documents.
storage_context = StorageContext.from_defaults(
    vector_store=PineconeVectorStore(pinecone_index=pinecone_index)
)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# Question-answering entry point used by the Gradio callback below.
query_engine = index.as_query_engine()
|
|
| |
def query_doc(prompt):
    """Answer a question against the indexed document.

    Args:
        prompt: The user's natural-language question.

    Returns:
        The query engine's response rendered as a string; a prompt to
        enter a question when the input is empty/blank; or an
        "Error: ..." string when the query fails (Gradio displays the
        return value, so errors are surfaced as text instead of raised).
    """
    # Guard against empty/whitespace-only input from the UI.
    if not prompt or not prompt.strip():
        return "Please enter a question."
    try:
        response = query_engine.query(prompt)
        return str(response)
    except Exception as e:
        # Keep the full traceback for operators; show a short message to the user.
        logging.exception("Query failed")
        return f"Error: {e}"
|
|
| |
# Build the web UI. The __main__ guard keeps importing this module
# (e.g. from tests or tooling) from starting a web server as a side effect;
# behavior when run as a script is unchanged.
demo = gr.Interface(
    fn=query_doc,
    inputs=gr.Textbox(label="Ask a question about the document"),
    outputs=gr.Textbox(label="Answer"),
    title="Paul Graham Document QA (LlamaIndex + Pinecone)",
    description="Ask questions based on the indexed Paul Graham essay. Powered by LlamaIndex & Pinecone.",
)

if __name__ == "__main__":
    demo.launch()
|
|