| | """ |
| | Vector Database Service implementation for Qdrant |
| | """ |
| |
|
| | from typing import List, Dict, Any, Optional |
| | from qdrant_client import QdrantClient |
| | from qdrant_client.models import PointStruct, VectorParams, Distance, Record |
| |
|
| |
|
| |
|
class VectorDatabaseClient:
    """Client for interacting with a single collection of a Qdrant vector database."""

    def __init__(self, url: str, api_key: str, collection_name: str, embedding_size: int):
        """Initialize the Qdrant client and remember the collection settings.

        Args:
            url: Qdrant server URL
            api_key: API key for Qdrant
            collection_name: Name of the collection to use
            embedding_size: Size of embedding vectors
        """
        self.client = QdrantClient(url=url, api_key=api_key)
        self.collection_name = collection_name
        self.embedding_size = embedding_size

    def ensure_collection_exists(self):
        """Ensure the collection exists, creating it with cosine distance if it doesn't."""
        # Reuse list_collections() so the "which collections exist" logic
        # lives in exactly one place.
        if self.collection_name in self.list_collections():
            print(f"ℹ️ Collection '{self.collection_name}' already exists.")
            return

        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.embedding_size,
                distance=Distance.COSINE,
            ),
        )
        print(f"✅ Collection '{self.collection_name}' created.")

    def add_embedding(self, id: str, embedding: List[float], filename: str, metadata: Optional[str] = None) -> str:
        """Add an embedding to the collection.

        Args:
            id: Unique ID for the point
            embedding: Vector embedding
            filename: Original filename, stored under the "filename" payload key
            metadata: Optional metadata as JSON string, stored under the
                "metadata" payload key; omitted from the payload when falsy

        Returns:
            ID of the added point
        """
        payload: Dict[str, Any] = {"filename": filename}
        if metadata:
            payload["metadata"] = metadata

        # Delegate so both add_* methods share a single upsert code path.
        return self.add_embedding_with_payload(id, embedding, payload)

    def add_embedding_with_payload(self, id: str, embedding: List[float], payload: Dict[str, Any]) -> str:
        """Add an embedding with a custom payload.

        The point is upserted, so an existing point with the same ID is
        overwritten.

        Args:
            id: Unique ID for the point
            embedding: Vector embedding
            payload: Dictionary of metadata to store

        Returns:
            ID of the added point
        """
        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                PointStruct(
                    id=id,
                    vector=embedding,
                    payload=payload,
                )
            ],
        )
        return id

    def search_by_embedding(self, embedding: List[float], limit: int = 5) -> List[Record]:
        """Search for similar vectors.

        Args:
            embedding: Query vector
            limit: Maximum number of results

        Returns:
            List of search results, exactly as returned by the client's search call
        """
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=embedding,
            limit=limit,
        )

    def search_by_id(self, id: str, limit: int = 1) -> List[Record]:
        """Search for similar vectors using an existing vector as query.

        The query point lives in the same collection that is searched, so it
        can itself appear among the results.

        Args:
            id: ID of the existing vector to use as query
            limit: Maximum number of results

        Returns:
            List of search results; empty if no point with this ID exists or
            the stored point carries no vector
        """
        # with_vectors=True is required: retrieve() omits stored vectors by
        # default, which would leave nothing to search with.
        records = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[id],
            with_vectors=True,
        )
        if not records:
            return []

        query_vector = records[0].vector
        if query_vector is None:
            return []

        return self.search_by_embedding(query_vector, limit)

    def delete_embedding(self, id: str) -> bool:
        """Delete an embedding from the collection.

        Args:
            id: ID of the embedding to delete

        Returns:
            True if deleted, False if not found
        """
        # The delete call gives no "found" signal here, so look the point up
        # first (IDs only, no payload/vector) to honour the documented result.
        existed = bool(
            self.client.retrieve(
                collection_name=self.collection_name,
                ids=[id],
                with_payload=False,
                with_vectors=False,
            )
        )

        # Always issue the delete so the side effect is the same whether or
        # not the lookup found the point.
        self.client.delete(
            collection_name=self.collection_name,
            points_selector=[id],
        )
        return existed

    def list_collections(self) -> List[str]:
        """List all collections in the database.

        Returns:
            List of collection names
        """
        return [c.name for c in self.client.get_collections().collections]