Spaces:
Runtime error
Runtime error
[KM-437][DB] db pipeline+ metadata
#1
by rhbt6767 - opened
- .gitattributes +0 -1
- .gitignore +1 -4
- main.py +0 -2
- pyproject.toml +6 -7
- src/api/v1/db_client.py +3 -355
- src/api/v1/document.py +130 -24
- src/config/settings.py +0 -5
- src/database_client/database_client_service.py +0 -118
- src/db/postgres/init_db.py +1 -8
- src/db/postgres/models.py +0 -16
- src/knowledge/processing_service.py +56 -100
- src/models/credentials.py +0 -164
- src/pipeline/db_pipeline/__init__.py +0 -3
- src/pipeline/db_pipeline/db_pipeline_service.py +0 -197
- src/pipeline/db_pipeline/extractor.py +0 -213
- src/pipeline/document_pipeline/__init__.py +0 -0
- src/pipeline/document_pipeline/document_pipeline.py +0 -80
- src/utils/db_credential_encryption.py +0 -70
- uv.lock +1 -369
.gitattributes
CHANGED
|
@@ -33,4 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
-
software/** filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
.gitignore
CHANGED
|
@@ -26,8 +26,6 @@ test/users/user_accounts.csv
|
|
| 26 |
.env.prd
|
| 27 |
.env.example
|
| 28 |
|
| 29 |
-
CLAUDE.md
|
| 30 |
-
|
| 31 |
erd/
|
| 32 |
playground/
|
| 33 |
playground_retriever.py
|
|
@@ -35,5 +33,4 @@ playground_chat.py
|
|
| 35 |
playground_flush_cache.py
|
| 36 |
playground_create_user.py
|
| 37 |
API_CONTRACT.md
|
| 38 |
-
context_engineering/
|
| 39 |
-
sample_file/
|
|
|
|
| 26 |
.env.prd
|
| 27 |
.env.example
|
| 28 |
|
|
|
|
|
|
|
| 29 |
erd/
|
| 30 |
playground/
|
| 31 |
playground_retriever.py
|
|
|
|
| 33 |
playground_flush_cache.py
|
| 34 |
playground_create_user.py
|
| 35 |
API_CONTRACT.md
|
| 36 |
+
context_engineering/
|
|
|
main.py
CHANGED
|
@@ -10,7 +10,6 @@ from src.api.v1.chat import router as chat_router
|
|
| 10 |
from src.api.v1.room import router as room_router
|
| 11 |
from src.api.v1.users import router as users_router
|
| 12 |
from src.api.v1.knowledge import router as knowledge_router
|
| 13 |
-
from src.api.v1.db_client import router as db_client_router
|
| 14 |
from src.db.postgres.init_db import init_db
|
| 15 |
import uvicorn
|
| 16 |
|
|
@@ -36,7 +35,6 @@ app.include_router(document_router)
|
|
| 36 |
app.include_router(knowledge_router)
|
| 37 |
app.include_router(room_router)
|
| 38 |
app.include_router(chat_router)
|
| 39 |
-
app.include_router(db_client_router)
|
| 40 |
|
| 41 |
|
| 42 |
@app.on_event("startup")
|
|
|
|
| 10 |
from src.api.v1.room import router as room_router
|
| 11 |
from src.api.v1.users import router as users_router
|
| 12 |
from src.api.v1.knowledge import router as knowledge_router
|
|
|
|
| 13 |
from src.db.postgres.init_db import init_db
|
| 14 |
import uvicorn
|
| 15 |
|
|
|
|
| 35 |
app.include_router(knowledge_router)
|
| 36 |
app.include_router(room_router)
|
| 37 |
app.include_router(chat_router)
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
@app.on_event("startup")
|
pyproject.toml
CHANGED
|
@@ -79,13 +79,12 @@ dependencies = [
|
|
| 79 |
"jsonpatch>=1.33",
|
| 80 |
"pymongo>=4.14.0",
|
| 81 |
"psycopg2>=2.9.11",
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
"
|
| 87 |
-
"
|
| 88 |
-
"pdf24.0",
|
| 89 |
"pytest-cov==6.0.0",
|
| 90 |
"httpx==0.28.1",
|
| 91 |
"ruff==0.8.4",
|
|
|
|
| 79 |
"jsonpatch>=1.33",
|
| 80 |
"pymongo>=4.14.0",
|
| 81 |
"psycopg2>=2.9.11",
|
| 82 |
+
]
|
| 83 |
+
|
| 84 |
+
[project.optional-dependencies]
|
| 85 |
+
dev = [
|
| 86 |
+
"pytest==8.3.4",
|
| 87 |
+
"pytest-asyncio==0.24.0",
|
|
|
|
| 88 |
"pytest-cov==6.0.0",
|
| 89 |
"httpx==0.28.1",
|
| 90 |
"ruff==0.8.4",
|
src/api/v1/db_client.py
CHANGED
|
@@ -1,357 +1,5 @@
|
|
| 1 |
-
|
| 2 |
|
| 3 |
-
Credential schemas (DbType, PostgresCredentials, etc.) live in
|
| 4 |
-
`src/models/credentials.py` — they are imported below (with noqa: F401) so
|
| 5 |
-
FastAPI/Swagger picks them up for OpenAPI schema generation even though they
|
| 6 |
-
are not referenced by name in this file.
|
| 7 |
-
"""
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
|
| 13 |
-
from pydantic import BaseModel, Field
|
| 14 |
-
from sqlalchemy.ext.asyncio import AsyncSession
|
| 15 |
-
|
| 16 |
-
from src.database_client.database_client_service import database_client_service
|
| 17 |
-
from src.db.postgres.connection import get_db
|
| 18 |
-
from src.middlewares.logging import get_logger, log_execution
|
| 19 |
-
from src.middlewares.rate_limit import limiter
|
| 20 |
-
from src.models.credentials import ( # noqa: F401 — re-exported for Swagger schema discovery
|
| 21 |
-
BigQueryCredentials,
|
| 22 |
-
CredentialSchemas,
|
| 23 |
-
DbType,
|
| 24 |
-
MysqlCredentials,
|
| 25 |
-
PostgresCredentials,
|
| 26 |
-
SnowflakeCredentials,
|
| 27 |
-
SqlServerCredentials,
|
| 28 |
-
SupabaseCredentials,
|
| 29 |
-
)
|
| 30 |
-
from src.pipeline.db_pipeline import db_pipeline_service
|
| 31 |
-
from src.utils.db_credential_encryption import decrypt_credentials_dict
|
| 32 |
-
|
| 33 |
-
logger = get_logger("database_client_api")
|
| 34 |
-
|
| 35 |
-
router = APIRouter(prefix="/api/v1", tags=["Database Clients"])
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
# ---------------------------------------------------------------------------
|
| 39 |
-
# Request / Response schemas
|
| 40 |
-
# ---------------------------------------------------------------------------
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
class DatabaseClientCreate(BaseModel):
|
| 44 |
-
"""
|
| 45 |
-
Payload to register a new external database connection.
|
| 46 |
-
|
| 47 |
-
The `credentials` object shape depends on `db_type`:
|
| 48 |
-
|
| 49 |
-
| db_type | Required fields |
|
| 50 |
-
|-------------|----------------------------------------------------------|
|
| 51 |
-
| postgres | host, port, database, username, password, ssl_mode |
|
| 52 |
-
| mysql | host, port, database, username, password, ssl |
|
| 53 |
-
| sqlserver | host, port, database, username, password, driver? |
|
| 54 |
-
| supabase | host, port, database, username, password, ssl_mode |
|
| 55 |
-
| bigquery | project_id, dataset_id, location?, service_account_json |
|
| 56 |
-
| snowflake | account, warehouse, database, schema?, username, password, role? |
|
| 57 |
-
|
| 58 |
-
Sensitive fields (`password`, `service_account_json`) are encrypted
|
| 59 |
-
at rest using Fernet symmetric encryption.
|
| 60 |
-
"""
|
| 61 |
-
|
| 62 |
-
name: str = Field(..., description="Display name for this connection.", examples=["Production DB"])
|
| 63 |
-
db_type: DbType = Field(..., description="Type of the database engine.", examples=["postgres"])
|
| 64 |
-
credentials: Dict[str, Any] = Field(
|
| 65 |
-
...,
|
| 66 |
-
description="Connection credentials. Shape depends on db_type. See schema descriptions above.",
|
| 67 |
-
examples=[
|
| 68 |
-
{
|
| 69 |
-
"host": "db.example.com",
|
| 70 |
-
"port": 5432,
|
| 71 |
-
"database": "mydb",
|
| 72 |
-
"username": "admin",
|
| 73 |
-
"password": "s3cr3t!",
|
| 74 |
-
"ssl_mode": "require",
|
| 75 |
-
}
|
| 76 |
-
],
|
| 77 |
-
)
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
class DatabaseClientUpdate(BaseModel):
|
| 81 |
-
"""
|
| 82 |
-
Payload to update an existing database connection.
|
| 83 |
-
|
| 84 |
-
All fields are optional — only provided fields will be updated.
|
| 85 |
-
If `credentials` is provided, it replaces the entire credentials object
|
| 86 |
-
and sensitive fields are re-encrypted.
|
| 87 |
-
"""
|
| 88 |
-
|
| 89 |
-
name: Optional[str] = Field(None, description="New display name for this connection.", examples=["Staging DB"])
|
| 90 |
-
credentials: Optional[Dict[str, Any]] = Field(
|
| 91 |
-
None,
|
| 92 |
-
description="Updated credentials object. Replaces existing credentials entirely if provided.",
|
| 93 |
-
examples=[{"host": "new-host.example.com", "port": 5432, "database": "mydb", "username": "admin", "password": "n3wP@ss!", "ssl_mode": "require"}],
|
| 94 |
-
)
|
| 95 |
-
status: Optional[Literal["active", "inactive"]] = Field(
|
| 96 |
-
None,
|
| 97 |
-
description="Set to 'inactive' to soft-disable the connection without deleting it.",
|
| 98 |
-
examples=["inactive"],
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
class DatabaseClientResponse(BaseModel):
|
| 103 |
-
"""
|
| 104 |
-
Database connection record returned by the API.
|
| 105 |
-
|
| 106 |
-
Credentials are **never** included in the response for security reasons.
|
| 107 |
-
"""
|
| 108 |
-
|
| 109 |
-
id: str = Field(..., description="Unique identifier of the database connection.")
|
| 110 |
-
user_id: str = Field(..., description="ID of the user who owns this connection.")
|
| 111 |
-
name: str = Field(..., description="Display name of the connection.")
|
| 112 |
-
db_type: str = Field(..., description="Database engine type.")
|
| 113 |
-
status: str = Field(..., description="Connection status: 'active' or 'inactive'.")
|
| 114 |
-
created_at: datetime = Field(..., description="Timestamp when the connection was registered.")
|
| 115 |
-
updated_at: Optional[datetime] = Field(None, description="Timestamp of the last update, if any.")
|
| 116 |
-
|
| 117 |
-
model_config = {"from_attributes": True}
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
# ---------------------------------------------------------------------------
|
| 121 |
-
# Endpoints
|
| 122 |
-
# ---------------------------------------------------------------------------
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
@router.post(
|
| 126 |
-
"/database-clients",
|
| 127 |
-
response_model=DatabaseClientResponse,
|
| 128 |
-
status_code=status.HTTP_201_CREATED,
|
| 129 |
-
summary="Register a new database connection",
|
| 130 |
-
response_description="The newly created database connection record (credentials excluded).",
|
| 131 |
-
responses={
|
| 132 |
-
201: {"description": "Connection registered successfully."},
|
| 133 |
-
422: {"description": "Validation error — check the credentials shape for the given db_type."},
|
| 134 |
-
500: {"description": "Internal server error."},
|
| 135 |
-
},
|
| 136 |
-
)
|
| 137 |
-
@limiter.limit("10/minute")
|
| 138 |
-
@log_execution(logger)
|
| 139 |
-
async def create_database_client(
|
| 140 |
-
request: Request,
|
| 141 |
-
payload: DatabaseClientCreate,
|
| 142 |
-
user_id: str = Query(..., description="ID of the user registering the connection."),
|
| 143 |
-
db: AsyncSession = Depends(get_db),
|
| 144 |
-
):
|
| 145 |
-
"""
|
| 146 |
-
Register a new external database connection for a user.
|
| 147 |
-
|
| 148 |
-
The `credentials` object must match the shape for the chosen `db_type`
|
| 149 |
-
(see **CredentialSchemas** in the schema section below for exact fields).
|
| 150 |
-
Sensitive fields (`password`, `service_account_json`) are encrypted
|
| 151 |
-
before being persisted — they are never returned in any response.
|
| 152 |
-
"""
|
| 153 |
-
try:
|
| 154 |
-
client = await database_client_service.create(
|
| 155 |
-
db=db,
|
| 156 |
-
user_id=user_id,
|
| 157 |
-
name=payload.name,
|
| 158 |
-
db_type=payload.db_type,
|
| 159 |
-
credentials=payload.credentials,
|
| 160 |
-
)
|
| 161 |
-
return DatabaseClientResponse.model_validate(client)
|
| 162 |
-
except Exception as e:
|
| 163 |
-
logger.error(f"Failed to create database client for user {user_id}", error=str(e))
|
| 164 |
-
raise HTTPException(
|
| 165 |
-
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 166 |
-
detail=f"Failed to create database client: {str(e)}",
|
| 167 |
-
)
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
@router.get(
|
| 171 |
-
"/database-clients/{user_id}",
|
| 172 |
-
response_model=List[DatabaseClientResponse],
|
| 173 |
-
summary="List all database connections for a user",
|
| 174 |
-
response_description="List of database connections (credentials excluded).",
|
| 175 |
-
responses={
|
| 176 |
-
200: {"description": "Returns an empty list if the user has no connections."},
|
| 177 |
-
},
|
| 178 |
-
)
|
| 179 |
-
@log_execution(logger)
|
| 180 |
-
async def list_database_clients(
|
| 181 |
-
user_id: str,
|
| 182 |
-
db: AsyncSession = Depends(get_db),
|
| 183 |
-
):
|
| 184 |
-
"""
|
| 185 |
-
Return all database connections registered by the specified user,
|
| 186 |
-
ordered by creation date (newest first).
|
| 187 |
-
|
| 188 |
-
Credentials are never included in the response.
|
| 189 |
-
"""
|
| 190 |
-
clients = await database_client_service.get_user_clients(db, user_id)
|
| 191 |
-
return [DatabaseClientResponse.model_validate(c) for c in clients]
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
@router.get(
|
| 195 |
-
"/database-clients/{user_id}/{client_id}",
|
| 196 |
-
response_model=DatabaseClientResponse,
|
| 197 |
-
summary="Get a single database connection",
|
| 198 |
-
response_description="Database connection detail (credentials excluded).",
|
| 199 |
-
responses={
|
| 200 |
-
404: {"description": "Connection not found."},
|
| 201 |
-
403: {"description": "Access denied — user_id does not own this connection."},
|
| 202 |
-
},
|
| 203 |
-
)
|
| 204 |
-
@log_execution(logger)
|
| 205 |
-
async def get_database_client(
|
| 206 |
-
user_id: str,
|
| 207 |
-
client_id: str,
|
| 208 |
-
db: AsyncSession = Depends(get_db),
|
| 209 |
-
):
|
| 210 |
-
"""
|
| 211 |
-
Return the detail of a single database connection.
|
| 212 |
-
|
| 213 |
-
Returns **403** if the `user_id` in the path does not match the owner
|
| 214 |
-
of the requested connection.
|
| 215 |
-
"""
|
| 216 |
-
client = await database_client_service.get(db, client_id)
|
| 217 |
-
|
| 218 |
-
if not client:
|
| 219 |
-
raise HTTPException(status_code=404, detail="Database client not found")
|
| 220 |
-
|
| 221 |
-
if client.user_id != user_id:
|
| 222 |
-
raise HTTPException(status_code=403, detail="Access denied")
|
| 223 |
-
|
| 224 |
-
return DatabaseClientResponse.model_validate(client)
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
@router.put(
|
| 228 |
-
"/database-clients/{client_id}",
|
| 229 |
-
response_model=DatabaseClientResponse,
|
| 230 |
-
summary="Update a database connection",
|
| 231 |
-
response_description="Updated database connection record (credentials excluded).",
|
| 232 |
-
responses={
|
| 233 |
-
404: {"description": "Connection not found."},
|
| 234 |
-
403: {"description": "Access denied — user_id does not own this connection."},
|
| 235 |
-
},
|
| 236 |
-
)
|
| 237 |
-
@log_execution(logger)
|
| 238 |
-
async def update_database_client(
|
| 239 |
-
client_id: str,
|
| 240 |
-
payload: DatabaseClientUpdate,
|
| 241 |
-
user_id: str = Query(..., description="ID of the user who owns the connection."),
|
| 242 |
-
db: AsyncSession = Depends(get_db),
|
| 243 |
-
):
|
| 244 |
-
"""
|
| 245 |
-
Update an existing database connection.
|
| 246 |
-
|
| 247 |
-
Only fields present in the request body are updated.
|
| 248 |
-
If `credentials` is provided it **replaces** the entire credentials object
|
| 249 |
-
and sensitive fields are re-encrypted automatically.
|
| 250 |
-
"""
|
| 251 |
-
client = await database_client_service.get(db, client_id)
|
| 252 |
-
|
| 253 |
-
if not client:
|
| 254 |
-
raise HTTPException(status_code=404, detail="Database client not found")
|
| 255 |
-
|
| 256 |
-
if client.user_id != user_id:
|
| 257 |
-
raise HTTPException(status_code=403, detail="Access denied")
|
| 258 |
-
|
| 259 |
-
updated = await database_client_service.update(
|
| 260 |
-
db=db,
|
| 261 |
-
client_id=client_id,
|
| 262 |
-
name=payload.name,
|
| 263 |
-
credentials=payload.credentials,
|
| 264 |
-
status=payload.status,
|
| 265 |
-
)
|
| 266 |
-
return DatabaseClientResponse.model_validate(updated)
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
@router.delete(
|
| 270 |
-
"/database-clients/{client_id}",
|
| 271 |
-
status_code=status.HTTP_200_OK,
|
| 272 |
-
summary="Delete a database connection",
|
| 273 |
-
responses={
|
| 274 |
-
200: {"description": "Connection deleted successfully."},
|
| 275 |
-
404: {"description": "Connection not found."},
|
| 276 |
-
403: {"description": "Access denied — user_id does not own this connection."},
|
| 277 |
-
},
|
| 278 |
-
)
|
| 279 |
-
@log_execution(logger)
|
| 280 |
-
async def delete_database_client(
|
| 281 |
-
client_id: str,
|
| 282 |
-
user_id: str = Query(..., description="ID of the user who owns the connection."),
|
| 283 |
-
db: AsyncSession = Depends(get_db),
|
| 284 |
-
):
|
| 285 |
-
"""
|
| 286 |
-
Permanently delete a database connection.
|
| 287 |
-
|
| 288 |
-
This action is irreversible. The stored credentials are also removed.
|
| 289 |
-
"""
|
| 290 |
-
client = await database_client_service.get(db, client_id)
|
| 291 |
-
|
| 292 |
-
if not client:
|
| 293 |
-
raise HTTPException(status_code=404, detail="Database client not found")
|
| 294 |
-
|
| 295 |
-
if client.user_id != user_id:
|
| 296 |
-
raise HTTPException(status_code=403, detail="Access denied")
|
| 297 |
-
|
| 298 |
-
await database_client_service.delete(db, client_id)
|
| 299 |
-
return {"status": "success", "message": "Database client deleted successfully"}
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
@router.post(
|
| 303 |
-
"/database-clients/{client_id}/ingest",
|
| 304 |
-
status_code=status.HTTP_200_OK,
|
| 305 |
-
summary="Ingest schema from a registered database into the vector store",
|
| 306 |
-
response_description="Count of chunks ingested.",
|
| 307 |
-
responses={
|
| 308 |
-
200: {"description": "Ingestion completed successfully."},
|
| 309 |
-
403: {"description": "Access denied — user_id does not own this connection."},
|
| 310 |
-
404: {"description": "Connection not found."},
|
| 311 |
-
501: {"description": "The connection's db_type is not yet supported by the pipeline."},
|
| 312 |
-
500: {"description": "Ingestion failed (connection error, profiling error, etc.)."},
|
| 313 |
-
},
|
| 314 |
-
)
|
| 315 |
-
@limiter.limit("5/minute")
|
| 316 |
-
@log_execution(logger)
|
| 317 |
-
async def ingest_database_client(
|
| 318 |
-
request: Request,
|
| 319 |
-
client_id: str,
|
| 320 |
-
user_id: str = Query(..., description="ID of the user who owns the connection."),
|
| 321 |
-
db: AsyncSession = Depends(get_db),
|
| 322 |
-
):
|
| 323 |
-
"""
|
| 324 |
-
Decrypt the stored credentials, connect to the user's database, introspect
|
| 325 |
-
its schema, profile each column, embed the descriptions, and store them in
|
| 326 |
-
the shared PGVector collection tagged with `source_type="database"`.
|
| 327 |
-
|
| 328 |
-
Chunks become retrievable via the same retriever used for document chunks.
|
| 329 |
-
"""
|
| 330 |
-
client = await database_client_service.get(db, client_id)
|
| 331 |
-
|
| 332 |
-
if not client:
|
| 333 |
-
raise HTTPException(status_code=404, detail="Database client not found")
|
| 334 |
-
|
| 335 |
-
if client.user_id != user_id:
|
| 336 |
-
raise HTTPException(status_code=403, detail="Access denied")
|
| 337 |
-
|
| 338 |
-
creds = decrypt_credentials_dict(client.credentials)
|
| 339 |
-
|
| 340 |
-
try:
|
| 341 |
-
with db_pipeline_service.engine_scope(
|
| 342 |
-
db_type=client.db_type,
|
| 343 |
-
credentials=creds,
|
| 344 |
-
) as engine:
|
| 345 |
-
total = await db_pipeline_service.run(user_id=user_id, engine=engine)
|
| 346 |
-
except NotImplementedError as e:
|
| 347 |
-
raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail=str(e))
|
| 348 |
-
except Exception as e:
|
| 349 |
-
logger.error(
|
| 350 |
-
f"Ingestion failed for client {client_id}", user_id=user_id, error=str(e)
|
| 351 |
-
)
|
| 352 |
-
raise HTTPException(
|
| 353 |
-
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
| 354 |
-
detail=f"Ingestion failed: {e}",
|
| 355 |
-
)
|
| 356 |
-
|
| 357 |
-
return {"status": "success", "client_id": client_id, "chunks_ingested": total}
|
|
|
|
| 1 |
+
from typing import Literal, Dict
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
dbtypes: Literal["postgresql", "mysql", "sqlite"] = Literal["postgresql", "mysql", "sqlite"]
|
| 5 |
+
creds: Dict[str, str]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/api/v1/document.py
CHANGED
|
@@ -1,20 +1,21 @@
|
|
| 1 |
"""Document management API endpoints."""
|
| 2 |
-
|
| 3 |
-
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File
|
| 4 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 5 |
from src.db.postgres.connection import get_db
|
| 6 |
from src.document.document_service import document_service
|
|
|
|
|
|
|
| 7 |
from src.middlewares.logging import get_logger, log_execution
|
| 8 |
from src.middlewares.rate_limit import limiter
|
| 9 |
-
from src.pipeline.document_pipeline.document_pipeline import document_pipeline
|
| 10 |
from pydantic import BaseModel
|
| 11 |
from typing import List
|
| 12 |
-
|
| 13 |
logger = get_logger("document_api")
|
| 14 |
-
|
| 15 |
router = APIRouter(prefix="/api/v1", tags=["Documents"])
|
| 16 |
-
|
| 17 |
-
|
| 18 |
class DocumentResponse(BaseModel):
|
| 19 |
id: str
|
| 20 |
filename: str
|
|
@@ -22,8 +23,8 @@ class DocumentResponse(BaseModel):
|
|
| 22 |
file_size: int
|
| 23 |
file_type: str
|
| 24 |
created_at: str
|
| 25 |
-
|
| 26 |
-
|
| 27 |
@router.get("/documents/{user_id}", response_model=List[DocumentResponse])
|
| 28 |
@log_execution(logger)
|
| 29 |
async def list_documents(
|
|
@@ -43,8 +44,8 @@ async def list_documents(
|
|
| 43 |
)
|
| 44 |
for doc in documents
|
| 45 |
]
|
| 46 |
-
|
| 47 |
-
|
| 48 |
@router.post("/document/upload")
|
| 49 |
@limiter.limit("10/minute")
|
| 50 |
@log_execution(logger)
|
|
@@ -56,12 +57,57 @@ async def upload_document(
|
|
| 56 |
):
|
| 57 |
"""Upload a document."""
|
| 58 |
if not user_id:
|
| 59 |
-
raise HTTPException(
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
@router.delete("/document/delete")
|
| 66 |
@log_execution(logger)
|
| 67 |
async def delete_document(
|
|
@@ -70,10 +116,31 @@ async def delete_document(
|
|
| 70 |
db: AsyncSession = Depends(get_db)
|
| 71 |
):
|
| 72 |
"""Delete a document."""
|
| 73 |
-
await
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
@router.post("/document/process")
|
| 78 |
@log_execution(logger)
|
| 79 |
async def process_document(
|
|
@@ -82,6 +149,45 @@ async def process_document(
|
|
| 82 |
db: AsyncSession = Depends(get_db)
|
| 83 |
):
|
| 84 |
"""Process document and ingest to vector index."""
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Document management API endpoints."""
|
| 2 |
+
|
| 3 |
+
from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File, status
|
| 4 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 5 |
from src.db.postgres.connection import get_db
|
| 6 |
from src.document.document_service import document_service
|
| 7 |
+
from src.knowledge.processing_service import knowledge_processor
|
| 8 |
+
from src.storage.az_blob.az_blob import blob_storage
|
| 9 |
from src.middlewares.logging import get_logger, log_execution
|
| 10 |
from src.middlewares.rate_limit import limiter
|
|
|
|
| 11 |
from pydantic import BaseModel
|
| 12 |
from typing import List
|
| 13 |
+
|
| 14 |
logger = get_logger("document_api")
|
| 15 |
+
|
| 16 |
router = APIRouter(prefix="/api/v1", tags=["Documents"])
|
| 17 |
+
|
| 18 |
+
|
| 19 |
class DocumentResponse(BaseModel):
|
| 20 |
id: str
|
| 21 |
filename: str
|
|
|
|
| 23 |
file_size: int
|
| 24 |
file_type: str
|
| 25 |
created_at: str
|
| 26 |
+
|
| 27 |
+
|
| 28 |
@router.get("/documents/{user_id}", response_model=List[DocumentResponse])
|
| 29 |
@log_execution(logger)
|
| 30 |
async def list_documents(
|
|
|
|
| 44 |
)
|
| 45 |
for doc in documents
|
| 46 |
]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
@router.post("/document/upload")
|
| 50 |
@limiter.limit("10/minute")
|
| 51 |
@log_execution(logger)
|
|
|
|
| 57 |
):
|
| 58 |
"""Upload a document."""
|
| 59 |
if not user_id:
|
| 60 |
+
raise HTTPException(
|
| 61 |
+
status_code=400,
|
| 62 |
+
detail="user_id is required"
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
try:
|
| 66 |
+
# Read file content
|
| 67 |
+
content = await file.read()
|
| 68 |
+
file_size = len(content)
|
| 69 |
+
|
| 70 |
+
# Get file type
|
| 71 |
+
filename = file.filename
|
| 72 |
+
file_type = filename.split('.')[-1].lower() if '.' in filename else 'txt'
|
| 73 |
+
|
| 74 |
+
if file_type not in ['pdf', 'docx', 'txt']:
|
| 75 |
+
raise HTTPException(
|
| 76 |
+
status_code=400,
|
| 77 |
+
detail="Unsupported file type. Supported: pdf, docx, txt"
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# Upload to blob storage
|
| 81 |
+
blob_name = await blob_storage.upload_file(content, filename, user_id)
|
| 82 |
+
|
| 83 |
+
# Create document record
|
| 84 |
+
document = await document_service.create_document(
|
| 85 |
+
db=db,
|
| 86 |
+
user_id=user_id,
|
| 87 |
+
filename=filename,
|
| 88 |
+
blob_name=blob_name,
|
| 89 |
+
file_size=file_size,
|
| 90 |
+
file_type=file_type
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
return {
|
| 94 |
+
"status": "success",
|
| 95 |
+
"message": "Document uploaded successfully",
|
| 96 |
+
"data": {
|
| 97 |
+
"id": document.id,
|
| 98 |
+
"filename": document.filename,
|
| 99 |
+
"status": document.status
|
| 100 |
+
}
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
except Exception as e:
|
| 104 |
+
logger.error(f"Upload failed for user {user_id}", error=str(e))
|
| 105 |
+
raise HTTPException(
|
| 106 |
+
status_code=500,
|
| 107 |
+
detail=f"Upload failed: {str(e)}"
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
@router.delete("/document/delete")
|
| 112 |
@log_execution(logger)
|
| 113 |
async def delete_document(
|
|
|
|
| 116 |
db: AsyncSession = Depends(get_db)
|
| 117 |
):
|
| 118 |
"""Delete a document."""
|
| 119 |
+
document = await document_service.get_document(db, document_id)
|
| 120 |
+
|
| 121 |
+
if not document:
|
| 122 |
+
raise HTTPException(
|
| 123 |
+
status_code=404,
|
| 124 |
+
detail="Document not found"
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
if document.user_id != user_id:
|
| 128 |
+
raise HTTPException(
|
| 129 |
+
status_code=403,
|
| 130 |
+
detail="Access denied"
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
success = await document_service.delete_document(db, document_id)
|
| 134 |
+
|
| 135 |
+
if success:
|
| 136 |
+
return {"status": "success", "message": "Document deleted successfully"}
|
| 137 |
+
else:
|
| 138 |
+
raise HTTPException(
|
| 139 |
+
status_code=500,
|
| 140 |
+
detail="Failed to delete document"
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
@router.post("/document/process")
|
| 145 |
@log_execution(logger)
|
| 146 |
async def process_document(
|
|
|
|
| 149 |
db: AsyncSession = Depends(get_db)
|
| 150 |
):
|
| 151 |
"""Process document and ingest to vector index."""
|
| 152 |
+
document = await document_service.get_document(db, document_id)
|
| 153 |
+
|
| 154 |
+
if not document:
|
| 155 |
+
raise HTTPException(
|
| 156 |
+
status_code=404,
|
| 157 |
+
detail="Document not found"
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
+
if document.user_id != user_id:
|
| 161 |
+
raise HTTPException(
|
| 162 |
+
status_code=403,
|
| 163 |
+
detail="Access denied"
|
| 164 |
+
)
|
| 165 |
+
|
| 166 |
+
try:
|
| 167 |
+
# Update status to processing
|
| 168 |
+
await document_service.update_document_status(db, document_id, "processing")
|
| 169 |
+
|
| 170 |
+
# Process document
|
| 171 |
+
chunks_count = await knowledge_processor.process_document(document, db)
|
| 172 |
+
|
| 173 |
+
# Update status to completed
|
| 174 |
+
await document_service.update_document_status(db, document_id, "completed")
|
| 175 |
+
|
| 176 |
+
return {
|
| 177 |
+
"status": "success",
|
| 178 |
+
"message": "Document processed successfully",
|
| 179 |
+
"data": {
|
| 180 |
+
"document_id": document_id,
|
| 181 |
+
"chunks_processed": chunks_count
|
| 182 |
+
}
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
except Exception as e:
|
| 186 |
+
logger.error(f"Processing failed for document {document_id}", error=str(e))
|
| 187 |
+
await document_service.update_document_status(
|
| 188 |
+
db, document_id, "failed", str(e)
|
| 189 |
+
)
|
| 190 |
+
raise HTTPException(
|
| 191 |
+
status_code=500,
|
| 192 |
+
detail=f"Processing failed: {str(e)}"
|
| 193 |
+
)
|
src/config/settings.py
CHANGED
|
@@ -61,11 +61,6 @@ class Settings(BaseSettings):
|
|
| 61 |
# Bcrypt salt (for users - existing)
|
| 62 |
emarcal_bcrypt_salt: str = Field(alias="emarcal__bcrypt__salt", default="")
|
| 63 |
|
| 64 |
-
# DB credential encryption (Fernet key for user-registered database creds)
|
| 65 |
-
dataeyond_db_credential_key: str = Field(
|
| 66 |
-
alias="dataeyond__db__credential__key", default=""
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
|
| 70 |
# Singleton instance
|
| 71 |
settings = Settings()
|
|
|
|
| 61 |
# Bcrypt salt (for users - existing)
|
| 62 |
emarcal_bcrypt_salt: str = Field(alias="emarcal__bcrypt__salt", default="")
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
# Singleton instance
|
| 66 |
settings = Settings()
|
src/database_client/database_client_service.py
DELETED
|
@@ -1,118 +0,0 @@
|
|
| 1 |
-
"""Service for managing user-registered external database connections."""
|
| 2 |
-
|
| 3 |
-
import uuid
|
| 4 |
-
from typing import List, Optional
|
| 5 |
-
|
| 6 |
-
from sqlalchemy import delete, select
|
| 7 |
-
from sqlalchemy.ext.asyncio import AsyncSession
|
| 8 |
-
|
| 9 |
-
from src.db.postgres.models import DatabaseClient
|
| 10 |
-
from src.middlewares.logging import get_logger
|
| 11 |
-
from src.utils.db_credential_encryption import (
|
| 12 |
-
decrypt_credentials_dict,
|
| 13 |
-
encrypt_credentials_dict,
|
| 14 |
-
)
|
| 15 |
-
|
| 16 |
-
logger = get_logger("database_client_service")
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
class DatabaseClientService:
|
| 20 |
-
"""Service for managing user-registered external database connections."""
|
| 21 |
-
|
| 22 |
-
async def create(
|
| 23 |
-
self,
|
| 24 |
-
db: AsyncSession,
|
| 25 |
-
user_id: str,
|
| 26 |
-
name: str,
|
| 27 |
-
db_type: str,
|
| 28 |
-
credentials: dict,
|
| 29 |
-
) -> DatabaseClient:
|
| 30 |
-
"""Register a new database client connection.
|
| 31 |
-
|
| 32 |
-
Credentials are encrypted before being stored.
|
| 33 |
-
"""
|
| 34 |
-
client = DatabaseClient(
|
| 35 |
-
id=str(uuid.uuid4()),
|
| 36 |
-
user_id=user_id,
|
| 37 |
-
name=name,
|
| 38 |
-
db_type=db_type,
|
| 39 |
-
credentials=encrypt_credentials_dict(credentials),
|
| 40 |
-
status="active",
|
| 41 |
-
)
|
| 42 |
-
db.add(client)
|
| 43 |
-
await db.commit()
|
| 44 |
-
await db.refresh(client)
|
| 45 |
-
logger.info(f"Created database client {client.id} for user {user_id}")
|
| 46 |
-
return client
|
| 47 |
-
|
| 48 |
-
async def get_user_clients(
|
| 49 |
-
self,
|
| 50 |
-
db: AsyncSession,
|
| 51 |
-
user_id: str,
|
| 52 |
-
) -> List[DatabaseClient]:
|
| 53 |
-
"""Return all active and inactive database clients for a user."""
|
| 54 |
-
result = await db.execute(
|
| 55 |
-
select(DatabaseClient)
|
| 56 |
-
.where(DatabaseClient.user_id == user_id)
|
| 57 |
-
.order_by(DatabaseClient.created_at.desc())
|
| 58 |
-
)
|
| 59 |
-
return result.scalars().all()
|
| 60 |
-
|
| 61 |
-
async def get(
|
| 62 |
-
self,
|
| 63 |
-
db: AsyncSession,
|
| 64 |
-
client_id: str,
|
| 65 |
-
) -> Optional[DatabaseClient]:
|
| 66 |
-
"""Return a single database client by its ID."""
|
| 67 |
-
result = await db.execute(
|
| 68 |
-
select(DatabaseClient).where(DatabaseClient.id == client_id)
|
| 69 |
-
)
|
| 70 |
-
return result.scalars().first()
|
| 71 |
-
|
| 72 |
-
async def update(
|
| 73 |
-
self,
|
| 74 |
-
db: AsyncSession,
|
| 75 |
-
client_id: str,
|
| 76 |
-
name: Optional[str] = None,
|
| 77 |
-
credentials: Optional[dict] = None,
|
| 78 |
-
status: Optional[str] = None,
|
| 79 |
-
) -> Optional[DatabaseClient]:
|
| 80 |
-
"""Update an existing database client connection.
|
| 81 |
-
|
| 82 |
-
Only non-None fields are updated.
|
| 83 |
-
Credentials are re-encrypted if provided.
|
| 84 |
-
"""
|
| 85 |
-
client = await self.get(db, client_id)
|
| 86 |
-
if not client:
|
| 87 |
-
return None
|
| 88 |
-
|
| 89 |
-
if name is not None:
|
| 90 |
-
client.name = name
|
| 91 |
-
if credentials is not None:
|
| 92 |
-
client.credentials = encrypt_credentials_dict(credentials)
|
| 93 |
-
if status is not None:
|
| 94 |
-
client.status = status
|
| 95 |
-
|
| 96 |
-
await db.commit()
|
| 97 |
-
await db.refresh(client)
|
| 98 |
-
logger.info(f"Updated database client {client_id}")
|
| 99 |
-
return client
|
| 100 |
-
|
| 101 |
-
async def delete(
|
| 102 |
-
self,
|
| 103 |
-
db: AsyncSession,
|
| 104 |
-
client_id: str,
|
| 105 |
-
) -> bool:
|
| 106 |
-
"""Permanently delete a database client connection."""
|
| 107 |
-
result = await db.execute(
|
| 108 |
-
delete(DatabaseClient).where(DatabaseClient.id == client_id)
|
| 109 |
-
)
|
| 110 |
-
await db.commit()
|
| 111 |
-
deleted = result.rowcount > 0
|
| 112 |
-
if deleted:
|
| 113 |
-
logger.info(f"Deleted database client {client_id}")
|
| 114 |
-
return deleted
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
database_client_service = DatabaseClientService()
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/db/postgres/init_db.py
CHANGED
|
@@ -2,14 +2,7 @@
|
|
| 2 |
|
| 3 |
from sqlalchemy import text
|
| 4 |
from src.db.postgres.connection import engine, Base
|
| 5 |
-
from src.db.postgres.models import
|
| 6 |
-
ChatMessage,
|
| 7 |
-
DatabaseClient,
|
| 8 |
-
Document,
|
| 9 |
-
MessageSource,
|
| 10 |
-
Room,
|
| 11 |
-
User,
|
| 12 |
-
)
|
| 13 |
|
| 14 |
|
| 15 |
async def init_db():
|
|
|
|
| 2 |
|
| 3 |
from sqlalchemy import text
|
| 4 |
from src.db.postgres.connection import engine, Base
|
| 5 |
+
from src.db.postgres.models import Document, Room, ChatMessage, User, MessageSource
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
async def init_db():
|
src/db/postgres/models.py
CHANGED
|
@@ -4,7 +4,6 @@ from uuid import uuid4
|
|
| 4 |
from sqlalchemy import Column, String, DateTime, Text, Integer, ForeignKey
|
| 5 |
from sqlalchemy.orm import relationship
|
| 6 |
from sqlalchemy.sql import func
|
| 7 |
-
from sqlalchemy.dialects.postgresql import JSONB
|
| 8 |
from src.db.postgres.connection import Base
|
| 9 |
|
| 10 |
|
|
@@ -82,18 +81,3 @@ class MessageSource(Base):
|
|
| 82 |
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
| 83 |
|
| 84 |
message = relationship("ChatMessage", back_populates="sources")
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
class DatabaseClient(Base):
|
| 88 |
-
"""User-registered external database connections."""
|
| 89 |
-
__tablename__ = "databases"
|
| 90 |
-
|
| 91 |
-
id = Column(String, primary_key=True, default=lambda: str(uuid4()))
|
| 92 |
-
user_id = Column(String, nullable=False, index=True)
|
| 93 |
-
name = Column(String, nullable=False) # display name, e.g. "Prod DB"
|
| 94 |
-
db_type = Column(String, nullable=False) # postgres|mysql|sqlserver|supabase|bigquery|snowflake
|
| 95 |
-
credentials = Column(JSONB, nullable=False) # per-type JSON; sensitive fields Fernet-encrypted
|
| 96 |
-
status = Column(String, nullable=False, default="active") # active | inactive
|
| 97 |
-
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
| 98 |
-
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
| 99 |
-
|
|
|
|
| 4 |
from sqlalchemy import Column, String, DateTime, Text, Integer, ForeignKey
|
| 5 |
from sqlalchemy.orm import relationship
|
| 6 |
from sqlalchemy.sql import func
|
|
|
|
| 7 |
from src.db.postgres.connection import Base
|
| 8 |
|
| 9 |
|
|
|
|
| 81 |
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
| 82 |
|
| 83 |
message = relationship("ChatMessage", back_populates="sources")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/knowledge/processing_service.py
CHANGED
|
@@ -5,14 +5,14 @@ from langchain_core.documents import Document as LangChainDocument
|
|
| 5 |
from src.db.postgres.vector_store import get_vector_store
|
| 6 |
from src.storage.az_blob.az_blob import blob_storage
|
| 7 |
from src.db.postgres.models import Document as DBDocument
|
|
|
|
| 8 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 9 |
from src.middlewares.logging import get_logger
|
|
|
|
|
|
|
| 10 |
from typing import List
|
| 11 |
-
import
|
| 12 |
import docx
|
| 13 |
-
import pandas as pd
|
| 14 |
-
import pytesseract
|
| 15 |
-
from pdf2image import convert_from_bytes
|
| 16 |
from io import BytesIO
|
| 17 |
|
| 18 |
logger = get_logger("knowledge_processing")
|
|
@@ -40,10 +40,6 @@ class KnowledgeProcessingService:
|
|
| 40 |
|
| 41 |
if db_doc.file_type == "pdf":
|
| 42 |
documents = await self._build_pdf_documents(content, db_doc)
|
| 43 |
-
elif db_doc.file_type == "csv":
|
| 44 |
-
documents = self._build_csv_documents(content, db_doc)
|
| 45 |
-
elif db_doc.file_type == "xlsx":
|
| 46 |
-
documents = self._build_excel_documents(content, db_doc)
|
| 47 |
else:
|
| 48 |
text = self._extract_text(content, db_doc.file_type)
|
| 49 |
if not text.strip():
|
|
@@ -53,14 +49,10 @@ class KnowledgeProcessingService:
|
|
| 53 |
LangChainDocument(
|
| 54 |
page_content=chunk,
|
| 55 |
metadata={
|
|
|
|
| 56 |
"user_id": db_doc.user_id,
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
-
"document_id": db_doc.id,
|
| 60 |
-
"filename": db_doc.filename,
|
| 61 |
-
"file_type": db_doc.file_type,
|
| 62 |
-
"chunk_index": i,
|
| 63 |
-
},
|
| 64 |
}
|
| 65 |
)
|
| 66 |
for i, chunk in enumerate(chunks)
|
|
@@ -82,98 +74,62 @@ class KnowledgeProcessingService:
|
|
| 82 |
async def _build_pdf_documents(
|
| 83 |
self, content: bytes, db_doc: DBDocument
|
| 84 |
) -> List[LangChainDocument]:
|
| 85 |
-
"""Build LangChain documents from PDF with page_label metadata
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
documents: List[LangChainDocument] = []
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
"document_id": db_doc.id,
|
|
|
|
| 108 |
"filename": db_doc.filename,
|
| 109 |
-
"file_type": db_doc.file_type,
|
| 110 |
"chunk_index": len(documents),
|
| 111 |
"page_label": page_num,
|
| 112 |
-
}
|
| 113 |
-
|
| 114 |
-
))
|
| 115 |
-
|
| 116 |
-
return documents
|
| 117 |
-
|
| 118 |
-
def _profile_dataframe(
|
| 119 |
-
self, df: pd.DataFrame, source_name: str, db_doc: DBDocument
|
| 120 |
-
) -> List[LangChainDocument]:
|
| 121 |
-
"""Profile each column of a dataframe → one chunk per column."""
|
| 122 |
-
documents = []
|
| 123 |
-
row_count = len(df)
|
| 124 |
-
|
| 125 |
-
for col_name in df.columns:
|
| 126 |
-
col = df[col_name]
|
| 127 |
-
is_numeric = pd.api.types.is_numeric_dtype(col)
|
| 128 |
-
null_count = int(col.isnull().sum())
|
| 129 |
-
distinct_count = int(col.nunique())
|
| 130 |
-
distinct_ratio = distinct_count / row_count if row_count > 0 else 0
|
| 131 |
-
|
| 132 |
-
text = f"Source: {source_name} ({row_count} rows)\n"
|
| 133 |
-
text += f"Column: {col_name} ({col.dtype})\n"
|
| 134 |
-
text += f"Null count: {null_count}\n"
|
| 135 |
-
text += f"Distinct count: {distinct_count} ({distinct_ratio:.1%})\n"
|
| 136 |
-
|
| 137 |
-
if is_numeric:
|
| 138 |
-
text += f"Min: {col.min()}, Max: {col.max()}\n"
|
| 139 |
-
text += f"Mean: {col.mean():.4f}, Median: {col.median():.4f}\n"
|
| 140 |
-
|
| 141 |
-
if 0 < distinct_ratio <= 0.05:
|
| 142 |
-
top_values = col.value_counts().head(10)
|
| 143 |
-
top_str = ", ".join(f"{v} ({c})" for v, c in top_values.items())
|
| 144 |
-
text += f"Top values: {top_str}\n"
|
| 145 |
-
|
| 146 |
-
text += f"Sample values: {col.dropna().head(5).tolist()}"
|
| 147 |
-
|
| 148 |
-
documents.append(LangChainDocument(
|
| 149 |
-
page_content=text,
|
| 150 |
-
metadata={
|
| 151 |
-
"user_id": db_doc.user_id,
|
| 152 |
-
"source_type": "document",
|
| 153 |
-
"data": {
|
| 154 |
-
"document_id": db_doc.id,
|
| 155 |
-
"filename": db_doc.filename,
|
| 156 |
-
"file_type": db_doc.file_type,
|
| 157 |
-
"source": source_name,
|
| 158 |
-
"column_name": col_name,
|
| 159 |
-
"column_type": str(col.dtype),
|
| 160 |
-
}
|
| 161 |
-
}
|
| 162 |
-
))
|
| 163 |
-
return documents
|
| 164 |
|
| 165 |
-
def _build_csv_documents(self, content: bytes, db_doc: DBDocument) -> List[LangChainDocument]:
|
| 166 |
-
"""Profile each column of a CSV file."""
|
| 167 |
-
df = pd.read_csv(BytesIO(content))
|
| 168 |
-
return self._profile_dataframe(df, db_doc.filename, db_doc)
|
| 169 |
-
|
| 170 |
-
def _build_excel_documents(self, content: bytes, db_doc: DBDocument) -> List[LangChainDocument]:
|
| 171 |
-
"""Profile each column of every sheet in an Excel file."""
|
| 172 |
-
sheets = pd.read_excel(BytesIO(content), sheet_name=None)
|
| 173 |
-
documents = []
|
| 174 |
-
for sheet_name, df in sheets.items():
|
| 175 |
-
source_name = f"{db_doc.filename} / sheet: {sheet_name}"
|
| 176 |
-
documents.extend(self._profile_dataframe(df, source_name, db_doc))
|
| 177 |
return documents
|
| 178 |
|
| 179 |
def _extract_text(self, content: bytes, file_type: str) -> str:
|
|
|
|
| 5 |
from src.db.postgres.vector_store import get_vector_store
|
| 6 |
from src.storage.az_blob.az_blob import blob_storage
|
| 7 |
from src.db.postgres.models import Document as DBDocument
|
| 8 |
+
from src.config.settings import settings
|
| 9 |
from sqlalchemy.ext.asyncio import AsyncSession
|
| 10 |
from src.middlewares.logging import get_logger
|
| 11 |
+
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
|
| 12 |
+
from azure.core.credentials import AzureKeyCredential
|
| 13 |
from typing import List
|
| 14 |
+
import pypdf
|
| 15 |
import docx
|
|
|
|
|
|
|
|
|
|
| 16 |
from io import BytesIO
|
| 17 |
|
| 18 |
logger = get_logger("knowledge_processing")
|
|
|
|
| 40 |
|
| 41 |
if db_doc.file_type == "pdf":
|
| 42 |
documents = await self._build_pdf_documents(content, db_doc)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
else:
|
| 44 |
text = self._extract_text(content, db_doc.file_type)
|
| 45 |
if not text.strip():
|
|
|
|
| 49 |
LangChainDocument(
|
| 50 |
page_content=chunk,
|
| 51 |
metadata={
|
| 52 |
+
"document_id": db_doc.id,
|
| 53 |
"user_id": db_doc.user_id,
|
| 54 |
+
"filename": db_doc.filename,
|
| 55 |
+
"chunk_index": i,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
}
|
| 57 |
)
|
| 58 |
for i, chunk in enumerate(chunks)
|
|
|
|
| 74 |
async def _build_pdf_documents(
|
| 75 |
self, content: bytes, db_doc: DBDocument
|
| 76 |
) -> List[LangChainDocument]:
|
| 77 |
+
"""Build LangChain documents from PDF with page_label metadata.
|
| 78 |
+
|
| 79 |
+
Uses Azure Document Intelligence (per-page) when credentials are present,
|
| 80 |
+
falls back to pypdf (also per-page) otherwise.
|
| 81 |
+
"""
|
| 82 |
documents: List[LangChainDocument] = []
|
| 83 |
|
| 84 |
+
if settings.azureai_docintel_endpoint and settings.azureai_docintel_key:
|
| 85 |
+
async with DocumentIntelligenceClient(
|
| 86 |
+
endpoint=settings.azureai_docintel_endpoint,
|
| 87 |
+
credential=AzureKeyCredential(settings.azureai_docintel_key),
|
| 88 |
+
) as client:
|
| 89 |
+
poller = await client.begin_analyze_document(
|
| 90 |
+
model_id="prebuilt-read",
|
| 91 |
+
body=BytesIO(content),
|
| 92 |
+
content_type="application/pdf",
|
| 93 |
+
)
|
| 94 |
+
result = await poller.result()
|
| 95 |
+
logger.info(f"Azure DI extracted {len(result.pages or [])} pages")
|
| 96 |
+
|
| 97 |
+
for page in result.pages or []:
|
| 98 |
+
page_text = "\n".join(
|
| 99 |
+
line.content for line in (page.lines or [])
|
| 100 |
+
)
|
| 101 |
+
if not page_text.strip():
|
| 102 |
+
continue
|
| 103 |
+
for chunk in self.text_splitter.split_text(page_text):
|
| 104 |
+
documents.append(LangChainDocument(
|
| 105 |
+
page_content=chunk,
|
| 106 |
+
metadata={
|
| 107 |
+
"document_id": db_doc.id,
|
| 108 |
+
"user_id": db_doc.user_id,
|
| 109 |
+
"filename": db_doc.filename,
|
| 110 |
+
"chunk_index": len(documents),
|
| 111 |
+
"page_label": page.page_number,
|
| 112 |
+
}
|
| 113 |
+
))
|
| 114 |
+
else:
|
| 115 |
+
logger.warning("Azure DI not configured, using pypdf")
|
| 116 |
+
pdf_reader = pypdf.PdfReader(BytesIO(content))
|
| 117 |
+
for page_num, page in enumerate(pdf_reader.pages, start=1):
|
| 118 |
+
page_text = page.extract_text() or ""
|
| 119 |
+
if not page_text.strip():
|
| 120 |
+
continue
|
| 121 |
+
for chunk in self.text_splitter.split_text(page_text):
|
| 122 |
+
documents.append(LangChainDocument(
|
| 123 |
+
page_content=chunk,
|
| 124 |
+
metadata={
|
| 125 |
"document_id": db_doc.id,
|
| 126 |
+
"user_id": db_doc.user_id,
|
| 127 |
"filename": db_doc.filename,
|
|
|
|
| 128 |
"chunk_index": len(documents),
|
| 129 |
"page_label": page_num,
|
| 130 |
+
}
|
| 131 |
+
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
return documents
|
| 134 |
|
| 135 |
def _extract_text(self, content: bytes, file_type: str) -> str:
|
src/models/credentials.py
DELETED
|
@@ -1,164 +0,0 @@
|
|
| 1 |
-
"""Pydantic credential schemas for user-registered external databases.
|
| 2 |
-
|
| 3 |
-
Imported by the `/database-clients` API router (`src/api/v1/db_client.py`) and,
|
| 4 |
-
via `DbType`, by the db pipeline connector (`src/pipeline/db_pipeline/connector.py`).
|
| 5 |
-
|
| 6 |
-
Sensitive fields (`password`, `service_account_json`) are Fernet-encrypted by
|
| 7 |
-
the database_client service before being stored in the JSONB column; these
|
| 8 |
-
schemas describe the plaintext wire format, not the stored shape.
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
from typing import Literal, Optional, Union
|
| 12 |
-
|
| 13 |
-
from pydantic import BaseModel, Field
|
| 14 |
-
|
| 15 |
-
# ---------------------------------------------------------------------------
|
| 16 |
-
# Supported DB types
|
| 17 |
-
# ---------------------------------------------------------------------------
|
| 18 |
-
|
| 19 |
-
DbType = Literal["postgres", "mysql", "sqlserver", "supabase", "bigquery", "snowflake"]
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
# ---------------------------------------------------------------------------
|
| 23 |
-
# Typed credential schemas per DB type
|
| 24 |
-
# ---------------------------------------------------------------------------
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
class PostgresCredentials(BaseModel):
|
| 28 |
-
"""Connection credentials for PostgreSQL."""
|
| 29 |
-
|
| 30 |
-
host: str = Field(..., description="Hostname or IP address of the PostgreSQL server.", examples=["db.example.com"])
|
| 31 |
-
port: int = Field(5432, description="Port number (default: 5432).", examples=[5432])
|
| 32 |
-
database: str = Field(..., description="Name of the target database.", examples=["mydb"])
|
| 33 |
-
username: str = Field(..., description="Database username.", examples=["admin"])
|
| 34 |
-
password: str = Field(..., description="Database password. Will be encrypted at rest.", examples=["s3cr3t!"])
|
| 35 |
-
ssl_mode: Literal["disable", "require", "verify-ca", "verify-full"] = Field(
|
| 36 |
-
"require",
|
| 37 |
-
description="SSL mode for the connection.",
|
| 38 |
-
examples=["require"],
|
| 39 |
-
)
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
class MysqlCredentials(BaseModel):
|
| 43 |
-
"""Connection credentials for MySQL."""
|
| 44 |
-
|
| 45 |
-
host: str = Field(..., description="Hostname or IP address of the MySQL server.", examples=["db.example.com"])
|
| 46 |
-
port: int = Field(3306, description="Port number (default: 3306).", examples=[3306])
|
| 47 |
-
database: str = Field(..., description="Name of the target database.", examples=["mydb"])
|
| 48 |
-
username: str = Field(..., description="Database username.", examples=["admin"])
|
| 49 |
-
password: str = Field(..., description="Database password. Will be encrypted at rest.", examples=["s3cr3t!"])
|
| 50 |
-
ssl: bool = Field(True, description="Enable SSL for the connection.", examples=[True])
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
class SqlServerCredentials(BaseModel):
|
| 54 |
-
"""Connection credentials for Microsoft SQL Server."""
|
| 55 |
-
|
| 56 |
-
host: str = Field(..., description="Hostname or IP address of the SQL Server.", examples=["sqlserver.example.com"])
|
| 57 |
-
port: int = Field(1433, description="Port number (default: 1433).", examples=[1433])
|
| 58 |
-
database: str = Field(..., description="Name of the target database.", examples=["mydb"])
|
| 59 |
-
username: str = Field(..., description="Database username.", examples=["sa"])
|
| 60 |
-
password: str = Field(..., description="Database password. Will be encrypted at rest.", examples=["s3cr3t!"])
|
| 61 |
-
driver: Optional[str] = Field(
|
| 62 |
-
None,
|
| 63 |
-
description="ODBC driver name. Leave empty to use the default driver.",
|
| 64 |
-
examples=["ODBC Driver 17 for SQL Server"],
|
| 65 |
-
)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
class SupabaseCredentials(BaseModel):
|
| 69 |
-
"""Connection credentials for Supabase (PostgreSQL-based).
|
| 70 |
-
|
| 71 |
-
Use the connection string details from your Supabase project dashboard
|
| 72 |
-
under Settings > Database.
|
| 73 |
-
"""
|
| 74 |
-
|
| 75 |
-
host: str = Field(
|
| 76 |
-
...,
|
| 77 |
-
description="Supabase database host (e.g. db.<project-ref>.supabase.co, or the pooler host).",
|
| 78 |
-
examples=["db.xxxx.supabase.co"],
|
| 79 |
-
)
|
| 80 |
-
port: int = Field(
|
| 81 |
-
5432,
|
| 82 |
-
description="Port number. Use 5432 for direct connection, 6543 for the connection pooler.",
|
| 83 |
-
examples=[5432],
|
| 84 |
-
)
|
| 85 |
-
database: str = Field("postgres", description="Database name (always 'postgres' for Supabase).", examples=["postgres"])
|
| 86 |
-
username: str = Field(
|
| 87 |
-
...,
|
| 88 |
-
description="Database user. Use 'postgres' for direct connection, or 'postgres.<project-ref>' for the pooler.",
|
| 89 |
-
examples=["postgres"],
|
| 90 |
-
)
|
| 91 |
-
password: str = Field(..., description="Database password (set in Supabase dashboard). Will be encrypted at rest.", examples=["s3cr3t!"])
|
| 92 |
-
ssl_mode: Literal["require", "verify-ca", "verify-full"] = Field(
|
| 93 |
-
"require",
|
| 94 |
-
description="SSL mode. Supabase always requires SSL.",
|
| 95 |
-
examples=["require"],
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
class BigQueryCredentials(BaseModel):
|
| 100 |
-
"""Connection credentials for Google BigQuery.
|
| 101 |
-
|
| 102 |
-
Requires a GCP Service Account with at least BigQuery Data Viewer
|
| 103 |
-
and BigQuery Job User roles.
|
| 104 |
-
"""
|
| 105 |
-
|
| 106 |
-
project_id: str = Field(..., description="GCP project ID where the BigQuery dataset resides.", examples=["my-gcp-project"])
|
| 107 |
-
dataset_id: str = Field(..., description="BigQuery dataset name to connect to.", examples=["my_dataset"])
|
| 108 |
-
location: Optional[str] = Field(
|
| 109 |
-
"US",
|
| 110 |
-
description="Dataset location/region (default: US).",
|
| 111 |
-
examples=["US", "EU", "asia-southeast1"],
|
| 112 |
-
)
|
| 113 |
-
service_account_json: str = Field(
|
| 114 |
-
...,
|
| 115 |
-
description=(
|
| 116 |
-
"Full content of the GCP Service Account key JSON file as a string. "
|
| 117 |
-
"Will be encrypted at rest."
|
| 118 |
-
),
|
| 119 |
-
examples=['{"type":"service_account","project_id":"my-gcp-project","private_key_id":"..."}'],
|
| 120 |
-
)
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
class SnowflakeCredentials(BaseModel):
|
| 124 |
-
"""Connection credentials for Snowflake."""
|
| 125 |
-
|
| 126 |
-
account: str = Field(
|
| 127 |
-
...,
|
| 128 |
-
description="Snowflake account identifier, including region if applicable (e.g. myaccount.us-east-1).",
|
| 129 |
-
examples=["myaccount.us-east-1"],
|
| 130 |
-
)
|
| 131 |
-
warehouse: str = Field(..., description="Name of the virtual warehouse to use for queries.", examples=["COMPUTE_WH"])
|
| 132 |
-
database: str = Field(..., description="Name of the target Snowflake database.", examples=["MY_DB"])
|
| 133 |
-
db_schema: Optional[str] = Field("PUBLIC", alias="schema", description="Schema name (default: PUBLIC).", examples=["PUBLIC"])
|
| 134 |
-
username: str = Field(..., description="Snowflake username.", examples=["admin"])
|
| 135 |
-
password: str = Field(..., description="Snowflake password. Will be encrypted at rest.", examples=["s3cr3t!"])
|
| 136 |
-
role: Optional[str] = Field(None, description="Snowflake role to assume for the session.", examples=["SYSADMIN"])
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
# Union of all credential shapes — reserved for future typed validation on
|
| 140 |
-
# DatabaseClientCreate.credentials (currently Dict[str, Any]). Kept exported
|
| 141 |
-
# so downstream code can reference it without re-declaring.
|
| 142 |
-
CredentialsUnion = Union[
|
| 143 |
-
PostgresCredentials,
|
| 144 |
-
MysqlCredentials,
|
| 145 |
-
SqlServerCredentials,
|
| 146 |
-
SupabaseCredentials,
|
| 147 |
-
BigQueryCredentials,
|
| 148 |
-
SnowflakeCredentials,
|
| 149 |
-
]
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
# Doc-only helper: surfaces per-type credential shapes in the Swagger "Schemas"
|
| 153 |
-
# panel so API consumers can discover the exact field set for each db_type.
|
| 154 |
-
# Not referenced by any endpoint — importing it in db_client.py is enough for
|
| 155 |
-
# FastAPI's OpenAPI generator to pick it up.
|
| 156 |
-
class CredentialSchemas(BaseModel):
|
| 157 |
-
"""Reference schemas for `credentials` per `db_type` (Swagger-only, not used by endpoints)."""
|
| 158 |
-
|
| 159 |
-
postgres: PostgresCredentials
|
| 160 |
-
mysql: MysqlCredentials
|
| 161 |
-
sqlserver: SqlServerCredentials
|
| 162 |
-
supabase: SupabaseCredentials
|
| 163 |
-
bigquery: BigQueryCredentials
|
| 164 |
-
snowflake: SnowflakeCredentials
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/pipeline/db_pipeline/__init__.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
from src.pipeline.db_pipeline.db_pipeline_service import DbPipelineService, db_pipeline_service
|
| 2 |
-
|
| 3 |
-
__all__ = ["DbPipelineService", "db_pipeline_service"]
|
|
|
|
|
|
|
|
|
|
|
|
src/pipeline/db_pipeline/db_pipeline_service.py
DELETED
|
@@ -1,197 +0,0 @@
|
|
| 1 |
-
"""Service for ingesting a user's external database into the vector store.
|
| 2 |
-
|
| 3 |
-
End-to-end flow: connect -> introspect schema -> profile columns -> build text
|
| 4 |
-
-> embed + store in the shared PGVector collection (tagged with
|
| 5 |
-
`source_type="database"`, retrievable via the same retriever used for docs).
|
| 6 |
-
|
| 7 |
-
Sync DB work (SQLAlchemy inspect, pandas read_sql) runs in a threadpool;
|
| 8 |
-
async vector writes stay on the event loop.
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
-
import asyncio
|
| 12 |
-
from contextlib import contextmanager
|
| 13 |
-
from typing import Any, Iterator, Optional
|
| 14 |
-
|
| 15 |
-
from langchain_core.documents import Document as LangChainDocument
|
| 16 |
-
from sqlalchemy import URL, create_engine
|
| 17 |
-
from sqlalchemy.engine import Engine
|
| 18 |
-
|
| 19 |
-
from src.db.postgres.vector_store import get_vector_store
|
| 20 |
-
from src.middlewares.logging import get_logger
|
| 21 |
-
from src.models.credentials import DbType
|
| 22 |
-
from src.pipeline.db_pipeline.extractor import get_schema, profile_table
|
| 23 |
-
|
| 24 |
-
logger = get_logger("db_pipeline")
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
class DbPipelineService:
|
| 28 |
-
"""End-to-end DB ingestion: connect -> introspect -> profile -> embed -> store."""
|
| 29 |
-
|
| 30 |
-
def connect(self, db_type: DbType, credentials: dict[str, Any]) -> Engine:
|
| 31 |
-
"""Build a SQLAlchemy engine for the user's database.
|
| 32 |
-
|
| 33 |
-
`credentials` is the plaintext dict matching the per-type schema in
|
| 34 |
-
`src/models/credentials.py`. BigQuery/Snowflake auth models differ
|
| 35 |
-
from host/port/user/pass, so every shape flows through one dict.
|
| 36 |
-
|
| 37 |
-
Optional driver imports (snowflake-sqlalchemy, json for BigQuery) are
|
| 38 |
-
done lazily so an env missing one driver doesn't break module import.
|
| 39 |
-
"""
|
| 40 |
-
logger.info("connecting to user db", db_type=db_type)
|
| 41 |
-
|
| 42 |
-
if db_type in ("postgres", "supabase"):
|
| 43 |
-
query = (
|
| 44 |
-
{"sslmode": credentials["ssl_mode"]} if credentials.get("ssl_mode") else {}
|
| 45 |
-
)
|
| 46 |
-
url = URL.create(
|
| 47 |
-
drivername="postgresql+psycopg2",
|
| 48 |
-
username=credentials["username"],
|
| 49 |
-
password=credentials["password"],
|
| 50 |
-
host=credentials["host"],
|
| 51 |
-
port=credentials["port"],
|
| 52 |
-
database=credentials["database"],
|
| 53 |
-
query=query,
|
| 54 |
-
)
|
| 55 |
-
return create_engine(url)
|
| 56 |
-
|
| 57 |
-
if db_type == "mysql":
|
| 58 |
-
url = URL.create(
|
| 59 |
-
drivername="mysql+pymysql",
|
| 60 |
-
username=credentials["username"],
|
| 61 |
-
password=credentials["password"],
|
| 62 |
-
host=credentials["host"],
|
| 63 |
-
port=credentials["port"],
|
| 64 |
-
database=credentials["database"],
|
| 65 |
-
)
|
| 66 |
-
# pymysql only activates TLS when the `ssl` dict is truthy
|
| 67 |
-
# (empty dict is falsy and silently disables TLS). Use system-
|
| 68 |
-
# default CAs via certifi + hostname verification — required by
|
| 69 |
-
# managed MySQL providers like TiDB Cloud / PlanetScale / Aiven.
|
| 70 |
-
if credentials.get("ssl", True):
|
| 71 |
-
import certifi
|
| 72 |
-
|
| 73 |
-
connect_args = {
|
| 74 |
-
"ssl": {
|
| 75 |
-
"ca": certifi.where(),
|
| 76 |
-
"check_hostname": True,
|
| 77 |
-
}
|
| 78 |
-
}
|
| 79 |
-
else:
|
| 80 |
-
connect_args = {}
|
| 81 |
-
return create_engine(url, connect_args=connect_args)
|
| 82 |
-
|
| 83 |
-
if db_type == "sqlserver":
|
| 84 |
-
# `driver` applies to pyodbc only; we ship pymssql. Accept-and-ignore
|
| 85 |
-
# keeps the credential schema stable.
|
| 86 |
-
if credentials.get("driver"):
|
| 87 |
-
logger.info(
|
| 88 |
-
"sqlserver driver hint ignored (using pymssql)",
|
| 89 |
-
driver=credentials["driver"],
|
| 90 |
-
)
|
| 91 |
-
url = URL.create(
|
| 92 |
-
drivername="mssql+pymssql",
|
| 93 |
-
username=credentials["username"],
|
| 94 |
-
password=credentials["password"],
|
| 95 |
-
host=credentials["host"],
|
| 96 |
-
port=credentials["port"],
|
| 97 |
-
database=credentials["database"],
|
| 98 |
-
)
|
| 99 |
-
return create_engine(url)
|
| 100 |
-
|
| 101 |
-
if db_type == "bigquery":
|
| 102 |
-
import json
|
| 103 |
-
|
| 104 |
-
sa_info = json.loads(credentials["service_account_json"])
|
| 105 |
-
# sqlalchemy-bigquery URL shape: bigquery://<project>/<dataset>
|
| 106 |
-
url = f"bigquery://{credentials['project_id']}/{credentials['dataset_id']}"
|
| 107 |
-
return create_engine(
|
| 108 |
-
url,
|
| 109 |
-
credentials_info=sa_info,
|
| 110 |
-
location=credentials.get("location", "US"),
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
if db_type == "snowflake":
|
| 114 |
-
from snowflake.sqlalchemy import URL as SnowflakeURL
|
| 115 |
-
|
| 116 |
-
url = SnowflakeURL(
|
| 117 |
-
account=credentials["account"],
|
| 118 |
-
user=credentials["username"],
|
| 119 |
-
password=credentials["password"],
|
| 120 |
-
database=credentials["database"],
|
| 121 |
-
schema=(
|
| 122 |
-
credentials.get("db_schema")
|
| 123 |
-
or credentials.get("schema")
|
| 124 |
-
or "PUBLIC"
|
| 125 |
-
),
|
| 126 |
-
warehouse=credentials["warehouse"],
|
| 127 |
-
role=credentials.get("role") or "",
|
| 128 |
-
)
|
| 129 |
-
return create_engine(url)
|
| 130 |
-
|
| 131 |
-
raise ValueError(f"Unsupported db_type: {db_type}")
|
| 132 |
-
|
| 133 |
-
@contextmanager
|
| 134 |
-
def engine_scope(
|
| 135 |
-
self, db_type: DbType, credentials: dict[str, Any]
|
| 136 |
-
) -> Iterator[Engine]:
|
| 137 |
-
"""Yield a connected Engine and dispose its pool on exit.
|
| 138 |
-
|
| 139 |
-
API callers should prefer this over raw `connect(...)` so user DB
|
| 140 |
-
connection pools do not leak between pipeline runs.
|
| 141 |
-
"""
|
| 142 |
-
engine = self.connect(db_type, credentials)
|
| 143 |
-
try:
|
| 144 |
-
yield engine
|
| 145 |
-
finally:
|
| 146 |
-
engine.dispose()
|
| 147 |
-
|
| 148 |
-
def _to_document(
|
| 149 |
-
self, user_id: str, table_name: str, entry: dict
|
| 150 |
-
) -> LangChainDocument:
|
| 151 |
-
col = entry["col"]
|
| 152 |
-
return LangChainDocument(
|
| 153 |
-
page_content=entry["text"],
|
| 154 |
-
metadata={
|
| 155 |
-
"user_id": user_id,
|
| 156 |
-
"source_type": "database",
|
| 157 |
-
"data": {
|
| 158 |
-
"table_name": table_name,
|
| 159 |
-
"column_name": col["name"],
|
| 160 |
-
"column_type": col["type"],
|
| 161 |
-
"is_primary_key": col.get("is_primary_key", False),
|
| 162 |
-
"foreign_key": col.get("foreign_key"),
|
| 163 |
-
},
|
| 164 |
-
},
|
| 165 |
-
)
|
| 166 |
-
|
| 167 |
-
async def run(
|
| 168 |
-
self,
|
| 169 |
-
user_id: str,
|
| 170 |
-
engine: Engine,
|
| 171 |
-
exclude_tables: Optional[frozenset[str]] = None,
|
| 172 |
-
) -> int:
|
| 173 |
-
"""Introspect the user's DB, profile columns, embed descriptions, store in PGVector.
|
| 174 |
-
|
| 175 |
-
Returns:
|
| 176 |
-
Total number of chunks ingested.
|
| 177 |
-
"""
|
| 178 |
-
vector_store = get_vector_store()
|
| 179 |
-
logger.info("db pipeline start", user_id=user_id)
|
| 180 |
-
|
| 181 |
-
schema = await asyncio.to_thread(get_schema, engine, exclude_tables)
|
| 182 |
-
|
| 183 |
-
total = 0
|
| 184 |
-
for table_name, columns in schema.items():
|
| 185 |
-
logger.info("profiling table", table=table_name, columns=len(columns))
|
| 186 |
-
entries = await asyncio.to_thread(profile_table, engine, table_name, columns)
|
| 187 |
-
docs = [self._to_document(user_id, table_name, e) for e in entries]
|
| 188 |
-
if docs:
|
| 189 |
-
await vector_store.aadd_documents(docs)
|
| 190 |
-
total += len(docs)
|
| 191 |
-
logger.info("ingested chunks", table=table_name, count=len(docs))
|
| 192 |
-
|
| 193 |
-
logger.info("db pipeline complete", user_id=user_id, total=total)
|
| 194 |
-
return total
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
db_pipeline_service = DbPipelineService()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/pipeline/db_pipeline/extractor.py
DELETED
|
@@ -1,213 +0,0 @@
|
|
| 1 |
-
"""Schema introspection and per-column profiling for a user's database.
|
| 2 |
-
|
| 3 |
-
Identifiers (table/column names) are quoted via the engine's dialect preparer,
|
| 4 |
-
which handles reserved words, mixed case, and embedded quotes correctly across
|
| 5 |
-
dialects. Values used in SQL come from SQLAlchemy inspection of the DB itself,
|
| 6 |
-
not user input.
|
| 7 |
-
"""
|
| 8 |
-
|
| 9 |
-
from typing import Optional
|
| 10 |
-
|
| 11 |
-
import pandas as pd
|
| 12 |
-
from sqlalchemy import Float, Integer, Numeric, inspect
|
| 13 |
-
from sqlalchemy.engine import Engine
|
| 14 |
-
|
| 15 |
-
from src.middlewares.logging import get_logger
|
| 16 |
-
|
| 17 |
-
logger = get_logger("db_extractor")
|
| 18 |
-
|
| 19 |
-
TOP_VALUES_THRESHOLD = 0.05 # show top values if distinct_ratio <= 5%
|
| 20 |
-
|
| 21 |
-
# Dialects where PERCENTILE_CONT(...) WITHIN GROUP is supported as an aggregate.
|
| 22 |
-
# MySQL has no percentile aggregate; BigQuery has PERCENTILE_CONT only as an
|
| 23 |
-
# analytic (window) function — both drop median and keep min/max/mean.
|
| 24 |
-
_MEDIAN_DIALECTS = frozenset({"postgresql", "mssql", "snowflake"})
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def _supports_median(engine: Engine) -> bool:
|
| 28 |
-
return engine.dialect.name in _MEDIAN_DIALECTS
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def _head_query(
|
| 32 |
-
engine: Engine,
|
| 33 |
-
select_clause: str,
|
| 34 |
-
from_clause: str,
|
| 35 |
-
n: int,
|
| 36 |
-
order_by: str = "",
|
| 37 |
-
) -> str:
|
| 38 |
-
"""LIMIT/TOP-equivalent head query for the engine's dialect."""
|
| 39 |
-
if engine.dialect.name == "mssql":
|
| 40 |
-
return f"SELECT TOP {n} {select_clause} FROM {from_clause} {order_by}".strip()
|
| 41 |
-
return f"SELECT {select_clause} FROM {from_clause} {order_by} LIMIT {n}".strip()
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def _qi(engine: Engine, name: str) -> str:
|
| 45 |
-
"""Dialect-correct identifier quoting (schema.table also handled if dotted)."""
|
| 46 |
-
preparer = engine.dialect.identifier_preparer
|
| 47 |
-
if "." in name:
|
| 48 |
-
schema, _, table = name.partition(".")
|
| 49 |
-
return f"{preparer.quote(schema)}.{preparer.quote(table)}"
|
| 50 |
-
return preparer.quote(name)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
def get_schema(
|
| 54 |
-
engine: Engine, exclude_tables: Optional[frozenset[str]] = None
|
| 55 |
-
) -> dict[str, list[dict]]:
|
| 56 |
-
"""Returns {table_name: [{name, type, is_numeric, is_primary_key, foreign_key}, ...]}."""
|
| 57 |
-
exclude = exclude_tables or frozenset()
|
| 58 |
-
inspector = inspect(engine)
|
| 59 |
-
schema = {}
|
| 60 |
-
for table_name in inspector.get_table_names():
|
| 61 |
-
if table_name in exclude:
|
| 62 |
-
continue
|
| 63 |
-
|
| 64 |
-
pk = inspector.get_pk_constraint(table_name)
|
| 65 |
-
pk_cols = set(pk["constrained_columns"]) if pk else set()
|
| 66 |
-
|
| 67 |
-
fk_map = {}
|
| 68 |
-
for fk in inspector.get_foreign_keys(table_name):
|
| 69 |
-
for col, ref_col in zip(fk["constrained_columns"], fk["referred_columns"]):
|
| 70 |
-
fk_map[col] = f"{fk['referred_table']}.{ref_col}"
|
| 71 |
-
|
| 72 |
-
cols = inspector.get_columns(table_name)
|
| 73 |
-
schema[table_name] = [
|
| 74 |
-
{
|
| 75 |
-
"name": c["name"],
|
| 76 |
-
"type": str(c["type"]),
|
| 77 |
-
"is_numeric": isinstance(c["type"], (Integer, Numeric, Float)),
|
| 78 |
-
"is_primary_key": c["name"] in pk_cols,
|
| 79 |
-
"foreign_key": fk_map.get(c["name"]),
|
| 80 |
-
}
|
| 81 |
-
for c in cols
|
| 82 |
-
]
|
| 83 |
-
logger.info("extracted schema", table_count=len(schema))
|
| 84 |
-
return schema
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
def get_row_count(engine: Engine, table_name: str) -> int:
|
| 88 |
-
return pd.read_sql(f"SELECT COUNT(*) FROM {_qi(engine, table_name)}", engine).iloc[0, 0]
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
def profile_column(
|
| 92 |
-
engine: Engine,
|
| 93 |
-
table_name: str,
|
| 94 |
-
col_name: str,
|
| 95 |
-
is_numeric: bool,
|
| 96 |
-
row_count: int,
|
| 97 |
-
) -> dict:
|
| 98 |
-
"""Returns null_count, distinct_count, min/max, top values, and sample values."""
|
| 99 |
-
if row_count == 0:
|
| 100 |
-
return {
|
| 101 |
-
"null_count": 0,
|
| 102 |
-
"distinct_count": 0,
|
| 103 |
-
"distinct_ratio": 0.0,
|
| 104 |
-
"sample_values": [],
|
| 105 |
-
}
|
| 106 |
-
|
| 107 |
-
qt = _qi(engine, table_name)
|
| 108 |
-
qc = _qi(engine, col_name)
|
| 109 |
-
|
| 110 |
-
# Combined stats query: null_count, distinct_count, and min/max (if numeric).
|
| 111 |
-
# One round-trip instead of two.
|
| 112 |
-
select_cols = [
|
| 113 |
-
f"COUNT(*) - COUNT({qc}) AS nulls",
|
| 114 |
-
f"COUNT(DISTINCT {qc}) AS distincts",
|
| 115 |
-
]
|
| 116 |
-
if is_numeric:
|
| 117 |
-
select_cols.append(f"MIN({qc}) AS min_val")
|
| 118 |
-
select_cols.append(f"MAX({qc}) AS max_val")
|
| 119 |
-
select_cols.append(f"AVG({qc}) AS mean_val")
|
| 120 |
-
if _supports_median(engine):
|
| 121 |
-
select_cols.append(
|
| 122 |
-
f"PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY {qc}) AS median_val"
|
| 123 |
-
)
|
| 124 |
-
stats = pd.read_sql(f"SELECT {', '.join(select_cols)} FROM {qt}", engine)
|
| 125 |
-
|
| 126 |
-
null_count = int(stats.iloc[0]["nulls"])
|
| 127 |
-
distinct_count = int(stats.iloc[0]["distincts"])
|
| 128 |
-
distinct_ratio = distinct_count / row_count if row_count > 0 else 0
|
| 129 |
-
|
| 130 |
-
profile = {
|
| 131 |
-
"null_count": null_count,
|
| 132 |
-
"distinct_count": distinct_count,
|
| 133 |
-
"distinct_ratio": round(distinct_ratio, 4),
|
| 134 |
-
}
|
| 135 |
-
|
| 136 |
-
if is_numeric:
|
| 137 |
-
profile["min"] = stats.iloc[0]["min_val"]
|
| 138 |
-
profile["max"] = stats.iloc[0]["max_val"]
|
| 139 |
-
profile["mean"] = stats.iloc[0]["mean_val"]
|
| 140 |
-
if _supports_median(engine):
|
| 141 |
-
profile["median"] = stats.iloc[0]["median_val"]
|
| 142 |
-
|
| 143 |
-
if 0 < distinct_ratio <= TOP_VALUES_THRESHOLD:
|
| 144 |
-
top_sql = _head_query(
|
| 145 |
-
engine,
|
| 146 |
-
select_clause=f"{qc}, COUNT(*) AS cnt",
|
| 147 |
-
from_clause=f"{qt} GROUP BY {qc}",
|
| 148 |
-
n=10,
|
| 149 |
-
order_by="ORDER BY cnt DESC",
|
| 150 |
-
)
|
| 151 |
-
top = pd.read_sql(top_sql, engine)
|
| 152 |
-
profile["top_values"] = list(zip(top[col_name].tolist(), top["cnt"].tolist()))
|
| 153 |
-
|
| 154 |
-
sample = pd.read_sql(_head_query(engine, qc, qt, 5), engine)
|
| 155 |
-
profile["sample_values"] = sample[col_name].tolist()
|
| 156 |
-
|
| 157 |
-
return profile
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
def profile_table(engine: Engine, table_name: str, columns: list[dict]) -> list[dict]:
|
| 161 |
-
"""Profile every column in a table. Returns [{col, profile, text}, ...].
|
| 162 |
-
|
| 163 |
-
Per-column errors are logged and skipped so one bad column doesn't abort
|
| 164 |
-
the whole table.
|
| 165 |
-
"""
|
| 166 |
-
row_count = get_row_count(engine, table_name)
|
| 167 |
-
if row_count == 0:
|
| 168 |
-
logger.info("skipping empty table", table=table_name)
|
| 169 |
-
return []
|
| 170 |
-
|
| 171 |
-
results = []
|
| 172 |
-
for col in columns:
|
| 173 |
-
try:
|
| 174 |
-
profile = profile_column(
|
| 175 |
-
engine, table_name, col["name"], col.get("is_numeric", False), row_count
|
| 176 |
-
)
|
| 177 |
-
text = build_text(table_name, row_count, col, profile)
|
| 178 |
-
results.append({"col": col, "profile": profile, "text": text})
|
| 179 |
-
except Exception as e:
|
| 180 |
-
logger.error(
|
| 181 |
-
"column profiling failed",
|
| 182 |
-
table=table_name,
|
| 183 |
-
column=col["name"],
|
| 184 |
-
error=str(e),
|
| 185 |
-
)
|
| 186 |
-
continue
|
| 187 |
-
return results
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
def build_text(table_name: str, row_count: int, col: dict, profile: dict) -> str:
|
| 191 |
-
col_name = col["name"]
|
| 192 |
-
col_type = col["type"]
|
| 193 |
-
|
| 194 |
-
key_label = ""
|
| 195 |
-
if col.get("is_primary_key"):
|
| 196 |
-
key_label = " [PRIMARY KEY]"
|
| 197 |
-
elif col.get("foreign_key"):
|
| 198 |
-
key_label = f" [FK -> {col['foreign_key']}]"
|
| 199 |
-
|
| 200 |
-
text = f"Table: {table_name} ({row_count} rows)\n"
|
| 201 |
-
text += f"Column: {col_name} ({col_type}){key_label}\n"
|
| 202 |
-
text += f"Null count: {profile['null_count']}\n"
|
| 203 |
-
text += f"Distinct count: {profile['distinct_count']} ({profile['distinct_ratio']:.1%})\n"
|
| 204 |
-
if "min" in profile:
|
| 205 |
-
text += f"Min: {profile['min']}, Max: {profile['max']}\n"
|
| 206 |
-
text += f"Mean: {profile['mean']}\n"
|
| 207 |
-
if profile.get("median") is not None:
|
| 208 |
-
text += f"Median: {profile['median']}\n"
|
| 209 |
-
if "top_values" in profile:
|
| 210 |
-
top_str = ", ".join(f"{v} ({c})" for v, c in profile["top_values"])
|
| 211 |
-
text += f"Top values: {top_str}\n"
|
| 212 |
-
text += f"Sample values: {profile['sample_values']}"
|
| 213 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/pipeline/document_pipeline/__init__.py
DELETED
|
File without changes
|
src/pipeline/document_pipeline/document_pipeline.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
| 1 |
-
"""Document upload and processing pipeline."""
|
| 2 |
-
|
| 3 |
-
from fastapi import HTTPException, UploadFile
|
| 4 |
-
from sqlalchemy.ext.asyncio import AsyncSession
|
| 5 |
-
|
| 6 |
-
from src.document.document_service import document_service
|
| 7 |
-
from src.knowledge.processing_service import knowledge_processor
|
| 8 |
-
from src.middlewares.logging import get_logger
|
| 9 |
-
from src.storage.az_blob.az_blob import blob_storage
|
| 10 |
-
|
| 11 |
-
logger = get_logger("document_pipeline")
|
| 12 |
-
|
| 13 |
-
SUPPORTED_FILE_TYPES = ["pdf", "docx", "txt", "csv", "xlsx"]
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
class DocumentPipeline:
|
| 17 |
-
"""Orchestrates the full document upload, process, and delete flows."""
|
| 18 |
-
|
| 19 |
-
async def upload(self, file: UploadFile, user_id: str, db: AsyncSession) -> dict:
|
| 20 |
-
"""Validate → upload to blob → save to DB."""
|
| 21 |
-
content = await file.read()
|
| 22 |
-
file_type = file.filename.split(".")[-1].lower() if "." in file.filename else "txt"
|
| 23 |
-
|
| 24 |
-
if file_type not in SUPPORTED_FILE_TYPES:
|
| 25 |
-
raise HTTPException(
|
| 26 |
-
status_code=400,
|
| 27 |
-
detail=f"Unsupported file type. Supported: {SUPPORTED_FILE_TYPES}",
|
| 28 |
-
)
|
| 29 |
-
|
| 30 |
-
blob_name = await blob_storage.upload_file(content, file.filename, user_id)
|
| 31 |
-
document = await document_service.create_document(
|
| 32 |
-
db=db,
|
| 33 |
-
user_id=user_id,
|
| 34 |
-
filename=file.filename,
|
| 35 |
-
blob_name=blob_name,
|
| 36 |
-
file_size=len(content),
|
| 37 |
-
file_type=file_type,
|
| 38 |
-
)
|
| 39 |
-
|
| 40 |
-
logger.info(f"Uploaded document {document.id} for user {user_id}")
|
| 41 |
-
return {"id": document.id, "filename": document.filename, "status": document.status}
|
| 42 |
-
|
| 43 |
-
async def process(self, document_id: str, user_id: str, db: AsyncSession) -> dict:
|
| 44 |
-
"""Validate ownership → extract text → chunk → ingest to vector store."""
|
| 45 |
-
document = await document_service.get_document(db, document_id)
|
| 46 |
-
|
| 47 |
-
if not document:
|
| 48 |
-
raise HTTPException(status_code=404, detail="Document not found")
|
| 49 |
-
if document.user_id != user_id:
|
| 50 |
-
raise HTTPException(status_code=403, detail="Access denied")
|
| 51 |
-
|
| 52 |
-
try:
|
| 53 |
-
await document_service.update_document_status(db, document_id, "processing")
|
| 54 |
-
chunks_count = await knowledge_processor.process_document(document, db)
|
| 55 |
-
await document_service.update_document_status(db, document_id, "completed")
|
| 56 |
-
|
| 57 |
-
logger.info(f"Processed document {document_id}: {chunks_count} chunks")
|
| 58 |
-
return {"document_id": document_id, "chunks_processed": chunks_count}
|
| 59 |
-
|
| 60 |
-
except Exception as e:
|
| 61 |
-
logger.error(f"Processing failed for document {document_id}", error=str(e))
|
| 62 |
-
await document_service.update_document_status(db, document_id, "failed", str(e))
|
| 63 |
-
raise HTTPException(status_code=500, detail=f"Processing failed: {str(e)}")
|
| 64 |
-
|
| 65 |
-
async def delete(self, document_id: str, user_id: str, db: AsyncSession) -> dict:
|
| 66 |
-
"""Validate ownership → delete from blob and DB."""
|
| 67 |
-
document = await document_service.get_document(db, document_id)
|
| 68 |
-
|
| 69 |
-
if not document:
|
| 70 |
-
raise HTTPException(status_code=404, detail="Document not found")
|
| 71 |
-
if document.user_id != user_id:
|
| 72 |
-
raise HTTPException(status_code=403, detail="Access denied")
|
| 73 |
-
|
| 74 |
-
await document_service.delete_document(db, document_id)
|
| 75 |
-
|
| 76 |
-
logger.info(f"Deleted document {document_id} for user {user_id}")
|
| 77 |
-
return {"document_id": document_id}
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
document_pipeline = DocumentPipeline()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/utils/db_credential_encryption.py
DELETED
|
@@ -1,70 +0,0 @@
|
|
| 1 |
-
"""Fernet encryption utilities for user-registered database credentials.
|
| 2 |
-
|
| 3 |
-
Encryption key is sourced from `dataeyond__db__credential__key` env variable,
|
| 4 |
-
intentionally separate from the user-auth bcrypt salt (`emarcal__bcrypt__salt`).
|
| 5 |
-
|
| 6 |
-
Usage:
|
| 7 |
-
from src.utils.db_credential_encryption import encrypt_credentials_dict, decrypt_credentials_dict
|
| 8 |
-
|
| 9 |
-
# Before INSERT:
|
| 10 |
-
safe_creds = encrypt_credentials_dict(raw_credentials)
|
| 11 |
-
|
| 12 |
-
# After SELECT:
|
| 13 |
-
plain_creds = decrypt_credentials_dict(row.credentials)
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
from cryptography.fernet import Fernet
|
| 17 |
-
from src.config.settings import settings
|
| 18 |
-
|
| 19 |
-
# Sensitive credential field names that must be encrypted at rest.
|
| 20 |
-
# Covers all supported DB types:
|
| 21 |
-
# - password : postgres, mysql, sqlserver, supabase, snowflake
|
| 22 |
-
# - service_account_json : bigquery
|
| 23 |
-
SENSITIVE_FIELDS: frozenset[str] = frozenset({"password", "service_account_json"})
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
def _get_cipher() -> Fernet:
|
| 27 |
-
key = settings.dataeyond_db_credential_key
|
| 28 |
-
if not key:
|
| 29 |
-
raise ValueError(
|
| 30 |
-
"dataeyond__db__credential__key is not set. "
|
| 31 |
-
"Generate one with: Fernet.generate_key().decode()"
|
| 32 |
-
)
|
| 33 |
-
return Fernet(key.encode())
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def encrypt_credential(value: str) -> str:
|
| 37 |
-
"""Encrypt a single credential string value."""
|
| 38 |
-
return _get_cipher().encrypt(value.encode()).decode()
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
def decrypt_credential(value: str) -> str:
|
| 42 |
-
"""Decrypt a single Fernet-encrypted credential string."""
|
| 43 |
-
return _get_cipher().decrypt(value.encode()).decode()
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def encrypt_credentials_dict(creds: dict) -> dict:
|
| 47 |
-
"""Return a copy of the credentials dict with sensitive fields encrypted.
|
| 48 |
-
|
| 49 |
-
Call this before inserting a new DatabaseClient record.
|
| 50 |
-
"""
|
| 51 |
-
cipher = _get_cipher()
|
| 52 |
-
result = dict(creds)
|
| 53 |
-
for field in SENSITIVE_FIELDS:
|
| 54 |
-
if result.get(field):
|
| 55 |
-
result[field] = cipher.encrypt(result[field].encode()).decode()
|
| 56 |
-
return result
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
def decrypt_credentials_dict(creds: dict) -> dict:
|
| 60 |
-
"""Return a copy of the credentials dict with sensitive fields decrypted.
|
| 61 |
-
|
| 62 |
-
Call this after fetching a DatabaseClient record from DB.
|
| 63 |
-
"""
|
| 64 |
-
cipher = _get_cipher()
|
| 65 |
-
result = dict(creds)
|
| 66 |
-
for field in SENSITIVE_FIELDS:
|
| 67 |
-
if result.get(field):
|
| 68 |
-
result[field] = cipher.decrypt(result[field].encode()).decode()
|
| 69 |
-
return result
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uv.lock
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
version = 1
|
| 2 |
-
revision =
|
| 3 |
requires-python = "==3.12.*"
|
| 4 |
resolution-markers = [
|
| 5 |
"python_full_version >= '3.12.4'",
|
|
@@ -49,8 +49,6 @@ dependencies = [
|
|
| 49 |
{ name = "pydantic" },
|
| 50 |
{ name = "pydantic-settings" },
|
| 51 |
{ name = "pymongo" },
|
| 52 |
-
{ name = "pymssql" },
|
| 53 |
-
{ name = "pymysql" },
|
| 54 |
{ name = "pypdf" },
|
| 55 |
{ name = "python-docx" },
|
| 56 |
{ name = "python-dotenv" },
|
|
@@ -59,10 +57,8 @@ dependencies = [
|
|
| 59 |
{ name = "redis" },
|
| 60 |
{ name = "sentence-transformers" },
|
| 61 |
{ name = "slowapi" },
|
| 62 |
-
{ name = "snowflake-sqlalchemy" },
|
| 63 |
{ name = "spacy" },
|
| 64 |
{ name = "sqlalchemy", extra = ["asyncio"] },
|
| 65 |
-
{ name = "sqlalchemy-bigquery" },
|
| 66 |
{ name = "sse-starlette" },
|
| 67 |
{ name = "starlette" },
|
| 68 |
{ name = "structlog" },
|
|
@@ -135,8 +131,6 @@ requires-dist = [
|
|
| 135 |
{ name = "pydantic", specifier = "==2.10.3" },
|
| 136 |
{ name = "pydantic-settings", specifier = "==2.7.0" },
|
| 137 |
{ name = "pymongo", specifier = ">=4.14.0" },
|
| 138 |
-
{ name = "pymssql", specifier = ">=2.3.0" },
|
| 139 |
-
{ name = "pymysql", specifier = ">=1.1.1" },
|
| 140 |
{ name = "pypdf", specifier = "==5.1.0" },
|
| 141 |
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.4" },
|
| 142 |
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==0.24.0" },
|
|
@@ -149,10 +143,8 @@ requires-dist = [
|
|
| 149 |
{ name = "ruff", marker = "extra == 'dev'", specifier = "==0.8.4" },
|
| 150 |
{ name = "sentence-transformers", specifier = "==3.3.1" },
|
| 151 |
{ name = "slowapi", specifier = "==0.1.9" },
|
| 152 |
-
{ name = "snowflake-sqlalchemy", specifier = ">=1.7.0" },
|
| 153 |
{ name = "spacy", specifier = "==3.8.3" },
|
| 154 |
{ name = "sqlalchemy", extras = ["asyncio"], specifier = "==2.0.36" },
|
| 155 |
-
{ name = "sqlalchemy-bigquery", specifier = ">=1.11.0" },
|
| 156 |
{ name = "sse-starlette", specifier = "==2.1.3" },
|
| 157 |
{ name = "starlette", specifier = "==0.41.3" },
|
| 158 |
{ name = "structlog", specifier = "==24.4.0" },
|
|
@@ -288,15 +280,6 @@ wheels = [
|
|
| 288 |
{ url = "https://files.pythonhosted.org/packages/13/b5/7af0cb920a476dccd612fbc9a21a3745fb29b1fcd74636078db8f7ba294c/APScheduler-3.10.4-py3-none-any.whl", hash = "sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661", size = 59303, upload-time = "2023-08-19T16:44:56.814Z" },
|
| 289 |
]
|
| 290 |
|
| 291 |
-
[[package]]
|
| 292 |
-
name = "asn1crypto"
|
| 293 |
-
version = "1.5.1"
|
| 294 |
-
source = { registry = "https://pypi.org/simple" }
|
| 295 |
-
sdist = { url = "https://files.pythonhosted.org/packages/de/cf/d547feed25b5244fcb9392e288ff9fdc3280b10260362fc45d37a798a6ee/asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c", size = 121080, upload-time = "2022-03-15T14:46:52.889Z" }
|
| 296 |
-
wheels = [
|
| 297 |
-
{ url = "https://files.pythonhosted.org/packages/c9/7f/09065fd9e27da0eda08b4d6897f1c13535066174cc023af248fc2a8d5e5a/asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67", size = 105045, upload-time = "2022-03-15T14:46:51.055Z" },
|
| 298 |
-
]
|
| 299 |
-
|
| 300 |
[[package]]
|
| 301 |
name = "asyncpg"
|
| 302 |
version = "0.30.0"
|
|
@@ -445,34 +428,6 @@ wheels = [
|
|
| 445 |
{ url = "https://files.pythonhosted.org/packages/20/07/fb43edc2ff0a6a367e4a94fc39eb3b85aa1e55e24cc857af2db145ce9f0d/blis-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:f20f7ad69aaffd1ce14fe77de557b6df9b61e0c9e582f75a843715d836b5c8af", size = 6192759, upload-time = "2025-11-17T12:27:56.176Z" },
|
| 446 |
]
|
| 447 |
|
| 448 |
-
[[package]]
|
| 449 |
-
name = "boto3"
|
| 450 |
-
version = "1.42.89"
|
| 451 |
-
source = { registry = "https://pypi.org/simple" }
|
| 452 |
-
dependencies = [
|
| 453 |
-
{ name = "botocore" },
|
| 454 |
-
{ name = "jmespath" },
|
| 455 |
-
{ name = "s3transfer" },
|
| 456 |
-
]
|
| 457 |
-
sdist = { url = "https://files.pythonhosted.org/packages/bb/0c/f7bccb22b245cabf392816baba20f9e95f78ace7dbc580fd40136e80e732/boto3-1.42.89.tar.gz", hash = "sha256:3e43aacc0801bba9bcd23a8c271c089af297a69565f783fcdd357ae0e330bf1e", size = 113165, upload-time = "2026-04-13T19:36:17.516Z" }
|
| 458 |
-
wheels = [
|
| 459 |
-
{ url = "https://files.pythonhosted.org/packages/b9/33/55103ba5ef9975ea54b8d39e69b76eb6e9fded3beae5f01065e26951a3a1/boto3-1.42.89-py3-none-any.whl", hash = "sha256:6204b189f4d0c655535f43d7eaa57ff4e8d965b8463c97e45952291211162932", size = 140556, upload-time = "2026-04-13T19:36:13.894Z" },
|
| 460 |
-
]
|
| 461 |
-
|
| 462 |
-
[[package]]
|
| 463 |
-
name = "botocore"
|
| 464 |
-
version = "1.42.89"
|
| 465 |
-
source = { registry = "https://pypi.org/simple" }
|
| 466 |
-
dependencies = [
|
| 467 |
-
{ name = "jmespath" },
|
| 468 |
-
{ name = "python-dateutil" },
|
| 469 |
-
{ name = "urllib3" },
|
| 470 |
-
]
|
| 471 |
-
sdist = { url = "https://files.pythonhosted.org/packages/0f/cc/e6be943efa9051bd15c2ee14077c2b10d6e27c9e9385fc43a03a5c4ed8b5/botocore-1.42.89.tar.gz", hash = "sha256:95ac52f472dad29942f3088b278ab493044516c16dbf9133c975af16527baa99", size = 15206290, upload-time = "2026-04-13T19:36:02.321Z" }
|
| 472 |
-
wheels = [
|
| 473 |
-
{ url = "https://files.pythonhosted.org/packages/91/f1/90a7b8eda38b7c3a65ca7ee0075bdf310b6b471cb1b95fab6e8994323a50/botocore-1.42.89-py3-none-any.whl", hash = "sha256:d9b786c8d9db6473063b4cc5be0ba7e6a381082307bd6afb69d4216f9fa95f35", size = 14887287, upload-time = "2026-04-13T19:35:56.677Z" },
|
| 474 |
-
]
|
| 475 |
-
|
| 476 |
[[package]]
|
| 477 |
name = "cachetools"
|
| 478 |
version = "5.5.0"
|
|
@@ -986,109 +941,6 @@ wheels = [
|
|
| 986 |
{ url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" },
|
| 987 |
]
|
| 988 |
|
| 989 |
-
[[package]]
|
| 990 |
-
name = "google-api-core"
|
| 991 |
-
version = "2.30.3"
|
| 992 |
-
source = { registry = "https://pypi.org/simple" }
|
| 993 |
-
dependencies = [
|
| 994 |
-
{ name = "google-auth" },
|
| 995 |
-
{ name = "googleapis-common-protos" },
|
| 996 |
-
{ name = "proto-plus" },
|
| 997 |
-
{ name = "protobuf" },
|
| 998 |
-
{ name = "requests" },
|
| 999 |
-
]
|
| 1000 |
-
sdist = { url = "https://files.pythonhosted.org/packages/16/ce/502a57fb0ec752026d24df1280b162294b22a0afb98a326084f9a979138b/google_api_core-2.30.3.tar.gz", hash = "sha256:e601a37f148585319b26db36e219df68c5d07b6382cff2d580e83404e44d641b", size = 177001, upload-time = "2026-04-10T00:41:28.035Z" }
|
| 1001 |
-
wheels = [
|
| 1002 |
-
{ url = "https://files.pythonhosted.org/packages/03/15/e56f351cf6ef1cfea58e6ac226a7318ed1deb2218c4b3cc9bd9e4b786c5a/google_api_core-2.30.3-py3-none-any.whl", hash = "sha256:a85761ba72c444dad5d611c2220633480b2b6be2521eca69cca2dbb3ffd6bfe8", size = 173274, upload-time = "2026-04-09T22:57:16.198Z" },
|
| 1003 |
-
]
|
| 1004 |
-
|
| 1005 |
-
[package.optional-dependencies]
|
| 1006 |
-
grpc = [
|
| 1007 |
-
{ name = "grpcio" },
|
| 1008 |
-
{ name = "grpcio-status" },
|
| 1009 |
-
]
|
| 1010 |
-
|
| 1011 |
-
[[package]]
|
| 1012 |
-
name = "google-auth"
|
| 1013 |
-
version = "2.49.2"
|
| 1014 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1015 |
-
dependencies = [
|
| 1016 |
-
{ name = "cryptography" },
|
| 1017 |
-
{ name = "pyasn1-modules" },
|
| 1018 |
-
]
|
| 1019 |
-
sdist = { url = "https://files.pythonhosted.org/packages/c6/fc/e925290a1ad95c975c459e2df070fac2b90954e13a0370ac505dff78cb99/google_auth-2.49.2.tar.gz", hash = "sha256:c1ae38500e73065dcae57355adb6278cf8b5c8e391994ae9cbadbcb9631ab409", size = 333958, upload-time = "2026-04-10T00:41:21.888Z" }
|
| 1020 |
-
wheels = [
|
| 1021 |
-
{ url = "https://files.pythonhosted.org/packages/73/76/d241a5c927433420507215df6cac1b1fa4ac0ba7a794df42a84326c68da8/google_auth-2.49.2-py3-none-any.whl", hash = "sha256:c2720924dfc82dedb962c9f52cabb2ab16714fd0a6a707e40561d217574ed6d5", size = 240638, upload-time = "2026-04-10T00:41:14.501Z" },
|
| 1022 |
-
]
|
| 1023 |
-
|
| 1024 |
-
[[package]]
|
| 1025 |
-
name = "google-cloud-bigquery"
|
| 1026 |
-
version = "3.41.0"
|
| 1027 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1028 |
-
dependencies = [
|
| 1029 |
-
{ name = "google-api-core", extra = ["grpc"] },
|
| 1030 |
-
{ name = "google-auth" },
|
| 1031 |
-
{ name = "google-cloud-core" },
|
| 1032 |
-
{ name = "google-resumable-media" },
|
| 1033 |
-
{ name = "packaging" },
|
| 1034 |
-
{ name = "python-dateutil" },
|
| 1035 |
-
{ name = "requests" },
|
| 1036 |
-
]
|
| 1037 |
-
sdist = { url = "https://files.pythonhosted.org/packages/ce/13/6515c7aab55a4a0cf708ffd309fb9af5bab54c13e32dc22c5acd6497193c/google_cloud_bigquery-3.41.0.tar.gz", hash = "sha256:2217e488b47ed576360c9b2cc07d59d883a54b83167c0ef37f915c26b01a06fe", size = 513434, upload-time = "2026-03-30T22:50:55.347Z" }
|
| 1038 |
-
wheels = [
|
| 1039 |
-
{ url = "https://files.pythonhosted.org/packages/40/33/1d3902efadef9194566d499d61507e1f038454e0b55499d2d7f8ab2a4fee/google_cloud_bigquery-3.41.0-py3-none-any.whl", hash = "sha256:2a5b5a737b401cbd824a6e5eac7554100b878668d908e6548836b5d8aaa4dcaa", size = 262343, upload-time = "2026-03-30T22:48:45.444Z" },
|
| 1040 |
-
]
|
| 1041 |
-
|
| 1042 |
-
[[package]]
|
| 1043 |
-
name = "google-cloud-core"
|
| 1044 |
-
version = "2.5.1"
|
| 1045 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1046 |
-
dependencies = [
|
| 1047 |
-
{ name = "google-api-core" },
|
| 1048 |
-
{ name = "google-auth" },
|
| 1049 |
-
]
|
| 1050 |
-
sdist = { url = "https://files.pythonhosted.org/packages/dc/24/6ca08b0a03c7b0c620427503ab00353a4ae806b848b93bcea18b6b76fde6/google_cloud_core-2.5.1.tar.gz", hash = "sha256:3dc94bdec9d05a31d9f355045ed0f369fbc0d8c665076c734f065d729800f811", size = 36078, upload-time = "2026-03-30T22:50:08.057Z" }
|
| 1051 |
-
wheels = [
|
| 1052 |
-
{ url = "https://files.pythonhosted.org/packages/73/d9/5bb050cb32826466aa9b25f79e2ca2879fe66cb76782d4ed798dd7506151/google_cloud_core-2.5.1-py3-none-any.whl", hash = "sha256:ea62cdf502c20e3e14be8a32c05ed02113d7bef454e40ff3fab6fe1ec9f1f4e7", size = 29452, upload-time = "2026-03-30T22:48:31.567Z" },
|
| 1053 |
-
]
|
| 1054 |
-
|
| 1055 |
-
[[package]]
|
| 1056 |
-
name = "google-crc32c"
|
| 1057 |
-
version = "1.8.0"
|
| 1058 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1059 |
-
sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" }
|
| 1060 |
-
wheels = [
|
| 1061 |
-
{ url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" },
|
| 1062 |
-
{ url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" },
|
| 1063 |
-
{ url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" },
|
| 1064 |
-
{ url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" },
|
| 1065 |
-
{ url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" },
|
| 1066 |
-
]
|
| 1067 |
-
|
| 1068 |
-
[[package]]
|
| 1069 |
-
name = "google-resumable-media"
|
| 1070 |
-
version = "2.8.2"
|
| 1071 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1072 |
-
dependencies = [
|
| 1073 |
-
{ name = "google-crc32c" },
|
| 1074 |
-
]
|
| 1075 |
-
sdist = { url = "https://files.pythonhosted.org/packages/3f/d1/b1ea14b93b6b78f57fc580125de44e9f593ab88dd2460f1a8a8d18f74754/google_resumable_media-2.8.2.tar.gz", hash = "sha256:f3354a182ebd193ae3f42e3ef95e6c9b10f128320de23ac7637236713b1acd70", size = 2164510, upload-time = "2026-03-30T23:34:25.369Z" }
|
| 1076 |
-
wheels = [
|
| 1077 |
-
{ url = "https://files.pythonhosted.org/packages/5e/f8/50bfaf4658431ff9de45c5c3935af7ab01157a4903c603cd0eee6e78e087/google_resumable_media-2.8.2-py3-none-any.whl", hash = "sha256:82b6d8ccd11765268cdd2a2123f417ec806b8eef3000a9a38dfe3033da5fb220", size = 81511, upload-time = "2026-03-30T23:34:09.671Z" },
|
| 1078 |
-
]
|
| 1079 |
-
|
| 1080 |
-
[[package]]
|
| 1081 |
-
name = "googleapis-common-protos"
|
| 1082 |
-
version = "1.74.0"
|
| 1083 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1084 |
-
dependencies = [
|
| 1085 |
-
{ name = "protobuf" },
|
| 1086 |
-
]
|
| 1087 |
-
sdist = { url = "https://files.pythonhosted.org/packages/20/18/a746c8344152d368a5aac738d4c857012f2c5d1fd2eac7e17b647a7861bd/googleapis_common_protos-1.74.0.tar.gz", hash = "sha256:57971e4eeeba6aad1163c1f0fc88543f965bb49129b8bb55b2b7b26ecab084f1", size = 151254, upload-time = "2026-04-02T21:23:26.679Z" }
|
| 1088 |
-
wheels = [
|
| 1089 |
-
{ url = "https://files.pythonhosted.org/packages/b6/b0/be5d3329badb9230b765de6eea66b73abd5944bdeb5afb3562ddcd80ae84/googleapis_common_protos-1.74.0-py3-none-any.whl", hash = "sha256:702216f78610bb510e3f12ac3cafd281b7ac45cc5d86e90ad87e4d301a3426b5", size = 300743, upload-time = "2026-04-02T21:22:49.108Z" },
|
| 1090 |
-
]
|
| 1091 |
-
|
| 1092 |
[[package]]
|
| 1093 |
name = "greenlet"
|
| 1094 |
version = "3.3.2"
|
|
@@ -1106,41 +958,6 @@ wheels = [
|
|
| 1106 |
{ url = "https://files.pythonhosted.org/packages/58/2e/fe7f36ff1982d6b10a60d5e0740c759259a7d6d2e1dc41da6d96de32fff6/greenlet-3.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:d3a62fa76a32b462a97198e4c9e99afb9ab375115e74e9a83ce180e7a496f643", size = 230331, upload-time = "2026-02-20T20:17:23.34Z" },
|
| 1107 |
]
|
| 1108 |
|
| 1109 |
-
[[package]]
|
| 1110 |
-
name = "grpcio"
|
| 1111 |
-
version = "1.80.0"
|
| 1112 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1113 |
-
dependencies = [
|
| 1114 |
-
{ name = "typing-extensions" },
|
| 1115 |
-
]
|
| 1116 |
-
sdist = { url = "https://files.pythonhosted.org/packages/b7/48/af6173dbca4454f4637a4678b67f52ca7e0c1ed7d5894d89d434fecede05/grpcio-1.80.0.tar.gz", hash = "sha256:29aca15edd0688c22ba01d7cc01cb000d72b2033f4a3c72a81a19b56fd143257", size = 12978905, upload-time = "2026-03-30T08:49:10.502Z" }
|
| 1117 |
-
wheels = [
|
| 1118 |
-
{ url = "https://files.pythonhosted.org/packages/5c/e8/a2b749265eb3415abc94f2e619bbd9e9707bebdda787e61c593004ec927a/grpcio-1.80.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:c624cc9f1008361014378c9d776de7182b11fe8b2e5a81bc69f23a295f2a1ad0", size = 6015616, upload-time = "2026-03-30T08:47:13.428Z" },
|
| 1119 |
-
{ url = "https://files.pythonhosted.org/packages/3e/97/b1282161a15d699d1e90c360df18d19165a045ce1c343c7f313f5e8a0b77/grpcio-1.80.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:f49eddcac43c3bf350c0385366a58f36bed8cc2c0ec35ef7b74b49e56552c0c2", size = 12014204, upload-time = "2026-03-30T08:47:15.873Z" },
|
| 1120 |
-
{ url = "https://files.pythonhosted.org/packages/6e/5e/d319c6e997b50c155ac5a8cb12f5173d5b42677510e886d250d50264949d/grpcio-1.80.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d334591df610ab94714048e0d5b4f3dd5ad1bee74dfec11eee344220077a79de", size = 6563866, upload-time = "2026-03-30T08:47:18.588Z" },
|
| 1121 |
-
{ url = "https://files.pythonhosted.org/packages/ae/f6/fdd975a2cb4d78eb67769a7b3b3830970bfa2e919f1decf724ae4445f42c/grpcio-1.80.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:0cb517eb1d0d0aaf1d87af7cc5b801d686557c1d88b2619f5e31fab3c2315921", size = 7273060, upload-time = "2026-03-30T08:47:21.113Z" },
|
| 1122 |
-
{ url = "https://files.pythonhosted.org/packages/db/f0/a3deb5feba60d9538a962913e37bd2e69a195f1c3376a3dd44fe0427e996/grpcio-1.80.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e78c4ac0d97dc2e569b2f4bcbbb447491167cb358d1a389fc4af71ab6f70411", size = 6782121, upload-time = "2026-03-30T08:47:23.827Z" },
|
| 1123 |
-
{ url = "https://files.pythonhosted.org/packages/ca/84/36c6dcfddc093e108141f757c407902a05085e0c328007cb090d56646cdf/grpcio-1.80.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2ed770b4c06984f3b47eb0517b1c69ad0b84ef3f40128f51448433be904634cd", size = 7383811, upload-time = "2026-03-30T08:47:26.517Z" },
|
| 1124 |
-
{ url = "https://files.pythonhosted.org/packages/7c/ef/f3a77e3dc5b471a0ec86c564c98d6adfa3510d38f8ee99010410858d591e/grpcio-1.80.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:256507e2f524092f1473071a05e65a5b10d84b82e3ff24c5b571513cfaa61e2f", size = 8393860, upload-time = "2026-03-30T08:47:29.439Z" },
|
| 1125 |
-
{ url = "https://files.pythonhosted.org/packages/9b/8d/9d4d27ed7f33d109c50d6b5ce578a9914aa68edab75d65869a17e630a8d1/grpcio-1.80.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a6284a5d907c37db53350645567c522be314bac859a64a7a5ca63b77bb7958f", size = 7830132, upload-time = "2026-03-30T08:47:33.254Z" },
|
| 1126 |
-
{ url = "https://files.pythonhosted.org/packages/14/e4/9990b41c6d7a44e1e9dee8ac11d7a9802ba1378b40d77468a7761d1ad288/grpcio-1.80.0-cp312-cp312-win32.whl", hash = "sha256:c71309cfce2f22be26aa4a847357c502db6c621f1a49825ae98aa0907595b193", size = 4140904, upload-time = "2026-03-30T08:47:35.319Z" },
|
| 1127 |
-
{ url = "https://files.pythonhosted.org/packages/2f/2c/296f6138caca1f4b92a31ace4ae1b87dab692fc16a7a3417af3bb3c805bf/grpcio-1.80.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe648599c0e37594c4809d81a9e77bd138cc82eb8baa71b6a86af65426723ff", size = 4880944, upload-time = "2026-03-30T08:47:37.831Z" },
|
| 1128 |
-
]
|
| 1129 |
-
|
| 1130 |
-
[[package]]
|
| 1131 |
-
name = "grpcio-status"
|
| 1132 |
-
version = "1.80.0"
|
| 1133 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1134 |
-
dependencies = [
|
| 1135 |
-
{ name = "googleapis-common-protos" },
|
| 1136 |
-
{ name = "grpcio" },
|
| 1137 |
-
{ name = "protobuf" },
|
| 1138 |
-
]
|
| 1139 |
-
sdist = { url = "https://files.pythonhosted.org/packages/b1/ed/105f619bdd00cb47a49aa2feea6232ea2bbb04199d52a22cc6a7d603b5cb/grpcio_status-1.80.0.tar.gz", hash = "sha256:df73802a4c89a3ea88aa2aff971e886fccce162bc2e6511408b3d67a144381cd", size = 13901, upload-time = "2026-03-30T08:54:34.784Z" }
|
| 1140 |
-
wheels = [
|
| 1141 |
-
{ url = "https://files.pythonhosted.org/packages/76/80/58cd2dfc19a07d022abe44bde7c365627f6c7cb6f692ada6c65ca437d09a/grpcio_status-1.80.0-py3-none-any.whl", hash = "sha256:4b56990363af50dbf2c2ebb80f1967185c07d87aa25aa2bea45ddb75fc181dbe", size = 14638, upload-time = "2026-03-30T08:54:01.569Z" },
|
| 1142 |
-
]
|
| 1143 |
-
|
| 1144 |
[[package]]
|
| 1145 |
name = "h11"
|
| 1146 |
version = "0.16.0"
|
|
@@ -1310,15 +1127,6 @@ wheels = [
|
|
| 1310 |
{ url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" },
|
| 1311 |
]
|
| 1312 |
|
| 1313 |
-
[[package]]
|
| 1314 |
-
name = "jmespath"
|
| 1315 |
-
version = "1.1.0"
|
| 1316 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1317 |
-
sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" }
|
| 1318 |
-
wheels = [
|
| 1319 |
-
{ url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
|
| 1320 |
-
]
|
| 1321 |
-
|
| 1322 |
[[package]]
|
| 1323 |
name = "joblib"
|
| 1324 |
version = "1.5.3"
|
|
@@ -2313,33 +2121,6 @@ wheels = [
|
|
| 2313 |
{ url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
|
| 2314 |
]
|
| 2315 |
|
| 2316 |
-
[[package]]
|
| 2317 |
-
name = "proto-plus"
|
| 2318 |
-
version = "1.27.2"
|
| 2319 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2320 |
-
dependencies = [
|
| 2321 |
-
{ name = "protobuf" },
|
| 2322 |
-
]
|
| 2323 |
-
sdist = { url = "https://files.pythonhosted.org/packages/81/0d/94dfe80193e79d55258345901acd2917523d56e8381bc4dee7fd38e3868a/proto_plus-1.27.2.tar.gz", hash = "sha256:b2adde53adadf75737c44d3dcb0104fde65250dfc83ad59168b4aa3e574b6a24", size = 57204, upload-time = "2026-03-26T22:18:57.174Z" }
|
| 2324 |
-
wheels = [
|
| 2325 |
-
{ url = "https://files.pythonhosted.org/packages/84/f3/1fba73eeffafc998a25d59703b63f8be4fe8a5cb12eaff7386a0ba0f7125/proto_plus-1.27.2-py3-none-any.whl", hash = "sha256:6432f75893d3b9e70b9c412f1d2f03f65b11fb164b793d14ae2ca01821d22718", size = 50450, upload-time = "2026-03-26T22:13:42.927Z" },
|
| 2326 |
-
]
|
| 2327 |
-
|
| 2328 |
-
[[package]]
|
| 2329 |
-
name = "protobuf"
|
| 2330 |
-
version = "6.33.6"
|
| 2331 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2332 |
-
sdist = { url = "https://files.pythonhosted.org/packages/66/70/e908e9c5e52ef7c3a6c7902c9dfbb34c7e29c25d2f81ade3856445fd5c94/protobuf-6.33.6.tar.gz", hash = "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", size = 444531, upload-time = "2026-03-18T19:05:00.988Z" }
|
| 2333 |
-
wheels = [
|
| 2334 |
-
{ url = "https://files.pythonhosted.org/packages/fc/9f/2f509339e89cfa6f6a4c4ff50438db9ca488dec341f7e454adad60150b00/protobuf-6.33.6-cp310-abi3-win32.whl", hash = "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", size = 425739, upload-time = "2026-03-18T19:04:48.373Z" },
|
| 2335 |
-
{ url = "https://files.pythonhosted.org/packages/76/5d/683efcd4798e0030c1bab27374fd13a89f7c2515fb1f3123efdfaa5eab57/protobuf-6.33.6-cp310-abi3-win_amd64.whl", hash = "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", size = 437089, upload-time = "2026-03-18T19:04:50.381Z" },
|
| 2336 |
-
{ url = "https://files.pythonhosted.org/packages/5c/01/a3c3ed5cd186f39e7880f8303cc51385a198a81469d53d0fdecf1f64d929/protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", size = 427737, upload-time = "2026-03-18T19:04:51.866Z" },
|
| 2337 |
-
{ url = "https://files.pythonhosted.org/packages/ee/90/b3c01fdec7d2f627b3a6884243ba328c1217ed2d978def5c12dc50d328a3/protobuf-6.33.6-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", size = 324610, upload-time = "2026-03-18T19:04:53.096Z" },
|
| 2338 |
-
{ url = "https://files.pythonhosted.org/packages/9b/ca/25afc144934014700c52e05103c2421997482d561f3101ff352e1292fb81/protobuf-6.33.6-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", size = 339381, upload-time = "2026-03-18T19:04:54.616Z" },
|
| 2339 |
-
{ url = "https://files.pythonhosted.org/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", size = 323436, upload-time = "2026-03-18T19:04:55.768Z" },
|
| 2340 |
-
{ url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" },
|
| 2341 |
-
]
|
| 2342 |
-
|
| 2343 |
[[package]]
|
| 2344 |
name = "psycopg"
|
| 2345 |
version = "3.2.3"
|
|
@@ -2400,27 +2181,6 @@ wheels = [
|
|
| 2400 |
{ url = "https://files.pythonhosted.org/packages/b5/bf/635fbe5dd10ed200afbbfbe98f8602829252ca1cce81cc48fb25ed8dadc0/psycopg2-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:e03e4a6dbe87ff81540b434f2e5dc2bddad10296db5eea7bdc995bf5f4162938", size = 2713969, upload-time = "2025-10-10T11:10:15.946Z" },
|
| 2401 |
]
|
| 2402 |
|
| 2403 |
-
[[package]]
|
| 2404 |
-
name = "pyasn1"
|
| 2405 |
-
version = "0.6.3"
|
| 2406 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2407 |
-
sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
|
| 2408 |
-
wheels = [
|
| 2409 |
-
{ url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
|
| 2410 |
-
]
|
| 2411 |
-
|
| 2412 |
-
[[package]]
|
| 2413 |
-
name = "pyasn1-modules"
|
| 2414 |
-
version = "0.4.2"
|
| 2415 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2416 |
-
dependencies = [
|
| 2417 |
-
{ name = "pyasn1" },
|
| 2418 |
-
]
|
| 2419 |
-
sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
|
| 2420 |
-
wheels = [
|
| 2421 |
-
{ url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
|
| 2422 |
-
]
|
| 2423 |
-
|
| 2424 |
[[package]]
|
| 2425 |
name = "pycparser"
|
| 2426 |
version = "3.0"
|
|
@@ -2550,43 +2310,6 @@ wheels = [
|
|
| 2550 |
{ url = "https://files.pythonhosted.org/packages/60/4c/33f75713d50d5247f2258405142c0318ff32c6f8976171c4fcae87a9dbdf/pymongo-4.16.0-cp312-cp312-win_arm64.whl", hash = "sha256:dfc320f08ea9a7ec5b2403dc4e8150636f0d6150f4b9792faaae539c88e7db3b", size = 892971, upload-time = "2026-01-07T18:04:35.594Z" },
|
| 2551 |
]
|
| 2552 |
|
| 2553 |
-
[[package]]
|
| 2554 |
-
name = "pymssql"
|
| 2555 |
-
version = "2.3.13"
|
| 2556 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2557 |
-
sdist = { url = "https://files.pythonhosted.org/packages/7a/cc/843c044b7f71ee329436b7327c578383e2f2499313899f88ad267cdf1f33/pymssql-2.3.13.tar.gz", hash = "sha256:2137e904b1a65546be4ccb96730a391fcd5a85aab8a0632721feb5d7e39cfbce", size = 203153, upload-time = "2026-02-14T05:00:36.865Z" }
|
| 2558 |
-
wheels = [
|
| 2559 |
-
{ url = "https://files.pythonhosted.org/packages/ba/60/a2e8a8a38f7be21d54402e2b3365cd56f1761ce9f2706c97f864e8aa8300/pymssql-2.3.13-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cf4f32b4a05b66f02cb7d55a0f3bcb0574a6f8cf0bee4bea6f7b104038364733", size = 3158689, upload-time = "2026-02-14T04:59:46.982Z" },
|
| 2560 |
-
{ url = "https://files.pythonhosted.org/packages/43/9e/0cf0ffb9e2f73238baf766d8e31d7237b5bee3cc1bb29a376b404610994a/pymssql-2.3.13-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:2b056eb175955f7fb715b60dc1c0c624969f4d24dbdcf804b41ab1e640a2b131", size = 2960018, upload-time = "2026-02-14T04:59:48.668Z" },
|
| 2561 |
-
{ url = "https://files.pythonhosted.org/packages/93/ea/bc27354feaca717faa4626911f6b19bb62985c87dda28957c63de4de5895/pymssql-2.3.13-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:319810b89aa64b99d9c5c01518752c813938df230496fa2c4c6dda0603f04c4c", size = 3065719, upload-time = "2026-02-14T04:59:50.369Z" },
|
| 2562 |
-
{ url = "https://files.pythonhosted.org/packages/1e/7a/8028681c96241fb5fc850b87c8959402c353e4b83c6e049a99ffa67ded54/pymssql-2.3.13-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0ea72641cb0f8bce7ad8565dbdbda4a7437aa58bce045f2a3a788d71af2e4be", size = 3190567, upload-time = "2026-02-14T04:59:52.202Z" },
|
| 2563 |
-
{ url = "https://files.pythonhosted.org/packages/aa/f1/ab5b76adbbd6db9ce746d448db34b044683522e7e7b95053f9dd0165297b/pymssql-2.3.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1493f63d213607f708a5722aa230776ada726ccdb94097fab090a1717a2534e0", size = 3710481, upload-time = "2026-02-14T04:59:54.01Z" },
|
| 2564 |
-
{ url = "https://files.pythonhosted.org/packages/59/aa/2fa0951475cd0a1829e0b8bfbe334d04ece4bce11546a556b005c4100689/pymssql-2.3.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb3275985c23479e952d6462ae6c8b2b6993ab6b99a92805a9c17942cf3d5b3d", size = 3453789, upload-time = "2026-02-14T04:59:56.841Z" },
|
| 2565 |
-
{ url = "https://files.pythonhosted.org/packages/78/08/8cd2af9003f9fc03912b658a64f5a4919dcd68f0dd3bbc822b49a3d14fd9/pymssql-2.3.13-cp312-cp312-win_amd64.whl", hash = "sha256:a930adda87bdd8351a5637cf73d6491936f34e525a5e513068a6eac742f69cdb", size = 1994709, upload-time = "2026-02-14T04:59:58.972Z" },
|
| 2566 |
-
]
|
| 2567 |
-
|
| 2568 |
-
[[package]]
|
| 2569 |
-
name = "pymysql"
|
| 2570 |
-
version = "1.1.2"
|
| 2571 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2572 |
-
sdist = { url = "https://files.pythonhosted.org/packages/f5/ae/1fe3fcd9f959efa0ebe200b8de88b5a5ce3e767e38c7ac32fb179f16a388/pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03", size = 48258, upload-time = "2025-08-24T12:55:55.146Z" }
|
| 2573 |
-
wheels = [
|
| 2574 |
-
{ url = "https://files.pythonhosted.org/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300, upload-time = "2025-08-24T12:55:53.394Z" },
|
| 2575 |
-
]
|
| 2576 |
-
|
| 2577 |
-
[[package]]
|
| 2578 |
-
name = "pyopenssl"
|
| 2579 |
-
version = "25.1.0"
|
| 2580 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2581 |
-
dependencies = [
|
| 2582 |
-
{ name = "cryptography" },
|
| 2583 |
-
{ name = "typing-extensions" },
|
| 2584 |
-
]
|
| 2585 |
-
sdist = { url = "https://files.pythonhosted.org/packages/04/8c/cd89ad05804f8e3c17dea8f178c3f40eeab5694c30e0c9f5bcd49f576fc3/pyopenssl-25.1.0.tar.gz", hash = "sha256:8d031884482e0c67ee92bf9a4d8cceb08d92aba7136432ffb0703c5280fc205b", size = 179937, upload-time = "2025-05-17T16:28:31.31Z" }
|
| 2586 |
-
wheels = [
|
| 2587 |
-
{ url = "https://files.pythonhosted.org/packages/80/28/2659c02301b9500751f8d42f9a6632e1508aa5120de5e43042b8b30f8d5d/pyopenssl-25.1.0-py3-none-any.whl", hash = "sha256:2b11f239acc47ac2e5aca04fd7fa829800aeee22a2eb30d744572a157bd8a1ab", size = 56771, upload-time = "2025-05-17T16:28:29.197Z" },
|
| 2588 |
-
]
|
| 2589 |
-
|
| 2590 |
[[package]]
|
| 2591 |
name = "pyparsing"
|
| 2592 |
version = "3.3.2"
|
|
@@ -2887,18 +2610,6 @@ wheels = [
|
|
| 2887 |
{ url = "https://files.pythonhosted.org/packages/13/9f/026e18ca7d7766783d779dae5e9c656746c6ede36ef73c6d934aaf4a6dec/ruff-0.8.4-py3-none-win_arm64.whl", hash = "sha256:9183dd615d8df50defa8b1d9a074053891ba39025cf5ae88e8bcb52edcc4bf08", size = 9074500, upload-time = "2024-12-19T13:36:23.92Z" },
|
| 2888 |
]
|
| 2889 |
|
| 2890 |
-
[[package]]
|
| 2891 |
-
name = "s3transfer"
|
| 2892 |
-
version = "0.16.0"
|
| 2893 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2894 |
-
dependencies = [
|
| 2895 |
-
{ name = "botocore" },
|
| 2896 |
-
]
|
| 2897 |
-
sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" }
|
| 2898 |
-
wheels = [
|
| 2899 |
-
{ url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" },
|
| 2900 |
-
]
|
| 2901 |
-
|
| 2902 |
[[package]]
|
| 2903 |
name = "safetensors"
|
| 2904 |
version = "0.7.0"
|
|
@@ -3053,60 +2764,6 @@ wheels = [
|
|
| 3053 |
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
|
| 3054 |
]
|
| 3055 |
|
| 3056 |
-
[[package]]
|
| 3057 |
-
name = "snowflake-connector-python"
|
| 3058 |
-
version = "4.0.0"
|
| 3059 |
-
source = { registry = "https://pypi.org/simple" }
|
| 3060 |
-
dependencies = [
|
| 3061 |
-
{ name = "asn1crypto" },
|
| 3062 |
-
{ name = "boto3" },
|
| 3063 |
-
{ name = "botocore" },
|
| 3064 |
-
{ name = "certifi" },
|
| 3065 |
-
{ name = "charset-normalizer" },
|
| 3066 |
-
{ name = "cryptography" },
|
| 3067 |
-
{ name = "filelock" },
|
| 3068 |
-
{ name = "idna" },
|
| 3069 |
-
{ name = "packaging" },
|
| 3070 |
-
{ name = "platformdirs" },
|
| 3071 |
-
{ name = "pyjwt" },
|
| 3072 |
-
{ name = "pyopenssl" },
|
| 3073 |
-
{ name = "pytz" },
|
| 3074 |
-
{ name = "requests" },
|
| 3075 |
-
{ name = "sortedcontainers" },
|
| 3076 |
-
{ name = "tomlkit" },
|
| 3077 |
-
{ name = "typing-extensions" },
|
| 3078 |
-
]
|
| 3079 |
-
sdist = { url = "https://files.pythonhosted.org/packages/1d/f1/4aff125021a9c5e0183f2f55dd7d04b7256a0e1e10db50d537a7415d9c55/snowflake_connector_python-4.0.0.tar.gz", hash = "sha256:4b10a865c4a5e1fa60c365c7fe41e0433605e6e5edc824e8730a9038f330b3a6", size = 813937, upload-time = "2025-10-09T10:11:34.631Z" }
|
| 3080 |
-
wheels = [
|
| 3081 |
-
{ url = "https://files.pythonhosted.org/packages/ea/b0/462c0deee35d6d03d3d729b3f923615bae665beb7f9a94673a23a52080fe/snowflake_connector_python-4.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bfd3b8523d7adc830f99c5c4c635689ceca61700a05368d5bbb34c6811f2ec54", size = 1029568, upload-time = "2025-10-09T10:11:42.125Z" },
|
| 3082 |
-
{ url = "https://files.pythonhosted.org/packages/ff/4b/bb3ae3f07e7927c8f16c4c0f1283d3c721978d16e8bf4193fc8e41025c1e/snowflake_connector_python-4.0.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:835161dd46ef8f5fc9d2f135ca654c2f3fbdf57b035d3e1980506aa8eac671dc", size = 1041337, upload-time = "2025-10-09T10:11:43.692Z" },
|
| 3083 |
-
{ url = "https://files.pythonhosted.org/packages/9c/75/4bfac89f10c6dbb75e97adf1e217737fc599ebf964031c9298b6cbd807d0/snowflake_connector_python-4.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65e4e36dd1b0c7235d84cddef8a3c97c5ea0dc8fea85e31e45fc485000b77a83", size = 2699730, upload-time = "2025-10-09T10:11:25.295Z" },
|
| 3084 |
-
{ url = "https://files.pythonhosted.org/packages/cd/78/0e916416c50909dbae511fe38b1e671a9efa62decdce51b174a0396804e4/snowflake_connector_python-4.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6132986d6965e4005b0167270612fbc7fa4bc4ef42726a40b85a8f57475a78d", size = 2731336, upload-time = "2025-10-09T10:11:27.028Z" },
|
| 3085 |
-
{ url = "https://files.pythonhosted.org/packages/83/f0/3db8a2f3f5ee724d309c661af739a70d0643070b9b4597728151ef900f9b/snowflake_connector_python-4.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:a790f06808e4481c23cfed1396d2c9a786060ddd62408b1fda1a63e1e6bc4b07", size = 1176292, upload-time = "2025-10-09T10:11:54.956Z" },
|
| 3086 |
-
]
|
| 3087 |
-
|
| 3088 |
-
[[package]]
|
| 3089 |
-
name = "snowflake-sqlalchemy"
|
| 3090 |
-
version = "1.9.0"
|
| 3091 |
-
source = { registry = "https://pypi.org/simple" }
|
| 3092 |
-
dependencies = [
|
| 3093 |
-
{ name = "snowflake-connector-python" },
|
| 3094 |
-
{ name = "sqlalchemy" },
|
| 3095 |
-
]
|
| 3096 |
-
sdist = { url = "https://files.pythonhosted.org/packages/ff/6a/fcc5c00c3a253029a7b7b293a3958ba07d5e97623b643de47be0cc9e5530/snowflake_sqlalchemy-1.9.0.tar.gz", hash = "sha256:fb32baf559f7f933ae8fde2ec535bcea5381bb15188777cd8c006b3226efa3b1", size = 141707, upload-time = "2026-03-04T13:48:17.905Z" }
|
| 3097 |
-
wheels = [
|
| 3098 |
-
{ url = "https://files.pythonhosted.org/packages/88/28/b7ae8df80847e8157b74669ad7e1b0180e82ac0e3daf950612effd232fea/snowflake_sqlalchemy-1.9.0-py3-none-any.whl", hash = "sha256:f0b1528173e93c8c80bd9ca510985054667e0e514dd90b890271ac1cfae261c1", size = 78953, upload-time = "2026-03-04T13:48:16.393Z" },
|
| 3099 |
-
]
|
| 3100 |
-
|
| 3101 |
-
[[package]]
|
| 3102 |
-
name = "sortedcontainers"
|
| 3103 |
-
version = "2.4.0"
|
| 3104 |
-
source = { registry = "https://pypi.org/simple" }
|
| 3105 |
-
sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" }
|
| 3106 |
-
wheels = [
|
| 3107 |
-
{ url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" },
|
| 3108 |
-
]
|
| 3109 |
-
|
| 3110 |
[[package]]
|
| 3111 |
name = "spacy"
|
| 3112 |
version = "3.8.3"
|
|
@@ -3185,22 +2842,6 @@ asyncio = [
|
|
| 3185 |
{ name = "greenlet" },
|
| 3186 |
]
|
| 3187 |
|
| 3188 |
-
[[package]]
|
| 3189 |
-
name = "sqlalchemy-bigquery"
|
| 3190 |
-
version = "1.16.0"
|
| 3191 |
-
source = { registry = "https://pypi.org/simple" }
|
| 3192 |
-
dependencies = [
|
| 3193 |
-
{ name = "google-api-core" },
|
| 3194 |
-
{ name = "google-auth" },
|
| 3195 |
-
{ name = "google-cloud-bigquery" },
|
| 3196 |
-
{ name = "packaging" },
|
| 3197 |
-
{ name = "sqlalchemy" },
|
| 3198 |
-
]
|
| 3199 |
-
sdist = { url = "https://files.pythonhosted.org/packages/7e/6a/c49932b3d9c44cab9202b1866c5b36b7f0d0455d4653fbc0af4466aeaa76/sqlalchemy_bigquery-1.16.0.tar.gz", hash = "sha256:fe937a0d1f4cf7219fcf5d4995c6718805b38d4df43e29398dec5dc7b6d1987e", size = 119632, upload-time = "2025-11-06T01:35:40.373Z" }
|
| 3200 |
-
wheels = [
|
| 3201 |
-
{ url = "https://files.pythonhosted.org/packages/c0/87/11e6de00ef7949bb8ea06b55304a1a4911c329fdf0d9882b464db240c2c5/sqlalchemy_bigquery-1.16.0-py3-none-any.whl", hash = "sha256:0fe7634cd954f3e74f5e2db6d159f9e5ee87a47fbe8d52eac3cd3bb3dadb3a77", size = 40615, upload-time = "2025-11-06T01:35:39.358Z" },
|
| 3202 |
-
]
|
| 3203 |
-
|
| 3204 |
[[package]]
|
| 3205 |
name = "srsly"
|
| 3206 |
version = "2.5.3"
|
|
@@ -3374,15 +3015,6 @@ wheels = [
|
|
| 3374 |
{ url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" },
|
| 3375 |
]
|
| 3376 |
|
| 3377 |
-
[[package]]
|
| 3378 |
-
name = "tomlkit"
|
| 3379 |
-
version = "0.14.0"
|
| 3380 |
-
source = { registry = "https://pypi.org/simple" }
|
| 3381 |
-
sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167, upload-time = "2026-01-13T01:14:53.304Z" }
|
| 3382 |
-
wheels = [
|
| 3383 |
-
{ url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" },
|
| 3384 |
-
]
|
| 3385 |
-
|
| 3386 |
[[package]]
|
| 3387 |
name = "torch"
|
| 3388 |
version = "2.11.0"
|
|
|
|
| 1 |
version = 1
|
| 2 |
+
revision = 2
|
| 3 |
requires-python = "==3.12.*"
|
| 4 |
resolution-markers = [
|
| 5 |
"python_full_version >= '3.12.4'",
|
|
|
|
| 49 |
{ name = "pydantic" },
|
| 50 |
{ name = "pydantic-settings" },
|
| 51 |
{ name = "pymongo" },
|
|
|
|
|
|
|
| 52 |
{ name = "pypdf" },
|
| 53 |
{ name = "python-docx" },
|
| 54 |
{ name = "python-dotenv" },
|
|
|
|
| 57 |
{ name = "redis" },
|
| 58 |
{ name = "sentence-transformers" },
|
| 59 |
{ name = "slowapi" },
|
|
|
|
| 60 |
{ name = "spacy" },
|
| 61 |
{ name = "sqlalchemy", extra = ["asyncio"] },
|
|
|
|
| 62 |
{ name = "sse-starlette" },
|
| 63 |
{ name = "starlette" },
|
| 64 |
{ name = "structlog" },
|
|
|
|
| 131 |
{ name = "pydantic", specifier = "==2.10.3" },
|
| 132 |
{ name = "pydantic-settings", specifier = "==2.7.0" },
|
| 133 |
{ name = "pymongo", specifier = ">=4.14.0" },
|
|
|
|
|
|
|
| 134 |
{ name = "pypdf", specifier = "==5.1.0" },
|
| 135 |
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.4" },
|
| 136 |
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==0.24.0" },
|
|
|
|
| 143 |
{ name = "ruff", marker = "extra == 'dev'", specifier = "==0.8.4" },
|
| 144 |
{ name = "sentence-transformers", specifier = "==3.3.1" },
|
| 145 |
{ name = "slowapi", specifier = "==0.1.9" },
|
|
|
|
| 146 |
{ name = "spacy", specifier = "==3.8.3" },
|
| 147 |
{ name = "sqlalchemy", extras = ["asyncio"], specifier = "==2.0.36" },
|
|
|
|
| 148 |
{ name = "sse-starlette", specifier = "==2.1.3" },
|
| 149 |
{ name = "starlette", specifier = "==0.41.3" },
|
| 150 |
{ name = "structlog", specifier = "==24.4.0" },
|
|
|
|
| 280 |
{ url = "https://files.pythonhosted.org/packages/13/b5/7af0cb920a476dccd612fbc9a21a3745fb29b1fcd74636078db8f7ba294c/APScheduler-3.10.4-py3-none-any.whl", hash = "sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661", size = 59303, upload-time = "2023-08-19T16:44:56.814Z" },
|
| 281 |
]
|
| 282 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
[[package]]
|
| 284 |
name = "asyncpg"
|
| 285 |
version = "0.30.0"
|
|
|
|
| 428 |
{ url = "https://files.pythonhosted.org/packages/20/07/fb43edc2ff0a6a367e4a94fc39eb3b85aa1e55e24cc857af2db145ce9f0d/blis-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:f20f7ad69aaffd1ce14fe77de557b6df9b61e0c9e582f75a843715d836b5c8af", size = 6192759, upload-time = "2025-11-17T12:27:56.176Z" },
|
| 429 |
]
|
| 430 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
[[package]]
|
| 432 |
name = "cachetools"
|
| 433 |
version = "5.5.0"
|
|
|
|
| 941 |
{ url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" },
|
| 942 |
]
|
| 943 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 944 |
[[package]]
|
| 945 |
name = "greenlet"
|
| 946 |
version = "3.3.2"
|
|
|
|
| 958 |
{ url = "https://files.pythonhosted.org/packages/58/2e/fe7f36ff1982d6b10a60d5e0740c759259a7d6d2e1dc41da6d96de32fff6/greenlet-3.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:d3a62fa76a32b462a97198e4c9e99afb9ab375115e74e9a83ce180e7a496f643", size = 230331, upload-time = "2026-02-20T20:17:23.34Z" },
|
| 959 |
]
|
| 960 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 961 |
[[package]]
|
| 962 |
name = "h11"
|
| 963 |
version = "0.16.0"
|
|
|
|
| 1127 |
{ url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" },
|
| 1128 |
]
|
| 1129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1130 |
[[package]]
|
| 1131 |
name = "joblib"
|
| 1132 |
version = "1.5.3"
|
|
|
|
| 2121 |
{ url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
|
| 2122 |
]
|
| 2123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2124 |
[[package]]
|
| 2125 |
name = "psycopg"
|
| 2126 |
version = "3.2.3"
|
|
|
|
| 2181 |
{ url = "https://files.pythonhosted.org/packages/b5/bf/635fbe5dd10ed200afbbfbe98f8602829252ca1cce81cc48fb25ed8dadc0/psycopg2-2.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:e03e4a6dbe87ff81540b434f2e5dc2bddad10296db5eea7bdc995bf5f4162938", size = 2713969, upload-time = "2025-10-10T11:10:15.946Z" },
|
| 2182 |
]
|
| 2183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2184 |
[[package]]
|
| 2185 |
name = "pycparser"
|
| 2186 |
version = "3.0"
|
|
|
|
| 2310 |
{ url = "https://files.pythonhosted.org/packages/60/4c/33f75713d50d5247f2258405142c0318ff32c6f8976171c4fcae87a9dbdf/pymongo-4.16.0-cp312-cp312-win_arm64.whl", hash = "sha256:dfc320f08ea9a7ec5b2403dc4e8150636f0d6150f4b9792faaae539c88e7db3b", size = 892971, upload-time = "2026-01-07T18:04:35.594Z" },
|
| 2311 |
]
|
| 2312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2313 |
[[package]]
|
| 2314 |
name = "pyparsing"
|
| 2315 |
version = "3.3.2"
|
|
|
|
| 2610 |
{ url = "https://files.pythonhosted.org/packages/13/9f/026e18ca7d7766783d779dae5e9c656746c6ede36ef73c6d934aaf4a6dec/ruff-0.8.4-py3-none-win_arm64.whl", hash = "sha256:9183dd615d8df50defa8b1d9a074053891ba39025cf5ae88e8bcb52edcc4bf08", size = 9074500, upload-time = "2024-12-19T13:36:23.92Z" },
|
| 2611 |
]
|
| 2612 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2613 |
[[package]]
|
| 2614 |
name = "safetensors"
|
| 2615 |
version = "0.7.0"
|
|
|
|
| 2764 |
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
|
| 2765 |
]
|
| 2766 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2767 |
[[package]]
|
| 2768 |
name = "spacy"
|
| 2769 |
version = "3.8.3"
|
|
|
|
| 2842 |
{ name = "greenlet" },
|
| 2843 |
]
|
| 2844 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2845 |
[[package]]
|
| 2846 |
name = "srsly"
|
| 2847 |
version = "2.5.3"
|
|
|
|
| 3015 |
{ url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" },
|
| 3016 |
]
|
| 3017 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3018 |
[[package]]
|
| 3019 |
name = "torch"
|
| 3020 |
version = "2.11.0"
|