Spaces:
Sleeping
Sleeping
| from fastapi import UploadFile | |
| # from sqlalchemy.ext.asyncio import AsyncSession | |
| from sqlalchemy.exc import IntegrityError | |
| from externals.storages.azure_blob import upload_pdf as upload_file | |
| from externals.databases.pg_crud import get_file_by_filename, get_file_by_user_id | |
| from externals.databases.pg_models import CVFile, CVUser | |
| from utils.logger import get_logger | |
| logger = get_logger("knowledge.upload") | |
class KnowledgeFileService:
    """Service for uploading knowledge files and listing their metadata.

    Holds the database session and the acting user; every operation is
    performed on behalf of that user.
    """

    def __init__(self, db, user: CVUser):
        """Store the database session and the acting user.

        Args:
            db: Database session used for queries and commits.
            user: The user on whose behalf files are uploaded or listed.
        """
        self.db = db
        self.user = user

    @staticmethod
    def _isoformat_or_none(value):
        """Return ``value.isoformat()``, or ``None`` when the value is unset."""
        return value.isoformat() if value is not None else None

    async def upload(self, file: UploadFile) -> CVFile:
        """Upload PDF to Azure Blob Storage and log metadata into cv_file table.

        The database is checked first: if a record with the same filename
        already exists for this user, that record is returned and nothing
        is uploaded.

        Args:
            file: The uploaded file received from the request.

        Returns:
            The newly created record, or the already-existing record for
            the same filename and user.

        Raises:
            RuntimeError: If the storage upload does not report success.
            IntegrityError: If the insert violates a constraint and no
                existing record for this filename and user can be found.
        """
        # NOTE(review): rollback expires ORM instances attached to the
        # session; if ``self.user`` is attached to ``self.db``, reading its
        # attributes afterwards could trigger a lazy load. Capture the
        # values up front so later steps never touch the instance again.
        user_id = self.user.user_id
        tenant_id = self.user.tenant_id
        filename = file.filename

        logger.info(
            "📤 Knowledge upload requested",
            extra={"context": {
                "cv_filename": filename,
                "file_content_type": file.content_type,
                "user_id": str(user_id),
                "tenant_id": str(tenant_id),
            }},
        )

        # Step 1: check the DB first (idempotent behavior).
        existing = await get_file_by_filename(
            self.db,
            filename=filename,
            user_id=user_id,
        )
        logger.debug(
            "KnowledgeFileService/get_file_by_filename/existing: %s", existing
        )
        if existing:
            logger.info(
                "ℹ️ File already exists, skipping upload",
                extra={"context": {
                    "filename": filename,
                    "file_id": str(existing.file_id),
                }},
            )
            return existing

        # Step 2: upload to Azure Blob.
        upload_result = await upload_file(file, tenant_id, user_id)
        logger.debug("KnowledgeFileService/upload_result: %s", upload_result)
        if not upload_result.get("uploaded"):
            logger.error(
                "❌ Azure upload failed",
                extra={"context": {"filename": filename}},
            )
            raise RuntimeError(upload_result.get("message", "Upload failed"))

        # Step 3: persist metadata into the DB.
        cv_file = CVFile(
            user_id=user_id,
            file_type=file.content_type or "application/pdf",
            filename=filename,
            url=upload_result["url"],
            is_extracted=False,
            is_deleted=False,
        )
        uploaded_by = self.user.email
        self.db.add(cv_file)
        try:
            await self.db.commit()
        except IntegrityError:
            await self.db.rollback()
            logger.warning(
                "⚠️ Duplicate file detected during insert",
                extra={"context": {"filename": filename}},
            )
            # Fetch the already-existing row.
            existing = await get_file_by_filename(
                self.db,
                filename=filename,
                user_id=user_id,
            )
            if existing:
                return existing
            # The constraint failure was not a duplicate of this file;
            # surface it instead of silently returning None.
            raise

        await self.db.refresh(cv_file)
        logger.info(
            "✅ File successfully logged",
            extra={"context": {
                "file_id": str(cv_file.file_id),
                "filename": cv_file.filename,
                "uploaded_by": uploaded_by,
            }},
        )
        return cv_file

    async def get_files_by_user(self, user_id: str):
        """Retrieve all file metadata uploaded by a specific user.

        Args:
            user_id: Identifier of the user whose files are requested.

        Returns:
            A list of dicts, one per file, with the keys ``file_id``,
            ``filename``, ``file_type``, ``url``, ``is_extracted``,
            ``uploaded_at`` and ``date_modified``. The two timestamps are
            ISO-8601 strings, or ``None`` when the stored value is unset.
        """
        logger.info(
            "📄 Fetching files for user",
            extra={"context": {
                "requested_user_id": user_id,
                "requested_by": str(self.user.user_id),
            }},
        )

        files = await get_file_by_user_id(self.db, user_id=user_id)

        file_list = [
            {
                "file_id": str(record.file_id),
                "filename": record.filename,
                "file_type": record.file_type,
                "url": record.url,
                "is_extracted": record.is_extracted,
                "uploaded_at": self._isoformat_or_none(record.uploaded_at),
                "date_modified": self._isoformat_or_none(record.date_modified),
            }
            for record in files
        ]

        logger.info(
            "✅ Files retrieved successfully",
            extra={"context": {
                "file_count": len(file_list),
                "requested_user_id": user_id,
            }},
        )
        return file_list
| # class KnowledgeUploadService: | |
| # def __init__(self, db: AsyncSession, user: CVUser): | |
| # self.db = db | |
| # self.user = user | |
| # async def upload(self, file: UploadFile) -> CVFile: | |
| # """ | |
| # Upload file to Azure Blob and log metadata to cv_file table | |
| # """ | |
| # logger.info(f"📤 Uploading file: {file.filename}") | |
| # # 1️⃣ Upload to Azure Blob | |
| # upload_result = await upload_file(file) | |
| # if not upload_result.get("uploaded"): | |
| # raise RuntimeError(upload_result.get("message")) | |
| # # 2️⃣ Check existing file record | |
| # existing = await get_file_by_filename(self.db, file.filename) | |
| # if existing: | |
| # logger.info(f"ℹ️ File already exists in DB: {file.filename}") | |
| # return existing | |
| # # 3️⃣ Persist metadata | |
| # cv_file = CVFile( | |
| # file_type=file.content_type or "application/pdf", | |
| # filename=file.filename, | |
| # url=upload_result["url"], | |
| # is_extracted=False, | |
| # ) | |
| # self.db.add(cv_file) | |
| # await self.db.commit() | |
| # await self.db.refresh(cv_file) | |
| # logger.info(f"✅ File logged into cv_file: {file.filename}") | |
| # return cv_file | |