from fastapi import UploadFile
# from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.exc import IntegrityError

from externals.storages.azure_blob import upload_pdf as upload_file
from externals.databases.pg_crud import get_file_by_filename, get_file_by_user_id
from externals.databases.pg_models import CVFile, CVUser
from utils.logger import get_logger

logger = get_logger("knowledge.upload")


def _isoformat_or_none(value):
    """Return ``value.isoformat()``, or ``None`` when *value* is ``None``."""
    return value.isoformat() if value is not None else None


class KnowledgeFileService:
    """Upload knowledge files for a user and list the files a user has uploaded.

    Attributes:
        db: Database session passed to the CRUD helpers and used for
            ``add`` / ``commit`` / ``refresh`` / ``rollback`` (all but ``add``
            are awaited, so an async session is expected).
        user: The ``CVUser`` on whose behalf the operations run.
    """

    def __init__(self, db, user: CVUser):
        self.db = db
        self.user = user

    async def upload(self, file: UploadFile) -> CVFile:
        """
        Upload PDF to Azure Blob Storage and log metadata into cv_file table.

        The database is checked first, so uploading a filename the user
        already has returns the existing record without touching storage.

        Args:
            file: The incoming upload; ``filename`` and ``content_type`` are
                read from it and the object itself is handed to the storage
                helper.

        Returns:
            The newly created ``CVFile`` row, or the pre-existing one when a
            record with the same filename already exists for this user.

        Raises:
            RuntimeError: If the storage helper reports the upload as not
                uploaded.
            IntegrityError: If the insert violates a constraint and no
                existing row for this filename/user can be found afterwards.
        """
        logger.info(
            "📤 Knowledge upload requested",
            extra={"context": {
                "cv_filename": file.filename,
                "file_content_type": file.content_type,
                "user_id": str(self.user.user_id),
                "tenant_id": str(self.user.tenant_id),
            }}
        )

        # 1️⃣ Check DB first (idempotent behavior)
        existing = await get_file_by_filename(
            self.db, filename=file.filename, user_id=self.user.user_id
        )
        logger.debug(
            "Existing file lookup completed",
            extra={"context": {"found": bool(existing)}},
        )
        if existing:
            logger.info(
                "ℹ️ File already exists, skipping upload",
                extra={"context": {
                    "filename": file.filename,
                    "file_id": str(existing.file_id),
                }}
            )
            return existing

        # 2️⃣ Upload to Azure Blob
        upload_result = await upload_file(
            file, self.user.tenant_id, self.user.user_id
        )
        # Log only the status flag: the full result carries the blob URL,
        # which should not be dumped into logs or stdout.
        logger.debug(
            "Storage upload completed",
            extra={"context": {"uploaded": bool(upload_result.get("uploaded"))}},
        )
        if not upload_result.get("uploaded"):
            logger.error(
                "❌ Azure upload failed",
                extra={"context": {"filename": file.filename}},
            )
            raise RuntimeError(upload_result.get("message", "Upload failed"))

        # 3️⃣ Persist metadata into DB
        cv_file = CVFile(
            user_id=self.user.user_id,
            file_type=file.content_type or "application/pdf",
            filename=file.filename,
            url=upload_result["url"],
            is_extracted=False,
            is_deleted=False,
        )

        try:
            self.db.add(cv_file)
            await self.db.commit()
        except IntegrityError:
            await self.db.rollback()
            logger.warning(
                "⚠️ Duplicate file detected during insert",
                extra={"context": {"filename": file.filename}},
            )
            # Fetch the already-existing row.
            # NOTE(review): presumably a concurrent request inserted the same
            # filename between the lookup above and this commit — confirm
            # against the table's unique constraints.
            existing = await get_file_by_filename(
                self.db, filename=file.filename, user_id=self.user.user_id
            )
            if existing:
                return existing
            # The violation was not explained by a duplicate row; surface it
            # instead of silently returning None.
            raise

        await self.db.refresh(cv_file)
        logger.info(
            "✅ File successfully logged",
            extra={"context": {
                "file_id": str(cv_file.file_id),
                "filename": cv_file.filename,
                "uploaded_by": self.user.email,
            }}
        )
        return cv_file

    async def get_files_by_user(self, user_id: str):
        """
        Retrieve all file metadata uploaded by a specific user.

        Args:
            user_id: Identifier of the user whose files are requested.

        Returns:
            A list of dicts, one per file, with the keys ``file_id``,
            ``filename``, ``file_type``, ``url``, ``is_extracted``,
            ``uploaded_at`` and ``date_modified``. Timestamps are ISO-8601
            strings, or ``None`` when the stored value is missing.
        """
        logger.info(
            "📄 Fetching files for user",
            extra={"context": {
                "requested_user_id": user_id,
                "requested_by": str(self.user.user_id),
            }}
        )

        files = await get_file_by_user_id(
            self.db,
            user_id=user_id
        )

        # Timestamps go through the None-safe helper so a row without a value
        # does not abort the whole listing.
        # NOTE(review): whether these columns are nullable is not visible here.
        file_list = [
            {
                "file_id": str(record.file_id),
                "filename": record.filename,
                "file_type": record.file_type,
                "url": record.url,
                "is_extracted": record.is_extracted,
                "uploaded_at": _isoformat_or_none(record.uploaded_at),
                "date_modified": _isoformat_or_none(record.date_modified),
            }
            for record in files
        ]

        logger.info(
            "✅ Files retrieved successfully",
            extra={"context": {
                "file_count": len(file_list),
                "requested_user_id": user_id,
            }}
        )
        return file_list


# class KnowledgeUploadService:
#     def __init__(self, db: AsyncSession, user: CVUser):
#         self.db = db
#         self.user = user

#     async def upload(self, file: UploadFile) -> CVFile:
#         """
#         Upload file to Azure Blob and log metadata to cv_file table
#         """
#         logger.info(f"📤 Uploading file: {file.filename}")

#         # 1️⃣ Upload to Azure Blob
#         upload_result = await upload_file(file)
#         if not upload_result.get("uploaded"):
#             raise RuntimeError(upload_result.get("message"))

#         # 2️⃣ Check existing file record
#         existing = await get_file_by_filename(self.db, file.filename)
#         if existing:
#             logger.info(f"ℹ️ File already exists in DB: {file.filename}")
#             return existing

#         # 3️⃣ Persist metadata
#         cv_file = CVFile(
#             file_type=file.content_type or "application/pdf",
#             filename=file.filename,
#             url=upload_result["url"],
#             is_extracted=False,
#         )
#         self.db.add(cv_file)
#         await self.db.commit()
#         await self.db.refresh(cv_file)
#         logger.info(f"✅ File logged into cv_file: {file.filename}")
#         return cv_file