# Source: commit b3a6c78 by ishaq101 — "[NOTICKET] Fix upload file"
from fastapi import UploadFile
# from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.exc import IntegrityError
from externals.storages.azure_blob import upload_pdf as upload_file
from externals.databases.pg_crud import get_file_by_filename, get_file_by_user_id
from externals.databases.pg_models import CVFile, CVUser
from utils.logger import get_logger
# Module-level logger used by every method in this file; obtained from the
# project's get_logger helper under the name "knowledge.upload".
logger = get_logger("knowledge.upload")
class KnowledgeFileService:
    """Upload files and list stored file metadata on behalf of one user.

    Attributes:
        db: Database session. It is passed to the ``pg_crud`` helpers and is
            used here for ``add`` plus awaited ``commit`` / ``refresh`` /
            ``rollback``.
        user: The ``CVUser`` performing the request; its ``user_id``,
            ``tenant_id`` and ``email`` are read by the methods below.
    """

    def __init__(self, db, user: CVUser):
        self.db = db
        self.user = user

    async def upload(self, file: UploadFile) -> CVFile:
        """Upload a file to blob storage and record its metadata as a CVFile.

        The lookup by ``(filename, user_id)`` runs first, so calling this
        again with a file name the user already has returns the stored row
        without uploading anything.

        Args:
            file: The incoming upload; ``filename`` and ``content_type`` are
                read, and the object itself is handed to the storage helper.

        Returns:
            The existing ``CVFile`` row when one is found, otherwise the
            newly inserted one.

        Raises:
            RuntimeError: The storage helper did not report ``uploaded``.
            IntegrityError: The insert violated a constraint and no row with
                the same filename/user could be found afterwards.
        """
        logger.info(
            "📤 Knowledge upload requested",
            extra={"context": {
                "cv_filename": file.filename,
                "file_content_type": file.content_type,
                "user_id": str(self.user.user_id),
                "tenant_id": str(self.user.tenant_id),
            }},
        )

        # Step 1: check the DB first so repeated uploads are a no-op.
        existing = await get_file_by_filename(
            self.db,
            filename=file.filename,
            user_id=self.user.user_id,
        )
        logger.debug(
            "Existing file lookup finished",
            extra={"context": {
                "filename": file.filename,
                "found": existing is not None,
            }},
        )
        if existing:
            logger.info(
                "ℹ️ File already exists, skipping upload",
                extra={"context": {
                    "filename": file.filename,
                    "file_id": str(existing.file_id),
                }},
            )
            return existing

        # Step 2: push the file to blob storage.
        upload_result = await upload_file(
            file, self.user.tenant_id, self.user.user_id
        )
        # Only the success flag is logged: the full result carries the blob
        # URL, which should not be dumped to stdout/logs wholesale.
        logger.debug(
            "Storage upload finished",
            extra={"context": {
                "filename": file.filename,
                "uploaded": bool(upload_result.get("uploaded")),
            }},
        )
        if not upload_result.get("uploaded"):
            logger.error(
                "❌ Azure upload failed",
                extra={"context": {"filename": file.filename}},
            )
            raise RuntimeError(upload_result.get("message", "Upload failed"))

        # Step 3: persist the metadata row.
        cv_file = CVFile(
            user_id=self.user.user_id,
            file_type=file.content_type or "application/pdf",
            filename=file.filename,
            url=upload_result["url"],
            is_extracted=False,
            is_deleted=False,
        )
        try:
            self.db.add(cv_file)
            await self.db.commit()
        except IntegrityError:
            await self.db.rollback()
            logger.warning(
                "⚠️ Duplicate file detected during insert",
                extra={"context": {"filename": file.filename}},
            )
            # Presumably a concurrent request inserted the same file between
            # the lookup above and this commit; return that row if so.
            existing = await get_file_by_filename(
                self.db,
                filename=file.filename,
                user_id=self.user.user_id,
            )
            if existing:
                return existing
            # The constraint violation was not a filename duplicate: surface
            # it instead of silently returning None.
            raise

        await self.db.refresh(cv_file)
        logger.info(
            "✅ File successfully logged",
            extra={"context": {
                "file_id": str(cv_file.file_id),
                "filename": cv_file.filename,
                "uploaded_by": self.user.email,
            }},
        )
        return cv_file

    async def get_files_by_user(self, user_id: str):
        """Return metadata for every file uploaded by ``user_id``.

        Args:
            user_id: Identifier of the user whose files are requested.
                NOTE(review): this is not compared against
                ``self.user.user_id`` here — confirm the caller enforces
                authorization.

        Returns:
            A list of dicts with the keys ``file_id``, ``filename``,
            ``file_type``, ``url``, ``is_extracted``, ``uploaded_at`` and
            ``date_modified``. Timestamps are ISO-8601 strings, or ``None``
            when the stored value is unset.
        """
        logger.info(
            "📄 Fetching files for user",
            extra={"context": {
                "requested_user_id": user_id,
                "requested_by": str(self.user.user_id),
            }},
        )

        records = await get_file_by_user_id(self.db, user_id=user_id)

        # Guard the timestamps: calling .isoformat() on an unset (None)
        # column value would raise AttributeError and fail the whole listing.
        file_list = [
            {
                "file_id": str(record.file_id),
                "filename": record.filename,
                "file_type": record.file_type,
                "url": record.url,
                "is_extracted": record.is_extracted,
                "uploaded_at": (
                    record.uploaded_at.isoformat()
                    if record.uploaded_at is not None
                    else None
                ),
                "date_modified": (
                    record.date_modified.isoformat()
                    if record.date_modified is not None
                    else None
                ),
            }
            for record in records
        ]

        logger.info(
            "✅ Files retrieved successfully",
            extra={"context": {
                "file_count": len(file_list),
                "requested_user_id": user_id,
            }},
        )
        return file_list
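# NOTE(review): everything below is a commented-out legacy implementation
# (presumably superseded by KnowledgeFileService); consider removing it.
# class KnowledgeUploadService: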
# def __init__(self, db: AsyncSession, user: CVUser):
# self.db = db
# self.user = user
# async def upload(self, file: UploadFile) -> CVFile:
# """
# Upload file to Azure Blob and log metadata to cv_file table
# """
# logger.info(f"📤 Uploading file: {file.filename}")
# # 1️⃣ Upload to Azure Blob
# upload_result = await upload_file(file)
# if not upload_result.get("uploaded"):
# raise RuntimeError(upload_result.get("message"))
# # 2️⃣ Check existing file record
# existing = await get_file_by_filename(self.db, file.filename)
# if existing:
# logger.info(f"ℹ️ File already exists in DB: {file.filename}")
# return existing
# # 3️⃣ Persist metadata
# cv_file = CVFile(
# file_type=file.content_type or "application/pdf",
# filename=file.filename,
# url=upload_result["url"],
# is_extracted=False,
# )
# self.db.add(cv_file)
# await self.db.commit()
# await self.db.refresh(cv_file)
# logger.info(f"✅ File logged into cv_file: {file.filename}")
# return cv_file