from fastapi import UploadFile
# from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.exc import IntegrityError
from externals.storages.azure_blob import upload_pdf as upload_file
from externals.databases.pg_crud import get_file_by_filename, get_file_by_user_id
from externals.databases.pg_models import CVFile, CVUser
from utils.logger import get_logger
# Module-level logger shared by everything in this file; obtained from the
# project's get_logger helper under the name "knowledge.upload".
logger = get_logger("knowledge.upload")
class KnowledgeFileService:
    """Upload knowledge files for a user and list the files a user owns.

    The service is bound to one database session and one acting user; both
    are supplied by the caller and stored unchanged.
    """

    def __init__(self, db, user: CVUser):
        """Bind the service to a session and the acting user.

        Args:
            db: Database session. It is awaited for ``commit``, ``refresh``
                and ``rollback``, so it is presumably an SQLAlchemy
                ``AsyncSession`` (the import is commented out upstream --
                confirm with the caller).
            user: The user on whose behalf files are uploaded; its
                ``user_id``, ``tenant_id`` and ``email`` are read.
        """
        self.db = db
        self.user = user

    async def upload(self, file: UploadFile) -> CVFile:
        """
        Upload PDF to Azure Blob Storage and log metadata into cv_file table.

        The call is idempotent per (filename, user): when a row already
        exists for this filename and user, that row is returned and nothing
        is uploaded.

        Args:
            file: The uploaded file; ``filename`` and ``content_type`` are
                read here and the object itself is handed to the blob
                uploader.

        Returns:
            The existing or newly created ``CVFile`` row.

        Raises:
            RuntimeError: If the uploader's result has no truthy
                ``"uploaded"`` entry. The message is the result's
                ``"message"`` value, or ``"Upload failed"``.
            IntegrityError: If the insert violates a constraint and no
                matching row can be found afterwards (previously this case
                silently returned ``None``).
        """
        logger.info(
            "📤 Knowledge upload requested",
            extra={"context": {
                "cv_filename": file.filename,
                "file_content_type": file.content_type,
                "user_id": str(self.user.user_id),
                "tenant_id": str(self.user.tenant_id),
            }}
        )

        # 1️⃣ Check DB first (idempotent behavior)
        existing = await get_file_by_filename(
            self.db,
            filename=file.filename,
            user_id=self.user.user_id,
        )
        # Diagnostics go through the logger instead of print() so they obey
        # the configured log level and handlers.
        logger.debug(
            "KnowledgeFileService/get_file_by_filename/existing: %s", existing
        )
        if existing:
            logger.info(
                "ℹ️ File already exists, skipping upload",
                extra={"context": {
                    "filename": file.filename,
                    "file_id": str(existing.file_id),
                }}
            )
            return existing

        # 2️⃣ Upload to Azure Blob
        upload_result = await upload_file(
            file, self.user.tenant_id, self.user.user_id
        )
        logger.debug("KnowledgeFileService/upload_result: %s", upload_result)
        if not upload_result.get("uploaded"):
            logger.error(
                "❌ Azure upload failed",
                extra={"context": {"filename": file.filename}},
            )
            raise RuntimeError(upload_result.get("message", "Upload failed"))

        # 3️⃣ Persist metadata into DB
        cv_file = CVFile(
            user_id=self.user.user_id,
            file_type=file.content_type or "application/pdf",
            filename=file.filename,
            url=upload_result["url"],
            is_extracted=False,
            is_deleted=False,
        )
        try:
            self.db.add(cv_file)
            await self.db.commit()
            await self.db.refresh(cv_file)
        except IntegrityError:
            # The insert can still collide even though step 1 found nothing,
            # e.g. when another request inserted the same file in between.
            await self.db.rollback()
            logger.warning(
                "⚠️ Duplicate file detected during insert",
                extra={"context": {"filename": file.filename}},
            )
            # Fetch the already-existing row
            existing = await get_file_by_filename(
                self.db,
                filename=file.filename,
                user_id=self.user.user_id,
            )
            if not existing:
                # The constraint failure was not explained by a duplicate
                # row; surface it rather than returning None to the caller.
                raise
            return existing

        logger.info("✅ File successfully logged", extra={"context": {
            "file_id": str(cv_file.file_id),
            "filename": cv_file.filename,
            "uploaded_by": self.user.email,
        }})
        return cv_file

    @staticmethod
    def _isoformat_or_none(value):
        """Return ``value.isoformat()``, or ``None`` when ``value`` is None."""
        return value.isoformat() if value is not None else None

    async def get_files_by_user(self, user_id: str):
        """
        Retrieve all file metadata uploaded by a specific user.

        Args:
            user_id: Identifier of the user whose files are listed. It is
                passed straight to the lookup and is not compared with the
                acting user.

        Returns:
            A list of dicts with the keys ``file_id`` (string), ``filename``,
            ``file_type``, ``url``, ``is_extracted``, ``uploaded_at`` and
            ``date_modified``. The two timestamps are ISO-8601 strings, or
            ``None`` when the attribute on the row is ``None``.
        """
        logger.info(
            "📄 Fetching files for user",
            extra={"context": {
                "requested_user_id": user_id,
                "requested_by": str(self.user.user_id),
            }}
        )

        files = await get_file_by_user_id(self.db, user_id=user_id)

        to_iso = self._isoformat_or_none
        file_list = [
            {
                "file_id": str(record.file_id),
                "filename": record.filename,
                "file_type": record.file_type,
                "url": record.url,
                "is_extracted": record.is_extracted,
                "uploaded_at": to_iso(record.uploaded_at),
                "date_modified": to_iso(record.date_modified),
            }
            for record in files
        ]

        logger.info(
            "✅ Files retrieved successfully",
            extra={"context": {
                "file_count": len(file_list),
                "requested_user_id": user_id,
            }}
        )
        return file_list
# class KnowledgeUploadService:
# def __init__(self, db: AsyncSession, user: CVUser):
# self.db = db
# self.user = user
# async def upload(self, file: UploadFile) -> CVFile:
# """
# Upload file to Azure Blob and log metadata to cv_file table
# """
# logger.info(f"📤 Uploading file: {file.filename}")
# # 1️⃣ Upload to Azure Blob
# upload_result = await upload_file(file)
# if not upload_result.get("uploaded"):
# raise RuntimeError(upload_result.get("message"))
# # 2️⃣ Check existing file record
# existing = await get_file_by_filename(self.db, file.filename)
# if existing:
# logger.info(f"ℹ️ File already exists in DB: {file.filename}")
# return existing
# # 3️⃣ Persist metadata
# cv_file = CVFile(
# file_type=file.content_type or "application/pdf",
# filename=file.filename,
# url=upload_result["url"],
# is_extracted=False,
# )
# self.db.add(cv_file)
# await self.db.commit()
# await self.db.refresh(cv_file)
# logger.info(f"✅ File logged into cv_file: {file.filename}")
# return cv_file