| | |
| | from .mongo_db import insert_document, find_documents, get_collection |
| | from datetime import datetime, timezone |
| | import logging |
| | import re |
| |
|
| | logger = logging.getLogger(__name__) |
| | COLLECTION_NAME = 'chat_history-v3' |
| |
|
| | def clean_text_content(text: str) -> str: |
| | """ |
| | Limpia y normaliza el texto para almacenamiento seguro en UTF-8. |
| | |
| | Args: |
| | text: Texto a limpiar |
| | |
| | Returns: |
| | str: Texto limpio y normalizado |
| | """ |
| | if not text: |
| | return text |
| | |
| | |
| | special_chars = ["▌", "\u2588", "\u2580", "\u2584", "\u258C", "\u2590"] |
| | |
| | |
| | for char in special_chars: |
| | text = text.replace(char, "") |
| | |
| | |
| | text = re.sub(r'\s+', ' ', text).strip() |
| | |
| | |
| | try: |
| | text = text.encode('utf-8', errors='strict').decode('utf-8') |
| | except UnicodeError: |
| | text = text.encode('utf-8', errors='ignore').decode('utf-8') |
| | logger.warning("Se encontraron caracteres no UTF-8 en el texto") |
| | |
| | return text |
| |
|
| | def get_chat_history(username: str, analysis_type: str = 'sidebar', limit: int = None) -> list: |
| | """ |
| | Recupera el historial del chat con codificación UTF-8 segura. |
| | """ |
| | try: |
| | query = { |
| | "username": username, |
| | "$or": [ |
| | {"analysis_type": analysis_type}, |
| | {"analysis_type": {"$exists": False}}, |
| | {"analysis_type": None} |
| | ] |
| | } |
| | |
| | collection = get_collection(COLLECTION_NAME) |
| | if collection is None: |
| | logger.error("No se pudo obtener la colección de chat") |
| | return [] |
| | |
| | cursor = collection.find(query).sort("timestamp", -1) |
| | if limit: |
| | cursor = cursor.limit(limit) |
| | |
| | conversations = [] |
| | for chat in cursor: |
| | try: |
| | |
| | cleaned_messages = [] |
| | for msg in chat.get('messages', []): |
| | try: |
| | cleaned_messages.append({ |
| | 'role': msg.get('role', 'unknown'), |
| | 'content': clean_text_content(msg.get('content', '')) |
| | }) |
| | except Exception as msg_error: |
| | logger.error(f"Error procesando mensaje: {str(msg_error)}") |
| | continue |
| | |
| | conversations.append({ |
| | 'timestamp': chat['timestamp'], |
| | 'messages': cleaned_messages |
| | }) |
| | |
| | except Exception as e: |
| | logger.error(f"Error formateando chat: {str(e)}") |
| | continue |
| | |
| | return conversations |
| | |
| | except Exception as e: |
| | logger.error(f"Error al recuperar historial de chat: {str(e)}") |
| | return [] |
| |
|
| |
|
| | def store_chat_history(username: str, messages: list, analysis_type: str = 'sidebar', metadata: dict = None) -> bool: |
| | """ |
| | Guarda el historial del chat con codificación UTF-8 segura. |
| | """ |
| | try: |
| | collection = get_collection(COLLECTION_NAME) |
| | if collection is None: |
| | logger.error("No se pudo obtener la colección de chat") |
| | return False |
| | |
| | |
| | formatted_messages = [] |
| | for msg in messages: |
| | try: |
| | formatted_messages.append({ |
| | 'role': msg.get('role', 'unknown'), |
| | 'content': clean_text_content(msg.get('content', '')), |
| | 'timestamp': datetime.now(timezone.utc) |
| | }) |
| | except Exception as msg_error: |
| | logger.error(f"Error procesando mensaje para almacenar: {str(msg_error)}") |
| | continue |
| | |
| | chat_document = { |
| | 'username': username, |
| | 'timestamp': datetime.now(timezone.utc), |
| | 'messages': formatted_messages, |
| | 'analysis_type': analysis_type, |
| | 'metadata': metadata or {} |
| | } |
| | |
| | |
| | try: |
| | import json |
| | json.dumps(chat_document, ensure_ascii=False) |
| | except UnicodeEncodeError as e: |
| | logger.error(f"Error de codificación en documento: {str(e)}") |
| | return False |
| | |
| | result = collection.insert_one(chat_document) |
| | if result.inserted_id: |
| | logger.info(f"Chat guardado para {username} con ID: {result.inserted_id}") |
| | return True |
| | |
| | logger.error("No se pudo insertar el documento") |
| | return False |
| | |
| | except Exception as e: |
| | logger.error(f"Error al guardar historial: {str(e)}") |
| | return False |