| | |
| | |
| |
|
| | import streamlit as st |
| | import spacy |
| | import networkx as nx |
| | import matplotlib.pyplot as plt |
| | import pandas as pd |
| | import numpy as np |
| | import logging |
| | import io |
| | import base64 |
| | from collections import Counter, defaultdict |
| | import logging |
| |
|
| |
|
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
# Ask the logging package for INFO-level output on the root logger.
logging.basicConfig(level=logging.INFO)
| |
|
| |
|
| | from .semantic_analysis import ( |
| | create_concept_graph, |
| | visualize_concept_graph, |
| | identify_key_concepts |
| | ) |
| |
|
| |
|
| | from .stopwords import ( |
| | get_custom_stopwords, |
| | process_text, |
| | get_stopwords_for_spacy |
| | ) |
| |
|
| |
|
| | |
# Human-readable names for part-of-speech tag codes, keyed first by language
# code ('es', 'en', 'uk') and then by tag. Every language lists the same
# sixteen tags in the same order.
POS_TRANSLATIONS = {
    'es': dict(
        ADJ='Adjetivo',
        ADP='Preposición',
        ADV='Adverbio',
        AUX='Auxiliar',
        CCONJ='Conjunción Coordinante',
        DET='Determinante',
        INTJ='Interjección',
        NOUN='Sustantivo',
        NUM='Número',
        PART='Partícula',
        PRON='Pronombre',
        PROPN='Nombre Propio',
        SCONJ='Conjunción Subordinante',
        SYM='Símbolo',
        VERB='Verbo',
        X='Otro',
    ),
    'en': dict(
        ADJ='Adjective',
        ADP='Preposition',
        ADV='Adverb',
        AUX='Auxiliary',
        CCONJ='Coordinating Conjunction',
        DET='Determiner',
        INTJ='Interjection',
        NOUN='Noun',
        NUM='Number',
        PART='Particle',
        PRON='Pronoun',
        PROPN='Proper Noun',
        SCONJ='Subordinating Conjunction',
        SYM='Symbol',
        VERB='Verb',
        X='Other',
    ),
    'uk': dict(
        ADJ='Прикметник',
        ADP='Прийменник',
        ADV='Прислівник',
        AUX='Допоміжне дієслово',
        CCONJ='Сурядний сполучник',
        DET='Означник',
        INTJ='Вигук',
        NOUN='Іменник',
        NUM='Число',
        PART='Частка',
        PRON='Займенник',
        PROPN='Власна назва',
        SCONJ='Підрядний сполучник',
        SYM='Символ',
        VERB='Дієслово',
        X='Інше',
    ),
}
| |
|
# Entity category display names per language code, each mapped to a colour
# name. The five colours are identical, and in the same order, for every
# language, so they are written once and paired with each language's names.
ENTITY_LABELS = {
    lang_code: dict(zip(category_names, (
        "lightblue", "lightcoral", "lightgreen", "lightyellow", "lightpink",
    )))
    for lang_code, category_names in (
        ('es', ("Personas", "Lugares", "Inventos", "Fechas", "Conceptos")),
        ('en', ("People", "Places", "Inventions", "Dates", "Concepts")),
        ('uk', ("Люди", "Місця", "Винаходи", "Дати", "Концепції")),
    )
}
| | |
| |
|
def fig_to_bytes(fig, dpi=100):
    """
    Render a figure to PNG and return the encoded image as bytes.

    Args:
        fig: Object exposing ``savefig(file, format=..., dpi=...,
            bbox_inches=...)``; the callers in this file pass the figures
            returned by ``compare_semantic_analysis``.
        dpi: Resolution forwarded to ``savefig``.

    Returns:
        bytes | None: The PNG data, or ``None`` if anything raised while
        saving (the error is logged, not propagated).
    """
    save_options = {'format': 'png', 'dpi': dpi, 'bbox_inches': 'tight'}
    try:
        png_buffer = io.BytesIO()
        fig.savefig(png_buffer, **save_options)
        # Rewind so the read below starts at the first byte written.
        png_buffer.seek(0)
        return png_buffer.read()
    except Exception as exc:
        logger.error(f"Error en fig_to_bytes: {str(exc)}")
        return None
| | |
| | |
def compare_semantic_analysis(text1, text2, nlp, lang):
    """
    Run the semantic analysis on two texts and draw one concept graph each.

    Args:
        text1: First text; passed to ``nlp``.
        text2: Second text; passed to ``nlp``.
        nlp: Callable applied to each text; its result is handed to
            ``identify_key_concepts`` and ``create_concept_graph``
            (presumably a loaded spaCy pipeline -- confirm against callers).
        lang: Language code forwarded to ``get_custom_stopwords`` and
            ``visualize_concept_graph``.

    Returns:
        tuple: ``(fig1, fig2, key_concepts1, key_concepts2)``, where each
        figure is whatever ``visualize_concept_graph`` returned.

    Raises:
        ValueError: If no key concepts are found for either text.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    try:
        logger.info(f"Iniciando análisis comparativo para idioma: {lang}")

        custom_stopwords = get_custom_stopwords(lang)
        logger.info(f"Obtenidas {len(custom_stopwords)} stopwords para el idioma {lang}")

        first_doc, second_doc = nlp(text1), nlp(text2)

        # Both texts are filtered with the same settings.
        concept_options = dict(stopwords=custom_stopwords, min_freq=2, min_length=3)

        logger.info("Identificando conceptos clave del primer texto...")
        key_concepts1 = identify_key_concepts(first_doc, **concept_options)

        logger.info("Identificando conceptos clave del segundo texto...")
        key_concepts2 = identify_key_concepts(second_doc, **concept_options)

        if not (key_concepts1 and key_concepts2):
            raise ValueError("No se pudieron identificar conceptos clave en uno o ambos textos")

        logger.info("Creando grafos de conceptos...")
        first_graph = create_concept_graph(first_doc, key_concepts1)
        second_graph = create_concept_graph(second_doc, key_concepts2)

        logger.info("Visualizando grafos...")
        titled_graphs = (
            (first_graph, "Análisis del primer texto"),
            (second_graph, "Análisis del segundo texto"),
        )
        rendered = []
        for graph, title in titled_graphs:
            # A fresh pyplot figure is opened before each drawing call; the
            # title and layout then apply to pyplot's current axes/figure.
            plt.figure(figsize=(12, 8))
            rendered.append(visualize_concept_graph(graph, lang))
            plt.title(title, pad=20)
            plt.tight_layout()
        fig1, fig2 = rendered

        logger.info("Análisis comparativo completado exitosamente")
        return fig1, fig2, key_concepts1, key_concepts2

    except Exception as exc:
        logger.error(f"Error en compare_semantic_analysis: {str(exc)}")
        plt.close('all')
        raise
    finally:
        # NOTE(review): this also runs on the success path, so every pyplot
        # figure is closed before the caller receives fig1/fig2 -- confirm
        # that the returned figures can still be saved afterwards.
        plt.close('all')
| |
|
| |
|
| | |
def create_concept_table(key_concepts):
    """
    Build a two-column table of key concepts and their frequencies.

    Args:
        key_concepts: List of ``(concept, frequency)`` tuples.

    Returns:
        pandas.DataFrame: Columns ``'Concepto'`` and ``'Frecuencia'``, with
        the frequency rounded to two decimals. An empty table with the same
        columns is returned when the input is empty or when building the
        table raises (the problem is logged in both cases).
    """
    headers = ['Concepto', 'Frecuencia']
    try:
        if key_concepts:
            table = pd.DataFrame(key_concepts, columns=headers)
            table['Frecuencia'] = table['Frecuencia'].round(2)
            return table

        logger.warning("Lista de conceptos vacía")
        return pd.DataFrame(columns=headers)
    except Exception as exc:
        logger.error(f"Error en create_concept_table: {str(exc)}")
        return pd.DataFrame(columns=headers)
| |
|
| |
|
| | |
| |
|
def perform_discourse_analysis(text1, text2, nlp, lang):
    """
    Run the full discourse analysis for two texts.

    Args:
        text1: First text to analyse.
        text2: Second text to analyse.
        nlp: Loaded spaCy model.
        lang: Language code.

    Returns:
        dict: On success, a dict with ``'success': True`` plus
        ``'graph1'``/``'graph2'`` (PNG bytes), ``'combined_graph'`` (always
        ``None``), ``'key_concepts1'``/``'key_concepts2'`` and
        ``'table1'``/``'table2'`` (concept tables). On any failure,
        ``{'success': False, 'error': <message>}``; no exception escapes.
    """
    try:
        logger.info("Iniciando análisis del discurso...")

        # Reject missing, empty and whitespace-only texts up front: a blank
        # string is truthy, and would otherwise run the whole pipeline only
        # to fail later with a misleading "no key concepts" error.
        for text in (text1, text2):
            if not text or (isinstance(text, str) and not text.strip()):
                raise ValueError("Los textos de entrada no pueden estar vacíos")

        if not nlp:
            raise ValueError("Modelo de lenguaje no inicializado")

        fig1, fig2, key_concepts1, key_concepts2 = compare_semantic_analysis(
            text1, text2, nlp, lang
        )

        logger.info("Análisis comparativo completado, convirtiendo figuras a bytes...")

        graph1_bytes = fig_to_bytes(fig1)
        graph2_bytes = fig_to_bytes(fig2)

        logger.info(f"Figura 1 convertida a {len(graph1_bytes) if graph1_bytes else 0} bytes")
        logger.info(f"Figura 2 convertida a {len(graph2_bytes) if graph2_bytes else 0} bytes")

        # fig_to_bytes returns None on failure, so a falsy value here means
        # at least one graph could not be rendered.
        if not graph1_bytes or not graph2_bytes:
            logger.error("Error al convertir figuras a bytes - obteniendo 0 bytes")
            raise ValueError("No se pudieron convertir las figuras a bytes")

        table1 = create_concept_table(key_concepts1)
        table2 = create_concept_table(key_concepts2)

        result = {
            'graph1': graph1_bytes,
            'graph2': graph2_bytes,
            'combined_graph': None,
            'key_concepts1': key_concepts1,
            'key_concepts2': key_concepts2,
            'table1': table1,
            'table2': table2,
            'success': True
        }

        logger.info("Análisis del discurso completado y listo para almacenamiento")
        return result

    except Exception as e:
        logger.error(f"Error en perform_discourse_analysis: {str(e)}")
        return {
            'success': False,
            'error': str(e)
        }
    finally:
        # Single cleanup point for both the success and the error path:
        # closing every pyplot figure here covers fig1/fig2 as well, so no
        # per-figure or per-branch close calls are needed above.
        plt.close('all')
| |
|
| | |
| |
|