import streamlit as st
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
from collections import Counter
from itertools import combinations
import numpy as np
import matplotlib.patches as patches
import logging


logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('app.log')
    ]
)

logger = logging.getLogger(__name__)


def correlate_metrics(scores):
    """
    Adjusts the scores to maintain logical correlations between metrics.

    Args:
        scores: dict with initial scores for vocabulary, structure, cohesion and clarity

    Returns:
        dict with adjusted scores
    """
    try:
        # 1. Cohesion should be at least 70% of the structure score
        min_cohesion = scores['structure']['normalized_score'] * 0.7
        if scores['cohesion']['normalized_score'] < min_cohesion:
            scores['cohesion']['normalized_score'] = min_cohesion

        # 2. Vocabulary also pulls cohesion up, to at least 60% of its score
        vocab_influence = scores['vocabulary']['normalized_score'] * 0.6
        scores['cohesion']['normalized_score'] = max(
            scores['cohesion']['normalized_score'],
            vocab_influence
        )

        # 3. Clarity cannot exceed cohesion by more than 20%
        max_clarity = scores['cohesion']['normalized_score'] * 1.2
        if scores['clarity']['normalized_score'] > max_clarity:
            scores['clarity']['normalized_score'] = max_clarity

        # 4. Clarity is also capped at 110% of the structure score
        struct_max_clarity = scores['structure']['normalized_score'] * 1.1
        scores['clarity']['normalized_score'] = min(
            scores['clarity']['normalized_score'],
            struct_max_clarity
        )

        # Clamp every score to [0, 1]
        for metric in scores:
            scores[metric]['normalized_score'] = max(0.0, min(1.0, scores[metric]['normalized_score']))

        return scores

    except Exception as e:
        logger.error(f"Error in correlate_metrics: {str(e)}")
        return scores
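

# Illustrative sketch (not part of the original pipeline): exercises the
# correlation rules above on a hand-made scores dict. All numbers are invented
# for demonstration.
def _demo_correlate_metrics():
    sample = {
        'vocabulary': {'normalized_score': 0.80, 'details': None},
        'structure': {'normalized_score': 0.90, 'details': None},
        'cohesion': {'normalized_score': 0.40, 'details': None},
        'clarity': {'normalized_score': 0.95, 'details': None},
    }
    adjusted = correlate_metrics(sample)
    # Cohesion rises to max(0.7 * 0.90, 0.6 * 0.80) = 0.63, and clarity is
    # then capped at min(1.2 * 0.63, 1.1 * 0.90) = 0.756.
    return adjusted

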
def analyze_text_dimensions(doc):
    """
    Analyzes the main dimensions of the text while maintaining logical correlations.
    """
    try:
        # Run the four dimension analyzers
        vocab_score, vocab_details = analyze_vocabulary_diversity(doc)
        struct_score = analyze_structure(doc)
        cohesion_score = analyze_cohesion(doc)
        clarity_score, clarity_details = analyze_clarity(doc)

        # Collect the raw scores
        scores = {
            'vocabulary': {
                'normalized_score': vocab_score,
                'details': vocab_details
            },
            'structure': {
                'normalized_score': struct_score,
                'details': None
            },
            'cohesion': {
                'normalized_score': cohesion_score,
                'details': None
            },
            'clarity': {
                'normalized_score': clarity_score,
                'details': clarity_details
            }
        }

        # Enforce the cross-metric correlations
        adjusted_scores = correlate_metrics(scores)

        logger.info(f"""
        Original vs adjusted scores:
        Vocabulary: {vocab_score:.2f} -> {adjusted_scores['vocabulary']['normalized_score']:.2f}
        Structure: {struct_score:.2f} -> {adjusted_scores['structure']['normalized_score']:.2f}
        Cohesion: {cohesion_score:.2f} -> {adjusted_scores['cohesion']['normalized_score']:.2f}
        Clarity: {clarity_score:.2f} -> {adjusted_scores['clarity']['normalized_score']:.2f}
        """)

        return adjusted_scores

    except Exception as e:
        logger.error(f"Error in analyze_text_dimensions: {str(e)}")
        return {
            'vocabulary': {'normalized_score': 0.0, 'details': {}},
            'structure': {'normalized_score': 0.0, 'details': {}},
            'cohesion': {'normalized_score': 0.0, 'details': {}},
            'clarity': {'normalized_score': 0.0, 'details': {}}
        }
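

# Usage sketch (illustrative, not in the original file): the analyzers expect a
# parsed spaCy Doc with a dependency parser. The model name below is an
# assumption; any parser-equipped pipeline should work.
def _demo_analyze_text_dimensions():
    import spacy
    nlp = spacy.load("es_core_news_sm")  # assumed Spanish model
    doc = nlp("El análisis de textos considera varias dimensiones. "
              "Cada dimensión aporta información complementaria sobre el texto.")
    return analyze_text_dimensions(doc)

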
def analyze_clarity(doc):
    """
    Analyzes the clarity of the text, considering multiple factors.
    """
    try:
        sentences = list(doc.sents)
        if not sentences:
            return 0.0, {}

        # 1. Average sentence length (in tokens)
        sentence_lengths = [len(sent) for sent in sentences]
        avg_length = sum(sentence_lengths) / len(sentences)

        length_score = normalize_score(
            value=avg_length,
            metric_type='clarity',
            optimal_length=20,
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 2. Discourse connectors, weighted by part of speech
        connector_count = 0
        connector_weights = {
            'CCONJ': 1.0,   # coordinating conjunctions
            'SCONJ': 1.2,   # subordinating conjunctions
            'ADV': 0.8      # adverbial connectors
        }

        for token in doc:
            if token.pos_ in connector_weights and token.dep_ in ['cc', 'mark', 'advmod']:
                connector_count += connector_weights[token.pos_]

        connectors_per_sentence = connector_count / len(sentences) if sentences else 0
        connector_score = normalize_score(
            value=connectors_per_sentence,
            metric_type='clarity',
            optimal_connections=1.5,
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 3. Clause complexity, approximated by verbs per sentence
        clause_count = 0
        for sent in sentences:
            verbs = [token for token in sent if token.pos_ == 'VERB']
            clause_count += len(verbs)

        complexity_raw = clause_count / len(sentences) if sentences else 0
        complexity_score = normalize_score(
            value=complexity_raw,
            metric_type='clarity',
            optimal_depth=2.0,
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 4. Lexical density: share of content words among alphabetic tokens
        content_words = len([token for token in doc if token.pos_ in ['NOUN', 'VERB', 'ADJ', 'ADV']])
        total_words = len([token for token in doc if token.is_alpha])
        density = content_words / total_words if total_words > 0 else 0

        density_score = normalize_score(
            value=density,
            metric_type='clarity',
            optimal_connections=0.6,
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 5. Weighted combination of the four factors
        weights = {
            'length': 0.3,
            'connectors': 0.3,
            'complexity': 0.2,
            'density': 0.2
        }

        clarity_score = (
            weights['length'] * length_score +
            weights['connectors'] * connector_score +
            weights['complexity'] * complexity_score +
            weights['density'] * density_score
        )

        details = {
            'length_score': length_score,
            'connector_score': connector_score,
            'complexity_score': complexity_score,
            'density_score': density_score,
            'avg_sentence_length': avg_length,
            'connectors_per_sentence': connectors_per_sentence,
            'density': density
        }

        logger.info(f"""
        Clarity scores:
        - Length: {length_score:.2f} (avg={avg_length:.1f} tokens)
        - Connectors: {connector_score:.2f} (avg={connectors_per_sentence:.1f} per sentence)
        - Complexity: {complexity_score:.2f} (avg={complexity_raw:.1f} clauses)
        - Density: {density_score:.2f} ({density*100:.1f}% content words)
        - Final score: {clarity_score:.2f}
        """)

        return clarity_score, details

    except Exception as e:
        logger.error(f"Error in analyze_clarity: {str(e)}")
        return 0.0, {}
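

# Worked example of the weighted combination above (illustrative numbers):
# with length=0.8, connectors=0.7, complexity=0.6 and density=0.9, the final
# clarity score is 0.3*0.8 + 0.3*0.7 + 0.2*0.6 + 0.2*0.9 = 0.75.

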
def analyze_vocabulary_diversity(doc):
    """Improved analysis of vocabulary diversity and quality"""
    try:
        # Basic lexical diversity: unique lemmas over alphabetic tokens
        unique_lemmas = {token.lemma_ for token in doc if token.is_alpha}
        total_words = len([token for token in doc if token.is_alpha])
        basic_diversity = len(unique_lemmas) / total_words if total_words > 0 else 0

        narrative_words = 0
        technical_terms = 0

        for token in doc:
            if token.is_alpha:
                # Heuristic: content words governed by a noun count as technical
                if token.pos_ in ['NOUN', 'VERB', 'ADJ']:
                    if any(parent.pos_ == 'NOUN' for parent in token.ancestors):
                        technical_terms += 1
                # Verbs/adverbs heading main or adverbial clauses read as narrative
                if token.pos_ in ['VERB', 'ADV'] and token.dep_ in ['ROOT', 'advcl']:
                    narrative_words += 1

        # Average sentence length as a rough complexity proxy
        sentence_count = len(list(doc.sents))
        avg_sentence_length = (sum(len(sent) for sent in doc.sents) / sentence_count
                               if sentence_count > 0 else 0)

        weights = {
            'diversity': 0.3,
            'technical': 0.3,
            'narrative': 0.2,
            'complexity': 0.2
        }

        scores = {
            'diversity': basic_diversity,
            'technical': technical_terms / total_words if total_words > 0 else 0,
            'narrative': narrative_words / total_words if total_words > 0 else 0,
            'complexity': min(1.0, avg_sentence_length / 20)
        }

        final_score = sum(weights[key] * scores[key] for key in weights)

        details = {
            'text_type': 'narrative' if scores['narrative'] > scores['technical'] else 'academic',
            'scores': scores
        }

        return final_score, details

    except Exception as e:
        logger.error(f"Error in analyze_vocabulary_diversity: {str(e)}")
        return 0.0, {}
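

# Note on the "technical" heuristic above (illustrative): a token counts as
# technical when one of its dependency ancestors is a noun, as in
# "análisis de datos", where "datos" hangs off the noun "análisis".

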
def analyze_cohesion(doc):
    """Analyzes textual cohesion"""
    try:
        sentences = list(doc.sents)
        if len(sentences) < 2:
            logger.warning("Text too short for cohesion analysis")
            return 0.0

        # 1. Lexical overlap between consecutive sentences
        lexical_connections = 0
        total_possible_connections = 0

        for i in range(len(sentences) - 1):
            # Content lemmas of each sentence in the pair
            sent1_words = {token.lemma_ for token in sentences[i]
                           if token.is_alpha and not token.is_stop}
            sent2_words = {token.lemma_ for token in sentences[i+1]
                           if token.is_alpha and not token.is_stop}

            if sent1_words and sent2_words:
                intersection = len(sent1_words.intersection(sent2_words))
                total_possible = min(len(sent1_words), len(sent2_words))

                if total_possible > 0:
                    lexical_score = intersection / total_possible
                    lexical_connections += lexical_score
                    total_possible_connections += 1

        # 2. Discourse connectors, weighted by part of speech
        connector_count = 0
        connector_types = {
            'CCONJ': 1.0,
            'SCONJ': 1.2,
            'ADV': 0.8
        }

        for token in doc:
            if (token.pos_ in connector_types and
                token.dep_ in ['cc', 'mark', 'advmod'] and
                not token.is_stop):
                connector_count += connector_types[token.pos_]

        # Normalize both signals
        if total_possible_connections > 0:
            lexical_cohesion = lexical_connections / total_possible_connections
        else:
            lexical_cohesion = 0

        if len(sentences) > 1:
            connector_cohesion = min(1.0, connector_count / (len(sentences) - 1))
        else:
            connector_cohesion = 0

        # Weighted combination: lexical overlap dominates
        weights = {
            'lexical': 0.7,
            'connectors': 0.3
        }

        cohesion_score = (
            weights['lexical'] * lexical_cohesion +
            weights['connectors'] * connector_cohesion
        )

        logger.info(f"""
        Cohesion analysis:
        - Lexical connections found: {lexical_connections}
        - Possible connections: {total_possible_connections}
        - Lexical cohesion score: {lexical_cohesion}
        - Connectors found: {connector_count}
        - Connector cohesion score: {connector_cohesion}
        - Final score: {cohesion_score}
        """)

        return cohesion_score

    except Exception as e:
        logger.error(f"Error in analyze_cohesion: {str(e)}")
        return 0.0
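

# Worked example of the pairwise overlap above (illustrative): consecutive
# sentences with lemma sets {"casa", "grande", "vivir"} and {"casa", "pequeño"}
# share one lemma; the smaller set has two, so that pair scores 1/2 = 0.5.

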
def analyze_structure(doc):
    """Scores syntactic structure from the dependency tree of each sentence root."""
    try:
        if len(doc) == 0:
            return 0.0

        structure_scores = []
        for token in doc:
            if token.dep_ == 'ROOT':
                result = get_dependency_depths(token)
                structure_scores.append(result['final_score'])

        if not structure_scores:
            return 0.0

        return min(1.0, sum(structure_scores) / len(structure_scores))

    except Exception as e:
        logger.error(f"Error in analyze_structure: {str(e)}")
        return 0.0


def get_dependency_depths(token, depth=0, analyzed_tokens=None):
    """
    Analyzes the depth and quality of dependency relations.

    Args:
        token: Token to analyze
        depth: Current depth in the tree
        analyzed_tokens: Set used to avoid cycles during the walk

    Returns:
        dict: Detailed information about the dependencies
            - depths: List of depths
            - relations: Dict of relation types found, with counts
            - complexity_score: Weighted complexity score
            - max_depth / avg_depth: Depth statistics of the subtree
            - relation_diversity: Number of distinct relation types
            - final_score: complexity_score scaled by structure bonuses
    """
    if analyzed_tokens is None:
        analyzed_tokens = set()

    # Skip tokens already visited (guards against cycles)
    if token.i in analyzed_tokens:
        return {
            'depths': [],
            'relations': {},
            'complexity_score': 0
        }

    analyzed_tokens.add(token.i)

    dependency_weights = {
        # Core arguments
        'nsubj': 1.2,
        'obj': 1.1,
        'iobj': 1.1,
        'ROOT': 1.3,

        # Modifiers
        'amod': 0.8,
        'advmod': 0.8,
        'nmod': 0.9,

        # Subordinate clauses
        'csubj': 1.4,
        'ccomp': 1.3,
        'xcomp': 1.2,
        'advcl': 1.2,

        # Coordination
        'conj': 1.1,
        'cc': 0.7,
        'mark': 0.8,

        # Function words
        'det': 0.5,
        'case': 0.5,
        'punct': 0.1
    }

    # Score for the current token: relation weight scaled by depth
    current_result = {
        'depths': [depth],
        'relations': {token.dep_: 1},
        'complexity_score': dependency_weights.get(token.dep_, 0.5) * (depth + 1)
    }

    # Recurse into children and merge their results
    for child in token.children:
        child_result = get_dependency_depths(child, depth + 1, analyzed_tokens)

        current_result['depths'].extend(child_result['depths'])

        for rel, count in child_result['relations'].items():
            current_result['relations'][rel] = current_result['relations'].get(rel, 0) + count

        current_result['complexity_score'] += child_result['complexity_score']

    # Subtree statistics
    current_result['max_depth'] = max(current_result['depths'])
    current_result['avg_depth'] = sum(current_result['depths']) / len(current_result['depths'])
    current_result['relation_diversity'] = len(current_result['relations'])

    # Bonuses for richer structures
    structure_bonus = 0

    # Clausal subjects or complements indicate subordination
    if 'csubj' in current_result['relations'] or 'ccomp' in current_result['relations']:
        structure_bonus += 0.3

    # Coordination with an explicit conjunction
    if 'conj' in current_result['relations'] and 'cc' in current_result['relations']:
        structure_bonus += 0.2

    # At least two distinct modifier types
    if len(set(['amod', 'advmod', 'nmod']) & set(current_result['relations'])) >= 2:
        structure_bonus += 0.2

    current_result['final_score'] = (
        current_result['complexity_score'] * (1 + structure_bonus)
    )

    return current_result
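

# Illustrative sketch (not in the original file): walks the dependency tree of
# one parsed sentence. Assumes spaCy with a parser; the model name is an
# assumption.
def _demo_dependency_depths():
    import spacy
    nlp = spacy.load("es_core_news_sm")  # assumed Spanish model
    doc = nlp("El informe que escribió la comisión explica los resultados.")
    root = next(token for token in doc if token.dep_ == 'ROOT')
    result = get_dependency_depths(root)
    return result['max_depth'], result['relation_diversity'], result['final_score']

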
def normalize_score(value, metric_type,
                    min_threshold=0.0, target_threshold=1.0,
                    range_factor=2.0, optimal_length=None,
                    optimal_connections=None, optimal_depth=None):
    """
    Normalizes a value using metric-specific thresholds.

    Args:
        value: Value to normalize
        metric_type: Metric type ('vocabulary', 'structure', 'cohesion', 'clarity')
        min_threshold: Minimum acceptable value
        target_threshold: Target value
        range_factor: Factor used to widen the acceptable range
        optimal_length: Optimal length (optional)
        optimal_connections: Optimal number of connections (optional)
        optimal_depth: Optimal structure depth (optional)

    Returns:
        float: Normalized value between 0 and 1
    """
    try:
        # Default thresholds per metric
        METRIC_THRESHOLDS = {
            'vocabulary': {
                'min': 0.60,
                'target': 0.75,
                'range_factor': 1.5
            },
            'structure': {
                'min': 0.65,
                'target': 0.80,
                'range_factor': 1.8
            },
            'cohesion': {
                'min': 0.55,
                'target': 0.70,
                'range_factor': 1.6
            },
            'clarity': {
                'min': 0.60,
                'target': 0.75,
                'range_factor': 1.7
            }
        }

        if value < 0:
            logger.warning(f"Negative value received: {value}")
            return 0.0

        if value == 0:
            logger.warning("Zero value received")
            return 0.0

        # Resolve thresholds for the metric, falling back to the arguments
        thresholds = METRIC_THRESHOLDS.get(metric_type, {
            'min': min_threshold,
            'target': target_threshold,
            'range_factor': range_factor
        })

        # Optional optimal reference for raw (non-proportion) values
        if optimal_depth is not None:
            reference = optimal_depth
        elif optimal_connections is not None:
            reference = optimal_connections
        elif optimal_length is not None:
            reference = optimal_length
        else:
            reference = None

        if reference is not None and reference <= 0:
            logger.warning(f"Invalid reference value: {reference}")
            return 0.0

        # Raw values such as sentence length are expressed as a proportion of
        # their optimum so the 0-1 thresholds below apply uniformly
        ratio = value / reference if reference is not None else value

        if ratio < thresholds['min']:
            # Below minimum: scale linearly into [0, 0.5)
            score = (ratio / thresholds['min']) * 0.5
        elif ratio < thresholds['target']:
            # Between minimum and target: scale linearly into [0.5, 1.0)
            range_size = thresholds['target'] - thresholds['min']
            progress = (ratio - thresholds['min']) / range_size
            score = 0.5 + (progress * 0.5)
        else:
            # At or above target
            score = 1.0

        # Penalize values far beyond the target (e.g. overly long sentences)
        if ratio > (thresholds['target'] * thresholds['range_factor']):
            excess = (ratio - thresholds['target']) / (thresholds['target'] * thresholds['range_factor'])
            score = max(0.7, 1.0 - excess)

        return max(0.0, min(1.0, score))

    except Exception as e:
        logger.error(f"Error in normalize_score: {str(e)}")
        return 0.0
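

# Illustrative sketch of the three scoring bands, using the 'cohesion'
# thresholds (min=0.55, target=0.70); expected values are approximate.
def _demo_normalize_score():
    low = normalize_score(0.30, 'cohesion')   # below min -> (0.30/0.55)*0.5, about 0.27
    mid = normalize_score(0.60, 'cohesion')   # between min and target -> about 0.67
    high = normalize_score(0.70, 'cohesion')  # at target -> 1.0
    return low, mid, high

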
# NOTE: RECOMMENDATIONS and TEXT_TYPES are not defined in this module; they are
# assumed to be provided elsewhere in the app (e.g. a translations/config
# module). RECOMMENDATIONS maps language codes to per-dimension tips and UI
# strings; TEXT_TYPES maps each text type to per-dimension thresholds.
def generate_recommendations(metrics, text_type, lang_code='es'):
    """
    Generates personalized recommendations based on the text metrics and text type.

    Args:
        metrics: Dict with the analyzed metrics
        text_type: Text type ('academic_article', 'student_essay', 'general_communication')
        lang_code: Language code for the recommendations (es, en, uk)

    Returns:
        dict: Recommendations organized by category, in the corresponding language
    """
    try:
        logger.info(f"generate_recommendations called with language: {lang_code}")
        logger.info(f"Languages available in RECOMMENDATIONS: {list(RECOMMENDATIONS.keys())}")

        # Thresholds for the requested text type
        thresholds = TEXT_TYPES[text_type]['thresholds']

        # Fall back to Spanish if the language is not supported
        if lang_code not in RECOMMENDATIONS:
            logger.warning(f"Language {lang_code} not supported for recommendations, falling back to Spanish")
            lang_code = 'es'

        translations = RECOMMENDATIONS[lang_code]

        recommendations = {
            'vocabulary': [],
            'structure': [],
            'cohesion': [],
            'clarity': [],
            'specific': [],
            'priority': {
                'area': 'general',
                'tips': []
            },
            'text_type_name': translations['text_types'][text_type],
            'dimension_names': translations['dimension_names'],
            'ui_text': {
                'priority_intro': translations['priority_intro'],
                'detailed_recommendations': translations['detailed_recommendations'],
                'save_button': translations['save_button'],
                'save_success': translations['save_success'],
                'save_error': translations['save_error'],
                'area_priority': translations['area_priority']
            }
        }

        # Classify each dimension as low/medium/high against its thresholds
        dimensions = ['vocabulary', 'structure', 'cohesion', 'clarity']
        scores = {}

        for dim in dimensions:
            score = metrics[dim]['normalized_score']
            scores[dim] = score

            if score < thresholds[dim]['min']:
                level = 'low'
            elif score < thresholds[dim]['target']:
                level = 'medium'
            else:
                level = 'high'

            recommendations[dim] = translations[dim][level]

        # Text-type specific tips
        recommendations['specific'] = translations[text_type]

        # The lowest-scoring dimension becomes the priority area
        priority_dimension = min(scores, key=scores.get)
        recommendations['priority']['area'] = priority_dimension
        recommendations['priority']['tips'] = recommendations[priority_dimension]

        logger.info(f"Generated recommendations in {lang_code} for text type {text_type}")
        return recommendations

    except Exception as e:
        logger.error(f"Error in generate_recommendations: {str(e)}")

        # Minimal fallback recommendations if anything above fails
        fallback_translations = {
            'en': {
                'basic_recommendations': {
                    'vocabulary': ["Try enriching your vocabulary"],
                    'structure': ["Work on the structure of your sentences"],
                    'cohesion': ["Improve the connection between your ideas"],
                    'clarity': ["Try to express your ideas more clearly"],
                    'specific': ["Adapt your text according to its purpose"],
                },
                'dimension_names': {
                    'vocabulary': 'Vocabulary',
                    'structure': 'Structure',
                    'cohesion': 'Cohesion',
                    'clarity': 'Clarity',
                    'general': 'General'
                },
                'ui_text': {
                    'priority_intro': "This is where you should focus your efforts.",
                    'detailed_recommendations': "Detailed recommendations",
                    'save_button': "Save analysis",
                    'save_success': "Analysis saved successfully",
                    'save_error': "Error saving analysis",
                    'area_priority': "Priority area"
                }
            },
            'uk': {
                'basic_recommendations': {
                    'vocabulary': ["Розширте свій словниковий запас"],
                    'structure': ["Покращіть структуру ваших речень"],
                    'cohesion': ["Покращіть зв'язок між вашими ідеями"],
                    'clarity': ["Висловлюйте свої ідеї ясніше"],
                    'specific': ["Адаптуйте свій текст відповідно до його мети"],
                },
                'dimension_names': {
                    'vocabulary': 'Словниковий запас',
                    'structure': 'Структура',
                    'cohesion': 'Зв\'язність',
                    'clarity': 'Ясність',
                    'general': 'Загальне'
                },
                'ui_text': {
                    'priority_intro': "Це область, де ви повинні зосередити свої зусилля.",
                    'detailed_recommendations': "Детальні рекомендації",
                    'save_button': "Зберегти аналіз",
                    'save_success': "Аналіз успішно збережено",
                    'save_error': "Помилка при збереженні аналізу",
                    'area_priority': "Пріоритетна область"
                }
            },
            'es': {
                'basic_recommendations': {
                    'vocabulary': ["Intenta enriquecer tu vocabulario"],
                    'structure': ["Trabaja en la estructura de tus oraciones"],
                    'cohesion': ["Mejora la conexión entre tus ideas"],
                    'clarity': ["Busca expresar tus ideas con mayor claridad"],
                    'specific': ["Adapta tu texto según su propósito"],
                },
                'dimension_names': {
                    'vocabulary': 'Vocabulario',
                    'structure': 'Estructura',
                    'cohesion': 'Cohesión',
                    'clarity': 'Claridad',
                    'general': 'General'
                },
                'ui_text': {
                    'priority_intro': "Esta es el área donde debes concentrar tus esfuerzos.",
                    'detailed_recommendations': "Recomendaciones detalladas",
                    'save_button': "Guardar análisis",
                    'save_success': "Análisis guardado con éxito",
                    'save_error': "Error al guardar el análisis",
                    'area_priority': "Área prioritaria"
                }
            }
        }

        current_lang = fallback_translations.get(lang_code,
                                                 fallback_translations.get('en',
                                                                           fallback_translations['es']))

        basic_recommendations = current_lang['basic_recommendations']

        return {
            'vocabulary': basic_recommendations['vocabulary'],
            'structure': basic_recommendations['structure'],
            'cohesion': basic_recommendations['cohesion'],
            'clarity': basic_recommendations['clarity'],
            'specific': basic_recommendations['specific'],
            'priority': {
                'area': 'general',
                'tips': ["Busca retroalimentación específica de un tutor o profesor"]
            },
            'dimension_names': current_lang['dimension_names'],
            'ui_text': current_lang['ui_text']
        }


def generate_sentence_graphs(doc):
    """Generates sentence-structure visualizations (placeholder: empty figure)"""
    fig, ax = plt.subplots(figsize=(10, 6))
    # Close the figure so pyplot does not draw it twice; the returned
    # Figure object can still be rendered by the caller
    plt.close()
    return fig


def generate_word_connections(doc):
    """Generates a word-connection network (placeholder: empty figure)"""
    fig, ax = plt.subplots(figsize=(10, 6))
    plt.close()
    return fig


def generate_connection_paths(doc):
    """Generates connection patterns (placeholder: empty figure)"""
    fig, ax = plt.subplots(figsize=(10, 6))
    plt.close()
    return fig


def create_vocabulary_network(doc):
    """
    Generates the vocabulary network graph.
    """
    G = nx.Graph()

    # Count content-word frequencies
    words = [token.text.lower() for token in doc if token.is_alpha and not token.is_stop]
    word_freq = Counter(words)

    # One node per word, sized by frequency
    for word, freq in word_freq.items():
        G.add_node(word, size=freq)

    # Connect words that co-occur within a sliding window; repeated
    # co-occurrences increase the edge weight
    window_size = 5
    for i in range(len(words) - window_size):
        window = words[i:i+window_size]
        for w1, w2 in combinations(set(window), 2):
            if G.has_edge(w1, w2):
                G[w1][w2]['weight'] += 1
            else:
                G.add_edge(w1, w2, weight=1)

    # Draw the graph with a force-directed layout
    fig, ax = plt.subplots(figsize=(12, 8))
    pos = nx.spring_layout(G)

    nx.draw_networkx_nodes(G, pos,
                           node_size=[G.nodes[node]['size']*100 for node in G.nodes],
                           node_color='lightblue',
                           alpha=0.7)

    nx.draw_networkx_edges(G, pos,
                           width=[G[u][v]['weight']*0.5 for u, v in G.edges],
                           alpha=0.5)

    nx.draw_networkx_labels(G, pos)

    plt.title("Vocabulary Network")
    plt.axis('off')
    return fig
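

# Usage sketch (assumption): in the Streamlit app these figures would be
# rendered with st.pyplot, e.g.
#
#     fig = create_vocabulary_network(doc)
#     st.pyplot(fig)

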
def create_syntax_complexity_graph(doc):
    """
    Generates the syntactic-complexity arc diagram.
    Shows the dependency structure, with colors based on complexity.
    """
    try:
        sentences = list(doc.sents)
        if not sentences:
            return None

        fig, ax = plt.subplots(figsize=(12, len(sentences) * 2))

        # Color scale for dependency depth
        depth_colors = plt.cm.viridis(np.linspace(0, 1, 6))

        y_offset = 0
        max_x = 0

        for sent in sentences:
            words = [token.text for token in sent]
            x_positions = range(len(words))
            max_x = max(max_x, len(words))

            # Baseline for this sentence
            plt.plot(x_positions, [y_offset] * len(words), 'k-', alpha=0.2)
            plt.scatter(x_positions, [y_offset] * len(words), alpha=0)

            # Word labels under the baseline
            for i, word in enumerate(words):
                plt.annotate(word, (i, y_offset), xytext=(0, -10),
                             textcoords='offset points', ha='center')

            # One arc per dependency (dependent -> head)
            for token in sent:
                if token.dep_ != "ROOT":
                    # Depth of the token in the dependency tree
                    depth = 0
                    current = token
                    while current.head != current:
                        depth += 1
                        current = current.head

                    # Positions relative to the start of the sentence
                    start = token.i - sent[0].i
                    end = token.head.i - sent[0].i

                    # Arc height grows with the distance spanned
                    height = 0.5 * abs(end - start)

                    # Color encodes tree depth
                    color = depth_colors[min(depth, len(depth_colors)-1)]

                    arc = patches.Arc((min(start, end) + abs(end - start)/2, y_offset),
                                      width=abs(end - start),
                                      height=height,
                                      angle=0,
                                      theta1=0,
                                      theta2=180,
                                      color=color,
                                      alpha=0.6)
                    ax.add_patch(arc)

            y_offset -= 2

        plt.xlim(-1, max_x)
        plt.ylim(y_offset - 1, 1)
        plt.axis('off')
        plt.title("Syntactic Complexity")

        return fig

    except Exception as e:
        logger.error(f"Error in create_syntax_complexity_graph: {str(e)}")
        return None


def create_cohesion_heatmap(doc):
    """Generates a heatmap showing cohesion between paragraphs/sentences."""
    try:
        sentences = list(doc.sents)
        n_sentences = len(sentences)

        if n_sentences < 2:
            return None

        similarity_matrix = np.zeros((n_sentences, n_sentences))

        # Pairwise Jaccard similarity over content lemmas
        for i in range(n_sentences):
            for j in range(n_sentences):
                sent1_lemmas = {token.lemma_ for token in sentences[i]
                                if token.is_alpha and not token.is_stop}
                sent2_lemmas = {token.lemma_ for token in sentences[j]
                                if token.is_alpha and not token.is_stop}

                if sent1_lemmas and sent2_lemmas:
                    intersection = len(sent1_lemmas & sent2_lemmas)
                    union = len(sent1_lemmas | sent2_lemmas)
                    similarity_matrix[i, j] = intersection / union if union > 0 else 0

        fig, ax = plt.subplots(figsize=(10, 8))

        sns.heatmap(similarity_matrix,
                    cmap='YlOrRd',
                    square=True,
                    xticklabels=False,
                    yticklabels=False,
                    cbar_kws={'label': 'Cohesion'},
                    ax=ax)

        plt.title("Textual Cohesion Map")
        plt.xlabel("Sentences")
        plt.ylabel("Sentences")

        plt.tight_layout()
        return fig

    except Exception as e:
        logger.error(f"Error in create_cohesion_heatmap: {str(e)}")
        return None
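

# Worked example of the Jaccard similarity above (illustrative): lemma sets
# {"casa", "grande", "vivir"} and {"casa", "jardín"} intersect in one lemma and
# union to four, so that cell would hold 1/4 = 0.25.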