| | |
"""
Simplified Context-First Clue Generator
A focused prototype that demonstrates context-based clue generation
without heavy dependencies or complex model loading.

Key improvements over test_context_prototype.py:
1. Multiple context sources (Wikipedia, dictionary patterns, word structure)
2. Smart pattern-based clue generation
3. Handles technical terms like XANTHIC
4. Production-ready structure with clear separation of concerns
"""
| |
|
import json
import re
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from urllib.parse import quote

import requests
| |
|
| |
|
@dataclass
class ClueResult:
    """Outcome of one clue-generation attempt for a single word.

    Attributes:
        word: The answer the clue was generated for.
        clue: The generated clue text.
        context_source: Label of the context source the clue was built from.
        context_type: Label of the kind of context that was used.
        confidence: Score attached to the clue by the generator.
        generation_time: Elapsed time spent generating the clue, in seconds.
    """

    word: str
    clue: str
    context_source: str
    context_type: str
    confidence: float
    generation_time: float
| |
|
| |
|
class ContextExtractor:
    """Extract context from multiple sources for better coverage.

    Three independent lookups are offered, each returning either ``None`` or
    a dict tagged with a ``'type'`` key:

    * ``get_wikipedia_context`` - page summary from the Wikipedia REST API,
      cached on disk (``'type': 'wikipedia'``).
    * ``get_technical_context`` - meaning derived from known roots and
      suffixes inside the word (``'type': 'technical'``).
    * ``get_pattern_context`` - hard-coded name lists and place-name endings
      (``'type': 'pattern'``).
    """

    def __init__(self):
        self.wikipedia_api = "https://en.wikipedia.org/api/rest_v1/page/summary/"
        # Cached Wikipedia summaries are stored next to this module.
        self.cache_dir = Path(__file__).parent / "context_cache"
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        # Word roots and the meaning they contribute.
        self.technical_patterns = {
            'xanth': 'yellow or yellowish',
            'chrom': 'color or pigment',
            'hydro': 'water or liquid',
            'therm': 'heat or temperature',
            'bio': 'life or living',
            'geo': 'earth or ground',
            'aero': 'air or flight',
            'pyro': 'fire or heat',
            'crypto': 'hidden or secret',
            'macro': 'large scale',
            'micro': 'small scale'
        }

        # Word endings and the qualifier they add in front of a root meaning.
        self.suffix_meanings = {
            'ic': 'relating to or characterized by',
            'ous': 'having the quality of',
            'tion': 'the act or process of',
            'ity': 'the state or quality of',
            'ment': 'the result or product of',
            'able': 'capable of being',
            'ible': 'capable of being',
            'ful': 'full of or characterized by',
            'less': 'without or lacking',
            'ish': 'somewhat or relating to'
        }

    def _cache_path(self, word: str) -> Path:
        """Return the cache file for *word*, always inside ``cache_dir``."""
        # Replace anything that is not a word character or hyphen so path
        # separators and '..' cannot point outside the cache directory.
        safe_name = re.sub(r'[^\w-]', '_', word.lower())
        return self.cache_dir / f"wiki_{safe_name}.json"

    def get_wikipedia_context(self, word: str) -> Optional[Dict]:
        """Get Wikipedia context for proper nouns and entities.

        Looks in the on-disk cache first, then queries the summary endpoint
        with lower-case, capitalised and upper-case variants of *word*.

        Returns:
            A dict with keys ``'type'``, ``'title'``, ``'extract'`` and
            ``'description'``, or ``None`` when the word is blank or no
            variant yields a usable 200 response.
        """
        if not word or not word.strip():
            return None

        cache_file = self._cache_path(word)

        if cache_file.exists():
            try:
                with open(cache_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except (OSError, ValueError):
                # Unreadable or corrupt cache entry: fall through and refetch.
                pass

        # dict.fromkeys drops duplicate variants while keeping their order,
        # so a word whose casings coincide is only requested once.
        variations = list(dict.fromkeys(
            [word.lower(), word.capitalize(), word.upper()]
        ))

        for variant in variations:
            try:
                response = requests.get(
                    # Percent-encode the title so '/', '?' or '#' in the word
                    # cannot alter the request path.
                    f"{self.wikipedia_api}{quote(variant, safe='')}",
                    headers={'User-Agent': 'CrosswordCluePrototype/2.0'},
                    timeout=3
                )
                if response.status_code != 200:
                    continue
                data = response.json()
            except (requests.RequestException, ValueError):
                # Network failure or a body that is not JSON: try the next variant.
                continue

            if not isinstance(data, dict):
                continue

            result = {
                'type': 'wikipedia',
                'title': data.get('title', ''),
                'extract': data.get('extract', ''),
                'description': data.get('description', '')
            }

            # Caching is best effort; a failed write must not lose the result.
            try:
                with open(cache_file, 'w', encoding='utf-8') as f:
                    json.dump(result, f)
            except OSError:
                pass

            return result

        return None

    def get_technical_context(self, word: str) -> Optional[Dict]:
        """Extract context from word structure for technical terms.

        The first known root found anywhere in the word is used; if the word
        also ends in a known suffix, the suffix qualifier is placed in front
        of the root meaning (e.g. "relating to ... yellow or yellowish").

        Returns:
            A ``'technical'`` context dict, or ``None`` if no root matches.
        """
        word_lower = word.lower()

        for root, meaning in self.technical_patterns.items():
            if root not in word_lower:
                continue

            for suffix, suffix_meaning in self.suffix_meanings.items():
                if word_lower.endswith(suffix):
                    return {
                        'type': 'technical',
                        'root': root,
                        'root_meaning': meaning,
                        'suffix': suffix,
                        'suffix_meaning': suffix_meaning,
                        # Qualifier first so the phrase reads as English.
                        'full_meaning': f"{suffix_meaning} {meaning}"
                    }

            return {
                'type': 'technical',
                'root': root,
                'root_meaning': meaning,
                'full_meaning': meaning
            }

        return None

    def get_pattern_context(self, word: str) -> Optional[Dict]:
        """Extract context from word patterns and structure.

        Checks, in order: a fixed list of cricketer surnames, common Indian
        city-name endings, and a fixed list of Indian place names.

        Returns:
            A ``'pattern'`` context dict, or ``None`` if nothing matches.
        """
        word_lower = word.lower()

        cricket_names = ['panesar', 'tendulkar', 'gavaskar', 'kapil', 'dhoni', 'kohli']
        if word_lower in cricket_names:
            return {
                'type': 'pattern',
                'category': 'cricket_player',
                'nationality': 'Indian' if word_lower != 'panesar' else 'English'
            }

        if word_lower.endswith(('pur', 'bad', 'garh')):
            return {
                'type': 'pattern',
                'category': 'indian_city'
            }

        indian_places = ['rajouri', 'delhi', 'mumbai', 'chennai', 'kolkata']
        if word_lower in indian_places:
            return {
                'type': 'pattern',
                'category': 'indian_location'
            }

        return None

    def get_all_contexts(self, word: str) -> List[Dict]:
        """Get context from all available sources.

        Returns:
            The non-empty contexts in a fixed order: Wikipedia, technical,
            pattern. The list is empty when no source matches.
        """
        lookups = (
            self.get_wikipedia_context,
            self.get_technical_context,
            self.get_pattern_context,
        )
        contexts = []
        for lookup in lookups:
            context = lookup(word)
            if context:
                contexts.append(context)
        return contexts
| |
|
| |
|
class SmartClueGenerator:
    """Generate clues based on extracted context.

    ``generate_clue`` asks the extractor for every available context, takes
    the first one, and hands it to the ``generate_from_*`` method that
    matches the context's ``'type'``.
    """

    # Geographic keywords matched as whole words, so that e.g. "electricity"
    # is not mistaken for a mention of a "city".
    _GEO_TERMS = re.compile(
        r'\b(?:districts?|city|cities|towns?|villages?|regions?)\b'
    )
    # "is a", "was the", "are" ... left dangling once the subject is removed.
    _LEADING_COPULA = re.compile(
        r'^(?:is|are|was|were)\s+(?:(?:a|an|the)\s+)?', re.IGNORECASE
    )

    def __init__(self):
        self.extractor = ContextExtractor()

    @staticmethod
    def _upper_first(text: str) -> str:
        """Upper-case the first character without lower-casing the rest."""
        return text[:1].upper() + text[1:]

    def generate_from_wikipedia(self, word: str, context: Dict) -> str:
        """Generate clue from Wikipedia context.

        Tries, in order: cricket keywords, geographic keywords, a short
        description (five words or fewer), then the opening of the first
        sentence of the extract with the answer itself removed.

        Args:
            word: The answer word; removed from extract-based clues.
            context: Dict that may carry ``'extract'`` and ``'description'``.

        Returns:
            The clue text, or ``"Notable <word>"`` when nothing usable exists.
        """
        raw_extract = (context.get('extract') or '').strip()
        raw_description = (context.get('description') or '').strip()
        # Lower-cased copy is used for keyword matching only; output keeps
        # the source casing so proper nouns survive.
        extract = raw_extract.lower()

        if 'cricket' in extract:
            if 'english' in extract:
                return "English cricketer"
            if 'indian' in extract:
                return "Indian cricketer"
            return "Cricket player"

        if self._GEO_TERMS.search(extract):
            if 'kashmir' in extract or 'jammu' in extract:
                return "Kashmir district"
            if 'india' in extract:
                return "Indian district"
            return "Geographic location"

        if raw_description and len(raw_description.split()) <= 5:
            return self._upper_first(raw_description)

        if raw_extract:
            first_sentence = raw_extract.split('.')[0]

            answer = word.strip()
            if answer:
                # Drop the answer and its inflections ("Ants" for "ant")
                # without touching words that merely contain it ("important").
                first_sentence = re.sub(
                    rf'\b{re.escape(answer)}\w*', '', first_sentence,
                    flags=re.IGNORECASE,
                )

            first_sentence = self._LEADING_COPULA.sub('', first_sentence.strip())

            clue = ' '.join(first_sentence.split()[:6])
            if clue and len(clue) < 50:
                return self._upper_first(clue)

        return f"Notable {word.lower()}"

    def generate_from_technical(self, word: str, context: Dict) -> str:
        """Generate clue from technical/etymological context.

        When both a root and a suffix meaning are present the clue is built
        as "<suffix qualifier> <root meaning>", independent of how
        ``'full_meaning'`` was assembled.

        Args:
            word: The answer word (not used in the clue text).
            context: Dict that may carry ``'root_meaning'``,
                ``'suffix_meaning'`` and ``'full_meaning'``.

        Returns:
            The clue text, or ``"Technical term"`` when the context is empty.
        """
        verbose_qualifier = 'relating to or characterized by'
        root_meaning = context.get('root_meaning', '')
        suffix_meaning = context.get('suffix_meaning', '')
        full_meaning = context.get('full_meaning', '')

        if root_meaning and suffix_meaning:
            qualifier = suffix_meaning.replace(verbose_qualifier, 'relating to')
            return f"{qualifier} {root_meaning}".capitalize()

        if full_meaning:
            return full_meaning.replace(verbose_qualifier, 'relating to').capitalize()

        if root_meaning:
            return f"Related to {root_meaning}"

        return "Technical term"

    def generate_from_pattern(self, word: str, context: Dict) -> str:
        """Generate clue from pattern matching.

        Args:
            word: The answer word (not used in the clue text).
            context: Dict carrying ``'category'`` and, for cricketers,
                optionally ``'nationality'``.

        Returns:
            A category-based clue, or ``"Proper noun"`` for unknown categories.
        """
        category = context.get('category', '')

        if category == 'cricket_player':
            nationality = context.get('nationality', '')
            if nationality:
                return f"{nationality} cricketer"
            return "Cricket player"

        if category == 'indian_city':
            return "Indian city"

        if category == 'indian_location':
            return "Indian location"

        return "Proper noun"

    def generate_clue(self, word: str) -> "ClueResult":
        """Generate the best possible clue for a word.

        Only the first context returned by the extractor is used. Confidence
        is fixed per context type: 0.9 wikipedia, 0.8 technical, 0.6 pattern,
        0.3 unknown type, 0.1 when no context was found at all.

        Returns:
            A ``ClueResult`` whose ``word`` is upper-cased and whose
            ``generation_time`` is the elapsed time of this call in seconds.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative if the wall clock is adjusted mid-call.
        start_time = time.perf_counter()

        contexts = self.extractor.get_all_contexts(word)

        if not contexts:
            return ClueResult(
                word=word.upper(),
                clue=f"Word with {len(word)} letters",
                context_source="none",
                context_type="fallback",
                confidence=0.1,
                generation_time=time.perf_counter() - start_time
            )

        best_context = contexts[0]
        context_type = best_context.get('type', 'unknown')

        # Context type -> (clue builder, confidence assigned to its output).
        handlers = {
            'wikipedia': (self.generate_from_wikipedia, 0.9),
            'technical': (self.generate_from_technical, 0.8),
            'pattern': (self.generate_from_pattern, 0.6),
        }

        handler = handlers.get(context_type)
        if handler is not None:
            build_clue, confidence = handler
            clue = build_clue(word, best_context)
        else:
            clue = "Crossword answer"
            confidence = 0.3

        return ClueResult(
            word=word.upper(),
            clue=clue,
            context_source=context_type,
            context_type=context_type,
            confidence=confidence,
            generation_time=time.perf_counter() - start_time
        )
| |
|
| |
|
def test_prototype():
    """Test the simplified context-first prototype.

    Runs ``SmartClueGenerator.generate_clue`` over a fixed list of words and
    prints, for each word, the clue, its source, confidence and timing,
    followed by a summary: success rate (confidence above 0.5), a per-source
    breakdown, and a quality table.
    """
    print("Simplified Context-First Clue Generator")
    print("=" * 60)

    test_words = [
        "panesar",
        "tendulkar",
        "rajouri",
        "xanthic",
        "serendipity",
        "pyrolysis",
        "hyderabad",
    ]

    generator = SmartClueGenerator()
    results = []

    for word in test_words:
        print(f"\nProcessing: {word.upper()}")
        result = generator.generate_clue(word)
        results.append(result)

        print(f"Clue: \"{result.clue}\"")
        print(f"Source: {result.context_source}")
        print(f"⚡ Confidence: {result.confidence:.1%}")
        print(f"⏱️ Time: {result.generation_time:.2f}s")

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)

    successful = [r for r in results if r.confidence > 0.5]
    success_pct = len(successful) / len(results) * 100
    print(f"✅ Success rate: {len(successful)}/{len(results)} ({success_pct:.0f}%)")

    # Group results by the context source that produced them.
    by_source = {}
    for r in results:
        by_source.setdefault(r.context_source, []).append(r)

    print("\nBy Context Source:")
    for source, items in by_source.items():
        avg_confidence = sum(i.confidence for i in items) / len(items)
        print(f"  {source}: {len(items)} words (avg confidence: {avg_confidence:.1%})")

    print("\n🎯 Quality Comparison:")
    print("Word        | Generated Clue              | Quality")
    print("-" * 60)
    for r in results:
        if r.confidence > 0.7:
            quality = "✅ Good"
        elif r.confidence > 0.4:
            quality = "Fair"
        else:
            quality = "❌ Poor"
        print(f"{r.word:11} | {r.clue:27} | {quality}")
| |
|
| |
|
# Run the prototype report only when this file is executed as a script.
if __name__ == "__main__":
    test_prototype()