| | |
| | """ |
| | Test adaptive beta fix with full vocabulary to see if it now correctly |
| | uses the adjusted threshold for filtering |
| | """ |
| |
|
| | import os |
| | import sys |
| | import logging |
| |
|
| | |
# Configure root logging at INFO level; every record is prefixed with a timestamp.
logging.basicConfig(
    format='%(asctime)s - %(message)s',
    level=logging.INFO,
)
| |
|
def setup_environment():
    """Prepare cache locations and the import path for the backend sources.

    Sets ``HF_HOME``, ``TRANSFORMERS_CACHE`` and ``SENTENCE_TRANSFORMERS_HOME``
    to the absolute path of the ``cache-dir`` folder one level above this
    file's directory, puts ``../crossword-app/backend-py/src`` at the front of
    ``sys.path`` if it is not already listed, and prints the cache directory.
    """
    script_dir = os.path.dirname(__file__)

    # All three cache variables share the same directory.
    cache_dir = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
    for env_name in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
        os.environ[env_name] = cache_dir

    # Make the backend packages importable; skip if a previous call already did.
    backend_path = os.path.abspath(
        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
    )
    if backend_path not in sys.path:
        sys.path.insert(0, backend_path)

    print(f"Using cache directory: {cache_dir}")
| |
|
def test_adaptive_fix():
    """Exercise multi-theme word generation with the adaptive soft-minimum settings.

    Configures the soft-minimum environment variables, builds a
    ``ThematicWordService`` and runs two three-topic requests, printing the
    number of generated words and the top-ranked entries for each.

    Any exception raised while importing, initializing or querying the service
    is caught and reported with a traceback rather than re-raised, so this
    function always returns ``None``.
    """
    setup_environment()

    print("🔧 Testing Adaptive Beta Fix")
    print("=" * 50)

    # Set before the service is imported and constructed below
    # (presumably it reads these at that point -- confirm against the service).
    os.environ.update({
        'MULTI_TOPIC_METHOD': 'soft_minimum',
        'SOFT_MIN_BETA': '10.0',
        'SOFT_MIN_ADAPTIVE': 'true',
        'SOFT_MIN_MIN_WORDS': '15',
        'SOFT_MIN_MAX_RETRIES': '5',
        'SOFT_MIN_BETA_DECAY': '0.7',
        'THEMATIC_VOCAB_SIZE_LIMIT': '100000',
    })

    try:
        # Imported here because the module only becomes importable after
        # setup_environment() has extended sys.path.
        from services.thematic_word_service import ThematicWordService

        print("Creating ThematicWordService...")
        service = ThematicWordService()
        service.initialize()

        # "\n" (not "\\n") so a blank line is printed rather than a literal
        # backslash-n.
        inputs = ["universe", "movies", "languages"]
        print(f"\nTesting original case: {inputs} (with full vocabulary)")
        print("Expected: Should now get words using adjusted threshold")
        print("-" * 50)

        results = service.generate_thematic_words(
            inputs,
            num_words=50,
            min_similarity=0.25,
            multi_theme=True,
        )
        _print_top_words(results, label="Final", limit=10)

        second_inputs = ["science", "art", "music"]
        print(f"\n🔬 Test another challenging case: {second_inputs}")
        results2 = service.generate_thematic_words(
            second_inputs,
            num_words=30,
            min_similarity=0.25,
            multi_theme=True,
        )
        _print_top_words(results2, label="Second", limit=5)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report the failure in
        # full instead of propagating it.
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()


def _print_top_words(results, label, limit):
    """Print how many words one case produced and its first ``limit`` entries.

    ``results`` is expected to be a sequence of ``(word, similarity, tier)``
    tuples, as unpacked below; an empty sequence prints a warning instead of
    a ranking.
    """
    print(f"\n✅ {label} result: {len(results)} words generated")
    if not results:
        print(" ⚠️ Still no words generated!")
        return

    print(f"Top {limit} words:")
    for rank, (word, similarity, _tier) in enumerate(results[:limit], 1):
        print(f" {rank}. {word}: {similarity:.4f}")
| |
|
# Run the check only when this file is executed directly, not when imported.
if __name__ == "__main__":
    test_adaptive_fix()