Spaces:

vimalk78
/

abc123

Sleeping

App Files Files Community

abc123 / hack /debug_adaptive_beta_bug.py

vimalk78

feat: add multi-topic intersection methods with adaptive beta for word selection

b05514b 6 months ago

raw

history blame contribute delete

3.27 kB

	#!/usr/bin/env python3
	"""
	Debug Adaptive Beta Bug

	Quick test to reproduce the bug where word count decreases when beta is relaxed.
	"""

	import os
	import sys
	import logging

	# Root logger: timestamped messages at INFO and above, so log output from the
	# imported service is printed alongside this script's own prints.
	# NOTE(review): records logged at DEBUG level are still filtered out here.
	logging.basicConfig(
	    format='%(asctime)s - %(message)s',
	    level=logging.INFO,
	)

	def setup_environment():
	    """Point the model caches at ``../cache-dir`` and expose the backend sources.

	    Side effects:
	      * sets ``HF_HOME``, ``TRANSFORMERS_CACHE`` and
	        ``SENTENCE_TRANSFORMERS_HOME`` to the absolute path of the
	        ``cache-dir`` folder one level above this script;
	      * prepends ``../crossword-app/backend-py/src`` (absolute) to
	        ``sys.path`` unless it is already present;
	      * prints the cache directory being used.
	    """
	    script_dir = os.path.dirname(__file__)

	    # All three cache-related variables share the same directory.
	    cache_dir = os.path.abspath(os.path.join(script_dir, '..', 'cache-dir'))
	    for env_var in ('HF_HOME', 'TRANSFORMERS_CACHE', 'SENTENCE_TRANSFORMERS_HOME'):
	        os.environ[env_var] = cache_dir

	    # Put the backend sources first on the import path so they can be imported.
	    src_root = os.path.abspath(
	        os.path.join(script_dir, '..', 'crossword-app', 'backend-py', 'src')
	    )
	    if src_root not in sys.path:
	        sys.path.insert(0, src_root)

	    print(f"Using cache directory: {cache_dir}")

	def test_debug_adaptive_beta():
	    """Run the problematic three-topic query with adaptive soft-minimum settings.

	    Calls ``setup_environment()``, sets the soft-minimum / adaptive-beta
	    environment variables, creates and initializes a ``ThematicWordService``
	    and requests 50 words for ``["universe", "movies", "languages"]``.
	    Prints how many words came back and the first five of them.

	    Any exception raised while importing, initializing or querying the
	    service is printed together with its traceback instead of propagating,
	    so the calling script always runs to completion.
	    """
	    setup_environment()

	    print("🐛 Debug Adaptive Beta Bug")
	    print("=" * 50)

	    # Set environment variables for soft minimum with debug.
	    # They are set before the service is imported and constructed below.
	    os.environ['MULTI_TOPIC_METHOD'] = 'soft_minimum'
	    os.environ['SOFT_MIN_BETA'] = '10.0'
	    os.environ['SOFT_MIN_ADAPTIVE'] = 'true'
	    os.environ['SOFT_MIN_MIN_WORDS'] = '15'
	    os.environ['SOFT_MIN_MAX_RETRIES'] = '5'
	    os.environ['SOFT_MIN_BETA_DECAY'] = '0.7'
	    os.environ['THEMATIC_VOCAB_SIZE_LIMIT'] = '1000'  # Small for faster testing

	    try:
	        # Imported here (not at module top) because the backend path is only
	        # added to sys.path by setup_environment() above.
	        from services.thematic_word_service import ThematicWordService

	        print("Creating ThematicWordService...")
	        service = ThematicWordService()
	        service.initialize()

	        # Test the problematic case
	        inputs = ["universe", "movies", "languages"]
	        # "\n" (not "\\n"): emit a real blank line rather than a literal backslash-n.
	        print(f"\nTesting problematic case: {inputs}")
	        print("Expected: Word count should INCREASE as beta decreases")
	        print("-" * 50)

	        results = service.generate_thematic_words(
	            inputs,
	            num_words=50,
	            min_similarity=0.3,
	            multi_theme=False,  # Force single theme processing
	        )

	        print(f"\n✅ Final result: {len(results)} words generated")
	        if len(results) > 0:
	            print("Top 5 words:")
	            # Each result is unpacked as (word, similarity, tier); tier is not shown.
	            for rank, (word, similarity, _tier) in enumerate(results[:5], 1):
	                print(f" {rank}. {word}: {similarity:.4f}")
	        else:
	            print(" ⚠️ No words generated!")

	    except Exception as e:
	        # Top-level boundary of a debug script: report the failure in full
	        # rather than letting it abort the run.
	        print(f"❌ Test failed: {e}")
	        import traceback
	        traceback.print_exc()

	def main():
	    """Print a banner, run the adaptive-beta debug case, then print a checklist.

	    The checklist lists the questions to answer from the log output produced
	    while ``test_debug_adaptive_beta()`` runs.
	    """
	    print("🧪 Debugging Adaptive Beta Bug")
	    print("This will show detailed score statistics at each beta level")
	    print("=" * 60)

	    test_debug_adaptive_beta()

	    # "\n" (not "\\n"): start the footer after a real blank line instead of
	    # printing a literal backslash-n in front of the separator.
	    print("\n" + "=" * 60)
	    print("🔍 Look for patterns in the debug output:")
	    print("1. Do score ranges change as expected?")
	    print("2. Is the threshold comparison working correctly?")
	    print("3. Are scores getting more permissive with lower beta?")
	    print("=" * 60)

	# Script entry point: run the debug session only when this file is executed
	# directly, not when it is imported.
	if __name__ == '__main__':
	    main()