"""
API-Based Clue Generator for Crossword Puzzles
Uses Hugging Face Inference API to test multiple models without local downloads.
"""
| |
|
| | import os |
| | import time |
| | import json |
| | import logging |
| | import requests |
| | from typing import List, Dict, Optional, Tuple |
| | from pathlib import Path |
| |
|
| | |
| | logging.basicConfig( |
| | level=logging.INFO, |
| | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' |
| | ) |
| | logger = logging.getLogger(__name__) |
| |
|
| |
|
class APIClueGenerator:
    """
    API-based clue generator using Hugging Face Inference API.

    Sends OpenAI-compatible chat-completion requests through the HF router
    so multiple hosted models can be compared without local downloads.
    """

    def __init__(self, hf_token: Optional[str] = None):
        """Initialize API clue generator.

        Args:
            hf_token: Hugging Face API token (optional but recommended for rate limits).
                Falls back to the HF_TOKEN, then HUGGINGFACE_TOKEN, environment variables.
        """
        self.hf_token = hf_token or os.getenv('HF_TOKEN') or os.getenv('HUGGINGFACE_TOKEN')
        self.base_url = "https://router.huggingface.co/v1"

        # Friendly key -> fully-qualified router model id.
        # The ":fireworks-ai" suffix selects the inference provider on the router.
        self.models = {
            "deepseek-v3": "deepseek-ai/DeepSeek-V3-0324:fireworks-ai",
            "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct:fireworks-ai",
        }

        # Auth header is optional; without it requests still work but hit
        # stricter anonymous rate limits.
        self.headers = {}
        if self.hf_token:
            self.headers["Authorization"] = f"Bearer {self.hf_token}"

        # Alternative prompt templates kept for experimenting with plain
        # text-completion styles; query_model() currently uses chat messages instead.
        self.prompts = {
            "instruction": """Generate a crossword clue for '{word}' (category: {topic}).

Rules:
- 2-6 words only
- Don't use the word '{word}' in the clue
- Be descriptive and accurate

Examples:
- CAT (animals) → "Feline household pet"
- GUITAR (music) → "Six-stringed instrument"
- AIRPORT (transportation) → "Flight departure hub"

Clue for '{word}' ({topic}):""",

            "simple": """Complete this crossword clue:

{word} ({topic}) = [ANSWER]

Examples:
VIOLIN (music) = Bowed string instrument
SCIENTIST (science) = Research professional
DATABASE (technology) = Information storage system

{word} ({topic}) =""",

            "question": """What is '{word}' in the context of {topic}? Give a brief crossword clue (2-5 words) without using the word '{word}'.

Answer:"""
        }

    def query_model(self, model_name: str, word: str, context: str, max_retries: int = 3) -> Optional[str]:
        """Query a model via Hugging Face Router API using chat completions format.

        Args:
            model_name: Fully-qualified router model id to query.
            word: Target word for clue generation.
            context: Topic/context for the word.
            max_retries: Maximum number of attempts before giving up.

        Returns:
            Cleaned clue text, or None if all attempts failed or the
            response contained no choices.
        """
        url = f"{self.base_url}/chat/completions"

        # OpenAI-compatible chat payload accepted by the HF router.
        messages = [
            {
                "role": "system",
                "content": f"You are a crossword puzzle clue generator. Generate a single, concise, creative crossword clue for the word '{word}'. The clue should be 2-8 words, accurate, and not contain the word '{word}' itself."
            },
            {
                "role": "user",
                "content": f"Generate a crossword clue for the word '{word}' in the context of '{context}'."
            }
        ]

        payload = {
            "model": model_name,
            "messages": messages,
            "temperature": 0.7,
            "max_tokens": 50
        }

        for attempt in range(max_retries):
            try:
                response = requests.post(url, headers=self.headers, json=payload, timeout=30)
                response.raise_for_status()

                result = response.json()

                if "choices" in result and len(result["choices"]) > 0:
                    generated_text = result["choices"][0]["message"]["content"].strip()
                    return self._clean_response(generated_text)

                # A well-formed 200 response with no choices is not retried:
                # retrying the identical request is unlikely to change it.
                logger.warning(f"No choices in response for {model_name}")
                return None

            except requests.exceptions.RequestException as e:
                logger.warning(f"Request failed for {model_name} (attempt {attempt+1}): {e}")
                # Surface the server's error body when available to aid debugging.
                if hasattr(e, 'response') and e.response is not None:
                    logger.warning(f"Response content: {e.response.text}")
                if attempt < max_retries - 1:
                    time.sleep(2)
            except Exception as e:
                # Catch-all boundary: log and retry rather than crash the batch.
                logger.warning(f"Unexpected error for {model_name} (attempt {attempt+1}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(2)

        return None

    def _clean_response(self, text: str) -> str:
        """Normalize a raw model reply into a single-line clue.

        Collapses whitespace, strips wrapping quotes/brackets, keeps only the
        first sentence, and rejects clues outside 3-100 characters.

        Returns:
            The cleaned clue, or "" when the input is empty or out of range.
        """
        if not text:
            return ""

        # Collapse line breaks and tabs into spaces so the clue is one line.
        text = text.strip()
        text = text.replace('\n', ' ').replace('\t', ' ')

        # Drop wrapping quotes/brackets models sometimes add around the clue.
        text = text.strip('"\'[](){}')

        # Keep only the first sentence; models often append explanations.
        # (Newlines were already collapsed above, so no newline split is needed —
        # the original code's '\n' branch here was unreachable.)
        if '.' in text:
            text = text.split('.', 1)[0].strip()

        # Reject degenerate (too short) or run-on (too long) clues.
        if len(text) < 3 or len(text) > 100:
            return ""

        return text

    def generate_clue(self, word: str, topic: str) -> Dict[str, Optional[str]]:
        """Generate clues using all available models.

        Args:
            word: Target word.
            topic: Topic/category context.

        Returns:
            Dictionary mapping model names to generated clues (None on failure).
        """
        results = {}

        logger.info(f"🎯 Generating clues for '{word}' + '{topic}' using {len(self.models)} models")

        for i, (model_key, model_name) in enumerate(self.models.items()):
            # Pace requests to stay under rate limits; no pause is needed
            # before the first model or after the last one.
            if i:
                time.sleep(1)
            logger.info(f"  Querying {model_key}...")
            results[model_key] = self.query_model(model_name, word, topic)

        return results

    def evaluate_clue_quality(self, word: str, clue: str) -> Tuple[str, float]:
        """Evaluate the quality of a generated clue with a simple heuristic score.

        Args:
            word: Target word.
            clue: Generated clue.

        Returns:
            Tuple of (quality_label, quality_score) where the label is one of
            FAILED, POOR, ACCEPTABLE, GOOD, EXCELLENT.
        """
        # Empty/near-empty clues are outright failures.
        if not clue or len(clue.strip()) < 3:
            return "FAILED", 0.0

        word_lower = word.lower()
        clue_lower = clue.lower()

        # A clue that leaks the answer is heavily penalized.
        if word_lower in clue_lower:
            return "POOR", 0.2

        # Base score plus bonuses for descriptive vocabulary, good word
        # count, and reasonable length.
        score = 0.5
        quality_words = ["player", "instrument", "device", "system", "location", "animal",
                         "food", "building", "activity", "professional", "tool", "creature",
                         "terminal", "hub", "language", "storage", "sport", "game"]

        if any(qw in clue_lower for qw in quality_words):
            score += 0.3

        if 2 <= len(clue.split()) <= 6:
            score += 0.1

        if len(clue) >= 8:
            score += 0.1

        # Map the numeric score onto a coarse label.
        if score >= 0.8:
            return "EXCELLENT", score
        elif score >= 0.6:
            return "GOOD", score
        elif score >= 0.4:
            return "ACCEPTABLE", score
        else:
            return "POOR", score
| |
|
| |
|
def main():
    """Demo the API clue generator using your working approach."""
    gen = APIClueGenerator()

    # Without a token the router will throttle hard; bail out with guidance.
    if not gen.hf_token:
        print("❌ Error: HF_TOKEN environment variable not set")
        print("Please set your Hugging Face token: export HF_TOKEN='hf_your_token_here'")
        return

    # Representative (word, topic) pairs spanning several categories.
    demo_pairs = [
        ("CRICKET", "sports"),
        ("SHUTTLE", "space"),
        ("CAT", "animals"),
        ("DATABASE", "technology"),
    ]

    for word, topic in demo_pairs:
        print(f"\n🧪 Testing: {word} + {topic}")
        print("=" * 50)

        for model, clue in gen.generate_clue(word, topic).items():
            if not clue:
                print(f"{model:15} | FAILED | No response")
                continue
            quality, score = gen.evaluate_clue_quality(word, clue)
            print(f"{model:15} | {quality:10} | {clue}")
| |
|
| |
|
| | if __name__ == "__main__": |
| | main() |
| |
|