Spaces:
Paused
Paused
| """ | |
| RandomWeb — Re-verification Scheduler | |
| Rolling yearly re-verification of indexed websites. | |
| Politely re-checks active URLs and toggles visibility on failure. | |
| """ | |
| import asyncio | |
| import logging | |
| from datetime import datetime, timezone | |
| from backend.config import SCHEDULER_INTERVAL_SECONDS, SCHEDULER_BATCH_SIZE | |
| from backend.db import get_urls_needing_recheck | |
| from backend.workers.validator import enqueue_url | |
| logger = logging.getLogger("randomweb.scheduler") | |
async def run_scheduler(initial_delay: float = 120) -> None:
    """
    Background loop that re-queues indexed URLs due for re-verification.

    After an initial warm-up pause, each cycle asks
    get_urls_needing_recheck() for up to SCHEDULER_BATCH_SIZE records,
    passes every record's "url" to enqueue_url() with source="recheck",
    and then sleeps SCHEDULER_INTERVAL_SECONDS before the next cycle.
    The loop never returns on its own.

    Errors are logged with their traceback and are not propagated: a
    failure while fetching the batch skips that cycle, and a failure on a
    single record skips only that record.

    Args:
        initial_delay: Seconds to wait before the first cycle so the rest
            of the system can warm up. Defaults to 120, the value that was
            previously hard-coded.
    """
    logger.info(
        "Re-verification scheduler started (interval: %ds)",
        SCHEDULER_INTERVAL_SECONDS,
    )

    # Initial delay to let the system warm up.
    await asyncio.sleep(initial_delay)

    while True:
        try:
            # NOTE(review): this call is not awaited, so it runs
            # synchronously on the event loop — confirm it is cheap enough
            # not to stall other tasks.
            urls = get_urls_needing_recheck(limit=SCHEDULER_BATCH_SIZE)

            if urls:
                logger.info("Re-verifying %d URLs", len(urls))
                queued = await _enqueue_for_recheck(urls)
                logger.info("Queued %d URLs for re-verification", queued)
            else:
                logger.debug("No URLs due for re-verification")
        except Exception as e:
            # logger.exception keeps the traceback, which a plain
            # logger.error("%s", e) would throw away.
            logger.exception("Scheduler error: %s", e)

        # Wait until the next check, whether or not this cycle succeeded.
        await asyncio.sleep(SCHEDULER_INTERVAL_SECONDS)


async def _enqueue_for_recheck(records) -> int:
    """Queue each record's URL for re-validation; return how many were queued."""
    queued = 0
    for record in records:
        url = None
        try:
            url = record["url"]
            await enqueue_url(url, source="recheck")
        except Exception:
            # Isolate the failure so one bad record cannot abort the batch.
            logger.exception(
                "Scheduler error: could not queue %r for re-verification", url
            )
        else:
            queued += 1

        # Small delay between attempts to avoid flooding the queue.
        await asyncio.sleep(0.1)
    return queued