| |
| """ |
| QA Runner — Automated End-to-End Agent Testing |
| =============================================== |
| Runs test queries through the Eurus agent, captures ALL intermediate steps |
| (tool calls, tool outputs, reasoning, plots) and saves structured results |
| to data/qa_results/q{NN}_{slug}/. |
| |
| Usage: |
| PYTHONPATH=src OPENAI_API_KEY=... python3 scripts/qa_runner.py |
| |
| Or run a single query: |
| PYTHONPATH=src OPENAI_API_KEY=... python3 scripts/qa_runner.py --query 2 |
| """ |
|
|
| import os |
| import sys |
| import json |
| import shutil |
| import base64 |
| import time |
| import argparse |
| from pathlib import Path |
| from datetime import datetime |
| from typing import Optional |
|
|
| |
| PROJECT_ROOT = Path(__file__).parent.parent |
| sys.path.insert(0, str(PROJECT_ROOT / "src")) |
| sys.path.insert(0, str(PROJECT_ROOT)) |
|
|
| |
| from dotenv import load_dotenv |
| load_dotenv(PROJECT_ROOT / ".env") |
|
|
| from langchain_openai import ChatOpenAI |
| from langchain.agents import create_agent |
| from langchain_core.messages import HumanMessage, AIMessage, ToolMessage |
|
|
| from eurus.config import AGENT_SYSTEM_PROMPT, CONFIG, get_plots_dir |
| from eurus.tools import get_all_tools |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Catalogue of QA test queries.
#
# Each entry is a dict with the keys:
#   id        - integer query ID; selected via --query / --start / --end
#   slug      - short identifier; with the ID it forms the result folder
#               name "q{id:02d}_{slug}"
#   query     - natural-language prompt sent to the agent verbatim
#   type      - category label, copied into metadata.json
#   variables - variable short names, copied into metadata.json
#   region    - human-readable region label, copied into metadata.json
#
# Non-ASCII characters (degree sign, em dash, n-tilde, o-umlaut) are stored as
# real Unicode text so the agent receives the prompt as intended.
QA_QUERIES = [
    # --- Queries 1-5: event maps and regional time series ---
    {
        "id": 1,
        "slug": "europe_heatwave_anomaly",
        "query": "Show me a spatial map of 2m temperature anomalies across Europe "
                 "during the June 2023 heatwave compared to June 2022.",
        "type": "anomaly_map",
        "variables": ["t2"],
        "region": "Europe",
    },
    {
        "id": 2,
        "slug": "storm_isha_mslp_wind",
        "query": "Plot MSLP isobars and 10m wind vectors over the North Atlantic "
                 "for 2024-01-22 — I want to see Storm Isha's structure.",
        "type": "contour_quiver",
        "variables": ["mslp", "u10", "v10"],
        "region": "North Atlantic",
    },
    {
        "id": 3,
        "slug": "atmospheric_river_jan2023",
        "query": "Download total column water vapour for the US West Coast, Jan 2023, "
                 "and show the atmospheric river event around Jan 9th.",
        "type": "ar_detection",
        "variables": ["tcwv"],
        "region": "US West Coast",
    },
    {
        "id": 4,
        "slug": "sahara_heat_july2024",
        "query": "Plot the daily mean 2m temperature time series averaged over "
                 "the Sahara (20-30°N, 0 to 15°E) for July 2024 and compare "
                 "it to July 2023 on the same chart.",
        "type": "time_series",
        "variables": ["t2"],
        "region": "Sahara",
    },
    {
        "id": 5,
        "slug": "great_plains_wind_may2024",
        "query": "Plot a map of mean 10m wind speed over the US Great Plains "
                 "(30-45°N, -105 to -90°W) for May 2024 and highlight areas exceeding 5 m/s.",
        "type": "threshold_map",
        "variables": ["u10", "v10"],
        "region": "US Great Plains",
    },

    # --- Queries 6-10: climate indices and modes of variability ---
    {
        "id": 6,
        "slug": "nino34_index",
        "query": "Calculate the Niño 3.4 index from ERA5 SST for 2015-2024 and "
                 "classify El Niño / La Niña episodes.",
        "type": "climate_index",
        "variables": ["sst"],
        "region": "Tropical Pacific",
    },
    {
        "id": 7,
        "slug": "elnino_vs_lanina_tropical_belt",
        "query": "Compare SST anomalies across the entire tropical belt "
                 "(30°S-30°N, global) for December 2023 (peak El Niño) vs December 2022 "
                 "(La Niña). Show the full basin-wide pattern across the Pacific, "
                 "Atlantic, and Indian oceans in a single anomaly difference map.",
        "type": "anomaly_comparison",
        "variables": ["sst"],
        "region": "Tropical Belt (global)",
    },
    {
        "id": 8,
        "slug": "nao_index",
        "query": "Compute the NAO index from MSLP (Azores minus Iceland) for 2000-2024 "
                 "and plot it with a 3-month rolling mean.",
        "type": "climate_index",
        "variables": ["mslp"],
        "region": "North Atlantic",
    },
    {
        "id": 9,
        "slug": "australia_enso_rainfall",
        "query": "Compare precipitation over Eastern Australia (25-45°S, 145-155°E) "
                 "between the La Niña year 2022 and El Niño year 2023. "
                 "Show a two-panel map of annual total precipitation for each year "
                 "and a difference map (2023 minus 2022).",
        "type": "multi_year_anomaly",
        "variables": ["tp"],
        "region": "Australia",
    },
    {
        "id": 10,
        "slug": "med_eof_sst",
        "query": "Perform an EOF analysis on Mediterranean SST anomalies "
                 "(30-46°N, -6 to 36°E) for 2019-2024 and show the first 3 modes "
                 "with variance explained. Interpret the dominant patterns.",
        "type": "eof_analysis",
        "variables": ["sst"],
        "region": "Mediterranean",
    },

    # --- Queries 11-15: multi-year comparisons, trends and anomalies ---
    {
        "id": 11,
        "slug": "arctic_polar_amplification",
        "query": "Compare January mean 2m temperature across the entire Arctic "
                 "(north of 70°N) for 2024 vs 2000. Show both maps side by side, "
                 "compute the area-weighted temperature difference, and quantify "
                 "polar amplification.",
        "type": "decadal_comparison",
        "variables": ["t2"],
        "region": "Arctic (>70°N)",
    },
    {
        "id": 12,
        "slug": "med_marine_heatwave_2023",
        "query": "Map the summer (JJA) 2023 mean SST anomaly across the entire "
                 "Mediterranean basin (30-46°N, -6 to 36°E) compared to the 2018-2022 "
                 "summer mean. Identify marine heatwave hotspots where SST exceeded "
                 "+2°C above normal.",
        "type": "marine_heatwave",
        "variables": ["sst"],
        "region": "Mediterranean",
    },
    {
        "id": 13,
        "slug": "paris_decadal_comparison",
        "query": "Compare the average summer (JJA) temperature in Paris between the "
                 "decades 2000-2009 and 2014-2023 — show a difference map and time series.",
        "type": "multi_panel_comparison",
        "variables": ["t2"],
        "region": "Paris",
    },
    {
        "id": 14,
        "slug": "alps_snow_trend",
        "query": "Has the snow depth over the Alps decreased over the last 30 years? "
                 "Show me the December-February trend.",
        "type": "trend_analysis",
        "variables": ["sd"],
        "region": "Alps",
    },
    {
        "id": 15,
        "slug": "uk_precip_anomaly_winter2024",
        "query": "Map the total precipitation anomaly over the British Isles "
                 "(49-60°N, 11°W-2°E) for January 2024 compared to the 2019-2023 "
                 "January mean. Highlight regions receiving more than 150% of normal "
                 "rainfall. Save the map as a PNG file.",
        "type": "anomaly_map",
        "variables": ["tp"],
        "region": "British Isles",
    },

    # --- Queries 16-20: extremes and event detection ---
    {
        "id": 16,
        "slug": "delhi_heatwave_detection",
        "query": "Detect heatwave events in Delhi from 2010-2024 using the 90th "
                 "percentile threshold with a 3-day duration criterion — how has the "
                 "frequency changed?",
        "type": "heatwave_detection",
        "variables": ["t2"],
        "region": "Delhi",
    },
    {
        "id": 17,
        "slug": "horn_africa_drought",
        "query": "Calculate a 3-month SPI proxy for the Horn of Africa "
                 "(Ethiopia/Somalia) for 2020-2024 — when were the worst drought periods?",
        "type": "drought_analysis",
        "variables": ["tp"],
        "region": "Horn of Africa",
    },
    {
        "id": 18,
        "slug": "baghdad_hot_days",
        "query": "How many days per year exceeded 35°C in Baghdad from 1980 to 2024? "
                 "Plot as a bar chart with a trend line.",
        "type": "exceedance_frequency",
        "variables": ["t2"],
        "region": "Baghdad",
    },
    {
        "id": 19,
        "slug": "sea_p95_precip",
        "query": "Show me the 95th percentile daily precipitation map for Southeast Asia "
                 "for 2010-2023.",
        "type": "extreme_percentile",
        "variables": ["tp"],
        "region": "Southeast Asia",
    },
    {
        "id": 20,
        "slug": "scandinavia_blocking_2018",
        "query": "Analyse the blocking event over Scandinavia in July 2018 — show MSLP "
                 "anomalies persisting for 5+ days.",
        "type": "blocking_detection",
        "variables": ["mslp"],
        "region": "Scandinavia",
    },

    # --- Queries 21-24: maritime routes and ocean maps ---
    {
        "id": 21,
        "slug": "rotterdam_shanghai_route",
        "query": "Calculate the maritime route from Rotterdam to Shanghai and analyse "
                 "wind risk along the route for December.",
        "type": "maritime_route_risk",
        "variables": ["u10", "v10"],
        "region": "Europe-Asia",
    },
    {
        "id": 22,
        "slug": "indian_ocean_sst_dipole",
        "query": "Map the SST anomaly across the Indian Ocean (30°S-25°N, 30-120°E) "
                 "for October 2023 relative to the 2019-2022 October mean. "
                 "Show the Indian Ocean Dipole pattern. Save the map as PNG.",
        "type": "anomaly_map",
        "variables": ["sst"],
        "region": "Indian Ocean",
    },
    {
        "id": 23,
        "slug": "japan_typhoon_season_wind",
        "query": "Map the mean and maximum 10m wind speed over the seas around Japan "
                 "(20-45°N, 120-150°E) during typhoon season (August-October) 2023. "
                 "Show two-panel spatial maps highlighting areas where mean wind "
                 "exceeded 8 m/s. Save as PNG.",
        "type": "multi_panel_map",
        "variables": ["u10", "v10"],
        "region": "Japan",
    },
    {
        "id": 24,
        "slug": "south_atlantic_sst_gradient",
        "query": "Map the mean SST field across the South Atlantic (40°S-5°N, 50°W-15°E) "
                 "for March 2024. Overlay SST isotherms and highlight the "
                 "Brazil-Malvinas confluence zone. Save as PNG.",
        "type": "sst_map",
        "variables": ["sst"],
        "region": "South Atlantic",
    },

    # --- Queries 25-28: wind/solar resources and threshold maps ---
    {
        "id": 25,
        "slug": "north_sea_wind_power",
        "query": "Map the mean 100m wind power density across the North Sea for "
                 "2020-2024 — where are the best offshore wind sites?",
        "type": "wind_energy",
        "variables": ["u100", "v100"],
        "region": "North Sea",
    },
    {
        "id": 26,
        "slug": "german_bight_weibull",
        "query": "Fit a Weibull distribution to 100m wind speed at 54°N, 7°E "
                 "(German Bight) for 2023 and estimate the capacity factor for a "
                 "3-25 m/s turbine range. Plot the histogram with Weibull fit overlay "
                 "and save as PNG.",
        "type": "weibull_analysis",
        "variables": ["u100", "v100"],
        "region": "German Bight",
    },
    {
        "id": 27,
        "slug": "solar_sahara_vs_germany",
        "query": "Compare incoming solar radiation (SSRD) between the Sahara and "
                 "northern Germany across 2023 — show monthly means.",
        "type": "comparison_timeseries",
        "variables": ["ssrd"],
        "region": "Sahara / Germany",
    },
    {
        "id": 28,
        "slug": "persian_gulf_sst_summer",
        "query": "Map the mean SST across the Persian Gulf and Arabian Sea "
                 "(12-32°N, 44-70°E) for August 2023. Highlight areas where SST "
                 "exceeded 32°C in a spatial map. Save as PNG.",
        "type": "threshold_map",
        "variables": ["sst"],
        "region": "Persian Gulf",
    },

    # --- Queries 29-30: diurnal cycles ---
    {
        "id": 29,
        "slug": "sahara_diurnal_t2_blh",
        "query": "Show the diurnal cycle of 2m temperature and boundary layer height "
                 "in the Sahara for July 2024 — dual-axis plot.",
        "type": "diurnal_cycle",
        "variables": ["t2", "blh"],
        "region": "Sahara",
    },
    {
        "id": 30,
        "slug": "amazon_convective_peak",
        "query": "When does convective precipitation peak over the Amazon basin during "
                 "DJF? Hourly climatology please.",
        "type": "diurnal_cycle",
        "variables": ["cp"],
        "region": "Amazon",
    },

    # --- Queries 31-33: derived variables and composite figures ---
    {
        "id": 31,
        "slug": "europe_rh_august",
        "query": "Compute relative humidity from 2m temperature and dewpoint for "
                 "central Europe, August 2023, and map the spatial mean.",
        "type": "derived_variable",
        "variables": ["t2", "d2"],
        "region": "Central Europe",
    },
    {
        "id": 32,
        "slug": "hovmoller_equator_skt",
        "query": "Create a Hovmöller diagram of 850 hPa equivalent — use skin "
                 "temperature as proxy — along the equator for 2023 to visualise the MJO.",
        "type": "hovmoller",
        "variables": ["skt"],
        "region": "Equatorial",
    },
    {
        "id": 33,
        "slug": "hurricane_otis_dashboard",
        "query": "Plot a summary dashboard for Hurricane Otis (Oct 2023, Acapulco): "
                 "SST map, wind speed time series, and TCWV distribution in one figure.",
        "type": "dashboard",
        "variables": ["sst", "u10", "v10", "tcwv"],
        "region": "East Pacific / Mexico",
    },

    # --- Queries 34-36: simple retrievals and statistics ---
    {
        "id": 34,
        "slug": "california_sst_jan",
        "query": "What was the average SST off the coast of California in January 2024? "
                 "Also plot a spatial map of the SST field for that month and save as PNG.",
        "type": "point_retrieval",
        "variables": ["sst"],
        "region": "California",
    },
    {
        "id": 35,
        "slug": "berlin_monthly_temp",
        "query": "Plot the 2023 monthly mean temperature for Berlin as a seasonal curve.",
        "type": "time_series",
        "variables": ["t2"],
        "region": "Berlin",
    },
    {
        "id": 36,
        "slug": "biscay_wind_stats",
        "query": "Download 10m wind speed for the Bay of Biscay, last 3 years, and "
                 "give me basic statistics. Also plot a wind speed histogram or time "
                 "series and save as PNG.",
        "type": "stats_retrieval",
        "variables": ["u10", "v10"],
        "region": "Bay of Biscay",
    },
]
|
|
|
|
| |
| |
| |
|
|
def build_agent():
    """
    Assemble and return the agent used for every QA query.

    The chat model takes its name and temperature from ``CONFIG``; the tool
    list comes from ``get_all_tools`` called with ``enable_routing=False`` and
    ``enable_guide=True``; the system prompt is ``AGENT_SYSTEM_PROMPT``.

    Returns:
        Whatever ``create_agent`` returns for that model, tool list and prompt.
    """
    chat_model = ChatOpenAI(
        model=CONFIG.model_name,
        temperature=CONFIG.temperature,
    )
    toolset = get_all_tools(enable_routing=False, enable_guide=True)

    return create_agent(
        model=chat_model,
        tools=toolset,
        system_prompt=AGENT_SYSTEM_PROMPT,
        debug=False,
    )
|
|
|
|
| |
| |
| |
|
|
# Suffix appended to tool arguments / tool output that had to be cut short.
_TRUNCATION_MARKER = "\n... [TRUNCATED]"


def _as_text(content) -> str:
    """Return message content as text; non-string content goes through ``str``."""
    return content if isinstance(content, str) else str(content)


def extract_steps(messages) -> list:
    """
    Flatten an agent message history into an ordered list of step records.

    Every record carries a 1-based ``step`` number and a ``type``:

    * ``user_query``  - from a ``HumanMessage``; ``content`` capped at 2000 chars.
    * ``tool_call``   - one per entry in an ``AIMessage``'s ``tool_calls``, with
      ``tool_name``, ``tool_id``, ``arguments`` and ``reasoning`` (the message
      text, capped at 1000 chars). ``arguments`` is the JSON-normalised args
      object, or a truncated JSON string when its pretty-printed form exceeds
      5000 chars.
    * ``ai_response`` - from an ``AIMessage`` with text and no tool calls;
      ``content`` capped at 5000 chars.
    * ``tool_output`` - from a ``ToolMessage``, with ``tool_name``,
      ``tool_call_id`` and ``content`` (capped at 3000 chars plus a marker).

    Messages of any other class, and ``AIMessage`` objects with neither tool
    calls nor content, are skipped.

    Args:
        messages: Iterable of message objects from the agent result.

    Returns:
        List of step dicts in message order.
    """
    steps = []

    for msg in messages:
        if isinstance(msg, HumanMessage):
            steps.append({
                "step": len(steps) + 1,
                "type": "user_query",
                # Content may be a list of content blocks; normalise to text
                # so the slice below always cuts characters, not list items.
                "content": _as_text(msg.content)[:2000],
            })

        elif isinstance(msg, AIMessage):
            if msg.tool_calls:
                reasoning = _as_text(msg.content)[:1000] if msg.content else ""
                for tc in msg.tool_calls:
                    # default=str makes non-JSON-native argument values serialisable.
                    serialized = json.dumps(tc.get("args", {}), indent=2, default=str)
                    if len(serialized) > 5000:
                        # Too large to keep whole: store a clipped JSON string.
                        arguments = serialized[:5000] + _TRUNCATION_MARKER
                    else:
                        # Round-trip so the stored value is plain JSON data.
                        arguments = json.loads(serialized)

                    steps.append({
                        "step": len(steps) + 1,
                        "type": "tool_call",
                        "tool_name": tc.get("name", "unknown"),
                        "tool_id": tc.get("id", ""),
                        "arguments": arguments,
                        "reasoning": reasoning,
                    })
            elif msg.content:
                steps.append({
                    "step": len(steps) + 1,
                    "type": "ai_response",
                    "content": _as_text(msg.content)[:5000],
                })

        elif isinstance(msg, ToolMessage):
            content = _as_text(msg.content)
            if len(content) > 3000:
                content = content[:3000] + _TRUNCATION_MARKER

            steps.append({
                "step": len(steps) + 1,
                "type": "tool_output",
                # The attribute can exist but be None, so fall back on falsy
                # values rather than on attribute presence.
                "tool_name": getattr(msg, "name", None) or "unknown",
                "tool_call_id": getattr(msg, "tool_call_id", None) or "",
                "content": content,
            })

    return steps
|
|
|
|
| |
| |
| |
|
|
def run_single_query(agent, query_def: dict, output_dir: Path) -> dict:
    """
    Run one QA query through the agent and persist everything it produced.

    Creates ``output_dir / "q{id:02d}_{slug}"`` and, on success, writes:

    * ``steps.json``    - the step records returned by ``extract_steps``
    * ``response.md``   - the query, elapsed time and the final AI answer
    * ``metadata.json`` - the dict this function returns
    * a copy of every ``*.png`` whose file name appeared in the plots
      directory during the run

    On failure only ``metadata.json`` is written, with ``status`` set to
    ``"error"``.

    Args:
        agent: Object whose ``invoke(inputs, config=...)`` returns a mapping
            containing a ``"messages"`` list.
        query_def: One query definition; ``id``, ``slug`` and ``query`` are
            required, ``type``, ``variables`` and ``region`` are optional.
        output_dir: Parent directory for the per-query result folder.

    Returns:
        The metadata dict. Exceptions raised while running the query or
        saving its artefacts are caught and reported in the metadata
        (``error`` / ``notes``) rather than propagated.
    """
    qid = query_def["id"]
    slug = query_def["slug"]
    query = query_def["query"]

    folder = output_dir / f"q{qid:02d}_{slug}"
    folder.mkdir(parents=True, exist_ok=True)

    rule = "=" * 70
    print(f"\n{rule}")
    print(f"  Q{qid:02d}: {query[:70]}...")
    print(rule)

    def _save_metadata(status: str, elapsed: float, **extra) -> dict:
        """Build the metadata record, write it to metadata.json, return it."""
        metadata = {
            "query_id": qid,
            "slug": slug,
            "query": query,
            "type": query_def.get("type", "unknown"),
            "variables": query_def.get("variables", []),
            "region": query_def.get("region", ""),
            "timestamp": datetime.now().isoformat(),
            "elapsed_seconds": round(elapsed, 1),
            "status": status,
            **extra,
        }
        # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII characters,
        # which a non-UTF-8 locale default encoding may be unable to encode.
        with open(folder / "metadata.json", "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2, ensure_ascii=False)
        return metadata

    start_time = time.time()

    try:
        # Snapshot existing plot names so new ones can be told apart afterwards.
        plots_dir = get_plots_dir()
        existing_plots = set()
        if plots_dir.exists():
            existing_plots = {p.name for p in plots_dir.glob("*.png")}

        result = agent.invoke(
            {"messages": [HumanMessage(content=query)]},
            config={"recursion_limit": 35},
        )

        elapsed = time.time() - start_time
        result_messages = result["messages"]

        steps = extract_steps(result_messages)

        # Final answer = last AI message that has text and requests no tools.
        final_response = next(
            (
                m.content
                for m in reversed(result_messages)
                if isinstance(m, AIMessage) and m.content and not m.tool_calls
            ),
            "",
        )
        if not isinstance(final_response, str):
            # Content can be a list of blocks; file.write() needs a string.
            final_response = str(final_response)

        with open(folder / "steps.json", "w", encoding="utf-8") as f:
            json.dump(steps, f, indent=2, default=str, ensure_ascii=False)

        with open(folder / "response.md", "w", encoding="utf-8") as f:
            f.write(f"# Q{qid:02d}: {slug}\n\n")
            f.write(f"**Query:** {query}\n\n")
            f.write(f"**Elapsed:** {elapsed:.1f}s\n\n")
            f.write("---\n\n")
            f.write(final_response)

        # Copy plots whose file name was not present before the run.
        plot_files = []
        if plots_dir.exists():
            for plot_path in sorted(plots_dir.glob("*.png")):
                if plot_path.name in existing_plots:
                    continue
                shutil.copy2(plot_path, folder / plot_path.name)
                plot_files.append(plot_path.name)
                print(f"  📊 Plot saved: {plot_path.name}")

        tool_calls = [s for s in steps if s["type"] == "tool_call"]
        # Sorted so the recorded list is stable from run to run.
        tools_used = sorted({s["tool_name"] for s in tool_calls})

        metadata = _save_metadata(
            "success",
            elapsed,
            tools_used=tools_used,
            num_tool_calls=len(tool_calls),
            num_steps=len(steps),
            plot_files=plot_files,
            notes="",
        )

        print(
            f"  ✅ SUCCESS in {elapsed:.1f}s | "
            f"Tools: {', '.join(tools_used)} | Steps: {len(steps)}"
        )
        return metadata

    except Exception as e:
        # Harness boundary: one failing query must not abort the whole run.
        elapsed = time.time() - start_time
        print(f"  ❌ FAILED in {elapsed:.1f}s: {e}")

        return _save_metadata(
            "error",
            elapsed,
            error=str(e),
            tools_used=[],
            num_tool_calls=0,
            num_steps=0,
            plot_files=[],
            notes=f"Error: {e}",
        )
|
|
|
|
def main():
    """
    Command-line entry point: run the selected QA queries and write a summary.

    Options:
        --query N        run only the query with ID N
        --start / --end  run the inclusive ID range (ignored when --query is given)
        --output DIR     result directory (default: PROJECT_ROOT/data/qa_results)
        --skip-existing  skip queries whose folder already has metadata.json

    Exits with status 1 when ``OPENAI_API_KEY`` is not set or when no query
    matches the selection. Otherwise runs each selected query with
    ``run_single_query``, prints a per-query summary and writes
    ``qa_summary.json`` into the output directory.
    """
    # Derive the upper bound from the catalogue instead of hard-coding it.
    max_id = max((q["id"] for q in QA_QUERIES), default=36)

    parser = argparse.ArgumentParser(description="Eurus QA Runner")
    parser.add_argument("--query", type=int, help=f"Run a single query by ID (1-{max_id})")
    parser.add_argument("--start", type=int, default=1, help="Start from query ID")
    parser.add_argument("--end", type=int, default=max_id, help="End at query ID (inclusive)")
    parser.add_argument("--output", type=str, default=None, help="Output directory (default: data/qa_results)")
    parser.add_argument("--skip-existing", action="store_true", help="Skip if folder already has metadata.json")
    args = parser.parse_args()

    if not os.environ.get("OPENAI_API_KEY"):
        print("❌ OPENAI_API_KEY not set!")
        sys.exit(1)

    # "is not None" so that an explicit (if invalid) "--query 0" is not
    # silently treated as "run the whole range".
    if args.query is not None:
        queries = [q for q in QA_QUERIES if q["id"] == args.query]
    else:
        queries = [q for q in QA_QUERIES if args.start <= q["id"] <= args.end]

    if not queries:
        # Fail before building the agent and before overwriting an existing
        # summary file with an empty one.
        print("❌ No queries match the requested selection.")
        sys.exit(1)

    if args.output:
        output_dir = Path(args.output)
    else:
        output_dir = PROJECT_ROOT / "data" / "qa_results"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Build the banner programmatically so the box edges always line up.
    inner_width = 54
    print("\n".join([
        "",
        "╔" + "═" * inner_width + "╗",
        "║" + "Eurus QA Runner v1.0".center(inner_width) + "║",
        "║" + datetime.now().strftime("%Y-%m-%d %H:%M:%S").center(inner_width) + "║",
        "╚" + "═" * inner_width + "╝",
        f"  Output: {output_dir}",
        "",
    ]))

    print("🏗️  Building agent...")
    agent = build_agent()
    print("✅ Agent ready\n")

    results = []
    for q in queries:
        folder = output_dir / f"q{q['id']:02d}_{q['slug']}"
        if args.skip_existing and (folder / "metadata.json").exists():
            print(f"⏭️  Skipping Q{q['id']:02d} (already exists)")
            continue

        results.append(run_single_query(agent, q, output_dir))

    rule = "=" * 70
    print(f"\n{rule}")
    print("QA SUMMARY")
    print(rule)

    success = sum(1 for r in results if r["status"] == "success")
    failed = sum(1 for r in results if r["status"] == "error")
    total_time = sum(r["elapsed_seconds"] for r in results)

    for r in results:
        status = "✅" if r["status"] == "success" else "❌"
        print(f"  {status} Q{r['query_id']:02d} ({r['slug']:20s}) | "
              f"{r['elapsed_seconds']:5.1f}s | Tools: {', '.join(r['tools_used'])}")

    print(f"\nTotal: {success} passed, {failed} failed, {total_time:.1f}s total")

    summary_path = output_dir / "qa_summary.json"
    # Explicit UTF-8 because ensure_ascii=False writes raw non-ASCII characters.
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump({
            "timestamp": datetime.now().isoformat(),
            "total_queries": len(results),
            "passed": success,
            "failed": failed,
            "total_time_seconds": round(total_time, 1),
            "results": results,
        }, f, indent=2, ensure_ascii=False)

    print(f"\nSummary saved to: {summary_path}")


if __name__ == "__main__":
    main()
|
|