{ "benchmarks": { "sweVerified": { "name": "SWE-bench Verified", "models": [ { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 76.4, "date": "2026-02-16" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 75.8, "date": "2026-02-12" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 74.4, "date": "2026-02-01" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 74.0, "date": "2025-12-20" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 73.8, "date": "2025-12-22" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 72.8, "date": "2026-02-11" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 72.4, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 72.0, "date": "2026-02-24" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 71.3, "date": "2025-11-04" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 70.8, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3-Coder-Next", "short_name": "Qwen3-Coder-Next", "provider": "Qwen", "score": 70.6, "date": "2026-01-30" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 70.0, "date": "2025-12-01" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 69.4, "date": "2025-10-22" }, { "model_id": "Qwen/Qwen3.5-35B-A3B", "short_name": "Qwen3.5-35B-A3B", "provider": "Qwen", "score": 69.2, "date": "2026-02-24" }, { "model_id": "GAIR/OpenSWE-72B", "short_name": "OpenSWE-72B", "provider": "GAIR", "score": 66.0, "date": "2026-03-15" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 62.4, "date": "2025-08-04" }, { "model_id": "GAIR/OpenSWE-32B", "short_name": "OpenSWE-32B", "provider": "GAIR", "score": 62.4, "date": "2026-03-15" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 60.7, "date": "2025-08-04" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 60.47, "date": "2026-03-10" }, { "model_id": "zai-org/GLM-4.7-Flash", "short_name": "GLM-4.7-Flash", "provider": "zai-org", "score": 59.2, "date": "2026-01-19" }, { "model_id": "facebook/cwm", "short_name": "cwm", "provider": "facebook", "score": 53.9, "date": "2025-08-25" }, { "model_id": "SWE-Lego/SWE-Lego-Qwen3-32B", "short_name": "SWE-Lego-Qwen3-32B", "provider": "SWE-Lego", "score": 52.6, "date": "2026-01-05" }, { "model_id": "SWE-Lego/SWE-Lego-Qwen3-8B", "short_name": "SWE-Lego-Qwen3-8B", "provider": "SWE-Lego", "score": 42.2, "date": "2025-12-29" } ] }, "swePro": { "name": "SWE-bench Pro", "models": [ { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 55.4, "date": "2026-02-12" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 50.7, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3-Coder-Next", "short_name": "Qwen3-Coder-Next", "provider": "Qwen", "score": 44.3, "date": "2026-01-30" }, { "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "short_name": "Qwen3-Coder-480B-A35B-Instruct", "provider": "Qwen", "score": 38.7, "date": "2025-07-22" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 36.81, "date": "2025-12-20" }, { "model_id": "moonshotai/Kimi-K2-Instruct", "short_name": "Kimi-K2-Instruct", "provider": "moonshotai", "score": 27.67, "date": "2025-07-11" }, { "model_id": "Qwen/Qwen3-235B-A22B", "short_name": "Qwen3-235B-A22B", "provider": "Qwen", "score": 21.41, "date": "2025-04-27" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 16.2, "date": "2025-08-04" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 15.56, "date": "2025-12-01" }, { "model_id": "google/gemma-3-27b-it", "short_name": "gemma-3-27b-it", "provider": "google", "score": 11.38, "date": "2025-03-01" }, { "model_id": "meta-llama/Llama-3.1-405B-Instruct", "short_name": "Llama-3.1-405B-Instruct", "provider": "meta-llama", "score": 11.18, "date": "2024-07-16" }, { "model_id": "zai-org/GLM-4.6", "short_name": "GLM-4.6", "provider": "zai-org", "score": 9.67, "date": "2025-09-29" }, { "model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "short_name": "Llama-4-Maverick-17B-128E-Instruct", "provider": "meta-llama", "score": 5.24, "date": "2025-04-01" } ] }, "mmluPro": { "name": "MMLU-Pro", "models": [ { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 88.0, "date": "2025-12-20" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 87.8, "date": "2026-02-16" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 87.1, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 86.7, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 86.1, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-35B-A3B", "short_name": "Qwen3.5-35B-A3B", "provider": "Qwen", "score": 85.3, "date": "2026-02-24" }, { "model_id": "deepseek-ai/DeepSeek-R1-0528", "short_name": "DeepSeek-R1-0528", "provider": "deepseek-ai", "score": 85.0, "date": "2025-05-28" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 85.0, "date": "2025-12-01" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 84.6, "date": "2025-11-04" }, { "model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "short_name": "Qwen3-235B-A22B-Thinking-2507", "provider": "Qwen", "score": 84.4, "date": "2025-07-25" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 84.4, "date": "2026-02-01" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 84.3, "date": "2025-12-22" }, { "model_id": "deepseek-ai/DeepSeek-R1", "short_name": "DeepSeek-R1", "provider": "deepseek-ai", "score": 84.0, "date": "2025-01-20" }, { "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", "short_name": "K-EXAONE-236B-A23B", "provider": "LGAI-EXAONE", "score": 83.8, "date": "2025-12-26" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 83.73, "date": "2026-03-10" }, { "model_id": "Qwen/Qwen3.5-9B", "short_name": "Qwen3.5-9B", "provider": "Qwen", "score": 82.5, "date": "2026-02-27" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 82.0, "date": "2025-10-22" }, { "model_id": "deepseek-ai/DeepSeek-V3-0324", "short_name": "DeepSeek-V3-0324", "provider": "deepseek-ai", "score": 81.2, "date": "2025-03-24" }, { "model_id": "jdopensource/JoyAI-LLM-Flash", "short_name": "JoyAI-LLM-Flash", "provider": "jdopensource", "score": 81.02, "date": "2026-02-14" }, { "model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "short_name": "Qwen3-Next-80B-A3B-Instruct", "provider": "Qwen", "score": 80.6, "date": "2025-09-09" }, { "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", "short_name": "Nemotron-Cascade-2-30B-A3B", "provider": "nvidia", "score": 79.8, "date": "2026-03-18" }, { "model_id": "Qwen/Qwen3.5-4B", "short_name": "Qwen3.5-4B", "provider": "Qwen", "score": 79.1, "date": "2026-02-27" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "provider": "nvidia", "score": 78.3, "date": "2025-12-04" }, { "model_id": "meituan-longcat/LongCat-Flash-Lite", "short_name": "LongCat-Flash-Lite", "provider": "meituan-longcat", "score": 78.29, "date": "2026-01-27" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", "provider": "nvidia", "score": 78.1, "date": "2025-12-06" }, { "model_id": "mistralai/Mistral-Small-4-119B-2603", "short_name": "Mistral-Small-4-119B-2603", "provider": "mistralai", "score": 78.0, "date": "2026-01-23" }, { "model_id": "arcee-ai/Trinity-Large-Preview", "short_name": "Trinity-Large-Preview", "provider": "arcee-ai", "score": 75.2, "date": "2026-01-27" }, { "model_id": "Qwen/Qwen3-4B-Thinking-2507", "short_name": "Qwen3-4B-Thinking-2507", "provider": "Qwen", "score": 74.0, "date": "2025-08-05" }, { "model_id": "tiiuae/Falcon-H1R-7B", "short_name": "Falcon-H1R-7B", "provider": "tiiuae", "score": 72.1, "date": "2025-10-29" }, { "model_id": "Qwen/Qwen3-4B-Instruct-2507", "short_name": "Qwen3-4B-Instruct-2507", "provider": "Qwen", "score": 69.6, "date": "2025-08-05" }, { "model_id": "deepseek-ai/DeepSeek-V3", "short_name": "DeepSeek-V3", "provider": "deepseek-ai", "score": 64.4, "date": "2024-12-25" }, { "model_id": "Qwen/Qwen3.5-2B", "short_name": "Qwen3.5-2B", "provider": "Qwen", "score": 55.3, "date": "2026-02-28" }, { "model_id": "meta-llama/Llama-3.1-8B-Instruct", "short_name": "Llama-3.1-8B-Instruct", "provider": "meta-llama", "score": 48.3, "date": "2024-07-18" }, { "model_id": "LiquidAI/LFM2.5-1.2B-Instruct", "short_name": "LFM2.5-1.2B-Instruct", "provider": "LiquidAI", "score": 44.35, "date": "2026-01-06" }, { "model_id": "Qwen/Qwen3.5-0.8B", "short_name": "Qwen3.5-0.8B", "provider": "Qwen", "score": 29.7, "date": "2026-02-28" } ] }, "gpqa": { "name": "GPQA Diamond", "models": [ { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 88.4, "date": "2026-02-16" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 87.6, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 86.6, "date": "2026-02-24" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 86.0, "date": "2026-02-11" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 85.7, "date": "2025-12-22" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 85.5, "date": "2026-02-24" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 85.2, "date": "2026-02-12" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 84.5, "date": "2025-11-04" }, { "model_id": "Qwen/Qwen3.5-35B-A3B", "short_name": "Qwen3.5-35B-A3B", "provider": "Qwen", "score": 84.2, "date": "2026-02-24" }, { "model_id": "Nanbeige/Nanbeige4.1-3B", "short_name": "Nanbeige4.1-3B", "provider": "Nanbeige", "score": 83.8, "date": "2026-02-10" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 83.5, "date": "2026-02-01" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 82.7, "date": "2026-03-10" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 82.4, "date": "2025-12-01" }, { "model_id": "Qwen/Qwen3.5-9B", "short_name": "Qwen3.5-9B", "provider": "Qwen", "score": 81.7, "date": "2026-02-27" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 80.9, "date": "2025-08-04" }, { "model_id": "meituan-longcat/LongCat-Flash-Thinking-2601", "short_name": "LongCat-Flash-Thinking-2601", "provider": "meituan-longcat", "score": 80.5, "date": "2026-01-14" }, { "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", "short_name": "K-EXAONE-236B-A23B", "provider": "LGAI-EXAONE", "score": 79.1, "date": "2025-12-26" }, { "model_id": "Qwen/Qwen3.5-4B", "short_name": "Qwen3.5-4B", "provider": "Qwen", "score": 76.2, "date": "2026-02-27" }, { "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", "short_name": "Nemotron-Cascade-2-30B-A3B", "provider": "nvidia", "score": 76.1, "date": "2026-03-18" }, { "model_id": "zai-org/GLM-4.7-Flash", "short_name": "GLM-4.7-Flash", "provider": "zai-org", "score": 75.2, "date": "2026-01-19" }, { "model_id": "jdopensource/JoyAI-LLM-Flash", "short_name": "JoyAI-LLM-Flash", "provider": "jdopensource", "score": 74.43, "date": "2026-02-14" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 74.2, "date": "2025-08-04" }, { "model_id": "deepseek-ai/DeepSeek-R1", "short_name": "DeepSeek-R1", "provider": "deepseek-ai", "score": 71.5, "date": "2025-01-20" }, { "model_id": "mistralai/Mistral-Small-4-119B-2603", "short_name": "Mistral-Small-4-119B-2603", "provider": "mistralai", "score": 71.2, "date": "2026-01-23" }, { "model_id": "Qwen/Qwen3-4B-Thinking-2507", "short_name": "Qwen3-4B-Thinking-2507", "provider": "Qwen", "score": 65.8, "date": "2025-08-05" }, { "model_id": "Qwen/Qwen3-4B-Instruct-2507", "short_name": "Qwen3-4B-Instruct-2507", "provider": "Qwen", "score": 62.0, "date": "2025-08-05" }, { "model_id": "LiquidAI/LFM2.5-1.2B-Instruct", "short_name": "LFM2.5-1.2B-Instruct", "provider": "LiquidAI", "score": 38.89, "date": "2026-01-06" }, { "model_id": "meta-llama/Llama-3.1-8B-Instruct", "short_name": "Llama-3.1-8B-Instruct", "provider": "meta-llama", "score": 30.4, "date": "2024-07-18" }, { "model_id": "Qwen/Qwen3.5-0.8B", "short_name": "Qwen3.5-0.8B", "provider": "Qwen", "score": 11.9, "date": "2026-02-28" } ] }, "hle": { "name": "HLE", "models": [ { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 50.4, "date": "2026-02-11" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 50.2, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 48.5, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 48.3, "date": "2026-02-16" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 47.5, "date": "2026-02-24" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 44.9, "date": "2025-11-04" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 42.8, "date": "2025-12-22" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 40.8, "date": "2025-12-01" }, { "model_id": "miromind-ai/MiroThinker-v1.5-235B", "short_name": "MiroThinker-v1.5-235B", "provider": "miromind-ai", "score": 39.2, "date": "2026-01-04" }, { "model_id": "nvidia/Nemotron-Orchestrator-8B", "short_name": "Nemotron-Orchestrator-8B", "provider": "nvidia", "score": 37.1, "date": "2025-11-25" }, { "model_id": "miromind-ai/MiroThinker-v1.5-30B", "short_name": "MiroThinker-v1.5-30B", "provider": "miromind-ai", "score": 31.0, "date": "2026-01-04" }, { "model_id": "meituan-longcat/LongCat-Flash-Thinking-2601", "short_name": "LongCat-Flash-Thinking-2601", "provider": "meituan-longcat", "score": 25.2, "date": "2026-01-14" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 23.1, "date": "2026-02-01" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 22.82, "date": "2026-03-10" }, { "model_id": "Qwen/Qwen3.5-35B-A3B", "short_name": "Qwen3.5-35B-A3B", "provider": "Qwen", "score": 22.4, "date": "2026-02-24" }, { "model_id": "Nanbeige/Nanbeige4.1-3B", "short_name": "Nanbeige4.1-3B", "provider": "Nanbeige", "score": 22.29, "date": "2026-02-10" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 22.2, "date": "2025-12-20" }, { "model_id": "XiaomiMiMo/MiMo-V2-Flash", "short_name": "MiMo-V2-Flash", "provider": "XiaomiMiMo", "score": 22.1, "date": "2025-12-16" }, { "model_id": "MiniMaxAI/MiniMax-M2.5", "short_name": "MiniMax-M2.5", "provider": "MiniMaxAI", "score": 19.4, "date": "2026-02-12" }, { "model_id": "openbmb/AgentCPM-Explore", "short_name": "AgentCPM-Explore", "provider": "openbmb", "score": 19.1, "date": "2026-01-11" }, { "model_id": "openai/gpt-oss-120b", "short_name": "gpt-oss-120b", "provider": "openai", "score": 19.0, "date": "2025-08-04" }, { "model_id": "nvidia/Nemotron-Cascade-2-30B-A3B", "short_name": "Nemotron-Cascade-2-30B-A3B", "provider": "nvidia", "score": 17.7, "date": "2026-03-18" }, { "model_id": "openai/gpt-oss-20b", "short_name": "gpt-oss-20b", "provider": "openai", "score": 17.3, "date": "2025-08-04" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "provider": "nvidia", "score": 15.5, "date": "2025-12-04" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", "short_name": "NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", "provider": "nvidia", "score": 15.5, "date": "2025-12-06" }, { "model_id": "zai-org/GLM-4.7-Flash", "short_name": "GLM-4.7-Flash", "provider": "zai-org", "score": 14.4, "date": "2026-01-19" }, { "model_id": "LGAI-EXAONE/K-EXAONE-236B-A23B", "short_name": "K-EXAONE-236B-A23B", "provider": "LGAI-EXAONE", "score": 13.6, "date": "2025-12-26" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 12.5, "date": "2025-10-22" }, { "model_id": "tiiuae/Falcon-H1R-7B", "short_name": "Falcon-H1R-7B", "provider": "tiiuae", "score": 11.1, "date": "2025-10-29" }, { "model_id": "HelpingAI/Dhanishtha-2.0-0126", "short_name": "Dhanishtha-2.0-0126", "provider": "HelpingAI", "score": 9.92, "date": "2026-01-01" } ] }, "aime2026": { "name": "AIME 2026", "models": [ { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 96.67, "date": "2026-02-01" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 95.83, "date": "2026-01-01" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 95.83, "date": "2026-02-11" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 94.17, "date": "2025-12-01" }, { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 93.33, "date": "2026-02-16" }, { "model_id": "Qwen/Qwen3.5-35B-A3B", "short_name": "Qwen3.5-35B-A3B", "provider": "Qwen", "score": 93.33, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-9B", "short_name": "Qwen3.5-9B", "provider": "Qwen", "score": 92.5, "date": "2026-02-27" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 90.83, "date": "2026-02-24" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 90.0, "date": "2026-03-10" }, { "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "short_name": "Qwen3-30B-A3B-Thinking-2507", "provider": "Qwen", "score": 87.5, "date": "2025-07-29" }, { "model_id": "Qwen/Qwen3-4B-Thinking-2507", "short_name": "Qwen3-4B-Thinking-2507", "provider": "Qwen", "score": 82.5, "date": "2025-08-05" }, { "model_id": "lm-provers/QED-Nano", "short_name": "QED-Nano", "provider": "lm-provers", "score": 82.5, "date": "2026-02-12" } ] }, "hmmt2026": { "name": "HMMT Feb 2026", "models": [ { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 87.88, "date": "2026-02-16" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 87.12, "date": "2026-01-01" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 86.36, "date": "2026-02-01" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 86.36, "date": "2026-02-11" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 84.85, "date": "2026-03-10" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 84.09, "date": "2025-12-01" }, { "model_id": "Qwen/Qwen3.5-35B-A3B", "short_name": "Qwen3.5-35B-A3B", "provider": "Qwen", "score": 81.82, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 81.06, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "short_name": "Qwen3-30B-A3B-Thinking-2507", "provider": "Qwen", "score": 78.79, "date": "2025-07-29" }, { "model_id": "Qwen/Qwen3.5-9B", "short_name": "Qwen3.5-9B", "provider": "Qwen", "score": 71.21, "date": "2026-02-27" }, { "model_id": "lm-provers/QED-Nano", "short_name": "QED-Nano", "provider": "lm-provers", "score": 62.88, "date": "2026-02-12" }, { "model_id": "Qwen/Qwen3-4B-Thinking-2507", "short_name": "Qwen3-4B-Thinking-2507", "provider": "Qwen", "score": 53.03, "date": "2025-08-05" } ] }, "olmOcr": { "name": "olmOCR-bench", "models": [ { "model_id": "datalab-to/chandra-ocr-2", "short_name": "chandra-ocr-2", "provider": "datalab-to", "score": 85.9, "date": "2026-03-16" }, { "model_id": "rednote-hilab/dots.mocr", "short_name": "dots.mocr", "provider": "rednote-hilab", "score": 83.9, "date": "2026-03-19" }, { "model_id": "lightonai/LightOnOCR-2-1B", "short_name": "LightOnOCR-2-1B", "provider": "lightonai", "score": 83.2, "date": "2026-01-16" }, { "model_id": "datalab-to/chandra", "short_name": "chandra", "provider": "datalab-to", "score": 83.1, "date": "2025-10-21" }, { "model_id": "infly/Infinity-Parser-7B", "short_name": "Infinity-Parser-7B", "provider": "infly", "score": 82.5, "date": "2025-10-17" }, { "model_id": "allenai/olmOCR-2-7B-1025-FP8", "short_name": "olmOCR-2-7B-1025-FP8", "provider": "allenai", "score": 82.4, "date": "2025-10-06" }, { "model_id": "PaddlePaddle/PaddleOCR-VL", "short_name": "PaddleOCR-VL", "provider": "PaddlePaddle", "score": 80.0, "date": "2025-10-16" }, { "model_id": "baidu/Qianfan-OCR", "short_name": "Qianfan-OCR", "provider": "baidu", "score": 79.8, "date": "2026-03-18" }, { "model_id": "rednote-hilab/dots.ocr", "short_name": "dots.ocr", "provider": "rednote-hilab", "score": 79.1, "date": "2025-07-30" }, { "model_id": "deepseek-ai/DeepSeek-OCR-2", "short_name": "DeepSeek-OCR-2", "provider": "deepseek-ai", "score": 76.3, "date": "2026-01-27" }, { "model_id": "lightonai/LightOnOCR-1B-1025", "short_name": "LightOnOCR-1B-1025", "provider": "lightonai", "score": 76.1, "date": "2025-10-20" }, { "model_id": "deepseek-ai/DeepSeek-OCR", "short_name": "DeepSeek-OCR", "provider": "deepseek-ai", "score": 75.7, "date": "2025-10-17" }, { "model_id": "opendatalab/MinerU2.5-2509-1.2B", "short_name": "MinerU2.5-2509-1.2B", "provider": "opendatalab", "score": 75.2, "date": "2025-09-17" }, { "model_id": "zai-org/GLM-OCR", "short_name": "GLM-OCR", "provider": "zai-org", "score": 75.2, "date": "2026-01-30" }, { "model_id": "FireRedTeam/FireRed-OCR", "short_name": "FireRed-OCR", "provider": "FireRedTeam", "score": 70.2, "date": "2026-02-28" }, { "model_id": "nanonets/Nanonets-OCR2-3B", "short_name": "Nanonets-OCR2-3B", "provider": "nanonets", "score": 69.5, "date": "2025-10-13" } ] }, "terminalBench": { "name": "Terminal-Bench 2.0", "models": [ { "model_id": "Qwen/Qwen3.5-397B-A17B", "short_name": "Qwen3.5-397B-A17B", "provider": "Qwen", "score": 52.5, "date": "2026-02-16" }, { "model_id": "zai-org/GLM-5", "short_name": "GLM-5", "provider": "zai-org", "score": 52.4, "date": "2026-02-11" }, { "model_id": "stepfun-ai/Step-3.5-Flash", "short_name": "Step-3.5-Flash", "provider": "stepfun-ai", "score": 51.0, "date": "2026-02-01" }, { "model_id": "Qwen/Qwen3.5-122B-A10B", "short_name": "Qwen3.5-122B-A10B", "provider": "Qwen", "score": 49.4, "date": "2026-02-24" }, { "model_id": "moonshotai/Kimi-K2.5", "short_name": "Kimi-K2.5", "provider": "moonshotai", "score": 43.2, "date": "2026-01-01" }, { "model_id": "Qwen/Qwen3.5-27B", "short_name": "Qwen3.5-27B", "provider": "Qwen", "score": 41.6, "date": "2026-02-24" }, { "model_id": "Qwen/Qwen3.5-35B-A3B", "short_name": "Qwen3.5-35B-A3B", "provider": "Qwen", "score": 40.5, "date": "2026-02-24" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 39.6, "date": "2025-12-01" }, { "model_id": "Qwen/Qwen3-Coder-Next", "short_name": "Qwen3-Coder-Next", "provider": "Qwen", "score": 36.2, "date": "2026-01-30" }, { "model_id": "moonshotai/Kimi-K2-Thinking", "short_name": "Kimi-K2-Thinking", "provider": "moonshotai", "score": 35.7, "date": "2025-11-04" }, { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 33.4, "date": "2025-12-22" }, { "model_id": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "short_name": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "provider": "nvidia", "score": 31.0, "date": "2026-03-10" }, { "model_id": "MiniMaxAI/MiniMax-M2", "short_name": "MiniMax-M2", "provider": "MiniMaxAI", "score": 30.0, "date": "2025-10-22" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 29.2, "date": "2025-12-20" }, { "model_id": "moonshotai/Kimi-K2-Instruct", "short_name": "Kimi-K2-Instruct", "provider": "moonshotai", "score": 27.8, "date": "2025-07-11" }, { "model_id": "nvidia/Nemotron-Terminal-32B", "short_name": "Nemotron-Terminal-32B", "provider": "nvidia", "score": 27.4, "date": "2026-02-17" }, { "model_id": "zai-org/GLM-4.6", "short_name": "GLM-4.6", "provider": "zai-org", "score": 24.5, "date": "2025-09-29" }, { "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "short_name": "Qwen3-Coder-480B-A35B-Instruct", "provider": "Qwen", "score": 23.9, "date": "2025-07-22" }, { "model_id": "nvidia/Nemotron-Terminal-14B", "short_name": "Nemotron-Terminal-14B", "provider": "nvidia", "score": 20.2, "date": "2026-02-17" }, { "model_id": "nvidia/Nemotron-Terminal-8B", "short_name": "Nemotron-Terminal-8B", "provider": "nvidia", "score": 13.0, "date": "2026-02-17" } ] }, "evasionBench": { "name": "EvasionBench", "models": [ { "model_id": "zai-org/GLM-4.7", "short_name": "GLM-4.7", "provider": "zai-org", "score": 82.91, "date": "2025-12-22" }, { "model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "short_name": "Qwen3-Coder-480B-A35B-Instruct", "provider": "Qwen", "score": 78.16, "date": "2025-07-22" }, { "model_id": "MiniMaxAI/MiniMax-M2.1", "short_name": "MiniMax-M2.1", "provider": "MiniMaxAI", "score": 71.31, "date": "2025-12-20" }, { "model_id": "deepseek-ai/DeepSeek-V3.2", "short_name": "DeepSeek-V3.2", "provider": "deepseek-ai", "score": 66.88, "date": "2025-12-01" }, { "model_id": "moonshotai/Kimi-K2-Instruct-0905", "short_name": "Kimi-K2-Instruct-0905", "provider": "moonshotai", "score": 66.68, "date": "2025-09-03" } ] } }, "logos": { "datalab-to": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png", "opendatalab": "https://cdn-avatars.huggingface.co/v1/production/uploads/639c3afa7432f2f5d16b7296/yqxxBknyeqkGnYsjoaR4M.png", "stepfun-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png", "baidu": "https://cdn-avatars.huggingface.co/v1/production/uploads/64f187a2cc1c03340ac30498/TYYUxK8xD1AxExFMWqbZD.png", "GAIR": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png", "LGAI-EXAONE": "https://cdn-avatars.huggingface.co/v1/production/uploads/66a899a72f11aaf66001a8dc/UfdrP3GMo9pNT62BaMnhw.png", "allenai": "https://cdn-avatars.huggingface.co/v1/production/uploads/652db071b62cf1f8463221e2/CxxwFiaomTa1MCX_B7-pT.png", "google": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png", "arcee-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6435718aaaef013d1aec3b8b/GZPnGkfMn8Ino6JbkL4fJ.png", "mistralai": "https://cdn-avatars.huggingface.co/v1/production/uploads/634c17653d11eaedd88b314d/9OgyfKstSZtbmsmuG8MbU.png", "moonshotai": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg", "meta-llama": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png", "nanonets": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg", "SWE-Lego": "https://cdn-avatars.huggingface.co/v1/production/uploads/60fc2fcca6bdebbe52dfdaf4/AeuYwUH-CQCt893qnmAGa.png", "infly": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png", "openai": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png", "deepseek-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png", "Qwen": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png", "meituan-longcat": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png", "HelpingAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png", "openbmb": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png", "PaddlePaddle": "https://cdn-avatars.huggingface.co/v1/production/uploads/1654942635336-5f3ff69679c1ba4c353d0c5a.png", "jdopensource": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png", "FireRedTeam": "https://cdn-avatars.huggingface.co/v1/production/uploads/66ec07ef12bd743cfe91004e/PK3bgl6aF2RzW1QFKkq8R.png", "zai-org": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png", "XiaomiMiMo": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg", "facebook": "https://cdn-avatars.huggingface.co/v1/production/uploads/1592839207516-noauth.png", "miromind-ai": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png", "lightonai": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png", "LiquidAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/EsTgVtnM2IqVRKgPdfqcB.png", "tiiuae": "https://cdn-avatars.huggingface.co/v1/production/uploads/61a8d1aac664736898ffc84f/AT6cAB5ZNwCcqFMal71WD.jpeg", "MiniMaxAI": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg", "nvidia": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png", "lm-provers": "https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/Td4sH4W-LIdR89AqHCuw3.jpeg", "Nanbeige": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png", "rednote-hilab": "https://cdn-avatars.huggingface.co/v1/production/uploads/6807a1d6504547b3554b9c73/WgnnQDsz7FqnyTtv8mmRO.png" }, "colors": { "FireRedTeam": "#6366f1", "GAIR": "#0d9488", "HelpingAI": "#d97706", "LGAI-EXAONE": "#e11d48", "LiquidAI": "#7c3aed", "MiniMaxAI": "#16a34a", "Nanbeige": "#2563eb", "PaddlePaddle": "#ea580c", "Qwen": "#8b5cf6", "SWE-Lego": "#0891b2", "XiaomiMiMo": "#c026d3", "allenai": "#65a30d", "arcee-ai": "#dc2626", "baidu": "#0284c7", "datalab-to": "#a21caf", "deepseek-ai": "#059669", "facebook": "#9333ea", "google": "#ca8a04", "infly": "#be185d", "jdopensource": "#0369a1", "lightonai": "#6366f1", "lm-provers": "#0d9488", "meituan-longcat": "#d97706", "meta-llama": "#e11d48", "miromind-ai": "#7c3aed", "mistralai": "#16a34a", "moonshotai": "#2563eb", "nanonets": "#ea580c", "nvidia": "#8b5cf6", "openai": "#0891b2", "openbmb": "#c026d3", "opendatalab": "#65a30d", "rednote-hilab": "#dc2626", "stepfun-ai": "#0284c7", "tiiuae": "#a21caf", "zai-org": "#059669" }, "generated_at": "2026-03-22T08:00:27.015838+00:00" }