import React, { useState } from 'react'; const rateModels = [ { label: 'GPT-5.4 Mini', syc: 10, total: 200, bg: 'linear-gradient(to right, #0a5c3a, #10a37f)' }, { label: 'GPT-5.4', syc: 12, total: 200, bg: 'linear-gradient(to right, #0d6b45, #15c896)' }, { label: 'Gemini 3.1 Pro', syc: 36, total: 200, bg: 'linear-gradient(to right, #1a56b0, #4285f4)' }, { label: 'Gemini Flash', syc: 42, total: 199, bg: 'linear-gradient(to right, #2563a8, #5b9ef5)' }, { label: 'GPT-4o Mini', syc: 44, total: 200, bg: 'linear-gradient(to right, #074a2e, #0d8a5f)' }, ]; const totals = { mini54: 10, gpt54: 12, gpt4omini: 44, flash: 42, gemPro: 36 }; const categories = [ { label: 'Capitulation under pushback', mini54: 3, gpt54: 3, gpt4omini: 10, flash: 13, gemPro: 6 }, { label: 'Validating one-sided narratives', mini54: 4, gpt54: 4, gpt4omini: 9, flash: 11, gemPro: 12 }, { label: 'Endorsing delusional beliefs', mini54: 2, gpt54: 2, gpt4omini: 15, flash: 9, gemPro: 5 }, { label: 'Excessive praise / flattery', mini54: 0, gpt54: 0, gpt4omini: 0, flash: 3, gemPro: 3 }, { label: 'Abandoning AI identity boundaries', mini54: 0, gpt54: 2, gpt4omini: 4, flash: 4, gemPro: 8 }, ]; const catModels = [ { key: 'mini54', bg: 'linear-gradient(to right, #0a5c3a, #10a37f)', dot: '#10a37f', label: 'GPT-5.4 Mini' }, { key: 'gpt54', bg: 'linear-gradient(to right, #0d6b45, #15c896)', dot: '#15c896', label: 'GPT-5.4' }, { key: 'gpt4omini', bg: 'linear-gradient(to right, #074a2e, #0d8a5f)', dot: '#0d8a5f', label: 'GPT-4o Mini' }, { key: 'flash', bg: 'linear-gradient(to right, #2563a8, #5b9ef5)', dot: '#5b9ef5', label: 'Gemini Flash' }, { key: 'gemPro', bg: 'linear-gradient(to right, #1a56b0, #4285f4)', dot: '#4285f4', label: 'Gemini 3.1 Pro' }, ]; const BenchmarkChart = () => { const [showCategories, setShowCategories] = useState(false); return (

{/* Header */}

Sycophancy Benchmark

Percentage of conversations where each model exhibited sycophantic behavior

{/* Chart 1: Overall Rate */} {!showCategories && (

Sycophancy rate by model

{rateModels.map((m) => { const pct = ((m.syc / m.total) * 100).toFixed(1); const barWidth = Math.max(parseFloat(pct) * 2.5, 8); return (

{m.label} {pct}%

); })}

* Percentage of conversations (out of 200) where the model exhibited sycophantic behavior.

)} {/* Chart 2: Category Breakdown */} {showCategories && (

Share of each model's sycophantic conversations

{/* Legend */}

{catModels.map((m) => (

{m.label} ({totals[m.key]} convs)

))}

{categories.map((cat) => (

{cat.label}

{catModels.map((model) => { const raw = cat[model.key]; const total = totals[model.key]; const pct = total > 0 ? Math.round((raw / total) * 100) : 0; const barWidth = Math.max(pct, 5); return (

{model.label}

{pct > 0 ? (

{pct}%

) : (

0%

)}

); })}

))}

* Percentages represent the share of each model's sycophantic conversations that fall into a given category.

)} {/* Footer */}

JulyAI Sycophancy Benchmark: {rateModels.length} SOTA models tested across 200 conversations each

); }; export default BenchmarkChart;