File size: 22,820 Bytes
edcaa7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb3df2e
 
edcaa7f
 
fb3df2e
 
edcaa7f
fb3df2e
edcaa7f
fb3df2e
 
 
 
edcaa7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bcb2aa6
 
fdc1e03
bcb2aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28569b7
bcb2aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edcaa7f
0da3d51
 
 
 
 
 
 
 
 
 
 
 
edcaa7f
 
 
0da3d51
bcb2aa6
 
edcaa7f
 
 
 
bcb2aa6
0da3d51
bcb2aa6
 
 
edcaa7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb3df2e
edcaa7f
fb3df2e
 
 
 
edcaa7f
 
 
 
 
 
 
 
 
cfb3156
 
 
 
 
 
 
 
 
 
 
 
edcaa7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb3df2e
 
 
 
 
 
 
 
 
 
edcaa7f
 
bcb2aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96eb17e
 
 
 
 
 
 
 
 
 
 
 
 
0da3d51
caf6918
bcb2aa6
 
6421da1
bcb2aa6
0da3d51
 
 
bcb2aa6
0da3d51
 
96eb17e
 
 
 
 
 
 
0da3d51
bcb2aa6
 
0da3d51
 
 
 
6421da1
 
 
0da3d51
 
fdc1e03
28569b7
0da3d51
 
96eb17e
0da3d51
 
 
bcb2aa6
 
 
 
0da3d51
6421da1
 
 
0da3d51
bcb2aa6
caf6918
bcb2aa6
 
caf6918
 
 
 
 
 
 
bcb2aa6
 
 
 
 
 
 
caf6918
bcb2aa6
 
edcaa7f
caf6918
 
 
 
 
 
 
 
 
 
bcb2aa6
edcaa7f
caf6918
bcb2aa6
caf6918
 
bcb2aa6
edcaa7f
bcb2aa6
 
 
 
edcaa7f
caf6918
 
 
 
 
 
 
 
edcaa7f
 
 
 
 
0da3d51
 
 
 
 
 
 
caf6918
 
 
 
 
 
 
 
 
 
0da3d51
 
caf6918
0da3d51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
caf6918
0da3d51
 
 
 
edcaa7f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
/**
 * Hard-coded showcase samples shown in the sample browser.
 *
 * Fields read by the rendering code in this file:
 * - qaId: identifier displayed in the card metadata and chips.
 * - id: used to build the video source path `./videos/<id>.mp4`.
 * - dimension: compared against the active filter to pick visible samples.
 * - order / target / question / openEndedAnswer: displayed text.
 * - options: answer choices ({ label, text }); the entry flagged `correct: true`
 *   is highlighted and its label is shown as the gold answer.
 */
const sampleData = [
  {
    qaId: "0KoG04_heLE@1@character_2@belief@first",
    id: "0KoG04_heLE",
    dimension: "Belief",
    order: "First-order",
    target: "Man",
    question: "What does the Woman believe about the Man's memory of their breakup at the moment he claims not to remember?",
    openEndedAnswer:
      "The Woman perceives the Man's claim of forgetfulness as a deliberate act to avoid taking responsibility for their breakup. His neutral expression and evasive body language suggest he is not genuinely confused but rather using forgetfulness as a tactic.",
    options: [
      { label: "A", text: "She believes he has blocked out the breakup details because facing them would be too painful for him." },
      { label: "B", text: "She believes he is mixing up the reasons for their breakup because so much time has passed and details have gotten fuzzy." },
      { label: "C", text: "She believes he wants to move forward and avoid revisiting their breakup because he thinks it will help them start fresh." },
      { label: "D", text: "She believes he is deliberately pretending to forget to avoid blame.", correct: true }
    ]
  },
  {
    qaId: "-2KGPYEFnsU@2@character_1@belief@second",
    id: "-2KGPYEFnsU",
    dimension: "Belief",
    order: "Second-order",
    target: "Dark-Haired Woman",
    question: "What does the Blonde Woman think the Dark-Haired Woman believes about her situation?",
    openEndedAnswer:
      "The Blonde Woman perceives the Dark-Haired Woman's apologetic tone and slight smile as signs of empathy, suggesting she sees her as a victim needing sympathy. However, the Blonde Woman might also sense that the Dark-Haired Woman feels awkward, possibly viewing her as someone who dwells on the negative.",
    options: [
      { label: "A", text: "She thinks the Dark-Haired Woman is trying to be supportive on the surface, but actually feels detached and uninterested in what she's going through." },
      { label: "B", text: "She thinks the Dark-Haired Woman sees her as a victim needing sympathy but maybe also as someone who dwells on the negative.", correct: true },
      { label: "C", text: "She thinks the Dark-Haired Woman assumes she made poor choices that led to the failed date, putting the responsibility solely on her actions." },
      { label: "D", text: "She thinks the Dark-Haired Woman suspects she is making her problems seem worse than they are, possibly to get comfort or reassurance from people around her." }
    ]
  },
  {
    qaId: "A-I3dc0Gct8@3@Willa@desire@first",
    id: "A-I3dc0Gct8",
    dimension: "Desire",
    order: "First-order",
    target: "Willa",
    question: "What does Willa desire when she says Will is supposed to be with someone like Becca?",
    openEndedAnswer:
      "Willa desires reassurance from Will that she is worthy despite her doubts. Her vulnerable tone suggests she wants him to prove her fears wrong.",
    options: [
      { label: "A", text: "She desires Will to acknowledge that Becca is a better match for him, hoping he will choose Becca and end their relationship." },
      { label: "B", text: "She desires Will to openly discuss their future together, wanting clarity and communication about where their relationship is headed." },
      { label: "C", text: "She desires Will to prove her wrong and reassure her worth, wanting him to show that she is valuable to him deeply.", correct: true },
      { label: "D", text: "She desires evidence that her insecurities are justified, seeking affirmation that she is unworthy of Will and that her doubts are correct." }
    ]
  },
  {
    qaId: "07YuuA_2O9w@3@Gina@desire@second",
    id: "07YuuA_2O9w",
    dimension: "Desire",
    order: "Second-order",
    target: "Jorge",
    question: "What does Gina think Jorge desires by giving this lecture?",
    openEndedAnswer:
      "Gina perceives Jorge's aggressive tone and gestures as an attempt to intimidate her into submission, reinforcing his control over the workplace dynamics.",
    options: [
      { label: "A", text: "Jorge desires Gina to demonstrate more initiative in managing the salon's daily operations." },
      { label: "B", text: "Jorge desires to repair his relationship with Gina by fostering open and respectful communication." },
      { label: "C", text: "Jorge desires to express vulnerability about his recent struggles that are not connected to their professional relationship." },
      { label: "D", text: "Jorge desires to reassert control and ensure Gina's obedience through intimidation.", correct: true }
    ]
  },
  {
    qaId: "0-HM2VCdrC0@1@Jane@intention@first",
    id: "0-HM2VCdrC0",
    dimension: "Intention",
    order: "First-order",
    target: "Kevin",
    question: "What does Jane intend to achieve by talking to Kevin?",
    openEndedAnswer:
      "Jane's sharp questioning and defensive posture indicate she wants to maintain emotional distance from Kevin, protecting herself from further manipulation.",
    options: [
      { label: "A", text: "To push Kevin away and assert emotional distance because she wants to protect herself", correct: true },
      { label: "B", text: "To question Kevin's motives and see if he will admit to using her, while remaining emotionally detached" },
      { label: "C", text: "To give Kevin an opportunity to defend his actions, hoping he will take responsibility and show genuine remorse" },
      { label: "D", text: "To keep Kevin uncertain about her true feelings by alternating between confrontation and brief moments of listening" }
    ]
  },
  {
    qaId: "HgQDAW28DsA@1@character_1@intention@second",
    id: "HgQDAW28DsA",
    dimension: "Intention",
    order: "Second-order",
    target: "Woman",
    question: "What does Man think the woman intends by saying 'We'll see'?",
    openEndedAnswer:
      "The woman's challenging tone suggests she is skeptical of the man's claim about his emotional state. Her response 'We'll see' implies she intends to test whether he is truly emotionally detached or if he is masking deeper feelings.",
    options: [
      { label: "A", text: "She wants to distance herself from the conversation, signaling that she prefers to avoid discussing their issues further." },
      { label: "B", text: "She uses 'We'll see' to subtly mock Man's statement, indicating she doesn't believe he means what he says." },
      { label: "C", text: "She hopes to prompt Man to express more about his feelings, inviting honest emotional conversation." },
      { label: "D", text: "She intends to test or challenge Man's claim, doubting his emotional detachment", correct: true }
    ]
  },
  {
    qaId: "B2CEGhwMjkQ@5@Juno@emotion@first",
    id: "B2CEGhwMjkQ",
    dimension: "Emotion",
    order: "First-order",
    target: "Juno",
    question: "What complex emotion is Juno experiencing while talking?",
    openEndedAnswer:
      "Juno's sarcastic tone masks a complex mix of hurt, disappointment, and defiance. Her gestures and gaze indicate she is struggling with these layered emotions.",
    options: [
      { label: "A", text: "Detachment and mild irritation about the conversation" },
      { label: "B", text: "Overwhelming frustration with no underlying vulnerability" },
      { label: "C", text: "Amusement concealed beneath a layer of wit and irony" },
      { label: "D", text: "A mix of hurt, disappointment, and defiance", correct: true }
    ]
  },
  {
    qaId: "h9Zol4P5u9Q@4@character_2@emotion@second",
    id: "h9Zol4P5u9Q",
    dimension: "Emotion",
    order: "Second-order",
    target: "Man",
    question: "What does Woman think the man believes about her emotional state during her outburst?",
    openEndedAnswer:
      "The woman believes the man thinks she is overreacting because his lack of visible concern or engagement suggests he dismisses her emotional display as exaggerated.",
    options: [
      { label: "A", text: "She thinks he believes she is overreacting and being overly emotional without cause.", correct: true },
      { label: "B", text: "She thinks he believes she is sincerely upset because of Sylvia's death and that he should comfort her." },
      { label: "C", text: "She thinks he believes she is exaggerating for effect and not actually serious about her accusations." },
      { label: "D", text: "She thinks he believes she is emotionally detached and unaffected by the situation with Sylvia." }
    ]
  },
  {
    qaId: "89yPHsT8AKg@74@knowledge@first_order",
    id: "89yPHsT8AKg",
    dimension: "Knowledge",
    order: "First-order",
    target: "Male protagonist",
    question: "Does the male protagonist know who replaced the female protagonist?",
    openEndedAnswer:
      "Yes. He knows, despite saying 'None,' because he consistently avoids eye contact with the female protagonist, keeps his head down, and appears tense.",
    options: [
      { label: "A", text: "Yes", correct: true },
      { label: "B", text: "No" }
    ]
  },
  {
    qaId: "ARD5AUnl7_M@33@knowledge@tom_second_order",
    id: "ARD5AUnl7_M",
    dimension: "Knowledge",
    order: "Second-order",
    target: "The man",
    question: "Does the man know that his wife feels both comforted and sad by what he said, and is crying with her face covered?",
    openEndedAnswer:
      "No. He does not know because he has his back turned to the woman and cannot know the specifics, even if he might guess some of it.",
    options: [
      { label: "A", text: "Yes" },
      { label: "B", text: "No", correct: true }
    ]
  }
];

// Dimension names rendered as filter chips, in display order. Each value is
// compared against a sample's `dimension` field to select the visible samples.
const filters = ["Belief", "Desire", "Intention", "Emotion", "Knowledge"];

/**
 * Open-ended (OE) results, keyed by judge ("gpt4o" / "gemini").
 *
 * Per judge:
 * - label: display name used in the results caption.
 * - deltaLabel: name of the other judge, shown in the caption as the delta baseline.
 * - rows: one entry per model with oeAcc (rendered as a percentage), oeScr
 *   (rendered with two decimals), rank (used only as the final sort tie-breaker)
 *   and an optional isOurs flag that adds the "Ours" tag and row styling.
 */
const judgeResults = {
  gpt4o: {
    label: "GPT-4o",
    deltaLabel: "Gemini",
    rows: [
      { model: "WildToM-Reasoner", rank: 1, oeAcc: 32.7, oeScr: 2.6, isOurs: true },
      { model: "GPT-4o-mini", rank: 2, oeAcc: 32.2, oeScr: 2.6 },
      { model: "Qwen3-Omni", rank: 3, oeAcc: 29.1, oeScr: 2.3 },
      { model: "Qwen3-VL", rank: 4, oeAcc: 28.6, oeScr: 2.2 },
      { model: "GPT-5-mini", rank: 5, oeAcc: 28.0, oeScr: 2.11 },
      { model: "GLM-4.6V", rank: 6, oeAcc: 16.6, oeScr: 1.9 },
      { model: "MiniCPM-V-4.5", rank: 7, oeAcc: 14.6, oeScr: 1.7 },
      { model: "Emotion-Qwen", rank: 8, oeAcc: 12.1, oeScr: 1.6 },
      { model: "Video-LLaVA", rank: 9, oeAcc: 8.4, oeScr: 1.7 },
      { model: "AffectGPT", rank: 10, oeAcc: 4.8, oeScr: 1.1 }
    ]
  },
  gemini: {
    label: "Gemini-2.5-Flash",
    deltaLabel: "GPT-4o",
    rows: [
      { model: "WildToM-Reasoner", rank: 1, oeAcc: 47.5, oeScr: 2.45, isOurs: true },
      { model: "Qwen3-VL", rank: 2, oeAcc: 40.0, oeScr: 2.3 },
      { model: "GPT-4o-mini", rank: 3, oeAcc: 37.5, oeScr: 2.23 },
      { model: "GLM-4.6V", rank: 4, oeAcc: 35.0, oeScr: 2.02 },
      { model: "GPT-5-mini", rank: 5, oeAcc: 25.0, oeScr: 1.4 },
      { model: "Qwen3-Omni", rank: 6, oeAcc: 22.5, oeScr: 1.68 },
      { model: "Emotion-Qwen", rank: 7, oeAcc: 20.0, oeScr: 1.43 },
      { model: "MiniCPM-V-4.5", rank: 8, oeAcc: 17.5, oeScr: 1.35 },
      { model: "AffectGPT", rank: 9, oeAcc: 15.0, oeScr: 1.35 },
      { model: "Video-LLaVA", rank: 10, oeAcc: 12.5, oeScr: 1.43 }
    ]
  }
};

// Multiple-choice (MC) results: one row per model with its mcAcc (rendered as a
// percentage). Rows are re-sorted by mcAcc at render time; isOurs adds the
// "Ours" tag and row styling.
const mcResults = [
  { model: "WildToM-Reasoner", mcAcc: 72.7, isOurs: true },
  { model: "Qwen3-VL", mcAcc: 62.1 },
  { model: "Qwen3-Omni", mcAcc: 61.8 },
  { model: "GPT-4o-mini", mcAcc: 57.2 },
  { model: "Emotion-Qwen", mcAcc: 54.2 },
  { model: "GLM-4.6V", mcAcc: 51.2 },
  { model: "MiniCPM-V-4.5", mcAcc: 46.8 },
  { model: "AffectGPT", mcAcc: 35.9 },
  { model: "Video-LLaVA", mcAcc: 25.8 }
];

// Mount points, looked up once when the script runs. getElementById returns
// null when the element is not in the document, so each of these may be null.
const filterRoot = document.getElementById("showcase-filters");
const showcaseRoot = document.getElementById("showcase-browser");
const resultsRoot = document.getElementById("results-table");
const taskToggleRoot = document.getElementById("task-toggle");
const judgeToggleRoot = document.getElementById("judge-toggle");
const resultsCaptionRoot = document.getElementById("results-caption");
// Current sample-browser selection: the active dimension filter and the index
// of the shown sample within that dimension's visible samples.
const carouselState = {
  activeDimension: "Belief",
  activeIndex: 0
};
// Current results-panel selection: task ("oe" or "mc"), judge key into
// judgeResults, and metric. NOTE(review): activeMetric is stored and passed
// along by the toggle handlers, but no rendering code visible in this file
// branches on it — confirm whether it is still needed.
const crossJudgeState = {
  activeTask: "oe",
  activeJudge: "gpt4o",
  activeMetric: "oeAcc"
};

/**
 * Rebuilds the dimension filter chips inside the #showcase-filters mount.
 *
 * One button is created per entry of `filters`; the one equal to `active` gets
 * the "active" class. Clicking a chip re-renders the sample browser for that
 * dimension (starting from its first sample).
 *
 * @param {string} active - Dimension name of the currently selected filter.
 * @returns {void}
 */
function renderFilters(active) {
  // Pages without the showcase section have no mount point; skip quietly, the
  // same way the results controls do when their mounts are missing.
  if (!filterRoot) {
    return;
  }

  filterRoot.innerHTML = "";
  filters.forEach((filter) => {
    const isActive = filter === active;
    const button = document.createElement("button");
    button.type = "button";
    button.className = `filter-chip${isActive ? " active" : ""}`;
    button.textContent = filter;
    // Expose the selected state to assistive tech, matching the toggle buttons.
    button.setAttribute("aria-pressed", String(isActive));
    button.addEventListener("click", () => renderSamples(filter));
    filterRoot.appendChild(button);
  });
}

/**
 * Collects the samples that belong to one dimension, in their original order.
 *
 * @param {string} active - Dimension name to match against each sample's `dimension`.
 * @returns {Array<object>} A new array with the matching entries of `sampleData`.
 */
function getVisibleSamples(active) {
  const matches = [];
  for (const sample of sampleData) {
    if (sample.dimension === active) {
      matches.push(sample);
    }
  }
  return matches;
}

/**
 * Escapes a value so it can be interpolated into HTML text or a quoted
 * attribute without being parsed as markup.
 *
 * @param {*} value - Any value; it is coerced with String().
 * @returns {string} The value with &, <, >, " and ' replaced by entities.
 */
function escapeSampleHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Renders the sample browser: the filter chips plus one sample card for the
 * given dimension.
 *
 * Updates `carouselState`, replaces the content of the #showcase-browser mount
 * with a single card (video, metadata, question, options and a collapsible
 * open-ended answer), and wires the previous/next buttons so they re-render the
 * neighbouring sample, wrapping around at both ends.
 *
 * @param {string} [active=carouselState.activeDimension] - Dimension to show.
 * @param {number} [index=0] - Index within the dimension's samples; any integer
 *   is accepted and wrapped into range.
 * @returns {void}
 */
function renderSamples(active = carouselState.activeDimension, index = 0) {
  // The showcase section is optional on the page; without its mount points
  // there is nothing to render into, so bail out instead of throwing.
  if (!filterRoot || !showcaseRoot) {
    return;
  }

  carouselState.activeDimension = active;
  renderFilters(active);

  const visible = getVisibleSamples(active);
  if (visible.length === 0) {
    showcaseRoot.innerHTML = "";
    return;
  }

  // Double modulo keeps negative indices (from "previous" on the first card) in range.
  const safeIndex = ((index % visible.length) + visible.length) % visible.length;
  carouselState.activeIndex = safeIndex;
  const item = visible[safeIndex];
  const esc = escapeSampleHtml;
  const gold = item.options.find((option) => option.correct)?.label ?? "N/A";

  // All sample text goes through esc() because the card is built via innerHTML.
  const optionsHtml = item.options
    .map(
      (option) => `
        <li class="option-item${option.correct ? " correct" : ""}">
          <span class="option-key">${esc(option.label)}</span>
          <span class="option-text">${esc(option.text)}</span>
        </li>
      `
    )
    .join("");

  const detailBlocks = item.openEndedAnswer
    ? `
      <div class="oe-card">
        <button type="button" class="oe-head oe-toggle" aria-expanded="false">
          <span class="oe-badge">Open-Ended Answer</span>
          <span class="oe-caret" aria-hidden="true">&#9662;</span>
        </button>
        <div class="oe-body" hidden>
          <p class="oe-text">${esc(item.openEndedAnswer)}</p>
        </div>
      </div>
    `
    : "";

  showcaseRoot.innerHTML = "";
  const card = document.createElement("article");
  card.className = "sample-card";
  card.innerHTML = `
    <div class="sample-media">
      <div class="sample-video-wrap">
        <video class="sample-video" controls preload="metadata">
          <source src="./videos/${encodeURIComponent(item.id)}.mp4" type="video/mp4">
        </video>
      </div>
      <div class="sample-video-note">
        <div class="sample-video-note-title">Case Metadata</div>
        <div class="sample-video-note-line"><span>QA ID</span><strong>${esc(item.qaId)}</strong></div>
        <div class="sample-video-note-line"><span>Target</span><strong>${esc(item.target || "N/A")}</strong></div>
        <div class="sample-video-note-line"><span>Reasoning</span><strong>${esc(item.order)}</strong></div>
      </div>
    </div>
    <div class="sample-content">
      <div class="sample-header">
        <div class="sample-counter">${esc(active)} case ${safeIndex + 1} / ${visible.length}</div>
        <div class="sample-nav">
          <button type="button" aria-label="Previous sample" data-nav="prev">&#8592;</button>
          <button type="button" aria-label="Next sample" data-nav="next">&#8594;</button>
        </div>
      </div>
      <div class="sample-meta">
        <span class="sample-chip">${esc(item.dimension)}</span>
        <span class="sample-chip">${esc(item.order)}</span>
        ${item.target ? `<span class="sample-chip">${esc(item.target)}</span>` : ""}
        <span class="sample-chip qa">QA ${esc(item.qaId)}</span>
        <span class="sample-chip gold">Gold ${esc(gold)}</span>
      </div>
      <h3 class="sample-question">${esc(item.question)}</h3>
      <div class="option-block">
        <strong>Options</strong>
        <ol class="option-list">
          ${optionsHtml}
        </ol>
      </div>
      ${detailBlocks}
    </div>
  `;
  showcaseRoot.appendChild(card);

  card.querySelector('[data-nav="prev"]').addEventListener("click", () => {
    renderSamples(active, safeIndex - 1);
  });
  card.querySelector('[data-nav="next"]').addEventListener("click", () => {
    renderSamples(active, safeIndex + 1);
  });

  // The open-ended block only exists when the sample has an answer.
  const oeToggle = card.querySelector(".oe-toggle");
  const oeBody = card.querySelector(".oe-body");
  if (oeToggle && oeBody) {
    oeToggle.addEventListener("click", () => {
      const expanded = oeToggle.getAttribute("aria-expanded") === "true";
      oeToggle.setAttribute("aria-expanded", String(!expanded));
      oeBody.hidden = expanded;
    });
  }
}

/**
 * Reads one open-ended metric from a result row.
 *
 * @param {{oeAcc: number, oeScr: number}} item - Result row.
 * @param {string} metric - "oeAcc" selects the accuracy; anything else selects the score.
 * @returns {number} The selected metric value.
 */
function getMetricValue(item, metric) {
  if (metric === "oeAcc") {
    return item.oeAcc;
  }
  return item.oeScr;
}

/**
 * Returns the key of the judge that is not currently active.
 *
 * @param {string} activeJudge - Key of the active judge.
 * @returns {string} "gemini" when the active judge is "gpt4o", otherwise "gpt4o".
 */
function getOtherJudge(activeJudge) {
  switch (activeJudge) {
    case "gpt4o":
      return "gemini";
    default:
      return "gpt4o";
  }
}

/**
 * Formats a metric value for display.
 *
 * @param {number} value - Metric value.
 * @param {string} metric - "oeAcc" formats as a percentage with one decimal;
 *   anything else formats as a plain number with two decimals.
 * @returns {string} The formatted value.
 */
function formatMetricValue(value, metric) {
  if (metric !== "oeAcc") {
    return value.toFixed(2);
  }
  const percent = value.toFixed(1);
  return `${percent}%`;
}

/**
 * Formats the difference between two judges' values for display.
 *
 * The sign is derived from the rounded value, so a tiny negative difference
 * that rounds to zero is shown as "+0.0 pp" / "+0.00" (the same as an exact
 * zero) instead of "-0.0 pp" / "-0.00".
 *
 * @param {number} delta - Difference (active judge minus other judge).
 * @param {string} metric - "oeAcc" formats with one decimal and a " pp" suffix;
 *   anything else formats with two decimals and no suffix.
 * @returns {string} Signed, formatted delta; non-negative values get a leading "+".
 */
function formatDeltaValue(delta, metric) {
  const isAccuracy = metric === "oeAcc";
  const digits = isAccuracy ? 1 : 2;

  // Round first; `=== 0` is also true for -0, which collapses it to a plain 0.
  const roundedValue = Number(delta.toFixed(digits));
  const displayValue = roundedValue === 0 ? 0 : roundedValue;

  const sign = displayValue >= 0 ? "+" : "";
  const text = `${sign}${displayValue.toFixed(digits)}`;
  return isAccuracy ? `${text} pp` : text;
}

/**
 * Picks the CSS modifier class for a delta value.
 *
 * @param {number} delta - Difference between two values.
 * @returns {string} "neutral" when the magnitude is below 0.05, otherwise
 *   "positive" for values above zero and "negative" for everything else.
 */
function getDeltaClass(delta) {
  const magnitude = Math.abs(delta);
  if (magnitude < 0.05) {
    return "neutral";
  }
  if (delta > 0) {
    return "positive";
  }
  return "negative";
}

/**
 * Builds one toggle button for the results toolbar.
 *
 * @param {string} label - Text shown on the button.
 * @param {boolean} isActive - Whether this button represents the current selection;
 *   adds the "active" class and aria-current, and drives aria-pressed.
 * @param {Function} onClick - Click handler attached to the button.
 * @returns {HTMLButtonElement} The configured button (not yet attached to the document).
 */
function createToggleButton(label, isActive, onClick) {
  const toggle = document.createElement("button");
  const classNames = ["toggle-btn"];
  if (isActive) {
    classNames.push("active");
  }

  toggle.type = "button";
  toggle.className = classNames.join(" ");
  toggle.textContent = label;

  toggle.setAttribute("aria-pressed", String(isActive));
  if (isActive) {
    toggle.setAttribute("aria-current", "true");
  }

  toggle.addEventListener("click", onClick);
  return toggle;
}

/**
 * Rebuilds the results toolbar: the task toggle (OE / MC) and, for the OE task
 * only, the judge toggle.
 *
 * Does nothing when either toggle mount is missing. Each button re-renders the
 * whole results panel with the chosen task or judge. For the MC task the judge
 * toggle (and its enclosing ".toolbar-row", if any) is emptied and hidden with
 * the "is-hidden" class.
 *
 * @returns {void}
 */
function renderControls() {
  if (!(taskToggleRoot && judgeToggleRoot)) {
    return;
  }
  const judgeRow = judgeToggleRoot.closest(".toolbar-row");

  // Task toggle.
  taskToggleRoot.innerHTML = "";
  const taskChoices = [
    { key: "oe", label: "OE" },
    { key: "mc", label: "MC" }
  ];
  for (const { key, label } of taskChoices) {
    const caption = label === "MC" ? "MC (main benchmark)" : "OE (cross-judge)";
    const isCurrent = key === crossJudgeState.activeTask;
    const taskButton = createToggleButton(caption, isCurrent, () => {
      renderResultsPanel(key, crossJudgeState.activeJudge, crossJudgeState.activeMetric);
    });
    taskToggleRoot.appendChild(taskButton);
  }

  // Judge toggle: only meaningful for the open-ended task.
  judgeToggleRoot.innerHTML = "";
  const showJudges = crossJudgeState.activeTask === "oe";
  if (!showJudges) {
    judgeToggleRoot.classList.add("is-hidden");
    if (judgeRow) {
      judgeRow.classList.add("is-hidden");
    }
    return;
  }

  judgeToggleRoot.classList.remove("is-hidden");
  if (judgeRow) {
    judgeRow.classList.remove("is-hidden");
  }

  const judgeChoices = [
    { key: "gpt4o", label: "GPT-4o" },
    { key: "gemini", label: "Gemini-2.5-Flash" }
  ];
  for (const { key, label } of judgeChoices) {
    const isCurrent = key === crossJudgeState.activeJudge;
    const judgeButton = createToggleButton(label, isCurrent, () => {
      renderResultsPanel(crossJudgeState.activeTask, key, crossJudgeState.activeMetric);
    });
    judgeToggleRoot.appendChild(judgeButton);
  }
}

/**
 * Renders the open-ended (OE) results table for one judge.
 *
 * Rows are sorted by OE accuracy, then OE score (both descending), then by the
 * row's `rank`. Each metric cell also shows the difference to the same model's
 * value under the other judge; when the other judge has no row for that model
 * the delta is shown as "n/a" rather than a misleading zero.
 *
 * Writes the caption into the #results-caption mount (when present) and
 * replaces the content of the #results-table mount.
 *
 * @param {string} activeJudge - Key into `judgeResults` ("gpt4o" or "gemini").
 * @returns {void}
 */
function renderOeResults(activeJudge) {
  const otherJudge = getOtherJudge(activeJudge);
  const currentRows = [...judgeResults[activeJudge].rows].sort((a, b) => {
    // Small tolerance so floating-point noise does not decide the order.
    const accDiff = b.oeAcc - a.oeAcc;
    if (Math.abs(accDiff) > 1e-6) {
      return accDiff;
    }
    const scrDiff = b.oeScr - a.oeScr;
    if (Math.abs(scrDiff) > 1e-6) {
      return scrDiff;
    }
    return a.rank - b.rank;
  });
  const otherMap = new Map(judgeResults[otherJudge].rows.map((item) => [item.model, item]));

  if (resultsCaptionRoot) {
    // \u00B7 is the middle dot; written as an escape so a wrong file encoding
    // cannot turn the separator into mojibake again.
    resultsCaptionRoot.textContent =
      `${judgeResults[activeJudge].label} \u00B7 Sorted by OE_acc \u00B7 per-metric deltas shown vs ${judgeResults[activeJudge].deltaLabel}`;
  }

  // Markup for one delta cell; `null` means there is nothing to compare against.
  const deltaHtml = (delta, metric) =>
    delta === null
      ? '<div class="result-delta neutral">n/a</div>'
      : `<div class="result-delta ${getDeltaClass(delta)}">${formatDeltaValue(delta, metric)}</div>`;

  resultsRoot.innerHTML = "";
  const header = document.createElement("div");
  header.className = "results-head results-head-oe";
  header.innerHTML = `
    <div></div>
    <div class="results-head-cell">Model</div>
    <div class="results-head-cell results-head-cell-right">OE_acc</div>
    <div class="results-head-cell results-head-cell-right">OE_scr</div>
  `;
  resultsRoot.appendChild(header);

  currentRows.forEach((item, index) => {
    const row = document.createElement("div");
    row.className = `result-row result-row-oe${item.isOurs ? " ours" : ""}`;
    const other = otherMap.get(item.model);
    const accDelta = other ? item.oeAcc - other.oeAcc : null;
    const scrDelta = other ? item.oeScr - other.oeScr : null;

    row.innerHTML = `
      <div class="result-rank">#${index + 1}</div>
      <div class="result-model-wrap">
        <div class="result-model">${item.model}</div>
        ${item.isOurs ? '<span class="result-tag">Ours</span>' : ""}
      </div>
      <div class="result-metric-col acc" data-label="OE_acc">
        <div class="result-score">${formatMetricValue(item.oeAcc, "oeAcc")}</div>
        ${deltaHtml(accDelta, "oeAcc")}
      </div>
      <div class="result-metric-col scr" data-label="OE_scr">
        <div class="result-score">${formatMetricValue(item.oeScr, "oeScr")}</div>
        ${deltaHtml(scrDelta, "oeScr")}
      </div>
    `;
    resultsRoot.appendChild(row);
  });
}

/**
 * Renders the multiple-choice (MC) results table.
 *
 * Rows from `mcResults` are sorted by MC accuracy (descending) on a copy, so
 * the source array keeps its order. Writes the caption into the
 * #results-caption mount (when present) and replaces the content of the
 * #results-table mount.
 *
 * @returns {void}
 */
function renderMcResults() {
  const ranked = [...mcResults].sort((a, b) => b.mcAcc - a.mcAcc);
  if (resultsCaptionRoot) {
    // \u00B7 is the middle dot; written as an escape so a wrong file encoding
    // cannot turn the separator into mojibake again.
    resultsCaptionRoot.textContent = "Main benchmark (MC) \u00B7 Sorted by MC_acc";
  }

  resultsRoot.innerHTML = "";
  const header = document.createElement("div");
  header.className = "results-head results-head-mc";
  header.innerHTML = `
    <div></div>
    <div class="results-head-cell">Model</div>
    <div class="results-head-cell results-head-cell-right">MC_acc</div>
    <div class="results-head-cell results-head-cell-right">Setting</div>
  `;
  resultsRoot.appendChild(header);

  ranked.forEach((item, index) => {
    const row = document.createElement("div");
    row.className = `result-row result-row-mc${item.isOurs ? " ours" : ""}`;
    row.innerHTML = `
      <div class="result-rank">#${index + 1}</div>
      <div class="result-model-wrap">
        <div class="result-model">${item.model}</div>
        ${item.isOurs ? '<span class="result-tag">Ours</span>' : ""}
      </div>
      <div class="result-score">${item.mcAcc.toFixed(1)}%</div>
      <div class="result-delta neutral">single-judge MC setting</div>
    `;
    resultsRoot.appendChild(row);
  });
}

/**
 * Entry point for the results section: records the selection, then redraws the
 * toolbar and the matching table.
 *
 * Does nothing when the #results-table mount is missing. Every argument
 * defaults to the value currently held in `crossJudgeState`.
 *
 * @param {string} [activeTask] - "mc" renders the MC table; anything else renders the OE table.
 * @param {string} [activeJudge] - Judge key passed to the OE table.
 * @param {string} [activeMetric] - Stored in the state as given.
 * @returns {void}
 */
function renderResultsPanel(
  activeTask = crossJudgeState.activeTask,
  activeJudge = crossJudgeState.activeJudge,
  activeMetric = crossJudgeState.activeMetric
) {
  if (!resultsRoot) {
    return;
  }

  Object.assign(crossJudgeState, { activeTask, activeJudge, activeMetric });
  renderControls();

  const showMc = crossJudgeState.activeTask === "mc";
  if (!showMc) {
    renderOeResults(crossJudgeState.activeJudge);
    return;
  }
  renderMcResults();
}

// Initial render when the script runs: the OE results under the GPT-4o judge,
// then the first "Belief" sample in the showcase browser.
renderResultsPanel("oe", "gpt4o", "oeAcc");
renderSamples("Belief", 0);