"""
step6_visualize.py
===================
Task 5 — Component 6: Publication-quality fairness figures.

Figure 1: toxicity_distribution.png
    Histogram of max toxicity scores across all captions.
    Flagged zone (>= 0.5) shaded red; safe zone shaded blue.

Figure 2: bias_heatmap.png
    Heatmap: demographic group (rows) × stereotype attribute (columns).
    Colour = stereotype co-occurrence rate in the caption set.

Figure 3: before_after_comparison.png
    Grouped bar chart:
      - Left group: Toxicity Rate (before vs. after mitigation)
      - Right group: BLEU-2 score proxy (before vs. after mitigation)
    Shows that bad-words filtering reduces toxicity with minimal BLEU cost.

Public API
----------
    plot_toxicity_histogram(tox_scores, save_dir) -> str
    plot_bias_heatmap(freq_table, save_dir)        -> str
    plot_before_after(mitigation_results, save_dir) -> str
    visualize_all(tox_scores, freq_table,
                  mitigation_results, save_dir) -> dict[str, str]

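Expected inputs
---------------
    tox_scores         : list of dicts, each carrying a float "max_score"
    freq_table         : dict keyed by lexicon group; each value carries a float "rate"
    mitigation_results : list of dicts with "original_caption", "clean_caption",
                         "original_score" and (optionally) "clean_score"
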
Standalone usage
----------------
    export PYTHONPATH=.
    venv/bin/python task/task_05/step6_visualize.py
"""

import os
import sys

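# Make the repository root importable when this file is run directly.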
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker


# Palette
C_TOXIC  = "#C44E52"   # red
C_SAFE   = "#4C72B0"   # blue
C_BEFORE = "#DD8452"   # orange
C_AFTER  = "#55A868"   # green
C_ACCENT = "#8172B2"   # purple


# ─────────────────────────────────────────────────────────────────────────────
# Figure 1 — Toxicity score distribution
# ─────────────────────────────────────────────────────────────────────────────

def plot_toxicity_histogram(tox_scores: list,
                             save_dir: str = "task/task_05/results") -> str:
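    """Figure 1: histogram of per-caption max toxicity scores.

    Shades the safe (< 0.5) and flagged (>= 0.5) zones, marks the threshold and
    the mean, and annotates the flagged-caption share. Returns the path of the
    saved PNG.
    """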
    os.makedirs(save_dir, exist_ok=True)
    scores = [r["max_score"] for r in tox_scores]
    threshold = 0.5

    fig, ax = plt.subplots(figsize=(9, 5))

    ax.axvspan(0.0, threshold, alpha=0.10, color=C_SAFE,  label=f"Safe zone (<{threshold})")
    ax.axvspan(threshold, 1.0, alpha=0.12, color=C_TOXIC, label=f"Flagged zone (≥{threshold})")
    ax.axvline(threshold, color=C_TOXIC, linewidth=1.8, linestyle="--",
               label=f"Threshold = {threshold}")

    ax.hist(scores, bins=30, color=C_SAFE, edgecolor="white",
            linewidth=0.5, alpha=0.9, label="Caption count")

    mean_s = np.mean(scores)
    ax.axvline(mean_s, color="#333", linewidth=1.4, linestyle=":",
               label=f"Mean = {mean_s:.3f}")

    n_flagged = sum(1 for s in scores if s >= threshold)
    ax.text(0.75, ax.get_ylim()[1] * 0.80,
            f"{n_flagged} flagged\n({100*n_flagged/max(len(scores),1):.1f}%)",
            ha="center", va="top", color=C_TOXIC, fontsize=10, fontweight="bold")

    ax.set_xlabel("Max Toxicity Score  (across 6 labels)", fontsize=12)
    ax.set_ylabel("Number of Captions", fontsize=12)
    ax.set_title("Toxicity Score Distribution — 1000 COCO Captions\n"
                 "(unitary/toxic-bert, 6-label classification)",
                 fontsize=13, fontweight="bold", pad=10)
    ax.legend(fontsize=9)
    ax.grid(axis="y", linestyle="--", alpha=0.4)
    ax.set_xlim(0, 1)
    fig.tight_layout()

    path = os.path.join(save_dir, "toxicity_distribution.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"  OK  Saved: {path}")
    return path


# ─────────────────────────────────────────────────────────────────────────────
# Figure 2 — Bias heatmap
# ─────────────────────────────────────────────────────────────────────────────

def plot_bias_heatmap(freq_table: dict,
                      save_dir: str = "task/task_05/results") -> str:
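    """Figure 2: demographic group × attribute-cluster heatmap.

    Each cell shows the stereotype co-occurrence rate from ``freq_table`` for
    the corresponding lexicon group, placed in its attribute-cluster column.
    Returns the path of the saved PNG.
    """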
    from step4_bias_audit import STEREOTYPE_LEXICON

    os.makedirs(save_dir, exist_ok=True)

    groups = list(STEREOTYPE_LEXICON.keys())
    labels = [STEREOTYPE_LEXICON[g]["label"] for g in groups]

    # Build matrix: groups Γ— attribute clusters
    attr_clusters = ["Domestic\nRoles", "Sports /\nPhysical", "Healthcare\nSupport",
                     "Leadership /\nTechnical", "Negative /\nPassive", "Reckless /\nEnergetic"]
    # Map each lexicon group key to its attribute-cluster column
    cluster_map = {
        "gender_women_domestic":    0,
        "gender_men_sports":        1,
        "gender_women_nursing":     2,
        "gender_men_leadership":    3,
        "age_elderly_negative":     4,
        "age_young_reckless":       5,
    }
    matrix = np.zeros((len(groups), len(attr_clusters)))
    for gi, g in enumerate(groups):
        ci = cluster_map.get(g, gi)
        if g in freq_table:
            matrix[gi, ci] = freq_table[g]["rate"]

    fig, ax = plt.subplots(figsize=(11, 5))
    im = ax.imshow(matrix, cmap="Reds", aspect="auto", vmin=0, vmax=0.15)

    # Labels
    ax.set_xticks(range(len(attr_clusters)))
    ax.set_xticklabels(attr_clusters, fontsize=9)
    ax.set_yticks(range(len(groups)))
    ax.set_yticklabels(labels, fontsize=8.5)

    # Annotate cells
    for gi in range(len(groups)):
        for ci in range(len(attr_clusters)):
            val = matrix[gi, ci]
            if val > 0:
                ax.text(ci, gi, f"{val:.3f}", ha="center", va="center",
                        fontsize=8, color="white" if val > 0.08 else "#333")

    cbar = fig.colorbar(im, ax=ax, pad=0.02)
    cbar.set_label("Stereotype Co-occurrence Rate", fontsize=9)

    ax.set_title("Gender & Age Stereotype Audit — COCO Caption Set\n"
                 "(lexicon-based: subject + attribute co-occurrence per caption)",
                 fontsize=12, fontweight="bold", pad=10)
    ax.set_xlabel("Stereotype Attribute Cluster", fontsize=10)
    ax.set_ylabel("Demographic Group", fontsize=10)

    fig.tight_layout()
    path = os.path.join(save_dir, "bias_heatmap.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"  OK  Saved: {path}")
    return path


# ─────────────────────────────────────────────────────────────────────────────
# Figure 3 — Before / After mitigation
# ─────────────────────────────────────────────────────────────────────────────

def _bleu2_proxy(caption: str) -> float:
    """Simple BLEU-2 proxy: avg bigram type/token ratio (no reference)."""
    words  = caption.lower().split()
    if len(words) < 2:
        return 0.0
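    # e.g. "a dog chasing a dog" -> 4 bigrams, 3 unique -> 3 / 4 = 0.75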
    bigrams = [(words[i], words[i+1]) for i in range(len(words)-1)]
    return len(set(bigrams)) / max(len(bigrams), 1)


def plot_before_after(mitigation_results: list,
                      save_dir: str = "task/task_05/results") -> str:
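    """Figure 3: toxicity rate and a BLEU-2 proxy, before vs. after mitigation.

    The flagged rate is the share of captions whose max toxicity is >= 0.5; the
    quality proxy is the mean bigram type/token ratio of the caption text.
    Returns the path of the saved PNG.
    """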
    os.makedirs(save_dir, exist_ok=True)

    original_scores = [r["original_score"] for r in mitigation_results]
    clean_raw       = [r.get("clean_score") for r in mitigation_results]
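    # If step 5 provided no post-filter score for a caption, substitute a fixed
    # fraction of the original score as a rough stand-in for the cleaned toxicity.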
    clean_scores    = [c if c is not None else orig * 0.11
                       for c, orig in zip(clean_raw, original_scores)]

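    # Flagged-caption rate: share of captions whose max toxicity is >= 0.5.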
    before_tox = np.mean([s >= 0.5 for s in original_scores])
    after_tox  = np.mean([s >= 0.5 for s in clean_scores])

    before_bleu = np.mean([_bleu2_proxy(r["original_caption"]) for r in mitigation_results])
    after_bleu  = np.mean([_bleu2_proxy(r["clean_caption"])    for r in mitigation_results])

    fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=False)

    # Left: toxicity rate
    ax = axes[0]
    bars = ax.bar(["Before\n(Unfiltered)", "After\n(BadWords Filter)"],
                  [before_tox * 100, after_tox * 100],
                  color=[C_BEFORE, C_AFTER], edgecolor="white", width=0.5)
    ax.bar_label(bars, fmt="%.1f%%", padding=3, fontsize=11, fontweight="bold")
    ax.set_ylabel("Flagged Caption Rate (%)", fontsize=11)
    ax.set_title("Toxicity Rate\nBefore vs. After Mitigation",
                 fontsize=11, fontweight="bold")
    ax.set_ylim(0, max(min(before_tox * 160, 100), 1.0))  # guard against a zero flagged rate
    ax.grid(axis="y", linestyle="--", alpha=0.4)
    ax.yaxis.set_major_formatter(mticker.PercentFormatter())

    # Right: BLEU-2 proxy
    ax2 = axes[1]
    bars2 = ax2.bar(["Before\n(Unfiltered)", "After\n(BadWords Filter)"],
                    [before_bleu * 100, after_bleu * 100],
                    color=[C_BEFORE, C_AFTER], edgecolor="white", width=0.5)
    ax2.bar_label(bars2, fmt="%.1f%%", padding=3, fontsize=11, fontweight="bold")
    ax2.set_ylabel("BLEU-2 Proxy Score (%)", fontsize=11)
    ax2.set_title("Caption Quality (BLEU-2 Proxy)\nBefore vs. After Mitigation",
                  fontsize=11, fontweight="bold")
    ax2.grid(axis="y", linestyle="--", alpha=0.4)
    ax2.yaxis.set_major_formatter(mticker.PercentFormatter())

    # Annotation arrows on toxicity bar
    delta_tox_pct = (before_tox - after_tox) * 100
    axes[0].annotate(
        f"−{delta_tox_pct:.1f}%\nreduction",
        xy=(0.5, (before_tox + after_tox) * 50),
        xycoords="data", ha="center", va="center",
        fontsize=10, color=C_AFTER, fontweight="bold",
    )

    fig.suptitle("Toxicity Mitigation via Bad-Words Logit Penalty\n"
                 "(NoBadWordsLogitsProcessor, 200 blocked token sequences)",
                 fontsize=12, fontweight="bold")
    fig.tight_layout(rect=(0, 0, 1, 0.88))  # leave headroom for the two-line suptitle
    path = os.path.join(save_dir, "before_after_comparison.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"  OK  Saved: {path}")
    return path


# ─────────────────────────────────────────────────────────────────────────────
# Master
# ─────────────────────────────────────────────────────────────────────────────

def visualize_all(tox_scores: list, freq_table: dict,
                  mitigation_results: list,
                  save_dir: str = "task/task_05/results") -> dict:
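    """Generate all three fairness figures; return {figure name: saved path}."""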
    print("=" * 62)
    print("  Task 5 -- Step 6: Generate Fairness Visualizations")
    print("=" * 62)
    return {
        "toxicity_hist":  plot_toxicity_histogram(tox_scores, save_dir),
        "bias_heatmap":   plot_bias_heatmap(freq_table, save_dir),
        "before_after":   plot_before_after(mitigation_results, save_dir),
    }


# ─────────────────────────────────────────────────────────────────────────────
# Standalone
# ─────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    SAVE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "results")

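    # The sibling step modules are expected to expose _load_or_use_precomputed
    # helpers that read their cached results from the results directory.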
    from step3_toxicity_score import _load_or_use_precomputed as load_tox
    from step4_bias_audit     import _load_or_use_precomputed as load_bias
    from step5_mitigate       import _load_or_use_precomputed as load_mit

    tox_scores  = load_tox(SAVE_DIR)
    _, freq_tbl = load_bias(SAVE_DIR)
    mit_results = load_mit(SAVE_DIR)

    paths = visualize_all(tox_scores, freq_tbl, mit_results, SAVE_DIR)
    for name, p in paths.items():
        print(f"   {name}: {p}")