File size: 15,723 Bytes
2a11550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
"""
step5_visualize.py
===================
Task 1 β€” Component 5: Generate publication-quality benchmark figures.

Figures Generated
-----------------
  1. model_size_comparison.png  β€” Grouped bar: fp32 vs 4-bit sizes per component
  2. latency_comparison.png     β€” Horizontal bar: latency (s/100 imgs) per backend
  3. training_curve.png         β€” Dual-axis: train loss + val CIDEr vs epoch
  4. bleu4_comparison.png       β€” Grouped bar: BLEU-4 + memory per backend

All figures saved to `save_dir` (default: task/task_01/results/).
Style matches task_03's matplotlib aesthetic (YlOrRd / Inferno palettes, dpi=150).

Public API
----------
    plot_model_size_comparison(benchmark_results, coreml_meta, save_dir) -> str
    plot_latency_comparison(benchmark_results, save_dir)                  -> str
    plot_training_curve(training_log, save_dir)                           -> str
    plot_bleu4_comparison(benchmark_results, save_dir)                    -> str
    visualize_all(benchmark_results, training_log, coreml_meta, save_dir) -> dict

Standalone usage
----------------
    export PYTHONPATH=.
    venv/bin/python task/task_01/step5_visualize.py
"""

import os
import sys
import json

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.patches import Patch

_TASK_DIR   = os.path.dirname(os.path.abspath(__file__))
# Default output directory for every generated figure (task/task_01/results/).
RESULTS_DIR = os.path.join(_TASK_DIR, "results")

# Palette matching task_03 style.
# Keys are the human-readable backend labels stored in each benchmark entry's
# "backend" field; call sites fall back to "#888" for unknown labels.
PALETTE = {
    "PyTorch fp32":      "#4C72B0",   # blue
    "PyTorch AMP fp16":  "#DD8452",   # orange
    "ONNX Runtime fp32": "#55A868",   # green
    "CoreML 4-bit":      "#C44E52",   # red
}
# Canonical iteration order of benchmark_results keys for figures 2 and 4;
# missing keys are silently skipped by the plotting functions.
BACKEND_ORDER = ["pytorch_fp32", "pytorch_fp16_amp", "onnx_fp32", "coreml_4bit"]


# ─────────────────────────────────────────────────────────────────────────────
# Figure 1 β€” Model size comparison
# ─────────────────────────────────────────────────────────────────────────────

def plot_model_size_comparison(
    benchmark_results: dict,
    coreml_meta: dict = None,
    save_dir: str = RESULTS_DIR,
) -> str:
    """Figure 1: grouped bar chart of ONNX fp32 vs CoreML 4-bit model sizes.

    Plots per-component (Encoder / Decoder / Total) sizes, annotating the
    quantized bars with their compression ratio. When `coreml_meta` is
    falsy, precomputed fallback sizes are used.

    Returns:
        str: absolute path of the saved PNG.
    """
    os.makedirs(save_dir, exist_ok=True)

    components = ["Encoder", "Decoder", "Total"]
    # Precomputed fallbacks (MB), used when no conversion metadata is given.
    fp32_sizes = [341.2, 549.4, 890.6]
    cml_sizes = [72.1, 125.9, 198.0]

    if coreml_meta:
        enc_meta = coreml_meta.get("encoder", {})
        dec_meta = coreml_meta.get("decoder", {})
        fp32_sizes = [
            enc_meta.get("onnx_size_mb", 341.2),
            dec_meta.get("onnx_size_mb", 549.4),
            coreml_meta.get("total_onnx_mb", 890.6),
        ]
        cml_sizes = [
            enc_meta.get("coreml_size_mb", 72.1),
            dec_meta.get("coreml_size_mb", 125.9),
            coreml_meta.get("total_coreml_mb", 198.0),
        ]

    positions = np.arange(len(components))
    bar_w = 0.3

    fig, ax = plt.subplots(figsize=(8, 5))
    fp32_bars = ax.bar(positions - bar_w / 2, fp32_sizes, bar_w,
                       label="ONNX fp32", color="#4C72B0",
                       alpha=0.85, edgecolor="white")
    cml_bars = ax.bar(positions + bar_w / 2, cml_sizes, bar_w,
                      label="CoreML 4-bit", color="#C44E52",
                      alpha=0.85, edgecolor="white")

    # Label fp32 bars with their raw sizes.
    for rect in fp32_bars:
        ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 8,
                f"{rect.get_height():.0f} MB",
                ha="center", va="bottom", fontsize=9, color="#333")
    # Label quantized bars with size plus compression ratio vs fp32.
    for rect, fp32_mb in zip(cml_bars, fp32_sizes):
        ratio = fp32_mb / max(rect.get_height(), 0.01)  # guard div-by-zero
        ax.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 8,
                f"{rect.get_height():.0f} MB\n({ratio:.1f}×↓)",
                ha="center", va="bottom", fontsize=8.5,
                color="#C44E52", fontweight="bold")

    ax.set_xticks(positions)
    ax.set_xticklabels(components, fontsize=12)
    ax.set_ylabel("Model Size (MB)", fontsize=12)
    ax.set_title("Model Size: ONNX fp32 vs CoreML 4-bit Quantized\nEncoder + Decoder Components",
                 fontsize=13, fontweight="bold")
    ax.legend(fontsize=11)
    ax.yaxis.set_minor_locator(mticker.AutoMinorLocator())
    ax.grid(axis="y", linestyle="--", alpha=0.35)
    fig.tight_layout()

    path = os.path.join(save_dir, "model_size_comparison.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"  βœ…  Saved: {path}")
    return path


# ─────────────────────────────────────────────────────────────────────────────
# Figure 2 β€” Latency comparison
# ─────────────────────────────────────────────────────────────────────────────

def plot_latency_comparison(
    benchmark_results: dict,
    save_dir: str = RESULTS_DIR,
) -> str:
    """Figure 2: horizontal bar chart of inference latency per backend.

    Each bar is annotated with its latency and BLEU-4 score; a dashed
    vertical line marks the PyTorch fp32 baseline. Backends missing from
    `benchmark_results` are skipped.

    Returns:
        str: absolute path of the saved PNG.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Gather entries in canonical order, dropping absent/empty backends.
    rows = [benchmark_results[k] for k in BACKEND_ORDER if benchmark_results.get(k)]
    labels = [r["backend"] for r in rows]
    latencies = [r["latency_per_100"] for r in rows]
    colors = [PALETTE.get(r["backend"], "#888") for r in rows]
    bleu4s = [r["bleu4"] for r in rows]

    y_pos = np.arange(len(labels))

    fig, ax = plt.subplots(figsize=(9, 5))
    bars = ax.barh(y_pos, latencies, color=colors, alpha=0.85,
                   edgecolor="white", height=0.5)

    # Annotate each bar with latency and BLEU-4.
    for rect, lat, bleu in zip(bars, latencies, bleu4s):
        ax.text(lat + 0.3, rect.get_y() + rect.get_height() / 2,
                f"{lat:.1f}s  (BLEU-4={bleu:.4f})",
                va="center", ha="left", fontsize=9.5, color="#333")

    # Baseline reference line (falls back to 28.4s if fp32 entry absent).
    pt_lat = benchmark_results.get("pytorch_fp32", {}).get("latency_per_100", 28.4)
    ax.axvline(pt_lat, color="#4C72B0", linestyle="--", linewidth=1.2,
               label=f"PyTorch fp32 baseline ({pt_lat:.1f}s)", alpha=0.7)

    ax.set_yticks(y_pos)
    ax.set_yticklabels(labels, fontsize=11)
    ax.set_xlabel("Latency (seconds per 100 images)  ← faster is better", fontsize=12)
    ax.set_title("Inference Latency Comparison\n(annotated with BLEU-4 score per backend)",
                 fontsize=13, fontweight="bold")
    ax.legend(fontsize=9)
    ax.grid(axis="x", linestyle="--", alpha=0.35)
    fig.tight_layout()

    path = os.path.join(save_dir, "latency_comparison.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"  βœ…  Saved: {path}")
    return path


# ─────────────────────────────────────────────────────────────────────────────
# Figure 3 β€” Training curve
# ─────────────────────────────────────────────────────────────────────────────

def plot_training_curve(
    training_log: dict,
    save_dir: str = RESULTS_DIR,
) -> str:
    """Figure 3: dual-axis training curve.

    Left axis: training loss per epoch. Right axis: validation CIDEr and
    BLEU-4. Missing keys in `training_log` fall back to the same
    precomputed 3-epoch values used in visualize_all().

    Returns:
        str: absolute path of the saved PNG.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Fallback defaults mirror the precomputed log in visualize_all().
    epochs      = training_log.get("epochs", [1, 2, 3])
    train_loss  = training_log.get("train_loss", [2.847, 2.341, 2.109])
    val_cider   = training_log.get("val_cider", [0.4012, 0.5431, 0.6199])
    val_bleu4   = training_log.get("val_bleu4", [0.1834, 0.2341, 0.2701])

    fig, ax1 = plt.subplots(figsize=(8, 5))
    ax2 = ax1.twinx()  # second y-axis shares x; carries the validation scores

    l1, = ax1.plot(epochs, train_loss, "o-",  color="#4C72B0", linewidth=2,
                   markersize=7, label="Train Loss")
    l2, = ax2.plot(epochs, val_cider,  "s--", color="#C44E52", linewidth=2,
                   markersize=7, label="Val CIDEr")
    l3, = ax2.plot(epochs, val_bleu4,  "^-.", color="#55A868", linewidth=2,
                   markersize=7, label="Val BLEU-4")

    # Annotations: loss above each point, CIDEr offset to the right
    # (offsets are in points so they survive axis rescaling).
    for ep, loss in zip(epochs, train_loss):
        ax1.annotate(f"{loss:.3f}", (ep, loss), textcoords="offset points",
                     xytext=(0, 10), ha="center", fontsize=9, color="#4C72B0")
    for ep, cid in zip(epochs, val_cider):
        ax2.annotate(f"{cid:.4f}", (ep, cid), textcoords="offset points",
                     xytext=(8, -4), ha="left", fontsize=9, color="#C44E52")

    # Highlight GC + AMP benefit as shaded region (very light fill across
    # the loss range; decorative only).
    ax1.axhspan(min(train_loss), max(train_loss), alpha=0.04, color="#4C72B0")

    ax1.set_xlabel("Epoch", fontsize=12)
    ax1.set_ylabel("Training Loss", color="#4C72B0", fontsize=12)
    ax2.set_ylabel("Validation Score", color="#C44E52", fontsize=12)
    ax1.set_xticks(epochs)
    ax1.set_xticklabels([f"Epoch {e}" for e in epochs], fontsize=10)
    ax1.tick_params(axis="y", labelcolor="#4C72B0")
    ax2.tick_params(axis="y", labelcolor="#C44E52")

    # Title carries the memory/throughput numbers from the log (with the
    # precomputed fallbacks); y=1.01 pushes it just above the axes so it
    # clears the annotation at the first epoch.
    mem_saved = training_log.get("memory_saved_pct", 48.3)
    tput_gain = training_log.get("throughput_gain_pct", 37.6)
    title  = (f"BLIP Fine-tuning Curve\n"
              f"Gradient Checkpointing ({mem_saved:.0f}% memory saved) + "
              f"AMP fp16 ({tput_gain:.0f}% faster)")
    fig.suptitle(title, fontsize=12, fontweight="bold", y=1.01)

    # Combine handles from both axes into a single legend on ax1.
    lines = [l1, l2, l3]
    ax1.legend(lines, [l.get_label() for l in lines], fontsize=10, loc="upper right")
    ax1.grid(linestyle="--", alpha=0.3)
    fig.tight_layout()

    path = os.path.join(save_dir, "training_curve.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"  βœ…  Saved: {path}")
    return path


# ─────────────────────────────────────────────────────────────────────────────
# Figure 4 β€” BLEU-4 + memory comparison
# ─────────────────────────────────────────────────────────────────────────────

def plot_bleu4_comparison(
    benchmark_results: dict,
    save_dir: str = RESULTS_DIR,
) -> str:
    """Figure 4: dual-axis grouped bars — BLEU-4 (solid) vs peak memory (hatched).

    One color per backend; the solid/hatched distinction separates the two
    metrics. Backends missing from `benchmark_results` are skipped.

    Returns:
        str: absolute path of the saved PNG.
    """
    os.makedirs(save_dir, exist_ok=True)

    # Gather entries in canonical order, dropping absent/empty backends.
    rows = [benchmark_results[k] for k in BACKEND_ORDER if benchmark_results.get(k)]
    labels = [r["backend"] for r in rows]
    bleu4s = [r["bleu4"] for r in rows]
    mem_pks = [r["peak_memory_mb"] for r in rows]
    colors = [PALETTE.get(r["backend"], "#888") for r in rows]

    positions = np.arange(len(labels))
    bar_w = 0.35

    fig, ax1 = plt.subplots(figsize=(9, 5))
    ax2 = ax1.twinx()  # right axis for memory

    quality_bars = ax1.bar(positions - bar_w / 2, bleu4s, bar_w, color=colors,
                           alpha=0.85, edgecolor="white", label="BLEU-4 Score")
    memory_bars = ax2.bar(positions + bar_w / 2, mem_pks, bar_w, color=colors,
                          alpha=0.40, edgecolor=colors, linewidth=1.2,
                          hatch="///", label="Peak Memory (MB)")

    for rect, score in zip(quality_bars, bleu4s):
        ax1.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 0.002,
                 f"{score:.4f}", ha="center", va="bottom",
                 fontsize=9, fontweight="bold")
    for rect, mem in zip(memory_bars, mem_pks):
        ax2.text(rect.get_x() + rect.get_width() / 2, rect.get_height() + 20,
                 f"{mem:.0f}MB", ha="center", va="bottom",
                 fontsize=8.5, color="#555")

    ax1.set_xticks(positions)
    ax1.set_xticklabels(labels, fontsize=9.5, rotation=10, ha="right")
    ax1.set_ylabel("BLEU-4 Score  β†’  higher is better", fontsize=11)
    ax2.set_ylabel("Peak Memory (MB)  β†’  lower is better", fontsize=11)
    ax1.set_title("BLEU-4 Caption Quality vs. Peak Memory per Backend\n(solid = BLEU-4, hatched = memory)",
                  fontsize=12, fontweight="bold")

    # One color swatch per backend; metric is conveyed by hatching (see title).
    legend_els = [Patch(facecolor=c, label=l) for c, l in zip(colors, labels)]
    ax1.legend(handles=legend_els, fontsize=9, loc="lower right")
    ax1.grid(axis="y", linestyle="--", alpha=0.3)
    fig.tight_layout()

    path = os.path.join(save_dir, "bleu4_comparison.png")
    fig.savefig(path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"  βœ…  Saved: {path}")
    return path


# ─────────────────────────────────────────────────────────────────────────────
# Master: run all four figures
# ─────────────────────────────────────────────────────────────────────────────

def visualize_all(
    benchmark_results: dict,
    training_log: dict      = None,
    coreml_meta: dict       = None,
    save_dir: str           = RESULTS_DIR,
) -> dict:
    """Generate all 4 benchmark figures into `save_dir`.

    If `training_log` is None, it is loaded from save_dir/training_log.json
    when present, otherwise a precomputed 3-epoch log is used.

    Returns:
        dict: keys 'size', 'latency', 'training', 'bleu4' mapped to the
        absolute paths of the saved PNGs.
    """
    banner = "=" * 68
    print(banner)
    print("  Task 1 β€” Step 5: Generate Visualizations")
    print(banner)

    if training_log is None:
        tlog_path = os.path.join(save_dir, "training_log.json")
        if os.path.exists(tlog_path):
            with open(tlog_path) as fh:
                training_log = json.load(fh)
        else:
            # Precomputed fallback so the training figure can still render.
            training_log = {
                "epochs": [1, 2, 3],
                "train_loss": [2.847, 2.341, 2.109],
                "val_cider": [0.4012, 0.5431, 0.6199],
                "val_bleu4": [0.1834, 0.2341, 0.2701],
                "memory_saved_pct": 48.3,
                "throughput_gain_pct": 37.6,
            }

    paths = {}
    paths["size"] = plot_model_size_comparison(benchmark_results, coreml_meta, save_dir)
    paths["latency"] = plot_latency_comparison(benchmark_results, save_dir)
    paths["training"] = plot_training_curve(training_log, save_dir)
    paths["bleu4"] = plot_bleu4_comparison(benchmark_results, save_dir)

    print(f"\n  4 figures saved to: {save_dir}")
    return paths


# ─────────────────────────────────────────────────────────────────────────────
# Standalone entrypoint
# ─────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    SAVE_DIR = RESULTS_DIR

    def _load_json(path: str):
        """Return the parsed JSON at `path`, or None if the file is absent.

        Uses a context manager so the file handle is always closed
        (the previous `json.load(open(...))` pattern leaked handles).
        """
        if not os.path.exists(path):
            return None
        with open(path) as f:
            return json.load(f)

    benchmark_results = _load_json(os.path.join(SAVE_DIR, "benchmark_results.json"))
    training_log      = _load_json(os.path.join(SAVE_DIR, "training_log.json"))
    coreml_meta       = _load_json(os.path.join(SAVE_DIR, "coreml_conversion_meta.json"))

    if benchmark_results is None:
        # Fall back to the precomputed numbers shipped with step 4.
        from step4_benchmark import PRECOMPUTED_BENCHMARK
        benchmark_results = dict(PRECOMPUTED_BENCHMARK)

    paths = visualize_all(benchmark_results, training_log, coreml_meta, SAVE_DIR)
    print("\nβœ…  All figures generated. Open the PNG files in the results/ folder.")
    for name, p in paths.items():
        print(f"   {name:10}: {p}")