Spaces:
Running
Running
File size: 6,729 Bytes
"""Inference benchmarking for deployment sizing.
Measures throughput, latency, and memory usage for ControlNet inference
under various configurations (resolution, batch size, denoising steps).
Usage:
from landmarkdiff.benchmark import InferenceBenchmark
bench = InferenceBenchmark()
bench.add_result("gpu_a6000", latency_ms=142.3, throughput_fps=7.0, vram_gb=4.2)
bench.add_result("gpu_a6000", latency_ms=138.1, throughput_fps=7.2, vram_gb=4.2)
print(bench.summary())
"""
from __future__ import annotations
import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
@dataclass
class BenchmarkResult:
    """One recorded benchmark sample for a named configuration.

    Only ``config_name`` and ``latency_ms`` are required; every other field
    has a default so a sample can be recorded with partial information.

    Attributes:
        config_name: Label of the configuration this sample belongs to.
        latency_ms: Measured latency in milliseconds.
        throughput_fps: Throughput in frames per second (0.0 when not given).
        vram_gb: VRAM usage in GB (0.0 when not given).
        batch_size: Batch size used for the run.
        resolution: Resolution setting used for the run.
        num_inference_steps: Number of denoising steps used for the run.
        device: Free-form device label; empty when not given.
        metadata: Arbitrary extra key/value data attached to the sample.
    """

    # Required measurements.
    config_name: str
    latency_ms: float

    # Optional measurements.
    throughput_fps: float = 0.0
    vram_gb: float = 0.0

    # Run configuration.
    batch_size: int = 1
    resolution: int = 512
    num_inference_steps: int = 20
    device: str = ""

    # default_factory gives each instance its own dict instead of a shared one.
    metadata: dict[str, Any] = field(default_factory=dict)
class InferenceBenchmark:
    """Collect and analyze inference benchmarks.

    Samples are kept in insertion order in ``self.results`` and can be
    aggregated either across all samples or per configuration name.

    Args:
        model_name: Name of the model being benchmarked.
    """

    def __init__(self, model_name: str = "LandmarkDiff-ControlNet") -> None:
        self.model_name = model_name
        self.results: list[BenchmarkResult] = []

    def add_result(
        self,
        config_name: str,
        latency_ms: float,
        throughput_fps: float = 0.0,
        vram_gb: float = 0.0,
        batch_size: int = 1,
        resolution: int = 512,
        num_inference_steps: int = 20,
        device: str = "",
        **metadata: Any,
    ) -> None:
        """Record one benchmark sample.

        Args:
            config_name: Label used to group samples in the aggregates.
            latency_ms: Measured latency in milliseconds.
            throughput_fps: Throughput in FPS. When left at 0.0 and
                ``latency_ms`` is positive, it is derived as
                ``1000 / latency_ms * batch_size``.
            vram_gb: VRAM usage in GB.
            batch_size: Batch size used for the run.
            resolution: Resolution setting used for the run.
            num_inference_steps: Number of denoising steps used for the run.
            device: Free-form device label.
            **metadata: Extra key/value data stored on the sample.
        """
        # Guard on latency_ms > 0 so the derivation can never divide by zero.
        if throughput_fps == 0.0 and latency_ms > 0:
            throughput_fps = 1000.0 / latency_ms * batch_size
        self.results.append(
            BenchmarkResult(
                config_name=config_name,
                latency_ms=latency_ms,
                throughput_fps=throughput_fps,
                vram_gb=vram_gb,
                batch_size=batch_size,
                resolution=resolution,
                num_inference_steps=num_inference_steps,
                device=device,
                metadata=metadata,
            )
        )

    def mean_latency(self, config_name: str | None = None) -> float:
        """Mean latency in ms, optionally filtered by config (NaN if empty)."""
        return self._mean_latency_of(self._filter(config_name))

    def p99_latency(self, config_name: str | None = None) -> float:
        """P99 latency in ms, optionally filtered by config (NaN if empty)."""
        return self._p99_latency_of(self._filter(config_name))

    def mean_throughput(self, config_name: str | None = None) -> float:
        """Mean throughput in FPS, optionally filtered by config (NaN if empty)."""
        return self._mean_throughput_of(self._filter(config_name))

    def max_vram(self, config_name: str | None = None) -> float:
        """Maximum VRAM usage in GB, optionally filtered by config (0.0 if empty)."""
        return self._max_vram_of(self._filter(config_name))

    def _filter(self, config_name: str | None) -> list[BenchmarkResult]:
        """Return the samples for ``config_name``, or all samples when it is None."""
        if config_name is None:
            return self.results
        return [r for r in self.results if r.config_name == config_name]

    def _group_by_config(self) -> dict[str, list[BenchmarkResult]]:
        """Bucket all samples by config name in one pass, in first-seen order."""
        grouped: dict[str, list[BenchmarkResult]] = {}
        for result in self.results:
            grouped.setdefault(result.config_name, []).append(result)
        return grouped

    @staticmethod
    def _mean_latency_of(results: list[BenchmarkResult]) -> float:
        """Mean of ``latency_ms`` over ``results``; NaN for an empty list."""
        if not results:
            return float("nan")
        return sum(r.latency_ms for r in results) / len(results)

    @staticmethod
    def _p99_latency_of(results: list[BenchmarkResult]) -> float:
        """99th-percentile ``latency_ms`` (nearest rank); NaN for an empty list."""
        if not results:
            return float("nan")
        import math

        latencies = sorted(r.latency_ms for r in results)
        # Nearest-rank index, clamped so it never runs past the last sample.
        idx = min(len(latencies) - 1, math.ceil(len(latencies) * 0.99) - 1)
        return latencies[idx]

    @staticmethod
    def _mean_throughput_of(results: list[BenchmarkResult]) -> float:
        """Mean of ``throughput_fps`` over ``results``; NaN for an empty list."""
        if not results:
            return float("nan")
        return sum(r.throughput_fps for r in results) / len(results)

    @staticmethod
    def _max_vram_of(results: list[BenchmarkResult]) -> float:
        """Largest ``vram_gb`` in ``results``; 0.0 for an empty list."""
        if not results:
            return 0.0
        return max(r.vram_gb for r in results)

    @property
    def config_names(self) -> list[str]:
        """Unique config names in first-seen order."""
        return list(dict.fromkeys(r.config_name for r in self.results))

    def summary(self) -> str:
        """Generate a text summary table with one row per configuration.

        Returns:
            The table as a newline-joined string, or
            ``"No benchmark results."`` when nothing has been recorded.
        """
        # Group once so each row aggregates its own bucket instead of
        # rescanning the full result list for every column.
        grouped = self._group_by_config()
        if not grouped:
            return "No benchmark results."
        header = (
            f"{'Config':>20s} | {'Mean(ms)':>10s} | {'P99(ms)':>10s} | "
            f"{'FPS':>8s} | {'VRAM(GB)':>8s} | {'N':>4s}"
        )
        lines = [
            f"Inference Benchmark: {self.model_name}",
            header,
            "-" * len(header),
        ]
        for cfg, results in grouped.items():
            lines.append(
                f"{cfg:>20s} | "
                f"{self._mean_latency_of(results):>10.1f} | "
                f"{self._p99_latency_of(results):>10.1f} | "
                f"{self._mean_throughput_of(results):>8.2f} | "
                f"{self._max_vram_of(results):>8.1f} | "
                f"{len(results):>4d}"
            )
        return "\n".join(lines)

    def to_json(self, path: str | Path | None = None) -> str:
        """Export all samples and per-config aggregates as a JSON string.

        Per-sample ``metadata`` is not included in the export.

        Args:
            path: Optional destination file. When given (and truthy), parent
                directories are created and the JSON is written there.

        Returns:
            The JSON document, indented by two spaces.
        """
        data = {
            "model_name": self.model_name,
            "results": [
                {
                    "config_name": r.config_name,
                    "latency_ms": r.latency_ms,
                    "throughput_fps": round(r.throughput_fps, 2),
                    "vram_gb": r.vram_gb,
                    "batch_size": r.batch_size,
                    "resolution": r.resolution,
                    "num_inference_steps": r.num_inference_steps,
                    "device": r.device,
                }
                for r in self.results
            ],
            "summary": {
                cfg: {
                    "mean_latency_ms": round(self._mean_latency_of(results), 1),
                    "p99_latency_ms": round(self._p99_latency_of(results), 1),
                    "mean_fps": round(self._mean_throughput_of(results), 2),
                    "max_vram_gb": round(self._max_vram_of(results), 1),
                    "n_samples": len(results),
                }
                for cfg, results in self._group_by_config().items()
            },
        }
        payload = json.dumps(data, indent=2)
        if path:
            target = Path(path)
            target.parent.mkdir(parents=True, exist_ok=True)
            # Explicit encoding keeps the file independent of the locale.
            target.write_text(payload, encoding="utf-8")
        return payload
class Timer:
    """Simple context manager for timing code blocks.

    Timestamps come from ``time.perf_counter``. After the block exits, the
    elapsed properties report the block's duration. Read while the block is
    still running, they report the time elapsed since entry.

    Usage:
        with Timer() as t:
            run_inference()
        print(f"Took {t.elapsed_ms:.1f} ms")
    """

    def __init__(self) -> None:
        self.start_time: float = 0.0
        self.end_time: float = 0.0
        # True only between __enter__ and __exit__.
        self._running: bool = False

    @property
    def elapsed_ms(self) -> float:
        """Elapsed time in milliseconds."""
        return self.elapsed_s * 1000

    @property
    def elapsed_s(self) -> float:
        """Elapsed time in seconds."""
        if self._running:
            # end_time is stale (0.0 or from a previous run) while the block
            # is active, so measure against the current clock instead of
            # returning a negative duration.
            return time.perf_counter() - self.start_time
        return self.end_time - self.start_time

    def __enter__(self) -> Timer:
        self._running = True
        self.start_time = time.perf_counter()
        return self

    def __exit__(self, *args: Any) -> None:
        # Runs on both normal exit and exceptions; returning None lets any
        # exception propagate.
        self.end_time = time.perf_counter()
        self._running = False
|