dreamlessx committed on
Commit
a62ad8a
·
verified ·
1 Parent(s): 392e60f

Upload landmarkdiff/benchmark.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. landmarkdiff/benchmark.py +208 -0
landmarkdiff/benchmark.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Inference benchmarking for deployment sizing.
2
+
3
+ Measures throughput, latency, and memory usage for ControlNet inference
4
+ under various configurations (resolution, batch size, denoising steps).
5
+
6
+ Usage:
7
+ from landmarkdiff.benchmark import InferenceBenchmark
8
+
9
+ bench = InferenceBenchmark()
10
+ bench.add_result("gpu_a6000", latency_ms=142.3, throughput_fps=7.0, vram_gb=4.2)
11
+ bench.add_result("gpu_a6000", latency_ms=138.1, throughput_fps=7.2, vram_gb=4.2)
12
+ print(bench.summary())
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import time
19
+ from dataclasses import dataclass, field
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+
24
@dataclass
class BenchmarkResult:
    """A single benchmark measurement.

    One instance records one timed inference run (or one aggregated
    reading) under a named configuration.
    """

    config_name: str  # label grouping related runs, e.g. "gpu_a6000"
    latency_ms: float  # wall-clock latency of the run, in milliseconds
    throughput_fps: float = 0.0  # frames per second; 0.0 means "not measured"
    vram_gb: float = 0.0  # GPU memory usage in gigabytes
    batch_size: int = 1  # images generated per forward pass
    resolution: int = 512  # output resolution (presumably square, in pixels — confirm)
    num_inference_steps: int = 20  # denoising steps per image
    device: str = ""  # free-form device identifier, e.g. "cuda:0"
    metadata: dict[str, Any] = field(default_factory=dict)  # arbitrary extra details
37
+
38
+
39
class InferenceBenchmark:
    """Collect and analyze inference benchmarks.

    Measurements are grouped by ``config_name`` (e.g. one name per GPU
    or per resolution/batch setting); every statistics method accepts an
    optional ``config_name`` filter, with ``None`` meaning "all results".

    Args:
        model_name: Name of the model being benchmarked.
    """

    def __init__(self, model_name: str = "LandmarkDiff-ControlNet") -> None:
        self.model_name = model_name
        self.results: list[BenchmarkResult] = []

    def add_result(
        self,
        config_name: str,
        latency_ms: float,
        throughput_fps: float = 0.0,
        vram_gb: float = 0.0,
        batch_size: int = 1,
        resolution: int = 512,
        num_inference_steps: int = 20,
        device: str = "",
        **metadata: Any,
    ) -> None:
        """Record a single benchmark measurement.

        When ``throughput_fps`` is left at 0.0 and the latency is positive,
        throughput is derived as ``batch_size`` images per ``latency_ms``.
        Extra keyword arguments are stored in the result's ``metadata``.
        """
        if throughput_fps == 0.0 and latency_ms > 0:
            # images/second = batch_size / (latency in seconds)
            throughput_fps = 1000.0 / latency_ms * batch_size

        self.results.append(BenchmarkResult(
            config_name=config_name,
            latency_ms=latency_ms,
            throughput_fps=throughput_fps,
            vram_gb=vram_gb,
            batch_size=batch_size,
            resolution=resolution,
            num_inference_steps=num_inference_steps,
            device=device,
            metadata=metadata,
        ))

    def mean_latency(self, config_name: str | None = None) -> float:
        """Mean latency in ms, optionally filtered by config (NaN if empty)."""
        results = self._filter(config_name)
        if not results:
            return float("nan")
        return sum(r.latency_ms for r in results) / len(results)

    def p99_latency(self, config_name: str | None = None) -> float:
        """P99 latency in ms using the nearest-rank method (NaN if empty).

        Fix: the previous index ``int(n * 0.99) - 1`` undershot the
        percentile for small sample counts (e.g. n=10 returned the 9th
        smallest value — roughly P90 — instead of the maximum).
        Nearest-rank P99 is the ceil(0.99 * n)-th smallest value
        (1-indexed); the ceiling is computed with integer arithmetic to
        avoid float rounding.
        """
        results = self._filter(config_name)
        if not results:
            return float("nan")
        sorted_latencies = sorted(r.latency_ms for r in results)
        n = len(sorted_latencies)
        # ceil(0.99 * n) - 1 == (99*n + 99) // 100 - 1; always in [0, n-1].
        idx = (99 * n + 99) // 100 - 1
        return sorted_latencies[idx]

    def mean_throughput(self, config_name: str | None = None) -> float:
        """Mean throughput in FPS, optionally filtered (NaN if empty)."""
        results = self._filter(config_name)
        if not results:
            return float("nan")
        return sum(r.throughput_fps for r in results) / len(results)

    def max_vram(self, config_name: str | None = None) -> float:
        """Maximum VRAM usage in GB across matching results (0.0 if empty)."""
        results = self._filter(config_name)
        if not results:
            return 0.0
        return max(r.vram_gb for r in results)

    def _filter(self, config_name: str | None) -> list[BenchmarkResult]:
        """Results matching ``config_name``; the live list when None."""
        if config_name is None:
            return self.results
        return [r for r in self.results if r.config_name == config_name]

    @property
    def config_names(self) -> list[str]:
        """Unique config names in first-seen order."""
        seen: dict[str, None] = {}  # dict preserves insertion order
        for r in self.results:
            seen.setdefault(r.config_name, None)
        return list(seen.keys())

    def summary(self) -> str:
        """Generate a plain-text summary table, one row per config."""
        configs = self.config_names
        if not configs:
            return "No benchmark results."

        header = f"{'Config':>20s} | {'Mean(ms)':>10s} | {'P99(ms)':>10s} | {'FPS':>8s} | {'VRAM(GB)':>8s} | {'N':>4s}"
        lines = [
            f"Inference Benchmark: {self.model_name}",
            header,
            "-" * len(header),
        ]

        for cfg in configs:
            results = self._filter(cfg)
            lines.append(
                f"{cfg:>20s} | "
                f"{self.mean_latency(cfg):>10.1f} | "
                f"{self.p99_latency(cfg):>10.1f} | "
                f"{self.mean_throughput(cfg):>8.2f} | "
                f"{self.max_vram(cfg):>8.1f} | "
                f"{len(results):>4d}"
            )

        return "\n".join(lines)

    def to_json(self, path: str | Path | None = None) -> str:
        """Export raw results plus per-config summary stats as JSON.

        Args:
            path: Optional file destination; parent directories are
                created as needed.

        Returns:
            The JSON string (also written to ``path`` when given).

        NOTE(review): per-result ``metadata`` is intentionally not
        serialized here — arbitrary values may not be JSON-encodable.
        """
        data = {
            "model_name": self.model_name,
            "results": [
                {
                    "config_name": r.config_name,
                    "latency_ms": r.latency_ms,
                    "throughput_fps": round(r.throughput_fps, 2),
                    "vram_gb": r.vram_gb,
                    "batch_size": r.batch_size,
                    "resolution": r.resolution,
                    "num_inference_steps": r.num_inference_steps,
                    "device": r.device,
                }
                for r in self.results
            ],
            "summary": {
                cfg: {
                    "mean_latency_ms": round(self.mean_latency(cfg), 1),
                    "p99_latency_ms": round(self.p99_latency(cfg), 1),
                    "mean_fps": round(self.mean_throughput(cfg), 2),
                    "max_vram_gb": round(self.max_vram(cfg), 1),
                    "n_samples": len(self._filter(cfg)),
                }
                for cfg in self.config_names
            },
        }
        j = json.dumps(data, indent=2)
        if path:
            Path(path).parent.mkdir(parents=True, exist_ok=True)
            Path(path).write_text(j)
        return j
180
+
181
+
182
class Timer:
    """Context manager that measures the wall-clock duration of a block.

    Usage:
        with Timer() as t:
            run_inference()
        print(f"Took {t.elapsed_ms:.1f} ms")
    """

    def __init__(self) -> None:
        # Both stamps start at zero so the elapsed properties read 0.0
        # before the timer has been used.
        self.start_time: float = 0.0
        self.end_time: float = 0.0

    def __enter__(self) -> Timer:
        self.start_time = time.perf_counter()
        return self

    def __exit__(self, *exc_info: Any) -> None:
        # Record the stop stamp unconditionally, even if the block raised.
        self.end_time = time.perf_counter()

    @property
    def elapsed_s(self) -> float:
        """Elapsed time between enter and exit, in seconds."""
        return self.end_time - self.start_time

    @property
    def elapsed_ms(self) -> float:
        """Elapsed time between enter and exit, in milliseconds."""
        return self.elapsed_s * 1000.0