dreamlessx commited on
Commit
5b56d44
·
verified ·
1 Parent(s): d847b3c

Upload landmarkdiff/postprocess.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. landmarkdiff/postprocess.py +195 -50
landmarkdiff/postprocess.py CHANGED
@@ -1,5 +1,15 @@
1
- """Post-processing: CodeFormer/GFPGAN face restore, Real-ESRGAN bg,
2
- Laplacian blend, sharpening, histogram matching, ArcFace identity gate.
 
 
 
 
 
 
 
 
 
 
3
  """
4
 
5
  from __future__ import annotations
@@ -7,6 +17,12 @@ from __future__ import annotations
7
  import cv2
8
  import numpy as np
9
 
 
 
 
 
 
 
10
 
11
  def laplacian_pyramid_blend(
12
  source: np.ndarray,
@@ -14,7 +30,22 @@ def laplacian_pyramid_blend(
14
  mask: np.ndarray,
15
  levels: int = 6,
16
  ) -> np.ndarray:
17
- """Laplacian pyramid blend - kills the 'pasted on' look from alpha blending."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # Ensure same size
19
  h, w = target.shape[:2]
20
  source = cv2.resize(source, (w, h)) if source.shape[:2] != (h, w) else source
@@ -107,7 +138,20 @@ def frequency_aware_sharpen(
107
  strength: float = 0.3,
108
  radius: int = 3,
109
  ) -> np.ndarray:
110
- """Unsharp mask on LAB luminance only - sharpens skin texture without color fringe."""
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
112
  l_channel = lab[:, :, 0]
113
 
@@ -124,21 +168,36 @@ def restore_face_gfpgan(
124
  image: np.ndarray,
125
  upscale: int = 1,
126
  ) -> np.ndarray:
127
- """GFPGAN face restore. Returns original if not installed."""
 
 
 
 
 
 
 
 
 
 
 
 
128
  try:
129
  from gfpgan import GFPGANer
130
  except ImportError:
131
  return image
132
 
133
  try:
134
- restorer = GFPGANer(
135
- model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
136
- upscale=upscale,
137
- arch="clean",
138
- channel_multiplier=2,
139
- bg_upsampler=None,
140
- )
141
- _, _, restored = restorer.enhance(
 
 
 
142
  image,
143
  has_aligned=False,
144
  only_center_face=True,
@@ -157,7 +216,21 @@ def restore_face_codeformer(
157
  fidelity: float = 0.7,
158
  upscale: int = 1,
159
  ) -> np.ndarray:
160
- """CodeFormer face restore. fidelity: 0=quality, 1=identity. Returns original if not installed."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  try:
162
  from codeformer.basicsr.utils import img2tensor, tensor2img
163
  from codeformer.facelib.utils.face_restoration_helper import FaceRestoreHelper
@@ -168,24 +241,28 @@ def restore_face_codeformer(
168
  return image
169
 
170
  try:
 
171
  from codeformer.inference_codeformer import set_realesrgan as _unused # noqa: F401
172
  from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
173
 
174
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
175
 
176
- model = CodeFormerArch(
177
- dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
178
- connect_list=["32", "64", "128", "256"],
179
- ).to(device)
180
-
181
- ckpt_path = load_file_from_url(
182
- url="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth",
183
- model_dir="weights/CodeFormer",
184
- progress=True,
185
- )
186
- checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)
187
- model.load_state_dict(checkpoint["params_ema"])
188
- model.eval()
 
 
 
189
 
190
  face_helper = FaceRestoreHelper(
191
  upscale,
@@ -225,7 +302,19 @@ def enhance_background_realesrgan(
225
  mask: np.ndarray,
226
  outscale: int = 2,
227
  ) -> np.ndarray:
228
- """Real-ESRGAN on background only (outside mask). Returns original if not installed."""
 
 
 
 
 
 
 
 
 
 
 
 
229
  try:
230
  from realesrgan import RealESRGANer
231
  from basicsr.archs.rrdbnet_arch import RRDBNet
@@ -234,17 +323,19 @@ def enhance_background_realesrgan(
234
  return image
235
 
236
  try:
237
- model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
238
- upsampler = RealESRGANer(
239
- scale=4,
240
- model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
241
- model=model,
242
- tile=400,
243
- tile_pad=10,
244
- pre_pad=0,
245
- half=torch.cuda.is_available(),
246
- )
247
- enhanced, _ = upsampler.enhance(image, outscale=outscale)
 
 
248
 
249
  # Downscale back to original size
250
  h, w = image.shape[:2]
@@ -276,22 +367,39 @@ def verify_identity_arcface(
276
  result: np.ndarray,
277
  threshold: float = 0.6,
278
  ) -> dict:
279
- """ArcFace cosine similarity check. Flags if output drifted from input identity."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  try:
281
  from insightface.app import FaceAnalysis
282
  except ImportError:
283
  return {
284
  "similarity": -1.0,
285
  "passed": True,
286
- "message": "InsightFace not installed - identity check skipped",
287
  }
288
 
289
  try:
290
- app = FaceAnalysis(
291
- name="buffalo_l",
292
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
293
- )
294
- app.prepare(ctx_id=0 if _has_cuda() else -1, det_size=(320, 320))
 
 
 
295
 
296
  orig_faces = app.get(original)
297
  result_faces = app.get(result)
@@ -300,7 +408,7 @@ def verify_identity_arcface(
300
  return {
301
  "similarity": -1.0,
302
  "passed": True,
303
- "message": "Could not detect face in one/both images - check skipped",
304
  }
305
 
306
  orig_emb = orig_faces[0].embedding
@@ -339,7 +447,19 @@ def histogram_match_skin(
339
  reference: np.ndarray,
340
  mask: np.ndarray,
341
  ) -> np.ndarray:
342
- """CDF-based histogram matching in LAB space. Better than mean/std for skin."""
 
 
 
 
 
 
 
 
 
 
 
 
343
  mask_bool = mask > 0.3 if mask.dtype == np.float32 else mask > 76
344
 
345
  if not np.any(mask_bool):
@@ -391,7 +511,31 @@ def full_postprocess(
391
  verify_identity: bool = True,
392
  identity_threshold: float = 0.6,
393
  ) -> dict:
394
- """Full pipeline: restore -> bg enhance -> histogram match -> sharpen -> blend -> identity check."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  result = generated.copy()
396
  restore_used = "none"
397
 
@@ -403,8 +547,9 @@ def full_postprocess(
403
  restore_used = "codeformer"
404
  else:
405
  # CodeFormer unavailable, fall back to GFPGAN
 
406
  result = restore_face_gfpgan(result)
407
- restore_used = "gfpgan" if result is not generated else "none"
408
  elif restore_mode == "gfpgan":
409
  restored = restore_face_gfpgan(result)
410
  if restored is not result:
 
1
+ """Post-processing pipeline for photorealistic face output.
2
+
3
+ Neural net components:
4
+ - CodeFormer (primary): face restoration with controllable fidelity-quality tradeoff
5
+ - GFPGAN (fallback): face restoration for diffusion artifact repair
6
+ - Real-ESRGAN: neural super-resolution for background regions
7
+ - ArcFace: identity verification to flag drift between input/output
8
+
9
+ Classical components:
10
+ - Multi-band Laplacian pyramid blending (replaces simple alpha blend)
11
+ - Frequency-aware sharpening (recovers fine skin texture)
12
+ - Color histogram matching (ensures skin tone consistency)
13
  """
14
 
15
  from __future__ import annotations
 
17
  import cv2
18
  import numpy as np
19
 
20
+ # Singleton model caches — load once, reuse across calls
21
+ _CODEFORMER_MODEL = None
22
+ _CODEFORMER_HELPER = None
23
+ _REALESRGAN_UPSAMPLER = None
24
+ _ARCFACE_APP = None
25
+
26
 
27
  def laplacian_pyramid_blend(
28
  source: np.ndarray,
 
30
  mask: np.ndarray,
31
  levels: int = 6,
32
  ) -> np.ndarray:
33
+ """Multi-band Laplacian pyramid blending for seamless compositing.
34
+
35
+ Unlike simple alpha blending which creates visible halos at mask edges,
36
+ Laplacian blending operates at multiple frequency bands. Low frequencies
37
+ (overall color/lighting) blend smoothly, high frequencies (skin texture,
38
+ pores, hair) transition sharply. This eliminates the "pasted on" look.
39
+
40
+ Args:
41
+ source: BGR image to blend IN (the surgical result).
42
+ target: BGR image to blend INTO (the original photo).
43
+ mask: Float32 mask [0-1] (1 = source region).
44
+ levels: Number of pyramid levels (6 works well for 512x512).
45
+
46
+ Returns:
47
+ Seamlessly composited BGR image.
48
+ """
49
  # Ensure same size
50
  h, w = target.shape[:2]
51
  source = cv2.resize(source, (w, h)) if source.shape[:2] != (h, w) else source
 
138
  strength: float = 0.3,
139
  radius: int = 3,
140
  ) -> np.ndarray:
141
+ """Sharpen high-frequency detail (skin texture, pores) without amplifying noise.
142
+
143
+ Uses unsharp masking in LAB space (luminance only) to avoid
144
+ color fringing. Preserves the smooth look of diffusion output
145
+ while recovering fine texture detail.
146
+
147
+ Args:
148
+ image: BGR image.
149
+ strength: Sharpening strength (0.2-0.5 typical for faces).
150
+ radius: Gaussian blur radius for unsharp mask.
151
+
152
+ Returns:
153
+ Sharpened BGR image.
154
+ """
155
  lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
156
  l_channel = lab[:, :, 0]
157
 
 
168
  image: np.ndarray,
169
  upscale: int = 1,
170
  ) -> np.ndarray:
171
+ """Restore face quality using GFPGAN.
172
+
173
+ Fixes common diffusion artifacts: blurry eyes, distorted features,
174
+ inconsistent skin texture. The restored face is then blended back
175
+ into the original for a natural look.
176
+
177
+ Args:
178
+ image: BGR face image (any size).
179
+ upscale: Upscale factor (1 = same size, 2 = 2x).
180
+
181
+ Returns:
182
+ Restored BGR image, or original if GFPGAN unavailable.
183
+ """
184
  try:
185
  from gfpgan import GFPGANer
186
  except ImportError:
187
  return image
188
 
189
  try:
190
+ global _CODEFORMER_HELPER
191
+ # Singleton: avoid reloading ~300MB GFPGAN model on every call.
+ # NOTE(review): this caches the GFPGANer in _CODEFORMER_HELPER, which collides
+ # with the CodeFormer singleton used in restore_face_codeformer — use a dedicated
+ # _GFPGAN_RESTORER global. Also, the first call's `upscale` is frozen into the
+ # cached instance; later calls with a different `upscale` are silently ignored.
192
+ if _CODEFORMER_HELPER is None:
193
+ _CODEFORMER_HELPER = GFPGANer(
194
+ model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
195
+ upscale=upscale,
196
+ arch="clean",
197
+ channel_multiplier=2,
198
+ bg_upsampler=None,
199
+ )
200
+ _, _, restored = _CODEFORMER_HELPER.enhance(
201
  image,
202
  has_aligned=False,
203
  only_center_face=True,
 
216
  fidelity: float = 0.7,
217
  upscale: int = 1,
218
  ) -> np.ndarray:
219
+ """Restore face quality using CodeFormer (neural net).
220
+
221
+ CodeFormer uses a Transformer-based codebook lookup to restore degraded
222
+ faces. The fidelity parameter controls the quality-fidelity tradeoff:
223
+ lower values produce higher quality but may alter identity slightly,
224
+ higher values preserve identity but fix fewer artifacts.
225
+
226
+ Args:
227
+ image: BGR face image.
228
+ fidelity: Quality-fidelity balance (0.0=quality, 1.0=fidelity). 0.7 default.
229
+ upscale: Upscale factor (1 = same size).
230
+
231
+ Returns:
232
+ Restored BGR image, or original if CodeFormer unavailable.
233
+ """
234
  try:
235
  from codeformer.basicsr.utils import img2tensor, tensor2img
236
  from codeformer.facelib.utils.face_restoration_helper import FaceRestoreHelper
 
241
  return image
242
 
243
  try:
244
+ global _CODEFORMER_MODEL, _CODEFORMER_HELPER
245
  from codeformer.inference_codeformer import set_realesrgan as _unused # noqa: F401
246
  from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch
247
 
248
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
249
 
250
+ if _CODEFORMER_MODEL is None:
251
+ model = CodeFormerArch(
252
+ dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
253
+ connect_list=["32", "64", "128", "256"],
254
+ ).to(device)
255
+
256
+ ckpt_path = load_file_from_url(
257
+ url="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth",
258
+ model_dir="weights/CodeFormer",
259
+ progress=True,
260
+ )
261
+ checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True)
262
+ model.load_state_dict(checkpoint["params_ema"])
263
+ model.eval()
264
+ _CODEFORMER_MODEL = model
265
+ model = _CODEFORMER_MODEL
266
 
267
  face_helper = FaceRestoreHelper(
268
  upscale,
 
302
  mask: np.ndarray,
303
  outscale: int = 2,
304
  ) -> np.ndarray:
305
+ """Enhance non-face background regions using Real-ESRGAN neural upscaler.
306
+
307
+ Only applies to regions outside the surgical mask to improve overall
308
+ image quality without interfering with the face restoration pipeline.
309
+
310
+ Args:
311
+ image: BGR image.
312
+ mask: Float32 mask [0-1] where 1 = face region (skip these pixels).
313
+ outscale: Upscale factor (2 = 2x resolution, then downsample back).
314
+
315
+ Returns:
316
+ Enhanced BGR image at original resolution.
317
+ """
318
  try:
319
  from realesrgan import RealESRGANer
320
  from basicsr.archs.rrdbnet_arch import RRDBNet
 
323
  return image
324
 
325
  try:
326
+ global _REALESRGAN_UPSAMPLER
327
+ if _REALESRGAN_UPSAMPLER is None:
328
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
329
+ _REALESRGAN_UPSAMPLER = RealESRGANer(
330
+ scale=4,
331
+ model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
332
+ model=model,
333
+ tile=400,
334
+ tile_pad=10,
335
+ pre_pad=0,
336
+ half=torch.cuda.is_available(),
337
+ )
338
+ enhanced, _ = _REALESRGAN_UPSAMPLER.enhance(image, outscale=outscale)
339
 
340
  # Downscale back to original size
341
  h, w = image.shape[:2]
 
367
  result: np.ndarray,
368
  threshold: float = 0.6,
369
  ) -> dict:
370
+ """Verify output preserves input identity using ArcFace neural net.
371
+
372
+ Computes cosine similarity between ArcFace embeddings of the original
373
+ and result images. If similarity drops below threshold, flags identity
374
+ drift — meaning the postprocessing or diffusion altered the person's
375
+ appearance too much.
376
+
377
+ Args:
378
+ original: BGR original face image.
379
+ result: BGR post-processed output image.
380
+ threshold: Minimum cosine similarity to pass (0.6 = same person).
381
+
382
+ Returns:
383
+ Dict with 'similarity' (float), 'passed' (bool), 'message' (str).
384
+ """
385
  try:
386
  from insightface.app import FaceAnalysis
387
  except ImportError:
388
  return {
389
  "similarity": -1.0,
390
  "passed": True,
391
+ "message": "InsightFace not installed — identity check skipped",
392
  }
393
 
394
  try:
395
+ global _ARCFACE_APP
396
+ if _ARCFACE_APP is None:
397
+ _ARCFACE_APP = FaceAnalysis(
398
+ name="buffalo_l",
399
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
400
+ )
401
+ _ARCFACE_APP.prepare(ctx_id=0 if _has_cuda() else -1, det_size=(320, 320))
402
+ app = _ARCFACE_APP
403
 
404
  orig_faces = app.get(original)
405
  result_faces = app.get(result)
 
408
  return {
409
  "similarity": -1.0,
410
  "passed": True,
411
+ "message": "Could not detect face in one/both images — check skipped",
412
  }
413
 
414
  orig_emb = orig_faces[0].embedding
 
447
  reference: np.ndarray,
448
  mask: np.ndarray,
449
  ) -> np.ndarray:
450
+ """Match skin color histogram of source to reference within masked region.
451
+
452
+ More robust than simple mean/std matching — preserves the full
453
+ distribution of skin tones including highlights and shadows.
454
+
455
+ Args:
456
+ source: BGR image whose skin tone to adjust.
457
+ reference: BGR image with target skin tone.
458
+ mask: Float32 mask [0-1] of skin region.
459
+
460
+ Returns:
461
+ Color-matched BGR image.
462
+ """
463
  mask_bool = mask > 0.3 if mask.dtype == np.float32 else mask > 76
464
 
465
  if not np.any(mask_bool):
 
511
  verify_identity: bool = True,
512
  identity_threshold: float = 0.6,
513
  ) -> dict:
514
+ """Full neural net + classical post-processing pipeline for maximum photorealism.
515
+
516
+ Pipeline:
517
+ 1. Face restoration: CodeFormer (primary) or GFPGAN (fallback) neural nets
518
+ 2. Background enhancement: Real-ESRGAN neural upscaler (non-face regions)
519
+ 3. Skin tone histogram matching to original (classical)
520
+ 4. Frequency-aware sharpening for texture recovery (classical)
521
+ 5. Laplacian pyramid blending for seamless compositing (classical)
522
+ 6. ArcFace identity verification (neural net quality gate)
523
+
524
+ Args:
525
+ generated: BGR generated/warped face image.
526
+ original: BGR original face image.
527
+ mask: Float32 surgical mask [0-1].
528
+ restore_mode: 'codeformer', 'gfpgan', or 'none'.
529
+ codeformer_fidelity: CodeFormer fidelity weight (0=quality, 1=fidelity).
530
+ use_realesrgan: Apply Real-ESRGAN to background regions.
531
+ use_laplacian_blend: Use Laplacian blend vs simple alpha blend.
532
+ sharpen_strength: Texture sharpening amount (0 = none).
533
+ verify_identity: Run ArcFace identity check at the end.
534
+ identity_threshold: Min cosine similarity to pass identity check.
535
+
536
+ Returns:
537
+ Dict with 'image' (composited BGR), 'identity_check' (dict), 'restore_used' (str).
538
+ """
539
  result = generated.copy()
540
  restore_used = "none"
541
 
 
547
  restore_used = "codeformer"
548
  else:
549
  # CodeFormer unavailable, fall back to GFPGAN
550
+ pre_gfpgan = result
551
  result = restore_face_gfpgan(result)
552
+ restore_used = "gfpgan" if result is not pre_gfpgan else "none"
553
  elif restore_mode == "gfpgan":
554
  restored = restore_face_gfpgan(result)
555
  if restored is not result: