ColorMNet

Sleeping

App Files Files Community

ColorMNet / app.py

yyang181

Update app.py

c17b50c verified 6 months ago

raw

history blame contribute delete

22.3 kB

	# app.py — Gradio front-end that calls test.py IN-PROCESS (ZeroGPU-safe)
	# Folder layout per run (under TEMP_ROOT):
	# input_video/<video_stem>/00000.png ...
	# ref/<video_stem>/ref.png
	# output/<video_stem>/*.png
	# Final mp4: TEMP_ROOT/<video_stem>.mp4

	import os
	import sys
	import shutil
	import urllib.request
	from os import path
	import io
	from contextlib import redirect_stdout, redirect_stderr
	import subprocess
	import tempfile
	import importlib

	import gradio as gr
	import spaces
	from PIL import Image
	import cv2
	import torch # used for cuda sync & empty_cache

	# ----------------- BASIC INFO -----------------
	CHECKPOINT_URL = "https://github.com/yyang181/colormnet/releases/download/v0.1/DINOv2FeatureV6_LocalAtten_s2_154000.pth"
	CHECKPOINT_LOCAL = "DINOv2FeatureV6_LocalAtten_s2_154000.pth"

	TITLE = "ColorMNet — 视频着色 / Video Colorization (ZeroGPU, CUDA-only)"
	DESC = """
	中文
	上传黑白视频与参考图像，点击「开始着色 / Start Coloring」。
	此版本在 app.py 中调度 ZeroGPU，并在同一进程调用 `test.py` 的入口函数。
	临时工作目录结构：
	- 抽帧：`_colormnet_tmp/input_video/<视频名>/00000.png ...`
	- 参考：`_colormnet_tmp/ref/<视频名>/ref.png`
	- 输出：`_colormnet_tmp/output/<视频名>/*.png`
	- 合成视频：`_colormnet_tmp/<视频名>.mp4`
	English
	Upload a B&W video and a reference image, then click “Start Coloring”.
	This app runs ZeroGPU scheduling in `app.py` and calls `test.py` in-process.
	Temp workspace layout:
	- Frames: `_colormnet_tmp/input_video/<stem>/00000.png ...`
	- Reference: `_colormnet_tmp/ref/<stem>/ref.png`
	- Output frames: `_colormnet_tmp/output/<stem>/*.png`
	- Final video: `_colormnet_tmp/<stem>.mp4`
	"""

	PAPER = """
	### 论文 / Paper
	ECCV 2024 — ColorMNet: A Memory-based Deep Spatial-Temporal Feature Propagation Network for Video Colorization
	如果你喜欢这个项目，欢迎到 GitHub 点个 ⭐ Star：
	GitHub: https://github.com/yyang181/colormnet
	BibTeX 引用 / BibTeX Citation
	```bibtex
	@inproceedings{yang2024colormnet,
	author = {Yixin Yang and Jiangxin Dong and Jinhui Tang and Jinshan Pan},
	title = {ColorMNet: A Memory-based Deep Spatial-Temporal Feature Propagation Network for Video Colorization},
	booktitle = ECCV,
	year = {2024}
	}
	"""
	BADGES_HTML = """
	<div style="display:flex;gap:12px;align-items:center;flex-wrap:wrap;">
	<a href="https://github.com/yyang181/colormnet" target="_blank" title="Open GitHub Repo">
	<img alt="GitHub Repo"
	src="https://img.shields.io/badge/GitHub-colormnet-181717?logo=github" />
	</a>
	<a href="https://github.com/yyang181/colormnet/stargazers" target="_blank" title="Star on GitHub">
	<img alt="GitHub Repo stars"
	src="https://img.shields.io/github/stars/yyang181/colormnet?style=social" />
	</a>
	</div>
	"""

	# ----------------- REFERENCE FRAME GUIDE (NO CROPPING) -----------------
	REF_GUIDE_MD = r"""
	## 参考帧制作指南 / Reference Frame Guide
	目的 / Goal
	为模型提供一张与你的视频关键帧在姿态、光照、构图尽量接近的彩色参考图，用来指导整段视频的着色风格与主体颜色。
	---
	### 中文步骤
	1. 挑帧：从视频里挑一帧（或相近角度的照片），尽量与要着色的镜头在姿态 / 光照 / 场景一致。
	2. 上色方式：若你只有黑白参考图、但需要彩色参考，可用通义千问·图像编辑（Qwen-Image）：
	- 打开：<https://chat.qwen.ai/> → 选择图像编辑
	- 上传你的黑白参考图
	- 在提示词里输入：
	「帮我给这张照片上色，只修改颜色，不要修改内容」
	- 可按需多次编辑（如补充「衣服为复古蓝、肤色自然、不要锐化」）
	3. 保存格式：PNG/JPG 均可；推荐分辨率 ≥ 480px（短边）。
	4. 文件放置：本应用会自动放置为 `ref/<视频名>/ref.png`。
	注意事项（Do/Don’t）
	- ✅ 主体清晰、颜色干净，不要过曝或强滤镜。
	- ✅ 关键区域（衣服、皮肤、头发、天空等）颜色与目标风格一致。
	- ❌ 不要更改几何结构（如人脸形状/姿态），只修改颜色。
	- ❌ 避免文字、贴纸、重度风格化滤镜。
	---
	### English Steps
	1. Pick a frame (or a similar photo) that matches the target shot in pose / lighting / composition.
	2. Colorizing if your reference is B&W — use Qwen-Image (Image Editing):
	- Open <https://chat.qwen.ai/> → Image Editing
	- Upload your B&W reference
	- Prompt: “Help me colorize this photo; only change colors, do not alter the content.”
	- Iterate if needed (e.g., “vintage blue jacket, natural skin tone; avoid sharpening”).
	3. Format: PNG/JPG; recommended short side ≥ 480px.
	4. File placement: The app will place it as `ref/<video_stem>/ref.png`.
	Do / Don’t
	- ✅ Clean subject and palette; avoid overexposure/harsh filters.
	- ✅ Ensure key regions (clothes/skin/hair/sky) match the intended colors.
	- ❌ Do not change geometry/structure — colors only.
	- ❌ Avoid text/stickers/heavy stylization filters.
	"""

	# ----------------- TEMP WORKDIR -----------------
	TEMP_ROOT = path.join(os.getcwd(), "_colormnet_tmp")
	INPUT_DIR = "input_video"
	REF_DIR = "ref"
	OUTPUT_DIR = "output"

	def reset_temp_root():
	"""每次运行前清空并重建临时工作目录。"""
	if path.isdir(TEMP_ROOT):
	shutil.rmtree(TEMP_ROOT, ignore_errors=True)
	os.makedirs(TEMP_ROOT, exist_ok=True)
	for sub in (INPUT_DIR, REF_DIR, OUTPUT_DIR):
	os.makedirs(path.join(TEMP_ROOT, sub), exist_ok=True)

	def ensure_dir(d: str):
	os.makedirs(d, exist_ok=True)

	# ----------------- CHECKPOINT (可选) -----------------
	def ensure_checkpoint():
	"""若 test.py 会在当前目录加载权重，可提前预下载，避免首次拉取超时。"""
	try:
	if not path.exists(CHECKPOINT_LOCAL):
	print(f"[INFO] Downloading checkpoint from: {CHECKPOINT_URL}")
	urllib.request.urlretrieve(CHECKPOINT_URL, CHECKPOINT_LOCAL)
	print("[INFO] Checkpoint downloaded:", CHECKPOINT_LOCAL)
	except Exception as e:
	print(f"[WARN] 预下载权重失败（首次推理会再试）: {e}")

	# ----------------- VIDEO UTILS -----------------
	def video_to_frames_dir(video_path: str, frames_dir: str):
	"""
	抽帧到 frames_dir/00000.png ...
	返回: (w, h, fps, n_frames)
	"""
	ensure_dir(frames_dir)
	cap = cv2.VideoCapture(video_path)
	assert cap.isOpened(), f"Cannot open video: {video_path}"
	fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
	idx = 0
	w = h = None
	while True:
	ret, frame = cap.read()
	if not ret:
	break
	if frame is None:
	continue
	h, w = frame.shape[:2]
	out_path = path.join(frames_dir, f"{idx:05d}.png")
	ok = cv2.imwrite(out_path, frame)
	if not ok:
	raise RuntimeError(f"写入抽帧失败 / Failed to write: {out_path}")
	idx += 1
	cap.release()
	if idx == 0:
	raise RuntimeError("视频无可读帧 / Input video has no readable frames.")
	return w, h, fps, idx

	def encode_frames_to_video(frames_dir: str, out_path: str, fps: float):
	frames = sorted([f for f in os.listdir(frames_dir) if f.lower().endswith(".png")])
	if not frames:
	raise RuntimeError(f"No frames found in {frames_dir}")
	first = cv2.imread(path.join(frames_dir, frames[0]))
	if first is None:
	raise RuntimeError(f"Failed to read first frame {frames[0]}")
	h, w = first.shape[:2]
	fourcc = cv2.VideoWriter_fourcc(*"mp4v")
	vw = cv2.VideoWriter(out_path, fourcc, fps, (w, h))
	for f in frames:
	img = cv2.imread(path.join(frames_dir, f))
	if img is None:
	continue
	vw.write(img)
	vw.release()

	# ----------------- CLI MAPPING -----------------
	CONFIG_TO_CLI = {
	"FirstFrameIsNotExemplar": "--FirstFrameIsNotExemplar", # bool
	"dataset": "--dataset",
	"split": "--split",
	"save_all": "--save_all", # bool
	"benchmark": "--benchmark", # bool
	"disable_long_term": "--disable_long_term", # bool
	"max_mid_term_frames": "--max_mid_term_frames",
	"min_mid_term_frames": "--min_mid_term_frames",
	"max_long_term_elements": "--max_long_term_elements",
	"num_prototypes": "--num_prototypes",
	"top_k": "--top_k",
	"mem_every": "--mem_every",
	"deep_update_every": "--deep_update_every",
	"save_scores": "--save_scores", # bool
	"flip": "--flip", # bool
	"size": "--size",
	"reverse": "--reverse", # bool
	}

	def build_args_list_for_test(d16_batch_path: str,
	out_path: str,
	ref_root: str,
	cfg: dict):
	"""
	构造传给 test.run_cli(args_list) 的参数列表。
	- 必传：--d16_batch_path <input_video_root>、--ref_path <ref_root>、--output <output_root>
	"""
	args = [
	"--d16_batch_path", d16_batch_path,
	"--ref_path", ref_root,
	"--output", out_path,
	]
	for k, v in cfg.items():
	if k not in CONFIG_TO_CLI:
	continue
	flag = CONFIG_TO_CLI[k]
	if isinstance(v, bool):
	if v:
	args.append(flag) # store_true
	elif v is None:
	continue
	else:
	args.extend([flag, str(v)])
	return args

	# ===== 新增：ZeroGPU 后按需安装 Pytorch-Correlation-extension =====
	_CORR_OK_STAMP = path.join(os.getcwd(), ".corr_ext_installed")

	def ensure_correlation_extension_installed():
	"""
	在 ZeroGPU 分配后调用（位于 @spaces.GPU 函数体内）：
	- 若已能 import 或存在本地 stamp，则直接返回
	- 否则执行：
	git clone https://github.com/ClementPinard/Pytorch-Correlation-extension.git
	cd Pytorch-Correlation-extension && python setup.py install && cd ..
	"""
	# 1) 尝试直接导入
	try:
	import spatial_correlation_sampler # noqa: F401
	return
	except Exception:
	pass
	# 2) 之前成功过（打过 stamp）
	if path.exists(_CORR_OK_STAMP):
	return

	repo_url = "https://github.com/ClementPinard/Pytorch-Correlation-extension.git"
	workdir = tempfile.mkdtemp(prefix="corr_ext_")
	repo_dir = path.join(workdir, "Pytorch-Correlation-extension")
	try:
	print("[INFO] Installing Pytorch-Correlation-extension ...")
	# clone
	subprocess.run(
	["git", "clone", "--depth", "1", repo_url],
	cwd=workdir, check=True
	)
	# build & install
	subprocess.run(
	[sys.executable, "setup.py", "install"],
	cwd=repo_dir, check=True
	)
	# 验证
	importlib.invalidate_caches()
	import spatial_correlation_sampler # noqa: F401
	# 打 stamp，避免下次重复
	with open(_CORR_OK_STAMP, "w") as f:
	f.write("ok")
	print("[INFO] Pytorch-Correlation-extension installed successfully.")
	except subprocess.CalledProcessError as e:
	print(f"[WARN] Failed to build/install correlation extension: {e}")
	print("You can still proceed if your pipeline doesn't use it.")
	except Exception as e:
	print(f"[WARN] Correlation extension install check failed: {e}")
	finally:
	# 清理临时目录
	try:
	shutil.rmtree(workdir, ignore_errors=True)
	except Exception:
	pass

	# ----------------- GRADIO HANDLER -----------------
	@spaces.GPU(duration=600) # 确保 CUDA 初始化在此函数体内
	def gradio_infer(
	debug_shapes,
	bw_video, ref_image,
	first_not_exemplar, dataset, split, save_all, benchmark,
	disable_long_term, max_mid, min_mid, max_long,
	num_proto, top_k, mem_every, deep_update,
	save_scores, flip, size, reverse
	):
	# <<< ZeroGPU 分配后：按需安装 Pytorch-Correlation-extension >>>
	ensure_correlation_extension_installed()
	# --------------------------------------------------------------

	# 1) 基本校验与临时目录
	if bw_video is None:
	return None, "请上传黑白视频 / Please upload a B&W video."
	if ref_image is None:
	return None, "请上传参考图像 / Please upload a reference image."
	reset_temp_root()

	# 2) 解析视频源路径 & 目标 <video_stem>
	if isinstance(bw_video, dict) and "name" in bw_video:
	src_video_path = bw_video["name"]
	elif isinstance(bw_video, str):
	src_video_path = bw_video
	else:
	return None, "无法读取视频输入 / Failed to read video input."

	video_stem = path.splitext(path.basename(src_video_path))[0]

	# 3) 生成临时路径
	input_root = path.join(TEMP_ROOT, INPUT_DIR) # _colormnet_tmp/input_video
	ref_root = path.join(TEMP_ROOT, REF_DIR) # _colormnet_tmp/ref
	output_root= path.join(TEMP_ROOT, OUTPUT_DIR) # _colormnet_tmp/output
	input_frames_dir = path.join(input_root, video_stem)
	ref_dir = path.join(ref_root, video_stem)
	out_frames_dir = path.join(output_root, video_stem)
	for d in (input_root, ref_root, output_root, input_frames_dir, ref_dir, out_frames_dir):
	ensure_dir(d)

	# 4) 抽帧 -> input_video/<stem>/
	try:
	_w, _h, fps, _n = video_to_frames_dir(src_video_path, input_frames_dir)
	except Exception as e:
	return None, f"抽帧失败 / Frame extraction failed:\n{e}"

	# 5) 参考帧 -> ref/<stem>/ref.png
	ref_png_path = path.join(ref_dir, "ref.png")
	if isinstance(ref_image, Image.Image):
	try:
	ref_image.save(ref_png_path)
	except Exception as e:
	return None, f"保存参考图像失败 / Failed to save reference image:\n{e}"
	elif isinstance(ref_image, str):
	try:
	shutil.copy2(ref_image, ref_png_path)
	except Exception as e:
	return None, f"复制参考图像失败 / Failed to copy reference image:\n{e}"
	else:
	return None, "无法读取参考图像输入 / Failed to read reference image."

	# 6) 收集 UI 配置
	default_config = {
	"FirstFrameIsNotExemplar": True,
	"dataset": "D16_batch",
	"split": "val",
	"save_all": True,
	"benchmark": False,
	"disable_long_term": False,
	"max_mid_term_frames": 10,
	"min_mid_term_frames": 5,
	"max_long_term_elements": 10000,
	"num_prototypes": 128,
	"top_k": 30,
	"mem_every": 5,
	"deep_update_every": -1,
	"save_scores": False,
	"flip": False,
	"size": -1,
	"reverse": False,
	}
	user_config = {
	"FirstFrameIsNotExemplar": bool(first_not_exemplar) if first_not_exemplar is not None else default_config["FirstFrameIsNotExemplar"],
	"dataset": str(dataset) if dataset else default_config["dataset"],
	"split": str(split) if split else default_config["split"],
	"save_all": bool(save_all) if save_all is not None else default_config["save_all"],
	"benchmark": bool(benchmark) if benchmark is not None else default_config["benchmark"],
	"disable_long_term": bool(disable_long_term) if disable_long_term is not None else default_config["disable_long_term"],
	"max_mid_term_frames": int(max_mid) if max_mid is not None else default_config["max_mid_term_frames"],
	"min_mid_term_frames": int(min_mid) if min_mid is not None else default_config["min_mid_term_frames"],
	"max_long_term_elements": int(max_long) if max_long is not None else default_config["max_long_term_elements"],
	"num_prototypes": int(num_proto) if num_proto is not None else default_config["num_prototypes"],
	"top_k": int(top_k) if top_k is not None else default_config["top_k"],
	"mem_every": int(mem_every) if mem_every is not None else default_config["mem_every"],
	"deep_update_every": int(deep_update) if deep_update is not None else default_config["deep_update_every"],
	"save_scores": bool(save_scores) if save_scores is not None else default_config["save_scores"],
	"flip": bool(flip) if flip is not None else default_config["flip"],
	"size": int(size) if size is not None else default_config["size"],
	"reverse": bool(reverse) if reverse is not None else default_config["reverse"],
	}

	# 7) 预下载权重（可选）
	ensure_checkpoint()

	# 8) 同进程调用 test.py
	try:
	import test # 确保 test.py 同目录且提供 run_cli(args_list)
	except Exception as e:
	return None, f"导入 test.py 失败 / Failed to import test.py：\n{e}"

	args_list = build_args_list_for_test(
	d16_batch_path=input_root, # 指向 input_video 根
	out_path=output_root, # 指向 output 根（test.py 写 output/<stem>/*.png）
	ref_root=ref_root, # 指向 ref 根（test.py 读 ref/<stem>/ref.png）
	cfg=user_config
	)

	buf = io.StringIO()
	try:
	with redirect_stdout(buf), redirect_stderr(buf):
	entry = getattr(test, "run_cli", None)
	if entry is None or not callable(entry):
	raise RuntimeError("test.py 未提供可调用的 run_cli(args_list) 接口。")
	entry(args_list)
	log = f"Args: {' '.join(args_list)}\n\n{buf.getvalue()}"
	except Exception as e:
	log = f"Args: {' '.join(args_list)}\n\n{buf.getvalue()}\n\nERROR: {e}"
	return None, log

	# 在合成 mp4 之前：清空 CUDA（防止显存占用）
	try:
	torch.cuda.synchronize()
	except Exception:
	pass
	try:
	torch.cuda.empty_cache()
	except Exception:
	pass

	# 9) 合成 mp4：从 output/<stem>/ 帧合成 -> TEMP_ROOT/<stem>.mp4
	out_frames = path.join(output_root, video_stem)
	if not path.isdir(out_frames):
	return None, f"未找到输出帧目录 / Output frame dir not found：{out_frames}\n\n{log}"
	final_mp4 = path.abspath(path.join(TEMP_ROOT, f"{video_stem}.mp4"))
	try:
	encode_frames_to_video(out_frames, final_mp4, fps=fps)
	except Exception as e:
	return None, f"合成视频失败 / Video mux failed：\n{e}\n\n{log}"

	return final_mp4, f"完成 ✅ / Done ✅\n\n{log}"

	# ----------------- UI -----------------
	with gr.Blocks() as demo:
	gr.Markdown(f"# {TITLE}")
	gr.HTML(BADGES_HTML)
	gr.Markdown(PAPER)
	gr.Markdown(DESC)

	# 参考帧制作指南（中英双语，无裁剪步骤）
	with gr.Accordion("参考帧制作指南 / Reference Frame Guide", open=False):
	gr.Markdown(REF_GUIDE_MD)

	debug_shapes = gr.Checkbox(label="调试日志 / Debug Logs（仅用于显示更完整日志 / show verbose logs）", value=False)

	with gr.Row():
	inp_video = gr.Video(label="黑白视频（mp4/webm/avi） / B&W Video", interactive=True)
	inp_ref = gr.Image(label="参考图像（RGB） / Reference Image (RGB)", type="pil")
	gr.Examples(
	label="示例 / Examples",
	examples=[["./example/4.mp4", "./example/4.png"]],
	inputs=[inp_video, inp_ref],
	cache_examples=False,
	)

	with gr.Accordion("高级参数设置 / Advanced Settings（传给 test.py / passed to test.py）", open=False):
	with gr.Row():
	first_not_exemplar = gr.Checkbox(label="FirstFrameIsNotExemplar (--FirstFrameIsNotExemplar)", value=True)
	reverse = gr.Checkbox(label="reverse (--reverse)", value=False)
	dataset = gr.Textbox(label="dataset (--dataset)", value="D16_batch")
	split = gr.Textbox(label="split (--split)", value="val")
	save_all = gr.Checkbox(label="save_all (--save_all)", value=True)
	benchmark = gr.Checkbox(label="benchmark (--benchmark)", value=False)
	with gr.Row():
	disable_long_term = gr.Checkbox(label="disable_long_term (--disable_long_term)", value=False)
	max_mid = gr.Number(label="max_mid_term_frames (--max_mid_term_frames)", value=10, precision=0)
	min_mid = gr.Number(label="min_mid_term_frames (--min_mid_term_frames)", value=5, precision=0)
	max_long = gr.Number(label="max_long_term_elements (--max_long_term_elements)", value=10000, precision=0)
	num_proto = gr.Number(label="num_prototypes (--num_prototypes)", value=128, precision=0)
	with gr.Row():
	top_k = gr.Number(label="top_k (--top_k)", value=30, precision=0)
	mem_every = gr.Number(label="mem_every (--mem_every)", value=5, precision=0)
	deep_update = gr.Number(label="deep_update_every (--deep_update_every)", value=-1, precision=0)
	save_scores = gr.Checkbox(label="save_scores (--save_scores)", value=False)
	flip = gr.Checkbox(label="flip (--flip)", value=False)
	size = gr.Number(label="size (--size)", value=-1, precision=0)

	run_btn = gr.Button("开始着色 / Start Coloring")
	with gr.Row():
	out_video = gr.Video(label="输出视频（着色结果） / Output (Colorized)", autoplay=True)
	status = gr.Textbox(label="状态 / 日志输出 / Status & Logs", interactive=False, lines=16)

	run_btn.click(
	fn=gradio_infer,
	inputs=[
	debug_shapes,
	inp_video, inp_ref,
	first_not_exemplar, dataset, split, save_all, benchmark,
	disable_long_term, max_mid, min_mid, max_long,
	num_proto, top_k, mem_every, deep_update,
	save_scores, flip, size, reverse
	],
	outputs=[out_video, status]
	)

	gr.HTML("<hr/>")
	gr.HTML(BADGES_HTML)

	if __name__ == "__main__":
	try:
	ensure_checkpoint()
	except Exception as e:
	print(f"[WARN] 预下载权重失败（首次推理会再试）: {e}")
	demo.queue(max_size=32).launch(server_name="0.0.0.0", server_port=7860)