| |
import sys
import types

# Python 3.13 removed the stdlib `audioop` module (and its pure-Python
# fallback `pyaudioop`), which pydub — a transitive gradio dependency —
# still imports. Register empty stub modules so that import succeeds.
for _stub_name in ("audioop", "pyaudioop"):
    if _stub_name not in sys.modules:
        sys.modules[_stub_name] = types.ModuleType(_stub_name)
|
|
| import gradio as gr |
| import torch |
| import numpy as np |
| import cv2 |
| from PIL import Image |
| from transformers import AutoImageProcessor, AutoModelForDepthEstimation |
| import tempfile |
| import os |
|
|
| |
# Module-level cache for the lazily loaded processor/model pair.
_processor = None
_model = None


def get_model():
    """Return the cached (processor, model) pair, loading it on first use.

    Loading Depth Anything V2 Small is slow, so it happens once per process;
    subsequent calls hit the module-level cache.
    """
    global _processor, _model
    if _processor is not None:
        return _processor, _model
    print("Loading Depth Anything V2 Small...")
    _processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
    _model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
    _model.eval()
    print("Model loaded.")
    return _processor, _model
| |
|
|
|
|
def estimate_depth(frame_rgb: np.ndarray) -> np.ndarray:
    """Predict a uint8 depth map for a single RGB frame.

    The frame is downscaled to width 256 (aspect preserved) before inference
    for speed; the predicted depth is min-max normalized to [0, 255] and
    resized back to the frame's original resolution.
    """
    processor, model = get_model()
    orig_h, orig_w = frame_rgb.shape[:2]
    # cv2.resize takes dsize as (width, height).
    scaled = cv2.resize(frame_rgb, (256, int(256 * orig_h / orig_w)))
    inputs = processor(images=Image.fromarray(scaled), return_tensors="pt")
    with torch.no_grad():
        prediction = model(**inputs).predicted_depth
    depth_map = prediction.squeeze().numpy()
    depth_u8 = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    return cv2.resize(depth_u8, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)
|
|
|
|
def process_video(video_path, fps_out, max_frames, progress=gr.Progress()):
    """Run depth estimation over a subsampled set of video frames.

    Args:
        video_path: Path to the uploaded video file, or None.
        fps_out: Desired output frame rate; source frames are subsampled to
            approximate it.
        max_frames: Upper bound on the number of frames processed.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Tuple of (depth video path, side-by-side preview path, first-frame
        depth PNG path), or (None, None, None) when no video was provided.

    Raises:
        gr.Error: If the video cannot be opened or yields no frames.
    """
    if video_path is None:
        return None, None, None

    # Gradio sliders may deliver floats; normalize so range() and
    # VideoWriter get the types they expect.
    fps_out = float(fps_out)
    max_frames = int(max_frames)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise gr.Error("Could not open video file.")

    try:
        src_fps = cap.get(cv2.CAP_PROP_FPS) or 24  # some containers report 0 fps
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Take every `step`-th source frame so playback at fps_out roughly
        # matches real time, capped at max_frames.
        step = max(1, int(src_fps / fps_out))
        frame_indices = list(range(0, min(total_frames, max_frames * step), step))[:max_frames]
        if not frame_indices:
            # Guards the progress() division below against a 0-frame video.
            raise gr.Error("Video contains no readable frames.")

        tmp_dir = tempfile.mkdtemp()
        depth_path = os.path.join(tmp_dir, "depth.mp4")
        preview_path = os.path.join(tmp_dir, "preview.mp4")

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        # NOTE(review): isColor=False single-channel writing is backend-dependent
        # in OpenCV; if the depth video comes out empty on some platforms,
        # write GRAY2BGR frames with a color writer instead — verify on target.
        depth_writer = cv2.VideoWriter(depth_path, fourcc, fps_out, (w, h), isColor=False)
        preview_writer = cv2.VideoWriter(preview_path, fourcc, fps_out, (w * 2, h))

        first_depth_frame = None
        try:
            n_frames = len(frame_indices)
            for i, idx in enumerate(frame_indices):
                progress(i / n_frames, desc=f"Processing frame {i+1}/{n_frames}")
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ret, frame_bgr = cap.read()
                if not ret:
                    continue  # skip frames the decoder cannot produce

                frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
                depth = estimate_depth(frame_rgb)

                # Keep the first successful depth frame for the PNG output.
                if first_depth_frame is None:
                    first_depth_frame = depth

                depth_writer.write(depth)

                # Build the "original | colorized depth" preview frame.
                depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
                depth_color_rgb = cv2.cvtColor(depth_color, cv2.COLOR_BGR2RGB)
                side = np.concatenate([frame_rgb, depth_color_rgb], axis=1)
                preview_writer.write(cv2.cvtColor(side, cv2.COLOR_RGB2BGR))
        finally:
            # Release writers even if depth estimation fails mid-run so the
            # container headers are finalized and handles are not leaked.
            depth_writer.release()
            preview_writer.release()
    finally:
        cap.release()

    first_frame_png = None
    if first_depth_frame is not None:
        png_path = os.path.join(tmp_dir, "first_frame.png")
        Image.fromarray(first_depth_frame).save(png_path)
        first_frame_png = png_path

    return depth_path, preview_path, first_frame_png
|
|
|
|
| css = """ |
| body { background: #0a0a0a; color: #e0e0e0; font-family: 'Space Mono', monospace; } |
| .gradio-container { max-width: 900px; margin: 0 auto; } |
| h1 { color: #c8ff00; letter-spacing: 0.08em; font-size: 1.6rem; } |
| button.primary { background: #c8ff00 !important; color: #0a0a0a !important; font-weight: 700; border-radius: 2px !important; } |
| button.primary:hover { background: #b0e000 !important; } |
| .footer { color: #444; font-size: 0.7rem; text-align: center; margin-top: 2rem; } |
| """ |
|
|
# --- Gradio UI -----------------------------------------------------------
# Left column: video upload + sampling controls; right column: the three
# generated outputs (depth video, side-by-side preview, first-frame PNG).
with gr.Blocks(css=css, title="DepthShift — Depth Map Generator") as demo:
    # Page header; the <link> loads the Space Mono font referenced by `css`.
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
    <h1>⬛ DEPTHSHIFT / DEPTH GENERATOR</h1>
    <p style="color:#888; font-size:0.85rem; margin-top:-0.5rem;">
    Upload an MP4 → get a grayscale depth map video ready for
    <a href="https://spatial-index.vercel.app" target="_blank" style="color:#c8ff00;">Spatial Index</a>
    </p>
    """)

    with gr.Row():
        with gr.Column():
            video_in = gr.Video(label="Input Video (MP4)", interactive=True)
            with gr.Row():
                # Output FPS drives frame subsampling; Max Frames bounds CPU time.
                fps_slider = gr.Slider(6, 24, value=12, step=1, label="Output FPS")
                frames_slider = gr.Slider(10, 60, value=30, step=5, label="Max Frames")
            run_btn = gr.Button("Generate Depth Map", variant="primary")

        with gr.Column():
            depth_out = gr.Video(label="Depth Map (grayscale) — use this in DepthShift")
            preview_out = gr.Video(label="Preview (original | depth side-by-side)")
            frame_out = gr.Image(label="First Frame Depth PNG")

    # Usage notes / footer (styled by the .footer rule in `css`).
    gr.HTML("""
    <div class="footer">
    <b style="color:#c8ff00">HOW TO USE</b><br>
    1. Upload your MP4 →
    2. Download the depth map video →
    3. Load both into <a href="https://spatial-index.vercel.app" style="color:#c8ff00">Spatial Index / DepthShift</a>
    <br><br>Processing runs on CPU — keep Max Frames ≤ 30 for reasonable wait times (~1–2 min).
    </div>
    """)

    # Wire the button to process_video; outputs map 1:1 to its return tuple.
    run_btn.click(
        fn=process_video,
        inputs=[video_in, fps_slider, frames_slider],
        outputs=[depth_out, preview_out, frame_out],
    )
|
|
| demo.launch() |
|
|