| |
import sys
import types

# Python 3.13 removed the stdlib `audioop` module (and its pure-Python
# fallback `pyaudioop`), which pydub — a transitive gradio dependency —
# still imports. Register empty stub modules so that import succeeds.
for _stub_name in ("audioop", "pyaudioop"):
    if _stub_name not in sys.modules:
        sys.modules[_stub_name] = types.ModuleType(_stub_name)
|
|
| import gradio as gr |
| import torch |
| import numpy as np |
| import cv2 |
| from PIL import Image |
| from transformers import AutoImageProcessor, AutoModelForDepthEstimation |
| import tempfile |
| import os |
|
|
| |
# Module-level cache for the lazily loaded processor/model pair.
_processor = None
_model = None


def get_model():
    """Return the cached (processor, model) pair, loading it on first use.

    Loading Depth Anything V2 Small is slow, so it happens once per process;
    subsequent calls hit the module-level cache.
    """
    global _processor, _model
    if _processor is not None:
        return _processor, _model
    print("Loading Depth Anything V2 Small...")
    _processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
    _model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
    _model.eval()
    print("Model loaded.")
    return _processor, _model
| |
|
|
|
|
def estimate_depth(frame_rgb: np.ndarray) -> np.ndarray:
    """Predict a uint8 depth map for a single RGB frame.

    The frame is downscaled to width 256 (aspect preserved) before inference
    for speed; the predicted depth is min-max normalized to [0, 255] and
    resized back to the frame's original resolution.
    """
    processor, model = get_model()
    orig_h, orig_w = frame_rgb.shape[:2]
    # cv2.resize takes dsize as (width, height).
    scaled = cv2.resize(frame_rgb, (256, int(256 * orig_h / orig_w)))
    inputs = processor(images=Image.fromarray(scaled), return_tensors="pt")
    with torch.no_grad():
        prediction = model(**inputs).predicted_depth
    depth_map = prediction.squeeze().numpy()
    depth_u8 = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    return cv2.resize(depth_u8, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)
|
|
|
|
def process_video(video_path, fps_out, max_frames, progress=gr.Progress()):
    """Run depth estimation over a subsampled set of video frames.

    Args:
        video_path: Path to the uploaded video file, or None.
        fps_out: Desired output frame rate; source frames are subsampled to
            approximate it.
        max_frames: Upper bound on the number of frames processed.
        progress: Gradio progress tracker (injected by the UI).

    Returns:
        Tuple of (depth video path, side-by-side preview path, first-frame
        depth PNG path), or (None, None, None) when no video was provided.

    Raises:
        gr.Error: If the video cannot be opened or yields no frames.
    """
    if video_path is None:
        return None, None, None

    # Gradio sliders may deliver floats; normalize so range() and
    # VideoWriter get the types they expect.
    fps_out = float(fps_out)
    max_frames = int(max_frames)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise gr.Error("Could not open video file.")

    try:
        src_fps = cap.get(cv2.CAP_PROP_FPS) or 24  # some containers report 0 fps
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Take every `step`-th source frame so playback at fps_out roughly
        # matches real time, capped at max_frames.
        step = max(1, int(src_fps / fps_out))
        frame_indices = list(range(0, min(total_frames, max_frames * step), step))[:max_frames]
        if not frame_indices:
            # Guards the progress() division below against a 0-frame video.
            raise gr.Error("Video contains no readable frames.")

        tmp_dir = tempfile.mkdtemp()
        depth_path = os.path.join(tmp_dir, "depth.mp4")
        preview_path = os.path.join(tmp_dir, "preview.mp4")

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        # NOTE(review): isColor=False single-channel writing is backend-dependent
        # in OpenCV; if the depth video comes out empty on some platforms,
        # write GRAY2BGR frames with a color writer instead — verify on target.
        depth_writer = cv2.VideoWriter(depth_path, fourcc, fps_out, (w, h), isColor=False)
        preview_writer = cv2.VideoWriter(preview_path, fourcc, fps_out, (w * 2, h))

        first_depth_frame = None
        try:
            n_frames = len(frame_indices)
            for i, idx in enumerate(frame_indices):
                progress(i / n_frames, desc=f"Processing frame {i+1}/{n_frames}")
                cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
                ret, frame_bgr = cap.read()
                if not ret:
                    continue  # skip frames the decoder cannot produce

                frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
                depth = estimate_depth(frame_rgb)

                # Keep the first successful depth frame for the PNG output.
                if first_depth_frame is None:
                    first_depth_frame = depth

                depth_writer.write(depth)

                # Build the "original | colorized depth" preview frame.
                depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
                depth_color_rgb = cv2.cvtColor(depth_color, cv2.COLOR_BGR2RGB)
                side = np.concatenate([frame_rgb, depth_color_rgb], axis=1)
                preview_writer.write(cv2.cvtColor(side, cv2.COLOR_RGB2BGR))
        finally:
            # Release writers even if depth estimation fails mid-run so the
            # container headers are finalized and handles are not leaked.
            depth_writer.release()
            preview_writer.release()
    finally:
        cap.release()

    first_frame_png = None
    if first_depth_frame is not None:
        png_path = os.path.join(tmp_dir, "first_frame.png")
        Image.fromarray(first_depth_frame).save(png_path)
        first_frame_png = png_path

    return depth_path, preview_path, first_frame_png
|
|
|
|
| css = """ |
| body { background: #0a0a0a; color: #e0e0e0; font-family: 'Space Mono', monospace; } |
| .gradio-container { max-width: 900px; margin: 0 auto; } |
| h1 { color: #c8ff00; letter-spacing: 0.08em; font-size: 1.6rem; } |
| button.primary { background: #c8ff00 !important; color: #0a0a0a !important; font-weight: 700; border-radius: 2px !important; } |
| button.primary:hover { background: #b0e000 !important; } |
| .footer { color: #444; font-size: 0.7rem; text-align: center; margin-top: 2rem; } |
| """ |
|
|
# --- Gradio UI -----------------------------------------------------------
# Left column: video upload + sampling controls; right column: the three
# generated outputs (depth video, side-by-side preview, first-frame PNG).
with gr.Blocks(css=css, title="DepthShift — Depth Map Generator") as demo:
    # Page header; the <link> loads the Space Mono font referenced by `css`.
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
    <h1>⬛ DEPTHSHIFT / DEPTH GENERATOR</h1>
    <p style="color:#888; font-size:0.85rem; margin-top:-0.5rem;">
    Upload an MP4 → get a grayscale depth map video ready for
    <a href="https://spatial-index.vercel.app" target="_blank" style="color:#c8ff00;">Spatial Index</a>
    </p>
    """)

    with gr.Row():
        with gr.Column():
            video_in = gr.Video(label="Input Video (MP4)", interactive=True)
            with gr.Row():
                # Output FPS drives frame subsampling; Max Frames bounds CPU time.
                fps_slider = gr.Slider(6, 24, value=12, step=1, label="Output FPS")
                frames_slider = gr.Slider(10, 60, value=30, step=5, label="Max Frames")
            run_btn = gr.Button("Generate Depth Map", variant="primary")

        with gr.Column():
            depth_out = gr.Video(label="Depth Map (grayscale) — use this in DepthShift")
            preview_out = gr.Video(label="Preview (original | depth side-by-side)")
            frame_out = gr.Image(label="First Frame Depth PNG")

    # Usage notes / footer (styled by the .footer rule in `css`).
    gr.HTML("""
    <div class="footer">
    <b style="color:#c8ff00">HOW TO USE</b><br>
    1. Upload your MP4 →
    2. Download the depth map video →
    3. Load both into <a href="https://spatial-index.vercel.app" style="color:#c8ff00">Spatial Index / DepthShift</a>
    <br><br>Processing runs on CPU — keep Max Frames ≤ 30 for reasonable wait times (~1–2 min).
    </div>
    """)

    # Wire the button to process_video; outputs map 1:1 to its return tuple.
    run_btn.click(
        fn=process_video,
        inputs=[video_in, fps_slider, frames_slider],
        outputs=[depth_out, preview_out, frame_out],
    )
|
|
| demo.launch() |
|
|