|
|
| import os |
| import cv2 |
| import torch |
| import numpy as np |
| import gradio as gr |
| import sys |
| import shutil |
| from datetime import datetime |
| import glob |
| import gc |
| import time |
| import spaces |
|
|
| from pi3.utils.geometry import se3_inverse, homogenize_points, depth_edge |
| from pi3.models.pi3 import Pi3 |
| from pi3.utils.basic import load_images_as_tensor, write_ply |
|
|
| import trimesh |
| import matplotlib |
| from scipy.spatial.transform import Rotation |
|
|
|
|
| """ |
| Gradio utils |
| """ |
|
|
def predictions_to_glb(
    predictions,
    conf_thres=50.0,
    filter_by_frames="all",
    show_cam=True,
):
    """
    Converts model predictions to a 3D scene plus the filtered point cloud.

    Args:
        predictions (dict): Dictionary containing model predictions with keys:
            - points: 3D point coordinates (S, H, W, 3)
            - conf: Confidence scores (S, H, W); defaults to all-ones if absent
            - images: Input images, channel-last (S, H, W, 3) or channel-first (S, 3, H, W)
            - camera_poses: Camera-to-world matrices (S, 4, 4)
        conf_thres (float): Confidence threshold percentage in [0, 100]; points whose
            confidence is below conf_thres/100 are dropped (default: 50.0; None -> 10)
        filter_by_frames (str): "all"/"All" or an "idx: filename" entry (default: "all")
        show_cam (bool): Include camera visualization (default: True)

    Returns:
        tuple: (trimesh.Scene containing point cloud and cameras,
                [filtered vertices (N, 3), per-vertex RGB colors (N, 3) uint8])

    Raises:
        ValueError: If input predictions structure is invalid
    """
    if not isinstance(predictions, dict):
        raise ValueError("predictions must be a dictionary")

    if conf_thres is None:
        conf_thres = 10

    print("Building GLB scene")

    # Optionally restrict the scene to a single frame; dropdown entries are
    # formatted "idx: filename", so the index is everything before the colon.
    selected_frame_idx = None
    if filter_by_frames not in ("all", "All"):
        try:
            selected_frame_idx = int(filter_by_frames.split(":")[0])
        except (ValueError, IndexError):
            pass

    pred_world_points = predictions["points"]
    pred_world_points_conf = predictions.get("conf", np.ones_like(pred_world_points[..., 0]))
    images = predictions["images"]
    camera_poses = predictions["camera_poses"]

    if selected_frame_idx is not None:
        # Keep a leading frame axis of size 1 so the code below stays shape-agnostic.
        pred_world_points = pred_world_points[selected_frame_idx][None]
        pred_world_points_conf = pred_world_points_conf[selected_frame_idx][None]
        images = images[selected_frame_idx][None]
        camera_poses = camera_poses[selected_frame_idx][None]

    vertices_3d = pred_world_points.reshape(-1, 3)

    # Accept either channel-first (S, 3, H, W) or channel-last (S, H, W, 3) images.
    if images.ndim == 4 and images.shape[1] == 3:
        colors_rgb = np.transpose(images, (0, 2, 3, 1))
    else:
        colors_rgb = images
    colors_rgb = (colors_rgb.reshape(-1, 3) * 255).astype(np.uint8)

    conf = pred_world_points_conf.reshape(-1)
    if conf_thres == 0.0:
        conf_threshold = 0.0
    else:
        # conf_thres is a percentage; confidences are in [0, 1].
        conf_threshold = conf_thres / 100

    # Also drop near-zero confidences (e.g. points suppressed at depth edges).
    conf_mask = (conf >= conf_threshold) & (conf > 1e-5)
    vertices_3d = vertices_3d[conf_mask]
    colors_rgb = colors_rgb[conf_mask]

    if vertices_3d is None or np.asarray(vertices_3d).size == 0:
        # Degenerate case: keep the viewer happy with a single placeholder point.
        vertices_3d = np.array([[1, 0, 0]])
        colors_rgb = np.array([[255, 255, 255]])
        scene_scale = 1
    else:
        # Robust scene extent from the 5th/95th percentile bounding box.
        lower_percentile = np.percentile(vertices_3d, 5, axis=0)
        upper_percentile = np.percentile(vertices_3d, 95, axis=0)
        scene_scale = np.linalg.norm(upper_percentile - lower_percentile)

    colormap = matplotlib.colormaps.get_cmap("gist_rainbow")

    scene_3d = trimesh.Scene()

    point_cloud_data = trimesh.PointCloud(vertices=vertices_3d, colors=colors_rgb)
    scene_3d.add_geometry(point_cloud_data)

    num_cameras = len(camera_poses)
    if show_cam:
        # One camera marker per frame, colored along the rainbow colormap.
        for i in range(num_cameras):
            camera_to_world = camera_poses[i]
            rgba_color = colormap(i / num_cameras)
            current_color = tuple(int(255 * x) for x in rgba_color[:3])

            # NOTE: marker size intentionally uses a fixed scale of 1.0
            # (not the computed scene_scale), preserving original behavior.
            integrate_camera_into_scene(scene_3d, camera_to_world, current_color, 1.)

    # Rotate the scene so the default viewer orientation is sensible.
    align_rotation = np.eye(4)
    align_rotation[:3, :3] = Rotation.from_euler("y", 100, degrees=True).as_matrix()
    align_rotation[:3, :3] = align_rotation[:3, :3] @ Rotation.from_euler("x", 155, degrees=True).as_matrix()
    scene_3d.apply_transform(align_rotation)

    print("GLB Scene built")
    return scene_3d, [vertices_3d, colors_rgb]
|
|
def integrate_camera_into_scene(scene: trimesh.Scene, transform: np.ndarray, face_colors: tuple, scene_scale: float):
    """
    Adds a stylized camera marker mesh to the given 3D scene.

    Args:
        scene (trimesh.Scene): Scene that receives the camera marker.
        transform (np.ndarray): 4x4 camera-to-world transformation matrix.
        face_colors (tuple): RGB color applied to the marker faces.
        scene_scale (float): Scale factor controlling the marker size.
    """
    cam_width = scene_scale * 0.05
    cam_height = scene_scale * 0.1

    # Rotate the 4-sided cone 45 degrees about z and push it back along z
    # so its apex sits at the camera origin.
    rot_45_degree = np.eye(4)
    rot_45_degree[:3, :3] = Rotation.from_euler("z", 45, degrees=True).as_matrix()
    rot_45_degree[2, 3] = -cam_height

    opengl_transform = get_opengl_conversion_matrix()
    complete_transform = transform @ opengl_transform @ rot_45_degree
    camera_cone_shape = trimesh.creation.cone(cam_width, cam_height, sections=4)

    # A slightly rotated third copy gives the marker visual thickness.
    slight_rotation = np.eye(4)
    slight_rotation[:3, :3] = Rotation.from_euler("z", 2, degrees=True).as_matrix()

    cone_vertices = camera_cone_shape.vertices
    vertices_combined = np.concatenate(
        [
            cone_vertices,
            0.95 * cone_vertices,
            transform_points(slight_rotation, cone_vertices),
        ]
    )
    vertices_transformed = transform_points(complete_transform, vertices_combined)

    mesh_faces = compute_camera_faces(camera_cone_shape)

    camera_mesh = trimesh.Trimesh(vertices=vertices_transformed, faces=mesh_faces)
    camera_mesh.visual.face_colors[:, :3] = face_colors
    scene.add_geometry(camera_mesh)
|
|
|
|
def get_opengl_conversion_matrix() -> np.ndarray:
    """
    Builds the 4x4 matrix converting to OpenGL camera conventions.

    Returns:
        numpy.ndarray: Identity matrix with the y and z axes negated
        (OpenGL cameras look down -z with +y up).
    """
    return np.diag([1.0, -1.0, -1.0, 1.0])
|
|
|
|
def transform_points(transformation: np.ndarray, points: np.ndarray, dim: int = None) -> np.ndarray:
    """
    Applies a 4x4 (homogeneous) transformation to a set of points.

    Args:
        transformation (np.ndarray): (..., 4, 4) transformation matrix.
        points (np.ndarray): (..., 3) points to be transformed.
        dim (int, optional): Number of output dimensions to keep; defaults to
            the input point dimension.

    Returns:
        np.ndarray: Transformed points with shape (..., dim).
    """
    points = np.asarray(points)
    initial_shape = points.shape[:-1]
    # Explicit None check: the original `dim or points.shape[-1]` silently
    # discarded an explicit dim=0.
    if dim is None:
        dim = points.shape[-1]

    # Work with row vectors: p' = p @ R^T + t^T.
    transformation = transformation.swapaxes(-1, -2)
    points = points @ transformation[..., :-1, :] + transformation[..., -1:, :]

    result = points[..., :dim].reshape(*initial_shape, dim)
    return result
|
|
|
|
def compute_camera_faces(cone_shape: trimesh.Trimesh) -> np.ndarray:
    """
    Computes the triangle faces for the camera marker mesh.

    The marker is built from three stacked copies of the cone's vertices;
    this connects the copies with side faces and then mirrors every face so
    the mesh is visible from both sides.

    Args:
        cone_shape (trimesh.Trimesh): The shape of the camera cone.

    Returns:
        np.ndarray: Array of face index triplets.
    """
    faces_list = []
    offset = len(cone_shape.vertices)

    for face in cone_shape.faces:
        # Skip faces touching the apex (vertex 0): only the base ring is kept.
        if 0 in face:
            continue
        v1, v2, v3 = face
        v1_b, v2_b, v3_b = face + offset
        v1_c, v2_c, v3_c = face + 2 * offset

        faces_list += [
            (v1, v2, v2_b),
            (v1, v1_b, v3),
            (v3_b, v2, v3),
            (v1, v2, v2_c),
            (v1, v1_c, v3),
            (v3_c, v2, v3),
        ]

    # Reversed-winding duplicates make every face double-sided.
    faces_list += [(c, b, a) for a, b, c in faces_list]
    return np.array(faces_list)
|
|
|
|
| |
| |
| |
@spaces.GPU(duration=120)
def run_model(target_dir, model) -> dict:
    """
    Run Pi3 inference on every image under ``target_dir/images``.

    Args:
        target_dir: Directory containing an ``images`` subfolder of input frames.
        model: Pi3 model instance; moved to CUDA and set to eval mode here.

    Returns:
        dict: Model predictions with tensors converted to numpy arrays and the
        leading batch dimension squeezed out. Adds 'images' (channel-last copy
        of the inputs) and a sigmoid-activated 'conf'; removes 'local_points'.

    Raises:
        ValueError: If CUDA is unavailable or no images are found.
    """
    print(f"Processing images from {target_dir}")

    # This Space requires CUDA; fail fast if it is missing.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if not torch.cuda.is_available():
        raise ValueError("CUDA is not available. Check your environment.")

    # Move model to GPU and switch to inference mode.
    model = model.to(device)
    model.eval()

    # Collect input frames (sorted for deterministic ordering).
    image_names = glob.glob(os.path.join(target_dir, "images", "*"))
    image_names = sorted(image_names)
    print(f"Found {len(image_names)} images")
    if len(image_names) == 0:
        raise ValueError("No images found. Check your upload.")

    # Load every frame (interval=1) into one tensor on the GPU.
    # Assumes load_images_as_tensor returns (N, 3, H, W) — the permute below
    # relies on that layout; TODO confirm against pi3.utils.basic.
    interval = 1
    imgs = load_images_as_tensor(os.path.join(target_dir, "images"), interval=interval).to(device)

    # Inference under bfloat16 autocast with gradients disabled.
    print("Running model inference...")
    dtype = torch.bfloat16
    with torch.no_grad():
        with torch.amp.autocast('cuda', dtype=dtype):
            predictions = model(imgs[None])  # [None] adds a batch dimension
            # Keep a channel-last copy of the inputs for visualization.
            predictions['images'] = imgs[None].permute(0, 1, 3, 4, 2)
            predictions['conf'] = torch.sigmoid(predictions['conf'])
            # Zero confidence at depth discontinuities to suppress points
            # that straddle object silhouettes.
            edge = depth_edge(predictions['local_points'][..., 2], rtol=0.03)
            predictions['conf'][edge] = 0.0
            del predictions['local_points']

    # Convert all tensors to numpy and drop the batch dimension.
    for key in predictions.keys():
        if isinstance(predictions[key], torch.Tensor):
            predictions[key] = predictions[key].cpu().numpy().squeeze(0)

    # Release cached GPU memory before returning to the UI.
    torch.cuda.empty_cache()
    return predictions
|
|
|
|
| |
| |
| |
def handle_uploads(input_video, input_images, interval=-1):
    """
    Create a new 'target_dir' + 'images' subfolder, and place user-uploaded
    images or extracted frames from video into it.

    Args:
        input_video: Uploaded video (path string or gradio dict with "name"),
            or None.
        input_images: Iterable of uploaded images (path strings or gradio
            dicts with "name"), or None.
        interval: Sampling interval. If > 0, keep every `interval`-th image /
            video frame; otherwise keep all images and sample video at
            roughly 1 frame per second.

    Returns:
        tuple: (target_dir, sorted list of copied/extracted image paths).
    """
    start_time = time.time()
    gc.collect()
    torch.cuda.empty_cache()

    # Unique per-upload workspace (timestamped to avoid collisions).
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    target_dir = f"input_images_{timestamp}"
    target_dir_images = os.path.join(target_dir, "images")

    # Clean any stale directory and recreate the layout.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir, exist_ok=True)
    os.makedirs(target_dir_images, exist_ok=True)

    image_paths = []

    # --- Copy uploaded images (optionally subsampled) ---
    if input_images is not None:
        if interval is not None and interval > 0:
            input_images = input_images[::interval]

        for file_data in input_images:
            # Gradio may hand back plain paths or {"name": path} dicts.
            if isinstance(file_data, dict) and "name" in file_data:
                file_path = file_data["name"]
            else:
                file_path = file_data
            dst_path = os.path.join(target_dir_images, os.path.basename(file_path))
            shutil.copy(file_path, dst_path)
            image_paths.append(dst_path)

    # --- Extract frames from an uploaded video ---
    if input_video is not None:
        if isinstance(input_video, dict) and "name" in input_video:
            video_path = input_video["name"]
        else:
            video_path = input_video

        vs = cv2.VideoCapture(video_path)
        try:
            fps = vs.get(cv2.CAP_PROP_FPS)
            if interval is not None and interval > 0:
                frame_interval = interval
            else:
                # Default: ~1 frame per second. Guard against fps <= 0
                # (unreadable metadata), which would make the modulo below
                # raise ZeroDivisionError.
                frame_interval = max(int(fps * 1), 1)

            count = 0
            video_frame_num = 0
            while True:
                gotit, frame = vs.read()
                if not gotit:
                    break
                count += 1
                if count % frame_interval == 0:
                    image_path = os.path.join(target_dir_images, f"{video_frame_num:06}.png")
                    cv2.imwrite(image_path, frame)
                    image_paths.append(image_path)
                    video_frame_num += 1
        finally:
            # Always release the capture handle (the original leaked it).
            vs.release()

    # Sort for deterministic downstream ordering.
    image_paths = sorted(image_paths)

    end_time = time.time()
    print(f"Files copied to {target_dir_images}; took {end_time - start_time:.3f} seconds")
    return target_dir, image_paths
|
|
|
|
| |
| |
| |
def update_gallery_on_upload(input_video, input_images, interval=-1):
    """
    Whenever user uploads or changes files, immediately handle them
    and show in the gallery. Return (target_dir, image_paths).
    If nothing is uploaded, returns "None" and empty list.
    """
    if input_video or input_images:
        target_dir, image_paths = handle_uploads(input_video, input_images, interval=interval)
        return None, target_dir, image_paths, "Upload complete. Click 'Reconstruct' to begin 3D processing."
    return None, None, None, None
|
|
|
|
| |
| |
| |
@spaces.GPU(duration=120)
def gradio_demo(
    target_dir,
    conf_thres=3.0,
    frame_filter="All",
    show_cam=True,
):
    """
    Perform reconstruction using the already-created target_dir/images.

    Args:
        target_dir: Directory created by handle_uploads (contains 'images').
        conf_thres: Confidence threshold percentage for point filtering.
        frame_filter: "All" or an "idx: filename" entry selecting one frame.
        show_cam: Whether to render camera markers in the scene.

    Returns:
        tuple: (glb path, ply path, log message, updated frame-filter dropdown),
        matching the (viewer, ply download, log, dropdown) outputs it is wired to.
    """
    if not os.path.isdir(target_dir) or target_dir == "None":
        # Keep the message in the log slot (3rd output), not the ply slot.
        return None, None, "No valid target directory found. Please upload first.", None

    start_time = time.time()
    gc.collect()
    torch.cuda.empty_cache()

    # Build dropdown entries as "index: filename" so downstream parsing
    # (int(value.split(':')[0]) in predictions_to_glb) can recover the index.
    target_dir_images = os.path.join(target_dir, "images")
    all_files = sorted(os.listdir(target_dir_images)) if os.path.isdir(target_dir_images) else []
    all_files = [f"{i}: {filename}" for i, filename in enumerate(all_files)]
    frame_filter_choices = ["All"] + all_files

    print("Running run_model...")
    with torch.no_grad():
        predictions = run_model(target_dir, model)

    # Persist raw predictions so visualization tweaks don't re-run the model.
    prediction_save_path = os.path.join(target_dir, "predictions.npz")
    np.savez(prediction_save_path, **predictions)

    if frame_filter is None:
        frame_filter = "All"

    # Cache one GLB per (threshold, frame, camera) parameter combination.
    glbfile = os.path.join(
        target_dir,
        f"glbscene_{conf_thres}_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}.glb",
    )

    glbscene, pcd = predictions_to_glb(
        predictions,
        conf_thres=conf_thres,
        filter_by_frames=frame_filter,
        show_cam=show_cam,
    )
    glbscene.export(file_obj=glbfile)

    # Also export the filtered point cloud as .ply (colors rescaled to [0, 1]).
    plyfile = glbfile.replace('.glb', '.ply')
    write_ply(pcd[0], pcd[1]/255, path=plyfile)
    print(f'Saved .ply file to {plyfile}')

    # Free memory before returning to the UI thread.
    del predictions
    gc.collect()
    torch.cuda.empty_cache()

    end_time = time.time()
    print(f"Total time: {end_time - start_time:.2f} seconds (including IO)")
    log_msg = f"Reconstruction Success ({len(all_files)} frames). Waiting for visualization."

    return glbfile, plyfile, log_msg, gr.Dropdown(choices=frame_filter_choices, value=frame_filter, interactive=True)
|
|
|
|
| |
| |
| |
def clear_fields():
    """Clear the 3D viewer by returning None for its component."""
    return None
|
|
|
|
def update_log():
    """Return the interim status message shown while reconstruction runs."""
    return "Loading and Reconstructing..."
|
|
|
|
def update_visualization(
    target_dir, conf_thres, frame_filter, show_cam, is_example
):
    """
    Reload saved predictions from npz, create (or reuse) the GLB for new parameters,
    and return it for the 3D viewer. If is_example == "True", skip.

    Returns:
        tuple: (glb path or None, ply path or None, status message) — always
        three values, matching the (viewer, ply download, log) outputs this
        callback is wired to. (The original early returns yielded 2-tuples.)
    """
    # Examples are display-only; don't rebuild anything for them.
    if is_example == "True":
        return None, None, "No reconstruction available. Please click the Reconstruct button first."

    if not target_dir or target_dir == "None" or not os.path.isdir(target_dir):
        return None, None, "No reconstruction available. Please click the Reconstruct button first."

    predictions_path = os.path.join(target_dir, "predictions.npz")
    if not os.path.exists(predictions_path):
        return None, None, f"No reconstruction available at {predictions_path}. Please run 'Reconstruct' first."

    key_list = [
        "images",
        "points",
        "conf",
        "camera_poses",
    ]

    loaded = np.load(predictions_path)
    predictions = {key: np.array(loaded[key]) for key in key_list}

    # One cached GLB/PLY pair per (threshold, frame, camera) combination.
    glbfile = os.path.join(
        target_dir,
        f"glbscene_{conf_thres}_{frame_filter.replace('.', '_').replace(':', '').replace(' ', '_')}_cam{show_cam}.glb",
    )
    plyfile = glbfile.replace('.glb', '.ply')

    # Rebuild when either cached artifact is missing. (The original only
    # rebuilt on a missing GLB, then referenced the point cloud
    # unconditionally — a GLB cache hit raised NameError on `pcd`.)
    if not os.path.exists(glbfile) or not os.path.exists(plyfile):
        glbscene, pcd = predictions_to_glb(
            predictions,
            conf_thres=conf_thres,
            filter_by_frames=frame_filter,
            show_cam=show_cam,
        )
        glbscene.export(file_obj=glbfile)
        write_ply(pcd[0], pcd[1]/255, path=plyfile)
        print(f'Saved .ply file to {plyfile}')

    return glbfile, plyfile, "Updating Visualization"
|
|
|
|
| |
| |
| |
|
|
# Bundled example media referenced by the gr.Examples table in the demo UI.
house = "examples/gradio_examples/house.mp4"
man_walking_long = "examples/gradio_examples/man_walking_long.mp4"
parkour = "examples/gradio_examples/parkour.mp4"
valley = "examples/gradio_examples/valley.mp4"
cartoon_horse = "examples/cartoon_horse.mp4"
parkour_long = "examples/parkour_long.mp4"
skating = "examples/skating.mp4"
skiing = "examples/skiing.mp4"
|
|
| |
| |
| |
|
|
if __name__ == '__main__':

    # Pick a device here for the initial model placement; actual inference
    # requires CUDA (run_model raises otherwise).
    device = "cuda" if torch.cuda.is_available() else "cpu"

    print("Initializing and loading Pi3 model...")

    # Load pretrained Pi3 weights from the Hugging Face Hub.
    model = Pi3.from_pretrained("yyfz233/Pi3")

    model.eval()
    model = model.to(device)

    # Ocean theme, with selected checkboxes restyled to the primary button colors.
    theme = gr.themes.Ocean()
    theme.set(
        checkbox_label_background_fill_selected="*button_primary_background_fill",
        checkbox_label_text_color_selected="*button_primary_text_color",
    )
|
|
    # Top-level UI definition. The CSS below is user-facing asset content
    # (animated dark theme, glassmorphism cards, glowing buttons) and is a
    # runtime string — left verbatim, including its original comments.
    with gr.Blocks(
        theme=theme,
        css="""
    /* --- Google 字体导入 (科技感字体) --- */
    @import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700;900&family=Rajdhani:wght@400;500;700&display=swap');

    /* --- 动画关键帧 --- */
    /* 背景动态星云效果 */
    @keyframes gradient-animation {
        0% { background-position: 0% 50%; }
        50% { background-position: 100% 50%; }
        100% { background-position: 0% 50%; }
    }

    /* 标题和状态文字的霓虹灯光效 */
    @keyframes text-glow {
        0%, 100% {
            text-shadow: 0 0 10px #0ea5e9, 0 0 20px #0ea5e9, 0 0 30px #4f46e5, 0 0 40px #4f46e5;
        }
        50% {
            text-shadow: 0 0 5px #0ea5e9, 0 0 10px #0ea5e9, 0 0 15px #4f46e5, 0 0 20px #4f46e5;
        }
    }

    /* 卡片边框呼吸光晕 */
    @keyframes border-glow {
        0% { border-color: rgba(79, 70, 229, 0.5); box-shadow: 0 0 15px rgba(79, 70, 229, 0.3); }
        50% { border-color: rgba(14, 165, 233, 0.8); box-shadow: 0 0 25px rgba(14, 165, 233, 0.5); }
        100% { border-color: rgba(79, 70, 229, 0.5); box-shadow: 0 0 15px rgba(79, 70, 229, 0.3); }
    }

    /* --- 全局样式:宇宙黑暗主题 --- */
    .gradio-container {
        font-family: 'Rajdhani', sans-serif;
        background: linear-gradient(-45deg, #020617, #111827, #082f49, #4f46e5);
        background-size: 400% 400%;
        animation: gradient-animation 20s ease infinite;
        color: #9ca3af;
    }

    /* --- 全局文字颜色修复 (解决Light Mode问题) --- */

    /* 1. 修复全局、标签和输入框内的文字颜色 */
    .gradio-container, .gr-label label, .gr-input, input, textarea, .gr-check-radio label {
        color: #d1d5db !important; /* 设置一个柔和的浅灰色 */
    }

    /* 2. 修复 Examples 表头 (这是您问题的核心) */
    thead th {
        color: white !important;
        background-color: #1f2937 !important; /* 同时给表头一个背景色,视觉效果更好 */
    }

    /* 3. 修复 Examples 表格内容文字 */
    tbody td {
        color: #d1d5db !important;
    }

    /* --- 状态信息 & 输出标题样式 (custom-log) ✨ --- */
    .custom-log * {
        font-family: 'Orbitron', sans-serif;
        font-size: 24px !important;
        font-weight: 700 !important;
        text-align: center !important;
        color: transparent !important;
        background-image: linear-gradient(120deg, #93c5fd, #6ee7b7, #fde047);
        background-size: 300% 300%;
        -webkit-background-clip: text;
        background-clip: text;
        animation: gradient-animation 8s ease-in-out infinite, text-glow 3s ease-in-out infinite;
        padding: 10px 0;
    }

    /* --- UI 卡片/分组样式 (玻璃拟态) 💎 --- */
    .gr-block.gr-group {
        background-color: rgba(17, 24, 39, 0.6);
        backdrop-filter: blur(10px);
        -webkit-backdrop-filter: blur(10px);
        border: 1px solid rgba(55, 65, 81, 0.5);
        border-radius: 16px;
        box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
        transition: all 0.3s ease;
        /* 应用边框呼吸光晕动画 */
        animation: border-glow 5s infinite alternate;
    }
    .gr-block.gr-group:hover {
        box-shadow: 0 0 25px rgba(14, 165, 233, 0.4);
        border-color: rgba(14, 165, 233, 0.6);
    }

    /* --- 酷炫按钮样式 🚀 --- */
    .gr-button {
        background: linear-gradient(to right, #4f46e5, #7c3aed, #0ea5e9) !important;
        background-size: 200% auto !important;
        color: white !important;
        font-weight: bold !important;
        border: none !important;
        border-radius: 10px !important;
        box-shadow: 0 4px 15px 0 rgba(79, 70, 229, 0.5) !important;
        transition: all 0.4s ease-in-out !important;
        font-family: 'Orbitron', sans-serif !important;
        text-transform: uppercase;
        letter-spacing: 1px;
    }
    .gr-button:hover {
        background-position: right center !important;
        box-shadow: 0 4px 20px 0 rgba(14, 165, 233, 0.6) !important;
        transform: translateY(-3px) scale(1.02);
    }
    .gr-button.primary {
        /* 主按钮增加呼吸光晕动画 */
        animation: border-glow 3s infinite alternate;
    }
    """,
    ) as demo:
| |
        # Hidden state carried between callbacks (Gradio components used as
        # plain string state holders).
        is_example = gr.Textbox(label="is_example", visible=False, value="None")
        num_images = gr.Textbox(label="num_images", visible=False, value="None")
        target_dir_output = gr.Textbox(label="Target Dir", visible=False, value="None")

        # Intro panel: scoped styles + usage instructions. The HTML is a
        # runtime string (user-facing content) and is left verbatim.
        gr.HTML(
            """
    <style>
    /* --- 介绍文字区专属样式 --- */
    .intro-content { font-size: 17px !important; line-height: 1.7; color: #C0C0C0 !important; }
    /* 额外为 p 标签添加规则,确保覆盖 */
    .intro-content p { color: #C0C0C0 !important; }

    .intro-content h1 {
        font-family: 'Orbitron', sans-serif; font-size: 2.8em !important; font-weight: 900;
        text-align: center; color: #C0C0C0 !important; animation: text-glow 4s ease-in-out infinite; margin-bottom: 0px;
    }
    .intro-content .pi-symbol {
        display: inline-block; color: transparent;
        background-image: linear-gradient(120deg, #38bdf8, #818cf8, #c084fc);
        -webkit-background-clip: text; background-clip: text;
        text-shadow: 0 0 15px rgba(129, 140, 248, 0.5);
    }
    .intro-content .subtitle { text-align: center; font-size: 1.1em; margin-bottom: 2rem; }
    .intro-content a.themed-link {
        color: #C0C0C0 !important; text-decoration: none; font-weight: 700; transition: all 0.3s ease;
    }
    .intro-content a.themed-link:hover { color: #EAEAEA !important; text-shadow: 0 0 8px rgba(234, 234, 234, 0.7); }
    .intro-content h3 {
        font-family: 'Orbitron', sans-serif; color: #C0C0C0 !important; text-transform: uppercase;
        letter-spacing: 2px; border-bottom: 1px solid #374151; padding-bottom: 8px; margin-top: 25px;
    }
    .intro-content ol { list-style: none; padding-left: 0; counter-reset: step-counter; }
    .intro-content ol li {
        counter-increment: step-counter; margin-bottom: 15px; padding-left: 45px; position: relative;
        color: #C0C0C0 !important; /* 确保列表项文字也是银白色 */
    }
    /* 自定义酷炫列表数字 */
    .intro-content ol li::before {
        content: counter(step-counter); position: absolute; left: 0; top: 0;
        width: 30px; height: 30px; background: linear-gradient(135deg, #1e3a8a, #4f46e5);
        border-radius: 50%; color: white; font-weight: 700; font-family: 'Orbitron', sans-serif;
        display: flex; align-items: center; justify-content: center;
        box-shadow: 0 0 10px rgba(79, 70, 229, 0.5);
    }
    .intro-content strong { color: #C0C0C0 !important; font-weight: 700; }
    .intro-content .performance-note {
        background-color: rgba(14, 165, 233, 0.1); border-left: 4px solid #0ea5e9;
        padding: 15px; border-radius: 8px; margin-top: 20px;
    }
    /* 确保提示框内的文字也生效 */
    .intro-content .performance-note p { color: #C0C0C0 !important; }

    </style>

    <div class="intro-content">
        <h1>🌌 <span class="pi-symbol">π³</span>: Scalable Permutation-Equivariant Visual Geometry Learning</h1>
        <p class="subtitle">
            <a class="themed-link" href="https://github.com/yyfz/Pi3">🐙 GitHub Repository</a> |
            <a class="themed-link" href="https://yyfz.github.io/pi3/">🚀 Project Page</a>
        </p>

        <p>Transform your videos or image collections into detailed 3D models. The <strong class="pi-symbol">π³</strong> model processes your visual data to generate a rich 3D point cloud and calculate the corresponding camera perspectives.</p>

        <h3>How to Use:</h3>
        <ol>
            <li><strong>Provide Your Media:</strong> Upload a video or image set. You can specify a sampling interval below. By default, videos are sampled at 1 frame per second, and for image sets, every image is used (interval of 1). Your inputs will be displayed in the "Preview" gallery.</li>
            <li><strong>Generate the 3D Model:</strong> Press the "Reconstruct" button to initiate the process.</li>
            <li><strong>Explore and Refine Your Model:</strong> The generated 3D model will appear in the viewer on the right. Interact with it by rotating, panning, and zooming. You can also download the model as a GLB file. For further refinement, use the options below the viewer to adjust point confidence, filter by frame, or toggle camera visibility.</li>
        </ol>

        <div class="performance-note">
            <p><strong>A Quick Note on Performance:</strong> The core processing by <strong class="pi-symbol">π³</strong> is incredibly fast, typically finishing in under a second. However, rendering the final 3D point cloud can take longer, depending on the complexity of the scene and the capabilities of the rendering engine.</p>
        </div>
    </div>
    """
        )
|
|
        # Main two-column layout: uploads/preview on the left, viewer on the right.
        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 1. Upload Media")
                    input_video = gr.Video(label="Upload Video", interactive=True)
                    input_images = gr.File(file_count="multiple", label="Or Upload Images", interactive=True)
                    interval = gr.Number(None, label='Frame/Image Interval', info="Sampling interval. Video default: 1 FPS. Image default: 1 (all images).")

                # Preview of the frames that handle_uploads copied/extracted.
                image_gallery = gr.Gallery(
                    label="Image Preview",
                    columns=4,
                    height="300px",
                    show_download_button=True,
                    object_fit="contain",
                    preview=True,
                )

            with gr.Column(scale=2):
                gr.Markdown("### 2. View Reconstruction")
                log_output = gr.Markdown("Please upload media and click Reconstruct.", elem_classes=["custom-log"])
                reconstruction_output = gr.Model3D(height=480, zoom_speed=0.5, pan_speed=0.5, label="3D Output")

                download_ply_output = gr.File(height=100, label="Download .ply Result", interactive=False)

                with gr.Row():
                    submit_btn = gr.Button("Reconstruct", scale=3, variant="primary")
                    clear_btn = gr.ClearButton(
                        scale=1
                    )

                # Post-reconstruction visualization controls; changing any of
                # them re-renders from the cached predictions.npz.
                with gr.Group():
                    gr.Markdown("### 3. Adjust Visualization")
                    with gr.Row():
                        conf_thres = gr.Slider(minimum=0, maximum=100, value=20, step=0.1, label="Confidence Threshold (%)")
                        show_cam = gr.Checkbox(label="Show Cameras", value=True)
                    frame_filter = gr.Dropdown(choices=["All"], value="All", label="Show Points from Frame")

        # Components reset by the Clear button.
        clear_btn.add([input_video, input_images, reconstruction_output, log_output, target_dir_output, image_gallery, interval])

        # Example rows: [video, images, interval, conf_thres, show_cam].
        examples = [
            [skating, None, 10, 20, True],
            [parkour_long, None, 20, 10, True],
            [cartoon_horse, None, 10, 20, True],
            [skiing, None, 30, 70, True],
            [man_walking_long, None, 1, 50, True],
            [house, None, 1, 20, True],
            [parkour, None, 1, 20, True],
            [valley, None, 1, 20, True],
        ]
|
|
        def example_pipeline(
            input_video,
            input_images,
            interval,
            conf_thres,
            show_cam,
        ):
            """
            1) Copy example images to new target_dir
            2) Reconstruct
            3) Return model3D + logs + new_dir + updated dropdown + gallery
            We do NOT return is_example. It's just an input.
            """
            target_dir, image_paths = handle_uploads(input_video, input_images, interval)
            # Examples always render all frames.
            frame_filter = "All"
            glbfile, ply_file, log_msg, dropdown = gradio_demo(
                target_dir, conf_thres, frame_filter, show_cam
            )
            return glbfile, ply_file, log_msg, target_dir, dropdown, image_paths

        gr.Markdown("Click any row to load an example.", elem_classes=["example-log"])

        # run_on_click=False: selecting a row only fills the inputs; the user
        # still presses Reconstruct to run the pipeline.
        gr.Examples(
            examples=examples,
            inputs=[
                input_video,
                input_images,
                interval,
                conf_thres,
                show_cam,
            ],
            outputs=[reconstruction_output, download_ply_output, log_output, target_dir_output, frame_filter, image_gallery],
            fn=example_pipeline,
            cache_examples=False,
            examples_per_page=50,
            run_on_click=False,
        )
|
|
| |
| |
| |
| |
| |
| |
| |
        # Reconstruct flow: clear the viewer -> show an interim log message ->
        # run the full pipeline -> mark that the current result is not an example.
        submit_btn.click(fn=clear_fields, inputs=[], outputs=[reconstruction_output]).then(
            fn=update_log, inputs=[], outputs=[log_output]
        ).then(
            fn=gradio_demo,
            inputs=[
                target_dir_output,
                conf_thres,
                frame_filter,
                show_cam,
            ],
            outputs=[reconstruction_output, download_ply_output, log_output, frame_filter],
        ).then(
            fn=lambda: "False", inputs=[], outputs=[is_example]
        )

        # Re-render the cached reconstruction whenever a visualization
        # parameter changes (no model re-run; reads predictions.npz).
        conf_thres.change(
            update_visualization,
            [
                target_dir_output,
                conf_thres,
                frame_filter,
                show_cam,
                is_example,
            ],
            [reconstruction_output, download_ply_output, log_output],
        )
        frame_filter.change(
            update_visualization,
            [
                target_dir_output,
                conf_thres,
                frame_filter,
                show_cam,
                is_example,
            ],
            [reconstruction_output, download_ply_output, log_output],
        )
        show_cam.change(
            update_visualization,
            [
                target_dir_output,
                conf_thres,
                frame_filter,
                show_cam,
                is_example,
            ],
            [reconstruction_output, download_ply_output, log_output],
        )

        # Populate the working directory and the preview gallery as soon as
        # media is uploaded or changed.
        input_video.change(
            fn=update_gallery_on_upload,
            inputs=[input_video, input_images, interval],
            outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
        )
        input_images.change(
            fn=update_gallery_on_upload,
            inputs=[input_video, input_images, interval],
            outputs=[reconstruction_output, target_dir_output, image_gallery, log_output],
        )

    # Queued launch so long-running reconstructions don't block other users.
    demo.queue(max_size=20).launch(show_error=True, share=True)
|
|