"""Gradio demo: monocular depth estimation with Intel/dpt-hybrid-midas.

Upload an image; the DPT depth-estimation pipeline predicts a depth map,
which is normalized to 8-bit grayscale and displayed as an image.
"""
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import pipeline

# Load the depth-estimation pipeline once at startup (weights are cached by HF).
depth_estimator = pipeline(task="depth-estimation", model="Intel/dpt-hybrid-midas")


def launch(input_image):
    """Estimate a depth map for *input_image* and return it as a PIL image.

    Args:
        input_image: PIL.Image.Image supplied by the Gradio ``Image`` input.

    Returns:
        A grayscale (uint8) PIL image of the depth map, or ``None`` if
        inference fails — Gradio renders ``None`` as an empty output, whereas
        returning an error *string* to a ``gr.Image`` output would itself crash.
    """
    try:
        # Fix the working resolution so inference cost is predictable.
        input_image = input_image.resize((640, 480))

        # The pipeline expects 3-channel RGB input.
        if input_image.mode != "RGB":
            input_image = input_image.convert("RGB")

        out = depth_estimator(input_image)
        # A depth-estimation pipeline returns {"predicted_depth": Tensor, "depth": PIL image};
        # the former "segmentation_mask" fallback was dead code for this task.
        predicted_depth = out["predicted_depth"]

        # Upsample the raw depth tensor back to the input resolution.
        # PIL .size is (W, H); interpolate expects (H, W), hence the reversal.
        # predicted_depth is presumably (1, H', W') from the pipeline, so
        # unsqueeze(0) yields the 4-D (1, 1, H', W') bicubic interpolation needs.
        resized = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(0),
            size=input_image.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        depth = resized.squeeze().cpu().numpy()

        # Normalize to 0-255 for display; guard the all-zero map (e.g. a blank
        # input) which would otherwise divide by zero.
        max_val = float(np.max(depth))
        if max_val > 0:
            formatted = (depth * 255 / max_val).astype("uint8")
        else:
            formatted = np.zeros_like(depth, dtype="uint8")

        return Image.fromarray(formatted)
    except Exception as e:
        # Top-level UI boundary: log and return None so the interface stays up.
        print(f"Error processing the image: {str(e)}")
        return None


# Wire the handler to a simple image-in / image-out interface.
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
)

if __name__ == "__main__":
    iface.launch()