"""Gradio demo: monocular depth estimation with Intel/dpt-hybrid-midas.

Upload an image; the DPT depth-estimation pipeline predicts a depth map,
which is normalized to 8-bit grayscale and displayed as an image.
"""
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import pipeline

# Load the depth-estimation pipeline once at startup (weights are cached by HF).
depth_estimator = pipeline(task="depth-estimation", model="Intel/dpt-hybrid-midas")


def launch(input_image):
    """Estimate a depth map for *input_image* and return it as a PIL image.

    Args:
        input_image: PIL.Image.Image supplied by the Gradio ``Image`` input.

    Returns:
        A grayscale (uint8) PIL image of the depth map, or ``None`` if
        inference fails — Gradio renders ``None`` as an empty output, whereas
        returning an error *string* to a ``gr.Image`` output would itself crash.
    """
    try:
        # Fix the working resolution so inference cost is predictable.
        input_image = input_image.resize((640, 480))

        # The pipeline expects 3-channel RGB input.
        if input_image.mode != "RGB":
            input_image = input_image.convert("RGB")

        out = depth_estimator(input_image)
        # A depth-estimation pipeline returns {"predicted_depth": Tensor, "depth": PIL image};
        # the former "segmentation_mask" fallback was dead code for this task.
        predicted_depth = out["predicted_depth"]

        # Upsample the raw depth tensor back to the input resolution.
        # PIL .size is (W, H); interpolate expects (H, W), hence the reversal.
        # predicted_depth is presumably (1, H', W') from the pipeline, so
        # unsqueeze(0) yields the 4-D (1, 1, H', W') bicubic interpolation needs.
        resized = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(0),
            size=input_image.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        depth = resized.squeeze().cpu().numpy()

        # Normalize to 0-255 for display; guard the all-zero map (e.g. a blank
        # input) which would otherwise divide by zero.
        max_val = float(np.max(depth))
        if max_val > 0:
            formatted = (depth * 255 / max_val).astype("uint8")
        else:
            formatted = np.zeros_like(depth, dtype="uint8")

        return Image.fromarray(formatted)
    except Exception as e:
        # Top-level UI boundary: log and return None so the interface stays up.
        print(f"Error processing the image: {str(e)}")
        return None


# Wire the handler to a simple image-in / image-out interface.
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
)

if __name__ == "__main__":
    iface.launch()