| from PIL import Image |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| from huggingface_hub import hf_hub_download |
|
|
| |
| |
| from transformers import DepthProConfig, DepthProImageProcessorFast, DepthProForDepthEstimation |
|
|
| |
# Build a DepthPro model with use_fov_model=False and swap the innermost
# module of its head for a custom single-output convolutional head.
# NOTE(review): the checkpoint name suggests a human-segmentation fine-tune;
# confirm against the model card.
config = DepthProConfig(use_fov_model=False)
model = DepthProForDepthEstimation(config)
features = config.fusion_hidden_size
semantic_classifier_dropout = 0.1
num_labels = 1
model.head.head = nn.Sequential(
    nn.Conv2d(features, features, kernel_size=3, padding=1, bias=False),
    nn.BatchNorm2d(features),
    nn.ReLU(),
    nn.Dropout(semantic_classifier_dropout),
    nn.Conv2d(features, features, kernel_size=1),
    # kernel_size=2 with stride=2 doubles the spatial resolution.
    nn.ConvTranspose2d(features, num_labels, kernel_size=2, stride=2, padding=0, bias=True),
)

# Download the weights and load them on the CPU first; the head must already
# be replaced so the state-dict keys match the module structure.
weights_path = hf_hub_download(repo_id="geetu040/DepthPro_Segmentation_Human", filename="model_weights.pth")
model.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu'), weights_only=True))

# Move the model to the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# A model built directly from a config (rather than via from_pretrained)
# stays in training mode. Switch to eval mode so Dropout is disabled and
# BatchNorm uses its stored running statistics; otherwise predictions are
# random from call to call.
model.eval()

# Image processor created with its default settings.
image_processor = DepthProImageProcessorFast()
|
|
def predict(image: Image.Image) -> Image.Image:
    """Run the model on *image* and return a black-and-white mask.

    Args:
        image: Input PIL image. It is converted to RGB before preprocessing.

    Returns:
        A single-channel 8-bit PIL image with the same width and height as
        the input. Pixels whose sigmoid score exceeds 0.5 are 255, all other
        pixels are 0.
    """
    image = image.convert("RGB")

    # Preprocess and move every tensor to the device the model lives on.
    inputs = image_processor(images=image, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Gradients are not needed for inference.
    with torch.no_grad():
        outputs = model(**inputs)

    # First field of the model output. The interpolate call below requires a
    # 4-D input, so this is expected to be 3-D with a leading axis of size 1
    # for a single image -- TODO confirm against the model's output class.
    logits = outputs[0]

    # Resize back to the original resolution (default nearest-neighbour mode).
    logits = F.interpolate(
        logits.unsqueeze(0),
        size=(image.height, image.width),
    )

    # Drop the two leading singleton axes by index. A bare squeeze() would
    # also drop a spatial axis of size 1, so an image that is one pixel high
    # or wide would come back with the wrong shape.
    logits = logits[0, 0]

    # Threshold the probabilities and scale to the 0/255 range of an 8-bit mask.
    mask = (logits.sigmoid() > 0.5).float()
    mask = mask.cpu() * 255
    return Image.fromarray(mask.numpy().astype("uint8"))
|
|