File size: 3,729 Bytes
b01e5b1
80e1925
2418377
80e1925
 
 
 
 
2418377
 
 
 
c60ae80
2418377
c60ae80
80e1925
b01e5b1
3babd16
2418377
 
 
 
 
 
 
 
80e1925
2418377
80e1925
 
2418377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80e1925
2418377
 
80e1925
2418377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80e1925
 
 
 
2418377
 
 
 
80e1925
2418377
80e1925
 
 
 
 
 
 
 
 
 
 
c60ae80
2418377
c60ae80
2418377
 
 
 
 
 
 
 
c60ae80
2418377
 
 
 
 
 
 
b226f55
2418377
 
b226f55
2418377
 
 
 
b226f55
2418377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3babd16
80e1925
2418377
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
print("Loading models...")

# Heavy imports are done after the first print so the console shows
# progress while cv2 / torch-backed libraries initialize.
import cv2
import numpy as np
from PIL import Image
from rembg import remove
from sentence_transformers import SentenceTransformer
import urllib.request
import pathlib

print("Libraries loaded")

# Load CLIP Model
# Loaded once at import time; reused by every embedding request.
image_model = SentenceTransformer("clip-ViT-B-32")
print("CLIP loaded")


# Anime face detector (nagadomi's lbpcascade_animeface)
def load_anime_model():
    """Return an OpenCV cascade classifier for anime-style faces.

    The cascade XML is fetched into the working directory the first
    time this runs; later calls reuse the cached file.
    """
    cascade_file = pathlib.Path("lbpcascade_animeface.xml")

    if not cascade_file.exists():
        print("Downloading anime face model...")
        source_url = (
            "https://raw.githubusercontent.com/nagadomi/"
            "lbpcascade_animeface/master/lbpcascade_animeface.xml"
        )
        urllib.request.urlretrieve(source_url, cascade_file.as_posix())

    return cv2.CascadeClassifier(cascade_file.as_posix())


# Human face detector (OpenCV's bundled Haar cascade)
def load_human_model():
    """Return OpenCV's stock frontal-face Haar cascade classifier."""
    xml_name = "haarcascade_frontalface_default.xml"
    cascade_path = pathlib.Path(cv2.data.haarcascades + xml_name)
    return cv2.CascadeClassifier(cascade_path.as_posix())


# Instantiate both cascade detectors once at startup so each request
# reuses them instead of re-reading the XML model files.
anime_detector = load_anime_model()
human_detector = load_human_model()

print("Anime + Human detectors loaded")


# CLIP embedding helper
def get_image_embedding(image):
    """Encode *image* with the CLIP model and return a JSON-safe dict.

    The returned dict has a single "embedding" key holding a plain
    Python list of floats (converted from the model's numpy output).
    """
    vector = image_model.encode(image)
    return {"embedding": vector.tolist()}


# Face Crop + Background Remove
def _clean_and_resize(pil_image):
    """Strip the background and resize to CLIP's 224x224 input size."""
    output = remove(pil_image)
    return output.resize((224, 224))


def process_image(input_image, mode):
    """Detect a face, crop around it, remove the background, resize.

    Parameters
    ----------
    input_image : PIL.Image or None
        The uploaded image (Gradio passes None when nothing was uploaded).
    mode : str
        "Anime" selects the lbpcascade anime detector; any other value
        uses OpenCV's human frontal-face cascade.

    Returns
    -------
    tuple[str, PIL.Image | None]
        A (status, image) pair. On success the status is "Success ✅";
        when no face is detected the whole frame is background-removed
        instead of a face crop. On missing input, status is an error
        string and the image is None.
    """
    if input_image is None:
        # Guard: np.array(None) would crash below; callers already check
        # for a non-success status and short-circuit.
        return "Error: no image provided ❌", None

    # PIL gives RGB; OpenCV works in BGR.
    img = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Choose detector
    detector = anime_detector if mode == "Anime" else human_detector

    faces = detector.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(24, 24)
    )

    if len(faces) == 0:
        # No face found: fall back to background removal on the full frame.
        print("direct to background removal")
        pil_image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        return "Success ✅", _clean_and_resize(pil_image)

    # Pick the largest detection; faces[0] is detector-order dependent
    # and may be a small false positive.
    x, y, w, h = max(faces, key=lambda f: f[2] * f[3])

    height, width, _ = img.shape

    # Expand bounding box: extra headroom on top, some margin on the
    # sides, and a little below the chin.
    top_expand = 0.5
    side_expand = 0.3
    bottom_expand = 0.2

    x1 = int(max(0, x - w * side_expand))
    x2 = int(min(width, x + w + w * side_expand))
    y1 = int(max(0, y - h * top_expand))
    y2 = int(min(height, y + h + h * bottom_expand))

    cropped = img[y1:y2, x1:x2]
    pil_image = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))

    # Background removal + resize for CLIP.
    return "Success ✅", _clean_and_resize(pil_image)


# Gradio UI: two tabs sharing the same CLIP embedding backend.
with gr.Blocks() as demo:

    # Tab 1: detect -> crop -> remove background -> embed.
    with gr.Tab("Full Pipeline"):
        mode_selector = gr.Dropdown(
            choices=["Anime", "Human"],
            value="Anime",
            label="Detection Mode"
        )

        img_input = gr.Image(type="pil")
        embedding_output = gr.JSON()

        run_btn = gr.Button("Run Pipeline")

        def run_pipeline(img, mode):
            # Run the full face-crop + background-removal pipeline,
            # then embed the processed image with CLIP.
            status_msg, processed_img = process_image(img, mode)
            if status_msg != "Success ✅":
                # Defensive: surface a null embedding when processing
                # reports a non-success status.
                return {"embedding": None}

            embedding = get_image_embedding(processed_img)
            return embedding

        run_btn.click(
            run_pipeline,
            inputs=[img_input, mode_selector],
            outputs=[embedding_output]
        )

    # Tab 2: embed the raw uploaded image with no preprocessing.
    with gr.Tab("Embedding Only"):
        img_input2 = gr.Image(type="pil")
        embedding_output2 = gr.JSON()
        run_btn2 = gr.Button("Get Embedding")

        def get_embedding_only(img):
            # Skip detection/cropping entirely; embed as-is.
            embedding = get_image_embedding(img)
            return embedding

        run_btn2.click(
            get_embedding_only,
            inputs=img_input2,
            outputs=embedding_output2
        )


print("Launching demo...")
# queue() caps concurrent pending requests so the single in-process
# model isn't overwhelmed.
demo.queue(max_size=15).launch()