wzzanthony7 committed on
Commit
798edfa
·
verified ·
1 Parent(s): 72e376f
Files changed (1) hide show
  1. app.py +170 -8
app.py CHANGED
@@ -1,10 +1,132 @@
1
  import gradio as gr
2
  import os
3
  import sys
 
4
  from PIL import Image,ImageDraw
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def drawWithAllBox_info(pil_image, box_info):
7
- colors = ['red', 'green', 'blue', 'orange', 'purple', 'cyan', 'magenta', 'yellow', 'brown', 'pink', 'gray', 'lime']
8
  draw = ImageDraw.Draw(pil_image)
9
  for box in box_info:
10
  x, y, w, h = box['x'], box['y'], box['width'], box['height']
@@ -27,12 +149,52 @@ def draw_image_with_boxes(image):
27
  pil_image = image.copy() if hasattr(image, 'copy') else Image.fromarray(image)
28
  return drawWithAllBox_info(pil_image, test_box_info)
29
 
30
- demo = gr.Interface(
31
- fn=draw_image_with_boxes,
32
- inputs=gr.Image(type="pil", label="Upload Image"),
33
- outputs=gr.Image(type="pil", label="Image with Boxes"),
34
- title="Fraction & Tick Detector",
35
- description="Upload an image and see the detected boxes drawn on it."
36
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  demo.launch()
 
1
  import gradio as gr
2
  import os
3
  import sys
4
+ import json
5
  from PIL import Image,ImageDraw
6
# Detection class names; generate_textual_description creates one summary bucket per entry.
+ classes = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'tick', 'fraction']
7
# Roboflow API key read from the ROBOFLOW_API_KEY environment variable (None when it is unset).
+ API_KEY = os.environ.get("ROBOFLOW_API_KEY")
8
def packFilterBoxInfo(filter_box_info):
    """Read every detected fraction as a ``"numerator/denominator"`` string.

    Each box is a dict with the keys ``'class'``, ``'x'``, ``'y'``,
    ``'width'`` and ``'height'``; ``x``/``y`` are used as the box centre.
    A digit belongs to a fraction when its centre lies inside the fraction
    box. Digits whose centre is above the fraction's centre line form the
    numerator, all others form the denominator.

    Args:
        filter_box_info: Iterable of detection dicts.

    Returns:
        One string per fraction box, ordered by the fraction's left edge.
        A missing numerator or denominator is rendered as ``"?"``.
    """
    # Digit class name -> digit character.
    digit_chars = {
        'zero': '0', 'one': '1', 'two': '2', 'three': '3', 'four': '4',
        'five': '5', 'six': '6', 'seven': '7', 'eight': '8', 'nine': '9',
    }

    def left_edge(box):
        return box['x'] - box['width'] / 2

    fractions = sorted(
        (box for box in filter_box_info if box['class'] == 'fraction'),
        key=left_edge,
    )
    digits = [box for box in filter_box_info if box['class'] in digit_chars]

    values = []
    for frac in fractions:
        # Centre and half extents of the fraction box.
        cx, cy = frac['x'], frac['y']
        half_w, half_h = frac['width'] / 2, frac['height'] / 2

        above, below = [], []
        for digit in digits:
            # Keep only digits whose centre falls inside the fraction box.
            inside = (
                cx - half_w <= digit['x'] <= cx + half_w
                and cy - half_h <= digit['y'] <= cy + half_h
            )
            if not inside:
                continue
            # Above the centre line -> numerator; on or below it -> denominator.
            if digit['y'] < cy:
                above.append(digit)
            else:
                below.append(digit)

        # Read each part left to right.
        above.sort(key=left_edge)
        below.sort(key=left_edge)
        numerator = ''.join(digit_chars[d['class']] for d in above) or "?"
        denominator = ''.join(digit_chars[d['class']] for d in below) or "?"
        values.append(f"{numerator}/{denominator}")

    return values
65
+ # Boxes are given as (x1, y1, x2, y2) corner coordinates: top-left, then bottom-right.
66
def getOverlap(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: Four numbers ``(x1, y1, x2, y2)`` giving two opposite corners.
        box2: Same layout as ``box1``.

    Returns:
        Intersection area divided by union area as a float, or ``0.0`` when
        the boxes do not overlap (boxes that only touch along an edge or a
        corner count as non-overlapping).
    """
    def _ordered(box):
        # The areas were already computed with abs(), i.e. corners in either
        # order were tolerated there; normalise so the intersection test
        # accepts the same inputs instead of silently reporting 0.0.
        xa, ya, xb, yb = box
        return min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)

    b1_x1, b1_y1, b1_x2, b1_y2 = _ordered(box1)
    b2_x1, b2_y1, b2_x2, b2_y2 = _ordered(box2)

    inter_w = min(b1_x2, b2_x2) - max(b1_x1, b2_x1)
    inter_h = min(b1_y2, b2_y2) - max(b1_y1, b2_y1)
    if inter_w <= 0 or inter_h <= 0:
        return 0.0

    inter_area = inter_w * inter_h
    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)
    # A positive intersection implies both areas are positive, so the union
    # is never zero here.
    return inter_area / (b1_area + b2_area - inter_area)
82
+
83
def generate_textual_description(box_info):
    """Describe the detected number-line elements in plain English.

    Args:
        box_info: Iterable of detection dicts with the keys ``'class'``,
            ``'x'``, ``'y'``, ``'width'`` and ``'height'``; ``x``/``y`` are
            used as the box centre. Boxes whose class is not listed in the
            module-level ``classes`` are ignored.

    Returns:
        A multi-line string: an introductory sentence, an optional sentence
        for the left-most zero and for the right-most one (each only when no
        fraction box overlaps it with IoU >= 0.5), then the count and
        coordinates of all ticks and fractions, and the fraction values read
        from left to right.
    """
    fraction_values = packFilterBoxInfo(box_info)

    # Group the boxes by class, converted to [x1, y1, x2, y2] corner form.
    class_summary = {c: [] for c in classes}
    for box in box_info:
        bucket = class_summary.get(box['class'])
        if bucket is None:
            continue
        x, y, w, h = box['x'], box['y'], box['width'], box['height']
        bucket.append([x - w / 2, y - h / 2, x + w / 2, y + h / 2])

    fraction_boxes = class_summary.get('fraction', [])

    def _is_standalone(candidate):
        # True when the digit exists and no fraction box overlaps it with
        # IoU >= 0.5. The original flagged the "one" case through a
        # misspelled variable (left_one), so it was never suppressed.
        return candidate is not None and all(
            getOverlap(frac, candidate) < 0.5 for frac in fraction_boxes
        )

    def _corners(b):
        # Formatting each number separately avoids the invalid
        # "{(a:.2f, b:.2f)}" f-string form used before.
        return f"({b[0]:.2f}, {b[1]:.2f}), ({b[2]:.2f}, {b[3]:.2f})"

    # Sorting corner lists orders them by x1 first, so [0] is the left-most
    # box and [-1] the right-most. Either digit may be absent.
    zeros = sorted(class_summary.get('zero', []))
    ones = sorted(class_summary.get('one', []))
    left_most_zero_cor = zeros[0] if zeros else None
    right_most_one_cor = ones[-1] if ones else None

    parts = [
        "The key elements are interpreted via visual translator. Their coordinates are represented as outlined boxes (top-left, bottom-right)"
    ]
    # Each following sentence starts on its own line so it does not run into
    # the previous one.
    if _is_standalone(left_most_zero_cor):
        parts.append(
            "\nThere is a zero on the left side of the number line. "
            f"Its coordinate is ({_corners(left_most_zero_cor)})"
        )
    if _is_standalone(right_most_one_cor):
        parts.append(
            "\nThere is a one on the right side of the number line. "
            f"Its coordinate is ({_corners(right_most_one_cor)})"
        )

    present_classes = ('fraction', 'tick')
    for class_name, boxes in class_summary.items():
        if class_name not in present_classes or not boxes:
            continue
        boxes.sort(key=lambda b: b[0])  # left to right by top-left x
        parts.append(f"\nThere are {len(boxes)} {class_name}s. Their coordinates are: ")
        # "{{" and "}}" emit literal braces around each coordinate pair.
        parts.extend(f"{{{_corners(b)}}}, " for b in boxes)
        if class_name == "fraction":
            parts.append(f"\nThe fraction numbers from left to right are: {fraction_values}. ")

    return "".join(parts)
128
  def drawWithAllBox_info(pil_image, box_info):
129
+ colors = ['red', 'green', 'blue', 'orange', 'purple', 'cyan', 'magenta', 'yellow', 'brown', 'pink', 'gray', 'lime', 'navy']
130
  draw = ImageDraw.Draw(pil_image)
131
  for box in box_info:
132
  x, y, w, h = box['x'], box['y'], box['width'], box['height']
 
149
  pil_image = image.copy() if hasattr(image, 'copy') else Image.fromarray(image)
150
  return drawWithAllBox_info(pil_image, test_box_info)
151
 
152
def process_image(image):
    """Run the demo pipeline on one uploaded image.

    Args:
        image: A PIL image, an array accepted by ``Image.fromarray``, or
            ``None`` when nothing was uploaded.

    Returns:
        A tuple ``(boxed_image, description, json_str)``. For ``None`` input
        this is ``(None, "", None)``.
    """
    if image is None:
        return None, "", None

    # Only PIL images are copied directly. NumPy arrays also expose .copy(),
    # so the previous hasattr(image, 'copy') check never reached fromarray
    # for them and passed a raw array on to the drawing code.
    if isinstance(image, Image.Image):
        pil_image = image.copy()
    else:
        pil_image = Image.fromarray(image)

    boxed_img = drawWithAllBox_info(pil_image, test_box_info)
    textual = generate_textual_description(test_box_info)
    # The demo uses the module-level test_box_info as the detection result.
    json_str = json.dumps(test_box_info, indent=2)
    return boxed_img, textual, json_str
161
+
162
def download_json(json_str):
    """Write ``json_str`` to a temporary ``.json`` file and reveal it for download.

    Args:
        json_str: JSON text to save; may be ``None`` or empty when no
            detection has been run yet.

    Returns:
        A Gradio update for a ``gr.File`` component: the path of the written
        file with ``visible=True``, or a hidden, empty update when there is
        nothing to save.
    """
    import tempfile

    # Nothing to save: writing None would raise TypeError, and an empty file
    # is not valid JSON, so keep the file component hidden instead.
    if not json_str:
        return gr.update(value=None, visible=False)

    # delete=False keeps the file on disk after the handle closes so Gradio
    # can serve it.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    ) as f:
        f.write(json_str)
        temp_path = f.name

    # gr.update() is the generic update helper; the per-component
    # gr.File.update() classmethod is not available in Gradio 4+.
    return gr.update(value=temp_path, visible=True)
169
+
170
# UI layout: upload and run button on the left; annotated image, description
# and JSON download on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Fraction & Tick Detector\nUpload an image to see detected boxes, textual description, and download the box info as JSON.")
    with gr.Row():
        with gr.Column():
            img_input = gr.Image(type="pil", label="Upload Image")
            run_btn = gr.Button("Run Detection")
        with gr.Column():
            img_out = gr.Image(type="pil", label="Image with Boxes")
            text_out = gr.Textbox(label="Textual Description", lines=8)
            json_file = gr.File(label="Download Box Info (JSON)", visible=False)
            download_btn = gr.Button("Download Box Info as JSON")
    # State holding the JSON string produced by the last detection run.
    json_state = gr.State("")

    def _process(image):
        """Run detection and hide any previously generated download file."""
        boxed_img, textual, json_str = process_image(image)
        # process_image returns None for the JSON when no image was given;
        # store "" so the state always holds a string.
        # gr.update() replaces gr.File.update(), which is not available in
        # Gradio 4+.
        return boxed_img, textual, json_str or "", gr.update(visible=False)

    run_btn.click(_process, inputs=img_input, outputs=[img_out, text_out, json_state, json_file])

    def _download(json_str):
        """Save the stored JSON to a temporary file and show it for download."""
        import tempfile

        # No detection has been run yet: keep the file component hidden
        # rather than offering an empty file.
        if not json_str:
            return gr.update(value=None, visible=False)
        # delete=False keeps the file on disk after the handle closes so
        # Gradio can serve it.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=".json", mode="w", encoding="utf-8"
        ) as f:
            f.write(json_str)
            temp_path = f.name
        return gr.update(value=temp_path, visible=True)

    download_btn.click(_download, inputs=json_state, outputs=json_file)

demo.launch()