# app.py — "scanning" Gradio Space (commit 753c2e3)
# import gradio as gr
# import cv2
# import numpy as np
# from PIL import Image
# import torch
# from transformers import DetrImageProcessor, DetrForObjectDetection
# import os
# try:
# import spaces
# except ImportError:
# # Mocking spaces for local development
# class spaces:
# @staticmethod
# def GPU(func):
# def wrapper(*args, **kwargs):
# return func(*args, **kwargs)
# return wrapper
# # Load DETR model for object detection
# try:
# processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
# model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
# # Warm up the model
# dummy_input = processor(images=np.zeros((100, 100, 3)), return_tensors="pt")
# with torch.no_grad():
# _ = model(**dummy_input)
# except Exception as e:
# print(f"Error loading model: {e}")
# processor = None
# model = None
# @spaces.GPU
# def detect_objects(image):
# if model is None or processor is None:
# return image, "Model failed to load. Please check logs."
# # Convert to PIL if it's already a numpy array (to avoid rescaling warnings in processor)
# if isinstance(image, np.ndarray):
# image_pil = Image.fromarray(image)
# else:
# image_pil = image
# # Ensure it's RGB
# if image_pil.mode != "RGB":
# image_pil = image_pil.convert("RGB")
# # Prepare image for the model
# inputs = processor(images=image_pil, return_tensors="pt")
# with torch.no_grad():
# outputs = model(**inputs)
# # Post-process outputs
# target_sizes = torch.tensor([image_pil.size[::-1]]) # (height, width)
# results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.7)[0]
# # Draw bounding boxes on the image (using numpy for cv2)
# annotated_image = np.array(image_pil)
# detection_info = []
# for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
# box = [round(i, 2) for i in box.tolist()]
# label_name = model.config.id2label[label.item()]
# confidence = round(score.item(), 3)
# # Draw rectangle
# x1, y1, x2, y2 = map(int, box)
# cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
# # Add label
# label_text = f"{label_name}: {confidence}"
# cv2.putText(annotated_image, label_text, (x1, y1 - 10),
# cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# detection_info.append(f"• {label_name} (confidence: {confidence})")
# # Create summary text
# summary = f"**Detected {len(detection_info)} object(s):**\n\n" + "\n".join(detection_info) if detection_info else "No objects detected with confidence > 0.7"
# return annotated_image, summary
# def scan_edges(image):
# """
# Simple edge detection using OpenCV
# """
# # Convert PIL image to numpy array
# if isinstance(image, Image.Image):
# image = np.array(image)
# # Convert to grayscale
# gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
# # Apply Gaussian blur
# blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# # enhanced = clahe.apply(gray_image)
# # Edge detection using Canny
# edges = cv2.Canny(blurred, 50, 150)
# # Convert back to RGB for display
# edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
# return edges_rgb
# # def scan_edges(image):
# # # --- 1. Convert PIL image to numpy array if needed ---
# # if isinstance(image, Image.Image):
# # image = np.array(image)
# # # --- 2. Convert to grayscale ---
# # gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# # # --- 3. Gaussian blur BEFORE CLAHE to reduce high-freq noise ---
# # # that CLAHE would otherwise amplify
# # blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# # # --- 4. Apply CLAHE on the blurred image ---
# # # clipLimit=2.0 → controls noise amplification in flat regions
# # # tileGridSize → 8x8 tiles work well for engine block scale features;
# # # increase (e.g. 16x16) if bearing saddles are small
# # # relative to full image resolution
# # clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
# # enhanced = clahe.apply(blurred)
# # # --- 5. Canny edge detection on CLAHE-enhanced image ---
# # # Lower threshold (30) helps recover weak edges in shadow regions
# # # Upper threshold (120) keeps strong structural edges
# # # Tune these if you get too much noise or missing arcs
# # edges = cv2.Canny(enhanced, 30, 120)
# # # --- 6. Convert single-channel edge map back to RGB for display ---
# # edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
# # return edges_rgb
# def process_image(image, mode):
# """
# Process image based on selected mode
# """
# if image is None:
# return None, "Please upload an image."
# if mode == "Object Detection":
# return detect_objects(image)
# else: # Edge Detection
# edges = scan_edges(image)
# return edges, "Edge detection completed"
# # Create Gradio interface
# with gr.Blocks(title="Object Scanner") as demo:
# gr.Markdown("Detect objects or scan edges using your camera or uploaded images")
# with gr.Tabs():
# with gr.TabItem(" Image Scanner"):
# with gr.Row():
# with gr.Column():
# input_image = gr.Image(
# sources=["upload", "webcam"],
# type="pil",
# label="Upload or Capture Image"
# )
# mode = gr.Radio(
# choices=["Object Detection", "Edge Detection"],
# value="Object Detection",
# label="Scanning Mode"
# )
# scan_btn = gr.Button(" Process Image", variant="primary")
# with gr.Column():
# output_image = gr.Image(type="numpy", label="Processed Result")
# output_text = gr.Markdown(label="Detection Results")
# # Examples
# if os.path.exists("examples"):
# gr.Examples(
# examples=[
# ["examples/sample1.jpg", "Object Detection"],
# ["examples/sample2.jpg", "Edge Detection"],
# ],
# inputs=[input_image, mode],
# outputs=[output_image, output_text],
# fn=process_image,
# cache_examples=False,
# )
# with gr.TabItem("🎥 Live Edge Scan"):
# gr.Markdown("### Real-time Edge Detection")
# with gr.Row():
# with gr.Column():
# camera_input = gr.Image(
# sources=["webcam"],
# streaming=True,
# type="numpy",
# label="Live Feed"
# )
# with gr.Column():
# camera_output = gr.Image(
# label="Edge Stream"
# )
# # Live stream logic for edges
# camera_input.stream(
# fn=scan_edges,
# inputs=camera_input,
# outputs=camera_output
# )
# # Static scan logic
# scan_btn.click(
# fn=process_image,
# inputs=[input_image, mode],
# outputs=[output_image, output_text]
# )
# if __name__ == "__main__":
# demo.launch(
# server_name="0.0.0.0",
# server_port=7860,
# theme=gr.themes.Soft(),
# ssr_mode=False
# )
import gradio as gr
import cv2
import numpy as np
from PIL import Image
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection
import os
try:
    import spaces
except ImportError:
    # Local development fallback: the real `spaces` package only exists on
    # Hugging Face Spaces hardware, so provide a stand-in whose GPU decorator
    # is a transparent pass-through.
    class spaces:
        @staticmethod
        def GPU(func):
            # No-op wrapper: invoke the decorated function unchanged.
            return lambda *args, **kwargs: func(*args, **kwargs)
# Load DETR model for object detection
# NOTE: this runs at import time and downloads weights from the HF Hub on the
# first run — network I/O happens here, not inside a request handler.
try:
    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    # Warm up the model
    # One dummy forward pass so the first real request does not pay the
    # one-time initialization cost.
    dummy_input = processor(images=np.zeros((100, 100, 3)), return_tensors="pt")
    with torch.no_grad():
        _ = model(**dummy_input)
except Exception as e:
    # Broad catch is deliberate: the UI should still come up (edge-detection
    # modes keep working) even if model download/loading fails;
    # detect_objects() checks for None before use.
    print(f"Error loading model: {e}")
    processor = None
    model = None
@spaces.GPU
def detect_objects(image):
    """Run DETR object detection on *image* and draw labelled boxes.

    Args:
        image: PIL.Image or numpy RGB array from the Gradio input.

    Returns:
        Tuple of (annotated numpy RGB image, markdown summary string).
        If the model failed to load, the input is returned untouched with
        an error message.
    """
    if model is None or processor is None:
        return image, "Model failed to load. Please check logs."
    # Normalise the input to an RGB PIL image so the processor does not emit
    # rescaling warnings on raw numpy arrays.
    pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    if pil_img.mode != "RGB":
        pil_img = pil_img.convert("RGB")
    # Forward pass without gradient tracking.
    model_inputs = processor(images=pil_img, return_tensors="pt")
    with torch.no_grad():
        model_outputs = model(**model_inputs)
    # PIL's .size is (width, height); post-processing expects (height, width).
    sizes = torch.tensor([pil_img.size[::-1]])
    detections = processor.post_process_object_detection(
        model_outputs, target_sizes=sizes, threshold=0.7
    )[0]
    # Draw on a numpy copy so cv2 can annotate in place.
    canvas = np.array(pil_img)
    info_lines = []
    for score, label, box in zip(
        detections["scores"], detections["labels"], detections["boxes"]
    ):
        box = [round(coord, 2) for coord in box.tolist()]
        label_name = model.config.id2label[label.item()]
        confidence = round(score.item(), 3)
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label_text = f"{label_name}: {confidence}"
        cv2.putText(canvas, label_text, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        info_lines.append(f"• {label_name} (confidence: {confidence})")
    if info_lines:
        summary = f"**Detected {len(info_lines)} object(s):**\n\n" + "\n".join(info_lines)
    else:
        summary = "No objects detected with confidence > 0.7"
    return canvas, summary
def scan_edges(image, clip_limit=9.9, canny_low=30, canny_high=120):
    """Edge detection with CLAHE preprocessing to recover edges lost in
    shadowed regions (e.g. bearing saddle arcs on engine blocks).

    Pipeline:
        RGB → Grayscale → Gaussian Blur → CLAHE → Canny → Closing

    Args:
        image: PIL.Image or numpy array (RGB or already grayscale).
        clip_limit: CLAHE contrast clip limit. Default 9.9 preserves the
            current tuned behaviour; NOTE(review): an earlier comment in this
            function documented 2.0 ("caps noise amplification in uniform
            areas") while the code used 9.9 — confirm which value is intended.
        canny_low: Canny lower hysteresis threshold; 30 keeps weak edges in
            shadowed regions that the old (50, 150) pair missed.
        canny_high: Canny upper hysteresis threshold (default 120).

    Returns:
        RGB numpy array of the closed edge map (white edges on black).
    """
    # 1. Normalise to a numpy array (Gradio may hand us a PIL image).
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    # 2. Grayscale; accept single-channel frames as-is instead of crashing.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # 3. Gaussian blur BEFORE CLAHE — prevents CLAHE from amplifying
    #    surface noise on metallic/reflective parts.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # 4. CLAHE — boosts local contrast in dark/shadowed regions.
    #    8x8 tiles suit engine-block scale features; use (16, 16) if bearing
    #    saddles are small in frame.
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
    enhanced = clahe.apply(blurred)
    # 5. Canny on the CLAHE-enhanced image.
    edges = cv2.Canny(enhanced, canny_low, canny_high)
    # 6. Morphological closing (dilation then erosion) bridges small gaps in
    #    broken edges; 3x3 kernel for small gaps, use 5x5 for larger ones.
    kernel = np.ones((3, 3), np.uint8)
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=1)
    # 7. Convert back to RGB for Gradio display.
    return cv2.cvtColor(closed_edges, cv2.COLOR_GRAY2RGB)
def extract_green_channel(image):
    """Return an RGB image whose three channels all equal the green channel.

    The green channel often provides good contrast for vegetation and
    certain materials.

    Args:
        image: PIL.Image or numpy array. RGB/RGBA arrays have their channel
            index 1 extracted; 2-D (grayscale) arrays are used directly
            instead of crashing on the missing channel axis.

    Returns:
        (H, W, 3) numpy array with the selected channel replicated.
    """
    # 1. Normalise to a numpy array (Gradio may hand us a PIL image).
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    # 2. Grayscale frames have no channel axis — fall back to the frame itself.
    channel = image if image.ndim == 2 else image[:, :, 1]
    # 3. Replicate into 3 channels for display (equivalent to cv2 GRAY2RGB).
    return np.stack((channel, channel, channel), axis=-1)
def green_bilateral_edges(image):
    """Edge detection on the green channel with bilateral pre-filtering.

    Pipeline:
        RGB → Green Channel → Bilateral Filter → Canny → Closing

    Bilateral filtering smooths flat regions while preserving sharp edges,
    making it well suited to noisy or textured surfaces.

    Args:
        image: PIL.Image or numpy array. 2-D (grayscale) input is used
            directly instead of crashing on the missing channel axis.

    Returns:
        RGB numpy array of the closed edge map (white edges on black).
    """
    # 1. Normalise to a numpy array (Gradio may hand us a PIL image).
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    # 2. Green channel (index 1); grayscale frames pass through unchanged.
    green_channel = image if image.ndim == 2 else image[:, :, 1]
    # 3. Bilateral filter:
    #    d=9            pixel-neighbourhood diameter
    #    sigmaColor=75  larger → more dissimilar intensities mixed
    #    sigmaSpace=75  larger → farther pixels influence each other
    bilateral = cv2.bilateralFilter(green_channel, d=9, sigmaColor=75, sigmaSpace=75)
    # 4. Canny with moderate thresholds for balanced edge detection.
    edges = cv2.Canny(bilateral, 50, 150)
    # 5. Morphological closing (dilation then erosion) fills small gaps in
    #    broken edges; 3x3 kernel for small gaps, 5x5 for larger ones.
    kernel = np.ones((3, 3), np.uint8)
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=1)
    # 6. Convert back to RGB for Gradio display.
    return cv2.cvtColor(closed_edges, cv2.COLOR_GRAY2RGB)
def process_image(image, mode):
    """Dispatch *image* to the processing function selected by *mode*.

    Returns:
        Tuple of (result image or None, status/markdown string).
    """
    # Guard clauses: bail out early, one mode per return.
    if image is None:
        return None, "Please upload an image."
    if mode == "Object Detection":
        # detect_objects produces its own summary text.
        return detect_objects(image)
    if mode == "Edge Detection":
        return scan_edges(image), "Edge detection completed (CLAHE + Canny + Closing)"
    if mode == "Green Channel":
        return extract_green_channel(image), "Green channel extracted"
    if mode == "Green + Bilateral Edges":
        return green_bilateral_edges(image), "Edge detection completed (Green Channel + Bilateral Filter + Canny + Closing)"
    return image, "Unknown mode selected"
def process_live_stream(image, mode):
    """Process one live webcam frame according to *mode*.

    Returns the processed frame, or None when no frame is available.
    Unrecognised modes fall back to edge detection.
    """
    if image is None:
        return None
    # Table-driven dispatch; scan_edges doubles as the default handler.
    handlers = {
        "Edge Detection": scan_edges,
        "Green Channel": extract_green_channel,
        "Green + Bilateral Edges": green_bilateral_edges,
    }
    return handlers.get(mode, scan_edges)(image)
# Create Gradio interface
# NOTE: `theme` is a gr.Blocks constructor argument, not a launch() argument —
# it is set here so the Soft theme actually takes effect.
with gr.Blocks(title="Object Scanner", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Object Scanner\nDetect objects, scan edges, or extract green channel using your camera or uploaded images")
    with gr.Tabs():
        # --- Tab 1: one-shot processing of an uploaded/captured image ---
        with gr.TabItem("📷 Image Scanner"):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(
                        sources=["upload", "webcam"],
                        type="pil",
                        label="Upload or Capture Image"
                    )
                    mode = gr.Radio(
                        choices=[
                            "Object Detection",
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges"
                        ],
                        value="Object Detection",
                        label="Scanning Mode"
                    )
                    scan_btn = gr.Button("🔍 Process Image", variant="primary")
                with gr.Column():
                    output_image = gr.Image(type="numpy", label="Processed Result")
                    output_text = gr.Markdown(label="Detection Results")
            # Examples are optional — only wired up when the folder exists.
            if os.path.exists("examples"):
                gr.Examples(
                    examples=[
                        ["examples/sample1.jpg", "Object Detection"],
                        ["examples/sample2.jpg", "Edge Detection"],
                        ["examples/sample1.jpg", "Green Channel"],
                        ["examples/sample2.jpg", "Green + Bilateral Edges"],
                    ],
                    inputs=[input_image, mode],
                    outputs=[output_image, output_text],
                    fn=process_image,
                    cache_examples=False,
                )
        # --- Tab 2: streaming webcam processing ---
        with gr.TabItem("🎥 Live Processing"):
            gr.Markdown("### Real-time Image Processing")
            with gr.Row():
                with gr.Column():
                    camera_input = gr.Image(
                        sources=["webcam"],
                        streaming=True,
                        type="numpy",
                        label="Live Feed"
                    )
                    live_mode = gr.Radio(
                        choices=[
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges"
                        ],
                        value="Edge Detection",
                        label="Processing Mode"
                    )
                with gr.Column():
                    camera_output = gr.Image(
                        label="Processed Stream"
                    )
            # Live stream logic: pass the handler directly — the previous
            # `lambda img, mode: process_live_stream(img, mode)` wrapper added
            # nothing and hid the real function name from Gradio.
            camera_input.stream(
                fn=process_live_stream,
                inputs=[camera_input, live_mode],
                outputs=camera_output
            )
    # Static scan logic
    scan_btn.click(
        fn=process_image,
        inputs=[input_image, mode],
        outputs=[output_image, output_text]
    )
    # Info section
    with gr.Accordion("ℹ️ Mode Information", open=False):
        gr.Markdown("""
        ### Available Modes:
        **Object Detection** - Uses DETR model to detect and label objects with bounding boxes
        **Edge Detection** - CLAHE-enhanced Canny edge detection with morphological closing to fill gaps
        - Pipeline: Grayscale → Gaussian Blur → CLAHE → Canny → Closing
        - Closing operation connects broken edges (useful for interrupted arcs and curves)
        **Green Channel** - Extracts the green channel, useful for vegetation and certain materials
        **Green + Bilateral Edges** - Combines green channel extraction with bilateral filtering and closing
        - Pipeline: Green Channel → Bilateral Filter → Canny → Closing
        - Bilateral filtering preserves edges while reducing noise
        - Closing fills gaps in broken edges, ideal for textured surfaces
        ### Morphological Closing:
        - **Step 1 (Dilation)**: Expands white pixels to connect nearby edges
        - **Step 2 (Erosion)**: Shrinks pixels back to original size while keeping gaps filled
        - **Result**: Broken arcs and interrupted lines become continuous edges
        """)
if __name__ == "__main__":
    # Blocks.launch() does not accept a `theme` keyword — the theme belongs to
    # the gr.Blocks(...) constructor. Passing it here raises
    # `TypeError: launch() got an unexpected keyword argument 'theme'`
    # on current Gradio releases, so it has been removed from this call.
    demo.launch(
        server_name="0.0.0.0",   # bind all interfaces (required on HF Spaces)
        server_port=7860,        # the port HF Spaces expects
        ssr_mode=False
    )