# app.py — "scanning" Gradio Space (commit 753c2e3)
# import gradio as gr
# import cv2
# import numpy as np
# from PIL import Image
# import torch
# from transformers import DetrImageProcessor, DetrForObjectDetection
# import os
# try:
# import spaces
# except ImportError:
# # Mocking spaces for local development
# class spaces:
# @staticmethod
# def GPU(func):
# def wrapper(*args, **kwargs):
# return func(*args, **kwargs)
# return wrapper
# # Load DETR model for object detection
# try:
# processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
# model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
# # Warm up the model
# dummy_input = processor(images=np.zeros((100, 100, 3)), return_tensors="pt")
# with torch.no_grad():
# _ = model(**dummy_input)
# except Exception as e:
# print(f"Error loading model: {e}")
# processor = None
# model = None
# @spaces.GPU
# def detect_objects(image):
# if model is None or processor is None:
# return image, "Model failed to load. Please check logs."
# # Convert to PIL if it's already a numpy array (to avoid rescaling warnings in processor)
# if isinstance(image, np.ndarray):
# image_pil = Image.fromarray(image)
# else:
# image_pil = image
# # Ensure it's RGB
# if image_pil.mode != "RGB":
# image_pil = image_pil.convert("RGB")
# # Prepare image for the model
# inputs = processor(images=image_pil, return_tensors="pt")
# with torch.no_grad():
# outputs = model(**inputs)
# # Post-process outputs
# target_sizes = torch.tensor([image_pil.size[::-1]]) # (height, width)
# results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.7)[0]
# # Draw bounding boxes on the image (using numpy for cv2)
# annotated_image = np.array(image_pil)
# detection_info = []
# for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
# box = [round(i, 2) for i in box.tolist()]
# label_name = model.config.id2label[label.item()]
# confidence = round(score.item(), 3)
# # Draw rectangle
# x1, y1, x2, y2 = map(int, box)
# cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
# # Add label
# label_text = f"{label_name}: {confidence}"
# cv2.putText(annotated_image, label_text, (x1, y1 - 10),
# cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# detection_info.append(f"• {label_name} (confidence: {confidence})")
# # Create summary text
# summary = f"**Detected {len(detection_info)} object(s):**\n\n" + "\n".join(detection_info) if detection_info else "No objects detected with confidence > 0.7"
# return annotated_image, summary
# def scan_edges(image):
# """
# Simple edge detection using OpenCV
# """
# # Convert PIL image to numpy array
# if isinstance(image, Image.Image):
# image = np.array(image)
# # Convert to grayscale
# gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
# # Apply Gaussian blur
# blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# # enhanced = clahe.apply(gray_image)
# # Edge detection using Canny
# edges = cv2.Canny(blurred, 50, 150)
# # Convert back to RGB for display
# edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
# return edges_rgb
# # def scan_edges(image):
# # # --- 1. Convert PIL image to numpy array if needed ---
# # if isinstance(image, Image.Image):
# # image = np.array(image)
# # # --- 2. Convert to grayscale ---
# # gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# # # --- 3. Gaussian blur BEFORE CLAHE to reduce high-freq noise ---
# # # that CLAHE would otherwise amplify
# # blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# # # --- 4. Apply CLAHE on the blurred image ---
# # # clipLimit=2.0 → controls noise amplification in flat regions
# # # tileGridSize → 8x8 tiles work well for engine block scale features;
# # # increase (e.g. 16x16) if bearing saddles are small
# # # relative to full image resolution
# # clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
# # enhanced = clahe.apply(blurred)
# # # --- 5. Canny edge detection on CLAHE-enhanced image ---
# # # Lower threshold (30) helps recover weak edges in shadow regions
# # # Upper threshold (120) keeps strong structural edges
# # # Tune these if you get too much noise or missing arcs
# # edges = cv2.Canny(enhanced, 30, 120)
# # # --- 6. Convert single-channel edge map back to RGB for display ---
# # edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
# # return edges_rgb
# def process_image(image, mode):
# """
# Process image based on selected mode
# """
# if image is None:
# return None, "Please upload an image."
# if mode == "Object Detection":
# return detect_objects(image)
# else: # Edge Detection
# edges = scan_edges(image)
# return edges, "Edge detection completed"
# # Create Gradio interface
# with gr.Blocks(title="Object Scanner") as demo:
# gr.Markdown("Detect objects or scan edges using your camera or uploaded images")
# with gr.Tabs():
# with gr.TabItem(" Image Scanner"):
# with gr.Row():
# with gr.Column():
# input_image = gr.Image(
# sources=["upload", "webcam"],
# type="pil",
# label="Upload or Capture Image"
# )
# mode = gr.Radio(
# choices=["Object Detection", "Edge Detection"],
# value="Object Detection",
# label="Scanning Mode"
# )
# scan_btn = gr.Button(" Process Image", variant="primary")
# with gr.Column():
# output_image = gr.Image(type="numpy", label="Processed Result")
# output_text = gr.Markdown(label="Detection Results")
# # Examples
# if os.path.exists("examples"):
# gr.Examples(
# examples=[
# ["examples/sample1.jpg", "Object Detection"],
# ["examples/sample2.jpg", "Edge Detection"],
# ],
# inputs=[input_image, mode],
# outputs=[output_image, output_text],
# fn=process_image,
# cache_examples=False,
# )
# with gr.TabItem("🎥 Live Edge Scan"):
# gr.Markdown("### Real-time Edge Detection")
# with gr.Row():
# with gr.Column():
# camera_input = gr.Image(
# sources=["webcam"],
# streaming=True,
# type="numpy",
# label="Live Feed"
# )
# with gr.Column():
# camera_output = gr.Image(
# label="Edge Stream"
# )
# # Live stream logic for edges
# camera_input.stream(
# fn=scan_edges,
# inputs=camera_input,
# outputs=camera_output
# )
# # Static scan logic
# scan_btn.click(
# fn=process_image,
# inputs=[input_image, mode],
# outputs=[output_image, output_text]
# )
# if __name__ == "__main__":
# demo.launch(
# server_name="0.0.0.0",
# server_port=7860,
# theme=gr.themes.Soft(),
# ssr_mode=False
# )
import gradio as gr
import cv2
import numpy as np
from PIL import Image
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection
import os
try:
    import spaces
except ImportError:
    # Local development fallback: the real `spaces` package only exists on
    # Hugging Face Spaces hardware, so provide a stand-in whose GPU decorator
    # is a transparent pass-through.
    class spaces:
        @staticmethod
        def GPU(func):
            # No-op wrapper: invoke the decorated function unchanged.
            return lambda *args, **kwargs: func(*args, **kwargs)
# Load DETR model for object detection
# NOTE: this runs at import time and downloads weights from the HF Hub on the
# first run — network I/O happens here, not inside a request handler.
try:
    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    # Warm up the model
    # One dummy forward pass so the first real request does not pay the
    # one-time initialization cost.
    dummy_input = processor(images=np.zeros((100, 100, 3)), return_tensors="pt")
    with torch.no_grad():
        _ = model(**dummy_input)
except Exception as e:
    # Broad catch is deliberate: the UI should still come up (edge-detection
    # modes keep working) even if model download/loading fails;
    # detect_objects() checks for None before use.
    print(f"Error loading model: {e}")
    processor = None
    model = None
@spaces.GPU
def detect_objects(image):
    """Run DETR object detection on *image* and draw labelled boxes.

    Args:
        image: PIL.Image or numpy RGB array from the Gradio input.

    Returns:
        Tuple of (annotated numpy RGB image, markdown summary string).
        If the model failed to load, the input is returned untouched with
        an error message.
    """
    if model is None or processor is None:
        return image, "Model failed to load. Please check logs."
    # Normalise the input to an RGB PIL image so the processor does not emit
    # rescaling warnings on raw numpy arrays.
    pil_img = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    if pil_img.mode != "RGB":
        pil_img = pil_img.convert("RGB")
    # Forward pass without gradient tracking.
    model_inputs = processor(images=pil_img, return_tensors="pt")
    with torch.no_grad():
        model_outputs = model(**model_inputs)
    # PIL's .size is (width, height); post-processing expects (height, width).
    sizes = torch.tensor([pil_img.size[::-1]])
    detections = processor.post_process_object_detection(
        model_outputs, target_sizes=sizes, threshold=0.7
    )[0]
    # Draw on a numpy copy so cv2 can annotate in place.
    canvas = np.array(pil_img)
    info_lines = []
    for score, label, box in zip(
        detections["scores"], detections["labels"], detections["boxes"]
    ):
        box = [round(coord, 2) for coord in box.tolist()]
        label_name = model.config.id2label[label.item()]
        confidence = round(score.item(), 3)
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label_text = f"{label_name}: {confidence}"
        cv2.putText(canvas, label_text, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        info_lines.append(f"• {label_name} (confidence: {confidence})")
    if info_lines:
        summary = f"**Detected {len(info_lines)} object(s):**\n\n" + "\n".join(info_lines)
    else:
        summary = "No objects detected with confidence > 0.7"
    return canvas, summary
def scan_edges(image, clip_limit=9.9, canny_low=30, canny_high=120):
    """Edge detection with CLAHE preprocessing to recover edges lost in
    shadowed regions (e.g. bearing saddle arcs on engine blocks).

    Pipeline:
        RGB → Grayscale → Gaussian Blur → CLAHE → Canny → Closing

    Args:
        image: PIL.Image or numpy array (RGB or already grayscale).
        clip_limit: CLAHE contrast clip limit. Default 9.9 preserves the
            current tuned behaviour; NOTE(review): an earlier comment in this
            function documented 2.0 ("caps noise amplification in uniform
            areas") while the code used 9.9 — confirm which value is intended.
        canny_low: Canny lower hysteresis threshold; 30 keeps weak edges in
            shadowed regions that the old (50, 150) pair missed.
        canny_high: Canny upper hysteresis threshold (default 120).

    Returns:
        RGB numpy array of the closed edge map (white edges on black).
    """
    # 1. Normalise to a numpy array (Gradio may hand us a PIL image).
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    # 2. Grayscale; accept single-channel frames as-is instead of crashing.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # 3. Gaussian blur BEFORE CLAHE — prevents CLAHE from amplifying
    #    surface noise on metallic/reflective parts.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # 4. CLAHE — boosts local contrast in dark/shadowed regions.
    #    8x8 tiles suit engine-block scale features; use (16, 16) if bearing
    #    saddles are small in frame.
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
    enhanced = clahe.apply(blurred)
    # 5. Canny on the CLAHE-enhanced image.
    edges = cv2.Canny(enhanced, canny_low, canny_high)
    # 6. Morphological closing (dilation then erosion) bridges small gaps in
    #    broken edges; 3x3 kernel for small gaps, use 5x5 for larger ones.
    kernel = np.ones((3, 3), np.uint8)
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=1)
    # 7. Convert back to RGB for Gradio display.
    return cv2.cvtColor(closed_edges, cv2.COLOR_GRAY2RGB)
def extract_green_channel(image):
    """Return an RGB image whose three channels all equal the green channel.

    The green channel often provides good contrast for vegetation and
    certain materials.

    Args:
        image: PIL.Image or numpy array. RGB/RGBA arrays have their channel
            index 1 extracted; 2-D (grayscale) arrays are used directly
            instead of crashing on the missing channel axis.

    Returns:
        (H, W, 3) numpy array with the selected channel replicated.
    """
    # 1. Normalise to a numpy array (Gradio may hand us a PIL image).
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    # 2. Grayscale frames have no channel axis — fall back to the frame itself.
    channel = image if image.ndim == 2 else image[:, :, 1]
    # 3. Replicate into 3 channels for display (equivalent to cv2 GRAY2RGB).
    return np.stack((channel, channel, channel), axis=-1)
def green_bilateral_edges(image):
    """Edge detection on the green channel with bilateral pre-filtering.

    Pipeline:
        RGB → Green Channel → Bilateral Filter → Canny → Closing

    Bilateral filtering smooths flat regions while preserving sharp edges,
    making it well suited to noisy or textured surfaces.

    Args:
        image: PIL.Image or numpy array. 2-D (grayscale) input is used
            directly instead of crashing on the missing channel axis.

    Returns:
        RGB numpy array of the closed edge map (white edges on black).
    """
    # 1. Normalise to a numpy array (Gradio may hand us a PIL image).
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    # 2. Green channel (index 1); grayscale frames pass through unchanged.
    green_channel = image if image.ndim == 2 else image[:, :, 1]
    # 3. Bilateral filter:
    #    d=9            pixel-neighbourhood diameter
    #    sigmaColor=75  larger → more dissimilar intensities mixed
    #    sigmaSpace=75  larger → farther pixels influence each other
    bilateral = cv2.bilateralFilter(green_channel, d=9, sigmaColor=75, sigmaSpace=75)
    # 4. Canny with moderate thresholds for balanced edge detection.
    edges = cv2.Canny(bilateral, 50, 150)
    # 5. Morphological closing (dilation then erosion) fills small gaps in
    #    broken edges; 3x3 kernel for small gaps, 5x5 for larger ones.
    kernel = np.ones((3, 3), np.uint8)
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=1)
    # 6. Convert back to RGB for Gradio display.
    return cv2.cvtColor(closed_edges, cv2.COLOR_GRAY2RGB)
def process_image(image, mode):
    """Dispatch *image* to the processing function selected by *mode*.

    Returns:
        Tuple of (result image or None, status/markdown string).
    """
    # Guard clauses: bail out early, one mode per return.
    if image is None:
        return None, "Please upload an image."
    if mode == "Object Detection":
        # detect_objects produces its own summary text.
        return detect_objects(image)
    if mode == "Edge Detection":
        return scan_edges(image), "Edge detection completed (CLAHE + Canny + Closing)"
    if mode == "Green Channel":
        return extract_green_channel(image), "Green channel extracted"
    if mode == "Green + Bilateral Edges":
        return green_bilateral_edges(image), "Edge detection completed (Green Channel + Bilateral Filter + Canny + Closing)"
    return image, "Unknown mode selected"
def process_live_stream(image, mode):
    """Process one live webcam frame according to *mode*.

    Returns the processed frame, or None when no frame is available.
    Unrecognised modes fall back to edge detection.
    """
    if image is None:
        return None
    # Table-driven dispatch; scan_edges doubles as the default handler.
    handlers = {
        "Edge Detection": scan_edges,
        "Green Channel": extract_green_channel,
        "Green + Bilateral Edges": green_bilateral_edges,
    }
    return handlers.get(mode, scan_edges)(image)
# Create Gradio interface
# NOTE: `theme` is a gr.Blocks constructor argument, not a launch() argument —
# it is set here so the Soft theme actually takes effect.
with gr.Blocks(title="Object Scanner", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Object Scanner\nDetect objects, scan edges, or extract green channel using your camera or uploaded images")
    with gr.Tabs():
        # --- Tab 1: one-shot processing of an uploaded/captured image ---
        with gr.TabItem("📷 Image Scanner"):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(
                        sources=["upload", "webcam"],
                        type="pil",
                        label="Upload or Capture Image"
                    )
                    mode = gr.Radio(
                        choices=[
                            "Object Detection",
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges"
                        ],
                        value="Object Detection",
                        label="Scanning Mode"
                    )
                    scan_btn = gr.Button("🔍 Process Image", variant="primary")
                with gr.Column():
                    output_image = gr.Image(type="numpy", label="Processed Result")
                    output_text = gr.Markdown(label="Detection Results")
            # Examples are optional — only wired up when the folder exists.
            if os.path.exists("examples"):
                gr.Examples(
                    examples=[
                        ["examples/sample1.jpg", "Object Detection"],
                        ["examples/sample2.jpg", "Edge Detection"],
                        ["examples/sample1.jpg", "Green Channel"],
                        ["examples/sample2.jpg", "Green + Bilateral Edges"],
                    ],
                    inputs=[input_image, mode],
                    outputs=[output_image, output_text],
                    fn=process_image,
                    cache_examples=False,
                )
        # --- Tab 2: streaming webcam processing ---
        with gr.TabItem("🎥 Live Processing"):
            gr.Markdown("### Real-time Image Processing")
            with gr.Row():
                with gr.Column():
                    camera_input = gr.Image(
                        sources=["webcam"],
                        streaming=True,
                        type="numpy",
                        label="Live Feed"
                    )
                    live_mode = gr.Radio(
                        choices=[
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges"
                        ],
                        value="Edge Detection",
                        label="Processing Mode"
                    )
                with gr.Column():
                    camera_output = gr.Image(
                        label="Processed Stream"
                    )
            # Live stream logic: pass the handler directly — the previous
            # `lambda img, mode: process_live_stream(img, mode)` wrapper added
            # nothing and hid the real function name from Gradio.
            camera_input.stream(
                fn=process_live_stream,
                inputs=[camera_input, live_mode],
                outputs=camera_output
            )
    # Static scan logic
    scan_btn.click(
        fn=process_image,
        inputs=[input_image, mode],
        outputs=[output_image, output_text]
    )
    # Info section
    with gr.Accordion("ℹ️ Mode Information", open=False):
        gr.Markdown("""
        ### Available Modes:
        **Object Detection** - Uses DETR model to detect and label objects with bounding boxes
        **Edge Detection** - CLAHE-enhanced Canny edge detection with morphological closing to fill gaps
        - Pipeline: Grayscale → Gaussian Blur → CLAHE → Canny → Closing
        - Closing operation connects broken edges (useful for interrupted arcs and curves)
        **Green Channel** - Extracts the green channel, useful for vegetation and certain materials
        **Green + Bilateral Edges** - Combines green channel extraction with bilateral filtering and closing
        - Pipeline: Green Channel → Bilateral Filter → Canny → Closing
        - Bilateral filtering preserves edges while reducing noise
        - Closing fills gaps in broken edges, ideal for textured surfaces
        ### Morphological Closing:
        - **Step 1 (Dilation)**: Expands white pixels to connect nearby edges
        - **Step 2 (Erosion)**: Shrinks pixels back to original size while keeping gaps filled
        - **Result**: Broken arcs and interrupted lines become continuous edges
        """)
if __name__ == "__main__":
    # Blocks.launch() does not accept a `theme` keyword — the theme belongs to
    # the gr.Blocks(...) constructor. Passing it here raises
    # `TypeError: launch() got an unexpected keyword argument 'theme'`
    # on current Gradio releases, so it has been removed from this call.
    demo.launch(
        server_name="0.0.0.0",   # bind all interfaces (required on HF Spaces)
        server_port=7860,        # the port HF Spaces expects
        ssr_mode=False
    )