"""Object Scanner — Gradio app combining DETR object detection with several
OpenCV edge-detection / channel-extraction pipelines, for both uploaded
images and live webcam streaming."""

import functools
import os

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import DetrForObjectDetection, DetrImageProcessor

try:
    import spaces
except ImportError:
    # Mock the Hugging Face `spaces` module for local development so that
    # @spaces.GPU degrades to a harmless pass-through decorator.
    class spaces:
        @staticmethod
        def GPU(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                return func(*args, **kwargs)
            return wrapper


# Load the DETR model for object detection. On failure the app still starts;
# detect_objects() then reports the problem instead of crashing.
try:
    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    # Warm up the model. Use uint8 zeros: np.zeros defaults to float64,
    # whereas the image processor expects 0-255 pixel data.
    dummy_input = processor(
        images=np.zeros((100, 100, 3), dtype=np.uint8), return_tensors="pt"
    )
    with torch.no_grad():
        _ = model(**dummy_input)
except Exception as e:
    print(f"Error loading model: {e}")
    processor = None
    model = None


@spaces.GPU
def detect_objects(image):
    """Detect objects in *image* with DETR and draw labelled bounding boxes.

    Args:
        image: PIL.Image or RGB numpy array.

    Returns:
        Tuple of (annotated RGB numpy array, markdown summary string).
    """
    if model is None or processor is None:
        return image, "Model failed to load. Please check logs."

    # Convert numpy input to PIL (avoids rescaling warnings in the processor).
    image_pil = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    # Ensure RGB — uploads/webcam frames may arrive as RGBA or grayscale.
    if image_pil.mode != "RGB":
        image_pil = image_pil.convert("RGB")

    inputs = processor(images=image_pil, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL .size is (width, height); post-processing wants (height, width).
    target_sizes = torch.tensor([image_pil.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.7
    )[0]

    # Draw bounding boxes on a numpy copy (cv2 draws in place).
    annotated_image = np.array(image_pil)
    detection_info = []

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(i, 2) for i in box.tolist()]
        label_name = model.config.id2label[label.item()]
        confidence = round(score.item(), 3)

        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label_text = f"{label_name}: {confidence}"
        cv2.putText(
            annotated_image, label_text, (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2,
        )
        detection_info.append(f"• {label_name} (confidence: {confidence})")

    # Parenthesized so the conditional-expression precedence is explicit.
    summary = (
        f"**Detected {len(detection_info)} object(s):**\n\n" + "\n".join(detection_info)
        if detection_info
        else "No objects detected with confidence > 0.7"
    )
    return annotated_image, summary


def scan_edges(image):
    """Edge detection with CLAHE preprocessing to recover edges lost in
    shadowed regions (e.g. bearing saddle arcs on engine blocks).

    Pipeline: RGB -> grayscale -> Gaussian blur -> CLAHE -> Canny -> closing.

    Args:
        image: PIL.Image or RGB numpy array.

    Returns:
        RGB numpy array with white edges on a black background.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Blur BEFORE CLAHE — prevents CLAHE from amplifying surface noise on
    # metallic/reflective parts.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # CLAHE boosts local contrast in dark/shadowed regions.
    # clipLimit=2.0 caps noise amplification in uniform areas (the code had
    # drifted to 9.9, contradicting its own documentation; 2.0 restores the
    # documented intent). 8x8 tiles suit engine-block scale features;
    # use (16, 16) if bearing saddles are small in frame.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(blurred)

    # Thresholds lowered vs plain Canny (50, 150) -> (30, 120) so weak edges
    # in shadowed saddle arcs are not missed.
    edges = cv2.Canny(enhanced, 30, 120)

    # Morphological closing (dilate then erode) fills small gaps in broken
    # edges; a 3x3 kernel suits small gaps, use 5x5 for larger ones.
    kernel = np.ones((3, 3), np.uint8)
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=1)

    # Back to RGB for Gradio display.
    return cv2.cvtColor(closed_edges, cv2.COLOR_GRAY2RGB)


def extract_green_channel(image):
    """Extract the green channel from an RGB image.

    The green channel often provides good contrast for vegetation and
    certain materials.

    Args:
        image: PIL.Image or numpy array (RGB/RGBA, or already grayscale).

    Returns:
        RGB numpy array in which all three channels equal the green channel.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    # Robustness: a grayscale frame has no channel axis — use it directly
    # instead of raising IndexError.
    green_channel = image if image.ndim == 2 else image[:, :, 1]

    # Replicate to 3 channels for display.
    return cv2.cvtColor(green_channel, cv2.COLOR_GRAY2RGB)


def green_bilateral_edges(image):
    """Edge detection on the green channel with bilateral pre-filtering.

    Pipeline: green channel -> bilateral filter -> Canny -> closing.
    Bilateral filtering smooths flat regions while preserving sharp edges,
    which suits noisy or textured surfaces.

    Args:
        image: PIL.Image or numpy array (RGB/RGBA, or already grayscale).

    Returns:
        RGB numpy array with white edges on a black background.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    # Robustness: tolerate grayscale frames (no channel axis).
    green_channel = image if image.ndim == 2 else image[:, :, 1]

    # d=9: neighborhood diameter; sigmaColor/sigmaSpace=75 control how
    # strongly dissimilar colors / distant pixels are mixed in.
    bilateral = cv2.bilateralFilter(green_channel, d=9, sigmaColor=75, sigmaSpace=75)

    # Moderate thresholds for balanced edge detection.
    edges = cv2.Canny(bilateral, 50, 150)

    # Closing (dilate then erode) reconnects broken edge segments.
    kernel = np.ones((3, 3), np.uint8)
    closed_edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=1)

    return cv2.cvtColor(closed_edges, cv2.COLOR_GRAY2RGB)


def process_image(image, mode):
    """Dispatch a static image to the pipeline selected by *mode*.

    Returns:
        Tuple of (processed image or None, status/summary string).
    """
    if image is None:
        return None, "Please upload an image."

    if mode == "Object Detection":
        return detect_objects(image)
    elif mode == "Edge Detection":
        return scan_edges(image), "Edge detection completed (CLAHE + Canny + Closing)"
    elif mode == "Green Channel":
        return extract_green_channel(image), "Green channel extracted"
    elif mode == "Green + Bilateral Edges":
        return (
            green_bilateral_edges(image),
            "Edge detection completed (Green Channel + Bilateral Filter + Canny + Closing)",
        )
    else:
        return image, "Unknown mode selected"


def process_live_stream(image, mode):
    """Dispatch a live webcam frame to the pipeline selected by *mode*.

    Object detection is intentionally excluded — too slow for streaming.
    Unknown modes fall back to plain edge detection.
    """
    if image is None:
        return None

    if mode == "Green Channel":
        return extract_green_channel(image)
    elif mode == "Green + Bilateral Edges":
        return green_bilateral_edges(image)
    else:
        # "Edge Detection" and any unknown mode.
        return scan_edges(image)


# Build the Gradio interface.
# NOTE: `theme` is a gr.Blocks() constructor argument — Blocks.launch() has
# no such parameter and passing it there raises TypeError.
with gr.Blocks(title="Object Scanner", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# 🔍 Object Scanner\n"
        "Detect objects, scan edges, or extract green channel using your "
        "camera or uploaded images"
    )

    with gr.Tabs():
        with gr.TabItem("📷 Image Scanner"):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(
                        sources=["upload", "webcam"],
                        type="pil",
                        label="Upload or Capture Image",
                    )
                    mode = gr.Radio(
                        choices=[
                            "Object Detection",
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges",
                        ],
                        value="Object Detection",
                        label="Scanning Mode",
                    )
                    scan_btn = gr.Button("🔍 Process Image", variant="primary")
                with gr.Column():
                    output_image = gr.Image(type="numpy", label="Processed Result")
                    output_text = gr.Markdown(label="Detection Results")

            # Example gallery is wired up only when the examples/ dir exists.
            if os.path.exists("examples"):
                gr.Examples(
                    examples=[
                        ["examples/sample1.jpg", "Object Detection"],
                        ["examples/sample2.jpg", "Edge Detection"],
                        ["examples/sample1.jpg", "Green Channel"],
                        ["examples/sample2.jpg", "Green + Bilateral Edges"],
                    ],
                    inputs=[input_image, mode],
                    outputs=[output_image, output_text],
                    fn=process_image,
                    cache_examples=False,
                )

        with gr.TabItem("🎥 Live Processing"):
            gr.Markdown("### Real-time Image Processing")
            with gr.Row():
                with gr.Column():
                    camera_input = gr.Image(
                        sources=["webcam"],
                        streaming=True,
                        type="numpy",
                        label="Live Feed",
                    )
                    live_mode = gr.Radio(
                        choices=[
                            "Edge Detection",
                            "Green Channel",
                            "Green + Bilateral Edges",
                        ],
                        value="Edge Detection",
                        label="Processing Mode",
                    )
                with gr.Column():
                    camera_output = gr.Image(label="Processed Stream")

            # Live stream wiring — pass the dispatcher directly; the previous
            # lambda wrapper added nothing.
            camera_input.stream(
                fn=process_live_stream,
                inputs=[camera_input, live_mode],
                outputs=camera_output,
            )

    # Static scan wiring.
    scan_btn.click(
        fn=process_image,
        inputs=[input_image, mode],
        outputs=[output_image, output_text],
    )

    # Info section
    with gr.Accordion("ℹ️ Mode Information", open=False):
        gr.Markdown("""
        ### Available Modes:

        **Object Detection**
        - Uses DETR model to detect and label objects with bounding boxes

        **Edge Detection**
        - CLAHE-enhanced Canny edge detection with morphological closing to fill gaps
        - Pipeline: Grayscale → Gaussian Blur → CLAHE → Canny → Closing
        - Closing operation connects broken edges (useful for interrupted arcs and curves)

        **Green Channel**
        - Extracts the green channel, useful for vegetation and certain materials

        **Green + Bilateral Edges**
        - Combines green channel extraction with bilateral filtering and closing
        - Pipeline: Green Channel → Bilateral Filter → Canny → Closing
        - Bilateral filtering preserves edges while reducing noise
        - Closing fills gaps in broken edges, ideal for textured surfaces

        ### Morphological Closing:
        - **Step 1 (Dilation)**: Expands white pixels to connect nearby edges
        - **Step 2 (Erosion)**: Shrinks pixels back to original size while keeping gaps filled
        - **Result**: Broken arcs and interrupted lines become continuous edges
        """)


if __name__ == "__main__":
    # `theme` is configured on gr.Blocks above — launch() does not accept it.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )