| import base64 |
| from PIL import Image |
| import requests |
| from io import BytesIO |
| import numpy as np |
|
|
def is_placeholder_image(image: Image.Image) -> bool:
    """Heuristically decide whether *image* is a blank or placeholder picture.

    The image is reduced to one brightness value per pixel and run through a
    sequence of checks; the first one that fires classifies it as a
    placeholder:

    1. fewer than 10 distinct brightness values;
    2. more than 80% of pixels near-black (< 20) or near-white (> 235);
    3. overall brightness standard deviation below 15;
    4. more than 50% near-black/near-white pixels *and* no sampled block
       with a standard deviation above 30.

    Args:
        image: The picture to inspect; it is converted with ``np.array``.
            Both multi-channel (3-D) and single-channel (2-D) data are
            analysed.

    Returns:
        ``True`` if the image looks like a placeholder, ``False`` otherwise.
        Data that is neither 2-D nor 3-D is reported as a placeholder.
    """
    pixels = np.array(image)

    if pixels.ndim == 3:
        # Collapse the channel axis into one brightness value per pixel.
        # NOTE(review): every channel is averaged, so an alpha channel (if
        # present) is mixed into the brightness as well.
        gray = np.mean(pixels, axis=2)
    elif pixels.ndim == 2:
        # Single-channel data is already a brightness map; analyse it instead
        # of rejecting every grayscale image as a placeholder.
        gray = pixels.astype(float)
    else:
        return True

    height, width = gray.shape

    if len(np.unique(gray)) < 10:
        return True

    black_white_ratio = np.sum((gray < 20) | (gray > 235)) / (height * width)
    if black_white_ratio > 0.8:
        return True

    if np.std(gray) < 15:
        return True

    sample_size = min(100, height // 10, width // 10)
    if sample_size < 2:
        # Too small to split into blocks; nothing above flagged it.
        return False

    # The block scan only matters when most pixels are near-black/near-white,
    # so skip the (comparatively slow) scan when it cannot change the answer.
    if black_white_ratio <= 0.5:
        return False

    step_h = height // sample_size
    step_w = width // sample_size

    # The ranges stop one step short of the edge, so the trailing row and
    # column of blocks are not examined.
    has_textured_block = any(
        np.std(gray[top:top + step_h, left:left + step_w]) > 30
        for top in range(0, height - step_h, step_h)
        for left in range(0, width - step_w, step_w)
    )
    return not has_textured_block
|
|
def load_image_from_url(url_or_base64: str) -> Image.Image:
    """Load an image from a URL or a ``data:image`` URI and return it as RGB.

    Args:
        url_or_base64: Either a string starting with ``data:image`` whose
            payload after the first comma is base64-encoded, or a URL that is
            fetched with ``requests.get`` (10 second timeout). Surrounding
            whitespace is ignored.

    Returns:
        The decoded image converted to ``"RGB"`` mode.

    Raises:
        ValueError: If the input cannot be parsed, downloaded or decoded. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        source = url_or_base64.strip()
        if source.startswith("data:image"):
            # Everything after the first comma is the base64 payload.
            _header, encoded = source.split(",", 1)
            raw = base64.b64decode(encoded)
        else:
            # NOTE(review): the URL is fetched as given, with no host
            # restriction or response-size limit; confirm callers only pass
            # trusted URLs.
            response = requests.get(source, timeout=10)
            response.raise_for_status()
            raw = response.content
        return Image.open(BytesIO(raw)).convert("RGB")
    except Exception as e:
        # Collapse every failure mode into the single error type callers see,
        # while keeping the original exception chained for debugging.
        raise ValueError(f"Failed to load image: {e}") from e
|
|
def filter_valid_images(images: list) -> list:
    """Load each usable entry of *images* and keep the non-placeholder ones.

    Entries that are not strings, or that are blank or one of the literal
    filler values ``"string"``, ``"null"`` or ``"undefined"`` (after
    stripping whitespace), are skipped silently. Remaining entries are loaded
    with ``load_image_from_url``; entries that fail to load, or that
    ``is_placeholder_image`` flags, are skipped with a printed message.

    Args:
        images: Image references (URLs or data URIs). ``None`` is treated as
            an empty list.

    Returns:
        The successfully loaded, non-placeholder images, in input order.
    """
    if images is None:
        return []

    valid_images = []
    for img in images:
        # Non-strings (including None) and filler values are ignored outright.
        if not isinstance(img, str) or img.strip() in ("", "string", "null", "undefined"):
            continue
        try:
            pil_image = load_image_from_url(img)
            if is_placeholder_image(pil_image):
                print("[IMAGE FILTER] Ignoring placeholder/empty image")
            else:
                valid_images.append(pil_image)
        except Exception as e:
            # Best-effort filter: one bad entry must not abort the whole batch.
            print(f"[IMAGE FILTER] Warning: Failed to load image: {e}, skipping")
    return valid_images
|
|
|
|