| import os |
| import sys |
| import cv2 |
| import json |
| import glob |
| import argparse |
| import subprocess |
| from typing import List, Tuple, Dict, Any |
|
|
| import numpy as np |
| from tqdm import tqdm |
|
|
|
|
| |
def parse_args():
    """Parse command-line options for the OWLv2 JPG-folder detection runner.

    Covers both the master invocation (sharding folders over workers) and the
    internal child-worker invocation (`--worker_idx` >= 0 with `--shard_file`).
    """
    parser = argparse.ArgumentParser("OWLv2 detection on JPG folders (Top-K per image), multi-GPU.")
    # --- input selection ---
    parser.add_argument("--input_dir", type=str, required=True, help="Root that contains subfolders of JPGs; if JPGs are directly under input_dir, it will be treated as a single set.")
    parser.add_argument("--startswith", type=str, default="", help="Filter folder name prefix (or input_dir basename if no subfolders).")
    parser.add_argument("--endswith", type=str, default="", help="Filter folder name ends (or input_dir basename if no subfolders).")
    parser.add_argument("--output_dir", type=str, required=True)
    # --- sampling / detection limits ---
    parser.add_argument("--frame_stride", type=int, default=1, help="Sample every N-th image within a folder.")
    parser.add_argument("--top_k", type=int, default=5)
    parser.add_argument("--max_frames", type=int, default=0, help="Max processed images per folder; 0 means no limit.")
    # --- multi-process orchestration (worker_* flags are internal) ---
    parser.add_argument("--num_workers", type=int, default=1, help="#GPUs/#workers")
    parser.add_argument("--worker_idx", type=int, default=-1, help="internal; >=0 means child worker")
    parser.add_argument("--shard_file", type=str, default="", help="internal; JSON with folder paths for this worker")
    # --- model locations ---
    parser.add_argument("--scenic_root", type=str, default="/gz-data/Memory/owlv2/big_vision")
    parser.add_argument("--checkpoint_path", type=str, default="/gz-data/Memory/owlv2/owl2-b16-960-st-ngrams-curated-ft-lvisbase-ens-cold-weight-05_209b65b")
    return parser.parse_args()
|
|
|
|
| |
| def _has_jpgs(path: str) -> bool: |
| exts = ("*.jpg", "*.jpeg", "*.JPG", "*.JPEG") |
| for pat in exts: |
| if glob.glob(os.path.join(path, pat)): |
| return True |
| return False |
|
|
|
|
def iter_image_dirs(input_dir: str, startswith: str, endswith: str) -> List[str]:
    """
    Returns a list of directories to process.
    - If input_dir contains subfolders: return subfolders that contain JPGs and match startswith.
    - Else if input_dir itself contains JPGs and its basename matches startswith: return [input_dir].
    """
    input_dir = os.path.abspath(input_dir)
    subs = sorted([p for p in glob.glob(os.path.join(input_dir, "*")) if os.path.isdir(p)])

    dirs = [
        d for d in subs
        if os.path.basename(d).startswith(startswith)
        and os.path.basename(d).endswith(endswith)
        and _has_jpgs(d)
    ]
    if dirs:
        return dirs

    # Fallback: treat input_dir itself as a single image set.
    # BUG FIX: the original checked `os.path.basename(d).endswith(endswith)`
    # here, where `d` is the (non-leaking) comprehension variable above —
    # a NameError in Python 3 whenever this branch was reached. The check
    # must apply to input_dir's own basename.
    base = os.path.basename(os.path.normpath(input_dir))
    if base.startswith(startswith) and base.endswith(endswith) and _has_jpgs(input_dir):
        return [input_dir]
    return []
|
|
|
|
def ensure_dir(p: str):
    """Create directory *p* (including parents) if it is not already present."""
    if not os.path.isdir(p):
        os.makedirs(p, exist_ok=True)
|
|
|
|
def draw_single_box(frame_bgr: np.ndarray, box: List[float], color=(0, 255, 0), thickness=2) -> np.ndarray:
    """Return a copy of *frame_bgr* with a single rectangle drawn on it.

    *box* is [x1, y1, x2, y2] in pixel coordinates; values are truncated
    to int before drawing. The input frame is never modified in place.
    """
    left, top, right, bottom = (int(coord) for coord in box)
    canvas = frame_bgr.copy()
    cv2.rectangle(canvas, (left, top), (right, bottom), color, thickness)
    return canvas
|
|
|
|
def list_images_sorted(folder: str) -> List[str]:
    """Return all JPEG paths directly inside *folder*, lexicographically sorted."""
    patterns = ("*.jpg", "*.jpeg", "*.JPG", "*.JPEG")
    found: List[str] = []
    for pattern in patterns:
        found += glob.glob(os.path.join(folder, pattern))
    return sorted(found)
|
|
|
|
| |
def worker_run(args, dir_paths: List[str]):
    """Run OWLv2 objectness detection over every image folder in *dir_paths*.

    For each folder, reads its JPGs (optionally strided / capped), detects up
    to `args.top_k` class-agnostic boxes per image, and writes one annotated
    copy of the image per detected box under
    `output_dir/<folder>/object_<rank>/<filename>`.

    Heavy ML imports are deferred into this function so the master process
    can spawn child workers without paying the TensorFlow/JAX import cost.
    """
    import sys as _sys
    if args.scenic_root not in _sys.path:
        _sys.path.append(args.scenic_root)

    # Hide all GPUs from TensorFlow so JAX keeps exclusive use of the device
    # selected via CUDA_VISIBLE_DEVICES by the master process.
    import tensorflow as tf
    tf.config.experimental.set_visible_devices([], "GPU")

    from scenic.projects.owl_vit import configs
    from scenic.projects.owl_vit import models
    import jax
    import functools
    import owlv2_helper as helper

    class OWLv2Objectness:
        """Wraps the OWLv2 detection module for class-agnostic (objectness) box prediction."""

        def __init__(self, top_k: int = 5):
            # top_k: number of highest-objectness candidates kept per image.
            self.top_k = top_k
            self.config = configs.owl_v2_clip_b16.get_config(init_mode="canonical_checkpoint")
            if args.checkpoint_path:
                # Override the canonical checkpoint with the user-supplied one.
                self.config.init_from.checkpoint_path = os.path.abspath(args.checkpoint_path)
                print("[OWLv2] checkpoint_path =", self.config.init_from.checkpoint_path)

            print("cp path:", self.config.init_from.checkpoint_path)

            self.module = models.TextZeroShotDetectionModule(
                body_configs=self.config.model.body,
                objectness_head_configs=self.config.model.objectness_head,
                normalize=self.config.model.normalize,
                box_bias=self.config.model.box_bias,
            )
            self.variables = self.module.load_variables(self.config.init_from.checkpoint_path)

            # JIT-compile the three stages once; they are reused per frame.
            self.image_embedder = jax.jit(
                functools.partial(self.module.apply, self.variables, train=False, method=self.module.image_embedder)
            )
            self.objectness_predictor = jax.jit(
                functools.partial(self.module.apply, self.variables, method=self.module.objectness_predictor)
            )
            self.box_predictor = jax.jit(
                functools.partial(self.module.apply, self.variables, method=self.module.box_predictor)
            )

        def detect(self, image_bgr: np.ndarray) -> List[Tuple[List[float], float]]:
            """Return [(box_xyxy, objectness_score), ...] for one BGR image.

            Boxes are clamped to image bounds; overlapping boxes are pruned
            by `helper.remove_overlapping_bboxes`. At most `top_k` candidates
            pass the objectness threshold before size/overlap filtering.
            """
            image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
            processed = helper.preprocess_images([image_rgb], self.config.dataset_configs.input_size)[0]
            feature_map = self.image_embedder(processed[None, ...])
            b, h, w, d = feature_map.shape
            # Flatten the spatial grid into a token sequence for the heads.
            image_features = feature_map.reshape(b, h * w, d)

            obj_logits = self.objectness_predictor(image_features)["objectness_logits"]
            raw_boxes = self.box_predictor(image_features=image_features, feature_map=feature_map)["pred_boxes"]

            # Drop the batch dim (single image) and move to host numpy.
            obj = np.array(obj_logits[0], dtype=np.float32)
            raw_boxes = np.array(raw_boxes[0], dtype=np.float32)
            # NOTE(review): assumes helper.rescale_detection_box yields
            # pixel-space [x1, y1, x2, y2] boxes — verify against the helper.
            boxes = helper.rescale_detection_box(raw_boxes, image_rgb)

            if len(obj) == 0:
                return []

            # k-th largest logit serves as the top-k inclusion threshold.
            k = min(self.top_k, len(obj))
            thresh = np.partition(obj, -k)[-k]

            filtered: List[Tuple[List[float], float]] = []
            H, W = image_rgb.shape[:2]
            for box, score in zip(boxes, obj):
                if score < thresh:
                    continue
                if helper.too_small(box) or helper.too_large(box, image_rgb):
                    continue
                # Clamp coordinates into [0, W-1] x [0, H-1].
                x1, y1, x2, y2 = box
                x1 = max(0, min(float(x1), W - 1))
                y1 = max(0, min(float(y1), H - 1))
                x2 = max(0, min(float(x2), W - 1))
                y2 = max(0, min(float(y2), H - 1))
                filtered.append(([x1, y1, x2, y2], float(score)))

            kept_boxes = helper.remove_overlapping_bboxes([b for b, _ in filtered])

            def _match_score(bb: List[float]) -> float:
                # Recover the score of a surviving box by nearest L1 match
                # against the pre-dedup list (the helper returns bare boxes).
                arr = np.array([b for b, _ in filtered], dtype=np.float32)
                idx = int(np.argmin(np.abs(arr - np.array(bb, dtype=np.float32)).sum(axis=1)))
                return filtered[idx][1]

            return [(bb, _match_score(bb)) for bb in kept_boxes]

    detector = OWLv2Objectness(top_k=args.top_k)

    for dpath in tqdm(dir_paths, desc=f"Worker{args.worker_idx}", unit="set"):
        stem = os.path.basename(os.path.normpath(dpath))
        images = list_images_sorted(dpath)
        if not images:
            print(f"[WARN][w{args.worker_idx}] No JPGs under: {dpath}")
            continue

        saved_cnt = 0
        pbar = tqdm(total=len(images), desc=f"{stem}[w{args.worker_idx}]", unit="img", leave=False)

        for idx, ipath in enumerate(images):
            pbar.update(1)
            # Stride-based subsampling: keep only every N-th image.
            if args.frame_stride > 1 and (idx % args.frame_stride) != 0:
                continue

            frame = cv2.imread(ipath, cv2.IMREAD_COLOR)
            if frame is None:
                print(f"[WARN][w{args.worker_idx}] Cannot read: {ipath}")
                continue

            boxes_scores = detector.detect(frame)
            if boxes_scores:
                # Rank by score so object_0 is always the strongest detection.
                boxes_scores = sorted(boxes_scores, key=lambda x: x[1], reverse=True)[:args.top_k]

            # One annotated copy per detection, bucketed by rank index.
            fname = os.path.basename(ipath)
            for i, (box, score) in enumerate(boxes_scores):
                out_dir = os.path.join(args.output_dir, stem, f"object_{i}")
                ensure_dir(out_dir)
                vis = draw_single_box(frame, box, color=(0, 255, 0), thickness=2)
                cv2.imwrite(os.path.join(out_dir, fname), vis)

            # NOTE: counts processed (detected-on) images, including ones
            # with zero detections, toward the max_frames cap.
            saved_cnt += 1
            if args.max_frames and saved_cnt >= args.max_frames:
                break

        pbar.close()
|
|
|
|
| |
def main():
    """Entry point.

    Child mode (`--worker_idx` >= 0): load the folder list from the shard
    JSON and run detection directly. Master mode: discover matching folders,
    split them round-robin across `--num_workers` shards, and launch one
    child process per shard pinned to its own GPU.
    """
    args = parse_args()

    # ---- Child worker: process the shard prepared by the master. ----
    if args.worker_idx >= 0:
        if not args.shard_file or not os.path.exists(args.shard_file):
            raise RuntimeError("Worker requires --shard_file with JSON list of folder paths.")
        with open(args.shard_file, "r", encoding="utf-8") as f:
            dir_paths = json.load(f)
        worker_run(args, dir_paths)
        return

    # ---- Master: discover folders and shard them over workers. ----
    dir_paths = iter_image_dirs(args.input_dir, args.startswith, args.endswith)
    if not dir_paths:
        print(f"[INFO] No JPG folders (or JPGs) start with '{args.startswith}' and end with '{args.endswith}' under {args.input_dir}")
        return

    # Round-robin assignment keeps shard sizes balanced (within 1 folder).
    num_workers = max(1, int(args.num_workers))
    shards: List[List[str]] = [[] for _ in range(num_workers)]
    for i, d in enumerate(dir_paths):
        shards[i % num_workers].append(d)

    procs = []
    tmp_dir = os.path.join(args.output_dir, "_shards_tmp")
    ensure_dir(tmp_dir)

    for w in range(num_workers):
        # Persist this worker's folder list so the child can load it.
        shard_path = os.path.join(tmp_dir, f"shard_{w}.json")
        with open(shard_path, "w", encoding="utf-8") as f:
            json.dump(shards[w], f, ensure_ascii=False, indent=0)

        # Pin each worker to a single GPU before it imports JAX.
        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = str(w)

        cmd = [
            sys.executable, __file__,
            "--input_dir", args.input_dir,
            "--startswith", args.startswith,
            # BUG FIX: --endswith was not forwarded to children, unlike
            # --startswith, leaving the child's args inconsistent with
            # the master's invocation.
            "--endswith", args.endswith,
            "--output_dir", args.output_dir,
            "--frame_stride", str(args.frame_stride),
            "--top_k", str(args.top_k),
            "--max_frames", str(args.max_frames),
            "--num_workers", str(num_workers),
            "--worker_idx", str(w),
            "--shard_file", shard_path,
            "--scenic_root", args.scenic_root,
            "--checkpoint_path", args.checkpoint_path,
        ]
        print(f"[Master] Launch worker {w}: GPU={env['CUDA_VISIBLE_DEVICES']} folders={len(shards[w])}")
        procs.append(subprocess.Popen(cmd, env=env))

    # Wait for every child; nonzero means at least one worker failed.
    rc = 0
    for p in procs:
        p.wait()
        rc |= p.returncode

    if rc != 0:
        print("[Master] Some workers failed. Return code:", rc)
    else:
        print("[Master] All workers done. Output:", args.output_dir)
|
|
|
|
if __name__ == "__main__":
    # Runs as master by default; re-invoked by the master with
    # --worker_idx >= 0 to act as a per-GPU child worker.
    main()