Upload weights, notebooks, sample images
Browse files- configs/end2end.yaml +15 -11
- configs/highlight_decoder_pretrain.yaml +6 -4
- configs/pretrained_config.yaml +2 -1
- configs/tokeninp_pretrain.yaml +43 -53
configs/end2end.yaml
CHANGED
|
@@ -31,7 +31,7 @@ parameters:
|
|
| 31 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 32 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 33 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
| 34 |
-
|
| 35 |
USE_BN: False # Use batch normalization in decoder
|
| 36 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 37 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
@@ -41,20 +41,24 @@ parameters:
|
|
| 41 |
TOKEN_INPAINTER:
|
| 42 |
TOKEN_INPAINTER_CLASS: "TokenInpainter_Prior" # Token inpainter class name
|
| 43 |
TOKEN_INPAINTER_MODULE: "token_inpainters" # Module name to import token inpainter from
|
| 44 |
-
# FROM_PRETRAINED: "token_inpainter.
|
| 45 |
-
TOKEN_INPAINTER_LR:
|
| 46 |
DEPTH: 6 # Number of transformer blocks
|
| 47 |
HEADS: 16 # Number of attention heads
|
| 48 |
DROP: 0.05 # Dropout rate
|
| 49 |
USE_POSITIONAL_ENCODING: True # Enable 2D sinusoidal positional encodings
|
| 50 |
USE_FINAL_NORM: True # Enable final LayerNorm before output projection
|
| 51 |
USE_LOCAL_PRIOR: True # Blend local mean prior for masked seeds
|
| 52 |
-
LOCAL_PRIOR_WEIGHT: 0.
|
| 53 |
LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
|
| 54 |
SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
|
| 59 |
value: False
|
| 60 |
DISTRIBUTE:
|
|
@@ -128,10 +132,10 @@ parameters:
|
|
| 128 |
ALL_DATASETS:
|
| 129 |
FEW_IMAGES: False # Override FEW_IMAGES for all datasets (for quick debugging set True)
|
| 130 |
TARGET_SIZE: [896,896] # Override target image size [height, width] for all datasets
|
| 131 |
-
LOAD_RGB_ONLY: True
|
| 132 |
|
| 133 |
BATCH_SIZE: # Max batch size with img size 896 is 32
|
| 134 |
-
value:
|
| 135 |
NUM_WORKERS:
|
| 136 |
value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
|
| 137 |
SHUFFLE:
|
|
@@ -145,9 +149,9 @@ parameters:
|
|
| 145 |
MOGE_MODEL:
|
| 146 |
value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
|
| 147 |
SURFACE_ROUGHNESS:
|
| 148 |
-
value:
|
| 149 |
INTENSITY:
|
| 150 |
-
value:
|
| 151 |
LIGHT_DISTANCE_RANGE:
|
| 152 |
value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
|
| 153 |
LIGHT_LEFT_RIGHT_ANGLE:
|
|
|
|
| 31 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 32 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 33 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
| 34 |
+
FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
|
| 35 |
USE_BN: False # Use batch normalization in decoder
|
| 36 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 37 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
|
|
| 41 |
TOKEN_INPAINTER:
|
| 42 |
TOKEN_INPAINTER_CLASS: "TokenInpainter_Prior" # Token inpainter class name
|
| 43 |
TOKEN_INPAINTER_MODULE: "token_inpainters" # Module name to import token inpainter from
|
| 44 |
+
# FROM_PRETRAINED: "token_inpainter.pth" # Path to pretrained token inpainter weights (optional)
|
| 45 |
+
TOKEN_INPAINTER_LR: 1.0e-4 # Learning rate for token inpainter (can differ from base LR)
|
| 46 |
DEPTH: 6 # Number of transformer blocks
|
| 47 |
HEADS: 16 # Number of attention heads
|
| 48 |
DROP: 0.05 # Dropout rate
|
| 49 |
USE_POSITIONAL_ENCODING: True # Enable 2D sinusoidal positional encodings
|
| 50 |
USE_FINAL_NORM: True # Enable final LayerNorm before output projection
|
| 51 |
USE_LOCAL_PRIOR: True # Blend local mean prior for masked seeds
|
| 52 |
+
LOCAL_PRIOR_WEIGHT: 0.8 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
|
| 53 |
LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
|
| 54 |
SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
|
| 55 |
+
|
| 56 |
+
# FORWARD PASS PARAMETERS
|
| 57 |
+
INPAINT_MASK_THRESHOLD:
|
| 58 |
+
value: 0.2 # Threshold for inpaint mask
|
| 59 |
+
INPAINT_MASK_DILATION:
|
| 60 |
+
value: 40 # Dilation kernel size (pixels) for inpaint mask - Must be odd
|
| 61 |
+
|
| 62 |
USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
|
| 63 |
value: False
|
| 64 |
DISTRIBUTE:
|
|
|
|
| 132 |
ALL_DATASETS:
|
| 133 |
FEW_IMAGES: False # Override FEW_IMAGES for all datasets (for quick debugging set True)
|
| 134 |
TARGET_SIZE: [896,896] # Override target image size [height, width] for all datasets
|
| 135 |
+
LOAD_RGB_ONLY: True
|
| 136 |
|
| 137 |
BATCH_SIZE: # Max batch size with img size 896 is 32
|
| 138 |
+
value: 4 # Number of samples per batch (adjust based on GPU memory)
|
| 139 |
NUM_WORKERS:
|
| 140 |
value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
|
| 141 |
SHUFFLE:
|
|
|
|
| 149 |
MOGE_MODEL:
|
| 150 |
value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
|
| 151 |
SURFACE_ROUGHNESS:
|
| 152 |
+
value: 100.0 # Blinn-Phong surface roughness exponent (higher = sharper highlights)
|
| 153 |
INTENSITY:
|
| 154 |
+
value: 0.8 # Specular highlight intensity multiplier
|
| 155 |
LIGHT_DISTANCE_RANGE:
|
| 156 |
value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
|
| 157 |
LIGHT_LEFT_RIGHT_ANGLE:
|
configs/highlight_decoder_pretrain.yaml
CHANGED
|
@@ -18,7 +18,7 @@ parameters:
|
|
| 18 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 19 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 20 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
| 21 |
-
|
| 22 |
USE_BN: False # Use batch normalization in decoder
|
| 23 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 24 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
@@ -41,7 +41,9 @@ parameters:
|
|
| 41 |
LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
|
| 42 |
SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
|
| 43 |
INPAINT_MASK_DILATION:
|
| 44 |
-
value:
|
|
|
|
|
|
|
| 45 |
USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
|
| 46 |
value: False
|
| 47 |
DISTRIBUTE:
|
|
@@ -69,7 +71,7 @@ parameters:
|
|
| 69 |
|
| 70 |
|
| 71 |
BATCH_SIZE: # Max batch size with img size 896 is 32
|
| 72 |
-
value:
|
| 73 |
NUM_WORKERS:
|
| 74 |
value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
|
| 75 |
SHUFFLE:
|
|
@@ -105,7 +107,7 @@ parameters:
|
|
| 105 |
|
| 106 |
### OPTIMIZATION
|
| 107 |
EPOCHS:
|
| 108 |
-
value:
|
| 109 |
LEARNING_RATE:
|
| 110 |
value: 1.0e-4 # Base learning rate for optimizer
|
| 111 |
WEIGHT_DECAY:
|
|
|
|
| 18 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 19 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 20 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
| 21 |
+
FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
|
| 22 |
USE_BN: False # Use batch normalization in decoder
|
| 23 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 24 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
|
|
| 41 |
LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
|
| 42 |
SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
|
| 43 |
INPAINT_MASK_DILATION:
|
| 44 |
+
value: None # Dilation kernel size (pixels) for inpaint mask (None = compute based on image size)
|
| 45 |
+
INPAINT_MASK_THRESHOLD:
|
| 46 |
+
value: 0.2 # Inpaint selection threshold
|
| 47 |
USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
|
| 48 |
value: False
|
| 49 |
DISTRIBUTE:
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
BATCH_SIZE: # Max batch size with img size 896 is 32
|
| 74 |
+
value: 16 # Number of samples per batch (adjust based on GPU memory)
|
| 75 |
NUM_WORKERS:
|
| 76 |
value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
|
| 77 |
SHUFFLE:
|
|
|
|
| 107 |
|
| 108 |
### OPTIMIZATION
|
| 109 |
EPOCHS:
|
| 110 |
+
value: 40 # Maximum number of training epochs
|
| 111 |
LEARNING_RATE:
|
| 112 |
value: 1.0e-4 # Base learning rate for optimizer
|
| 113 |
WEIGHT_DECAY:
|
configs/pretrained_config.yaml
CHANGED
|
@@ -18,7 +18,7 @@ parameters:
|
|
| 18 |
REASSEMBLE_OUT_CHANNELS: [768,1024,1536,2048] # Output channels for each decoder stage (DPT-style reassembly)
|
| 19 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 20 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder ("ignore", "project", etc.)
|
| 21 |
-
|
| 22 |
USE_BN: False # Use batch normalization in decoder
|
| 23 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 24 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
@@ -31,6 +31,7 @@ parameters:
|
|
| 31 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 32 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 33 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
|
|
|
| 34 |
USE_BN: False # Use batch normalization in decoder
|
| 35 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 36 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
|
|
| 18 |
REASSEMBLE_OUT_CHANNELS: [768,1024,1536,2048] # Output channels for each decoder stage (DPT-style reassembly)
|
| 19 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 20 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder ("ignore", "project", etc.)
|
| 21 |
+
FROM_PRETRAINED: "diffuse_decoder.pt" # Path to pretrained decoder weights (optional)
|
| 22 |
USE_BN: False # Use batch normalization in decoder
|
| 23 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 24 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
|
|
| 31 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 32 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 33 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
| 34 |
+
FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
|
| 35 |
USE_BN: False # Use batch normalization in decoder
|
| 36 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 37 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
configs/tokeninp_pretrain.yaml
CHANGED
|
@@ -31,6 +31,7 @@ parameters:
|
|
| 31 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 32 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 33 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
|
|
|
| 34 |
USE_BN: False # Use batch normalization in decoder
|
| 35 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 36 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
@@ -51,8 +52,14 @@ parameters:
|
|
| 51 |
LOCAL_PRIOR_WEIGHT: 0.25 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
|
| 52 |
LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
|
| 53 |
SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
|
| 57 |
value: False
|
| 58 |
DISTRIBUTE:
|
|
@@ -61,89 +68,72 @@ parameters:
|
|
| 61 |
### DATA
|
| 62 |
DATASETS:
|
| 63 |
value:
|
|
|
|
| 64 |
SCRREAM:
|
| 65 |
-
VAL_SCENES: ["
|
| 66 |
-
TARGET_SIZE: [896,896] # Target image size [height, width] in pixels
|
| 67 |
RESIZE_MODE: "resize+crop" # Image resizing mode: "resize", "crop", "resize+crop", or "pad"
|
| 68 |
-
|
| 69 |
-
SAMPLE_EVERY_N: 2 # Load every Nth frame from each scene (1 = all frames, 4 = every 4th frame)
|
| 70 |
-
LOAD_RGB_ONLY: True # If True, ignore polarization data and load only RGB images
|
| 71 |
|
| 72 |
HOUSECAT6D:
|
| 73 |
VAL_SCENES: ["val_scene1","val_scene2"] # Validation scene names
|
| 74 |
-
TARGET_SIZE: [896,896] # Target image size [height, width]
|
| 75 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 76 |
-
|
| 77 |
-
SAMPLE_EVERY_N: 2 # Load every Nth frame
|
| 78 |
-
LOAD_RGB_ONLY: True # Ignore polarization data if True
|
| 79 |
|
| 80 |
CROMO:
|
| 81 |
TRAIN_SCENES: ["kitchen"] # Training scene names (list or string)
|
| 82 |
# VAL_SCENES: "station" # Validation scene names (optional)
|
| 83 |
-
TARGET_SIZE: [896,896] # Target image size [height, width]
|
| 84 |
RESIZE_MODE: "resize" # Image resizing mode
|
| 85 |
-
|
| 86 |
SAMPLE_EVERY_N: 2 # Load every Nth frame
|
| 87 |
-
LOAD_RGB_ONLY: True # Ignore polarization data if True
|
| 88 |
|
| 89 |
PSD:
|
| 90 |
TRAIN_SCENES: "PSD_Train" # Training scene name (string or list)
|
| 91 |
VAL_SCENES: "PSD_Val" # Validation scene name (string or list)
|
| 92 |
-
TARGET_SIZE: [896,896] # Target image size [height, width]
|
| 93 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 94 |
-
|
| 95 |
SAMPLE_EVERY_N: 1 # Load every Nth frame (1 = all frames)
|
| 96 |
-
LOAD_RGB_ONLY: True # Ignore polarization data if True
|
| 97 |
|
| 98 |
SCARED:
|
| 99 |
VAL_SCENES: ["v22","v23","v24","v25","v26","v27","v28","v29","v30","v31","v32","v33","v34"] # Validation scene names
|
| 100 |
-
TARGET_SIZE: [896,896] # Target image size [height, width]
|
| 101 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 102 |
SAMPLE_EVERY_N: 8 # Load every Nth frame
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
HIGHLIGHT_ENABLE: False # Enable highlight detection/processing in dataset
|
| 106 |
-
HIGHLIGHT_BRIGHTNESS_THRESHOLD: 0.9 # Brightness threshold for highlight detection (0-1)
|
| 107 |
-
HIGHLIGHT_RETURN_MASK: True # Return highlight mask in dataset output
|
| 108 |
-
HIGHLIGHT_RECT_SIZE: [1000, 1000] # Size of highlight rectangle region [height, width]
|
| 109 |
-
HIGHLIGHT_RETURN_RECT_AS_RGB: False # Return highlight rectangle as RGB if True
|
| 110 |
-
HIGHLIGHT_RETURN_RECT: True # Return highlight rectangle region if True
|
| 111 |
-
|
| 112 |
STEREOMIS_TRACKING:
|
| 113 |
VAL_SCENES: ["P2_2"] # Validation scene names
|
| 114 |
-
TARGET_SIZE: [896,896] # Target image size [height, width]
|
| 115 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 116 |
-
SAMPLE_EVERY_N:
|
| 117 |
-
LOAD_RGB_ONLY: True # Ignore polarization data if True
|
| 118 |
-
FEW_IMAGES: False # Load only first 10 images if True
|
| 119 |
-
HIGHLIGHT_ENABLE: False # Enable highlight detection/processing
|
| 120 |
-
HIGHLIGHT_BRIGHTNESS_THRESHOLD: 0.9 # Brightness threshold for highlight detection
|
| 121 |
-
HIGHLIGHT_RETURN_MASK: True # Return highlight mask in dataset output
|
| 122 |
-
HIGHLIGHT_RECT_SIZE: [800, 800] # Size of highlight rectangle region
|
| 123 |
-
HIGHLIGHT_RETURN_RECT_AS_RGB: False # Return highlight rectangle as RGB if True
|
| 124 |
-
HIGHLIGHT_RETURN_RECT: True # Return highlight rectangle region if True
|
| 125 |
|
| 126 |
CHOLEC80:
|
| 127 |
-
|
| 128 |
-
|
| 129 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 130 |
-
SAMPLE_EVERY_N:
|
| 131 |
-
LOAD_RGB_ONLY: True # Ignore polarization data if True
|
| 132 |
-
|
| 133 |
-
HIGHLIGHT_ENABLE: False # Enable highlight detection/processing
|
| 134 |
-
HIGHLIGHT_BRIGHTNESS_THRESHOLD: 0.9 # Brightness threshold for highlight detection
|
| 135 |
-
HIGHLIGHT_RETURN_MASK: True # Return highlight mask in dataset output
|
| 136 |
-
HIGHLIGHT_RECT_SIZE: [800, 800] # Size of highlight rectangle region
|
| 137 |
-
HIGHLIGHT_RETURN_RECT_AS_RGB: False # Return highlight rectangle as RGB if True
|
| 138 |
-
HIGHLIGHT_RETURN_RECT: True # Return highlight rectangle region if True
|
| 139 |
|
| 140 |
SUNRGBD:
|
| 141 |
VAL_SCENES: ["realsense"] # Validation scene names
|
| 142 |
-
TARGET_SIZE: [896,896] # Target image size [height, width]
|
| 143 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 144 |
SAMPLE_EVERY_N: 4 # Load every Nth frame
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
FEW_IMAGES_ALL_DATASETS:
|
| 149 |
value: False # If True, override all datasets' FEW_IMAGES to True (for quick debugging across all datasets)
|
|
@@ -163,9 +153,9 @@ parameters:
|
|
| 163 |
MOGE_MODEL:
|
| 164 |
value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
|
| 165 |
SURFACE_ROUGHNESS:
|
| 166 |
-
value:
|
| 167 |
INTENSITY:
|
| 168 |
-
value:
|
| 169 |
LIGHT_DISTANCE_RANGE:
|
| 170 |
value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
|
| 171 |
LIGHT_LEFT_RIGHT_ANGLE:
|
|
|
|
| 31 |
REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
|
| 32 |
REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
|
| 33 |
READOUT_TYPE: "ignore" # Readout type for DPT decoder
|
| 34 |
+
FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
|
| 35 |
USE_BN: False # Use batch normalization in decoder
|
| 36 |
DROPOUT: 0.1 # Dropout rate in decoder layers
|
| 37 |
OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
|
|
|
|
| 52 |
LOCAL_PRIOR_WEIGHT: 0.25 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
|
| 53 |
LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
|
| 54 |
SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
|
| 55 |
+
|
| 56 |
+
# FORWARD PASS PARAMETERS
|
| 57 |
+
INPAINT_MASK_THRESHOLD:
|
| 58 |
+
value: 0.2 # Threshold for inpaint mask
|
| 59 |
+
INPAINT_MASK_DILATION:
|
| 60 |
+
value: -1 # Dilation kernel size (pixels) for inpaint mask - Must be odd
|
| 61 |
+
|
| 62 |
+
|
| 63 |
USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
|
| 64 |
value: False
|
| 65 |
DISTRIBUTE:
|
|
|
|
| 68 |
### DATA
|
| 69 |
DATASETS:
|
| 70 |
value:
|
| 71 |
+
# Reserved key: key-value pairs here override the same keys for every dataset (per-dataset entries still override this).
|
| 72 |
SCRREAM:
|
| 73 |
+
VAL_SCENES: ["scene10", "scene04"] # List of validation scene names
|
|
|
|
| 74 |
RESIZE_MODE: "resize+crop" # Image resizing mode: "resize", "crop", "resize+crop", or "pad"
|
| 75 |
+
SAMPLE_EVERY_N: 6 # Load every Nth frame from each scene (1 = all frames, 4 = every 4th frame)
|
|
|
|
|
|
|
| 76 |
|
| 77 |
HOUSECAT6D:
|
| 78 |
VAL_SCENES: ["val_scene1","val_scene2"] # Validation scene names
|
|
|
|
| 79 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 80 |
+
SAMPLE_EVERY_N: 4 # Load every Nth frame
|
|
|
|
|
|
|
| 81 |
|
| 82 |
CROMO:
|
| 83 |
TRAIN_SCENES: ["kitchen"] # Training scene names (list or string)
|
| 84 |
# VAL_SCENES: "station" # Validation scene names (optional)
|
|
|
|
| 85 |
RESIZE_MODE: "resize" # Image resizing mode
|
|
|
|
| 86 |
SAMPLE_EVERY_N: 2 # Load every Nth frame
|
|
|
|
| 87 |
|
| 88 |
PSD:
|
| 89 |
TRAIN_SCENES: "PSD_Train" # Training scene name (string or list)
|
| 90 |
VAL_SCENES: "PSD_Val" # Validation scene name (string or list)
|
|
|
|
| 91 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
|
|
|
| 92 |
SAMPLE_EVERY_N: 1 # Load every Nth frame (1 = all frames)
|
|
|
|
| 93 |
|
| 94 |
SCARED:
|
| 95 |
VAL_SCENES: ["v22","v23","v24","v25","v26","v27","v28","v29","v30","v31","v32","v33","v34"] # Validation scene names
|
|
|
|
| 96 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 97 |
SAMPLE_EVERY_N: 8 # Load every Nth frame
|
| 98 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
STEREOMIS_TRACKING:
|
| 100 |
VAL_SCENES: ["P2_2"] # Validation scene names
|
|
|
|
| 101 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 102 |
+
SAMPLE_EVERY_N: 2 # Load every Nth frame
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
CHOLEC80:
|
| 105 |
+
TRAIN_SCENES: ["train"] # Training scene names
|
| 106 |
+
VAL_SCENES: ["test"] # Validation scene names
|
| 107 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 108 |
+
SAMPLE_EVERY_N: 40 # Load every Nth frame
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
SUNRGBD:
|
| 111 |
VAL_SCENES: ["realsense"] # Validation scene names
|
|
|
|
| 112 |
RESIZE_MODE: "resize+crop" # Image resizing mode
|
| 113 |
SAMPLE_EVERY_N: 4 # Load every Nth frame
|
| 114 |
+
|
| 115 |
+
# SCANNET:
|
| 116 |
+
# TRAIN_SCENES: ["train"]
|
| 117 |
+
# VAL_SCENES: ["val"]
|
| 118 |
+
# RESIZE_MODE: "resize+crop"
|
| 119 |
+
# SAMPLE_EVERY_N: 5
|
| 120 |
+
|
| 121 |
+
# OPENIMAGESV7:
|
| 122 |
+
# TRAIN_SCENES: ["thescene"]
|
| 123 |
+
# # VAL_SCENES: [""]
|
| 124 |
+
# RESIZE_MODE: "resize+crop"
|
| 125 |
+
# SAMPLE_EVERY_N: 5
|
| 126 |
+
|
| 127 |
+
# ENDOSYNTH:
|
| 128 |
+
# TRAIN_SCENES: ["scene"]
|
| 129 |
+
# # VAL_SCENES: ["val"]
|
| 130 |
+
# RESIZE_MODE: "resize+crop"
|
| 131 |
+
# SAMPLE_EVERY_N: 1
|
| 132 |
+
|
| 133 |
+
ALL_DATASETS:
|
| 134 |
+
FEW_IMAGES: False # Override FEW_IMAGES for all datasets (for quick debugging set True)
|
| 135 |
+
TARGET_SIZE: [896,896] # Override target image size [height, width] for all datasets
|
| 136 |
+
LOAD_RGB_ONLY: True
|
| 137 |
|
| 138 |
FEW_IMAGES_ALL_DATASETS:
|
| 139 |
value: False # If True, override all datasets' FEW_IMAGES to True (for quick debugging across all datasets)
|
|
|
|
| 153 |
MOGE_MODEL:
|
| 154 |
value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
|
| 155 |
SURFACE_ROUGHNESS:
|
| 156 |
+
value: 100.0 # Blinn-Phong surface roughness exponent (higher = sharper highlights)
|
| 157 |
INTENSITY:
|
| 158 |
+
value: 0.8 # Specular highlight intensity multiplier
|
| 159 |
LIGHT_DISTANCE_RANGE:
|
| 160 |
value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
|
| 161 |
LIGHT_LEFT_RIGHT_ANGLE:
|