AlbeRota commited on
Commit
7eae0ca
·
verified ·
1 Parent(s): a0d3c7b

Upload weights, notebooks, sample images

Browse files
configs/end2end.yaml CHANGED
@@ -31,7 +31,7 @@ parameters:
31
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
32
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
33
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
34
- # FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained token inpainter weights (optional)
35
  USE_BN: False # Use batch normalization in decoder
36
  DROPOUT: 0.1 # Dropout rate in decoder layers
37
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
@@ -41,20 +41,24 @@ parameters:
41
  TOKEN_INPAINTER:
42
  TOKEN_INPAINTER_CLASS: "TokenInpainter_Prior" # Token inpainter class name
43
  TOKEN_INPAINTER_MODULE: "token_inpainters" # Module name to import token inpainter from
44
- # FROM_PRETRAINED: "token_inpainter.pt" # Path to pretrained token inpainter weights (optional)
45
- TOKEN_INPAINTER_LR: 5.0e-4 # Learning rate for token inpainter (can differ from base LR)
46
  DEPTH: 6 # Number of transformer blocks
47
  HEADS: 16 # Number of attention heads
48
  DROP: 0.05 # Dropout rate
49
  USE_POSITIONAL_ENCODING: True # Enable 2D sinusoidal positional encodings
50
  USE_FINAL_NORM: True # Enable final LayerNorm before output projection
51
  USE_LOCAL_PRIOR: True # Blend local mean prior for masked seeds
52
- LOCAL_PRIOR_WEIGHT: 0.85 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
53
  LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
54
  SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
55
-
56
- INPAINT_MASK_DILATION:
57
- value: 31 # Dilation kernel size (pixels) for inpaint mask - Must be odd
 
 
 
 
58
  USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
59
  value: False
60
  DISTRIBUTE:
@@ -128,10 +132,10 @@ parameters:
128
  ALL_DATASETS:
129
  FEW_IMAGES: False # Override FEW_IMAGES for all datasets (for quick debugging set True)
130
  TARGET_SIZE: [896,896] # Override target image size [height, width] for all datasets
131
- LOAD_RGB_ONLY: True
132
 
133
  BATCH_SIZE: # Max batch size with img size 896 is 32
134
- value: 6 # Number of samples per batch (adjust based on GPU memory)
135
  NUM_WORKERS:
136
  value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
137
  SHUFFLE:
@@ -145,9 +149,9 @@ parameters:
145
  MOGE_MODEL:
146
  value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
147
  SURFACE_ROUGHNESS:
148
- value: 8.0 # Blinn-Phong surface roughness exponent (higher = sharper highlights)
149
  INTENSITY:
150
- value: 2.0 # Specular highlight intensity multiplier
151
  LIGHT_DISTANCE_RANGE:
152
  value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
153
  LIGHT_LEFT_RIGHT_ANGLE:
 
31
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
32
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
33
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
34
+ FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
35
  USE_BN: False # Use batch normalization in decoder
36
  DROPOUT: 0.1 # Dropout rate in decoder layers
37
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
 
41
  TOKEN_INPAINTER:
42
  TOKEN_INPAINTER_CLASS: "TokenInpainter_Prior" # Token inpainter class name
43
  TOKEN_INPAINTER_MODULE: "token_inpainters" # Module name to import token inpainter from
44
+ # FROM_PRETRAINED: "token_inpainter.pth" # Path to pretrained token inpainter weights (optional)
45
+ TOKEN_INPAINTER_LR: 1.0e-4 # Learning rate for token inpainter (can differ from base LR)
46
  DEPTH: 6 # Number of transformer blocks
47
  HEADS: 16 # Number of attention heads
48
  DROP: 0.05 # Dropout rate
49
  USE_POSITIONAL_ENCODING: True # Enable 2D sinusoidal positional encodings
50
  USE_FINAL_NORM: True # Enable final LayerNorm before output projection
51
  USE_LOCAL_PRIOR: True # Blend local mean prior for masked seeds
52
+ LOCAL_PRIOR_WEIGHT: 0.8 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
53
  LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
54
  SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
55
+
56
+ # FORWARD PASS PARAMETERS
57
+ INPAINT_MASK_THRESHOLD:
58
+ value: 0.2 # Threshold for inpaint mask
59
+ INPAINT_MASK_DILATION:
60
+ value: 40 # Dilation kernel size (pixels) for inpaint mask - Must be odd
61
+
62
  USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
63
  value: False
64
  DISTRIBUTE:
 
132
  ALL_DATASETS:
133
  FEW_IMAGES: False # Override FEW_IMAGES for all datasets (for quick debugging set True)
134
  TARGET_SIZE: [896,896] # Override target image size [height, width] for all datasets
135
+ LOAD_RGB_ONLY: True
136
 
137
  BATCH_SIZE: # Max batch size with img size 896 is 32
138
+ value: 4 # Number of samples per batch (adjust based on GPU memory)
139
  NUM_WORKERS:
140
  value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
141
  SHUFFLE:
 
149
  MOGE_MODEL:
150
  value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
151
  SURFACE_ROUGHNESS:
152
+ value: 100.0 # Blinn-Phong surface roughness exponent (higher = sharper highlights)
153
  INTENSITY:
154
+ value: 0.8 # Specular highlight intensity multiplier
155
  LIGHT_DISTANCE_RANGE:
156
  value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
157
  LIGHT_LEFT_RIGHT_ANGLE:
configs/highlight_decoder_pretrain.yaml CHANGED
@@ -18,7 +18,7 @@ parameters:
18
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
19
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
20
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
21
- # FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained token inpainter weights (optional)
22
  USE_BN: False # Use batch normalization in decoder
23
  DROPOUT: 0.1 # Dropout rate in decoder layers
24
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
@@ -41,7 +41,9 @@ parameters:
41
  LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
42
  SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
43
  INPAINT_MASK_DILATION:
44
- value: 3 # Dilation kernel size (pixels) for inpaint mask - Must be odd
 
 
45
  USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
46
  value: False
47
  DISTRIBUTE:
@@ -69,7 +71,7 @@ parameters:
69
 
70
 
71
  BATCH_SIZE: # Max batch size with img size 896 is 32
72
- value: 20 # Number of samples per batch (adjust based on GPU memory)
73
  NUM_WORKERS:
74
  value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
75
  SHUFFLE:
@@ -105,7 +107,7 @@ parameters:
105
 
106
  ### OPTIMIZATION
107
  EPOCHS:
108
- value: 20 # Maximum number of training epochs
109
  LEARNING_RATE:
110
  value: 1.0e-4 # Base learning rate for optimizer
111
  WEIGHT_DECAY:
 
18
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
19
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
20
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
21
+ FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
22
  USE_BN: False # Use batch normalization in decoder
23
  DROPOUT: 0.1 # Dropout rate in decoder layers
24
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
 
41
  LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
42
  SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
43
  INPAINT_MASK_DILATION:
44
+ value: null # Dilation kernel size (pixels) for inpaint mask (null = compute based on image size)
45
+ INPAINT_MASK_THRESHOLD:
46
+ value: 0.2 # Inpaint selection threshold
47
  USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
48
  value: False
49
  DISTRIBUTE:
 
71
 
72
 
73
  BATCH_SIZE: # Max batch size with img size 896 is 32
74
+ value: 16 # Number of samples per batch (adjust based on GPU memory)
75
  NUM_WORKERS:
76
  value: 12 # Number of data loading worker processes (0 = main process only, "auto" = 90% of CPU affinity)
77
  SHUFFLE:
 
107
 
108
  ### OPTIMIZATION
109
  EPOCHS:
110
+ value: 40 # Maximum number of training epochs
111
  LEARNING_RATE:
112
  value: 1.0e-4 # Base learning rate for optimizer
113
  WEIGHT_DECAY:
configs/pretrained_config.yaml CHANGED
@@ -18,7 +18,7 @@ parameters:
18
  REASSEMBLE_OUT_CHANNELS: [768,1024,1536,2048] # Output channels for each decoder stage (DPT-style reassembly)
19
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
20
  READOUT_TYPE: "ignore" # Readout type for DPT decoder ("ignore", "project", etc.)
21
- # FROM_PRETRAINED: "diffuse_decoder.pt" # Path to pretrained decoder weights (optional)
22
  USE_BN: False # Use batch normalization in decoder
23
  DROPOUT: 0.1 # Dropout rate in decoder layers
24
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
@@ -31,6 +31,7 @@ parameters:
31
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
32
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
33
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
 
34
  USE_BN: False # Use batch normalization in decoder
35
  DROPOUT: 0.1 # Dropout rate in decoder layers
36
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
 
18
  REASSEMBLE_OUT_CHANNELS: [768,1024,1536,2048] # Output channels for each decoder stage (DPT-style reassembly)
19
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
20
  READOUT_TYPE: "ignore" # Readout type for DPT decoder ("ignore", "project", etc.)
21
+ FROM_PRETRAINED: "diffuse_decoder.pt" # Path to pretrained decoder weights (optional)
22
  USE_BN: False # Use batch normalization in decoder
23
  DROPOUT: 0.1 # Dropout rate in decoder layers
24
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
 
31
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
32
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
33
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
34
+ FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
35
  USE_BN: False # Use batch normalization in decoder
36
  DROPOUT: 0.1 # Dropout rate in decoder layers
37
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
configs/tokeninp_pretrain.yaml CHANGED
@@ -31,6 +31,7 @@ parameters:
31
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
32
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
33
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
 
34
  USE_BN: False # Use batch normalization in decoder
35
  DROPOUT: 0.1 # Dropout rate in decoder layers
36
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
@@ -51,8 +52,14 @@ parameters:
51
  LOCAL_PRIOR_WEIGHT: 0.25 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
52
  LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
53
  SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
54
- INPAINT_MASK_DILATION:
55
- value: 15 # Dilation kernel size (pixels) for inpaint mask - Must be odd
 
 
 
 
 
 
56
  USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
57
  value: False
58
  DISTRIBUTE:
@@ -61,89 +68,72 @@ parameters:
61
  ### DATA
62
  DATASETS:
63
  value:
 
64
  SCRREAM:
65
- VAL_SCENES: ["scene10_full_00","scene11_full_00","scene044_full_00","scene04_reduced_00","scene04_reduced_01","scene04_reduced_02"] # List of validation scene names
66
- TARGET_SIZE: [896,896] # Target image size [height, width] in pixels
67
  RESIZE_MODE: "resize+crop" # Image resizing mode: "resize", "crop", "resize+crop", or "pad"
68
- FEW_IMAGES: False # If True, load only first 10 images per scene (for quick debugging)
69
- SAMPLE_EVERY_N: 2 # Load every Nth frame from each scene (1 = all frames, 4 = every 4th frame)
70
- LOAD_RGB_ONLY: True # If True, ignore polarization data and load only RGB images
71
 
72
  HOUSECAT6D:
73
  VAL_SCENES: ["val_scene1","val_scene2"] # Validation scene names
74
- TARGET_SIZE: [896,896] # Target image size [height, width]
75
  RESIZE_MODE: "resize+crop" # Image resizing mode
76
-
77
- SAMPLE_EVERY_N: 2 # Load every Nth frame
78
- LOAD_RGB_ONLY: True # Ignore polarization data if True
79
 
80
  CROMO:
81
  TRAIN_SCENES: ["kitchen"] # Training scene names (list or string)
82
  # VAL_SCENES: "station" # Validation scene names (optional)
83
- TARGET_SIZE: [896,896] # Target image size [height, width]
84
  RESIZE_MODE: "resize" # Image resizing mode
85
-
86
  SAMPLE_EVERY_N: 2 # Load every Nth frame
87
- LOAD_RGB_ONLY: True # Ignore polarization data if True
88
 
89
  PSD:
90
  TRAIN_SCENES: "PSD_Train" # Training scene name (string or list)
91
  VAL_SCENES: "PSD_Val" # Validation scene name (string or list)
92
- TARGET_SIZE: [896,896] # Target image size [height, width]
93
  RESIZE_MODE: "resize+crop" # Image resizing mode
94
-
95
  SAMPLE_EVERY_N: 1 # Load every Nth frame (1 = all frames)
96
- LOAD_RGB_ONLY: True # Ignore polarization data if True
97
 
98
  SCARED:
99
  VAL_SCENES: ["v22","v23","v24","v25","v26","v27","v28","v29","v30","v31","v32","v33","v34"] # Validation scene names
100
- TARGET_SIZE: [896,896] # Target image size [height, width]
101
  RESIZE_MODE: "resize+crop" # Image resizing mode
102
  SAMPLE_EVERY_N: 8 # Load every Nth frame
103
- LOAD_RGB_ONLY: True # Ignore polarization data if True
104
-
105
- HIGHLIGHT_ENABLE: False # Enable highlight detection/processing in dataset
106
- HIGHLIGHT_BRIGHTNESS_THRESHOLD: 0.9 # Brightness threshold for highlight detection (0-1)
107
- HIGHLIGHT_RETURN_MASK: True # Return highlight mask in dataset output
108
- HIGHLIGHT_RECT_SIZE: [1000, 1000] # Size of highlight rectangle region [height, width]
109
- HIGHLIGHT_RETURN_RECT_AS_RGB: False # Return highlight rectangle as RGB if True
110
- HIGHLIGHT_RETURN_RECT: True # Return highlight rectangle region if True
111
-
112
  STEREOMIS_TRACKING:
113
  VAL_SCENES: ["P2_2"] # Validation scene names
114
- TARGET_SIZE: [896,896] # Target image size [height, width]
115
  RESIZE_MODE: "resize+crop" # Image resizing mode
116
- SAMPLE_EVERY_N: 4 # Load every Nth frame
117
- LOAD_RGB_ONLY: True # Ignore polarization data if True
118
- FEW_IMAGES: False # Load only first 10 images if True
119
- HIGHLIGHT_ENABLE: False # Enable highlight detection/processing
120
- HIGHLIGHT_BRIGHTNESS_THRESHOLD: 0.9 # Brightness threshold for highlight detection
121
- HIGHLIGHT_RETURN_MASK: True # Return highlight mask in dataset output
122
- HIGHLIGHT_RECT_SIZE: [800, 800] # Size of highlight rectangle region
123
- HIGHLIGHT_RETURN_RECT_AS_RGB: False # Return highlight rectangle as RGB if True
124
- HIGHLIGHT_RETURN_RECT: True # Return highlight rectangle region if True
125
 
126
  CHOLEC80:
127
- VAL_SCENES: ["val"] # Validation scene names
128
- TARGET_SIZE: [896,896] # Target image size [height, width]
129
  RESIZE_MODE: "resize+crop" # Image resizing mode
130
- SAMPLE_EVERY_N: 10 # Load every Nth frame
131
- LOAD_RGB_ONLY: True # Ignore polarization data if True
132
-
133
- HIGHLIGHT_ENABLE: False # Enable highlight detection/processing
134
- HIGHLIGHT_BRIGHTNESS_THRESHOLD: 0.9 # Brightness threshold for highlight detection
135
- HIGHLIGHT_RETURN_MASK: True # Return highlight mask in dataset output
136
- HIGHLIGHT_RECT_SIZE: [800, 800] # Size of highlight rectangle region
137
- HIGHLIGHT_RETURN_RECT_AS_RGB: False # Return highlight rectangle as RGB if True
138
- HIGHLIGHT_RETURN_RECT: True # Return highlight rectangle region if True
139
 
140
  SUNRGBD:
141
  VAL_SCENES: ["realsense"] # Validation scene names
142
- TARGET_SIZE: [896,896] # Target image size [height, width]
143
  RESIZE_MODE: "resize+crop" # Image resizing mode
144
  SAMPLE_EVERY_N: 4 # Load every Nth frame
145
- LOAD_RGB_ONLY: True # Ignore polarization data if True
146
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  FEW_IMAGES_ALL_DATASETS:
149
  value: False # If True, override all datasets' FEW_IMAGES to True (for quick debugging across all datasets)
@@ -163,9 +153,9 @@ parameters:
163
  MOGE_MODEL:
164
  value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
165
  SURFACE_ROUGHNESS:
166
- value: 8.0 # Blinn-Phong surface roughness exponent (higher = sharper highlights)
167
  INTENSITY:
168
- value: 2.0 # Specular highlight intensity multiplier
169
  LIGHT_DISTANCE_RANGE:
170
  value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
171
  LIGHT_LEFT_RIGHT_ANGLE:
 
31
  REASSEMBLE_OUT_CHANNELS: [96,192,384,768] # Output channels for each decoder stage
32
  REASSEMBLE_FACTORS: [4.0, 2.0, 1.0, 0.5] # Spatial upsampling factors for each stage
33
  READOUT_TYPE: "ignore" # Readout type for DPT decoder
34
+ FROM_PRETRAINED: "highlight_decoder.pt" # Path to pretrained decoder weights (optional)
35
  USE_BN: False # Use batch normalization in decoder
36
  DROPOUT: 0.1 # Dropout rate in decoder layers
37
  OUTPUT_IMAGE_SIZE: [896,896] # Output image resolution [height, width]
 
52
  LOCAL_PRIOR_WEIGHT: 0.25 # Weight for local prior blending (1.0 = only mask_token, 0.0 = only local mean)
53
  LOCAL_PRIOR_KERNEL: 5 # Kernel size for local prior blending (> 1)
54
  SEED_NOISE_STD: 0.02 # Standard deviation of noise added to masked seeds during training
55
+
56
+ # FORWARD PASS PARAMETERS
57
+ INPAINT_MASK_THRESHOLD:
58
+ value: 0.2 # Threshold for inpaint mask
59
+ INPAINT_MASK_DILATION:
60
+ value: -1 # Dilation kernel size (pixels) for inpaint mask - Must be odd
61
+
62
+
63
  USE_TORCH_COMPILE: # Enable PyTorch 2.0 torch.compile for faster training (experimental)
64
  value: False
65
  DISTRIBUTE:
 
68
  ### DATA
69
  DATASETS:
70
  value:
71
+ # Reserved key: key-value pairs here override the same keys for every dataset (per-dataset entries still override this).
72
  SCRREAM:
73
+ VAL_SCENES: ["scene10", "scene04"] # List of validation scene names
 
74
  RESIZE_MODE: "resize+crop" # Image resizing mode: "resize", "crop", "resize+crop", or "pad"
75
+ SAMPLE_EVERY_N: 6 # Load every Nth frame from each scene (1 = all frames, 4 = every 4th frame)
 
 
76
 
77
  HOUSECAT6D:
78
  VAL_SCENES: ["val_scene1","val_scene2"] # Validation scene names
 
79
  RESIZE_MODE: "resize+crop" # Image resizing mode
80
+ SAMPLE_EVERY_N: 4 # Load every Nth frame
 
 
81
 
82
  CROMO:
83
  TRAIN_SCENES: ["kitchen"] # Training scene names (list or string)
84
  # VAL_SCENES: "station" # Validation scene names (optional)
 
85
  RESIZE_MODE: "resize" # Image resizing mode
 
86
  SAMPLE_EVERY_N: 2 # Load every Nth frame
 
87
 
88
  PSD:
89
  TRAIN_SCENES: "PSD_Train" # Training scene name (string or list)
90
  VAL_SCENES: "PSD_Val" # Validation scene name (string or list)
 
91
  RESIZE_MODE: "resize+crop" # Image resizing mode
 
92
  SAMPLE_EVERY_N: 1 # Load every Nth frame (1 = all frames)
 
93
 
94
  SCARED:
95
  VAL_SCENES: ["v22","v23","v24","v25","v26","v27","v28","v29","v30","v31","v32","v33","v34"] # Validation scene names
 
96
  RESIZE_MODE: "resize+crop" # Image resizing mode
97
  SAMPLE_EVERY_N: 8 # Load every Nth frame
98
+
 
 
 
 
 
 
 
 
99
  STEREOMIS_TRACKING:
100
  VAL_SCENES: ["P2_2"] # Validation scene names
 
101
  RESIZE_MODE: "resize+crop" # Image resizing mode
102
+ SAMPLE_EVERY_N: 2 # Load every Nth frame
 
 
 
 
 
 
 
 
103
 
104
  CHOLEC80:
105
+ TRAIN_SCENES: ["train"] # Training scene names
106
+ VAL_SCENES: ["test"] # Validation scene names
107
  RESIZE_MODE: "resize+crop" # Image resizing mode
108
+ SAMPLE_EVERY_N: 40 # Load every Nth frame
 
 
 
 
 
 
 
 
109
 
110
  SUNRGBD:
111
  VAL_SCENES: ["realsense"] # Validation scene names
 
112
  RESIZE_MODE: "resize+crop" # Image resizing mode
113
  SAMPLE_EVERY_N: 4 # Load every Nth frame
114
+
115
+ # SCANNET:
116
+ # TRAIN_SCENES: ["train"]
117
+ # VAL_SCENES: ["val"]
118
+ # RESIZE_MODE: "resize+crop"
119
+ # SAMPLE_EVERY_N: 5
120
+
121
+ # OPENIMAGESV7:
122
+ # TRAIN_SCENES: ["thescene"]
123
+ # # VAL_SCENES: [""]
124
+ # RESIZE_MODE: "resize+crop"
125
+ # SAMPLE_EVERY_N: 5
126
+
127
+ # ENDOSYNTH:
128
+ # TRAIN_SCENES: ["scene"]
129
+ # # VAL_SCENES: ["val"]
130
+ # RESIZE_MODE: "resize+crop"
131
+ # SAMPLE_EVERY_N: 1
132
+
133
+ ALL_DATASETS:
134
+ FEW_IMAGES: False # Override FEW_IMAGES for all datasets (for quick debugging set True)
135
+ TARGET_SIZE: [896,896] # Override target image size [height, width] for all datasets
136
+ LOAD_RGB_ONLY: True
137
 
138
  FEW_IMAGES_ALL_DATASETS:
139
  value: False # If True, override all datasets' FEW_IMAGES to True (for quick debugging across all datasets)
 
153
  MOGE_MODEL:
154
  value: "Ruicheng/moge-2-vits-normal" # MoGe model name for normal estimation (HuggingFace format)
155
  SURFACE_ROUGHNESS:
156
+ value: 100.0 # Blinn-Phong surface roughness exponent (higher = sharper highlights)
157
  INTENSITY:
158
+ value: 0.8 # Specular highlight intensity multiplier
159
  LIGHT_DISTANCE_RANGE:
160
  value: [0.0, 1] # Range for light source distance sampling [min, max] (normalized)
161
  LIGHT_LEFT_RIGHT_ANGLE: