# MMSegmentation (mmengine) training config: Rein/LoRA-adapted DINOv2-L
# backbone + Mask2Former head for stomata segmentation on StomataPy400K,
# trained at 512x512. This is a cleaned-up version of an auto-dumped config:
# duplicated inline pipelines now reference the shared variables.
#
# Fixes vs. the original dump:
#   * Removed stochastic RandomFlip from the val/test pipelines — random
#     augmentation during evaluation made mIoU non-deterministic.
#   * model.test_cfg slide stride reduced from (1364, 1364) to (341, 341):
#     slide inference requires stride <= crop_size to cover the whole image.

default_scope = 'mmseg'

# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
dataset_type = 'StomataDataset'
data_root = 'StomataPy400K_filtered_train/'
classes = (
    'background',
    'stomatal complex',
    'stoma',
    'outer ledge',
    'pore',
    'pavement cell',
)
num_classes = 6
# Mask2Former's classification loss takes num_classes + 1 weights; the last
# entry (0.1) down-weights the "no object" class.
class_weight = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.1]
crop_size = (512, 512)
model_crop_size = (512, 512)

# ---------------------------------------------------------------------------
# Schedule / optimisation hyper-parameters
# ---------------------------------------------------------------------------
max_epochs = 300
warmup_epochs = 30
val_interval = 1
batch_size = 8
num_workers = 16
n_gpus = 4
lr = 1e-05
# Reference values the current lr / batch size were scaled from.
original_batch_size = 4
original_lr = 0.0001
original_n_gpus = 8
early_stopping_patience = 150
ReduceOnPlateauLR_patience = 50

# ---------------------------------------------------------------------------
# Experiment bookkeeping
# ---------------------------------------------------------------------------
output_dir = 'StomataPy400K_aperture_512'
wandb_project = 'StomataPy'
work_dir = '../Models/StomataPy400K_aperture_512'
dinov2_checkpoint = 'train/checkpoints/dinov2_converted.pth'

# ---------------------------------------------------------------------------
# Albumentations augmentations (applied via the `Albu` wrapper in training)
# ---------------------------------------------------------------------------
albu_train_transforms = [
    dict(
        always_apply=True,
        border_mode=0,
        min_height=512,
        min_width=512,
        type='PadIfNeeded'),
    dict(always_apply=True, type='Flip'),
    dict(
        always_apply=True,
        interpolation=4,
        limit=(-180, 180),
        type='Rotate'),
    # One of two elastic deformations of differing strength, each itself
    # applied with p=0.5 inside the OneOf.
    dict(
        p=0.5,
        transforms=[
            dict(
                alpha=20,
                approximate=True,
                border_mode=0,
                interpolation=4,
                mask_value=(0, 0, 0),
                p=0.5,
                same_dxdy=True,
                sigma=15,
                type='ElasticTransform'),
            dict(
                alpha=40,
                approximate=True,
                border_mode=0,
                interpolation=4,
                mask_value=(0, 0, 0),
                p=0.5,
                same_dxdy=False,
                sigma=15,
                type='ElasticTransform'),
        ],
        type='OneOf'),
    dict(
        always_apply=True,
        brightness=0.2,
        contrast=0.1,
        hue=0.2,
        saturation=0.2,
        type='ColorJitter'),
    dict(p=0.5, type='AdvancedBlur'),
    dict(always_apply=True, height=512, type='CenterCrop', width=512),
]

# ---------------------------------------------------------------------------
# Data pipelines
# ---------------------------------------------------------------------------
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(reduce_zero_label=False, type='LoadAnnotations'),
    dict(
        max_size=2048,
        resize_type='ResizeShortestEdge',
        scales=[358, 409, 460, 512, 563, 614, 665, 716],
        type='RandomChoiceResize'),
    dict(cat_max_ratio=0.75, crop_size=crop_size, type='RandomCrop'),
    dict(
        keymap=dict(gt_seg_map='mask', img='image'),
        transforms=albu_train_transforms,
        type='Albu'),
    dict(
        cutout_ratio=(0.02, 0.05),
        n_holes=10,
        prob=0.5,
        type='RandomCutOut'),
    dict(
        interpolation='lanczos',
        keep_ratio=True,
        scale=(512, 512),
        type='Resize'),
    dict(
        meta_keys=('img_path', 'img_shape', 'img', 'gt_seg_map'),
        type='PackSegInputs'),
]

# Deterministic eval pipeline. Annotations are deliberately loaded AFTER
# Resize (mmseg convention: GT stays at original resolution and predictions
# are resized back for metric computation).
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(keep_ratio=False, scale=(512, 512), type='Resize'),
    dict(reduce_zero_label=False, type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]

train_dataloader = dict(
    batch_size=batch_size,
    dataset=dict(
        ann_file='splits//train.txt',
        data_prefix=dict(img_path='images', seg_map_path='labels'),
        data_root=data_root,
        pipeline=train_pipeline,
        type=dataset_type),
    num_workers=num_workers)

val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        ann_file='splits//val.txt',
        data_prefix=dict(img_path='images', seg_map_path='labels'),
        data_root=data_root,
        pipeline=test_pipeline,
        type=dataset_type),
    num_workers=num_workers)
# Test and validation use the same split and settings.
test_dataloader = val_dataloader

val_evaluator = dict(iou_metrics=['mIoU'], type='IoUMetric')
test_evaluator = val_evaluator

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model = dict(
    backbone=dict(
        block_chunks=0,
        depth=24,
        embed_dim=1024,
        ffn_bias=True,
        ffn_layer='mlp',
        img_size=512,
        init_cfg=dict(checkpoint=dinov2_checkpoint, type='Pretrained'),
        init_values=1e-05,
        mlp_ratio=4,
        num_heads=16,
        patch_size=16,
        proj_bias=True,
        qkv_bias=True,
        reins_config=dict(
            embed_dims=1024,
            link_token_to_query=True,
            lora_dim=16,
            num_layers=24,
            patch_size=16,
            token_length=100,
            type='LoRAReins'),
        type='ReinsDinoVisionTransformer'),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[123.675, 116.28, 103.53],
        pad_val=0,
        seg_pad_val=255,
        size=crop_size,
        std=[58.395, 57.12, 57.375],
        type='SegDataPreProcessor'),
    decode_head=dict(
        align_corners=False,
        enforce_decoder_input_project=False,
        feat_channels=256,
        in_channels=[1024, 1024, 1024, 1024],
        loss_cls=dict(
            class_weight=class_weight,
            loss_weight=2.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False),
        loss_dice=dict(
            activate=True,
            eps=1.0,
            loss_weight=5.0,
            naive_dice=True,
            reduction='mean',
            type='mmdet.DiceLoss',
            use_sigmoid=True),
        loss_mask=dict(
            loss_weight=5.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True),
        num_classes=num_classes,
        num_queries=100,
        num_transformer_feat_level=3,
        out_channels=256,
        pixel_decoder=dict(
            act_cfg=dict(type='ReLU'),
            encoder=dict(
                init_cfg=None,
                layer_cfg=dict(
                    ffn_cfg=dict(
                        act_cfg=dict(inplace=True, type='ReLU'),
                        embed_dims=256,
                        feedforward_channels=1024,
                        ffn_drop=0.0,
                        num_fcs=2),
                    self_attn_cfg=dict(
                        batch_first=True,
                        dropout=0.0,
                        embed_dims=256,
                        im2col_step=64,
                        init_cfg=None,
                        norm_cfg=None,
                        num_heads=8,
                        num_levels=3,
                        num_points=4)),
                num_layers=6),
            init_cfg=None,
            norm_cfg=dict(num_groups=32, type='GN'),
            num_outs=3,
            positional_encoding=dict(normalize=True, num_feats=128),
            type='mmdet.MSDeformAttnPixelDecoder'),
        positional_encoding=dict(normalize=True, num_feats=128),
        replace_query_feat=True,
        strides=[4, 8, 16, 32],
        train_cfg=dict(
            # Hungarian matching costs mirror the three loss weights above.
            assigner=dict(
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=2.0),
                    dict(
                        type='mmdet.CrossEntropyLossCost',
                        use_sigmoid=True,
                        weight=5.0),
                    dict(
                        eps=1.0,
                        pred_act=True,
                        type='mmdet.DiceCost',
                        weight=5.0),
                ],
                type='mmdet.HungarianAssigner'),
            importance_sample_ratio=0.75,
            num_points=12544,
            oversample_ratio=3.0,
            sampler=dict(type='mmdet.MaskPseudoSampler')),
        transformer_decoder=dict(
            init_cfg=None,
            layer_cfg=dict(
                cross_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0),
                ffn_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    add_identity=True,
                    dropout_layer=None,
                    embed_dims=256,
                    feedforward_channels=2048,
                    ffn_drop=0.0,
                    num_fcs=2),
                self_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0)),
            num_layers=9,
            return_intermediate=True),
        type='ReinMask2FormerHead'),
    # Slide inference: stride must be <= crop_size so adjacent windows cover
    # the full image (the previous (1364, 1364) left gaps on inputs larger
    # than 512 px); 341 px gives ~1/3 overlap, the usual mmseg choice.
    test_cfg=dict(crop_size=crop_size, mode='slide', stride=(341, 341)),
    train_cfg=dict(),
    type='EncoderDecoder')

# ---------------------------------------------------------------------------
# Optimisation
# ---------------------------------------------------------------------------
optim_wrapper = dict(
    constructor='PEFTOptimWrapperConstructor',
    optimizer=dict(
        betas=(0.9, 0.999),
        eps=1e-08,
        lr=lr,
        type='AdamW',
        weight_decay=0.05),
    paramwise_cfg=dict(
        # PEFT-specific parameter groups: no weight decay on tokens, norms,
        # embeddings, or the Rein scale parameters.
        custom_keys={
            'learnable_tokens': dict(decay_mult=0.0, lr_mult=1.0),
            'level_embed': dict(decay_mult=0.0, lr_mult=1.0),
            'norm': dict(decay_mult=0.0),
            'query_embed': dict(decay_mult=0.0, lr_mult=1.0),
            'reins.scale': dict(decay_mult=0.0, lr_mult=1.0),
        },
        norm_decay_mult=0.0))
# Gradient accumulation: effective batch = batch_size * n_gpus * 8.
optimizer_config = dict(
    cumulative_iters=8, type='GradientCumulativeOptimizerHook')

param_scheduler = [
    # Linear warmup over the first `warmup_epochs` epochs.
    dict(
        begin=0,
        by_epoch=True,
        convert_to_iter_based=True,
        end=30,
        end_factor=1.0,
        start_factor=0.001,
        type='LinearLR',
        verbose=False),
    # Cosine decay for the remaining epochs.
    dict(
        T_max=270,
        begin=30,
        by_epoch=True,
        convert_to_iter_based=True,
        end=300,
        eta_min=1.0000000000000002e-10,
        eta_min_ratio=None,
        type='CosineAnnealingLR',
        verbose=False),
    # Additional multiplicative cut when mIoU plateaus.
    dict(
        by_epoch=True,
        factor=0.75,
        monitor='mIoU',
        patience=50,
        rule='greater',
        type='ReduceOnPlateauLR',
        verbose=False),
]

train_cfg = dict(
    max_epochs=max_epochs, type='EpochBasedTrainLoop',
    val_interval=val_interval)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='ValLoop')

# ---------------------------------------------------------------------------
# Hooks / runtime
# ---------------------------------------------------------------------------
default_hooks = dict(
    checkpoint=dict(
        by_epoch=True,
        # Huge interval effectively disables periodic checkpoints; we keep
        # only the best-mIoU and the last checkpoint.
        interval=999999,
        save_best='mIoU',
        save_last=True,
        type='CheckpointHook'),
    early_stopping=dict(
        monitor='mIoU',
        patience=150,
        rule='greater',
        type='EarlyStoppingHook'),
    logger=dict(interval=600, log_metric_by_epoch=True, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(draw=True, interval=50, type='SegVisualizationHook'))

env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
auto_scale_lr = dict(base_batch_size=16, enable=False)
find_unused_parameters = True
fp16 = dict(loss_scale='dynamic')
launcher = 'pytorch'
load_from = None
resume = False
log_level = 'INFO'
log_processor = dict(by_epoch=True)
randomness = dict(deterministic=False, seed=42)
tta_model = dict(type='SegTTAModel')
visualizer = dict(
    name='visualizer',
    type='SegLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
        dict(
            init_kwargs=dict(
                name='StomataPy400K_aperture_512', project='StomataPy'),
            type='WandbVisBackend'),
    ])