| name: qwen3_asr_0_6b_full_finetune_sortformer_train |
| seed: 42 |
| num_workers: 4 |
| batch_size: 80 |
| model: |
| audio_encoder: qwen3_asr |
| sample_rate: 16000 |
| num_speakers: 4 |
| max_num_of_spks: 4 |
| feat_per_sec: 13.0 |
| label_feat_per_sec: 25.0 |
| pil_weight: 0.5 |
| ats_weight: 0.5 |
| qwen3_asr_checkpoint: Qwen/Qwen3-ASR-0.6B |
| qwen3_asr_pretrained: true |
| freeze_qwen3_asr_preprocessor: false |
| qwen3_asr_freeze_layers: 0 |
| use_lora: false |
| qwen3_asr_use_lora: false |
| use_ecapa_preprocessor: false |
| rttm_frame_length_sec: 0.07692307692307693 |
| der_collar: 0.25 |
| der_ignore_overlap: true |
| model_defaults: |
| fc_d_model: 1024 |
| tf_d_model: 192 |
| train_ds: |
| manifest_filepath: null |
| sample_rate: 16000 |
| num_speakers: 4 |
| num_spks: 4 |
| feat_per_sec: 13.0 |
| label_feat_per_sec: 25.0 |
| session_len_sec: 45 |
| shift_sec: 8 |
| soft_label_thres: 0.5 |
| soft_targets: false |
| labels: null |
| batch_size: 80 |
| shuffle: true |
| num_workers: 4 |
| validation_mode: false |
| precompute_dac_vae: false |
| use_hf_streaming: true |
| use_window_index: true |
| window_index_path: outputs/*_45s_8s.parquet |
| group_rows_by_source: true |
| window_chunk_size: 4096 |
| index_prefetch_streams: 2 |
| row_cache_size: 4 |
| hf_dataset_path: parquet |
| hf_data_files: |
| train: hf://datasets/tsw0411/real_dia_dataset/data/train/**/*.parquet |
| hf_split: train |
| shuffle_seed: 42 |
| shuffle_buffer_size: 16 |
| prefetch_rows: 8 |
| pin_memory: true |
| drop_last: false |
| prefetch_factor: 4 |
| persistent_workers: true |
| in_order: false |
| window_stride: 0.07692307692307693 |
| subsampling_factor: 1 |
| validation_ds: |
| manifest_filepath: null |
| sample_rate: 16000 |
| num_speakers: 4 |
| num_spks: 4 |
| feat_per_sec: 13.0 |
| label_feat_per_sec: 25.0 |
| session_len_sec: 45 |
| shift_sec: 45 |
| soft_label_thres: 0.5 |
| soft_targets: false |
| labels: null |
| batch_size: 80 |
| shuffle: false |
| num_workers: 4 |
| validation_mode: true |
| eval_window_stride_sec: 45 |
| use_hf_streaming: true |
| use_window_index: true |
| window_index_path: outputs/*_45s_45s.parquet |
| group_rows_by_source: true |
| window_chunk_size: 4096 |
| index_prefetch_streams: 2 |
| row_cache_size: 4 |
| hf_dataset_path: parquet |
| hf_data_files: |
| train: hf://datasets/tsw0411/real_dia_dataset/data/validation/**/*.parquet |
| hf_split: train |
| shuffle_seed: 42 |
| shuffle_buffer_size: 100 |
| prefetch_rows: 4 |
| pin_memory: true |
| drop_last: false |
| prefetch_factor: 2 |
| persistent_workers: true |
| window_stride: 0.07692307692307693 |
| subsampling_factor: 1 |
| sortformer_modules: |
| num_spks: 4 |
| dropout_rate: 0.1 |
| fc_d_model: 1024 |
| tf_d_model: 192 |
| subsampling_factor: 1 |
| encoder: |
| d_model: 1024 |
| subsampling_factor: 1 |
| transformer_encoder: |
| num_layers: 18 |
| hidden_size: 192 |
| inner_size: 768 |
| num_attention_heads: 8 |
| attn_score_dropout: 0.5 |
| attn_layer_dropout: 0.5 |
| ffn_dropout: 0.5 |
| hidden_act: relu |
| pre_ln: false |
| pre_ln_final_layer_norm: true |
| loss: |
| reduction: mean |
| weight: null |
| lr: 2.0e-05 |
| optim: |
| name: adamw |
| lr: 2.0e-05 |
| betas: |
| - 0.9 |
| - 0.98 |
| weight_decay: 0.001 |
| sched: |
| name: InverseSquareRootAnnealing |
| warmup_steps: 2500 |
| min_lr: 1.0e-06 |
| trainer: |
| precision: bf16-mixed |
| gradient_accumulation_steps: 1 |
| max_steps: 10000 |
| log_every_n_steps: 50 |
| ddp_find_unused_parameters: false |
| ddp_gradient_as_bucket_view: true |
| ddp_static_graph: true |
| val_check_interval: 1000 |
| max_grad_norm: 1.0 |
| num_sanity_val_steps: 0 |
| limit_val_batches: null |
| exp_manager: |
| exp_dir: ./outputs |
| name: qwen3_asr_0_6b_full_finetune_sortformer_train |
| resume_if_exists: true |
| resume_from_checkpoint: null |
| save_last: true |
| create_checkpoint_callback: true |
| checkpoint_callback_params: |
| monitor: val_der |
| mode: min |
| save_top_k: 3 |
| every_n_train_steps: 10000 |
|
|