# qwen_aut_no_lora / config.yaml
# tsw0411 - "Upload folder using huggingface_hub" (commit 906c557, verified)
---
# Training configuration for the run named below.
#
# NOTE(review): the text this was restored from had every line at column 0.
# That collapses all mappings into a single level and yields duplicate
# top-level keys (sample_rate, num_speakers, batch_size, num_workers, lr, ...),
# which is not valid YAML. The nesting here is reconstructed from the section
# headers and key order and follows the usual NeMo Sortformer layout
# (model.train_ds, model.optim.sched, exp_manager.checkpoint_callback_params).
# Confirm the hierarchy against the training script before relying on it.
# All scalar values are unchanged.

name: qwen3_asr_0_6b_full_finetune_sortformer_train
seed: 42
num_workers: 4
batch_size: 80

model:
  audio_encoder: qwen3_asr
  sample_rate: 16000
  num_speakers: 4
  max_num_of_spks: 4
  feat_per_sec: 13.0
  label_feat_per_sec: 25.0
  # presumably the weights of the two Sortformer loss terms - TODO confirm
  pil_weight: 0.5
  ats_weight: 0.5

  # Qwen3-ASR encoder options. Every freeze/LoRA switch below is off.
  qwen3_asr_checkpoint: Qwen/Qwen3-ASR-0.6B
  qwen3_asr_pretrained: true
  freeze_qwen3_asr_preprocessor: false
  qwen3_asr_freeze_layers: 0
  use_lora: false
  qwen3_asr_use_lora: false
  use_ecapa_preprocessor: false

  # 0.0769... is numerically 1 / feat_per_sec (1 / 13).
  rttm_frame_length_sec: 0.07692307692307693
  der_collar: 0.25
  der_ignore_overlap: true

  model_defaults:
    fc_d_model: 1024
    tf_d_model: 192

  train_ds:
    manifest_filepath: null
    sample_rate: 16000
    num_speakers: 4
    num_spks: 4
    feat_per_sec: 13.0
    label_feat_per_sec: 25.0
    # shift_sec (8) is smaller than session_len_sec (45); presumably the
    # training windows overlap - TODO confirm how the loader uses shift_sec.
    session_len_sec: 45
    shift_sec: 8
    soft_label_thres: 0.5
    soft_targets: false
    labels: null
    batch_size: 80
    shuffle: true
    num_workers: 4
    validation_mode: false
    precompute_dac_vae: false
    use_hf_streaming: true
    use_window_index: true
    # Quoted so the glob is always read as a plain string.
    window_index_path: 'outputs/*_45s_8s.parquet'
    group_rows_by_source: true
    window_chunk_size: 4096
    index_prefetch_streams: 2
    row_cache_size: 4
    hf_dataset_path: parquet
    hf_data_files:
      train: 'hf://datasets/tsw0411/real_dia_dataset/data/train/**/*.parquet'
    hf_split: train
    shuffle_seed: 42
    shuffle_buffer_size: 16
    prefetch_rows: 8
    pin_memory: true
    drop_last: false
    prefetch_factor: 4
    persistent_workers: true
    in_order: false
    # Same value as model.rttm_frame_length_sec (1 / 13).
    window_stride: 0.07692307692307693
    subsampling_factor: 1

  validation_ds:
    manifest_filepath: null
    sample_rate: 16000
    num_speakers: 4
    num_spks: 4
    feat_per_sec: 13.0
    label_feat_per_sec: 25.0
    # shift_sec equals session_len_sec here (45 and 45), unlike train_ds.
    session_len_sec: 45
    shift_sec: 45
    soft_label_thres: 0.5
    soft_targets: false
    labels: null
    batch_size: 80
    shuffle: false
    num_workers: 4
    validation_mode: true
    eval_window_stride_sec: 45
    use_hf_streaming: true
    use_window_index: true
    window_index_path: 'outputs/*_45s_45s.parquet'
    group_rows_by_source: true
    window_chunk_size: 4096
    index_prefetch_streams: 2
    row_cache_size: 4
    hf_dataset_path: parquet
    # The validation files are registered under the split name "train",
    # which is why hf_split below is also "train".
    hf_data_files:
      train: 'hf://datasets/tsw0411/real_dia_dataset/data/validation/**/*.parquet'
    hf_split: train
    shuffle_seed: 42
    # NOTE(review): shuffle is false above, so this buffer size is likely
    # unused - confirm.
    shuffle_buffer_size: 100
    prefetch_rows: 4
    pin_memory: true
    drop_last: false
    prefetch_factor: 2
    persistent_workers: true
    window_stride: 0.07692307692307693
    subsampling_factor: 1

  sortformer_modules:
    num_spks: 4
    dropout_rate: 0.1
    # Same values as model.model_defaults.
    fc_d_model: 1024
    tf_d_model: 192
    subsampling_factor: 1

  encoder:
    d_model: 1024
    subsampling_factor: 1

  transformer_encoder:
    num_layers: 18
    hidden_size: 192
    inner_size: 768
    num_attention_heads: 8
    attn_score_dropout: 0.5
    attn_layer_dropout: 0.5
    ffn_dropout: 0.5
    hidden_act: relu
    pre_ln: false
    pre_ln_final_layer_norm: true

  loss:
    reduction: mean
    weight: null

  # Same value as optim.lr below; keep the two in sync.
  lr: 2.0e-05
  optim:
    name: adamw
    lr: 2.0e-05
    betas:
      - 0.9
      - 0.98
    weight_decay: 0.001
    sched:
      name: InverseSquareRootAnnealing
      warmup_steps: 2500
      min_lr: 1.0e-06

trainer:
  precision: bf16-mixed
  gradient_accumulation_steps: 1
  max_steps: 10000
  log_every_n_steps: 50
  ddp_find_unused_parameters: false
  ddp_gradient_as_bucket_view: true
  ddp_static_graph: true
  val_check_interval: 1000
  max_grad_norm: 1.0
  num_sanity_val_steps: 0
  limit_val_batches: null

exp_manager:
  exp_dir: ./outputs
  name: qwen3_asr_0_6b_full_finetune_sortformer_train
  resume_if_exists: true
  resume_from_checkpoint: null
  save_last: true
  create_checkpoint_callback: true
  checkpoint_callback_params:
    monitor: val_der
    mode: min
    save_top_k: 3
    # NOTE(review): this equals trainer.max_steps (10000), so a step-triggered
    # save would fire only once, at the final step. Confirm that is intended
    # together with save_top_k: 3 and val_check_interval: 1000.
    every_n_train_steps: 10000