tiny-router / training_args.json
cu3126's picture
Duplicate from tgupj/tiny-router
fd0e5cf
raw
history blame contribute delete
675 Bytes
{
"train_file": "data/synthetic/train.jsonl",
"validation_file": "data/synthetic/validation.jsonl",
"test_file": null,
"output_dir": "artifacts/tiny-router",
"encoder_name": "microsoft/deberta-v3-small",
"device": "auto",
"feature_mode": "full_interaction",
"pooling_type": "attention",
"use_head_dependencies": true,
"dependency_hidden_dim": 32,
"max_length": 128,
"recency_max": 3600,
"batch_size": 32,
"epochs": 20,
"encoder_lr": 2e-05,
"head_lr": 0.0001,
"weight_decay": 0.01,
"warmup_ratio": 0.1,
"dropout": 0.1,
"seed": 13,
"patience": 2,
"mixed_precision": false,
"confidence_threshold": 0.8,
"head_loss_weights": "{}"
}