{
  "lr_mp": 0.0001,
  "lr_vision_backbone": 0.0,
  "lr_language_backbone": 0.0001,
  "lr_right_tower": 0.0001,
  "lr_kv_bridge": 0.0,
  "lr_activation_bridge": 0.0,
  "batch_size": 32,
  "gradient_accumulation_steps": 4,
  "max_grad_norm": 1.0,
  "max_training_steps": 10000,
  "stop_after_step": 5100,
  "warmup_ratio": 0.03,
  "stats_log_interval": 100,
  "precision": "bf16",
  "compile": false,
  "do_eval": true,
  "eval_interval": 1000,
  "max_val_batches": 64,
  "max_images_per_example": 1,
  "max_sample_length": 1024,
  "train_dataset_path": "patrickamadeus/the_cauldron",
  "train_dataset_name": [
    "all"
  ],
  "train_split": "train",
  "val_split": "validation",
  "stream_dataset": false,
  "enable_source_filter": true,
  "allowed_dataset_sources": [
    "aokvqa",
    "chart2text",
    "chartqa",
    "docvqa",
    "figureqa",
    "iconqa",
    "infographic_vqa",
    "ocrvqa",
    "robut_sqa",
    "scienceqa",
    "textcaps",
    "textvqa",
    "vistext",
    "visual7w",
    "visualmrc",
    "vqav2",
    "vsr"
  ],
  "relevance_min_rating": 1,
  "image_correspondence_min_rating": 1,
  "visual_dependency_min_rating": 1,
  "formatting_min_rating": 1,
  "wandb_entity": "HuggingFace",
  "log_wandb": false,
  "push_checkpoints_to_hub": true,
  "save_training_state_to_hub": false,
  "checkpoint_repo_pattern": "patrickamadeus/dt-memory-full-replace-{i}",
  "hf_private": false,
  "push_final_model_to_hub": true,
  "resume_from_vlm_checkpoint": true,
  "resume_checkpoint_path": null
}