{
    "type": "smolvla",
    "n_obs_steps": 1,
    "normalization_mapping": {
        "VISUAL": "IDENTITY",
        "STATE": "MEAN_STD",
        "ACTION": "MEAN_STD"
    },
    "input_features": {
        "observation.state": {
            "type": "STATE",
            "shape": [
                6
            ]
        },
        "observation.images.top": {
            "type": "VISUAL",
            "shape": [
                3,
                480,
                640
            ]
        },
        "observation.images.wrist": {
            "type": "VISUAL",
            "shape": [
                3,
                480,
                640
            ]
        }
    },
    "output_features": {
        "action": {
            "type": "ACTION",
            "shape": [
                6
            ]
        }
    },
    "device": "cuda",
    "use_amp": false,
    "push_to_hub": true,
    "repo_id": "nvail23/Blue-Snap-Task",
    "private": null,
    "tags": null,
    "license": null,
    "chunk_size": 50,
    "n_action_steps": 50,
    "max_state_dim": 32,
    "max_action_dim": 32,
    "resize_imgs_with_padding": [
        512,
        512
    ],
    "empty_cameras": 0,
    "adapt_to_pi_aloha": false,
    "use_delta_joint_actions_aloha": false,
    "tokenizer_max_length": 48,
    "num_steps": 10,
    "use_cache": true,
    "freeze_vision_encoder": true,
    "train_expert_only": true,
    "train_state_proj": true,
    "optimizer_lr": 0.0001,
    "optimizer_betas": [
        0.9,
        0.95
    ],
    "optimizer_eps": 1e-08,
    "optimizer_weight_decay": 1e-10,
    "optimizer_grad_clip_norm": 10.0,
    "scheduler_warmup_steps": 1000,
    "scheduler_decay_steps": 30000,
    "scheduler_decay_lr": 2.5e-06,
    "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
    "load_vlm_weights": true,
    "add_image_special_tokens": false,
    "attention_mode": "cross_attn",
    "prefix_length": 0,
    "pad_language_to": "max_length",
    "num_expert_layers": 0,
    "num_vlm_layers": 16,
    "self_attn_every_n_layers": 2,
    "expert_width_multiplier": 0.75,
    "min_period": 0.004,
    "max_period": 4.0
}