{ "processor_class": "AlinVLAv0Processor", "processor_kwargs": { "modality_configs": { "kuka": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "agibot_gripper": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist_left", "wrist_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "dlr_edan_shared_control_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "furniture_bench_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "austin_sirius_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "neural_gr1": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "ego_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "language_table": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "austin_sailor_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "cmu_stretch": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "humanoid_everyday_h1": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "egocentric_resized" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "berkeley_autolab_ur5": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "droid": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "secondary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "taco_play": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "fractal20220817_data": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "berkeley_fanuc_manipulation": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "berkeley_cable_routing": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "secondary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "galaxea": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist_left", "wrist_right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "action_net": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "utaustin_mutex": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "nyu_franka_play_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "secondary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "roboturk": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "none" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "stanford_hydra_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "simulation_gr1": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "ego_view" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "left_leg", "neck", "right_arm", "right_hand", "right_leg", "waist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "jaco_play": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "toto": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "ucsd_kitchen_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "bc_z": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "agibot_dexhand": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "state" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "action" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "viola": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "fmb_dataset": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "secondary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "humanoid_everyday_g1": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "egocentric_resized" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left_arm", "left_hand", "right_arm", "right_hand" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "dobbe": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "bridge_orig": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "secondary" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "austin_buds_dataset_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_position", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "iamlab_cmu_pickup_insert_converted_externally_to_rlds": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "primary", "wrist" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_position" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "end_effector_position", "end_effector_rotation", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "DELTA", "type": "EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } }, "new_embodiment": { "video": { "delta_indices": [ 0 ], "modality_keys": [ "exterior_image_1_left", "wrist_image_left" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "state": { "delta_indices": [ 0 ], "modality_keys": [ "joint_pos_abs", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "action": { "delta_indices": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "joint_pos_abs", "gripper_close" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": [ { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null }, { "rep": "ABSOLUTE", "type": "NON_EEF", "format": "DEFAULT", "state_key": null } ] }, "language": { "delta_indices": [ 0 ], "modality_keys": [ "annotation.human.action.task_description" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "tactile": { "delta_indices": [ -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "left", "right" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null }, "torque": { "delta_indices": [ -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], "modality_keys": [ "torque" ], "sin_cos_embedding_keys": null, "mean_std_embedding_keys": null, "action_configs": null } } }, "image_crop_size": null, "image_target_size": null, "use_albumentations": true, "random_rotation_angle": null, "color_jitter_params": { "brightness": 0.3, "contrast": 0.4, "saturation": 0.5, "hue": 0.08 }, "shortest_image_edge": null, "crop_fraction": 0.95, "model_name": "huiwon/alinvlm_v1_3", "model_type": "qwen3", "formalize_language": true, "max_state_dim": 64, "max_action_dim": 64, "max_action_horizon": 16, "use_percentiles": true, "clip_outliers": true, "apply_sincos_state_encoding": false, "use_relative_action": true, "memory_length": 1, "max_tactile_dim": 0, "max_torque_dim": 0 } }