patrickamadeus committed on
Commit
381d2fd
·
verified ·
1 Parent(s): f39d1de

Upload step 2000 checkpoint

Browse files
Files changed (3) hide show
  1. config.json +1 -1
  2. model.safetensors +1 -1
  3. train_config.json +2 -2
config.json CHANGED
@@ -117,7 +117,7 @@
117
  "kv_bridge_use_gate": false,
118
  "memory_mode": "replace",
119
  "memory_donor_layers": [
120
- 28
121
  ],
122
  "memory_share_donor_across_right_layers": true,
123
  "memory_reduce_strategy": "single"
 
117
  "kv_bridge_use_gate": false,
118
  "memory_mode": "replace",
119
  "memory_donor_layers": [
120
+ 20
121
  ],
122
  "memory_share_donor_across_right_layers": true,
123
  "memory_reduce_strategy": "single"
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68fa287882fb95f413fb71f587555e107524cd46be61b8cd29437a6b05d8532c
3
  size 725313232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbbd77b2cd202e3e4e67413889c5d3868e57c44d40adf9d64f6e5904a617e42
3
  size 725313232
train_config.json CHANGED
@@ -2,14 +2,14 @@
2
  "lr_mp": 0.0001,
3
  "lr_vision_backbone": 0.0,
4
  "lr_language_backbone": 0.0001,
5
- "lr_right_tower": 0.0,
6
  "lr_kv_bridge": 0.0,
7
  "lr_activation_bridge": 0.0,
8
  "batch_size": 32,
9
  "gradient_accumulation_steps": 4,
10
  "max_grad_norm": 1.0,
11
  "max_training_steps": 10000,
12
- "stop_after_step": 4100,
13
  "warmup_ratio": 0.03,
14
  "stats_log_interval": 100,
15
  "precision": "bf16",
 
2
  "lr_mp": 0.0001,
3
  "lr_vision_backbone": 0.0,
4
  "lr_language_backbone": 0.0001,
5
+ "lr_right_tower": 0.0001,
6
  "lr_kv_bridge": 0.0,
7
  "lr_activation_bridge": 0.0,
8
  "batch_size": 32,
9
  "gradient_accumulation_steps": 4,
10
  "max_grad_norm": 1.0,
11
  "max_training_steps": 10000,
12
+ "stop_after_step": 5100,
13
  "warmup_ratio": 0.03,
14
  "stats_log_interval": 100,
15
  "precision": "bf16",