Ba2han committed on
Commit
4451ae6
·
verified ·
1 Parent(s): cd9641e

Training in progress, step 787, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -2,10 +2,6 @@
2
  "architectures": [
3
  "Lfm2MoeForCausalLM"
4
  ],
5
- "auto_map": {
6
- "AutoConfig": "configuration_lfm2_moe.Lfm2MoeConfig",
7
- "AutoModelForCausalLM": "modeling_lfm2_moe.Lfm2MoeForCausalLM"
8
- },
9
  "bos_token_id": 1,
10
  "conv_L_cache": 3,
11
  "conv_bias": false,
@@ -58,7 +54,7 @@
58
  },
59
  "routed_scaling_factor": 1.0,
60
  "tie_word_embeddings": true,
61
- "transformers_version": "5.0.0",
62
  "unsloth_version": "2026.2.1",
63
  "use_cache": false,
64
  "use_expert_bias": true,
 
2
  "architectures": [
3
  "Lfm2MoeForCausalLM"
4
  ],
 
 
 
 
5
  "bos_token_id": 1,
6
  "conv_L_cache": 3,
7
  "conv_bias": false,
 
54
  },
55
  "routed_scaling_factor": 1.0,
56
  "tie_word_embeddings": true,
57
+ "transformers_version": "5.1.0",
58
  "unsloth_version": "2026.2.1",
59
  "use_cache": false,
60
  "use_expert_bias": true,
last-checkpoint/generation_config.json CHANGED
@@ -6,5 +6,5 @@
6
  ],
7
  "max_length": 128000,
8
  "pad_token_id": 0,
9
- "transformers_version": "5.0.0"
10
  }
 
6
  ],
7
  "max_length": 128000,
8
  "pad_token_id": 0,
9
+ "transformers_version": "5.1.0"
10
  }
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40e17c89497b5cd4bd3ecfa28124e69b48e8b6f44321e8bdfd1883338fad37fd
3
  size 16680154224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b736062eaac30e44ca67caf1ce09593718a395a7a1dc31fc16c89c1defe4175c
3
  size 16680154224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87104116517c4b02d01101d7f70236234f5587f99f70337e258ba962a852bc82
3
  size 16957053431
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4d1fad685a5000302bddfd85760e47ccc5ca261e87b4717a65046211952539
3
  size 16957053431
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2f48fa3eb3ff3cb63aed61f5533830f1ec557c446e2f39b134fdbb9e96c5a89
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb573fa1560f5f656344d3d66feb08558992d4c6a4eade7857ad80a9228635e
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61939866eb4198b29c114feb629767cec00e0cef9627f9daf15d68f9eef37b21
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ac818d332552519d4b3474aba8f5df01c1bbac981657d656e6a60c290aa364
3
  size 1465
last-checkpoint/tokenizer_config.json CHANGED
@@ -10,13 +10,11 @@
10
  "attention_mask"
11
  ],
12
  "model_max_length": 1000000000000000019884624838656,
13
- "model_specific_special_tokens": {},
14
  "pad_token": "<|pad|>",
15
  "padding_side": "right",
16
  "sp_model_kwargs": {},
17
  "spaces_between_special_tokens": false,
18
  "tokenizer_class": "TokenizersBackend",
19
- "unk_token": null,
20
  "use_default_system_prompt": false,
21
  "use_fast": true
22
  }
 
10
  "attention_mask"
11
  ],
12
  "model_max_length": 1000000000000000019884624838656,
 
13
  "pad_token": "<|pad|>",
14
  "padding_side": "right",
15
  "sp_model_kwargs": {},
16
  "spaces_between_special_tokens": false,
17
  "tokenizer_class": "TokenizersBackend",
 
18
  "use_default_system_prompt": false,
19
  "use_fast": true
20
  }
last-checkpoint/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8b1f4d55a54ea38ec2dcd9d26576585b8ab84993116cab1f96ec9a8f5ee729b
3
  size 5713
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17bc4384b6af3d811a9355184d198ce26ed126f67ecdc8a58780aeafcf329ae5
3
  size 5713