ChiefTheLord commited on
Commit
1baf1e6
·
verified ·
1 Parent(s): 909957c

Upload folder using huggingface_hub

Browse files
checkpoints-v2.0-discrete-conditional/checkpoint-2025/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df64895704ce7ee225447930abf51c42b71b46cfaf5ee01ac150ebd7fccfedc9
3
+ size 24241232
checkpoints-v2.0-discrete-conditional/checkpoint-2025/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b6874334f8c63084c0ebf84f5bdfd1ceb10e60ad76acf4232fac094d8e4eeb2
3
+ size 519947
checkpoints-v2.0-discrete-conditional/checkpoint-2025/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dba89da8a809d5a61d70d07cc13134c6b963b7bc2be7b422caa0fa496bb1523
3
+ size 14645
checkpoints-v2.0-discrete-conditional/checkpoint-2025/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82fcb4caa8fdae60e6f732b2f1e186aa8584a48525083d6c24d6f93011d1f002
3
+ size 1383
checkpoints-v2.0-discrete-conditional/checkpoint-2025/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f908761de2319f63d4eab082d069db9fa95e0b5bf598478383d5d8b095c0fb4f
3
+ size 1465
checkpoints-v2.0-discrete-conditional/checkpoint-2025/trainer_state.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 512,
7
+ "global_step": 2025,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.12641975308641976,
14
+ "grad_norm": 0.015813976526260376,
15
+ "learning_rate": 0.000498046875,
16
+ "loss": 0.44495490193367004,
17
+ "step": 256
18
+ },
19
+ {
20
+ "epoch": 0.2528395061728395,
21
+ "grad_norm": 0.006338431965559721,
22
+ "learning_rate": 0.000998046875,
23
+ "loss": 0.016131538897752762,
24
+ "step": 512
25
+ },
26
+ {
27
+ "epoch": 0.2528395061728395,
28
+ "eval_bleu": 0.9709026962307936,
29
+ "eval_loss": 0.014414189542393858,
30
+ "eval_mse_loss": 0.014414189542393858,
31
+ "step": 512
32
+ },
33
+ {
34
+ "epoch": 0.2528395061728395,
35
+ "eval_bleu": 0.9709026962307936,
36
+ "eval_loss": 0.014414189542393858,
37
+ "eval_mse_loss": 0.014414189542393858,
38
+ "eval_runtime": 8.5639,
39
+ "eval_samples_per_second": 305.702,
40
+ "eval_steps_per_second": 4.788,
41
+ "step": 512
42
+ },
43
+ {
44
+ "epoch": 0.37925925925925924,
45
+ "grad_norm": 0.005291212350130081,
46
+ "learning_rate": 0.0009315344337660421,
47
+ "loss": 0.013332298956811428,
48
+ "step": 768
49
+ },
50
+ {
51
+ "epoch": 0.505679012345679,
52
+ "grad_norm": 0.004620287101715803,
53
+ "learning_rate": 0.0007439821899385376,
54
+ "loss": 0.012249683029949665,
55
+ "step": 1024
56
+ },
57
+ {
58
+ "epoch": 0.505679012345679,
59
+ "eval_bleu": 0.9708651851802215,
60
+ "eval_loss": 0.012245714698532006,
61
+ "eval_mse_loss": 0.012245714698532006,
62
+ "step": 1024
63
+ },
64
+ {
65
+ "epoch": 0.505679012345679,
66
+ "eval_bleu": 0.9708651851802215,
67
+ "eval_loss": 0.012245714698532006,
68
+ "eval_mse_loss": 0.012245714698532006,
69
+ "eval_runtime": 7.9385,
70
+ "eval_samples_per_second": 329.784,
71
+ "eval_steps_per_second": 5.165,
72
+ "step": 1024
73
+ },
74
+ {
75
+ "epoch": 0.6320987654320988,
76
+ "grad_norm": 0.004163551609963179,
77
+ "learning_rate": 0.0004890997654891032,
78
+ "loss": 0.011324185878038406,
79
+ "step": 1280
80
+ },
81
+ {
82
+ "epoch": 0.7585185185185185,
83
+ "grad_norm": 0.004867972806096077,
84
+ "learning_rate": 0.00023722540797531234,
85
+ "loss": 0.01125150453299284,
86
+ "step": 1536
87
+ },
88
+ {
89
+ "epoch": 0.7585185185185185,
90
+ "eval_bleu": 0.9709484876437786,
91
+ "eval_loss": 0.010418103053802398,
92
+ "eval_mse_loss": 0.010418103053802398,
93
+ "step": 1536
94
+ },
95
+ {
96
+ "epoch": 0.7585185185185185,
97
+ "eval_bleu": 0.9709484876437786,
98
+ "eval_loss": 0.010418103053802398,
99
+ "eval_mse_loss": 0.010418103053802398,
100
+ "eval_runtime": 7.0369,
101
+ "eval_samples_per_second": 372.037,
102
+ "eval_steps_per_second": 5.826,
103
+ "step": 1536
104
+ },
105
+ {
106
+ "epoch": 0.8849382716049383,
107
+ "grad_norm": 0.005268606822937727,
108
+ "learning_rate": 5.786724825584927e-05,
109
+ "loss": 0.011080899275839329,
110
+ "step": 1792
111
+ }
112
+ ],
113
+ "logging_steps": 256,
114
+ "max_steps": 2025,
115
+ "num_input_tokens_seen": 0,
116
+ "num_train_epochs": 1,
117
+ "save_steps": 512,
118
+ "stateful_callbacks": {
119
+ "TrainerControl": {
120
+ "args": {
121
+ "should_epoch_stop": false,
122
+ "should_evaluate": false,
123
+ "should_log": false,
124
+ "should_save": true,
125
+ "should_training_stop": true
126
+ },
127
+ "attributes": {}
128
+ }
129
+ },
130
+ "total_flos": 0.0,
131
+ "train_batch_size": 64,
132
+ "trial_name": null,
133
+ "trial_params": null
134
+ }
checkpoints-v2.0-discrete-conditional/checkpoint-2025/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42e9b63c7e9394633b45af8e0063d83bdec5ceb33cd31e93f7ed3a2ff4ca46cc
3
+ size 5137