Training in progress, step 35000, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
|
@@ -1213,6 +1213,10 @@ You can finetune this model on your own dataset.
|
|
| 1213 |
| 0.6132 | 34700 | 0.3007 |
|
| 1214 |
| 0.6141 | 34750 | 0.3932 |
|
| 1215 |
| 0.6149 | 34800 | 0.3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
|
| 1217 |
|
| 1218 |
### Framework Versions
|
|
|
|
| 1213 |
| 0.6132 | 34700 | 0.3007 |
|
| 1214 |
| 0.6141 | 34750 | 0.3932 |
|
| 1215 |
| 0.6149 | 34800 | 0.3 |
|
| 1216 |
+
| 0.6158 | 34850 | 0.2785 |
|
| 1217 |
+
| 0.6167 | 34900 | 0.3015 |
|
| 1218 |
+
| 0.6176 | 34950 | 0.3291 |
|
| 1219 |
+
| 0.6185 | 35000 | 0.2634 |
|
| 1220 |
|
| 1221 |
|
| 1222 |
### Framework Versions
|
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 90864192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d79a423fd264f8aac56fd95502b2264a968c0f7cf7b4c2a1c6541772551d3d3
|
| 3 |
size 90864192
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 180609210
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a70db7a09d43a45af8ba846e6b88322513995922a372c0450b6f22318eba386
|
| 3 |
size 180609210
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5dbdbbf596351fb268b717a3c2006112ba85ad19e4243b93a29f0071121e106
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33dd792f68c210603692aedada855793ed50b3aa941307b551dc822363f2d043
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ba7705125f055257ca9fb00bd8484394e209b5a6e280efe9550fda932420046
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4880,6 +4880,34 @@
|
|
| 4880 |
"learning_rate": 2.1405430877068978e-05,
|
| 4881 |
"loss": 0.3,
|
| 4882 |
"step": 34800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4883 |
}
|
| 4884 |
],
|
| 4885 |
"logging_steps": 50,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.618472902051563,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 35000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4880 |
"learning_rate": 2.1405430877068978e-05,
|
| 4881 |
"loss": 0.3,
|
| 4882 |
"step": 34800
|
| 4883 |
+
},
|
| 4884 |
+
{
|
| 4885 |
+
"epoch": 0.6158223038999134,
|
| 4886 |
+
"grad_norm": 1.4415560960769653,
|
| 4887 |
+
"learning_rate": 2.135634485873044e-05,
|
| 4888 |
+
"loss": 0.2785,
|
| 4889 |
+
"step": 34850
|
| 4890 |
+
},
|
| 4891 |
+
{
|
| 4892 |
+
"epoch": 0.61670583661713,
|
| 4893 |
+
"grad_norm": 1.8688596487045288,
|
| 4894 |
+
"learning_rate": 2.1307258840391904e-05,
|
| 4895 |
+
"loss": 0.3015,
|
| 4896 |
+
"step": 34900
|
| 4897 |
+
},
|
| 4898 |
+
{
|
| 4899 |
+
"epoch": 0.6175893693343465,
|
| 4900 |
+
"grad_norm": 3.085685968399048,
|
| 4901 |
+
"learning_rate": 2.125817282205337e-05,
|
| 4902 |
+
"loss": 0.3291,
|
| 4903 |
+
"step": 34950
|
| 4904 |
+
},
|
| 4905 |
+
{
|
| 4906 |
+
"epoch": 0.618472902051563,
|
| 4907 |
+
"grad_norm": 1.3053193092346191,
|
| 4908 |
+
"learning_rate": 2.120908680371483e-05,
|
| 4909 |
+
"loss": 0.2634,
|
| 4910 |
+
"step": 35000
|
| 4911 |
}
|
| 4912 |
],
|
| 4913 |
"logging_steps": 50,
|