robot4 commited on
Commit
541fbc9
·
verified ·
1 Parent(s): af9853e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +13 -0
  2. checkpoint-1000/config.json +41 -0
  3. checkpoint-1000/model.safetensors +3 -0
  4. checkpoint-1000/optimizer.pt +3 -0
  5. checkpoint-1000/rng_state.pth +0 -0
  6. checkpoint-1000/scheduler.pt +0 -0
  7. checkpoint-1000/special_tokens_map.json +7 -0
  8. checkpoint-1000/tokenizer.json +0 -0
  9. checkpoint-1000/tokenizer_config.json +56 -0
  10. checkpoint-1000/trainer_state.json +128 -0
  11. checkpoint-1000/training_args.bin +3 -0
  12. checkpoint-1000/vocab.txt +0 -0
  13. checkpoint-1500/config.json +41 -0
  14. checkpoint-1500/model.safetensors +3 -0
  15. checkpoint-1500/optimizer.pt +3 -0
  16. checkpoint-1500/rng_state.pth +0 -0
  17. checkpoint-1500/scheduler.pt +0 -0
  18. checkpoint-1500/special_tokens_map.json +7 -0
  19. checkpoint-1500/tokenizer.json +0 -0
  20. checkpoint-1500/tokenizer_config.json +56 -0
  21. checkpoint-1500/trainer_state.json +175 -0
  22. checkpoint-1500/training_args.bin +3 -0
  23. checkpoint-1500/vocab.txt +0 -0
  24. checkpoint-2000/config.json +41 -0
  25. checkpoint-2000/model.safetensors +3 -0
  26. checkpoint-2000/optimizer.pt +3 -0
  27. checkpoint-2000/rng_state.pth +0 -0
  28. checkpoint-2000/scheduler.pt +0 -0
  29. checkpoint-2000/special_tokens_map.json +7 -0
  30. checkpoint-2000/tokenizer.json +0 -0
  31. checkpoint-2000/tokenizer_config.json +56 -0
  32. checkpoint-2000/trainer_state.json +222 -0
  33. checkpoint-2000/training_args.bin +3 -0
  34. checkpoint-2000/vocab.txt +0 -0
  35. checkpoint-2500/config.json +41 -0
  36. checkpoint-2500/model.safetensors +3 -0
  37. checkpoint-2500/optimizer.pt +3 -0
  38. checkpoint-2500/rng_state.pth +0 -0
  39. checkpoint-2500/scheduler.pt +0 -0
  40. checkpoint-2500/special_tokens_map.json +7 -0
  41. checkpoint-2500/tokenizer.json +0 -0
  42. checkpoint-2500/tokenizer_config.json +56 -0
  43. checkpoint-2500/trainer_state.json +269 -0
  44. checkpoint-2500/training_args.bin +3 -0
  45. checkpoint-2500/vocab.txt +0 -0
  46. checkpoint-3000/config.json +41 -0
  47. checkpoint-3000/model.safetensors +3 -0
  48. checkpoint-3000/optimizer.pt +3 -0
  49. checkpoint-3000/rng_state.pth +0 -0
  50. checkpoint-3000/scheduler.pt +0 -0
.gitattributes CHANGED
@@ -4,3 +4,16 @@
4
  results/checkpoint-4000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
5
  results/checkpoint-4000/scheduler.pt filter=lfs diff=lfs merge=lfs -text
6
  基于BERT的情感分析系统.pptx filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  results/checkpoint-4000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
5
  results/checkpoint-4000/scheduler.pt filter=lfs diff=lfs merge=lfs -text
6
  基于BERT的情感分析系统.pptx filter=lfs diff=lfs merge=lfs -text
7
+ checkpoint-1000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
8
+ checkpoint-1500/optimizer.pt filter=lfs diff=lfs merge=lfs -text
9
+ checkpoint-2000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
10
+ checkpoint-2500/optimizer.pt filter=lfs diff=lfs merge=lfs -text
11
+ checkpoint-3000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
12
+ checkpoint-3500/optimizer.pt filter=lfs diff=lfs merge=lfs -text
13
+ checkpoint-4000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
14
+ checkpoint-4500/optimizer.pt filter=lfs diff=lfs merge=lfs -text
15
+ checkpoint-500/optimizer.pt filter=lfs diff=lfs merge=lfs -text
16
+ checkpoint-5000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
17
+ checkpoint-5500/optimizer.pt filter=lfs diff=lfs merge=lfs -text
18
+ checkpoint-6000/optimizer.pt filter=lfs diff=lfs merge=lfs -text
19
+ checkpoint-6500/optimizer.pt filter=lfs diff=lfs merge=lfs -text
checkpoint-1000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "dtype": "float32",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "negative",
14
+ "1": "neutral",
15
+ "2": "positive"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "negative": 0,
21
+ "neutral": 1,
22
+ "positive": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "pooler_fc_size": 768,
31
+ "pooler_num_attention_heads": 12,
32
+ "pooler_num_fc_layers": 3,
33
+ "pooler_size_per_head": 128,
34
+ "pooler_type": "first_token_transform",
35
+ "position_embedding_type": "absolute",
36
+ "problem_type": "single_label_classification",
37
+ "transformers_version": "4.57.3",
38
+ "type_vocab_size": 2,
39
+ "use_cache": true,
40
+ "vocab_size": 21128
41
+ }
checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e583d628bf3f7664d535cf9ef598114a7c49fe257c7fe24c2c221b5f2233b850
3
+ size 409103316
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42f24930a75cafe396570302ac3432d90d46838277c4d457f1085f00561acef3
3
+ size 818320969
checkpoint-1000/rng_state.pth ADDED
Binary file (14.4 kB). View file
 
checkpoint-1000/scheduler.pt ADDED
Binary file (1.45 kB). View file
 
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.7448374295446439,
4
+ "best_model_checkpoint": "/Users/wangyiqiu/Desktop/program/\u795e\u7ecf\u7f51\u7edc\u62d3\u6251/results/checkpoint-1000",
5
+ "epoch": 0.06317119393556538,
6
+ "eval_steps": 500,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006317119393556538,
14
+ "grad_norm": 13.061114311218262,
15
+ "learning_rate": 4.169298799747316e-07,
16
+ "loss": 1.354,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.012634238787113077,
21
+ "grad_norm": 13.682186126708984,
22
+ "learning_rate": 8.380711728785009e-07,
23
+ "loss": 1.0853,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.018951358180669616,
28
+ "grad_norm": 4.851679801940918,
29
+ "learning_rate": 1.2592124657822702e-06,
30
+ "loss": 0.9111,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.025268477574226154,
35
+ "grad_norm": 5.82253360748291,
36
+ "learning_rate": 1.6803537586860393e-06,
37
+ "loss": 0.7179,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.03158559696778269,
42
+ "grad_norm": 5.032683372497559,
43
+ "learning_rate": 2.1014950515898086e-06,
44
+ "loss": 0.6422,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.03158559696778269,
49
+ "eval_accuracy": 0.7368075050637859,
50
+ "eval_f1": 0.7170832086299176,
51
+ "eval_loss": 0.6070035696029663,
52
+ "eval_precision": 0.7218199142709759,
53
+ "eval_recall": 0.7368075050637859,
54
+ "eval_runtime": 582.5178,
55
+ "eval_samples_per_second": 96.619,
56
+ "eval_steps_per_second": 3.02,
57
+ "step": 500
58
+ },
59
+ {
60
+ "epoch": 0.03790271636133923,
61
+ "grad_norm": 7.424877166748047,
62
+ "learning_rate": 2.5226363444935774e-06,
63
+ "loss": 0.6155,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 0.04421983575489577,
68
+ "grad_norm": 16.976255416870117,
69
+ "learning_rate": 2.943777637397347e-06,
70
+ "loss": 0.5944,
71
+ "step": 700
72
+ },
73
+ {
74
+ "epoch": 0.05053695514845231,
75
+ "grad_norm": 9.103567123413086,
76
+ "learning_rate": 3.3649189303011164e-06,
77
+ "loss": 0.5812,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 0.056854074542008845,
82
+ "grad_norm": 7.061375617980957,
83
+ "learning_rate": 3.7860602232048853e-06,
84
+ "loss": 0.5965,
85
+ "step": 900
86
+ },
87
+ {
88
+ "epoch": 0.06317119393556538,
89
+ "grad_norm": 6.224503040313721,
90
+ "learning_rate": 4.207201516108655e-06,
91
+ "loss": 0.5553,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 0.06317119393556538,
96
+ "eval_accuracy": 0.7581642443409972,
97
+ "eval_f1": 0.7448374295446439,
98
+ "eval_loss": 0.5610596537590027,
99
+ "eval_precision": 0.7461287482946488,
100
+ "eval_recall": 0.7581642443409972,
101
+ "eval_runtime": 584.5541,
102
+ "eval_samples_per_second": 96.282,
103
+ "eval_steps_per_second": 3.009,
104
+ "step": 1000
105
+ }
106
+ ],
107
+ "logging_steps": 100,
108
+ "max_steps": 47490,
109
+ "num_input_tokens_seen": 0,
110
+ "num_train_epochs": 3,
111
+ "save_steps": 500,
112
+ "stateful_callbacks": {
113
+ "TrainerControl": {
114
+ "args": {
115
+ "should_epoch_stop": false,
116
+ "should_evaluate": false,
117
+ "should_log": false,
118
+ "should_save": true,
119
+ "should_training_stop": false
120
+ },
121
+ "attributes": {}
122
+ }
123
+ },
124
+ "total_flos": 2104907341824000.0,
125
+ "train_batch_size": 32,
126
+ "trial_name": null,
127
+ "trial_params": null
128
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88724115c05a14013e1bf5182b6efa00270cfee7c30485da6eb058c6d09f75a8
3
+ size 5805
checkpoint-1000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "dtype": "float32",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "negative",
14
+ "1": "neutral",
15
+ "2": "positive"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "negative": 0,
21
+ "neutral": 1,
22
+ "positive": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "pooler_fc_size": 768,
31
+ "pooler_num_attention_heads": 12,
32
+ "pooler_num_fc_layers": 3,
33
+ "pooler_size_per_head": 128,
34
+ "pooler_type": "first_token_transform",
35
+ "position_embedding_type": "absolute",
36
+ "problem_type": "single_label_classification",
37
+ "transformers_version": "4.57.3",
38
+ "type_vocab_size": 2,
39
+ "use_cache": true,
40
+ "vocab_size": 21128
41
+ }
checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4209fdfbb0289e0649ee9e29950a97c6f5f549a5f73c74da86f22954d84d3c6b
3
+ size 409103316
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c6935f688f73008b30e77553a70ce309f0077be2763f4129a83cf96713d28a
3
+ size 818320969
checkpoint-1500/rng_state.pth ADDED
Binary file (14.4 kB). View file
 
checkpoint-1500/scheduler.pt ADDED
Binary file (1.45 kB). View file
 
checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1500,
3
+ "best_metric": 0.7632915279870514,
4
+ "best_model_checkpoint": "/Users/wangyiqiu/Desktop/program/\u795e\u7ecf\u7f51\u7edc\u62d3\u6251/results/checkpoint-1500",
5
+ "epoch": 0.09475679090334807,
6
+ "eval_steps": 500,
7
+ "global_step": 1500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006317119393556538,
14
+ "grad_norm": 13.061114311218262,
15
+ "learning_rate": 4.169298799747316e-07,
16
+ "loss": 1.354,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.012634238787113077,
21
+ "grad_norm": 13.682186126708984,
22
+ "learning_rate": 8.380711728785009e-07,
23
+ "loss": 1.0853,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.018951358180669616,
28
+ "grad_norm": 4.851679801940918,
29
+ "learning_rate": 1.2592124657822702e-06,
30
+ "loss": 0.9111,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.025268477574226154,
35
+ "grad_norm": 5.82253360748291,
36
+ "learning_rate": 1.6803537586860393e-06,
37
+ "loss": 0.7179,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.03158559696778269,
42
+ "grad_norm": 5.032683372497559,
43
+ "learning_rate": 2.1014950515898086e-06,
44
+ "loss": 0.6422,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.03158559696778269,
49
+ "eval_accuracy": 0.7368075050637859,
50
+ "eval_f1": 0.7170832086299176,
51
+ "eval_loss": 0.6070035696029663,
52
+ "eval_precision": 0.7218199142709759,
53
+ "eval_recall": 0.7368075050637859,
54
+ "eval_runtime": 582.5178,
55
+ "eval_samples_per_second": 96.619,
56
+ "eval_steps_per_second": 3.02,
57
+ "step": 500
58
+ },
59
+ {
60
+ "epoch": 0.03790271636133923,
61
+ "grad_norm": 7.424877166748047,
62
+ "learning_rate": 2.5226363444935774e-06,
63
+ "loss": 0.6155,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 0.04421983575489577,
68
+ "grad_norm": 16.976255416870117,
69
+ "learning_rate": 2.943777637397347e-06,
70
+ "loss": 0.5944,
71
+ "step": 700
72
+ },
73
+ {
74
+ "epoch": 0.05053695514845231,
75
+ "grad_norm": 9.103567123413086,
76
+ "learning_rate": 3.3649189303011164e-06,
77
+ "loss": 0.5812,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 0.056854074542008845,
82
+ "grad_norm": 7.061375617980957,
83
+ "learning_rate": 3.7860602232048853e-06,
84
+ "loss": 0.5965,
85
+ "step": 900
86
+ },
87
+ {
88
+ "epoch": 0.06317119393556538,
89
+ "grad_norm": 6.224503040313721,
90
+ "learning_rate": 4.207201516108655e-06,
91
+ "loss": 0.5553,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 0.06317119393556538,
96
+ "eval_accuracy": 0.7581642443409972,
97
+ "eval_f1": 0.7448374295446439,
98
+ "eval_loss": 0.5610596537590027,
99
+ "eval_precision": 0.7461287482946488,
100
+ "eval_recall": 0.7581642443409972,
101
+ "eval_runtime": 584.5541,
102
+ "eval_samples_per_second": 96.282,
103
+ "eval_steps_per_second": 3.009,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 0.06948831332912192,
108
+ "grad_norm": 6.321476459503174,
109
+ "learning_rate": 4.628342809012423e-06,
110
+ "loss": 0.592,
111
+ "step": 1100
112
+ },
113
+ {
114
+ "epoch": 0.07580543272267846,
115
+ "grad_norm": 8.201200485229492,
116
+ "learning_rate": 5.0494841019161935e-06,
117
+ "loss": 0.5518,
118
+ "step": 1200
119
+ },
120
+ {
121
+ "epoch": 0.082122552116235,
122
+ "grad_norm": 6.514477729797363,
123
+ "learning_rate": 5.470625394819963e-06,
124
+ "loss": 0.5897,
125
+ "step": 1300
126
+ },
127
+ {
128
+ "epoch": 0.08843967150979154,
129
+ "grad_norm": 8.077017784118652,
130
+ "learning_rate": 5.891766687723732e-06,
131
+ "loss": 0.5476,
132
+ "step": 1400
133
+ },
134
+ {
135
+ "epoch": 0.09475679090334807,
136
+ "grad_norm": 9.256704330444336,
137
+ "learning_rate": 6.3129079806275005e-06,
138
+ "loss": 0.5263,
139
+ "step": 1500
140
+ },
141
+ {
142
+ "epoch": 0.09475679090334807,
143
+ "eval_accuracy": 0.7675278064034683,
144
+ "eval_f1": 0.7632915279870514,
145
+ "eval_loss": 0.5426821112632751,
146
+ "eval_precision": 0.760979358962669,
147
+ "eval_recall": 0.7675278064034683,
148
+ "eval_runtime": 587.2504,
149
+ "eval_samples_per_second": 95.84,
150
+ "eval_steps_per_second": 2.995,
151
+ "step": 1500
152
+ }
153
+ ],
154
+ "logging_steps": 100,
155
+ "max_steps": 47490,
156
+ "num_input_tokens_seen": 0,
157
+ "num_train_epochs": 3,
158
+ "save_steps": 500,
159
+ "stateful_callbacks": {
160
+ "TrainerControl": {
161
+ "args": {
162
+ "should_epoch_stop": false,
163
+ "should_evaluate": false,
164
+ "should_log": false,
165
+ "should_save": true,
166
+ "should_training_stop": false
167
+ },
168
+ "attributes": {}
169
+ }
170
+ },
171
+ "total_flos": 3157361012736000.0,
172
+ "train_batch_size": 32,
173
+ "trial_name": null,
174
+ "trial_params": null
175
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88724115c05a14013e1bf5182b6efa00270cfee7c30485da6eb058c6d09f75a8
3
+ size 5805
checkpoint-1500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "dtype": "float32",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "negative",
14
+ "1": "neutral",
15
+ "2": "positive"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "negative": 0,
21
+ "neutral": 1,
22
+ "positive": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "pooler_fc_size": 768,
31
+ "pooler_num_attention_heads": 12,
32
+ "pooler_num_fc_layers": 3,
33
+ "pooler_size_per_head": 128,
34
+ "pooler_type": "first_token_transform",
35
+ "position_embedding_type": "absolute",
36
+ "problem_type": "single_label_classification",
37
+ "transformers_version": "4.57.3",
38
+ "type_vocab_size": 2,
39
+ "use_cache": true,
40
+ "vocab_size": 21128
41
+ }
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ea44ef70cf8d03e38dc4bd19e0395e151e76ca0ec086f9b64a9bafbffe8634
3
+ size 409103316
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb16411239bf92da62b02a0c869901a9e3f917e66b5d90f3980cbdd70d7dfd2
3
+ size 818320969
checkpoint-2000/rng_state.pth ADDED
Binary file (14.4 kB). View file
 
checkpoint-2000/scheduler.pt ADDED
Binary file (1.45 kB). View file
 
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2000,
3
+ "best_metric": 0.7644619158467771,
4
+ "best_model_checkpoint": "/Users/wangyiqiu/Desktop/program/\u795e\u7ecf\u7f51\u7edc\u62d3\u6251/results/checkpoint-2000",
5
+ "epoch": 0.12634238787113075,
6
+ "eval_steps": 500,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006317119393556538,
14
+ "grad_norm": 13.061114311218262,
15
+ "learning_rate": 4.169298799747316e-07,
16
+ "loss": 1.354,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.012634238787113077,
21
+ "grad_norm": 13.682186126708984,
22
+ "learning_rate": 8.380711728785009e-07,
23
+ "loss": 1.0853,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.018951358180669616,
28
+ "grad_norm": 4.851679801940918,
29
+ "learning_rate": 1.2592124657822702e-06,
30
+ "loss": 0.9111,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.025268477574226154,
35
+ "grad_norm": 5.82253360748291,
36
+ "learning_rate": 1.6803537586860393e-06,
37
+ "loss": 0.7179,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.03158559696778269,
42
+ "grad_norm": 5.032683372497559,
43
+ "learning_rate": 2.1014950515898086e-06,
44
+ "loss": 0.6422,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.03158559696778269,
49
+ "eval_accuracy": 0.7368075050637859,
50
+ "eval_f1": 0.7170832086299176,
51
+ "eval_loss": 0.6070035696029663,
52
+ "eval_precision": 0.7218199142709759,
53
+ "eval_recall": 0.7368075050637859,
54
+ "eval_runtime": 582.5178,
55
+ "eval_samples_per_second": 96.619,
56
+ "eval_steps_per_second": 3.02,
57
+ "step": 500
58
+ },
59
+ {
60
+ "epoch": 0.03790271636133923,
61
+ "grad_norm": 7.424877166748047,
62
+ "learning_rate": 2.5226363444935774e-06,
63
+ "loss": 0.6155,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 0.04421983575489577,
68
+ "grad_norm": 16.976255416870117,
69
+ "learning_rate": 2.943777637397347e-06,
70
+ "loss": 0.5944,
71
+ "step": 700
72
+ },
73
+ {
74
+ "epoch": 0.05053695514845231,
75
+ "grad_norm": 9.103567123413086,
76
+ "learning_rate": 3.3649189303011164e-06,
77
+ "loss": 0.5812,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 0.056854074542008845,
82
+ "grad_norm": 7.061375617980957,
83
+ "learning_rate": 3.7860602232048853e-06,
84
+ "loss": 0.5965,
85
+ "step": 900
86
+ },
87
+ {
88
+ "epoch": 0.06317119393556538,
89
+ "grad_norm": 6.224503040313721,
90
+ "learning_rate": 4.207201516108655e-06,
91
+ "loss": 0.5553,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 0.06317119393556538,
96
+ "eval_accuracy": 0.7581642443409972,
97
+ "eval_f1": 0.7448374295446439,
98
+ "eval_loss": 0.5610596537590027,
99
+ "eval_precision": 0.7461287482946488,
100
+ "eval_recall": 0.7581642443409972,
101
+ "eval_runtime": 584.5541,
102
+ "eval_samples_per_second": 96.282,
103
+ "eval_steps_per_second": 3.009,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 0.06948831332912192,
108
+ "grad_norm": 6.321476459503174,
109
+ "learning_rate": 4.628342809012423e-06,
110
+ "loss": 0.592,
111
+ "step": 1100
112
+ },
113
+ {
114
+ "epoch": 0.07580543272267846,
115
+ "grad_norm": 8.201200485229492,
116
+ "learning_rate": 5.0494841019161935e-06,
117
+ "loss": 0.5518,
118
+ "step": 1200
119
+ },
120
+ {
121
+ "epoch": 0.082122552116235,
122
+ "grad_norm": 6.514477729797363,
123
+ "learning_rate": 5.470625394819963e-06,
124
+ "loss": 0.5897,
125
+ "step": 1300
126
+ },
127
+ {
128
+ "epoch": 0.08843967150979154,
129
+ "grad_norm": 8.077017784118652,
130
+ "learning_rate": 5.891766687723732e-06,
131
+ "loss": 0.5476,
132
+ "step": 1400
133
+ },
134
+ {
135
+ "epoch": 0.09475679090334807,
136
+ "grad_norm": 9.256704330444336,
137
+ "learning_rate": 6.3129079806275005e-06,
138
+ "loss": 0.5263,
139
+ "step": 1500
140
+ },
141
+ {
142
+ "epoch": 0.09475679090334807,
143
+ "eval_accuracy": 0.7675278064034683,
144
+ "eval_f1": 0.7632915279870514,
145
+ "eval_loss": 0.5426821112632751,
146
+ "eval_precision": 0.760979358962669,
147
+ "eval_recall": 0.7675278064034683,
148
+ "eval_runtime": 587.2504,
149
+ "eval_samples_per_second": 95.84,
150
+ "eval_steps_per_second": 2.995,
151
+ "step": 1500
152
+ },
153
+ {
154
+ "epoch": 0.10107391029690461,
155
+ "grad_norm": 6.117814064025879,
156
+ "learning_rate": 6.73404927353127e-06,
157
+ "loss": 0.5563,
158
+ "step": 1600
159
+ },
160
+ {
161
+ "epoch": 0.10739102969046115,
162
+ "grad_norm": 9.015992164611816,
163
+ "learning_rate": 7.15519056643504e-06,
164
+ "loss": 0.5622,
165
+ "step": 1700
166
+ },
167
+ {
168
+ "epoch": 0.11370814908401769,
169
+ "grad_norm": 8.684099197387695,
170
+ "learning_rate": 7.576331859338809e-06,
171
+ "loss": 0.5483,
172
+ "step": 1800
173
+ },
174
+ {
175
+ "epoch": 0.12002526847757422,
176
+ "grad_norm": 5.517951488494873,
177
+ "learning_rate": 7.997473152242578e-06,
178
+ "loss": 0.5467,
179
+ "step": 1900
180
+ },
181
+ {
182
+ "epoch": 0.12634238787113075,
183
+ "grad_norm": 4.840009689331055,
184
+ "learning_rate": 8.418614445146347e-06,
185
+ "loss": 0.5472,
186
+ "step": 2000
187
+ },
188
+ {
189
+ "epoch": 0.12634238787113075,
190
+ "eval_accuracy": 0.7682740485412743,
191
+ "eval_f1": 0.7644619158467771,
192
+ "eval_loss": 0.5479554533958435,
193
+ "eval_precision": 0.7616941910129872,
194
+ "eval_recall": 0.7682740485412743,
195
+ "eval_runtime": 594.3974,
196
+ "eval_samples_per_second": 94.687,
197
+ "eval_steps_per_second": 2.959,
198
+ "step": 2000
199
+ }
200
+ ],
201
+ "logging_steps": 100,
202
+ "max_steps": 47490,
203
+ "num_input_tokens_seen": 0,
204
+ "num_train_epochs": 3,
205
+ "save_steps": 500,
206
+ "stateful_callbacks": {
207
+ "TrainerControl": {
208
+ "args": {
209
+ "should_epoch_stop": false,
210
+ "should_evaluate": false,
211
+ "should_log": false,
212
+ "should_save": true,
213
+ "should_training_stop": false
214
+ },
215
+ "attributes": {}
216
+ }
217
+ },
218
+ "total_flos": 4209814683648000.0,
219
+ "train_batch_size": 32,
220
+ "trial_name": null,
221
+ "trial_params": null
222
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88724115c05a14013e1bf5182b6efa00270cfee7c30485da6eb058c6d09f75a8
3
+ size 5805
checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "dtype": "float32",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "negative",
14
+ "1": "neutral",
15
+ "2": "positive"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "negative": 0,
21
+ "neutral": 1,
22
+ "positive": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "pooler_fc_size": 768,
31
+ "pooler_num_attention_heads": 12,
32
+ "pooler_num_fc_layers": 3,
33
+ "pooler_size_per_head": 128,
34
+ "pooler_type": "first_token_transform",
35
+ "position_embedding_type": "absolute",
36
+ "problem_type": "single_label_classification",
37
+ "transformers_version": "4.57.3",
38
+ "type_vocab_size": 2,
39
+ "use_cache": true,
40
+ "vocab_size": 21128
41
+ }
checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a65784efe8a30dcba18ab4014fe925034545bd17cb304dbd85a3e45f1ee18b8
3
+ size 409103316
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d1420b60e06ba0d05e36c8a399cface5b24ca58c2dca60adf781f4a8d792a2b
3
+ size 818320969
checkpoint-2500/rng_state.pth ADDED
Binary file (14.4 kB). View file
 
checkpoint-2500/scheduler.pt ADDED
Binary file (1.45 kB). View file
 
checkpoint-2500/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-2500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": false,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2500,
3
+ "best_metric": 0.7701652961241094,
4
+ "best_model_checkpoint": "/Users/wangyiqiu/Desktop/program/\u795e\u7ecf\u7f51\u7edc\u62d3\u6251/results/checkpoint-2500",
5
+ "epoch": 0.15792798483891346,
6
+ "eval_steps": 500,
7
+ "global_step": 2500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.006317119393556538,
14
+ "grad_norm": 13.061114311218262,
15
+ "learning_rate": 4.169298799747316e-07,
16
+ "loss": 1.354,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.012634238787113077,
21
+ "grad_norm": 13.682186126708984,
22
+ "learning_rate": 8.380711728785009e-07,
23
+ "loss": 1.0853,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.018951358180669616,
28
+ "grad_norm": 4.851679801940918,
29
+ "learning_rate": 1.2592124657822702e-06,
30
+ "loss": 0.9111,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.025268477574226154,
35
+ "grad_norm": 5.82253360748291,
36
+ "learning_rate": 1.6803537586860393e-06,
37
+ "loss": 0.7179,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.03158559696778269,
42
+ "grad_norm": 5.032683372497559,
43
+ "learning_rate": 2.1014950515898086e-06,
44
+ "loss": 0.6422,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.03158559696778269,
49
+ "eval_accuracy": 0.7368075050637859,
50
+ "eval_f1": 0.7170832086299176,
51
+ "eval_loss": 0.6070035696029663,
52
+ "eval_precision": 0.7218199142709759,
53
+ "eval_recall": 0.7368075050637859,
54
+ "eval_runtime": 582.5178,
55
+ "eval_samples_per_second": 96.619,
56
+ "eval_steps_per_second": 3.02,
57
+ "step": 500
58
+ },
59
+ {
60
+ "epoch": 0.03790271636133923,
61
+ "grad_norm": 7.424877166748047,
62
+ "learning_rate": 2.5226363444935774e-06,
63
+ "loss": 0.6155,
64
+ "step": 600
65
+ },
66
+ {
67
+ "epoch": 0.04421983575489577,
68
+ "grad_norm": 16.976255416870117,
69
+ "learning_rate": 2.943777637397347e-06,
70
+ "loss": 0.5944,
71
+ "step": 700
72
+ },
73
+ {
74
+ "epoch": 0.05053695514845231,
75
+ "grad_norm": 9.103567123413086,
76
+ "learning_rate": 3.3649189303011164e-06,
77
+ "loss": 0.5812,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 0.056854074542008845,
82
+ "grad_norm": 7.061375617980957,
83
+ "learning_rate": 3.7860602232048853e-06,
84
+ "loss": 0.5965,
85
+ "step": 900
86
+ },
87
+ {
88
+ "epoch": 0.06317119393556538,
89
+ "grad_norm": 6.224503040313721,
90
+ "learning_rate": 4.207201516108655e-06,
91
+ "loss": 0.5553,
92
+ "step": 1000
93
+ },
94
+ {
95
+ "epoch": 0.06317119393556538,
96
+ "eval_accuracy": 0.7581642443409972,
97
+ "eval_f1": 0.7448374295446439,
98
+ "eval_loss": 0.5610596537590027,
99
+ "eval_precision": 0.7461287482946488,
100
+ "eval_recall": 0.7581642443409972,
101
+ "eval_runtime": 584.5541,
102
+ "eval_samples_per_second": 96.282,
103
+ "eval_steps_per_second": 3.009,
104
+ "step": 1000
105
+ },
106
+ {
107
+ "epoch": 0.06948831332912192,
108
+ "grad_norm": 6.321476459503174,
109
+ "learning_rate": 4.628342809012423e-06,
110
+ "loss": 0.592,
111
+ "step": 1100
112
+ },
113
+ {
114
+ "epoch": 0.07580543272267846,
115
+ "grad_norm": 8.201200485229492,
116
+ "learning_rate": 5.0494841019161935e-06,
117
+ "loss": 0.5518,
118
+ "step": 1200
119
+ },
120
+ {
121
+ "epoch": 0.082122552116235,
122
+ "grad_norm": 6.514477729797363,
123
+ "learning_rate": 5.470625394819963e-06,
124
+ "loss": 0.5897,
125
+ "step": 1300
126
+ },
127
+ {
128
+ "epoch": 0.08843967150979154,
129
+ "grad_norm": 8.077017784118652,
130
+ "learning_rate": 5.891766687723732e-06,
131
+ "loss": 0.5476,
132
+ "step": 1400
133
+ },
134
+ {
135
+ "epoch": 0.09475679090334807,
136
+ "grad_norm": 9.256704330444336,
137
+ "learning_rate": 6.3129079806275005e-06,
138
+ "loss": 0.5263,
139
+ "step": 1500
140
+ },
141
+ {
142
+ "epoch": 0.09475679090334807,
143
+ "eval_accuracy": 0.7675278064034683,
144
+ "eval_f1": 0.7632915279870514,
145
+ "eval_loss": 0.5426821112632751,
146
+ "eval_precision": 0.760979358962669,
147
+ "eval_recall": 0.7675278064034683,
148
+ "eval_runtime": 587.2504,
149
+ "eval_samples_per_second": 95.84,
150
+ "eval_steps_per_second": 2.995,
151
+ "step": 1500
152
+ },
153
+ {
154
+ "epoch": 0.10107391029690461,
155
+ "grad_norm": 6.117814064025879,
156
+ "learning_rate": 6.73404927353127e-06,
157
+ "loss": 0.5563,
158
+ "step": 1600
159
+ },
160
+ {
161
+ "epoch": 0.10739102969046115,
162
+ "grad_norm": 9.015992164611816,
163
+ "learning_rate": 7.15519056643504e-06,
164
+ "loss": 0.5622,
165
+ "step": 1700
166
+ },
167
+ {
168
+ "epoch": 0.11370814908401769,
169
+ "grad_norm": 8.684099197387695,
170
+ "learning_rate": 7.576331859338809e-06,
171
+ "loss": 0.5483,
172
+ "step": 1800
173
+ },
174
+ {
175
+ "epoch": 0.12002526847757422,
176
+ "grad_norm": 5.517951488494873,
177
+ "learning_rate": 7.997473152242578e-06,
178
+ "loss": 0.5467,
179
+ "step": 1900
180
+ },
181
+ {
182
+ "epoch": 0.12634238787113075,
183
+ "grad_norm": 4.840009689331055,
184
+ "learning_rate": 8.418614445146347e-06,
185
+ "loss": 0.5472,
186
+ "step": 2000
187
+ },
188
+ {
189
+ "epoch": 0.12634238787113075,
190
+ "eval_accuracy": 0.7682740485412743,
191
+ "eval_f1": 0.7644619158467771,
192
+ "eval_loss": 0.5479554533958435,
193
+ "eval_precision": 0.7616941910129872,
194
+ "eval_recall": 0.7682740485412743,
195
+ "eval_runtime": 594.3974,
196
+ "eval_samples_per_second": 94.687,
197
+ "eval_steps_per_second": 2.959,
198
+ "step": 2000
199
+ },
200
+ {
201
+ "epoch": 0.1326595072646873,
202
+ "grad_norm": 9.188036918640137,
203
+ "learning_rate": 8.839755738050117e-06,
204
+ "loss": 0.5436,
205
+ "step": 2100
206
+ },
207
+ {
208
+ "epoch": 0.13897662665824384,
209
+ "grad_norm": 5.845507621765137,
210
+ "learning_rate": 9.260897030953885e-06,
211
+ "loss": 0.5684,
212
+ "step": 2200
213
+ },
214
+ {
215
+ "epoch": 0.14529374605180037,
216
+ "grad_norm": 6.014614105224609,
217
+ "learning_rate": 9.682038323857656e-06,
218
+ "loss": 0.5268,
219
+ "step": 2300
220
+ },
221
+ {
222
+ "epoch": 0.15161086544535693,
223
+ "grad_norm": 5.183818817138672,
224
+ "learning_rate": 1.0103179616761426e-05,
225
+ "loss": 0.5505,
226
+ "step": 2400
227
+ },
228
+ {
229
+ "epoch": 0.15792798483891346,
230
+ "grad_norm": 4.270262718200684,
231
+ "learning_rate": 1.0524320909665192e-05,
232
+ "loss": 0.5327,
233
+ "step": 2500
234
+ },
235
+ {
236
+ "epoch": 0.15792798483891346,
237
+ "eval_accuracy": 0.7718631178707225,
238
+ "eval_f1": 0.7701652961241094,
239
+ "eval_loss": 0.538950502872467,
240
+ "eval_precision": 0.7692113501499637,
241
+ "eval_recall": 0.7718631178707225,
242
+ "eval_runtime": 598.0361,
243
+ "eval_samples_per_second": 94.111,
244
+ "eval_steps_per_second": 2.941,
245
+ "step": 2500
246
+ }
247
+ ],
248
+ "logging_steps": 100,
249
+ "max_steps": 47490,
250
+ "num_input_tokens_seen": 0,
251
+ "num_train_epochs": 3,
252
+ "save_steps": 500,
253
+ "stateful_callbacks": {
254
+ "TrainerControl": {
255
+ "args": {
256
+ "should_epoch_stop": false,
257
+ "should_evaluate": false,
258
+ "should_log": false,
259
+ "should_save": true,
260
+ "should_training_stop": false
261
+ },
262
+ "attributes": {}
263
+ }
264
+ },
265
+ "total_flos": 5262268354560000.0,
266
+ "train_batch_size": 32,
267
+ "trial_name": null,
268
+ "trial_params": null
269
+ }
checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88724115c05a14013e1bf5182b6efa00270cfee7c30485da6eb058c6d09f75a8
3
+ size 5805
checkpoint-2500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-3000/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "dtype": "float32",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "negative",
14
+ "1": "neutral",
15
+ "2": "positive"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "negative": 0,
21
+ "neutral": 1,
22
+ "positive": 2
23
+ },
24
+ "layer_norm_eps": 1e-12,
25
+ "max_position_embeddings": 512,
26
+ "model_type": "bert",
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "pad_token_id": 0,
30
+ "pooler_fc_size": 768,
31
+ "pooler_num_attention_heads": 12,
32
+ "pooler_num_fc_layers": 3,
33
+ "pooler_size_per_head": 128,
34
+ "pooler_type": "first_token_transform",
35
+ "position_embedding_type": "absolute",
36
+ "problem_type": "single_label_classification",
37
+ "transformers_version": "4.57.3",
38
+ "type_vocab_size": 2,
39
+ "use_cache": true,
40
+ "vocab_size": 21128
41
+ }
checkpoint-3000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce0f10418c09aad543969f7b335b600bc4485c0894c8752b10067c20058c90e
3
+ size 409103316
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e18d3d6e0e530bc9fd092dde6f6e67f21536e58d7cb195ac7cb5a2475b38081c
3
+ size 818320969
checkpoint-3000/rng_state.pth ADDED
Binary file (14.4 kB). View file
 
checkpoint-3000/scheduler.pt ADDED
Binary file (1.45 kB). View file