lvlinkun committed on
Commit
7c4218a
1 Parent(s): 926d52f

initialize longcat model repo
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "architectures": [
+     "BertModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 128,
+   "initializer_range": 0.02,
+   "intermediate_size": 512,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 128,
+   "model_type": "bert",
+   "num_attention_heads": 4,
+   "num_hidden_layers": 2,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.55.0",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 197
+ }
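config.json describes a very small BERT encoder: 2 hidden layers, 4 attention heads, hidden size 128, and a 197-token vocabulary. As a quick sanity check, the sketch below (the `./longcat` path is an assumption, not part of this commit) instantiates that config with transformers and counts parameters; the total is roughly 0.45M float32 weights, which lines up with the ~1.8 MB model.safetensors added next.

```python
# Minimal sketch: instantiate the tiny BERT encoder described by config.json.
# "./longcat" is an assumed local clone of this repo.
from transformers import BertConfig, BertModel

config = BertConfig.from_pretrained("./longcat")  # reads config.json
model = BertModel(config)                         # randomly initialised; use from_pretrained for the committed weights

n_params = sum(p.numel() for p in model.parameters())
print(config.num_hidden_layers, config.num_attention_heads, config.hidden_size)
print(f"{n_params:,} parameters")                 # on the order of 0.45M float32 values
```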
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84793f9c782ee26cb2f1ad7b077dad5285f8460ed6d2ba8464693e0c1915dc51
+ size 1824792
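What is committed here is only a Git LFS pointer; the actual ~1.8 MB weight file is fetched by git-lfs. If needed, a downloaded copy can be checked against the pointer with nothing but the standard library, as in this sketch:

```python
# Sketch: confirm a downloaded model.safetensors matches the LFS pointer above.
import hashlib

EXPECTED_OID = "84793f9c782ee26cb2f1ad7b077dad5285f8460ed6d2ba8464693e0c1915dc51"
EXPECTED_SIZE = 1824792  # bytes, as recorded in the pointer

with open("model.safetensors", "rb") as f:
    data = f.read()

assert len(data) == EXPECTED_SIZE, f"unexpected size {len(data)}"
assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("model.safetensors matches the LFS pointer")
```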
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch>=1.9.0
+ transformers>=4.20.0
+ tokenizers>=0.12.0
+ numpy>=1.21.0
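With these pinned dependencies installed (for example via `pip install -r requirements.txt`), a minimal end-to-end smoke test could look like the sketch below; the local path is again an assumption:

```python
# Sketch: end-to-end smoke test using the dependencies listed above.
# "./longcat" stands in for a local clone of this repo.
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./longcat")
model = AutoModel.from_pretrained("./longcat")

inputs = tokenizer("the new day", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

print(outputs.last_hidden_state.shape)  # (batch, sequence_length, hidden_size=128)
```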
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "cls_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
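These are the standard BERT special tokens; once the tokenizer is loaded they are available as attributes, and their ids follow the token order in vocab.txt. A short sketch (local path assumed):

```python
# Sketch: the special tokens map is exposed as tokenizer attributes.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./longcat")  # assumed local clone
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.unk_token, tok.mask_token)
print(tok.convert_tokens_to_ids(["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]"]))  # expected: [0, 1, 2, 3, 4]
```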
tokenizer.json ADDED
@@ -0,0 +1,347 @@
+ {
+   "version": "1.0",
+   "truncation": null,
+   "padding": null,
+   "added_tokens": [
+     {
+       "id": 0,
+       "content": "[PAD]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 1,
+       "content": "[UNK]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 2,
+       "content": "[CLS]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 3,
+       "content": "[SEP]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 4,
+       "content": "[MASK]",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     }
+   ],
+   "normalizer": {
+     "type": "BertNormalizer",
+     "clean_text": true,
+     "handle_chinese_chars": true,
+     "strip_accents": null,
+     "lowercase": true
+   },
+   "pre_tokenizer": {
+     "type": "BertPreTokenizer"
+   },
+   "post_processor": {
+     "type": "TemplateProcessing",
+     "single": [
+       {
+         "SpecialToken": {
+           "id": "[CLS]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[SEP]",
+           "type_id": 0
+         }
+       }
+     ],
+     "pair": [
+       {
+         "SpecialToken": {
+           "id": "[CLS]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[SEP]",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "B",
+           "type_id": 1
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "[SEP]",
+           "type_id": 1
+         }
+       }
+     ],
+     "special_tokens": {
+       "[CLS]": {
+         "id": "[CLS]",
+         "ids": [
+           2
+         ],
+         "tokens": [
+           "[CLS]"
+         ]
+       },
+       "[SEP]": {
+         "id": "[SEP]",
+         "ids": [
+           3
+         ],
+         "tokens": [
+           "[SEP]"
+         ]
+       }
+     }
+   },
+   "decoder": {
+     "type": "WordPiece",
+     "prefix": "##",
+     "cleanup": true
+   },
+   "model": {
+     "type": "WordPiece",
+     "unk_token": "[UNK]",
+     "continuing_subword_prefix": "##",
+     "max_input_chars_per_word": 100,
+     "vocab": {
+       "[PAD]": 0,
+       "[UNK]": 1,
+       "[CLS]": 2,
+       "[SEP]": 3,
+       "[MASK]": 4,
+       "!": 5,
+       "\"": 6,
+       "#": 7,
+       "$": 8,
+       "%": 9,
+       "&": 10,
+       "'": 11,
+       "(": 12,
+       ")": 13,
+       "*": 14,
+       "+": 15,
+       ",": 16,
+       "-": 17,
+       ".": 18,
+       "/": 19,
+       "0": 20,
+       "1": 21,
+       "2": 22,
+       "3": 23,
+       "4": 24,
+       "5": 25,
+       "6": 26,
+       "7": 27,
+       "8": 28,
+       "9": 29,
+       ":": 30,
+       ";": 31,
+       "<": 32,
+       "=": 33,
+       ">": 34,
+       "?": 35,
+       "@": 36,
+       "A": 37,
+       "B": 38,
+       "C": 39,
+       "D": 40,
+       "E": 41,
+       "F": 42,
+       "G": 43,
+       "H": 44,
+       "I": 45,
+       "J": 46,
+       "K": 47,
+       "L": 48,
+       "M": 49,
+       "N": 50,
+       "O": 51,
+       "P": 52,
+       "Q": 53,
+       "R": 54,
+       "S": 55,
+       "T": 56,
+       "U": 57,
+       "V": 58,
+       "W": 59,
+       "X": 60,
+       "Y": 61,
+       "Z": 62,
+       "[": 63,
+       "\\": 64,
+       "]": 65,
+       "^": 66,
+       "_": 67,
+       "`": 68,
+       "b": 70,
+       "c": 71,
+       "d": 72,
+       "e": 73,
+       "f": 74,
+       "g": 75,
+       "h": 76,
+       "j": 78,
+       "k": 79,
+       "l": 80,
+       "m": 81,
+       "n": 82,
+       "o": 83,
+       "p": 84,
+       "q": 85,
+       "r": 86,
+       "s": 87,
+       "t": 88,
+       "u": 89,
+       "v": 90,
+       "w": 91,
+       "x": 92,
+       "y": 93,
+       "z": 94,
+       "{": 95,
+       "|": 96,
+       "}": 97,
+       "~": 98,
+       "the": 99,
+       "be": 100,
+       "to": 101,
+       "of": 102,
+       "and": 103,
+       "a": 104,
+       "in": 105,
+       "that": 106,
+       "have": 107,
+       "i": 108,
+       "it": 109,
+       "for": 110,
+       "not": 111,
+       "on": 112,
+       "with": 113,
+       "he": 114,
+       "as": 115,
+       "you": 116,
+       "do": 117,
+       "at": 118,
+       "this": 119,
+       "but": 120,
+       "his": 121,
+       "by": 122,
+       "from": 123,
+       "they": 124,
+       "we": 125,
+       "say": 126,
+       "her": 127,
+       "she": 128,
+       "or": 129,
+       "an": 130,
+       "will": 131,
+       "my": 132,
+       "one": 133,
+       "all": 134,
+       "would": 135,
+       "there": 136,
+       "their": 137,
+       "what": 138,
+       "so": 139,
+       "up": 140,
+       "out": 141,
+       "if": 142,
+       "about": 143,
+       "who": 144,
+       "get": 145,
+       "which": 146,
+       "go": 147,
+       "me": 148,
+       "when": 149,
+       "make": 150,
+       "can": 151,
+       "like": 152,
+       "time": 153,
+       "no": 154,
+       "just": 155,
+       "him": 156,
+       "know": 157,
+       "take": 158,
+       "people": 159,
+       "into": 160,
+       "year": 161,
+       "your": 162,
+       "good": 163,
+       "some": 164,
+       "could": 165,
+       "them": 166,
+       "see": 167,
+       "other": 168,
+       "than": 169,
+       "then": 170,
+       "now": 171,
+       "look": 172,
+       "only": 173,
+       "come": 174,
+       "its": 175,
+       "over": 176,
+       "think": 177,
+       "also": 178,
+       "back": 179,
+       "after": 180,
+       "use": 181,
+       "two": 182,
+       "how": 183,
+       "our": 184,
+       "work": 185,
+       "first": 186,
+       "well": 187,
+       "way": 188,
+       "even": 189,
+       "new": 190,
+       "want": 191,
+       "because": 192,
+       "any": 193,
+       "these": 194,
+       "give": 195,
+       "day": 196,
+       "most": 197,
+       "us ": 198
+     }
+   }
+ }
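tokenizer.json bundles a lower-casing BertNormalizer, a BertPreTokenizer, a WordPiece model over the tiny vocabulary above, and a TemplateProcessing post-processor that wraps every input as [CLS] ... [SEP]. The sketch below exercises the file directly with the tokenizers library (no model weights required); the bare filename assumes you are working inside a local clone:

```python
# Sketch: drive tokenizer.json directly with the tokenizers library.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")

enc = tok.encode("The New Day")
print(enc.tokens)   # lower-cased, then wrapped in [CLS] ... [SEP] by the template
print(enc.ids)

# Anything outside the tiny vocabulary falls back to [UNK]
print(tok.encode("transformers").tokens)

# Sentence pairs receive segment ids 0 / 1 according to the "pair" template
pair = tok.encode("the day", "the year")
print(pair.type_ids)
```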
tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_text": true,
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_basic_tokenize": true,
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "never_split": null,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]",
+   "wordpieces_prefix": "##"
+ }
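tokenizer_config.json is what lets transformers rebuild a BertTokenizer around the files above: lower-casing and basic tokenization are enabled, continuing subword pieces use the "##" prefix, and model_max_length is the library's "effectively unlimited" sentinel. A small sketch of the slow (pure-Python) tokenizer path, which reads this file together with vocab.txt:

```python
# Sketch: load the slow BertTokenizer configured by tokenizer_config.json and vocab.txt.
from transformers import BertTokenizer

tok = BertTokenizer.from_pretrained("./longcat")  # assumed local clone
print(tok.tokenize("The New Day"))  # lower-cased before the WordPiece lookup
print(tok.model_max_length)         # very large sentinel, i.e. no practical length limit
print(len(tok))                     # vocabulary size
```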
vocab.txt ADDED
@@ -0,0 +1,197 @@
+ [PAD]
+ [UNK]
+ [CLS]
+ [SEP]
+ [MASK]
+ !
+ "
+ #
+ $
+ %
+ &
+ '
+ (
+ )
+ *
+ +
+ ,
+ -
+ .
+ /
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ :
+ ;
+ <
+ =
+ >
+ ?
+ @
+ A
+ B
+ C
+ D
+ E
+ F
+ G
+ H
+ I
+ J
+ K
+ L
+ M
+ N
+ O
+ P
+ Q
+ R
+ S
+ T
+ U
+ V
+ W
+ X
+ Y
+ Z
+ [
+ \
+ ]
+ ^
+ _
+ `
+ b
+ c
+ d
+ e
+ f
+ g
+ h
+ j
+ k
+ l
+ m
+ n
+ o
+ p
+ q
+ r
+ s
+ t
+ u
+ v
+ w
+ x
+ y
+ z
+ {
+ |
+ }
+ ~
+ the
+ be
+ to
+ of
+ and
+ a
+ in
+ that
+ have
+ i
+ it
+ for
+ not
+ on
+ with
+ he
+ as
+ you
+ do
+ at
+ this
+ but
+ his
+ by
+ from
+ they
+ we
+ say
+ her
+ she
+ or
+ an
+ will
+ my
+ one
+ all
+ would
+ there
+ their
+ what
+ so
+ up
+ out
+ if
+ about
+ who
+ get
+ which
+ go
+ me
+ when
+ make
+ can
+ like
+ time
+ no
+ just
+ him
+ know
+ take
+ people
+ into
+ year
+ your
+ good
+ some
+ could
+ them
+ see
+ other
+ than
+ then
+ now
+ look
+ only
+ come
+ its
+ over
+ think
+ also
+ back
+ after
+ use
+ two
+ how
+ our
+ work
+ first
+ well
+ way
+ even
+ new
+ want
+ because
+ any
+ these
+ give
+ day
+ most
+ us
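vocab.txt is the plain-text counterpart of the WordPiece vocabulary: one token per line, with the 0-indexed line number serving as the token id (so [CLS] is id 2 and the final entry, us, is id 196). Note that the vocab embedded in tokenizer.json above appears to skip ids 69 and 77 and to end with "us " (trailing space) at id 198, so the fast and slow tokenizers may not assign identical ids. A dependency-free sketch of reading the file:

```python
# Sketch: rebuild the token -> id mapping from vocab.txt (id = 0-indexed line number).
with open("vocab.txt", encoding="utf-8") as f:
    vocab = {line.rstrip("\n"): i for i, line in enumerate(f)}

print(len(vocab))      # 197 entries
print(vocab["[CLS]"])  # 2
print(vocab["the"], vocab["us"])
```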