niobures commited on
Commit
0f825ec
·
verified ·
1 Parent(s): 1eb14fe

PANNs (models_onnx)

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  PANNs.[[:space:]]Large-Scale[[:space:]]Pretrained[[:space:]]Audio[[:space:]]Neural[[:space:]]Networks[[:space:]]for[[:space:]]Audio[[:space:]]Pattern[[:space:]]Recognition.pdf filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  PANNs.[[:space:]]Large-Scale[[:space:]]Pretrained[[:space:]]Audio[[:space:]]Neural[[:space:]]Networks[[:space:]]for[[:space:]]Audio[[:space:]]Pattern[[:space:]]Recognition.pdf filter=lfs diff=lfs merge=lfs -text
37
+ models/onnx/ailia-models/code/output.png filter=lfs diff=lfs merge=lfs -text
38
+ models/onnx/ailia-models/code/R9_ZSCveAHg_7s.wav filter=lfs diff=lfs merge=lfs -text
models/onnx/ailia-models/audio_tagging.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1ec74b56aadcc9bc4ea6864e6514101f13b05f6bcc1df7fee508bcc2cdbe48
3
+ size 327324443
models/onnx/ailia-models/audio_tagging.onnx.prototxt ADDED
@@ -0,0 +1,1565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ir_version: 4
2
+ producer_name: "pytorch"
3
+ producer_version: "1.11.0"
4
+ model_version: 0
5
+ graph {
6
+ name: "torch-jit-export"
7
+ node {
8
+ input: "input.1"
9
+ output: "onnx::Pad_85"
10
+ name: "Unsqueeze_0"
11
+ op_type: "Unsqueeze"
12
+ attribute {
13
+ name: "axes"
14
+ ints: 1
15
+ type: INTS
16
+ }
17
+ }
18
+ node {
19
+ input: "onnx::Pad_85"
20
+ output: "input"
21
+ name: "Pad_1"
22
+ op_type: "Pad"
23
+ attribute {
24
+ name: "mode"
25
+ s: "reflect"
26
+ type: STRING
27
+ }
28
+ attribute {
29
+ name: "pads"
30
+ ints: 0
31
+ ints: 0
32
+ ints: 512
33
+ ints: 0
34
+ ints: 0
35
+ ints: 512
36
+ type: INTS
37
+ }
38
+ }
39
+ node {
40
+ input: "input"
41
+ input: "spectrogram_extractor.stft.conv_real.weight"
42
+ output: "onnx::Unsqueeze_87"
43
+ name: "Conv_2"
44
+ op_type: "Conv"
45
+ attribute {
46
+ name: "dilations"
47
+ ints: 1
48
+ type: INTS
49
+ }
50
+ attribute {
51
+ name: "group"
52
+ i: 1
53
+ type: INT
54
+ }
55
+ attribute {
56
+ name: "kernel_shape"
57
+ ints: 1024
58
+ type: INTS
59
+ }
60
+ attribute {
61
+ name: "pads"
62
+ ints: 0
63
+ ints: 0
64
+ type: INTS
65
+ }
66
+ attribute {
67
+ name: "strides"
68
+ ints: 320
69
+ type: INTS
70
+ }
71
+ }
72
+ node {
73
+ input: "input"
74
+ input: "spectrogram_extractor.stft.conv_imag.weight"
75
+ output: "onnx::Unsqueeze_88"
76
+ name: "Conv_3"
77
+ op_type: "Conv"
78
+ attribute {
79
+ name: "dilations"
80
+ ints: 1
81
+ type: INTS
82
+ }
83
+ attribute {
84
+ name: "group"
85
+ i: 1
86
+ type: INT
87
+ }
88
+ attribute {
89
+ name: "kernel_shape"
90
+ ints: 1024
91
+ type: INTS
92
+ }
93
+ attribute {
94
+ name: "pads"
95
+ ints: 0
96
+ ints: 0
97
+ type: INTS
98
+ }
99
+ attribute {
100
+ name: "strides"
101
+ ints: 320
102
+ type: INTS
103
+ }
104
+ }
105
+ node {
106
+ input: "onnx::Unsqueeze_87"
107
+ output: "onnx::Transpose_89"
108
+ name: "Unsqueeze_4"
109
+ op_type: "Unsqueeze"
110
+ attribute {
111
+ name: "axes"
112
+ ints: 1
113
+ type: INTS
114
+ }
115
+ }
116
+ node {
117
+ input: "onnx::Transpose_89"
118
+ output: "real"
119
+ name: "Transpose_5"
120
+ op_type: "Transpose"
121
+ attribute {
122
+ name: "perm"
123
+ ints: 0
124
+ ints: 1
125
+ ints: 3
126
+ ints: 2
127
+ type: INTS
128
+ }
129
+ }
130
+ node {
131
+ input: "onnx::Unsqueeze_88"
132
+ output: "onnx::Transpose_91"
133
+ name: "Unsqueeze_6"
134
+ op_type: "Unsqueeze"
135
+ attribute {
136
+ name: "axes"
137
+ ints: 1
138
+ type: INTS
139
+ }
140
+ }
141
+ node {
142
+ input: "onnx::Transpose_91"
143
+ output: "imag"
144
+ name: "Transpose_7"
145
+ op_type: "Transpose"
146
+ attribute {
147
+ name: "perm"
148
+ ints: 0
149
+ ints: 1
150
+ ints: 3
151
+ ints: 2
152
+ type: INTS
153
+ }
154
+ }
155
+ node {
156
+ input: "real"
157
+ input: "onnx::Pow_204"
158
+ output: "onnx::Add_95"
159
+ name: "Pow_8"
160
+ op_type: "Pow"
161
+ }
162
+ node {
163
+ input: "imag"
164
+ input: "onnx::Pow_205"
165
+ output: "onnx::Add_98"
166
+ name: "Pow_9"
167
+ op_type: "Pow"
168
+ }
169
+ node {
170
+ input: "onnx::Add_95"
171
+ input: "onnx::Add_98"
172
+ output: "onnx::MatMul_99"
173
+ name: "Add_10"
174
+ op_type: "Add"
175
+ }
176
+ node {
177
+ input: "onnx::MatMul_99"
178
+ input: "logmel_extractor.melW"
179
+ output: "mel_spectrogram"
180
+ name: "MatMul_11"
181
+ op_type: "MatMul"
182
+ }
183
+ node {
184
+ input: "mel_spectrogram"
185
+ output: "onnx::Log_101"
186
+ name: "Clip_12"
187
+ op_type: "Clip"
188
+ attribute {
189
+ name: "max"
190
+ f: inf
191
+ type: FLOAT
192
+ }
193
+ attribute {
194
+ name: "min"
195
+ f: 1.000000013351432e-10
196
+ type: FLOAT
197
+ }
198
+ }
199
+ node {
200
+ input: "onnx::Log_101"
201
+ output: "onnx::Div_102"
202
+ name: "Log_13"
203
+ op_type: "Log"
204
+ }
205
+ node {
206
+ output: "onnx::Div_103"
207
+ name: "Constant_14"
208
+ op_type: "Constant"
209
+ attribute {
210
+ name: "value"
211
+ t {
212
+ dims: 1
213
+ data_type: 1
214
+ }
215
+ type: TENSOR
216
+ }
217
+ }
218
+ node {
219
+ input: "onnx::Div_102"
220
+ input: "onnx::Div_103"
221
+ output: "onnx::Mul_104"
222
+ name: "Div_15"
223
+ op_type: "Div"
224
+ }
225
+ node {
226
+ output: "onnx::Mul_105"
227
+ name: "Constant_16"
228
+ op_type: "Constant"
229
+ attribute {
230
+ name: "value"
231
+ t {
232
+ data_type: 1
233
+ }
234
+ type: TENSOR
235
+ }
236
+ }
237
+ node {
238
+ input: "onnx::Mul_104"
239
+ input: "onnx::Mul_105"
240
+ output: "onnx::Sub_106"
241
+ name: "Mul_17"
242
+ op_type: "Mul"
243
+ }
244
+ node {
245
+ output: "onnx::Sub_107"
246
+ name: "Constant_18"
247
+ op_type: "Constant"
248
+ attribute {
249
+ name: "value"
250
+ t {
251
+ data_type: 1
252
+ }
253
+ type: TENSOR
254
+ }
255
+ }
256
+ node {
257
+ input: "onnx::Sub_106"
258
+ input: "onnx::Sub_107"
259
+ output: "onnx::Transpose_108"
260
+ name: "Sub_19"
261
+ op_type: "Sub"
262
+ }
263
+ node {
264
+ input: "onnx::Transpose_108"
265
+ output: "input.4"
266
+ name: "Transpose_20"
267
+ op_type: "Transpose"
268
+ attribute {
269
+ name: "perm"
270
+ ints: 0
271
+ ints: 3
272
+ ints: 2
273
+ ints: 1
274
+ type: INTS
275
+ }
276
+ }
277
+ node {
278
+ input: "input.4"
279
+ input: "bn0.weight"
280
+ input: "bn0.bias"
281
+ input: "bn0.running_mean"
282
+ input: "bn0.running_var"
283
+ output: "onnx::Transpose_110"
284
+ name: "BatchNormalization_21"
285
+ op_type: "BatchNormalization"
286
+ attribute {
287
+ name: "epsilon"
288
+ f: 9.999999747378752e-06
289
+ type: FLOAT
290
+ }
291
+ attribute {
292
+ name: "momentum"
293
+ f: 0.8999999761581421
294
+ type: FLOAT
295
+ }
296
+ }
297
+ node {
298
+ input: "onnx::Transpose_110"
299
+ output: "input.8"
300
+ name: "Transpose_22"
301
+ op_type: "Transpose"
302
+ attribute {
303
+ name: "perm"
304
+ ints: 0
305
+ ints: 3
306
+ ints: 2
307
+ ints: 1
308
+ type: INTS
309
+ }
310
+ }
311
+ node {
312
+ input: "input.8"
313
+ input: "onnx::Conv_169"
314
+ input: "onnx::Conv_170"
315
+ output: "onnx::Relu_168"
316
+ name: "Conv_23"
317
+ op_type: "Conv"
318
+ attribute {
319
+ name: "dilations"
320
+ ints: 1
321
+ ints: 1
322
+ type: INTS
323
+ }
324
+ attribute {
325
+ name: "group"
326
+ i: 1
327
+ type: INT
328
+ }
329
+ attribute {
330
+ name: "kernel_shape"
331
+ ints: 3
332
+ ints: 3
333
+ type: INTS
334
+ }
335
+ attribute {
336
+ name: "pads"
337
+ ints: 1
338
+ ints: 1
339
+ ints: 1
340
+ ints: 1
341
+ type: INTS
342
+ }
343
+ attribute {
344
+ name: "strides"
345
+ ints: 1
346
+ ints: 1
347
+ type: INTS
348
+ }
349
+ }
350
+ node {
351
+ input: "onnx::Relu_168"
352
+ output: "onnx::Conv_114"
353
+ name: "Relu_24"
354
+ op_type: "Relu"
355
+ }
356
+ node {
357
+ input: "onnx::Conv_114"
358
+ input: "onnx::Conv_172"
359
+ input: "onnx::Conv_173"
360
+ output: "onnx::Relu_171"
361
+ name: "Conv_25"
362
+ op_type: "Conv"
363
+ attribute {
364
+ name: "dilations"
365
+ ints: 1
366
+ ints: 1
367
+ type: INTS
368
+ }
369
+ attribute {
370
+ name: "group"
371
+ i: 1
372
+ type: INT
373
+ }
374
+ attribute {
375
+ name: "kernel_shape"
376
+ ints: 3
377
+ ints: 3
378
+ type: INTS
379
+ }
380
+ attribute {
381
+ name: "pads"
382
+ ints: 1
383
+ ints: 1
384
+ ints: 1
385
+ ints: 1
386
+ type: INTS
387
+ }
388
+ attribute {
389
+ name: "strides"
390
+ ints: 1
391
+ ints: 1
392
+ type: INTS
393
+ }
394
+ }
395
+ node {
396
+ input: "onnx::Relu_171"
397
+ output: "onnx::Pad_117"
398
+ name: "Relu_26"
399
+ op_type: "Relu"
400
+ }
401
+ node {
402
+ input: "onnx::Pad_117"
403
+ output: "onnx::AveragePool_118"
404
+ name: "Pad_27"
405
+ op_type: "Pad"
406
+ attribute {
407
+ name: "mode"
408
+ s: "constant"
409
+ type: STRING
410
+ }
411
+ attribute {
412
+ name: "pads"
413
+ ints: 0
414
+ ints: 0
415
+ ints: 0
416
+ ints: 0
417
+ ints: 0
418
+ ints: 0
419
+ ints: 0
420
+ ints: 0
421
+ type: INTS
422
+ }
423
+ attribute {
424
+ name: "value"
425
+ f: 0.0
426
+ type: FLOAT
427
+ }
428
+ }
429
+ node {
430
+ input: "onnx::AveragePool_118"
431
+ output: "x"
432
+ name: "AveragePool_28"
433
+ op_type: "AveragePool"
434
+ attribute {
435
+ name: "kernel_shape"
436
+ ints: 2
437
+ ints: 2
438
+ type: INTS
439
+ }
440
+ attribute {
441
+ name: "pads"
442
+ ints: 0
443
+ ints: 0
444
+ ints: 0
445
+ ints: 0
446
+ type: INTS
447
+ }
448
+ attribute {
449
+ name: "strides"
450
+ ints: 2
451
+ ints: 2
452
+ type: INTS
453
+ }
454
+ }
455
+ node {
456
+ input: "x"
457
+ input: "onnx::Conv_175"
458
+ input: "onnx::Conv_176"
459
+ output: "onnx::Relu_174"
460
+ name: "Conv_29"
461
+ op_type: "Conv"
462
+ attribute {
463
+ name: "dilations"
464
+ ints: 1
465
+ ints: 1
466
+ type: INTS
467
+ }
468
+ attribute {
469
+ name: "group"
470
+ i: 1
471
+ type: INT
472
+ }
473
+ attribute {
474
+ name: "kernel_shape"
475
+ ints: 3
476
+ ints: 3
477
+ type: INTS
478
+ }
479
+ attribute {
480
+ name: "pads"
481
+ ints: 1
482
+ ints: 1
483
+ ints: 1
484
+ ints: 1
485
+ type: INTS
486
+ }
487
+ attribute {
488
+ name: "strides"
489
+ ints: 1
490
+ ints: 1
491
+ type: INTS
492
+ }
493
+ }
494
+ node {
495
+ input: "onnx::Relu_174"
496
+ output: "onnx::Conv_122"
497
+ name: "Relu_30"
498
+ op_type: "Relu"
499
+ }
500
+ node {
501
+ input: "onnx::Conv_122"
502
+ input: "onnx::Conv_178"
503
+ input: "onnx::Conv_179"
504
+ output: "onnx::Relu_177"
505
+ name: "Conv_31"
506
+ op_type: "Conv"
507
+ attribute {
508
+ name: "dilations"
509
+ ints: 1
510
+ ints: 1
511
+ type: INTS
512
+ }
513
+ attribute {
514
+ name: "group"
515
+ i: 1
516
+ type: INT
517
+ }
518
+ attribute {
519
+ name: "kernel_shape"
520
+ ints: 3
521
+ ints: 3
522
+ type: INTS
523
+ }
524
+ attribute {
525
+ name: "pads"
526
+ ints: 1
527
+ ints: 1
528
+ ints: 1
529
+ ints: 1
530
+ type: INTS
531
+ }
532
+ attribute {
533
+ name: "strides"
534
+ ints: 1
535
+ ints: 1
536
+ type: INTS
537
+ }
538
+ }
539
+ node {
540
+ input: "onnx::Relu_177"
541
+ output: "onnx::Pad_125"
542
+ name: "Relu_32"
543
+ op_type: "Relu"
544
+ }
545
+ node {
546
+ input: "onnx::Pad_125"
547
+ output: "onnx::AveragePool_126"
548
+ name: "Pad_33"
549
+ op_type: "Pad"
550
+ attribute {
551
+ name: "mode"
552
+ s: "constant"
553
+ type: STRING
554
+ }
555
+ attribute {
556
+ name: "pads"
557
+ ints: 0
558
+ ints: 0
559
+ ints: 0
560
+ ints: 0
561
+ ints: 0
562
+ ints: 0
563
+ ints: 0
564
+ ints: 0
565
+ type: INTS
566
+ }
567
+ attribute {
568
+ name: "value"
569
+ f: 0.0
570
+ type: FLOAT
571
+ }
572
+ }
573
+ node {
574
+ input: "onnx::AveragePool_126"
575
+ output: "x.3"
576
+ name: "AveragePool_34"
577
+ op_type: "AveragePool"
578
+ attribute {
579
+ name: "kernel_shape"
580
+ ints: 2
581
+ ints: 2
582
+ type: INTS
583
+ }
584
+ attribute {
585
+ name: "pads"
586
+ ints: 0
587
+ ints: 0
588
+ ints: 0
589
+ ints: 0
590
+ type: INTS
591
+ }
592
+ attribute {
593
+ name: "strides"
594
+ ints: 2
595
+ ints: 2
596
+ type: INTS
597
+ }
598
+ }
599
+ node {
600
+ input: "x.3"
601
+ input: "onnx::Conv_181"
602
+ input: "onnx::Conv_182"
603
+ output: "onnx::Relu_180"
604
+ name: "Conv_35"
605
+ op_type: "Conv"
606
+ attribute {
607
+ name: "dilations"
608
+ ints: 1
609
+ ints: 1
610
+ type: INTS
611
+ }
612
+ attribute {
613
+ name: "group"
614
+ i: 1
615
+ type: INT
616
+ }
617
+ attribute {
618
+ name: "kernel_shape"
619
+ ints: 3
620
+ ints: 3
621
+ type: INTS
622
+ }
623
+ attribute {
624
+ name: "pads"
625
+ ints: 1
626
+ ints: 1
627
+ ints: 1
628
+ ints: 1
629
+ type: INTS
630
+ }
631
+ attribute {
632
+ name: "strides"
633
+ ints: 1
634
+ ints: 1
635
+ type: INTS
636
+ }
637
+ }
638
+ node {
639
+ input: "onnx::Relu_180"
640
+ output: "onnx::Conv_130"
641
+ name: "Relu_36"
642
+ op_type: "Relu"
643
+ }
644
+ node {
645
+ input: "onnx::Conv_130"
646
+ input: "onnx::Conv_184"
647
+ input: "onnx::Conv_185"
648
+ output: "onnx::Relu_183"
649
+ name: "Conv_37"
650
+ op_type: "Conv"
651
+ attribute {
652
+ name: "dilations"
653
+ ints: 1
654
+ ints: 1
655
+ type: INTS
656
+ }
657
+ attribute {
658
+ name: "group"
659
+ i: 1
660
+ type: INT
661
+ }
662
+ attribute {
663
+ name: "kernel_shape"
664
+ ints: 3
665
+ ints: 3
666
+ type: INTS
667
+ }
668
+ attribute {
669
+ name: "pads"
670
+ ints: 1
671
+ ints: 1
672
+ ints: 1
673
+ ints: 1
674
+ type: INTS
675
+ }
676
+ attribute {
677
+ name: "strides"
678
+ ints: 1
679
+ ints: 1
680
+ type: INTS
681
+ }
682
+ }
683
+ node {
684
+ input: "onnx::Relu_183"
685
+ output: "onnx::Pad_133"
686
+ name: "Relu_38"
687
+ op_type: "Relu"
688
+ }
689
+ node {
690
+ input: "onnx::Pad_133"
691
+ output: "onnx::AveragePool_134"
692
+ name: "Pad_39"
693
+ op_type: "Pad"
694
+ attribute {
695
+ name: "mode"
696
+ s: "constant"
697
+ type: STRING
698
+ }
699
+ attribute {
700
+ name: "pads"
701
+ ints: 0
702
+ ints: 0
703
+ ints: 0
704
+ ints: 0
705
+ ints: 0
706
+ ints: 0
707
+ ints: 0
708
+ ints: 0
709
+ type: INTS
710
+ }
711
+ attribute {
712
+ name: "value"
713
+ f: 0.0
714
+ type: FLOAT
715
+ }
716
+ }
717
+ node {
718
+ input: "onnx::AveragePool_134"
719
+ output: "x.7"
720
+ name: "AveragePool_40"
721
+ op_type: "AveragePool"
722
+ attribute {
723
+ name: "kernel_shape"
724
+ ints: 2
725
+ ints: 2
726
+ type: INTS
727
+ }
728
+ attribute {
729
+ name: "pads"
730
+ ints: 0
731
+ ints: 0
732
+ ints: 0
733
+ ints: 0
734
+ type: INTS
735
+ }
736
+ attribute {
737
+ name: "strides"
738
+ ints: 2
739
+ ints: 2
740
+ type: INTS
741
+ }
742
+ }
743
+ node {
744
+ input: "x.7"
745
+ input: "onnx::Conv_187"
746
+ input: "onnx::Conv_188"
747
+ output: "onnx::Relu_186"
748
+ name: "Conv_41"
749
+ op_type: "Conv"
750
+ attribute {
751
+ name: "dilations"
752
+ ints: 1
753
+ ints: 1
754
+ type: INTS
755
+ }
756
+ attribute {
757
+ name: "group"
758
+ i: 1
759
+ type: INT
760
+ }
761
+ attribute {
762
+ name: "kernel_shape"
763
+ ints: 3
764
+ ints: 3
765
+ type: INTS
766
+ }
767
+ attribute {
768
+ name: "pads"
769
+ ints: 1
770
+ ints: 1
771
+ ints: 1
772
+ ints: 1
773
+ type: INTS
774
+ }
775
+ attribute {
776
+ name: "strides"
777
+ ints: 1
778
+ ints: 1
779
+ type: INTS
780
+ }
781
+ }
782
+ node {
783
+ input: "onnx::Relu_186"
784
+ output: "onnx::Conv_138"
785
+ name: "Relu_42"
786
+ op_type: "Relu"
787
+ }
788
+ node {
789
+ input: "onnx::Conv_138"
790
+ input: "onnx::Conv_190"
791
+ input: "onnx::Conv_191"
792
+ output: "onnx::Relu_189"
793
+ name: "Conv_43"
794
+ op_type: "Conv"
795
+ attribute {
796
+ name: "dilations"
797
+ ints: 1
798
+ ints: 1
799
+ type: INTS
800
+ }
801
+ attribute {
802
+ name: "group"
803
+ i: 1
804
+ type: INT
805
+ }
806
+ attribute {
807
+ name: "kernel_shape"
808
+ ints: 3
809
+ ints: 3
810
+ type: INTS
811
+ }
812
+ attribute {
813
+ name: "pads"
814
+ ints: 1
815
+ ints: 1
816
+ ints: 1
817
+ ints: 1
818
+ type: INTS
819
+ }
820
+ attribute {
821
+ name: "strides"
822
+ ints: 1
823
+ ints: 1
824
+ type: INTS
825
+ }
826
+ }
827
+ node {
828
+ input: "onnx::Relu_189"
829
+ output: "onnx::Pad_141"
830
+ name: "Relu_44"
831
+ op_type: "Relu"
832
+ }
833
+ node {
834
+ input: "onnx::Pad_141"
835
+ output: "onnx::AveragePool_142"
836
+ name: "Pad_45"
837
+ op_type: "Pad"
838
+ attribute {
839
+ name: "mode"
840
+ s: "constant"
841
+ type: STRING
842
+ }
843
+ attribute {
844
+ name: "pads"
845
+ ints: 0
846
+ ints: 0
847
+ ints: 0
848
+ ints: 0
849
+ ints: 0
850
+ ints: 0
851
+ ints: 0
852
+ ints: 0
853
+ type: INTS
854
+ }
855
+ attribute {
856
+ name: "value"
857
+ f: 0.0
858
+ type: FLOAT
859
+ }
860
+ }
861
+ node {
862
+ input: "onnx::AveragePool_142"
863
+ output: "x.11"
864
+ name: "AveragePool_46"
865
+ op_type: "AveragePool"
866
+ attribute {
867
+ name: "kernel_shape"
868
+ ints: 2
869
+ ints: 2
870
+ type: INTS
871
+ }
872
+ attribute {
873
+ name: "pads"
874
+ ints: 0
875
+ ints: 0
876
+ ints: 0
877
+ ints: 0
878
+ type: INTS
879
+ }
880
+ attribute {
881
+ name: "strides"
882
+ ints: 2
883
+ ints: 2
884
+ type: INTS
885
+ }
886
+ }
887
+ node {
888
+ input: "x.11"
889
+ input: "onnx::Conv_193"
890
+ input: "onnx::Conv_194"
891
+ output: "onnx::Relu_192"
892
+ name: "Conv_47"
893
+ op_type: "Conv"
894
+ attribute {
895
+ name: "dilations"
896
+ ints: 1
897
+ ints: 1
898
+ type: INTS
899
+ }
900
+ attribute {
901
+ name: "group"
902
+ i: 1
903
+ type: INT
904
+ }
905
+ attribute {
906
+ name: "kernel_shape"
907
+ ints: 3
908
+ ints: 3
909
+ type: INTS
910
+ }
911
+ attribute {
912
+ name: "pads"
913
+ ints: 1
914
+ ints: 1
915
+ ints: 1
916
+ ints: 1
917
+ type: INTS
918
+ }
919
+ attribute {
920
+ name: "strides"
921
+ ints: 1
922
+ ints: 1
923
+ type: INTS
924
+ }
925
+ }
926
+ node {
927
+ input: "onnx::Relu_192"
928
+ output: "onnx::Conv_146"
929
+ name: "Relu_48"
930
+ op_type: "Relu"
931
+ }
932
+ node {
933
+ input: "onnx::Conv_146"
934
+ input: "onnx::Conv_196"
935
+ input: "onnx::Conv_197"
936
+ output: "onnx::Relu_195"
937
+ name: "Conv_49"
938
+ op_type: "Conv"
939
+ attribute {
940
+ name: "dilations"
941
+ ints: 1
942
+ ints: 1
943
+ type: INTS
944
+ }
945
+ attribute {
946
+ name: "group"
947
+ i: 1
948
+ type: INT
949
+ }
950
+ attribute {
951
+ name: "kernel_shape"
952
+ ints: 3
953
+ ints: 3
954
+ type: INTS
955
+ }
956
+ attribute {
957
+ name: "pads"
958
+ ints: 1
959
+ ints: 1
960
+ ints: 1
961
+ ints: 1
962
+ type: INTS
963
+ }
964
+ attribute {
965
+ name: "strides"
966
+ ints: 1
967
+ ints: 1
968
+ type: INTS
969
+ }
970
+ }
971
+ node {
972
+ input: "onnx::Relu_195"
973
+ output: "onnx::Pad_149"
974
+ name: "Relu_50"
975
+ op_type: "Relu"
976
+ }
977
+ node {
978
+ input: "onnx::Pad_149"
979
+ output: "onnx::AveragePool_150"
980
+ name: "Pad_51"
981
+ op_type: "Pad"
982
+ attribute {
983
+ name: "mode"
984
+ s: "constant"
985
+ type: STRING
986
+ }
987
+ attribute {
988
+ name: "pads"
989
+ ints: 0
990
+ ints: 0
991
+ ints: 0
992
+ ints: 0
993
+ ints: 0
994
+ ints: 0
995
+ ints: 0
996
+ ints: 0
997
+ type: INTS
998
+ }
999
+ attribute {
1000
+ name: "value"
1001
+ f: 0.0
1002
+ type: FLOAT
1003
+ }
1004
+ }
1005
+ node {
1006
+ input: "onnx::AveragePool_150"
1007
+ output: "x.15"
1008
+ name: "AveragePool_52"
1009
+ op_type: "AveragePool"
1010
+ attribute {
1011
+ name: "kernel_shape"
1012
+ ints: 2
1013
+ ints: 2
1014
+ type: INTS
1015
+ }
1016
+ attribute {
1017
+ name: "pads"
1018
+ ints: 0
1019
+ ints: 0
1020
+ ints: 0
1021
+ ints: 0
1022
+ type: INTS
1023
+ }
1024
+ attribute {
1025
+ name: "strides"
1026
+ ints: 2
1027
+ ints: 2
1028
+ type: INTS
1029
+ }
1030
+ }
1031
+ node {
1032
+ input: "x.15"
1033
+ input: "onnx::Conv_199"
1034
+ input: "onnx::Conv_200"
1035
+ output: "onnx::Relu_198"
1036
+ name: "Conv_53"
1037
+ op_type: "Conv"
1038
+ attribute {
1039
+ name: "dilations"
1040
+ ints: 1
1041
+ ints: 1
1042
+ type: INTS
1043
+ }
1044
+ attribute {
1045
+ name: "group"
1046
+ i: 1
1047
+ type: INT
1048
+ }
1049
+ attribute {
1050
+ name: "kernel_shape"
1051
+ ints: 3
1052
+ ints: 3
1053
+ type: INTS
1054
+ }
1055
+ attribute {
1056
+ name: "pads"
1057
+ ints: 1
1058
+ ints: 1
1059
+ ints: 1
1060
+ ints: 1
1061
+ type: INTS
1062
+ }
1063
+ attribute {
1064
+ name: "strides"
1065
+ ints: 1
1066
+ ints: 1
1067
+ type: INTS
1068
+ }
1069
+ }
1070
+ node {
1071
+ input: "onnx::Relu_198"
1072
+ output: "onnx::Conv_154"
1073
+ name: "Relu_54"
1074
+ op_type: "Relu"
1075
+ }
1076
+ node {
1077
+ input: "onnx::Conv_154"
1078
+ input: "onnx::Conv_202"
1079
+ input: "onnx::Conv_203"
1080
+ output: "onnx::Relu_201"
1081
+ name: "Conv_55"
1082
+ op_type: "Conv"
1083
+ attribute {
1084
+ name: "dilations"
1085
+ ints: 1
1086
+ ints: 1
1087
+ type: INTS
1088
+ }
1089
+ attribute {
1090
+ name: "group"
1091
+ i: 1
1092
+ type: INT
1093
+ }
1094
+ attribute {
1095
+ name: "kernel_shape"
1096
+ ints: 3
1097
+ ints: 3
1098
+ type: INTS
1099
+ }
1100
+ attribute {
1101
+ name: "pads"
1102
+ ints: 1
1103
+ ints: 1
1104
+ ints: 1
1105
+ ints: 1
1106
+ type: INTS
1107
+ }
1108
+ attribute {
1109
+ name: "strides"
1110
+ ints: 1
1111
+ ints: 1
1112
+ type: INTS
1113
+ }
1114
+ }
1115
+ node {
1116
+ input: "onnx::Relu_201"
1117
+ output: "onnx::Pad_157"
1118
+ name: "Relu_56"
1119
+ op_type: "Relu"
1120
+ }
1121
+ node {
1122
+ input: "onnx::Pad_157"
1123
+ output: "onnx::AveragePool_158"
1124
+ name: "Pad_57"
1125
+ op_type: "Pad"
1126
+ attribute {
1127
+ name: "mode"
1128
+ s: "constant"
1129
+ type: STRING
1130
+ }
1131
+ attribute {
1132
+ name: "pads"
1133
+ ints: 0
1134
+ ints: 0
1135
+ ints: 0
1136
+ ints: 0
1137
+ ints: 0
1138
+ ints: 0
1139
+ ints: 0
1140
+ ints: 0
1141
+ type: INTS
1142
+ }
1143
+ attribute {
1144
+ name: "value"
1145
+ f: 0.0
1146
+ type: FLOAT
1147
+ }
1148
+ }
1149
+ node {
1150
+ input: "onnx::AveragePool_158"
1151
+ output: "x.19"
1152
+ name: "AveragePool_58"
1153
+ op_type: "AveragePool"
1154
+ attribute {
1155
+ name: "kernel_shape"
1156
+ ints: 1
1157
+ ints: 1
1158
+ type: INTS
1159
+ }
1160
+ attribute {
1161
+ name: "pads"
1162
+ ints: 0
1163
+ ints: 0
1164
+ ints: 0
1165
+ ints: 0
1166
+ type: INTS
1167
+ }
1168
+ attribute {
1169
+ name: "strides"
1170
+ ints: 1
1171
+ ints: 1
1172
+ type: INTS
1173
+ }
1174
+ }
1175
+ node {
1176
+ input: "x.19"
1177
+ output: "onnx::ReduceMax_160"
1178
+ name: "ReduceMean_59"
1179
+ op_type: "ReduceMean"
1180
+ attribute {
1181
+ name: "axes"
1182
+ ints: 3
1183
+ type: INTS
1184
+ }
1185
+ attribute {
1186
+ name: "keepdims"
1187
+ i: 0
1188
+ type: INT
1189
+ }
1190
+ }
1191
+ node {
1192
+ input: "onnx::ReduceMax_160"
1193
+ output: "onnx::Add_161"
1194
+ name: "ReduceMax_60"
1195
+ op_type: "ReduceMax"
1196
+ attribute {
1197
+ name: "axes"
1198
+ ints: 2
1199
+ type: INTS
1200
+ }
1201
+ attribute {
1202
+ name: "keepdims"
1203
+ i: 0
1204
+ type: INT
1205
+ }
1206
+ }
1207
+ node {
1208
+ input: "onnx::ReduceMax_160"
1209
+ output: "onnx::Add_162"
1210
+ name: "ReduceMean_61"
1211
+ op_type: "ReduceMean"
1212
+ attribute {
1213
+ name: "axes"
1214
+ ints: 2
1215
+ type: INTS
1216
+ }
1217
+ attribute {
1218
+ name: "keepdims"
1219
+ i: 0
1220
+ type: INT
1221
+ }
1222
+ }
1223
+ node {
1224
+ input: "onnx::Add_161"
1225
+ input: "onnx::Add_162"
1226
+ output: "x.23"
1227
+ name: "Add_62"
1228
+ op_type: "Add"
1229
+ }
1230
+ node {
1231
+ input: "x.23"
1232
+ input: "fc1.weight"
1233
+ input: "fc1.bias"
1234
+ output: "onnx::Relu_164"
1235
+ name: "Gemm_63"
1236
+ op_type: "Gemm"
1237
+ attribute {
1238
+ name: "alpha"
1239
+ f: 1.0
1240
+ type: FLOAT
1241
+ }
1242
+ attribute {
1243
+ name: "beta"
1244
+ f: 1.0
1245
+ type: FLOAT
1246
+ }
1247
+ attribute {
1248
+ name: "transB"
1249
+ i: 1
1250
+ type: INT
1251
+ }
1252
+ }
1253
+ node {
1254
+ input: "onnx::Relu_164"
1255
+ output: "onnx::Gemm_165"
1256
+ name: "Relu_64"
1257
+ op_type: "Relu"
1258
+ }
1259
+ node {
1260
+ input: "onnx::Gemm_165"
1261
+ input: "fc_audioset.weight"
1262
+ input: "fc_audioset.bias"
1263
+ output: "onnx::Sigmoid_166"
1264
+ name: "Gemm_65"
1265
+ op_type: "Gemm"
1266
+ attribute {
1267
+ name: "alpha"
1268
+ f: 1.0
1269
+ type: FLOAT
1270
+ }
1271
+ attribute {
1272
+ name: "beta"
1273
+ f: 1.0
1274
+ type: FLOAT
1275
+ }
1276
+ attribute {
1277
+ name: "transB"
1278
+ i: 1
1279
+ type: INT
1280
+ }
1281
+ }
1282
+ node {
1283
+ input: "onnx::Sigmoid_166"
1284
+ output: "167"
1285
+ name: "Sigmoid_66"
1286
+ op_type: "Sigmoid"
1287
+ }
1288
+ initializer {
1289
+ dims: 513
1290
+ dims: 1
1291
+ dims: 1024
1292
+ data_type: 1
1293
+ name: "spectrogram_extractor.stft.conv_real.weight"
1294
+ }
1295
+ initializer {
1296
+ dims: 513
1297
+ dims: 1
1298
+ dims: 1024
1299
+ data_type: 1
1300
+ name: "spectrogram_extractor.stft.conv_imag.weight"
1301
+ }
1302
+ initializer {
1303
+ dims: 513
1304
+ dims: 64
1305
+ data_type: 1
1306
+ name: "logmel_extractor.melW"
1307
+ }
1308
+ initializer {
1309
+ dims: 64
1310
+ data_type: 1
1311
+ name: "bn0.weight"
1312
+ }
1313
+ initializer {
1314
+ dims: 64
1315
+ data_type: 1
1316
+ name: "bn0.bias"
1317
+ }
1318
+ initializer {
1319
+ dims: 64
1320
+ data_type: 1
1321
+ name: "bn0.running_mean"
1322
+ }
1323
+ initializer {
1324
+ dims: 64
1325
+ data_type: 1
1326
+ name: "bn0.running_var"
1327
+ }
1328
+ initializer {
1329
+ dims: 2048
1330
+ dims: 2048
1331
+ data_type: 1
1332
+ name: "fc1.weight"
1333
+ }
1334
+ initializer {
1335
+ dims: 2048
1336
+ data_type: 1
1337
+ name: "fc1.bias"
1338
+ }
1339
+ initializer {
1340
+ dims: 527
1341
+ dims: 2048
1342
+ data_type: 1
1343
+ name: "fc_audioset.weight"
1344
+ }
1345
+ initializer {
1346
+ dims: 527
1347
+ data_type: 1
1348
+ name: "fc_audioset.bias"
1349
+ }
1350
+ initializer {
1351
+ dims: 64
1352
+ dims: 1
1353
+ dims: 3
1354
+ dims: 3
1355
+ data_type: 1
1356
+ name: "onnx::Conv_169"
1357
+ }
1358
+ initializer {
1359
+ dims: 64
1360
+ data_type: 1
1361
+ name: "onnx::Conv_170"
1362
+ }
1363
+ initializer {
1364
+ dims: 64
1365
+ dims: 64
1366
+ dims: 3
1367
+ dims: 3
1368
+ data_type: 1
1369
+ name: "onnx::Conv_172"
1370
+ }
1371
+ initializer {
1372
+ dims: 64
1373
+ data_type: 1
1374
+ name: "onnx::Conv_173"
1375
+ }
1376
+ initializer {
1377
+ dims: 128
1378
+ dims: 64
1379
+ dims: 3
1380
+ dims: 3
1381
+ data_type: 1
1382
+ name: "onnx::Conv_175"
1383
+ }
1384
+ initializer {
1385
+ dims: 128
1386
+ data_type: 1
1387
+ name: "onnx::Conv_176"
1388
+ }
1389
+ initializer {
1390
+ dims: 128
1391
+ dims: 128
1392
+ dims: 3
1393
+ dims: 3
1394
+ data_type: 1
1395
+ name: "onnx::Conv_178"
1396
+ }
1397
+ initializer {
1398
+ dims: 128
1399
+ data_type: 1
1400
+ name: "onnx::Conv_179"
1401
+ }
1402
+ initializer {
1403
+ dims: 256
1404
+ dims: 128
1405
+ dims: 3
1406
+ dims: 3
1407
+ data_type: 1
1408
+ name: "onnx::Conv_181"
1409
+ }
1410
+ initializer {
1411
+ dims: 256
1412
+ data_type: 1
1413
+ name: "onnx::Conv_182"
1414
+ }
1415
+ initializer {
1416
+ dims: 256
1417
+ dims: 256
1418
+ dims: 3
1419
+ dims: 3
1420
+ data_type: 1
1421
+ name: "onnx::Conv_184"
1422
+ }
1423
+ initializer {
1424
+ dims: 256
1425
+ data_type: 1
1426
+ name: "onnx::Conv_185"
1427
+ }
1428
+ initializer {
1429
+ dims: 512
1430
+ dims: 256
1431
+ dims: 3
1432
+ dims: 3
1433
+ data_type: 1
1434
+ name: "onnx::Conv_187"
1435
+ }
1436
+ initializer {
1437
+ dims: 512
1438
+ data_type: 1
1439
+ name: "onnx::Conv_188"
1440
+ }
1441
+ initializer {
1442
+ dims: 512
1443
+ dims: 512
1444
+ dims: 3
1445
+ dims: 3
1446
+ data_type: 1
1447
+ name: "onnx::Conv_190"
1448
+ }
1449
+ initializer {
1450
+ dims: 512
1451
+ data_type: 1
1452
+ name: "onnx::Conv_191"
1453
+ }
1454
+ initializer {
1455
+ dims: 1024
1456
+ dims: 512
1457
+ dims: 3
1458
+ dims: 3
1459
+ data_type: 1
1460
+ name: "onnx::Conv_193"
1461
+ }
1462
+ initializer {
1463
+ dims: 1024
1464
+ data_type: 1
1465
+ name: "onnx::Conv_194"
1466
+ }
1467
+ initializer {
1468
+ dims: 1024
1469
+ dims: 1024
1470
+ dims: 3
1471
+ dims: 3
1472
+ data_type: 1
1473
+ name: "onnx::Conv_196"
1474
+ }
1475
+ initializer {
1476
+ dims: 1024
1477
+ data_type: 1
1478
+ name: "onnx::Conv_197"
1479
+ }
1480
+ initializer {
1481
+ dims: 2048
1482
+ dims: 1024
1483
+ dims: 3
1484
+ dims: 3
1485
+ data_type: 1
1486
+ name: "onnx::Conv_199"
1487
+ }
1488
+ initializer {
1489
+ dims: 2048
1490
+ data_type: 1
1491
+ name: "onnx::Conv_200"
1492
+ }
1493
+ initializer {
1494
+ dims: 2048
1495
+ dims: 2048
1496
+ dims: 3
1497
+ dims: 3
1498
+ data_type: 1
1499
+ name: "onnx::Conv_202"
1500
+ }
1501
+ initializer {
1502
+ dims: 2048
1503
+ data_type: 1
1504
+ name: "onnx::Conv_203"
1505
+ }
1506
+ initializer {
1507
+ data_type: 1
1508
+ name: "onnx::Pow_204"
1509
+ }
1510
+ initializer {
1511
+ data_type: 1
1512
+ name: "onnx::Pow_205"
1513
+ }
1514
+ input {
1515
+ name: "input.1"
1516
+ type {
1517
+ tensor_type {
1518
+ elem_type: 1
1519
+ shape {
1520
+ dim {
1521
+ dim_value: 1
1522
+ }
1523
+ dim {
1524
+ dim_value: 224000
1525
+ }
1526
+ }
1527
+ }
1528
+ }
1529
+ }
1530
+ output {
1531
+ name: "167"
1532
+ type {
1533
+ tensor_type {
1534
+ elem_type: 1
1535
+ shape {
1536
+ dim {
1537
+ dim_value: 1
1538
+ }
1539
+ dim {
1540
+ dim_value: 527
1541
+ }
1542
+ }
1543
+ }
1544
+ }
1545
+ }
1546
+ output {
1547
+ name: "onnx::Gemm_165"
1548
+ type {
1549
+ tensor_type {
1550
+ elem_type: 1
1551
+ shape {
1552
+ dim {
1553
+ dim_value: 1
1554
+ }
1555
+ dim {
1556
+ dim_value: 2048
1557
+ }
1558
+ }
1559
+ }
1560
+ }
1561
+ }
1562
+ }
1563
+ opset_import {
1564
+ version: 9
1565
+ }
models/onnx/ailia-models/code/R9_ZSCveAHg_7s.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59429c7174a3707575581d0b4181b1e2a420bda5d1de8d5195cf0b1e07d8bdfd
3
+ size 448078
models/onnx/ailia-models/code/README.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # audioset tagging cnn
3
+
4
+ This model is used for audio tagging and sound event detection.
5
+
6
+ ## audio tagging
7
+
8
+ ### Input
9
+
10
+ Audio file
11
+ ```
12
+ R9_ZSCveAHg_7s.wav
13
+ https://github.com/qiuqiangkong/audioset_tagging_cnn/tree/master/resources
14
+ ```
15
+
16
+ ### Output
17
+
18
+ ```
19
+ Speech: 0.893
20
+ Telephone bell ringing: 0.754
21
+ Inside, small room: 0.235
22
+ Telephone: 0.183
23
+ Music: 0.092
24
+ Ringtone: 0.047
25
+ Inside, large room or hall: 0.028
26
+ Alarm: 0.014
27
+ Animal: 0.009
28
+ Vehicle: 0.008
29
+ ```
30
+
31
+ ## sound event detection
32
+
33
+ ### Input
34
+
35
+ Audio file
36
+ ```
37
+ R9_ZSCveAHg_7s.wav
38
+ https://github.com/qiuqiangkong/audioset_tagging_cnn/tree/master/resources
39
+ ```
40
+
41
+ ### Output
42
+
43
+
44
+ ![Output](output.png)
45
+
46
+ ## Usage
47
+ Automatically downloads the onnx and prototxt files on the first run.
48
+ It is necessary to be connected to the Internet while downloading.
49
+
50
+ You can run the script on another wav file by adding the `--input` option.
51
+
52
+ ```bash
53
+ $ python3 audioset_tagging_cnn.py --input [wav_file]
54
+ ```
55
+
56
+ ### audio tagging
57
+
58
+ For the sample wav,
59
+ ```bash
60
+ $ python3 audioset_tagging_cnn.py --mode audio_tagging
61
+ ```
62
+
63
+ ### sound event detection
64
+
65
+ For the sample wav,
66
+ ```bash
67
+ $ python3 audioset_tagging_cnn.py --mode sound_event_detection
68
+
69
+ ```
70
+
71
+ You can use the `--savepath` option to change the name of the saved output file.
72
+ ```bash
73
+ $ python3 audioset_tagging_cnn.py --mode sound_event_detection --savepath output.png
74
+
75
+ ```
76
+
77
+
78
+ ## Reference
79
+
80
+ [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset_tagging_cnn)
81
+
82
+ ## Framework
83
+
84
+ PyTorch
85
+
86
+ ## Model Format
87
+
88
+ ONNX opset=9
89
+
90
+ ## Netron
91
+
92
+ [sound_event_detection.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/audio_processing/audioset_tagging_cnn/sound_event_detection.onnx.prototxt)
93
+
94
+ [audio_tagging.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/audio_processing/audioset_tagging_cnn/audio_tagging.onnx.prototxt)
models/onnx/ailia-models/code/audioset_tagging_cnn.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import time
4
+ import librosa
5
+ import argparse
6
+ import utilities
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ import json
10
+
11
+ import ailia
12
+
13
+ # import original modules
14
+ sys.path.append('../../util')
15
+ from arg_utils import get_base_parser, update_parser # noqa: E402
16
+ from model_utils import check_and_download_models # noqa: E402
17
+
18
+ # logger
19
+ from logging import getLogger # noqa: E402
20
+
21
+ logger = getLogger(__name__)
22
+
23
# ======================
# Parameters
# ======================

# Local file names of the two ONNX models and their prototxt descriptions.
WEIGHT_TAGGING_PATH = './audio_tagging.onnx'
MODEL_TAGGING_PATH = './audio_tagging.onnx.prototxt'
WEIGHT_DETECTION_PATH = './sound_event_detection.onnx'
MODEL_DETECTION_PATH = './sound_event_detection.onnx.prototxt'
# Base URL handed to check_and_download_models for fetching the files above.
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/audioset_tagging_cnn/'

# Default input clip and default name of the plot saved by sound event detection.
WAVE_PATH = "R9_ZSCveAHg_7s.wav"
SAVE_PATH = "output.png"


# ======================
# Argument Parser Config
# ======================

# NOTE(review): the third positional argument is presumably the default save
# path (the script later checks `args.savepath is None`) -- confirm against
# arg_utils.get_base_parser.
parser = get_base_parser(
    'audioset_tagging_cnn', WAVE_PATH, None, input_ftype='audio'
)
parser.add_argument(
    '--mode', type=str, default="audio_tagging",
    choices=["audio_tagging", "sound_event_detection"],
    help='Which model to run on the input audio.'
)
parser.add_argument(
    '--sample_rate', type=int, default=32000,
    help='Sample rate the input audio is resampled to when it is loaded.'
)
parser.add_argument(
    '--window_size', type=int, default=1024,
    help='FFT size of the spectrogram plotted in sound_event_detection mode.'
)
parser.add_argument(
    '--hop_size', type=int, default=320,
    help='Hop length of the plotted spectrogram; also sets frames per second.'
)
parser.add_argument(
    '--mel_bins', type=int, default=64,
    help='Number of mel bins (accepted but not read by this script).'
)
parser.add_argument(
    '--fmin', type=int, default=50,
    help='Lower frequency bound (accepted but not read by this script).'
)
parser.add_argument(
    '--fmax', type=int, default=14000,
    help='Upper frequency bound (accepted but not read by this script).'
)
parser.add_argument(
    '-w', '--write_json', action='store_true',
    help='Flag to output results to json file.'
)

args = parser.parse_args()
54
+
55
+
56
def audio_tagging(args, model):
    """Run clip-level audio tagging on ``args.input`` and report the top classes.

    The audio file is loaded with librosa as mono at ``args.sample_rate``,
    given a leading batch axis and passed to ``model.run``. The highest-scoring
    classes (at most ten) are printed as ``label: score``; when
    ``args.write_json`` is set the same entries are also written to
    ``output_tagging.json``.

    Args:
        args: Parsed command-line namespace; reads ``input``, ``sample_rate``
            and ``write_json``.
        model: Object exposing ``run(waveform)``; element ``[0][0]`` of its
            result is used as the per-class score vector.

    Returns:
        Tuple ``(clipwise_output, labels)``: the score vector and the label
        list taken from ``utilities.labels``.
    """
    labels = utilities.labels

    # Load audio
    waveform, _ = librosa.core.load(args.input, sr=args.sample_rate, mono=True)
    waveform = waveform[None, :]    # (1, audio_length)

    clipwise_output = model.run(waveform)[0][0]

    # Class indexes ordered from highest to lowest score.
    sorted_indexes = np.argsort(clipwise_output)[::-1]

    # Build the top list once so the printed and JSON results cannot diverge;
    # never ask for more entries than there are classes.
    top_k = min(10, len(sorted_indexes))
    top_results = [
        (str(labels[idx]), float(clipwise_output[idx]))
        for idx in sorted_indexes[:top_k]
    ]

    # Print audio tagging top probabilities
    for label, prob in top_results:
        print('{}: {:.3f}'.format(label, prob))

    if args.write_json:
        result = [{'label': label, 'prob': prob} for label, prob in top_results]
        with open('output_tagging.json', 'w') as f:
            json.dump(result, f, indent=2)

    return clipwise_output, labels
96
+
97
+
98
def sound_event_detection(args, model):
    """Run frame-level sound event detection on ``args.input`` and plot it.

    The audio file is loaded with librosa as mono at ``args.sample_rate``,
    given a leading batch axis and passed to ``model.run``. A two-panel figure
    (log spectrogram on top, scores of the strongest classes below) is saved to
    ``args.savepath``, or to ``SAVE_PATH`` when no save path was given.

    Args:
        args: Parsed command-line namespace; reads ``input``, ``sample_rate``,
            ``window_size``, ``hop_size`` and ``savepath``.
        model: Object exposing ``run(waveform)``; element ``[0][0]`` of its
            result is used as a (time_steps, classes_num) score matrix.

    Returns:
        Tuple ``(framewise_output, labels)``: the score matrix and the label
        list taken from ``utilities.labels``.
    """
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size

    labels = utilities.labels
    frames_per_second = sample_rate // hop_size

    # Load audio
    waveform, _ = librosa.core.load(args.input, sr=sample_rate, mono=True)
    waveform = waveform[None, :]    # (1, audio_length)

    framewise_output = model.run(waveform)[0][0]

    print('Sound event detection result (time_steps x classes_num): {}'.format(
        framewise_output.shape))

    # Rank classes by their peak score over time, strongest first.
    sorted_indexes = np.argsort(np.max(framewise_output, axis=0))[::-1]

    # Show top results; never ask for more rows than there are classes so the
    # tick count below always matches the label count.
    top_k = min(10, len(sorted_indexes))
    top_indexes = sorted_indexes[0:top_k]
    top_result_mat = framewise_output[:, top_indexes]    # (time_steps, top_k)

    # Spectrogram of the same clip, used as the upper panel of the plot.
    stft = librosa.core.stft(y=waveform[0], n_fft=window_size,
                             hop_length=hop_size, window='hann', center=True)
    frames_num = stft.shape[-1]

    fig, axs = plt.subplots(2, 1, sharex=True, figsize=(10, 4))
    try:
        axs[0].matshow(np.log(np.abs(stft)), origin='lower', aspect='auto', cmap='jet')
        axs[0].set_ylabel('Frequency bins')
        axs[0].set_title('Log spectrogram')
        axs[1].matshow(top_result_mat.T, origin='upper', aspect='auto', cmap='jet', vmin=0, vmax=1)
        # One x tick per second of audio.
        axs[1].xaxis.set_ticks(np.arange(0, frames_num, frames_per_second))
        axs[1].xaxis.set_ticklabels(np.arange(0, frames_num / frames_per_second))
        axs[1].yaxis.set_ticks(np.arange(0, top_k))
        axs[1].yaxis.set_ticklabels(np.array(labels)[top_indexes])
        axs[1].yaxis.grid(color='k', linestyle='solid', linewidth=0.3, alpha=0.3)
        axs[1].set_xlabel('Seconds')
        axs[1].xaxis.set_ticks_position('bottom')

        plt.tight_layout()
        plt.savefig(SAVE_PATH if args.savepath is None else args.savepath)
    finally:
        # Release the figure: in benchmark mode this function runs repeatedly
        # and pyplot otherwise keeps every figure alive.
        plt.close(fig)

    return framewise_output, labels
155
+
156
+ ## ======================
157
+ ## Main function
158
+ ## ======================
159
+
160
def main(func):
    """Create the network for ``args.mode`` and run ``func`` on it.

    Args:
        func: Callable invoked as ``func(args, model)``; in this script either
            ``audio_tagging`` or ``sound_event_detection``.

    Raises:
        ValueError: If ``args.mode`` is not one of the supported modes.
    """
    weight_paths = {
        "audio_tagging": WEIGHT_TAGGING_PATH,
        "sound_event_detection": WEIGHT_DETECTION_PATH,
    }
    # Fail with a clear message instead of hitting an unbound `model` below.
    if args.mode not in weight_paths:
        raise ValueError('Unsupported mode: {}'.format(args.mode))

    # create instance
    model = ailia.Net(None, weight_paths[args.mode])

    logger.info('Start inference...')
    if args.benchmark:
        logger.info('BENCHMARK mode')
        for _ in range(5):
            # Wall-clock time of one full inference call, in milliseconds.
            start = int(round(time.time() * 1000))
            func(args, model)
            end = int(round(time.time() * 1000))
            logger.info("\tailia processing time {} ms".format(end - start))
    else:
        func(args, model)

    logger.info('Script finished successfully.')
181
+
182
+
183
+
184
if __name__ == '__main__':
    # Fetch only the model that the selected mode actually loads; each weight
    # file is roughly 327 MB, so downloading both is wasted work.
    if args.mode == 'audio_tagging':
        check_and_download_models(WEIGHT_TAGGING_PATH, MODEL_TAGGING_PATH, REMOTE_PATH)
        main(audio_tagging)
    elif args.mode == 'sound_event_detection':
        check_and_download_models(WEIGHT_DETECTION_PATH, MODEL_DETECTION_PATH, REMOTE_PATH)
        main(sound_event_detection)
    else:
        raise Exception('Error argument!')
194
+
195
+
models/onnx/ailia-models/code/class_labels_indices.csv ADDED
@@ -0,0 +1,528 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index,mid,display_name
2
+ 0,/m/09x0r,"Speech"
3
+ 1,/m/05zppz,"Male speech, man speaking"
4
+ 2,/m/02zsn,"Female speech, woman speaking"
5
+ 3,/m/0ytgt,"Child speech, kid speaking"
6
+ 4,/m/01h8n0,"Conversation"
7
+ 5,/m/02qldy,"Narration, monologue"
8
+ 6,/m/0261r1,"Babbling"
9
+ 7,/m/0brhx,"Speech synthesizer"
10
+ 8,/m/07p6fty,"Shout"
11
+ 9,/m/07q4ntr,"Bellow"
12
+ 10,/m/07rwj3x,"Whoop"
13
+ 11,/m/07sr1lc,"Yell"
14
+ 12,/m/04gy_2,"Battle cry"
15
+ 13,/t/dd00135,"Children shouting"
16
+ 14,/m/03qc9zr,"Screaming"
17
+ 15,/m/02rtxlg,"Whispering"
18
+ 16,/m/01j3sz,"Laughter"
19
+ 17,/t/dd00001,"Baby laughter"
20
+ 18,/m/07r660_,"Giggle"
21
+ 19,/m/07s04w4,"Snicker"
22
+ 20,/m/07sq110,"Belly laugh"
23
+ 21,/m/07rgt08,"Chuckle, chortle"
24
+ 22,/m/0463cq4,"Crying, sobbing"
25
+ 23,/t/dd00002,"Baby cry, infant cry"
26
+ 24,/m/07qz6j3,"Whimper"
27
+ 25,/m/07qw_06,"Wail, moan"
28
+ 26,/m/07plz5l,"Sigh"
29
+ 27,/m/015lz1,"Singing"
30
+ 28,/m/0l14jd,"Choir"
31
+ 29,/m/01swy6,"Yodeling"
32
+ 30,/m/02bk07,"Chant"
33
+ 31,/m/01c194,"Mantra"
34
+ 32,/t/dd00003,"Male singing"
35
+ 33,/t/dd00004,"Female singing"
36
+ 34,/t/dd00005,"Child singing"
37
+ 35,/t/dd00006,"Synthetic singing"
38
+ 36,/m/06bxc,"Rapping"
39
+ 37,/m/02fxyj,"Humming"
40
+ 38,/m/07s2xch,"Groan"
41
+ 39,/m/07r4k75,"Grunt"
42
+ 40,/m/01w250,"Whistling"
43
+ 41,/m/0lyf6,"Breathing"
44
+ 42,/m/07mzm6,"Wheeze"
45
+ 43,/m/01d3sd,"Snoring"
46
+ 44,/m/07s0dtb,"Gasp"
47
+ 45,/m/07pyy8b,"Pant"
48
+ 46,/m/07q0yl5,"Snort"
49
+ 47,/m/01b_21,"Cough"
50
+ 48,/m/0dl9sf8,"Throat clearing"
51
+ 49,/m/01hsr_,"Sneeze"
52
+ 50,/m/07ppn3j,"Sniff"
53
+ 51,/m/06h7j,"Run"
54
+ 52,/m/07qv_x_,"Shuffle"
55
+ 53,/m/07pbtc8,"Walk, footsteps"
56
+ 54,/m/03cczk,"Chewing, mastication"
57
+ 55,/m/07pdhp0,"Biting"
58
+ 56,/m/0939n_,"Gargling"
59
+ 57,/m/01g90h,"Stomach rumble"
60
+ 58,/m/03q5_w,"Burping, eructation"
61
+ 59,/m/02p3nc,"Hiccup"
62
+ 60,/m/02_nn,"Fart"
63
+ 61,/m/0k65p,"Hands"
64
+ 62,/m/025_jnm,"Finger snapping"
65
+ 63,/m/0l15bq,"Clapping"
66
+ 64,/m/01jg02,"Heart sounds, heartbeat"
67
+ 65,/m/01jg1z,"Heart murmur"
68
+ 66,/m/053hz1,"Cheering"
69
+ 67,/m/028ght,"Applause"
70
+ 68,/m/07rkbfh,"Chatter"
71
+ 69,/m/03qtwd,"Crowd"
72
+ 70,/m/07qfr4h,"Hubbub, speech noise, speech babble"
73
+ 71,/t/dd00013,"Children playing"
74
+ 72,/m/0jbk,"Animal"
75
+ 73,/m/068hy,"Domestic animals, pets"
76
+ 74,/m/0bt9lr,"Dog"
77
+ 75,/m/05tny_,"Bark"
78
+ 76,/m/07r_k2n,"Yip"
79
+ 77,/m/07qf0zm,"Howl"
80
+ 78,/m/07rc7d9,"Bow-wow"
81
+ 79,/m/0ghcn6,"Growling"
82
+ 80,/t/dd00136,"Whimper (dog)"
83
+ 81,/m/01yrx,"Cat"
84
+ 82,/m/02yds9,"Purr"
85
+ 83,/m/07qrkrw,"Meow"
86
+ 84,/m/07rjwbb,"Hiss"
87
+ 85,/m/07r81j2,"Caterwaul"
88
+ 86,/m/0ch8v,"Livestock, farm animals, working animals"
89
+ 87,/m/03k3r,"Horse"
90
+ 88,/m/07rv9rh,"Clip-clop"
91
+ 89,/m/07q5rw0,"Neigh, whinny"
92
+ 90,/m/01xq0k1,"Cattle, bovinae"
93
+ 91,/m/07rpkh9,"Moo"
94
+ 92,/m/0239kh,"Cowbell"
95
+ 93,/m/068zj,"Pig"
96
+ 94,/t/dd00018,"Oink"
97
+ 95,/m/03fwl,"Goat"
98
+ 96,/m/07q0h5t,"Bleat"
99
+ 97,/m/07bgp,"Sheep"
100
+ 98,/m/025rv6n,"Fowl"
101
+ 99,/m/09b5t,"Chicken, rooster"
102
+ 100,/m/07st89h,"Cluck"
103
+ 101,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
104
+ 102,/m/01rd7k,"Turkey"
105
+ 103,/m/07svc2k,"Gobble"
106
+ 104,/m/09ddx,"Duck"
107
+ 105,/m/07qdb04,"Quack"
108
+ 106,/m/0dbvp,"Goose"
109
+ 107,/m/07qwf61,"Honk"
110
+ 108,/m/01280g,"Wild animals"
111
+ 109,/m/0cdnk,"Roaring cats (lions, tigers)"
112
+ 110,/m/04cvmfc,"Roar"
113
+ 111,/m/015p6,"Bird"
114
+ 112,/m/020bb7,"Bird vocalization, bird call, bird song"
115
+ 113,/m/07pggtn,"Chirp, tweet"
116
+ 114,/m/07sx8x_,"Squawk"
117
+ 115,/m/0h0rv,"Pigeon, dove"
118
+ 116,/m/07r_25d,"Coo"
119
+ 117,/m/04s8yn,"Crow"
120
+ 118,/m/07r5c2p,"Caw"
121
+ 119,/m/09d5_,"Owl"
122
+ 120,/m/07r_80w,"Hoot"
123
+ 121,/m/05_wcq,"Bird flight, flapping wings"
124
+ 122,/m/01z5f,"Canidae, dogs, wolves"
125
+ 123,/m/06hps,"Rodents, rats, mice"
126
+ 124,/m/04rmv,"Mouse"
127
+ 125,/m/07r4gkf,"Patter"
128
+ 126,/m/03vt0,"Insect"
129
+ 127,/m/09xqv,"Cricket"
130
+ 128,/m/09f96,"Mosquito"
131
+ 129,/m/0h2mp,"Fly, housefly"
132
+ 130,/m/07pjwq1,"Buzz"
133
+ 131,/m/01h3n,"Bee, wasp, etc."
134
+ 132,/m/09ld4,"Frog"
135
+ 133,/m/07st88b,"Croak"
136
+ 134,/m/078jl,"Snake"
137
+ 135,/m/07qn4z3,"Rattle"
138
+ 136,/m/032n05,"Whale vocalization"
139
+ 137,/m/04rlf,"Music"
140
+ 138,/m/04szw,"Musical instrument"
141
+ 139,/m/0fx80y,"Plucked string instrument"
142
+ 140,/m/0342h,"Guitar"
143
+ 141,/m/02sgy,"Electric guitar"
144
+ 142,/m/018vs,"Bass guitar"
145
+ 143,/m/042v_gx,"Acoustic guitar"
146
+ 144,/m/06w87,"Steel guitar, slide guitar"
147
+ 145,/m/01glhc,"Tapping (guitar technique)"
148
+ 146,/m/07s0s5r,"Strum"
149
+ 147,/m/018j2,"Banjo"
150
+ 148,/m/0jtg0,"Sitar"
151
+ 149,/m/04rzd,"Mandolin"
152
+ 150,/m/01bns_,"Zither"
153
+ 151,/m/07xzm,"Ukulele"
154
+ 152,/m/05148p4,"Keyboard (musical)"
155
+ 153,/m/05r5c,"Piano"
156
+ 154,/m/01s0ps,"Electric piano"
157
+ 155,/m/013y1f,"Organ"
158
+ 156,/m/03xq_f,"Electronic organ"
159
+ 157,/m/03gvt,"Hammond organ"
160
+ 158,/m/0l14qv,"Synthesizer"
161
+ 159,/m/01v1d8,"Sampler"
162
+ 160,/m/03q5t,"Harpsichord"
163
+ 161,/m/0l14md,"Percussion"
164
+ 162,/m/02hnl,"Drum kit"
165
+ 163,/m/0cfdd,"Drum machine"
166
+ 164,/m/026t6,"Drum"
167
+ 165,/m/06rvn,"Snare drum"
168
+ 166,/m/03t3fj,"Rimshot"
169
+ 167,/m/02k_mr,"Drum roll"
170
+ 168,/m/0bm02,"Bass drum"
171
+ 169,/m/011k_j,"Timpani"
172
+ 170,/m/01p970,"Tabla"
173
+ 171,/m/01qbl,"Cymbal"
174
+ 172,/m/03qtq,"Hi-hat"
175
+ 173,/m/01sm1g,"Wood block"
176
+ 174,/m/07brj,"Tambourine"
177
+ 175,/m/05r5wn,"Rattle (instrument)"
178
+ 176,/m/0xzly,"Maraca"
179
+ 177,/m/0mbct,"Gong"
180
+ 178,/m/016622,"Tubular bells"
181
+ 179,/m/0j45pbj,"Mallet percussion"
182
+ 180,/m/0dwsp,"Marimba, xylophone"
183
+ 181,/m/0dwtp,"Glockenspiel"
184
+ 182,/m/0dwt5,"Vibraphone"
185
+ 183,/m/0l156b,"Steelpan"
186
+ 184,/m/05pd6,"Orchestra"
187
+ 185,/m/01kcd,"Brass instrument"
188
+ 186,/m/0319l,"French horn"
189
+ 187,/m/07gql,"Trumpet"
190
+ 188,/m/07c6l,"Trombone"
191
+ 189,/m/0l14_3,"Bowed string instrument"
192
+ 190,/m/02qmj0d,"String section"
193
+ 191,/m/07y_7,"Violin, fiddle"
194
+ 192,/m/0d8_n,"Pizzicato"
195
+ 193,/m/01xqw,"Cello"
196
+ 194,/m/02fsn,"Double bass"
197
+ 195,/m/085jw,"Wind instrument, woodwind instrument"
198
+ 196,/m/0l14j_,"Flute"
199
+ 197,/m/06ncr,"Saxophone"
200
+ 198,/m/01wy6,"Clarinet"
201
+ 199,/m/03m5k,"Harp"
202
+ 200,/m/0395lw,"Bell"
203
+ 201,/m/03w41f,"Church bell"
204
+ 202,/m/027m70_,"Jingle bell"
205
+ 203,/m/0gy1t2s,"Bicycle bell"
206
+ 204,/m/07n_g,"Tuning fork"
207
+ 205,/m/0f8s22,"Chime"
208
+ 206,/m/026fgl,"Wind chime"
209
+ 207,/m/0150b9,"Change ringing (campanology)"
210
+ 208,/m/03qjg,"Harmonica"
211
+ 209,/m/0mkg,"Accordion"
212
+ 210,/m/0192l,"Bagpipes"
213
+ 211,/m/02bxd,"Didgeridoo"
214
+ 212,/m/0l14l2,"Shofar"
215
+ 213,/m/07kc_,"Theremin"
216
+ 214,/m/0l14t7,"Singing bowl"
217
+ 215,/m/01hgjl,"Scratching (performance technique)"
218
+ 216,/m/064t9,"Pop music"
219
+ 217,/m/0glt670,"Hip hop music"
220
+ 218,/m/02cz_7,"Beatboxing"
221
+ 219,/m/06by7,"Rock music"
222
+ 220,/m/03lty,"Heavy metal"
223
+ 221,/m/05r6t,"Punk rock"
224
+ 222,/m/0dls3,"Grunge"
225
+ 223,/m/0dl5d,"Progressive rock"
226
+ 224,/m/07sbbz2,"Rock and roll"
227
+ 225,/m/05w3f,"Psychedelic rock"
228
+ 226,/m/06j6l,"Rhythm and blues"
229
+ 227,/m/0gywn,"Soul music"
230
+ 228,/m/06cqb,"Reggae"
231
+ 229,/m/01lyv,"Country"
232
+ 230,/m/015y_n,"Swing music"
233
+ 231,/m/0gg8l,"Bluegrass"
234
+ 232,/m/02x8m,"Funk"
235
+ 233,/m/02w4v,"Folk music"
236
+ 234,/m/06j64v,"Middle Eastern music"
237
+ 235,/m/03_d0,"Jazz"
238
+ 236,/m/026z9,"Disco"
239
+ 237,/m/0ggq0m,"Classical music"
240
+ 238,/m/05lls,"Opera"
241
+ 239,/m/02lkt,"Electronic music"
242
+ 240,/m/03mb9,"House music"
243
+ 241,/m/07gxw,"Techno"
244
+ 242,/m/07s72n,"Dubstep"
245
+ 243,/m/0283d,"Drum and bass"
246
+ 244,/m/0m0jc,"Electronica"
247
+ 245,/m/08cyft,"Electronic dance music"
248
+ 246,/m/0fd3y,"Ambient music"
249
+ 247,/m/07lnk,"Trance music"
250
+ 248,/m/0g293,"Music of Latin America"
251
+ 249,/m/0ln16,"Salsa music"
252
+ 250,/m/0326g,"Flamenco"
253
+ 251,/m/0155w,"Blues"
254
+ 252,/m/05fw6t,"Music for children"
255
+ 253,/m/02v2lh,"New-age music"
256
+ 254,/m/0y4f8,"Vocal music"
257
+ 255,/m/0z9c,"A capella"
258
+ 256,/m/0164x2,"Music of Africa"
259
+ 257,/m/0145m,"Afrobeat"
260
+ 258,/m/02mscn,"Christian music"
261
+ 259,/m/016cjb,"Gospel music"
262
+ 260,/m/028sqc,"Music of Asia"
263
+ 261,/m/015vgc,"Carnatic music"
264
+ 262,/m/0dq0md,"Music of Bollywood"
265
+ 263,/m/06rqw,"Ska"
266
+ 264,/m/02p0sh1,"Traditional music"
267
+ 265,/m/05rwpb,"Independent music"
268
+ 266,/m/074ft,"Song"
269
+ 267,/m/025td0t,"Background music"
270
+ 268,/m/02cjck,"Theme music"
271
+ 269,/m/03r5q_,"Jingle (music)"
272
+ 270,/m/0l14gg,"Soundtrack music"
273
+ 271,/m/07pkxdp,"Lullaby"
274
+ 272,/m/01z7dr,"Video game music"
275
+ 273,/m/0140xf,"Christmas music"
276
+ 274,/m/0ggx5q,"Dance music"
277
+ 275,/m/04wptg,"Wedding music"
278
+ 276,/t/dd00031,"Happy music"
279
+ 277,/t/dd00032,"Funny music"
280
+ 278,/t/dd00033,"Sad music"
281
+ 279,/t/dd00034,"Tender music"
282
+ 280,/t/dd00035,"Exciting music"
283
+ 281,/t/dd00036,"Angry music"
284
+ 282,/t/dd00037,"Scary music"
285
+ 283,/m/03m9d0z,"Wind"
286
+ 284,/m/09t49,"Rustling leaves"
287
+ 285,/t/dd00092,"Wind noise (microphone)"
288
+ 286,/m/0jb2l,"Thunderstorm"
289
+ 287,/m/0ngt1,"Thunder"
290
+ 288,/m/0838f,"Water"
291
+ 289,/m/06mb1,"Rain"
292
+ 290,/m/07r10fb,"Raindrop"
293
+ 291,/t/dd00038,"Rain on surface"
294
+ 292,/m/0j6m2,"Stream"
295
+ 293,/m/0j2kx,"Waterfall"
296
+ 294,/m/05kq4,"Ocean"
297
+ 295,/m/034srq,"Waves, surf"
298
+ 296,/m/06wzb,"Steam"
299
+ 297,/m/07swgks,"Gurgling"
300
+ 298,/m/02_41,"Fire"
301
+ 299,/m/07pzfmf,"Crackle"
302
+ 300,/m/07yv9,"Vehicle"
303
+ 301,/m/019jd,"Boat, Water vehicle"
304
+ 302,/m/0hsrw,"Sailboat, sailing ship"
305
+ 303,/m/056ks2,"Rowboat, canoe, kayak"
306
+ 304,/m/02rlv9,"Motorboat, speedboat"
307
+ 305,/m/06q74,"Ship"
308
+ 306,/m/012f08,"Motor vehicle (road)"
309
+ 307,/m/0k4j,"Car"
310
+ 308,/m/0912c9,"Vehicle horn, car horn, honking"
311
+ 309,/m/07qv_d5,"Toot"
312
+ 310,/m/02mfyn,"Car alarm"
313
+ 311,/m/04gxbd,"Power windows, electric windows"
314
+ 312,/m/07rknqz,"Skidding"
315
+ 313,/m/0h9mv,"Tire squeal"
316
+ 314,/t/dd00134,"Car passing by"
317
+ 315,/m/0ltv,"Race car, auto racing"
318
+ 316,/m/07r04,"Truck"
319
+ 317,/m/0gvgw0,"Air brake"
320
+ 318,/m/05x_td,"Air horn, truck horn"
321
+ 319,/m/02rhddq,"Reversing beeps"
322
+ 320,/m/03cl9h,"Ice cream truck, ice cream van"
323
+ 321,/m/01bjv,"Bus"
324
+ 322,/m/03j1ly,"Emergency vehicle"
325
+ 323,/m/04qvtq,"Police car (siren)"
326
+ 324,/m/012n7d,"Ambulance (siren)"
327
+ 325,/m/012ndj,"Fire engine, fire truck (siren)"
328
+ 326,/m/04_sv,"Motorcycle"
329
+ 327,/m/0btp2,"Traffic noise, roadway noise"
330
+ 328,/m/06d_3,"Rail transport"
331
+ 329,/m/07jdr,"Train"
332
+ 330,/m/04zmvq,"Train whistle"
333
+ 331,/m/0284vy3,"Train horn"
334
+ 332,/m/01g50p,"Railroad car, train wagon"
335
+ 333,/t/dd00048,"Train wheels squealing"
336
+ 334,/m/0195fx,"Subway, metro, underground"
337
+ 335,/m/0k5j,"Aircraft"
338
+ 336,/m/014yck,"Aircraft engine"
339
+ 337,/m/04229,"Jet engine"
340
+ 338,/m/02l6bg,"Propeller, airscrew"
341
+ 339,/m/09ct_,"Helicopter"
342
+ 340,/m/0cmf2,"Fixed-wing aircraft, airplane"
343
+ 341,/m/0199g,"Bicycle"
344
+ 342,/m/06_fw,"Skateboard"
345
+ 343,/m/02mk9,"Engine"
346
+ 344,/t/dd00065,"Light engine (high frequency)"
347
+ 345,/m/08j51y,"Dental drill, dentist's drill"
348
+ 346,/m/01yg9g,"Lawn mower"
349
+ 347,/m/01j4z9,"Chainsaw"
350
+ 348,/t/dd00066,"Medium engine (mid frequency)"
351
+ 349,/t/dd00067,"Heavy engine (low frequency)"
352
+ 350,/m/01h82_,"Engine knocking"
353
+ 351,/t/dd00130,"Engine starting"
354
+ 352,/m/07pb8fc,"Idling"
355
+ 353,/m/07q2z82,"Accelerating, revving, vroom"
356
+ 354,/m/02dgv,"Door"
357
+ 355,/m/03wwcy,"Doorbell"
358
+ 356,/m/07r67yg,"Ding-dong"
359
+ 357,/m/02y_763,"Sliding door"
360
+ 358,/m/07rjzl8,"Slam"
361
+ 359,/m/07r4wb8,"Knock"
362
+ 360,/m/07qcpgn,"Tap"
363
+ 361,/m/07q6cd_,"Squeak"
364
+ 362,/m/0642b4,"Cupboard open or close"
365
+ 363,/m/0fqfqc,"Drawer open or close"
366
+ 364,/m/04brg2,"Dishes, pots, and pans"
367
+ 365,/m/023pjk,"Cutlery, silverware"
368
+ 366,/m/07pn_8q,"Chopping (food)"
369
+ 367,/m/0dxrf,"Frying (food)"
370
+ 368,/m/0fx9l,"Microwave oven"
371
+ 369,/m/02pjr4,"Blender"
372
+ 370,/m/02jz0l,"Water tap, faucet"
373
+ 371,/m/0130jx,"Sink (filling or washing)"
374
+ 372,/m/03dnzn,"Bathtub (filling or washing)"
375
+ 373,/m/03wvsk,"Hair dryer"
376
+ 374,/m/01jt3m,"Toilet flush"
377
+ 375,/m/012xff,"Toothbrush"
378
+ 376,/m/04fgwm,"Electric toothbrush"
379
+ 377,/m/0d31p,"Vacuum cleaner"
380
+ 378,/m/01s0vc,"Zipper (clothing)"
381
+ 379,/m/03v3yw,"Keys jangling"
382
+ 380,/m/0242l,"Coin (dropping)"
383
+ 381,/m/01lsmm,"Scissors"
384
+ 382,/m/02g901,"Electric shaver, electric razor"
385
+ 383,/m/05rj2,"Shuffling cards"
386
+ 384,/m/0316dw,"Typing"
387
+ 385,/m/0c2wf,"Typewriter"
388
+ 386,/m/01m2v,"Computer keyboard"
389
+ 387,/m/081rb,"Writing"
390
+ 388,/m/07pp_mv,"Alarm"
391
+ 389,/m/07cx4,"Telephone"
392
+ 390,/m/07pp8cl,"Telephone bell ringing"
393
+ 391,/m/01hnzm,"Ringtone"
394
+ 392,/m/02c8p,"Telephone dialing, DTMF"
395
+ 393,/m/015jpf,"Dial tone"
396
+ 394,/m/01z47d,"Busy signal"
397
+ 395,/m/046dlr,"Alarm clock"
398
+ 396,/m/03kmc9,"Siren"
399
+ 397,/m/0dgbq,"Civil defense siren"
400
+ 398,/m/030rvx,"Buzzer"
401
+ 399,/m/01y3hg,"Smoke detector, smoke alarm"
402
+ 400,/m/0c3f7m,"Fire alarm"
403
+ 401,/m/04fq5q,"Foghorn"
404
+ 402,/m/0l156k,"Whistle"
405
+ 403,/m/06hck5,"Steam whistle"
406
+ 404,/t/dd00077,"Mechanisms"
407
+ 405,/m/02bm9n,"Ratchet, pawl"
408
+ 406,/m/01x3z,"Clock"
409
+ 407,/m/07qjznt,"Tick"
410
+ 408,/m/07qjznl,"Tick-tock"
411
+ 409,/m/0l7xg,"Gears"
412
+ 410,/m/05zc1,"Pulleys"
413
+ 411,/m/0llzx,"Sewing machine"
414
+ 412,/m/02x984l,"Mechanical fan"
415
+ 413,/m/025wky1,"Air conditioning"
416
+ 414,/m/024dl,"Cash register"
417
+ 415,/m/01m4t,"Printer"
418
+ 416,/m/0dv5r,"Camera"
419
+ 417,/m/07bjf,"Single-lens reflex camera"
420
+ 418,/m/07k1x,"Tools"
421
+ 419,/m/03l9g,"Hammer"
422
+ 420,/m/03p19w,"Jackhammer"
423
+ 421,/m/01b82r,"Sawing"
424
+ 422,/m/02p01q,"Filing (rasp)"
425
+ 423,/m/023vsd,"Sanding"
426
+ 424,/m/0_ksk,"Power tool"
427
+ 425,/m/01d380,"Drill"
428
+ 426,/m/014zdl,"Explosion"
429
+ 427,/m/032s66,"Gunshot, gunfire"
430
+ 428,/m/04zjc,"Machine gun"
431
+ 429,/m/02z32qm,"Fusillade"
432
+ 430,/m/0_1c,"Artillery fire"
433
+ 431,/m/073cg4,"Cap gun"
434
+ 432,/m/0g6b5,"Fireworks"
435
+ 433,/g/122z_qxw,"Firecracker"
436
+ 434,/m/07qsvvw,"Burst, pop"
437
+ 435,/m/07pxg6y,"Eruption"
438
+ 436,/m/07qqyl4,"Boom"
439
+ 437,/m/083vt,"Wood"
440
+ 438,/m/07pczhz,"Chop"
441
+ 439,/m/07pl1bw,"Splinter"
442
+ 440,/m/07qs1cx,"Crack"
443
+ 441,/m/039jq,"Glass"
444
+ 442,/m/07q7njn,"Chink, clink"
445
+ 443,/m/07rn7sz,"Shatter"
446
+ 444,/m/04k94,"Liquid"
447
+ 445,/m/07rrlb6,"Splash, splatter"
448
+ 446,/m/07p6mqd,"Slosh"
449
+ 447,/m/07qlwh6,"Squish"
450
+ 448,/m/07r5v4s,"Drip"
451
+ 449,/m/07prgkl,"Pour"
452
+ 450,/m/07pqc89,"Trickle, dribble"
453
+ 451,/t/dd00088,"Gush"
454
+ 452,/m/07p7b8y,"Fill (with liquid)"
455
+ 453,/m/07qlf79,"Spray"
456
+ 454,/m/07ptzwd,"Pump (liquid)"
457
+ 455,/m/07ptfmf,"Stir"
458
+ 456,/m/0dv3j,"Boiling"
459
+ 457,/m/0790c,"Sonar"
460
+ 458,/m/0dl83,"Arrow"
461
+ 459,/m/07rqsjt,"Whoosh, swoosh, swish"
462
+ 460,/m/07qnq_y,"Thump, thud"
463
+ 461,/m/07rrh0c,"Thunk"
464
+ 462,/m/0b_fwt,"Electronic tuner"
465
+ 463,/m/02rr_,"Effects unit"
466
+ 464,/m/07m2kt,"Chorus effect"
467
+ 465,/m/018w8,"Basketball bounce"
468
+ 466,/m/07pws3f,"Bang"
469
+ 467,/m/07ryjzk,"Slap, smack"
470
+ 468,/m/07rdhzs,"Whack, thwack"
471
+ 469,/m/07pjjrj,"Smash, crash"
472
+ 470,/m/07pc8lb,"Breaking"
473
+ 471,/m/07pqn27,"Bouncing"
474
+ 472,/m/07rbp7_,"Whip"
475
+ 473,/m/07pyf11,"Flap"
476
+ 474,/m/07qb_dv,"Scratch"
477
+ 475,/m/07qv4k0,"Scrape"
478
+ 476,/m/07pdjhy,"Rub"
479
+ 477,/m/07s8j8t,"Roll"
480
+ 478,/m/07plct2,"Crushing"
481
+ 479,/t/dd00112,"Crumpling, crinkling"
482
+ 480,/m/07qcx4z,"Tearing"
483
+ 481,/m/02fs_r,"Beep, bleep"
484
+ 482,/m/07qwdck,"Ping"
485
+ 483,/m/07phxs1,"Ding"
486
+ 484,/m/07rv4dm,"Clang"
487
+ 485,/m/07s02z0,"Squeal"
488
+ 486,/m/07qh7jl,"Creak"
489
+ 487,/m/07qwyj0,"Rustle"
490
+ 488,/m/07s34ls,"Whir"
491
+ 489,/m/07qmpdm,"Clatter"
492
+ 490,/m/07p9k1k,"Sizzle"
493
+ 491,/m/07qc9xj,"Clicking"
494
+ 492,/m/07rwm0c,"Clickety-clack"
495
+ 493,/m/07phhsh,"Rumble"
496
+ 494,/m/07qyrcz,"Plop"
497
+ 495,/m/07qfgpx,"Jingle, tinkle"
498
+ 496,/m/07rcgpl,"Hum"
499
+ 497,/m/07p78v5,"Zing"
500
+ 498,/t/dd00121,"Boing"
501
+ 499,/m/07s12q4,"Crunch"
502
+ 500,/m/028v0c,"Silence"
503
+ 501,/m/01v_m0,"Sine wave"
504
+ 502,/m/0b9m1,"Harmonic"
505
+ 503,/m/0hdsk,"Chirp tone"
506
+ 504,/m/0c1dj,"Sound effect"
507
+ 505,/m/07pt_g0,"Pulse"
508
+ 506,/t/dd00125,"Inside, small room"
509
+ 507,/t/dd00126,"Inside, large room or hall"
510
+ 508,/t/dd00127,"Inside, public space"
511
+ 509,/t/dd00128,"Outside, urban or manmade"
512
+ 510,/t/dd00129,"Outside, rural or natural"
513
+ 511,/m/01b9nn,"Reverberation"
514
+ 512,/m/01jnbd,"Echo"
515
+ 513,/m/096m7z,"Noise"
516
+ 514,/m/06_y0by,"Environmental noise"
517
+ 515,/m/07rgkc5,"Static"
518
+ 516,/m/06xkwv,"Mains hum"
519
+ 517,/m/0g12c5,"Distortion"
520
+ 518,/m/08p9q4,"Sidetone"
521
+ 519,/m/07szfh9,"Cacophony"
522
+ 520,/m/0chx_,"White noise"
523
+ 521,/m/0cj0r,"Pink noise"
524
+ 522,/m/07p_0gm,"Throbbing"
525
+ 523,/m/01jwx6,"Vibration"
526
+ 524,/m/07c52,"Television"
527
+ 525,/m/06bz3,"Radio"
528
+ 526,/m/07hvw1,"Field recording"
models/onnx/ailia-models/code/output.png ADDED

Git LFS Details

  • SHA256: 1d2f6c84bd9d28bb7eb07e11fc19a9d04f4ad54e45219fe9268b515b778bbecd
  • Pointer size: 131 Bytes
  • Size of remote file: 228 kB
models/onnx/ailia-models/code/utilities.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import csv
3
+
4
def create_folder(fd):
    """Create directory ``fd`` (including missing parents) if nothing exists there.

    Args:
        fd: Path of the directory to create.
    """
    if not os.path.exists(fd):
        # exist_ok closes the race where another process creates the directory
        # between the existence check and this call.
        os.makedirs(fd, exist_ok=True)
7
+
8
def get_filename(path):
    """Return the final component of ``path`` without its last extension.

    The path is resolved with ``os.path.realpath`` first, so symlinks and
    relative segments are normalised before the name is extracted.

    Args:
        path: File path, absolute or relative.

    Returns:
        The file name with its last extension removed, e.g. ``'clip'`` for
        ``'/data/clip.wav'``.
    """
    # os.path.basename honours the platform separator, unlike split('/').
    na_ext = os.path.basename(os.path.realpath(path))
    return os.path.splitext(na_ext)[0]
13
+
14
+
15
+
16
# Load label
# The CSV is looked up relative to the current working directory.
# newline='' is what the csv module expects for files passed to csv.reader.
with open('class_labels_indices.csv', 'r', newline='', encoding='utf-8') as f:
    reader = csv.reader(f, delimiter=',')
    lines = list(reader)

labels = []  # Display names, in file order
ids = []     # Each label has a unique id such as "/m/068hy"
for row in lines[1:]:   # lines[0] is the "index,mid,display_name" header
    if not row:
        # Tolerate blank lines (e.g. a trailing newline) instead of failing.
        continue
    ids.append(row[1])
    labels.append(row[2])

classes_num = len(labels)
models/onnx/ailia-models/sound_event_detection.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01d426f5ca083ca9934d0d39fe38de31a91b7e3ed7d0a797d4c67e743f1555dc
3
+ size 327325913
models/onnx/ailia-models/sound_event_detection.onnx.prototxt ADDED
@@ -0,0 +1,1759 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ir_version: 4
2
+ producer_name: "pytorch"
3
+ producer_version: "1.11.0"
4
+ model_version: 0
5
+ graph {
6
+ name: "torch-jit-export"
7
+ node {
8
+ input: "input.1"
9
+ output: "onnx::Pad_85"
10
+ name: "Unsqueeze_0"
11
+ op_type: "Unsqueeze"
12
+ attribute {
13
+ name: "axes"
14
+ ints: 1
15
+ type: INTS
16
+ }
17
+ }
18
+ node {
19
+ input: "onnx::Pad_85"
20
+ output: "input"
21
+ name: "Pad_1"
22
+ op_type: "Pad"
23
+ attribute {
24
+ name: "mode"
25
+ s: "reflect"
26
+ type: STRING
27
+ }
28
+ attribute {
29
+ name: "pads"
30
+ ints: 0
31
+ ints: 0
32
+ ints: 512
33
+ ints: 0
34
+ ints: 0
35
+ ints: 512
36
+ type: INTS
37
+ }
38
+ }
39
+ node {
40
+ input: "input"
41
+ input: "spectrogram_extractor.stft.conv_real.weight"
42
+ output: "onnx::Unsqueeze_87"
43
+ name: "Conv_2"
44
+ op_type: "Conv"
45
+ attribute {
46
+ name: "dilations"
47
+ ints: 1
48
+ type: INTS
49
+ }
50
+ attribute {
51
+ name: "group"
52
+ i: 1
53
+ type: INT
54
+ }
55
+ attribute {
56
+ name: "kernel_shape"
57
+ ints: 1024
58
+ type: INTS
59
+ }
60
+ attribute {
61
+ name: "pads"
62
+ ints: 0
63
+ ints: 0
64
+ type: INTS
65
+ }
66
+ attribute {
67
+ name: "strides"
68
+ ints: 320
69
+ type: INTS
70
+ }
71
+ }
72
+ node {
73
+ input: "input"
74
+ input: "spectrogram_extractor.stft.conv_imag.weight"
75
+ output: "onnx::Unsqueeze_88"
76
+ name: "Conv_3"
77
+ op_type: "Conv"
78
+ attribute {
79
+ name: "dilations"
80
+ ints: 1
81
+ type: INTS
82
+ }
83
+ attribute {
84
+ name: "group"
85
+ i: 1
86
+ type: INT
87
+ }
88
+ attribute {
89
+ name: "kernel_shape"
90
+ ints: 1024
91
+ type: INTS
92
+ }
93
+ attribute {
94
+ name: "pads"
95
+ ints: 0
96
+ ints: 0
97
+ type: INTS
98
+ }
99
+ attribute {
100
+ name: "strides"
101
+ ints: 320
102
+ type: INTS
103
+ }
104
+ }
105
+ node {
106
+ input: "onnx::Unsqueeze_87"
107
+ output: "onnx::Transpose_89"
108
+ name: "Unsqueeze_4"
109
+ op_type: "Unsqueeze"
110
+ attribute {
111
+ name: "axes"
112
+ ints: 1
113
+ type: INTS
114
+ }
115
+ }
116
+ node {
117
+ input: "onnx::Transpose_89"
118
+ output: "real"
119
+ name: "Transpose_5"
120
+ op_type: "Transpose"
121
+ attribute {
122
+ name: "perm"
123
+ ints: 0
124
+ ints: 1
125
+ ints: 3
126
+ ints: 2
127
+ type: INTS
128
+ }
129
+ }
130
+ node {
131
+ input: "onnx::Unsqueeze_88"
132
+ output: "onnx::Transpose_91"
133
+ name: "Unsqueeze_6"
134
+ op_type: "Unsqueeze"
135
+ attribute {
136
+ name: "axes"
137
+ ints: 1
138
+ type: INTS
139
+ }
140
+ }
141
+ node {
142
+ input: "onnx::Transpose_91"
143
+ output: "imag"
144
+ name: "Transpose_7"
145
+ op_type: "Transpose"
146
+ attribute {
147
+ name: "perm"
148
+ ints: 0
149
+ ints: 1
150
+ ints: 3
151
+ ints: 2
152
+ type: INTS
153
+ }
154
+ }
155
+ node {
156
+ input: "real"
157
+ input: "onnx::Pow_242"
158
+ output: "onnx::Add_95"
159
+ name: "Pow_8"
160
+ op_type: "Pow"
161
+ }
162
+ node {
163
+ input: "imag"
164
+ input: "onnx::Pow_243"
165
+ output: "onnx::Add_98"
166
+ name: "Pow_9"
167
+ op_type: "Pow"
168
+ }
169
+ node {
170
+ input: "onnx::Add_95"
171
+ input: "onnx::Add_98"
172
+ output: "onnx::MatMul_99"
173
+ name: "Add_10"
174
+ op_type: "Add"
175
+ }
176
+ node {
177
+ input: "onnx::MatMul_99"
178
+ input: "logmel_extractor.melW"
179
+ output: "mel_spectrogram"
180
+ name: "MatMul_11"
181
+ op_type: "MatMul"
182
+ }
183
+ node {
184
+ input: "mel_spectrogram"
185
+ output: "onnx::Log_101"
186
+ name: "Clip_12"
187
+ op_type: "Clip"
188
+ attribute {
189
+ name: "max"
190
+ f: inf
191
+ type: FLOAT
192
+ }
193
+ attribute {
194
+ name: "min"
195
+ f: 1.000000013351432e-10
196
+ type: FLOAT
197
+ }
198
+ }
199
+ node {
200
+ input: "onnx::Log_101"
201
+ output: "onnx::Div_102"
202
+ name: "Log_13"
203
+ op_type: "Log"
204
+ }
205
+ node {
206
+ output: "onnx::Div_103"
207
+ name: "Constant_14"
208
+ op_type: "Constant"
209
+ attribute {
210
+ name: "value"
211
+ t {
212
+ dims: 1
213
+ data_type: 1
214
+ }
215
+ type: TENSOR
216
+ }
217
+ }
218
+ node {
219
+ input: "onnx::Div_102"
220
+ input: "onnx::Div_103"
221
+ output: "onnx::Mul_104"
222
+ name: "Div_15"
223
+ op_type: "Div"
224
+ }
225
+ node {
226
+ output: "onnx::Mul_105"
227
+ name: "Constant_16"
228
+ op_type: "Constant"
229
+ attribute {
230
+ name: "value"
231
+ t {
232
+ data_type: 1
233
+ }
234
+ type: TENSOR
235
+ }
236
+ }
237
+ node {
238
+ input: "onnx::Mul_104"
239
+ input: "onnx::Mul_105"
240
+ output: "onnx::Sub_106"
241
+ name: "Mul_17"
242
+ op_type: "Mul"
243
+ }
244
+ node {
245
+ output: "onnx::Sub_107"
246
+ name: "Constant_18"
247
+ op_type: "Constant"
248
+ attribute {
249
+ name: "value"
250
+ t {
251
+ data_type: 1
252
+ }
253
+ type: TENSOR
254
+ }
255
+ }
256
+ node {
257
+ input: "onnx::Sub_106"
258
+ input: "onnx::Sub_107"
259
+ output: "onnx::Transpose_108"
260
+ name: "Sub_19"
261
+ op_type: "Sub"
262
+ }
263
+ node {
264
+ input: "onnx::Transpose_108"
265
+ output: "input.4"
266
+ name: "Transpose_20"
267
+ op_type: "Transpose"
268
+ attribute {
269
+ name: "perm"
270
+ ints: 0
271
+ ints: 3
272
+ ints: 2
273
+ ints: 1
274
+ type: INTS
275
+ }
276
+ }
277
+ node {
278
+ input: "input.4"
279
+ input: "bn0.weight"
280
+ input: "bn0.bias"
281
+ input: "bn0.running_mean"
282
+ input: "bn0.running_var"
283
+ output: "onnx::Transpose_110"
284
+ name: "BatchNormalization_21"
285
+ op_type: "BatchNormalization"
286
+ attribute {
287
+ name: "epsilon"
288
+ f: 9.999999747378752e-06
289
+ type: FLOAT
290
+ }
291
+ attribute {
292
+ name: "momentum"
293
+ f: 0.8999999761581421
294
+ type: FLOAT
295
+ }
296
+ }
297
+ node {
298
+ input: "onnx::Transpose_110"
299
+ output: "input.8"
300
+ name: "Transpose_22"
301
+ op_type: "Transpose"
302
+ attribute {
303
+ name: "perm"
304
+ ints: 0
305
+ ints: 3
306
+ ints: 2
307
+ ints: 1
308
+ type: INTS
309
+ }
310
+ }
311
+ node {
312
+ input: "input.8"
313
+ input: "onnx::Conv_207"
314
+ input: "onnx::Conv_208"
315
+ output: "onnx::Relu_206"
316
+ name: "Conv_23"
317
+ op_type: "Conv"
318
+ attribute {
319
+ name: "dilations"
320
+ ints: 1
321
+ ints: 1
322
+ type: INTS
323
+ }
324
+ attribute {
325
+ name: "group"
326
+ i: 1
327
+ type: INT
328
+ }
329
+ attribute {
330
+ name: "kernel_shape"
331
+ ints: 3
332
+ ints: 3
333
+ type: INTS
334
+ }
335
+ attribute {
336
+ name: "pads"
337
+ ints: 1
338
+ ints: 1
339
+ ints: 1
340
+ ints: 1
341
+ type: INTS
342
+ }
343
+ attribute {
344
+ name: "strides"
345
+ ints: 1
346
+ ints: 1
347
+ type: INTS
348
+ }
349
+ }
350
+ node {
351
+ input: "onnx::Relu_206"
352
+ output: "onnx::Conv_114"
353
+ name: "Relu_24"
354
+ op_type: "Relu"
355
+ }
356
+ node {
357
+ input: "onnx::Conv_114"
358
+ input: "onnx::Conv_210"
359
+ input: "onnx::Conv_211"
360
+ output: "onnx::Relu_209"
361
+ name: "Conv_25"
362
+ op_type: "Conv"
363
+ attribute {
364
+ name: "dilations"
365
+ ints: 1
366
+ ints: 1
367
+ type: INTS
368
+ }
369
+ attribute {
370
+ name: "group"
371
+ i: 1
372
+ type: INT
373
+ }
374
+ attribute {
375
+ name: "kernel_shape"
376
+ ints: 3
377
+ ints: 3
378
+ type: INTS
379
+ }
380
+ attribute {
381
+ name: "pads"
382
+ ints: 1
383
+ ints: 1
384
+ ints: 1
385
+ ints: 1
386
+ type: INTS
387
+ }
388
+ attribute {
389
+ name: "strides"
390
+ ints: 1
391
+ ints: 1
392
+ type: INTS
393
+ }
394
+ }
395
+ node {
396
+ input: "onnx::Relu_209"
397
+ output: "onnx::Pad_117"
398
+ name: "Relu_26"
399
+ op_type: "Relu"
400
+ }
401
+ node {
402
+ input: "onnx::Pad_117"
403
+ output: "onnx::AveragePool_118"
404
+ name: "Pad_27"
405
+ op_type: "Pad"
406
+ attribute {
407
+ name: "mode"
408
+ s: "constant"
409
+ type: STRING
410
+ }
411
+ attribute {
412
+ name: "pads"
413
+ ints: 0
414
+ ints: 0
415
+ ints: 0
416
+ ints: 0
417
+ ints: 0
418
+ ints: 0
419
+ ints: 0
420
+ ints: 0
421
+ type: INTS
422
+ }
423
+ attribute {
424
+ name: "value"
425
+ f: 0.0
426
+ type: FLOAT
427
+ }
428
+ }
429
+ node {
430
+ input: "onnx::AveragePool_118"
431
+ output: "x"
432
+ name: "AveragePool_28"
433
+ op_type: "AveragePool"
434
+ attribute {
435
+ name: "kernel_shape"
436
+ ints: 2
437
+ ints: 2
438
+ type: INTS
439
+ }
440
+ attribute {
441
+ name: "pads"
442
+ ints: 0
443
+ ints: 0
444
+ ints: 0
445
+ ints: 0
446
+ type: INTS
447
+ }
448
+ attribute {
449
+ name: "strides"
450
+ ints: 2
451
+ ints: 2
452
+ type: INTS
453
+ }
454
+ }
455
+ node {
456
+ input: "x"
457
+ input: "onnx::Conv_213"
458
+ input: "onnx::Conv_214"
459
+ output: "onnx::Relu_212"
460
+ name: "Conv_29"
461
+ op_type: "Conv"
462
+ attribute {
463
+ name: "dilations"
464
+ ints: 1
465
+ ints: 1
466
+ type: INTS
467
+ }
468
+ attribute {
469
+ name: "group"
470
+ i: 1
471
+ type: INT
472
+ }
473
+ attribute {
474
+ name: "kernel_shape"
475
+ ints: 3
476
+ ints: 3
477
+ type: INTS
478
+ }
479
+ attribute {
480
+ name: "pads"
481
+ ints: 1
482
+ ints: 1
483
+ ints: 1
484
+ ints: 1
485
+ type: INTS
486
+ }
487
+ attribute {
488
+ name: "strides"
489
+ ints: 1
490
+ ints: 1
491
+ type: INTS
492
+ }
493
+ }
494
+ node {
495
+ input: "onnx::Relu_212"
496
+ output: "onnx::Conv_122"
497
+ name: "Relu_30"
498
+ op_type: "Relu"
499
+ }
500
+ node {
501
+ input: "onnx::Conv_122"
502
+ input: "onnx::Conv_216"
503
+ input: "onnx::Conv_217"
504
+ output: "onnx::Relu_215"
505
+ name: "Conv_31"
506
+ op_type: "Conv"
507
+ attribute {
508
+ name: "dilations"
509
+ ints: 1
510
+ ints: 1
511
+ type: INTS
512
+ }
513
+ attribute {
514
+ name: "group"
515
+ i: 1
516
+ type: INT
517
+ }
518
+ attribute {
519
+ name: "kernel_shape"
520
+ ints: 3
521
+ ints: 3
522
+ type: INTS
523
+ }
524
+ attribute {
525
+ name: "pads"
526
+ ints: 1
527
+ ints: 1
528
+ ints: 1
529
+ ints: 1
530
+ type: INTS
531
+ }
532
+ attribute {
533
+ name: "strides"
534
+ ints: 1
535
+ ints: 1
536
+ type: INTS
537
+ }
538
+ }
539
+ node {
540
+ input: "onnx::Relu_215"
541
+ output: "onnx::Pad_125"
542
+ name: "Relu_32"
543
+ op_type: "Relu"
544
+ }
545
+ node {
546
+ input: "onnx::Pad_125"
547
+ output: "onnx::AveragePool_126"
548
+ name: "Pad_33"
549
+ op_type: "Pad"
550
+ attribute {
551
+ name: "mode"
552
+ s: "constant"
553
+ type: STRING
554
+ }
555
+ attribute {
556
+ name: "pads"
557
+ ints: 0
558
+ ints: 0
559
+ ints: 0
560
+ ints: 0
561
+ ints: 0
562
+ ints: 0
563
+ ints: 0
564
+ ints: 0
565
+ type: INTS
566
+ }
567
+ attribute {
568
+ name: "value"
569
+ f: 0.0
570
+ type: FLOAT
571
+ }
572
+ }
573
+ node {
574
+ input: "onnx::AveragePool_126"
575
+ output: "x.3"
576
+ name: "AveragePool_34"
577
+ op_type: "AveragePool"
578
+ attribute {
579
+ name: "kernel_shape"
580
+ ints: 2
581
+ ints: 2
582
+ type: INTS
583
+ }
584
+ attribute {
585
+ name: "pads"
586
+ ints: 0
587
+ ints: 0
588
+ ints: 0
589
+ ints: 0
590
+ type: INTS
591
+ }
592
+ attribute {
593
+ name: "strides"
594
+ ints: 2
595
+ ints: 2
596
+ type: INTS
597
+ }
598
+ }
599
+ node {
600
+ input: "x.3"
601
+ input: "onnx::Conv_219"
602
+ input: "onnx::Conv_220"
603
+ output: "onnx::Relu_218"
604
+ name: "Conv_35"
605
+ op_type: "Conv"
606
+ attribute {
607
+ name: "dilations"
608
+ ints: 1
609
+ ints: 1
610
+ type: INTS
611
+ }
612
+ attribute {
613
+ name: "group"
614
+ i: 1
615
+ type: INT
616
+ }
617
+ attribute {
618
+ name: "kernel_shape"
619
+ ints: 3
620
+ ints: 3
621
+ type: INTS
622
+ }
623
+ attribute {
624
+ name: "pads"
625
+ ints: 1
626
+ ints: 1
627
+ ints: 1
628
+ ints: 1
629
+ type: INTS
630
+ }
631
+ attribute {
632
+ name: "strides"
633
+ ints: 1
634
+ ints: 1
635
+ type: INTS
636
+ }
637
+ }
638
+ node {
639
+ input: "onnx::Relu_218"
640
+ output: "onnx::Conv_130"
641
+ name: "Relu_36"
642
+ op_type: "Relu"
643
+ }
644
+ node {
645
+ input: "onnx::Conv_130"
646
+ input: "onnx::Conv_222"
647
+ input: "onnx::Conv_223"
648
+ output: "onnx::Relu_221"
649
+ name: "Conv_37"
650
+ op_type: "Conv"
651
+ attribute {
652
+ name: "dilations"
653
+ ints: 1
654
+ ints: 1
655
+ type: INTS
656
+ }
657
+ attribute {
658
+ name: "group"
659
+ i: 1
660
+ type: INT
661
+ }
662
+ attribute {
663
+ name: "kernel_shape"
664
+ ints: 3
665
+ ints: 3
666
+ type: INTS
667
+ }
668
+ attribute {
669
+ name: "pads"
670
+ ints: 1
671
+ ints: 1
672
+ ints: 1
673
+ ints: 1
674
+ type: INTS
675
+ }
676
+ attribute {
677
+ name: "strides"
678
+ ints: 1
679
+ ints: 1
680
+ type: INTS
681
+ }
682
+ }
683
+ node {
684
+ input: "onnx::Relu_221"
685
+ output: "onnx::Pad_133"
686
+ name: "Relu_38"
687
+ op_type: "Relu"
688
+ }
689
+ node {
690
+ input: "onnx::Pad_133"
691
+ output: "onnx::AveragePool_134"
692
+ name: "Pad_39"
693
+ op_type: "Pad"
694
+ attribute {
695
+ name: "mode"
696
+ s: "constant"
697
+ type: STRING
698
+ }
699
+ attribute {
700
+ name: "pads"
701
+ ints: 0
702
+ ints: 0
703
+ ints: 0
704
+ ints: 0
705
+ ints: 0
706
+ ints: 0
707
+ ints: 0
708
+ ints: 0
709
+ type: INTS
710
+ }
711
+ attribute {
712
+ name: "value"
713
+ f: 0.0
714
+ type: FLOAT
715
+ }
716
+ }
717
+ node {
718
+ input: "onnx::AveragePool_134"
719
+ output: "x.7"
720
+ name: "AveragePool_40"
721
+ op_type: "AveragePool"
722
+ attribute {
723
+ name: "kernel_shape"
724
+ ints: 2
725
+ ints: 2
726
+ type: INTS
727
+ }
728
+ attribute {
729
+ name: "pads"
730
+ ints: 0
731
+ ints: 0
732
+ ints: 0
733
+ ints: 0
734
+ type: INTS
735
+ }
736
+ attribute {
737
+ name: "strides"
738
+ ints: 2
739
+ ints: 2
740
+ type: INTS
741
+ }
742
+ }
743
+ node {
744
+ input: "x.7"
745
+ input: "onnx::Conv_225"
746
+ input: "onnx::Conv_226"
747
+ output: "onnx::Relu_224"
748
+ name: "Conv_41"
749
+ op_type: "Conv"
750
+ attribute {
751
+ name: "dilations"
752
+ ints: 1
753
+ ints: 1
754
+ type: INTS
755
+ }
756
+ attribute {
757
+ name: "group"
758
+ i: 1
759
+ type: INT
760
+ }
761
+ attribute {
762
+ name: "kernel_shape"
763
+ ints: 3
764
+ ints: 3
765
+ type: INTS
766
+ }
767
+ attribute {
768
+ name: "pads"
769
+ ints: 1
770
+ ints: 1
771
+ ints: 1
772
+ ints: 1
773
+ type: INTS
774
+ }
775
+ attribute {
776
+ name: "strides"
777
+ ints: 1
778
+ ints: 1
779
+ type: INTS
780
+ }
781
+ }
782
+ node {
783
+ input: "onnx::Relu_224"
784
+ output: "onnx::Conv_138"
785
+ name: "Relu_42"
786
+ op_type: "Relu"
787
+ }
788
+ node {
789
+ input: "onnx::Conv_138"
790
+ input: "onnx::Conv_228"
791
+ input: "onnx::Conv_229"
792
+ output: "onnx::Relu_227"
793
+ name: "Conv_43"
794
+ op_type: "Conv"
795
+ attribute {
796
+ name: "dilations"
797
+ ints: 1
798
+ ints: 1
799
+ type: INTS
800
+ }
801
+ attribute {
802
+ name: "group"
803
+ i: 1
804
+ type: INT
805
+ }
806
+ attribute {
807
+ name: "kernel_shape"
808
+ ints: 3
809
+ ints: 3
810
+ type: INTS
811
+ }
812
+ attribute {
813
+ name: "pads"
814
+ ints: 1
815
+ ints: 1
816
+ ints: 1
817
+ ints: 1
818
+ type: INTS
819
+ }
820
+ attribute {
821
+ name: "strides"
822
+ ints: 1
823
+ ints: 1
824
+ type: INTS
825
+ }
826
+ }
827
+ node {
828
+ input: "onnx::Relu_227"
829
+ output: "onnx::Pad_141"
830
+ name: "Relu_44"
831
+ op_type: "Relu"
832
+ }
833
+ node {
834
+ input: "onnx::Pad_141"
835
+ output: "onnx::AveragePool_142"
836
+ name: "Pad_45"
837
+ op_type: "Pad"
838
+ attribute {
839
+ name: "mode"
840
+ s: "constant"
841
+ type: STRING
842
+ }
843
+ attribute {
844
+ name: "pads"
845
+ ints: 0
846
+ ints: 0
847
+ ints: 0
848
+ ints: 0
849
+ ints: 0
850
+ ints: 0
851
+ ints: 0
852
+ ints: 0
853
+ type: INTS
854
+ }
855
+ attribute {
856
+ name: "value"
857
+ f: 0.0
858
+ type: FLOAT
859
+ }
860
+ }
861
+ node {
862
+ input: "onnx::AveragePool_142"
863
+ output: "x.11"
864
+ name: "AveragePool_46"
865
+ op_type: "AveragePool"
866
+ attribute {
867
+ name: "kernel_shape"
868
+ ints: 2
869
+ ints: 2
870
+ type: INTS
871
+ }
872
+ attribute {
873
+ name: "pads"
874
+ ints: 0
875
+ ints: 0
876
+ ints: 0
877
+ ints: 0
878
+ type: INTS
879
+ }
880
+ attribute {
881
+ name: "strides"
882
+ ints: 2
883
+ ints: 2
884
+ type: INTS
885
+ }
886
+ }
887
+ node {
888
+ input: "x.11"
889
+ input: "onnx::Conv_231"
890
+ input: "onnx::Conv_232"
891
+ output: "onnx::Relu_230"
892
+ name: "Conv_47"
893
+ op_type: "Conv"
894
+ attribute {
895
+ name: "dilations"
896
+ ints: 1
897
+ ints: 1
898
+ type: INTS
899
+ }
900
+ attribute {
901
+ name: "group"
902
+ i: 1
903
+ type: INT
904
+ }
905
+ attribute {
906
+ name: "kernel_shape"
907
+ ints: 3
908
+ ints: 3
909
+ type: INTS
910
+ }
911
+ attribute {
912
+ name: "pads"
913
+ ints: 1
914
+ ints: 1
915
+ ints: 1
916
+ ints: 1
917
+ type: INTS
918
+ }
919
+ attribute {
920
+ name: "strides"
921
+ ints: 1
922
+ ints: 1
923
+ type: INTS
924
+ }
925
+ }
926
+ node {
927
+ input: "onnx::Relu_230"
928
+ output: "onnx::Conv_146"
929
+ name: "Relu_48"
930
+ op_type: "Relu"
931
+ }
932
+ node {
933
+ input: "onnx::Conv_146"
934
+ input: "onnx::Conv_234"
935
+ input: "onnx::Conv_235"
936
+ output: "onnx::Relu_233"
937
+ name: "Conv_49"
938
+ op_type: "Conv"
939
+ attribute {
940
+ name: "dilations"
941
+ ints: 1
942
+ ints: 1
943
+ type: INTS
944
+ }
945
+ attribute {
946
+ name: "group"
947
+ i: 1
948
+ type: INT
949
+ }
950
+ attribute {
951
+ name: "kernel_shape"
952
+ ints: 3
953
+ ints: 3
954
+ type: INTS
955
+ }
956
+ attribute {
957
+ name: "pads"
958
+ ints: 1
959
+ ints: 1
960
+ ints: 1
961
+ ints: 1
962
+ type: INTS
963
+ }
964
+ attribute {
965
+ name: "strides"
966
+ ints: 1
967
+ ints: 1
968
+ type: INTS
969
+ }
970
+ }
971
+ node {
972
+ input: "onnx::Relu_233"
973
+ output: "onnx::Pad_149"
974
+ name: "Relu_50"
975
+ op_type: "Relu"
976
+ }
977
+ node {
978
+ input: "onnx::Pad_149"
979
+ output: "onnx::AveragePool_150"
980
+ name: "Pad_51"
981
+ op_type: "Pad"
982
+ attribute {
983
+ name: "mode"
984
+ s: "constant"
985
+ type: STRING
986
+ }
987
+ attribute {
988
+ name: "pads"
989
+ ints: 0
990
+ ints: 0
991
+ ints: 0
992
+ ints: 0
993
+ ints: 0
994
+ ints: 0
995
+ ints: 0
996
+ ints: 0
997
+ type: INTS
998
+ }
999
+ attribute {
1000
+ name: "value"
1001
+ f: 0.0
1002
+ type: FLOAT
1003
+ }
1004
+ }
1005
+ node {
1006
+ input: "onnx::AveragePool_150"
1007
+ output: "x.15"
1008
+ name: "AveragePool_52"
1009
+ op_type: "AveragePool"
1010
+ attribute {
1011
+ name: "kernel_shape"
1012
+ ints: 2
1013
+ ints: 2
1014
+ type: INTS
1015
+ }
1016
+ attribute {
1017
+ name: "pads"
1018
+ ints: 0
1019
+ ints: 0
1020
+ ints: 0
1021
+ ints: 0
1022
+ type: INTS
1023
+ }
1024
+ attribute {
1025
+ name: "strides"
1026
+ ints: 2
1027
+ ints: 2
1028
+ type: INTS
1029
+ }
1030
+ }
1031
+ node {
1032
+ input: "x.15"
1033
+ input: "onnx::Conv_237"
1034
+ input: "onnx::Conv_238"
1035
+ output: "onnx::Relu_236"
1036
+ name: "Conv_53"
1037
+ op_type: "Conv"
1038
+ attribute {
1039
+ name: "dilations"
1040
+ ints: 1
1041
+ ints: 1
1042
+ type: INTS
1043
+ }
1044
+ attribute {
1045
+ name: "group"
1046
+ i: 1
1047
+ type: INT
1048
+ }
1049
+ attribute {
1050
+ name: "kernel_shape"
1051
+ ints: 3
1052
+ ints: 3
1053
+ type: INTS
1054
+ }
1055
+ attribute {
1056
+ name: "pads"
1057
+ ints: 1
1058
+ ints: 1
1059
+ ints: 1
1060
+ ints: 1
1061
+ type: INTS
1062
+ }
1063
+ attribute {
1064
+ name: "strides"
1065
+ ints: 1
1066
+ ints: 1
1067
+ type: INTS
1068
+ }
1069
+ }
1070
+ node {
1071
+ input: "onnx::Relu_236"
1072
+ output: "onnx::Conv_154"
1073
+ name: "Relu_54"
1074
+ op_type: "Relu"
1075
+ }
1076
+ node {
1077
+ input: "onnx::Conv_154"
1078
+ input: "onnx::Conv_240"
1079
+ input: "onnx::Conv_241"
1080
+ output: "onnx::Relu_239"
1081
+ name: "Conv_55"
1082
+ op_type: "Conv"
1083
+ attribute {
1084
+ name: "dilations"
1085
+ ints: 1
1086
+ ints: 1
1087
+ type: INTS
1088
+ }
1089
+ attribute {
1090
+ name: "group"
1091
+ i: 1
1092
+ type: INT
1093
+ }
1094
+ attribute {
1095
+ name: "kernel_shape"
1096
+ ints: 3
1097
+ ints: 3
1098
+ type: INTS
1099
+ }
1100
+ attribute {
1101
+ name: "pads"
1102
+ ints: 1
1103
+ ints: 1
1104
+ ints: 1
1105
+ ints: 1
1106
+ type: INTS
1107
+ }
1108
+ attribute {
1109
+ name: "strides"
1110
+ ints: 1
1111
+ ints: 1
1112
+ type: INTS
1113
+ }
1114
+ }
1115
+ node {
1116
+ input: "onnx::Relu_239"
1117
+ output: "onnx::Pad_157"
1118
+ name: "Relu_56"
1119
+ op_type: "Relu"
1120
+ }
1121
+ node {
1122
+ input: "onnx::Pad_157"
1123
+ output: "onnx::AveragePool_158"
1124
+ name: "Pad_57"
1125
+ op_type: "Pad"
1126
+ attribute {
1127
+ name: "mode"
1128
+ s: "constant"
1129
+ type: STRING
1130
+ }
1131
+ attribute {
1132
+ name: "pads"
1133
+ ints: 0
1134
+ ints: 0
1135
+ ints: 0
1136
+ ints: 0
1137
+ ints: 0
1138
+ ints: 0
1139
+ ints: 0
1140
+ ints: 0
1141
+ type: INTS
1142
+ }
1143
+ attribute {
1144
+ name: "value"
1145
+ f: 0.0
1146
+ type: FLOAT
1147
+ }
1148
+ }
1149
+ node {
1150
+ input: "onnx::AveragePool_158"
1151
+ output: "x.19"
1152
+ name: "AveragePool_58"
1153
+ op_type: "AveragePool"
1154
+ attribute {
1155
+ name: "kernel_shape"
1156
+ ints: 1
1157
+ ints: 1
1158
+ type: INTS
1159
+ }
1160
+ attribute {
1161
+ name: "pads"
1162
+ ints: 0
1163
+ ints: 0
1164
+ ints: 0
1165
+ ints: 0
1166
+ type: INTS
1167
+ }
1168
+ attribute {
1169
+ name: "strides"
1170
+ ints: 1
1171
+ ints: 1
1172
+ type: INTS
1173
+ }
1174
+ }
1175
+ node {
1176
+ input: "x.19"
1177
+ output: "onnx::MaxPool_160"
1178
+ name: "ReduceMean_59"
1179
+ op_type: "ReduceMean"
1180
+ attribute {
1181
+ name: "axes"
1182
+ ints: 3
1183
+ type: INTS
1184
+ }
1185
+ attribute {
1186
+ name: "keepdims"
1187
+ i: 0
1188
+ type: INT
1189
+ }
1190
+ }
1191
+ node {
1192
+ input: "onnx::MaxPool_160"
1193
+ output: "onnx::Add_161"
1194
+ name: "MaxPool_60"
1195
+ op_type: "MaxPool"
1196
+ attribute {
1197
+ name: "kernel_shape"
1198
+ ints: 3
1199
+ type: INTS
1200
+ }
1201
+ attribute {
1202
+ name: "pads"
1203
+ ints: 1
1204
+ ints: 1
1205
+ type: INTS
1206
+ }
1207
+ attribute {
1208
+ name: "strides"
1209
+ ints: 1
1210
+ type: INTS
1211
+ }
1212
+ }
1213
+ node {
1214
+ input: "onnx::MaxPool_160"
1215
+ output: "onnx::AveragePool_162"
1216
+ name: "Pad_61"
1217
+ op_type: "Pad"
1218
+ attribute {
1219
+ name: "mode"
1220
+ s: "constant"
1221
+ type: STRING
1222
+ }
1223
+ attribute {
1224
+ name: "pads"
1225
+ ints: 0
1226
+ ints: 0
1227
+ ints: 1
1228
+ ints: 0
1229
+ ints: 0
1230
+ ints: 1
1231
+ type: INTS
1232
+ }
1233
+ attribute {
1234
+ name: "value"
1235
+ f: 0.0
1236
+ type: FLOAT
1237
+ }
1238
+ }
1239
+ node {
1240
+ input: "onnx::AveragePool_162"
1241
+ output: "onnx::Add_163"
1242
+ name: "AveragePool_62"
1243
+ op_type: "AveragePool"
1244
+ attribute {
1245
+ name: "kernel_shape"
1246
+ ints: 3
1247
+ type: INTS
1248
+ }
1249
+ attribute {
1250
+ name: "pads"
1251
+ ints: 0
1252
+ ints: 0
1253
+ type: INTS
1254
+ }
1255
+ attribute {
1256
+ name: "strides"
1257
+ ints: 1
1258
+ type: INTS
1259
+ }
1260
+ }
1261
+ node {
1262
+ input: "onnx::Add_161"
1263
+ input: "onnx::Add_163"
1264
+ output: "x.23"
1265
+ name: "Add_63"
1266
+ op_type: "Add"
1267
+ }
1268
+ node {
1269
+ input: "x.23"
1270
+ output: "onnx::MatMul_165"
1271
+ name: "Transpose_64"
1272
+ op_type: "Transpose"
1273
+ attribute {
1274
+ name: "perm"
1275
+ ints: 0
1276
+ ints: 2
1277
+ ints: 1
1278
+ type: INTS
1279
+ }
1280
+ }
1281
+ node {
1282
+ input: "onnx::MatMul_165"
1283
+ input: "onnx::MatMul_244"
1284
+ output: "onnx::Add_167"
1285
+ name: "MatMul_65"
1286
+ op_type: "MatMul"
1287
+ }
1288
+ node {
1289
+ input: "fc1.bias"
1290
+ input: "onnx::Add_167"
1291
+ output: "onnx::Relu_168"
1292
+ name: "Add_66"
1293
+ op_type: "Add"
1294
+ }
1295
+ node {
1296
+ input: "onnx::Relu_168"
1297
+ output: "onnx::MatMul_169"
1298
+ name: "Relu_67"
1299
+ op_type: "Relu"
1300
+ }
1301
+ node {
1302
+ input: "onnx::MatMul_169"
1303
+ input: "onnx::MatMul_245"
1304
+ output: "onnx::Add_171"
1305
+ name: "MatMul_68"
1306
+ op_type: "MatMul"
1307
+ }
1308
+ node {
1309
+ input: "fc_audioset.bias"
1310
+ input: "onnx::Add_171"
1311
+ output: "onnx::Sigmoid_172"
1312
+ name: "Add_69"
1313
+ op_type: "Add"
1314
+ }
1315
+ node {
1316
+ input: "onnx::Sigmoid_172"
1317
+ output: "segmentwise_output"
1318
+ name: "Sigmoid_70"
1319
+ op_type: "Sigmoid"
1320
+ }
1321
+ node {
1322
+ input: "segmentwise_output"
1323
+ output: "174"
1324
+ name: "ReduceMax_71"
1325
+ op_type: "ReduceMax"
1326
+ attribute {
1327
+ name: "axes"
1328
+ ints: 1
1329
+ type: INTS
1330
+ }
1331
+ attribute {
1332
+ name: "keepdims"
1333
+ i: 0
1334
+ type: INT
1335
+ }
1336
+ }
1337
+ node {
1338
+ input: "segmentwise_output"
1339
+ output: "onnx::Expand_177"
1340
+ name: "Unsqueeze_72"
1341
+ op_type: "Unsqueeze"
1342
+ attribute {
1343
+ name: "axes"
1344
+ ints: 2
1345
+ type: INTS
1346
+ }
1347
+ }
1348
+ node {
1349
+ output: "onnx::Tile_178"
1350
+ name: "Constant_73"
1351
+ op_type: "Constant"
1352
+ attribute {
1353
+ name: "value"
1354
+ t {
1355
+ dims: 4
1356
+ data_type: 7
1357
+ }
1358
+ type: TENSOR
1359
+ }
1360
+ }
1361
+ node {
1362
+ input: "onnx::ConstantOfShape_246"
1363
+ output: "onnx::Expand_180"
1364
+ name: "ConstantOfShape_74"
1365
+ op_type: "ConstantOfShape"
1366
+ attribute {
1367
+ name: "value"
1368
+ t {
1369
+ dims: 1
1370
+ data_type: 7
1371
+ raw_data: "\001\000\000\000\000\000\000\000"
1372
+ }
1373
+ type: TENSOR
1374
+ }
1375
+ }
1376
+ node {
1377
+ input: "onnx::Expand_177"
1378
+ input: "onnx::Expand_180"
1379
+ output: "onnx::Tile_181"
1380
+ name: "Expand_75"
1381
+ op_type: "Expand"
1382
+ }
1383
+ node {
1384
+ input: "onnx::Tile_181"
1385
+ input: "onnx::Tile_178"
1386
+ output: "onnx::Reshape_182"
1387
+ name: "Tile_76"
1388
+ op_type: "Tile"
1389
+ }
1390
+ node {
1391
+ input: "onnx::Reshape_182"
1392
+ input: "onnx::Reshape_250"
1393
+ output: "framewise_output"
1394
+ name: "Reshape_77"
1395
+ op_type: "Reshape"
1396
+ }
1397
+ node {
1398
+ input: "framewise_output"
1399
+ output: "onnx::Expand_189"
1400
+ name: "Slice_78"
1401
+ op_type: "Slice"
1402
+ attribute {
1403
+ name: "axes"
1404
+ ints: 1
1405
+ type: INTS
1406
+ }
1407
+ attribute {
1408
+ name: "ends"
1409
+ ints: 9223372036854775807
1410
+ type: INTS
1411
+ }
1412
+ attribute {
1413
+ name: "starts"
1414
+ ints: -1
1415
+ type: INTS
1416
+ }
1417
+ }
1418
+ node {
1419
+ input: "onnx::ConstantOfShape_259"
1420
+ output: "onnx::Expand_202"
1421
+ name: "ConstantOfShape_79"
1422
+ op_type: "ConstantOfShape"
1423
+ attribute {
1424
+ name: "value"
1425
+ t {
1426
+ dims: 1
1427
+ data_type: 7
1428
+ raw_data: "\001\000\000\000\000\000\000\000"
1429
+ }
1430
+ type: TENSOR
1431
+ }
1432
+ }
1433
+ node {
1434
+ input: "onnx::Expand_189"
1435
+ input: "onnx::Expand_202"
1436
+ output: "onnx::Tile_203"
1437
+ name: "Expand_80"
1438
+ op_type: "Expand"
1439
+ }
1440
+ node {
1441
+ input: "onnx::Tile_203"
1442
+ input: "onnx::Tile_258"
1443
+ output: "onnx::Concat_204"
1444
+ name: "Tile_81"
1445
+ op_type: "Tile"
1446
+ }
1447
+ node {
1448
+ input: "framewise_output"
1449
+ input: "onnx::Concat_204"
1450
+ output: "205"
1451
+ name: "Concat_82"
1452
+ op_type: "Concat"
1453
+ attribute {
1454
+ name: "axis"
1455
+ i: 1
1456
+ type: INT
1457
+ }
1458
+ }
1459
+ initializer {
1460
+ dims: 513
1461
+ dims: 1
1462
+ dims: 1024
1463
+ data_type: 1
1464
+ name: "spectrogram_extractor.stft.conv_real.weight"
1465
+ }
1466
+ initializer {
1467
+ dims: 513
1468
+ dims: 1
1469
+ dims: 1024
1470
+ data_type: 1
1471
+ name: "spectrogram_extractor.stft.conv_imag.weight"
1472
+ }
1473
+ initializer {
1474
+ dims: 513
1475
+ dims: 64
1476
+ data_type: 1
1477
+ name: "logmel_extractor.melW"
1478
+ }
1479
+ initializer {
1480
+ dims: 64
1481
+ data_type: 1
1482
+ name: "bn0.weight"
1483
+ }
1484
+ initializer {
1485
+ dims: 64
1486
+ data_type: 1
1487
+ name: "bn0.bias"
1488
+ }
1489
+ initializer {
1490
+ dims: 64
1491
+ data_type: 1
1492
+ name: "bn0.running_mean"
1493
+ }
1494
+ initializer {
1495
+ dims: 64
1496
+ data_type: 1
1497
+ name: "bn0.running_var"
1498
+ }
1499
+ initializer {
1500
+ dims: 2048
1501
+ data_type: 1
1502
+ name: "fc1.bias"
1503
+ }
1504
+ initializer {
1505
+ dims: 527
1506
+ data_type: 1
1507
+ name: "fc_audioset.bias"
1508
+ }
1509
+ initializer {
1510
+ dims: 64
1511
+ dims: 1
1512
+ dims: 3
1513
+ dims: 3
1514
+ data_type: 1
1515
+ name: "onnx::Conv_207"
1516
+ }
1517
+ initializer {
1518
+ dims: 64
1519
+ data_type: 1
1520
+ name: "onnx::Conv_208"
1521
+ }
1522
+ initializer {
1523
+ dims: 64
1524
+ dims: 64
1525
+ dims: 3
1526
+ dims: 3
1527
+ data_type: 1
1528
+ name: "onnx::Conv_210"
1529
+ }
1530
+ initializer {
1531
+ dims: 64
1532
+ data_type: 1
1533
+ name: "onnx::Conv_211"
1534
+ }
1535
+ initializer {
1536
+ dims: 128
1537
+ dims: 64
1538
+ dims: 3
1539
+ dims: 3
1540
+ data_type: 1
1541
+ name: "onnx::Conv_213"
1542
+ }
1543
+ initializer {
1544
+ dims: 128
1545
+ data_type: 1
1546
+ name: "onnx::Conv_214"
1547
+ }
1548
+ initializer {
1549
+ dims: 128
1550
+ dims: 128
1551
+ dims: 3
1552
+ dims: 3
1553
+ data_type: 1
1554
+ name: "onnx::Conv_216"
1555
+ }
1556
+ initializer {
1557
+ dims: 128
1558
+ data_type: 1
1559
+ name: "onnx::Conv_217"
1560
+ }
1561
+ initializer {
1562
+ dims: 256
1563
+ dims: 128
1564
+ dims: 3
1565
+ dims: 3
1566
+ data_type: 1
1567
+ name: "onnx::Conv_219"
1568
+ }
1569
+ initializer {
1570
+ dims: 256
1571
+ data_type: 1
1572
+ name: "onnx::Conv_220"
1573
+ }
1574
+ initializer {
1575
+ dims: 256
1576
+ dims: 256
1577
+ dims: 3
1578
+ dims: 3
1579
+ data_type: 1
1580
+ name: "onnx::Conv_222"
1581
+ }
1582
+ initializer {
1583
+ dims: 256
1584
+ data_type: 1
1585
+ name: "onnx::Conv_223"
1586
+ }
1587
+ initializer {
1588
+ dims: 512
1589
+ dims: 256
1590
+ dims: 3
1591
+ dims: 3
1592
+ data_type: 1
1593
+ name: "onnx::Conv_225"
1594
+ }
1595
+ initializer {
1596
+ dims: 512
1597
+ data_type: 1
1598
+ name: "onnx::Conv_226"
1599
+ }
1600
+ initializer {
1601
+ dims: 512
1602
+ dims: 512
1603
+ dims: 3
1604
+ dims: 3
1605
+ data_type: 1
1606
+ name: "onnx::Conv_228"
1607
+ }
1608
+ initializer {
1609
+ dims: 512
1610
+ data_type: 1
1611
+ name: "onnx::Conv_229"
1612
+ }
1613
+ initializer {
1614
+ dims: 1024
1615
+ dims: 512
1616
+ dims: 3
1617
+ dims: 3
1618
+ data_type: 1
1619
+ name: "onnx::Conv_231"
1620
+ }
1621
+ initializer {
1622
+ dims: 1024
1623
+ data_type: 1
1624
+ name: "onnx::Conv_232"
1625
+ }
1626
+ initializer {
1627
+ dims: 1024
1628
+ dims: 1024
1629
+ dims: 3
1630
+ dims: 3
1631
+ data_type: 1
1632
+ name: "onnx::Conv_234"
1633
+ }
1634
+ initializer {
1635
+ dims: 1024
1636
+ data_type: 1
1637
+ name: "onnx::Conv_235"
1638
+ }
1639
+ initializer {
1640
+ dims: 2048
1641
+ dims: 1024
1642
+ dims: 3
1643
+ dims: 3
1644
+ data_type: 1
1645
+ name: "onnx::Conv_237"
1646
+ }
1647
+ initializer {
1648
+ dims: 2048
1649
+ data_type: 1
1650
+ name: "onnx::Conv_238"
1651
+ }
1652
+ initializer {
1653
+ dims: 2048
1654
+ dims: 2048
1655
+ dims: 3
1656
+ dims: 3
1657
+ data_type: 1
1658
+ name: "onnx::Conv_240"
1659
+ }
1660
+ initializer {
1661
+ dims: 2048
1662
+ data_type: 1
1663
+ name: "onnx::Conv_241"
1664
+ }
1665
+ initializer {
1666
+ data_type: 1
1667
+ name: "onnx::Pow_242"
1668
+ }
1669
+ initializer {
1670
+ data_type: 1
1671
+ name: "onnx::Pow_243"
1672
+ }
1673
+ initializer {
1674
+ dims: 2048
1675
+ dims: 2048
1676
+ data_type: 1
1677
+ name: "onnx::MatMul_244"
1678
+ }
1679
+ initializer {
1680
+ dims: 2048
1681
+ dims: 527
1682
+ data_type: 1
1683
+ name: "onnx::MatMul_245"
1684
+ }
1685
+ initializer {
1686
+ dims: 1
1687
+ data_type: 7
1688
+ name: "onnx::ConstantOfShape_246"
1689
+ }
1690
+ initializer {
1691
+ dims: 3
1692
+ data_type: 7
1693
+ name: "onnx::Reshape_250"
1694
+ }
1695
+ initializer {
1696
+ dims: 3
1697
+ data_type: 7
1698
+ name: "onnx::Tile_258"
1699
+ }
1700
+ initializer {
1701
+ dims: 1
1702
+ data_type: 7
1703
+ name: "onnx::ConstantOfShape_259"
1704
+ }
1705
+ input {
1706
+ name: "input.1"
1707
+ type {
1708
+ tensor_type {
1709
+ elem_type: 1
1710
+ shape {
1711
+ dim {
1712
+ dim_value: 1
1713
+ }
1714
+ dim {
1715
+ dim_value: 224000
1716
+ }
1717
+ }
1718
+ }
1719
+ }
1720
+ }
1721
+ output {
1722
+ name: "205"
1723
+ type {
1724
+ tensor_type {
1725
+ elem_type: 1
1726
+ shape {
1727
+ dim {
1728
+ dim_value: 1
1729
+ }
1730
+ dim {
1731
+ dim_value: 701
1732
+ }
1733
+ dim {
1734
+ dim_value: 527
1735
+ }
1736
+ }
1737
+ }
1738
+ }
1739
+ }
1740
+ output {
1741
+ name: "174"
1742
+ type {
1743
+ tensor_type {
1744
+ elem_type: 1
1745
+ shape {
1746
+ dim {
1747
+ dim_value: 1
1748
+ }
1749
+ dim {
1750
+ dim_value: 527
1751
+ }
1752
+ }
1753
+ }
1754
+ }
1755
+ }
1756
+ }
1757
+ opset_import {
1758
+ version: 9
1759
+ }
models/onnx/ailia-models/source.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ https://github.com/axinc-ai/ailia-models/tree/master/audio_processing/audioset_tagging_cnn
2
+
3
+ https://storage.googleapis.com/ailia-models/audioset_tagging_cnn/audio_tagging.onnx
4
+ https://storage.googleapis.com/ailia-models/audioset_tagging_cnn/audio_tagging.onnx.prototxt
5
+
6
+ https://storage.googleapis.com/ailia-models/audioset_tagging_cnn/sound_event_detection.onnx
7
+ https://storage.googleapis.com/ailia-models/audioset_tagging_cnn/sound_event_detection.onnx.prototxt