Audio-to-Audio
hibiki
File size: 1,675 Bytes
41a859b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
{
  "card": 2048,
  "n_q": 32,
  "dep_q": 16,
  "delays": [
    0,
    0,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    0,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "dim": 2048,
  "text_card": 48000,
  "existing_text_padding_id": 3,
  "num_heads": 16,
  "num_layers": 28,
  "hidden_scale": 6,
  "causal": true,
  "layer_scale": null,
  "context": 3000,
  "max_period": 20000.0,
  "gating": "silu",
  "norm": "rms_norm_f32",
  "positional_embedding": "rope_concat",
  "depformer_dim": 1024,
  "depformer_num_heads": 16,
  "depformer_num_layers": 6,
  "depformer_dim_feedforward": null,
  "depformer_multi_linear": true,
  "depformer_norm": "layer_norm",
  "depformer_pos_emb": "none",
  "depformer_weights_per_step": true,
  "demux_second_stream": false,
  "kv_repeat": 2,
  "depformer_kv_repeat": 1,
  "text_card_out": null,
  "conditioners": {},
  "fuser": {
    "cross_attention_pos_emb": false,
    "cross_attention_pos_emb_scale": 1,
    "sum": [],
    "prepend": [],
    "cross": []
  },
  "cross_attention": false,
  "model_id": {
    "sig": "77f82164",
    "epoch": 110
  },
  "depformer_weights_per_step_schedule": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    8,
    8,
    8,
    8,
    8,
    8,
    8
  ],
  "model_type": "hibiki",
  "lm_gen_config": {
    "temp": 0.8,
    "temp_text": 0.8,
    "top_k": 250,
    "top_k_text": 250
  },
  "mimi_name": "mimi-pytorch-e351c8d8@125.safetensors",
  "tokenizer_name": "tokenizer_spm_48k_multi6_2.model",
  "moshi_name": "hibiki-pytorch-77f82164@110.safetensors"
}