{ "att_dropout": 0.0, "att_heads": 16, "embed_dim": 1024, "ff_dim": 3072, "ff_dropout": 0.0, "head_dim": 128, "kv_heads": 8, "layer_types": [ "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful", "stateless", "stateful" ], "memory_gate_type": "linear", "num_layers": 28, "padding_idx": 151669, "rope_base": 1000000, "seq_len": 8192, "skip_stm": false, "stm_batch_size": 1, "stm_size": 4096, "tie_embeddings": true, "training_cache": true, "use_flash_attention": false, "use_memory_gate": true, "use_separate_memory_projections": true, "vocab_size": 151936 }