"""Create a tiny random Glm4Moe model for testing optimum-intel export."""
import torch
from transformers import AutoTokenizer
from transformers.models.glm4_moe.modeling_glm4_moe import Glm4MoeForCausalLM, Glm4MoeConfig
def create_tiny_glm4_moe():
config = Glm4MoeConfig(
vocab_size=1000,
hidden_size=64,
intermediate_size=128,
num_hidden_layers=2,
num_attention_heads=4,
num_key_value_heads=4,
hidden_act="silu",
max_position_embeddings=256,
rms_norm_eps=1e-5,
n_routed_experts=4,
n_shared_experts=1,
num_experts_per_tok=2,
moe_intermediate_size=32,
first_k_dense_replace=1,
n_group=1,
topk_group=1,
norm_topk_prob=True,
routed_scaling_factor=1.8,
topk_method="noaux_tc",
rope_theta=10000,
tie_word_embeddings=False,
)
model = Glm4MoeForCausalLM(config)
model.eval()
# Verify model works
input_ids = torch.randint(0, 1000, (1, 10))
with torch.no_grad():
outputs = model(input_ids)
print(f"Model output shape: {outputs.logits.shape}")
print(f"Num parameters: {sum(p.numel() for p in model.parameters()):,}")
# Save model
output_dir = "tiny-random-glm4-moe"
model.save_pretrained(output_dir)
# Create and save a simple tokenizer
from transformers import PreTrainedTokenizerFast
from tokenizers import Tokenizer, models, pre_tokenizers
tokenizer_model = models.WordPiece(
vocab={f"token_{i}": i for i in range(1000)},
unk_token="token_0",
)
base_tokenizer = Tokenizer(tokenizer_model)
base_tokenizer.pre_tokenizer = pre_tokenizers.Whitespace()
tokenizer = PreTrainedTokenizerFast(
tokenizer_object=base_tokenizer,
unk_token="token_0",
pad_token="token_0",
eos_token="token_1",
bos_token="token_2",
)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")
return model, config
if __name__ == "__main__":
create_tiny_glm4_moe()
- Downloads last month
- 52
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support