| """
|
| Vortex configuration for HuggingFace.
|
| """
|
|
|
| from typing import Optional, List, Dict, Any
|
| from transformers import PretrainedConfig
|
|
|
|
|
class VortexConfig(PretrainedConfig):
    """Configuration class for the Vortex model.

    Compatible with HuggingFace ``transformers``: stores every architectural
    hyperparameter needed to build, save, and reload a Vortex model (a hybrid
    SSM/attention stack with optional domain-specific modules) through the
    standard config API.
    """

    model_type = "vortex"
    tie_word_embeddings = True

    def __init__(
        self,
        d_model: int = 4096,
        num_layers: int = 32,
        num_heads: int = 32,
        d_state: int = 16,
        d_conv: int = 4,
        window_size: int = 512,
        ffn_expansion: int = 4,
        num_domains: int = 7,
        vocab_size: int = 50000,
        max_seq_len: int = 16384,
        ssm_ratio: float = 0.6,
        enable_equation_module: bool = True,
        enable_numerical_module: bool = True,
        enable_citation_module: bool = True,
        enable_molecular_module: bool = True,
        special_tokens: Optional[Dict[str, int]] = None,
        domain_tags: Optional[List[str]] = None,
        initializer_range: float = 0.02,
        tie_word_embeddings: bool = True,
        **kwargs,
    ):
        """Create a Vortex configuration.

        Args:
            d_model: Hidden size of the model. Must be divisible by
                ``num_heads``.
            num_layers: Number of stacked layers.
            num_heads: Number of attention heads.
            d_state: State dimension of the SSM blocks.
            d_conv: Convolution kernel width of the SSM blocks.
            window_size: Local attention window size.
            ffn_expansion: Feed-forward hidden-size multiplier.
            num_domains: Number of scientific domains.
            vocab_size: Tokenizer vocabulary size.
            max_seq_len: Maximum supported sequence length.
            ssm_ratio: Ratio of SSM components (presumably the fraction of
                SSM layers in the stack — interpretation is defined by the
                model code, not here).
            enable_equation_module: Toggle for the equation module.
            enable_numerical_module: Toggle for the numerical module.
            enable_citation_module: Toggle for the citation module.
            enable_molecular_module: Toggle for the molecular module.
            special_tokens: Mapping of special-token strings to ids; the
                built-in default vocabulary is used when ``None``.
            domain_tags: Domain tag tokens; defaults to the seven built-in
                science domain tags.
            initializer_range: Stddev used for weight initialization.
            tie_word_embeddings: Whether input and output embeddings share
                weights (also forwarded to ``PretrainedConfig``).
            **kwargs: Forwarded to ``PretrainedConfig``.

        Raises:
            ValueError: If ``d_model`` is not evenly divisible by
                ``num_heads``.
        """
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)

        # Fail fast on inconsistent head geometry instead of silently
        # floor-dividing to a wrong head_dim further below.
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )

        self.d_model = d_model
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.d_state = d_state
        self.d_conv = d_conv
        self.window_size = window_size
        self.ffn_expansion = ffn_expansion
        self.num_domains = num_domains
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.ssm_ratio = ssm_ratio
        self.enable_equation_module = enable_equation_module
        self.enable_numerical_module = enable_numerical_module
        self.enable_citation_module = enable_citation_module
        self.enable_molecular_module = enable_molecular_module
        self.special_tokens = special_tokens or {
            "[PAD]": 0, "[UNK]": 1, "[BOS]": 2, "[EOS]": 3,
            "[EQUATION]": 4, "[/EQUATION]": 5,
            "[CITATION]": 6, "[/CITATION]": 7,
            "[MOLECULE]": 8, "[/MOLECULE]": 9,
            "[FIGURE]": 10, "[TABLE]": 11,
            "[MATH]": 12, "[CHEM]": 13, "[BIO]": 14,
            "[PHYS]": 15, "[EARTH]": 16, "[SPACE]": 17, "[ZOO]": 18,
        }
        self.domain_tags = domain_tags or ["[MATH]", "[CHEM]", "[BIO]", "[PHYS]", "[EARTH]", "[SPACE]", "[ZOO]"]
        self.initializer_range = initializer_range

        # Derived: per-head dimension (exact by the divisibility check above).
        self.head_dim = self.d_model // self.num_heads

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
        """Load a config from a local directory containing ``config.json``.

        Args:
            pretrained_model_name_or_path: Local directory expected to hold a
                ``config.json`` file.
            **kwargs: Overrides applied on top of the values read from disk.

        Returns:
            A ``VortexConfig`` built from the on-disk values merged with
            ``kwargs``, or a default config (plus ``kwargs``) when no
            ``config.json`` is found.

        NOTE(review): this override only reads local directories; unlike the
        base-class implementation it does not resolve Hub model ids, and a
        missing config.json silently yields a default config — confirm both
        behaviors are intended.
        """
        import json
        import os

        config_path = os.path.join(pretrained_model_name_or_path, "config.json")
        if not os.path.exists(config_path):
            return cls(**kwargs)

        with open(config_path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)
        # Explicit kwargs win over values stored on disk.
        config_dict.update(kwargs)
        return cls(**config_dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the configuration to a plain dictionary.

        Includes ``tie_word_embeddings`` so that a non-default value survives
        a ``save_pretrained`` / ``from_pretrained`` round trip (it was
        previously dropped).
        """
        return {
            "model_type": self.model_type,
            "d_model": self.d_model,
            "num_layers": self.num_layers,
            "num_heads": self.num_heads,
            "head_dim": self.head_dim,
            "d_state": self.d_state,
            "d_conv": self.d_conv,
            "window_size": self.window_size,
            "ffn_expansion": self.ffn_expansion,
            "num_domains": self.num_domains,
            "vocab_size": self.vocab_size,
            "max_seq_len": self.max_seq_len,
            "ssm_ratio": self.ssm_ratio,
            "enable_equation_module": self.enable_equation_module,
            "enable_numerical_module": self.enable_numerical_module,
            "enable_citation_module": self.enable_citation_module,
            "enable_molecular_module": self.enable_molecular_module,
            "special_tokens": self.special_tokens,
            "domain_tags": self.domain_tags,
            "initializer_range": self.initializer_range,
            "tie_word_embeddings": self.tie_word_embeddings,
        }
|
|
|