| """
|
| Vortex configuration for HuggingFace.
|
| """
|
|
|
| from typing import Optional, List, Dict, Any
|
| from transformers import PretrainedConfig
|
|
|
|
|
class VortexConfig(PretrainedConfig):
    """Configuration class for the Vortex model.

    Compatible with HuggingFace ``transformers``: stores every architectural
    hyperparameter needed to build, save, and reload a Vortex model (a hybrid
    SSM/attention stack with optional domain-specific modules) through the
    standard config API.
    """

    model_type = "vortex"
    tie_word_embeddings = True

    def __init__(
        self,
        d_model: int = 4096,
        num_layers: int = 32,
        num_heads: int = 32,
        d_state: int = 16,
        d_conv: int = 4,
        window_size: int = 512,
        ffn_expansion: int = 4,
        num_domains: int = 7,
        vocab_size: int = 50000,
        max_seq_len: int = 16384,
        ssm_ratio: float = 0.6,
        enable_equation_module: bool = True,
        enable_numerical_module: bool = True,
        enable_citation_module: bool = True,
        enable_molecular_module: bool = True,
        special_tokens: Optional[Dict[str, int]] = None,
        domain_tags: Optional[List[str]] = None,
        initializer_range: float = 0.02,
        tie_word_embeddings: bool = True,
        **kwargs,
    ):
        """Create a Vortex configuration.

        Args:
            d_model: Hidden size of the model. Must be divisible by
                ``num_heads``.
            num_layers: Number of stacked layers.
            num_heads: Number of attention heads.
            d_state: State dimension of the SSM blocks.
            d_conv: Convolution kernel width of the SSM blocks.
            window_size: Local attention window size.
            ffn_expansion: Feed-forward hidden-size multiplier.
            num_domains: Number of scientific domains.
            vocab_size: Tokenizer vocabulary size.
            max_seq_len: Maximum supported sequence length.
            ssm_ratio: Ratio of SSM components (presumably the fraction of
                SSM layers in the stack — interpretation is defined by the
                model code, not here).
            enable_equation_module: Toggle for the equation module.
            enable_numerical_module: Toggle for the numerical module.
            enable_citation_module: Toggle for the citation module.
            enable_molecular_module: Toggle for the molecular module.
            special_tokens: Mapping of special-token strings to ids; the
                built-in default vocabulary is used when ``None``.
            domain_tags: Domain tag tokens; defaults to the seven built-in
                science domain tags.
            initializer_range: Stddev used for weight initialization.
            tie_word_embeddings: Whether input and output embeddings share
                weights (also forwarded to ``PretrainedConfig``).
            **kwargs: Forwarded to ``PretrainedConfig``.

        Raises:
            ValueError: If ``d_model`` is not evenly divisible by
                ``num_heads``.
        """
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)

        # Fail fast on inconsistent head geometry instead of silently
        # floor-dividing to a wrong head_dim further below.
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )

        self.d_model = d_model
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.d_state = d_state
        self.d_conv = d_conv
        self.window_size = window_size
        self.ffn_expansion = ffn_expansion
        self.num_domains = num_domains
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.ssm_ratio = ssm_ratio
        self.enable_equation_module = enable_equation_module
        self.enable_numerical_module = enable_numerical_module
        self.enable_citation_module = enable_citation_module
        self.enable_molecular_module = enable_molecular_module
        self.special_tokens = special_tokens or {
            "[PAD]": 0, "[UNK]": 1, "[BOS]": 2, "[EOS]": 3,
            "[EQUATION]": 4, "[/EQUATION]": 5,
            "[CITATION]": 6, "[/CITATION]": 7,
            "[MOLECULE]": 8, "[/MOLECULE]": 9,
            "[FIGURE]": 10, "[TABLE]": 11,
            "[MATH]": 12, "[CHEM]": 13, "[BIO]": 14,
            "[PHYS]": 15, "[EARTH]": 16, "[SPACE]": 17, "[ZOO]": 18,
        }
        self.domain_tags = domain_tags or ["[MATH]", "[CHEM]", "[BIO]", "[PHYS]", "[EARTH]", "[SPACE]", "[ZOO]"]
        self.initializer_range = initializer_range

        # Derived: per-head dimension (exact by the divisibility check above).
        self.head_dim = self.d_model // self.num_heads

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
        """Load a config from a local directory containing ``config.json``.

        Args:
            pretrained_model_name_or_path: Local directory expected to hold a
                ``config.json`` file.
            **kwargs: Overrides applied on top of the values read from disk.

        Returns:
            A ``VortexConfig`` built from the on-disk values merged with
            ``kwargs``, or a default config (plus ``kwargs``) when no
            ``config.json`` is found.

        NOTE(review): this override only reads local directories; unlike the
        base-class implementation it does not resolve Hub model ids, and a
        missing config.json silently yields a default config — confirm both
        behaviors are intended.
        """
        import json
        import os

        config_path = os.path.join(pretrained_model_name_or_path, "config.json")
        if not os.path.exists(config_path):
            return cls(**kwargs)

        with open(config_path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)
        # Explicit kwargs win over values stored on disk.
        config_dict.update(kwargs)
        return cls(**config_dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the configuration to a plain dictionary.

        Includes ``tie_word_embeddings`` so that a non-default value survives
        a ``save_pretrained`` / ``from_pretrained`` round trip (it was
        previously dropped).
        """
        return {
            "model_type": self.model_type,
            "d_model": self.d_model,
            "num_layers": self.num_layers,
            "num_heads": self.num_heads,
            "head_dim": self.head_dim,
            "d_state": self.d_state,
            "d_conv": self.d_conv,
            "window_size": self.window_size,
            "ffn_expansion": self.ffn_expansion,
            "num_domains": self.num_domains,
            "vocab_size": self.vocab_size,
            "max_seq_len": self.max_seq_len,
            "ssm_ratio": self.ssm_ratio,
            "enable_equation_module": self.enable_equation_module,
            "enable_numerical_module": self.enable_numerical_module,
            "enable_citation_module": self.enable_citation_module,
            "enable_molecular_module": self.enable_molecular_module,
            "special_tokens": self.special_tokens,
            "domain_tags": self.domain_tags,
            "initializer_range": self.initializer_range,
            "tie_word_embeddings": self.tie_word_embeddings,
        }
|
|
|