| """ |
| SHIVIK-Code Configuration |
| |
| Extends LlamaConfig for SHIVIK-Code specific settings. |
| """ |
|
|
| from transformers import LlamaConfig |
|
|
|
|
class ShivikCodeConfig(LlamaConfig):
    """
    Model configuration for SHIVIK-Code, built on top of ``LlamaConfig``.

    Compared with the parent class this configuration adds:
    - Its own default values for the Llama hyperparameters, including a
      32768-position context window
    - A default ``rope_scaling`` dict (``"type": "yarn"``) used whenever the
      caller passes ``rope_scaling=None``
    - Tool token ids (``tool_call_*_id`` and ``tool_result_*_id``)
    - FIM token ids (``fim_prefix_id``, ``fim_suffix_id``, ``fim_middle_id``)

    The arguments ``vocab_size`` through ``mlp_bias`` and any extra
    ``**kwargs`` are handed to ``LlamaConfig.__init__``; the tool and FIM ids
    are only stored as attributes on the instance.
    """

    model_type = "shivik_code"

    def __init__(
        self,
        vocab_size=128279,
        hidden_size=2048,
        intermediate_size=8192,
        num_hidden_layers=16,
        num_attention_heads=32,
        num_key_value_heads=8,
        hidden_act="silu",
        max_position_embeddings=32768,
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=128000,
        eos_token_id=128001,
        tie_word_embeddings=False,
        rope_theta=500000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        tool_call_start_id=128256,
        tool_call_end_id=128257,
        tool_result_start_id=128258,
        tool_result_end_id=128259,
        fim_prefix_id=128276,
        fim_suffix_id=128277,
        fim_middle_id=128278,
        **kwargs,
    ):
        """
        Build the configuration.

        Args:
            vocab_size ... mlp_bias: Llama settings passed straight through to
                ``LlamaConfig.__init__`` (see that class for their meaning).
            rope_scaling: RoPE scaling dict. ``None`` is replaced by a fresh
                ``{"type": "yarn", "factor": 8.0,
                "original_max_position_embeddings": 4096}`` dict before being
                forwarded to the parent.
            tool_call_start_id, tool_call_end_id: ids stored on the instance;
                by their names, the delimiters of a tool call.
            tool_result_start_id, tool_result_end_id: ids stored on the
                instance; by their names, the delimiters of a tool result.
            fim_prefix_id, fim_suffix_id, fim_middle_id: ids stored on the
                instance for the FIM (presumably fill-in-the-middle) tokens.
            **kwargs: Any further keyword arguments, forwarded to the parent.
        """
        # A new dict is created on every call so that instances never share
        # the default. 8.0 * 4096 == 32768, the default context length above.
        effective_rope_scaling = (
            rope_scaling
            if rope_scaling is not None
            else dict(
                type="yarn",
                factor=8.0,
                original_max_position_embeddings=4096,
            )
        )

        # Everything the parent class understands, gathered in one place.
        llama_settings = {
            "vocab_size": vocab_size,
            "hidden_size": hidden_size,
            "intermediate_size": intermediate_size,
            "num_hidden_layers": num_hidden_layers,
            "num_attention_heads": num_attention_heads,
            "num_key_value_heads": num_key_value_heads,
            "hidden_act": hidden_act,
            "max_position_embeddings": max_position_embeddings,
            "initializer_range": initializer_range,
            "rms_norm_eps": rms_norm_eps,
            "use_cache": use_cache,
            "pad_token_id": pad_token_id,
            "bos_token_id": bos_token_id,
            "eos_token_id": eos_token_id,
            "tie_word_embeddings": tie_word_embeddings,
            "rope_theta": rope_theta,
            "rope_scaling": effective_rope_scaling,
            "attention_bias": attention_bias,
            "attention_dropout": attention_dropout,
            "mlp_bias": mlp_bias,
        }
        super().__init__(**llama_settings, **kwargs)

        # SHIVIK-Code specific token ids, assigned after the parent has been
        # initialised and in a fixed order: tool ids first, then FIM ids.
        extra_token_ids = (
            ("tool_call_start_id", tool_call_start_id),
            ("tool_call_end_id", tool_call_end_id),
            ("tool_result_start_id", tool_result_start_id),
            ("tool_result_end_id", tool_result_end_id),
            ("fim_prefix_id", fim_prefix_id),
            ("fim_suffix_id", fim_suffix_id),
            ("fim_middle_id", fim_middle_id),
        )
        for attr_name, token_id in extra_token_ids:
            setattr(self, attr_name, token_id)
|
|