DeciCoder-1b / configuration_decicoder.py

Upload configuration_decicoder.py with huggingface_hub (#2)

1849e15 over 2 years ago

1.84 kB

	from transformers.models.llama.configuration_llama import LlamaConfig
	from transformers.utils import logging


	# Module-level logger obtained from the transformers logging utilities.
	logger = logging.get_logger(__name__)

	# Empty archive map; nothing in the visible part of this file reads it.
	# NOTE(review): presumably kept for parity with the upstream LLaMA config module - confirm before removing.
	LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP = {}


	class DeciCoderConfig(LlamaConfig):
	    r"""
	    Configuration class for DeciCoder.

	    Extends [`LlamaConfig`] with three boolean flags that choose the attention implementation used in each
	    inference phase (prefill, batched decode, single-sequence decode). All other keyword arguments are forwarded
	    unchanged to [`LlamaConfig`]; read the documentation of [`LlamaConfig`] and [`PretrainedConfig`] for the
	    inherited options.

	    Args:
	        naive_attention_prefill (`bool`, *optional*, defaults to `False`):
	            Whether to use naive matmul or scaled dot product attention during prefill.
	        naive_attention_decode_batched (`bool`, *optional*, defaults to `True`):
	            Whether to use naive matmul or scaled dot product attention during decode for batch_size > 1.
	        naive_attention_decode_single (`bool`, *optional*, defaults to `False`):
	            Whether to use naive matmul or scaled dot product attention during decode for batch_size == 1.
	        **kwargs:
	            Passed through to `LlamaConfig.__init__`.

	    Note:
	        Presumably `True` selects the naive matmul path and `False` selects scaled dot product attention, as the
	        flag names suggest; the code that consumes these flags is not part of this file, so confirm there.
	    """

	    # NOTE(review): reuses the "llama" model type instead of a DeciCoder-specific one; presumably existing
	    # checkpoints rely on this value - confirm before changing it.
	    model_type = "llama"
	    # NOTE(review): presumably read by transformers utilities to skip these outputs at inference - confirm.
	    keys_to_ignore_at_inference = ["past_key_values"]

	    def __init__(
	        self,
	        naive_attention_prefill: bool = False,
	        naive_attention_decode_batched: bool = True,
	        naive_attention_decode_single: bool = False,
	        **kwargs,
	    ):
	        # The DeciCoder-specific flags are stored before delegating to the parent
	        # initializer; this statement order is kept exactly as in the original.
	        self.naive_attention_prefill = naive_attention_prefill
	        self.naive_attention_decode_batched = naive_attention_decode_batched
	        self.naive_attention_decode_single = naive_attention_decode_single

	        super().__init__(**kwargs)