stepfun-ai
/

NextStep-1-Large

text-generation

Model card Files Files and versions

NextStep-1-Large / models /config.py

jingwwu's picture

Upload folder using huggingface_hub

22243b2 verified 3 months ago

history blame contribute delete

1.43 kB

	from transformers.models.llama.configuration_llama import LlamaConfig

	class NextStepConfig(LlamaConfig):
	    """Configuration for the "nextstep" model type.

	    Extends ``LlamaConfig`` with additional fields. Every keyword argument
	    not listed below is forwarded unchanged to ``LlamaConfig.__init__``; each
	    listed argument is stored on the instance under the same name.

	    NOTE(review): the per-field descriptions below are inferred from the
	    argument names only -- this file does not show how they are consumed.
	    Confirm against the modeling code before relying on them.

	    Args:
	        vae_name_or_path: Presumably the location/identifier of the VAE
	            used for image latents -- TODO confirm.
	        latent_size: Presumably the spatial size of the latent grid.
	        latent_patch_size: Presumably the patch size applied to latents.
	        latent_channels: Presumably the number of latent channels.
	        boi: Presumably the "begin of image" token id.
	        eoi: Presumably the "end of image" token id.
	        image_placeholder_id: Presumably the token id marking image slots.
	        pad_token_id_added: Presumably the id of an additionally
	            registered padding token.
	        lm_loss_weight: Presumably the weight of the language-model loss.
	        im_loss_weight: Presumably the weight of the image loss.
	        fm_head_dim: Presumably the hidden width of the "fm" head.
	        fm_head_layers: Presumably the number of layers in the "fm" head.
	        fm_head_batch_mul: Presumably a batch multiplier for the "fm" head.
	        o_attention_bias: Stored as given when not ``None``; when ``None``
	            it falls back to ``self.attention_bias`` as set by the parent
	            constructor.
	        **kwargs: Forwarded to ``LlamaConfig.__init__``.
	    """

	    model_type = "nextstep"

	    def __init__(
	        self,
	        vae_name_or_path: str | None = None,
	        latent_size: int = 32,
	        latent_patch_size: int = 2,
	        latent_channels: int = 16,
	        boi: int | None = None,
	        eoi: int | None = None,
	        image_placeholder_id: int | None = None,
	        pad_token_id_added: int | None = None,
	        lm_loss_weight: float = 0.01,
	        im_loss_weight: float = 1.0,
	        fm_head_dim: int = 1536,
	        fm_head_layers: int = 12,
	        fm_head_batch_mul: int = 4,
	        o_attention_bias: bool | None = None,
	        **kwargs,
	    ):
	        # The parent constructor must run first: the fallback for
	        # ``o_attention_bias`` below reads ``self.attention_bias``.
	        super().__init__(**kwargs)

	        self.vae_name_or_path = vae_name_or_path

	        self.latent_size = latent_size
	        self.latent_patch_size = latent_patch_size
	        self.latent_channels = latent_channels

	        self.boi = boi
	        self.eoi = eoi
	        self.image_placeholder_id = image_placeholder_id
	        self.pad_token_id_added = pad_token_id_added

	        self.lm_loss_weight = lm_loss_weight
	        self.im_loss_weight = im_loss_weight

	        self.fm_head_dim = fm_head_dim
	        self.fm_head_layers = fm_head_layers
	        self.fm_head_batch_mul = fm_head_batch_mul

	        # ``None`` means "inherit the parent's attention_bias setting".
	        self.o_attention_bias = self.attention_bias if o_attention_bias is None else o_attention_bias