|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | from transformers.configuration_utils import PretrainedConfig | 
					
						
						|  | from transformers.utils import logging | 
					
						
						|  | from .configuration_nemotron_h import NemotronHConfig | 
					
						
						|  | from .configuration_radio import RADIOConfig | 
					
						
						|  |  | 
					
						
						|  | logger = logging.get_logger(__name__) | 
					
						
						|  |  | 
					
						
						|  | class NemotronH_Nano_VL_V2_Config(PretrainedConfig): | 
					
						
						|  | model_type = 'NemotronH_Nano_VL_V2' | 
					
						
						|  | is_composition = True | 
					
						
						|  |  | 
					
						
						|  | def __init__( | 
					
						
						|  | self, | 
					
						
						|  | vision_config=None, | 
					
						
						|  | llm_config=None, | 
					
						
						|  | force_image_size=None, | 
					
						
						|  | downsample_ratio=0.5, | 
					
						
						|  | template=None, | 
					
						
						|  | ps_version='v1', | 
					
						
						|  | image_tag_type="internvl", | 
					
						
						|  | projector_hidden_size=4096, | 
					
						
						|  | vit_hidden_size=1280, | 
					
						
						|  | attn_implementation="flash_attention_2", | 
					
						
						|  | video_pruning_rate: float = 0.0, | 
					
						
						|  | **kwargs | 
					
						
						|  | ): | 
					
						
						|  | super().__init__(**kwargs) | 
					
						
						|  |  | 
					
						
						|  | if vision_config is not None: | 
					
						
						|  | self.vision_config = RADIOConfig(**vision_config) | 
					
						
						|  | else: | 
					
						
						|  | self.vision_config = RADIOConfig() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if llm_config is not None: | 
					
						
						|  | self.llm_config = NemotronHConfig(**llm_config) | 
					
						
						|  | else: | 
					
						
						|  | self.llm_config = NemotronHConfig() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | self.force_image_size = force_image_size | 
					
						
						|  | self.downsample_ratio = downsample_ratio | 
					
						
						|  | self.template = template | 
					
						
						|  | self.ps_version = ps_version | 
					
						
						|  | self.image_tag_type = image_tag_type | 
					
						
						|  | self.projector_hidden_size = projector_hidden_size | 
					
						
						|  | self.vit_hidden_size = vit_hidden_size | 
					
						
						|  | self.video_pruning_rate = video_pruning_rate | 
					
						
						|  |  | 
					
						
						|  | self._attn_implementation = attn_implementation | 
					
						
						|  | self.vision_config.use_flash_attn = self._attn_implementation is not None and "flash_attention" in self._attn_implementation | 
					
						
						|  | self.llm_config._attn_implementation = self._attn_implementation | 
					
						
						|  |  |