import os
import subprocess

# hf_transfer must be enabled before huggingface_hub (pulled in by transformers)
# is imported, since the flag is read at import time.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import torch
import spaces
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    GroundingDinoForObjectDetection,
    Idefics2ForConditionalGeneration,
    LlavaForConditionalGeneration,
    LlavaNextForConditionalGeneration,
    Owlv2ForObjectDetection,
    PaliGemmaForConditionalGeneration,
    SamModel,
)
def install_flash_attn():
    """Install flash-attn from a prebuilt wheel (skipping the CUDA build)."""
    subprocess.run(
        "pip install flash-attn --no-build-isolation",
        # Extend the current environment instead of replacing it; passing a
        # bare one-key dict would drop PATH and break the pip invocation.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=True,
        check=True,
    )
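
# The snippet above defines the installer but never calls it; a minimal sketch,
# assuming flash-attn should be available before any model loads, is to run it
# once at startup:
install_flash_attn()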
ARCHITECTURE_MAP = {
    "LlavaNextForConditionalGeneration": LlavaNextForConditionalGeneration,
    "LlavaForConditionalGeneration": LlavaForConditionalGeneration,
    "PaliGemmaForConditionalGeneration": PaliGemmaForConditionalGeneration,
    "Idefics2ForConditionalGeneration": Idefics2ForConditionalGeneration,
    "Owlv2ForObjectDetection": Owlv2ForObjectDetection,
    "GroundingDinoForObjectDetection": GroundingDinoForObjectDetection,
    "SamModel": SamModel,
    "AutoModelForCausalLM": AutoModelForCausalLM,
}
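
# For example, an architecture name absent from the map (the name below is just
# an illustration) falls back to AutoModelForCausalLM:
#   ARCHITECTURE_MAP.get("MistralForCausalLM", AutoModelForCausalLM)
#   -> AutoModelForCausalLM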
# `@spaces.GPU` is assumed to be the intent behind the otherwise-unused
# `spaces` import: on ZeroGPU Spaces it allocates a GPU for the call.
@spaces.GPU
def get_model_summary(model_name):
    """Return (markdown summary, error message) for a Hub checkpoint."""
    try:
        config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
        architecture = config.architectures[0]

        # Rebuild a BitsAndBytesConfig from any quantization settings stored
        # in the checkpoint's config.
        quantization_config = getattr(config, "quantization_config", None)
        if quantization_config:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=quantization_config.get("load_in_4bit", False),
                load_in_8bit=quantization_config.get("load_in_8bit", False),
                bnb_4bit_compute_dtype=quantization_config.get("bnb_4bit_compute_dtype", torch.float16),
                bnb_4bit_quant_type=quantization_config.get("bnb_4bit_quant_type", "nf4"),
                bnb_4bit_use_double_quant=quantization_config.get("bnb_4bit_use_double_quant", False),
                llm_int8_enable_fp32_cpu_offload=quantization_config.get("llm_int8_enable_fp32_cpu_offload", False),
                llm_int8_has_fp16_weight=quantization_config.get("llm_int8_has_fp16_weight", False),
                llm_int8_skip_modules=quantization_config.get("llm_int8_skip_modules", None),
                llm_int8_threshold=quantization_config.get("llm_int8_threshold", 6.0),
            )
        else:
            bnb_config = None

        model_class = ARCHITECTURE_MAP.get(architecture, AutoModelForCausalLM)
        # Quantization settings belong in `quantization_config`; the original
        # passed them as `config=`, which `from_pretrained` reserves for the
        # model config itself.
        model = model_class.from_pretrained(
            model_name, quantization_config=bnb_config, trust_remote_code=True
        )

        # bitsandbytes places quantized weights itself; only move unquantized
        # models onto the GPU.
        if model and not quantization_config:
            model = model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

        model_summary = str(model) if model else "Model architecture not found."
        config_content = config.to_json_string() if config else "Configuration not found."
        return f"## Model Architecture\n\n{model_summary}\n\n## Configuration\n\n{config_content}", ""
    except ValueError as ve:
        return "", f"ValueError: {ve}"
    except EnvironmentError as ee:
        return "", f"EnvironmentError: {ee}"
    except Exception as e:
        return "", str(e)