Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import AutoModel, AutoConfig | |
| import torch | |
| import json | |
| from collections import defaultdict, OrderedDict | |
def analyze_model_parameters(model_path, hf_token=None, show_layer_details=False):
    """Load a HuggingFace model on CPU and return a text report of its parameters.

    The report contains total / trainable counts, an embedding vs.
    non-embedding split, basic config info and a per-layer-group summary.

    Args:
        model_path: Model identifier or path passed to ``from_pretrained``.
        hf_token: Optional access token; ignored when empty or whitespace.
        show_layer_details: When true, append a parameter-by-parameter
            listing for every layer group.

    Returns:
        The formatted report, or a formatted error message if anything
        raised while loading or analysing the model (exceptions are not
        propagated to the caller).
    """
    # Substrings that mark a parameter name as belonging to an embedding.
    embedding_patterns = (
        'embeddings', 'embed', 'wte', 'wpe', 'word_embedding',
        'position_embedding', 'token_embedding', 'embed_tokens',
        'embed_positions', 'embed_layer_norm',
    )
    # Display order of the named groups; numbered layers get rank 1 and
    # everything unlisted gets rank 5 (see layer_rank below).
    fixed_ranks = {
        "Embeddings": 0,
        "Layer Norm": 2,
        "Pooler": 3,
        "Classification Head": 4,
    }

    def is_embedding_param(name):
        """Return True if the parameter name matches an embedding pattern."""
        name_lower = name.lower()
        return any(pattern in name_lower for pattern in embedding_patterns)

    def get_layer_name(param_name):
        """Map a dotted parameter name to a display group."""
        parts = param_name.split('.')
        if len(parts) < 2:
            return "Other"

        # "<...>.layer.<N>.<...>" / "<...>.layers.<N>.<...>" -> "Layer N".
        for idx, part in enumerate(parts[:-1]):
            if part in ('layer', 'layers'):
                try:
                    return f"Layer {int(parts[idx + 1])}"
                except ValueError:
                    pass  # Not a numeric index; keep scanning.

        lowered = param_name.lower()
        if 'encoder' in parts:
            return "Encoder"
        if 'decoder' in parts:
            return "Decoder"
        if is_embedding_param(param_name):
            return "Embeddings"
        if 'classifier' in lowered or 'head' in lowered:
            return "Classification Head"
        if 'pooler' in lowered:
            return "Pooler"
        if 'ln' in lowered or 'norm' in lowered:
            return "Layer Norm"
        return "Other"

    def layer_rank(item):
        """Sort key for (group name, info) pairs."""
        label = item[0]
        # Exact names are checked first: "Layer Norm" also starts with
        # "Layer" and would otherwise be ranked with the numbered layers.
        if label in fixed_ranks:
            return fixed_ranks[label]
        return 1 if label.startswith("Layer") else 5

    try:
        # Prepare token parameter
        token_kwargs = {}
        if hf_token and hf_token.strip():
            token_kwargs['token'] = hf_token.strip()

        # NOTE(security): model_path is user-supplied and
        # trust_remote_code=True lets the referenced repository run its own
        # Python code during loading. Review before exposing publicly.
        config = AutoConfig.from_pretrained(
            model_path, trust_remote_code=True, **token_kwargs
        )

        # low_cpu_mem_usage=False avoids meta tensors so that every
        # parameter is materialised and can be inspected.
        model = AutoModel.from_pretrained(
            model_path,
            torch_dtype=torch.float32,  # Explicitly set dtype
            low_cpu_mem_usage=False,  # Disable meta tensors
            trust_remote_code=True,
            **token_kwargs
        )
        model = model.cpu()

        total_params = 0
        trainable_params = 0
        embedding_params = 0
        non_embedding_params = 0
        # Storage pointers already counted, so tied weights are only added
        # to the global totals once.
        seen_storage = set()
        layer_breakdown = defaultdict(
            lambda: {'total': 0, 'trainable': 0, 'params': []}
        )

        for name, param in model.named_parameters():
            param_size = param.numel()
            is_trainable = param.requires_grad
            layer_name = get_layer_name(name)

            ptr = param.data_ptr()
            if ptr not in seen_storage:
                seen_storage.add(ptr)
                total_params += param_size
                if is_trainable:
                    trainable_params += param_size
                if is_embedding_param(name):
                    embedding_params += param_size
                else:
                    non_embedding_params += param_size

            # NOTE(review): the per-group figures count every named
            # parameter, including ones whose storage was already seen.
            group = layer_breakdown[layer_name]
            group['total'] += param_size
            if is_trainable:
                group['trainable'] += param_size
            group['params'].append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
            })

        def percent(part):
            """Share of total_params in percent; 0.0 for an empty model."""
            return part / total_params * 100 if total_params else 0.0

        model_type = getattr(config, 'model_type', 'Unknown')
        architectures = getattr(config, 'architectures', None)
        architecture = architectures[0] if architectures else 'Unknown'

        # The leading and trailing empty entries reproduce the newline at
        # the start and end of the report header.
        summary = "\n".join([
            "",
            f"π **MODEL ANALYSIS: {model_path}**",
            "π **PARAMETER SUMMARY**",
            f"βββ Total Parameters: {total_params:,}",
            f"βββ Trainable Parameters: {trainable_params:,}",
            f"βββ Non-trainable Parameters: {total_params - trainable_params:,}",
            f"βββ Trainable Percentage: {percent(trainable_params):.1f}%",
            "π§ **PARAMETER BREAKDOWN**",
            f"βββ Embedding Parameters: {embedding_params:,} ({percent(embedding_params):.1f}%)",
            f"βββ Non-embedding Parameters: {non_embedding_params:,} ({percent(non_embedding_params):.1f}%)",
            "π **MODEL INFO**",
            f"βββ Model Type: {model_type}",
            f"βββ Architecture: {architecture}",
            f"βββ Hidden Size: {getattr(config, 'hidden_size', 'Unknown')}",
            "",
        ])

        # Computed unconditionally so the detailed view below never hits an
        # unbound name; sorted() is stable, so groups of equal rank keep
        # the order in which they were first encountered.
        sorted_layers = sorted(layer_breakdown.items(), key=layer_rank)

        if layer_breakdown:
            summary += "\nποΈ **LAYER BREAKDOWN SUMMARY**\n"
            for layer_name, info in sorted_layers:
                summary += (
                    f"βββ {layer_name}: {info['total']:,} params "
                    f"({percent(info['total']):.1f}%)\n"
                )

        layer_details = ""
        if show_layer_details:
            detail_parts = [
                "\n" + "=" * 60 + "\n",
                "π **DETAILED LAYER-BY-LAYER BREAKDOWN**\n",
                "=" * 60 + "\n",
            ]
            for layer_name, info in sorted_layers:
                detail_parts.append(f"\nπ **{layer_name.upper()}**\n")
                detail_parts.append(
                    f" Total: {info['total']:,} | Trainable: {info['trainable']:,}\n"
                )
                detail_parts.append(" Parameters:\n")
                for param_info in info['params']:
                    # Distinct marks for trainable vs. frozen parameters
                    # (both branches were the same glyph before).
                    trainable_mark = "✓" if param_info['trainable'] else "✗"
                    detail_parts.append(
                        f" {trainable_mark} {param_info['name']}: "
                        f"{param_info['shape']} β {param_info['size']:,}\n"
                    )
            layer_details = "".join(detail_parts)

        return summary + layer_details

    except Exception as e:
        # UI boundary: turn any failure into a readable message instead of
        # propagating it to the caller.
        error_msg = str(e)
        if "401" in error_msg or "authentication" in error_msg.lower():
            return f"π **Authentication Error:** This model requires a valid HuggingFace token.\n\nPlease provide your HuggingFace token in the token field above.\n\nOriginal error: {error_msg}"
        elif "404" in error_msg or "not found" in error_msg.lower():
            return f"π **Model Not Found:** The model '{model_path}' was not found.\n\nPlease check:\n- Model path is correct\n- Model exists on HuggingFace Hub\n- You have access to the model (use token if private)\n\nOriginal error: {error_msg}"
        else:
            return f"β **Error loading model:** {error_msg}\n\nPlease check that the model path is correct and accessible."
def count_parameters_basic(model_path, hf_token=None):
    """Return the summary-only analysis report for ``model_path``.

    Thin wrapper around ``analyze_model_parameters`` with the
    layer-by-layer listing switched off.
    """
    report = analyze_model_parameters(
        model_path,
        hf_token,
        show_layer_details=False,
    )
    return report
def count_parameters_detailed(model_path, hf_token=None):
    """Return the analysis report for ``model_path`` including per-layer details.

    Thin wrapper around ``analyze_model_parameters`` with the
    layer-by-layer listing switched on.
    """
    report = analyze_model_parameters(
        model_path,
        hf_token,
        show_layer_details=True,
    )
    return report
# Create Gradio interface with multiple outputs
# NOTE(review): nesting below is reconstructed from the context managers;
# the indentation inside the Markdown strings is likewise reconstructed —
# confirm against the deployed app.
with gr.Blocks(title="π€ Advanced HuggingFace Model Parameter Analyzer", theme=gr.themes.Soft()) as demo:
    # Page header describing what the tool reports.
    gr.Markdown("""
    # π€ Advanced HuggingFace Model Parameter Analyzer
    Enter any HuggingFace model path to get detailed parameter analysis including:
    - **Total & trainable parameter counts**
    - **Embedding vs non-embedding breakdown**
    - **Layer-by-layer analysis**
    - **Weight sharing detection**
    - **Private model access** with HuggingFace token
    """)
    # Input row: model path (wider column) next to the optional token.
    with gr.Row():
        with gr.Column(scale=2):
            model_input = gr.Textbox(
                label="π HuggingFace Model Path",
                placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium",
                value="bert-base-uncased"
            )
        with gr.Column(scale=1):
            # type="password" masks the token in the browser.
            hf_token_input = gr.Textbox(
                label="π HuggingFace Token (Optional)",
                placeholder="hf_...",
                type="password",
                info="Required for private models or gated models"
            )
    # Action row: summary-only vs. detailed analysis.
    with gr.Row():
        analyze_btn = gr.Button("π Analyze Model", variant="primary")
        detailed_btn = gr.Button("π Detailed Analysis", variant="secondary")
    # Single output box shared by both buttons.
    output_text = gr.Textbox(
        label="π Analysis Results",
        lines=20,
        max_lines=50,
        show_copy_button=True
    )
    # Event handlers: both buttons read the same inputs and write to the
    # same output; they differ only in the wrapper function they call.
    analyze_btn.click(
        fn=count_parameters_basic,
        inputs=[model_input, hf_token_input],
        outputs=output_text
    )
    detailed_btn.click(
        fn=count_parameters_detailed,
        inputs=[model_input, hf_token_input],
        outputs=output_text
    )
    # Example models: each entry fills the model path textbox only.
    gr.Examples(
        examples=[
            ["bert-base-uncased"],
            ["gpt2"],
            ["roberta-base"],
            ["distilbert-base-uncased"],
            ["microsoft/DialoGPT-medium"],
            ["facebook/bart-base"],
            ["t5-small"],
            ["google/flan-t5-small"]
        ],
        inputs=model_input,
        label="π― Example Models"
    )
    # Footer with usage notes.
    gr.Markdown("""
    ### π Notes:
    - **Weight tying detection**: Automatically handles shared parameters (e.g., input/output embeddings)
    - **Layer categorization**: Groups parameters by transformer layers, embeddings, etc.
    - **Detailed analysis**: Click "Detailed Analysis" for parameter-by-parameter breakdown
    - **Private models**: Use your HuggingFace token to access private or gated models
    - **Token security**: Token is only used for this session and not stored
    - **Model compatibility**: Works with most HuggingFace transformer models
    """)
# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()