Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,51 +1,251 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoModel
|
| 3 |
import torch
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
def
|
| 6 |
try:
|
|
|
|
|
|
|
|
|
|
| 7 |
# Load model on CPU
|
| 8 |
model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True)
|
| 9 |
|
| 10 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
unique_params = {}
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
unique_params[p.data_ptr()] = (name, p.numel())
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
|
| 25 |
-
# Format numbers with commas for readability
|
| 26 |
-
return f"""
|
| 27 |
-
Total Parameters: {total_params:,}
|
| 28 |
-
Trainable Parameters: {trainable_params:,}
|
| 29 |
-
"""
|
| 30 |
except Exception as e:
|
| 31 |
-
return f"Error loading model
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
)
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
if __name__ == "__main__":
|
| 51 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import AutoModel, AutoConfig
|
| 3 |
import torch
|
| 4 |
+
import json
|
| 5 |
+
from collections import defaultdict, OrderedDict
|
| 6 |
|
| 7 |
+
def analyze_model_parameters(model_path, show_layer_details=False):
    """Analyze the parameter counts of a HuggingFace model.

    Loads the model on CPU, walks ``model.named_parameters()`` and builds a
    human-readable report: total/trainable counts, embedding vs non-embedding
    split, and a per-layer breakdown.  Tied weights (several parameter names
    sharing one storage, e.g. input/output embeddings) are detected via
    ``Tensor.data_ptr()`` and counted once.

    Args:
        model_path: HuggingFace hub id or local path of the model.
        show_layer_details: when True, append a parameter-by-parameter
            appendix for every layer group.

    Returns:
        A formatted report string, or an error-message string if the model
        cannot be loaded (this function never raises — it is wired directly
        to Gradio button callbacks).
    """
    try:
        # Load the configuration first: cheap, validates the path early, and
        # supplies model_type / architecture metadata for the report.
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)

        # Load model on CPU — the Space assumes no GPU.
        model = AutoModel.from_pretrained(model_path, device_map="cpu", trust_remote_code=True)

        # Aggregate counters (unique storages only — tied weights counted once).
        total_params = 0
        trainable_params = 0
        embedding_params = 0
        non_embedding_params = 0

        # data_ptr -> first parameter seen at that storage (weight tying).
        unique_params = {}
        param_details = []
        layer_breakdown = defaultdict(lambda: {'total': 0, 'trainable': 0, 'params': []})

        # Common substrings used by HF architectures for embedding weights.
        embedding_patterns = [
            'embeddings', 'embed', 'wte', 'wpe', 'word_embedding',
            'position_embedding', 'token_embedding', 'embed_tokens',
            'embed_positions', 'embed_layer_norm'
        ]

        def is_embedding_param(name):
            """Heuristic: does this parameter name look like an embedding?"""
            name_lower = name.lower()
            return any(pattern in name_lower for pattern in embedding_patterns)

        def get_layer_name(param_name):
            """Map a dotted parameter name to a display group for the report."""
            parts = param_name.split('.')
            if len(parts) >= 2:
                # Transformer stacks: "...layers.<n>..." -> "Layer <n>"
                if 'layer' in parts or 'layers' in parts:
                    for i, part in enumerate(parts):
                        if part in ('layer', 'layers') and i + 1 < len(parts):
                            try:
                                return f"Layer {int(parts[i + 1])}"
                            except ValueError:
                                pass
                lowered = param_name.lower()
                if 'encoder' in parts:
                    return "Encoder"
                if 'decoder' in parts:
                    return "Decoder"
                if any(emb in lowered for emb in embedding_patterns):
                    return "Embeddings"
                if 'classifier' in lowered or 'head' in lowered:
                    return "Classification Head"
                if 'pooler' in lowered:
                    return "Pooler"
                if 'ln' in lowered or 'norm' in lowered:
                    return "Layer Norm"
            return "Other"

        # Walk every named parameter once.
        for name, param in model.named_parameters():
            param_size = param.numel()
            is_trainable = param.requires_grad
            is_embedding = is_embedding_param(name)
            layer_name = get_layer_name(name)

            # Tied weights share a storage pointer; aggregate them only once.
            # BUGFIX: 'shared' was previously detected with an O(n^2) scan
            # over param_details; unique_params already answers it in O(1).
            ptr = param.data_ptr()
            is_shared = ptr in unique_params
            if not is_shared:
                unique_params[ptr] = {
                    'name': name,
                    'size': param_size,
                    'trainable': is_trainable,
                    'embedding': is_embedding,
                    'layer': layer_name,
                    'shape': list(param.shape),
                }

                total_params += param_size
                if is_trainable:
                    trainable_params += param_size
                if is_embedding:
                    embedding_params += param_size
                else:
                    non_embedding_params += param_size

                layer_breakdown[layer_name]['total'] += param_size
                if is_trainable:
                    layer_breakdown[layer_name]['trainable'] += param_size

            # Per-parameter record: shared entries are kept but flagged.
            param_details.append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
                'embedding': is_embedding,
                'layer': layer_name,
                'shared': is_shared,
                'ptr': ptr,
            })
            layer_breakdown[layer_name]['params'].append({
                'name': name,
                'shape': list(param.shape),
                'size': param_size,
                'trainable': is_trainable,
            })

        def pct(n):
            """Percentage of total; 0.0 for a parameterless model.

            BUGFIX: the previous inline divisions raised ZeroDivisionError
            when the model exposed no parameters.
            """
            return n / total_params * 100 if total_params else 0.0

        # NOTE(review): the emoji / box-drawing characters below were
        # reconstructed from a mojibake-mangled dump of this file — confirm
        # against the original bytes.
        summary = f"""
📊 **MODEL ANALYSIS: {model_path}**

📈 **PARAMETER SUMMARY**
├── Total Parameters: {total_params:,}
├── Trainable Parameters: {trainable_params:,}
├── Non-trainable Parameters: {total_params - trainable_params:,}
└── Trainable Percentage: {pct(trainable_params):.1f}%

🔧 **PARAMETER BREAKDOWN**
├── Embedding Parameters: {embedding_params:,} ({pct(embedding_params):.1f}%)
└── Non-embedding Parameters: {non_embedding_params:,} ({pct(non_embedding_params):.1f}%)

📋 **MODEL INFO**
├── Model Type: {getattr(config, 'model_type', 'Unknown')}
├── Architecture: {config.architectures[0] if getattr(config, 'architectures', None) else 'Unknown'}
└── Hidden Size: {getattr(config, 'hidden_size', 'Unknown')}
"""

        def _layer_order(item):
            """Sort key: embeddings first, numbered layers next, heads last."""
            group = item[0]
            if group == "Embeddings":
                return (0, 0)
            if group == "Layer Norm":
                # BUGFIX: must be tested BEFORE the "Layer" prefix check —
                # "Layer Norm".startswith("Layer") is True, so the original
                # key mis-ordered it among the numbered layers.
                return (2, 0)
            if group.startswith("Layer"):
                try:
                    return (1, int(group.split()[1]))
                except (IndexError, ValueError):
                    return (1, 0)
            if group == "Pooler":
                return (3, 0)
            if group == "Classification Head":
                return (4, 0)
            return (5, 0)

        # BUGFIX: sorted_layers was only assigned inside `if layer_breakdown:`
        # but read unconditionally by the show_layer_details branch below,
        # giving a NameError for a parameterless model; define it always.
        sorted_layers = sorted(layer_breakdown.items(), key=_layer_order)

        if sorted_layers:
            summary += "\n🏗️ **LAYER BREAKDOWN SUMMARY**\n"
            for layer_name, info in sorted_layers:
                summary += f"├── {layer_name}: {info['total']:,} params ({pct(info['total']):.1f}%)\n"

        # Optional parameter-by-parameter appendix.
        layer_details = ""
        if show_layer_details:
            layer_details = "\n" + "=" * 60 + "\n"
            layer_details += "📝 **DETAILED LAYER-BY-LAYER BREAKDOWN**\n"
            layer_details += "=" * 60 + "\n"

            for layer_name, info in sorted_layers:
                layer_details += f"\n📌 **{layer_name.upper()}**\n"
                layer_details += f"  Total: {info['total']:,} | Trainable: {info['trainable']:,}\n"
                layer_details += "  Parameters:\n"

                for param_info in info['params']:
                    trainable_mark = "✓" if param_info['trainable'] else "✗"
                    layer_details += (
                        f"    {trainable_mark} {param_info['name']}: "
                        f"{param_info['shape']} → {param_info['size']:,}\n"
                    )

        return summary + layer_details

    except Exception as e:
        # Deliberately broad: this is a UI boundary — show the failure to the
        # user instead of crashing the Space.
        return f"❌ **Error loading model:** {str(e)}\n\nPlease check that the model path is correct and the model is accessible."
|
| 173 |
+
|
| 174 |
+
def count_parameters_basic(model_path):
    """Basic parameter counting without layer details.

    Gradio callback for the "Analyze Model" button; delegates to
    analyze_model_parameters() with the per-layer appendix disabled.
    Returns the formatted report (or error message) string.
    """
    return analyze_model_parameters(model_path, show_layer_details=False)
|
| 177 |
+
|
| 178 |
+
def count_parameters_detailed(model_path):
    """Detailed parameter counting with layer-by-layer breakdown.

    Gradio callback for the "Detailed Analysis" button; delegates to
    analyze_model_parameters() with the per-layer appendix enabled.
    Returns the formatted report (or error message) string.
    """
    return analyze_model_parameters(model_path, show_layer_details=True)
|
| 181 |
+
|
| 182 |
+
# Create Gradio interface with multiple outputs.
# Layout: a two-column input row (model path + two action buttons), one shared
# results textbox, clickable example models, and a usage-notes footer.
# NOTE(review): the emoji in the strings below were reconstructed from a
# mojibake-mangled dump of this file — confirm against the original bytes.
with gr.Blocks(title="🤖 Advanced HuggingFace Model Parameter Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🤖 Advanced HuggingFace Model Parameter Analyzer

    Enter any HuggingFace model path to get detailed parameter analysis including:
    - **Total & trainable parameter counts**
    - **Embedding vs non-embedding breakdown**
    - **Layer-by-layer analysis**
    - **Weight sharing detection**
    """)

    with gr.Row():
        with gr.Column(scale=2):
            # Free-text hub id or local path; pre-filled with a small model.
            model_input = gr.Textbox(
                label="🔍 HuggingFace Model Path",
                placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium",
                value="bert-base-uncased"
            )

        with gr.Column(scale=1):
            # Two entry points into the same analysis, differing only in
            # whether the per-layer appendix is generated.
            analyze_btn = gr.Button("🔍 Analyze Model", variant="primary")
            detailed_btn = gr.Button("📊 Detailed Analysis", variant="secondary")

    # Both buttons write into this single results box.
    output_text = gr.Textbox(
        label="📋 Analysis Results",
        lines=20,
        max_lines=50,
        show_copy_button=True
    )

    # Event handlers
    analyze_btn.click(
        fn=count_parameters_basic,
        inputs=model_input,
        outputs=output_text
    )

    detailed_btn.click(
        fn=count_parameters_detailed,
        inputs=model_input,
        outputs=output_text
    )

    # Example models — clicking one fills model_input.
    gr.Examples(
        examples=[
            ["bert-base-uncased"],
            ["gpt2"],
            ["roberta-base"],
            ["distilbert-base-uncased"],
            ["microsoft/DialoGPT-medium"],
            ["facebook/bart-base"],
            ["t5-small"],
            ["google/flan-t5-small"]
        ],
        inputs=model_input,
        label="🎯 Example Models"
    )

    gr.Markdown("""
    ### 📌 Notes:
    - **Weight tying detection**: Automatically handles shared parameters (e.g., input/output embeddings)
    - **Layer categorization**: Groups parameters by transformer layers, embeddings, etc.
    - **Detailed analysis**: Click "Detailed Analysis" for parameter-by-parameter breakdown
    - **Model compatibility**: Works with most HuggingFace transformer models
    """)
|
| 249 |
|
| 250 |
# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()
|