Molbap HF Staff committed on
Commit
bcdb26d
·
1 Parent(s): 077bf87

better fragment

Browse files
Files changed (1) hide show
  1. src/fragments/memory-profiler.html +8 -175
src/fragments/memory-profiler.html CHANGED
@@ -1,178 +1,11 @@
1
- <div style="border: 1px solid #e2e8f0; border-radius: 8px; background: white; margin: 1.5rem 0;">
2
- <div style="padding: 1rem; border-bottom: 1px solid #e2e8f0; background: #f8f9fa;">
3
- <h4 style="margin: 0 0 0.5rem 0; color: #495057;">🚀 CUDA Warmup Efficiency Benchmark</h4>
4
- <p style="margin: 0; font-size: 0.9em; color: #6c757d;">
5
- Compare model loading with and without transformers' CUDA warmup via `caching_allocator_warmup`. This demonstrates the loading time and memory efficiency improvements.
6
- </p>
7
  </div>
8
-
9
- <div style="padding: 1rem;">
10
- <div style="display: grid; grid-template-columns: 1fr auto; gap: 1rem; align-items: end; margin-bottom: 1.5rem;">
11
- <div>
12
- <label style="display: block; font-weight: 600; margin-bottom: 0.5rem; color: #374151;">Model to Profile:</label>
13
- <select id="memory-model-select" style="width: 100%; padding: 0.5rem; border: 1px solid #d1d5db; border-radius: 6px; background: white;">
14
- <option value="openai-community/gpt2">openai-community/gpt2</option>
15
- <option value="google/gemma-2-2b">google/gemma-2-2b</option>
16
- <option value="microsoft/DialoGPT-small">microsoft/DialoGPT-small</option>
17
- <option value="facebook/opt-125m">facebook/opt-125m</option>
18
- </select>
19
- <div style="font-size: 0.8em; color: #6c757d; margin-top: 0.25rem;">
20
- Select a model or enter a custom HuggingFace model ID
21
- </div>
22
- </div>
23
-
24
- <div>
25
- <button id="memory-profile-btn" style="padding: 0.75rem 1.5rem; background: #dc2626; color: white; border: none; border-radius: 6px; cursor: pointer; font-weight: 500;">
26
- 🔥 Profile Memory
27
- </button>
28
- </div>
29
- </div>
30
-
31
- <div id="memory-chart-container" style="width: 100%; height: 400px; border: 1px solid #e2e8f0; border-radius: 6px; background: #f8f9fa; position: relative;">
32
- <div id="memory-placeholder" style="position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); text-align: center; color: #6c757d; font-style: italic;">
33
- Click "Profile Memory" to generate memory allocation timeline
34
- </div>
35
- <canvas id="memory-chart" width="100%" height="400" style="display: none;"></canvas>
36
- </div>
37
-
38
- <div id="memory-stats" style="margin-top: 1rem; padding: 1rem; background: #f1f5f9; border-radius: 6px; display: none;">
39
- <h5 style="margin: 0 0 0.5rem 0; color: #374151;">Memory Statistics</h5>
40
- <div id="memory-results"></div>
41
- </div>
42
  </div>
43
-
44
- <div style="padding: 1rem; border-top: 1px solid #e2e8f0; background: #f8f9fa; font-size: 0.9em; color: #6c757d;">
45
- <strong>Note:</strong> This demo requires GPU access. The warmup feature reduces peak memory usage during model loading.
46
- In the original app, this uses ZeroGPU to measure actual memory allocation timelines.
47
  </div>
48
- </div>
49
-
50
- <script>
51
- document.addEventListener('DOMContentLoaded', function() {
52
- const modelSelect = document.getElementById('memory-model-select');
53
- const profileBtn = document.getElementById('memory-profile-btn');
54
- const chartContainer = document.getElementById('memory-chart-container');
55
- const placeholder = document.getElementById('memory-placeholder');
56
- const canvas = document.getElementById('memory-chart');
57
- const statsDiv = document.getElementById('memory-stats');
58
- const resultsDiv = document.getElementById('memory-results');
59
-
60
- profileBtn.addEventListener('click', function() {
61
- const model = modelSelect.value;
62
-
63
- // Show loading state
64
- profileBtn.disabled = true;
65
- profileBtn.textContent = 'Profiling...';
66
- placeholder.innerHTML = '<div style="color: #6c757d;"><em>Loading model and measuring memory usage...</em><br><div style="margin-top: 0.5rem;">This may take a few moments</div></div>';
67
- statsDiv.style.display = 'none';
68
-
69
- // Simulate profiling time
70
- setTimeout(() => {
71
- // Generate mock data
72
- const timePoints = [];
73
- const warmupData = [];
74
- const noWarmupData = [];
75
-
76
- // Generate realistic-looking memory allocation curves
77
- for (let i = 0; i <= 50; i++) {
78
- const time = i * 0.1; // 5 seconds total
79
- timePoints.push(time);
80
-
81
- // Warmup curve (more efficient)
82
- const warmupMem = Math.max(0, 500 + Math.pow(i, 1.5) * 15 + Math.random() * 50);
83
- warmupData.push(warmupMem);
84
-
85
- // No warmup curve (less efficient, higher peak)
86
- const noWarmupMem = Math.max(0, 600 + Math.pow(i, 1.8) * 18 + Math.random() * 80);
87
- noWarmupData.push(noWarmupMem);
88
- }
89
-
90
- // Clear placeholder and show results
91
- placeholder.style.display = 'none';
92
- canvas.style.display = 'block';
93
-
94
- // Draw simple chart
95
- const ctx = canvas.getContext('2d');
96
- const width = canvas.width = chartContainer.offsetWidth - 2;
97
- const height = canvas.height = 400;
98
-
99
- ctx.clearRect(0, 0, width, height);
100
-
101
- // Draw axes
102
- ctx.strokeStyle = '#d1d5db';
103
- ctx.beginPath();
104
- ctx.moveTo(50, 20);
105
- ctx.lineTo(50, height - 50);
106
- ctx.lineTo(width - 20, height - 50);
107
- ctx.stroke();
108
-
109
- // Draw grid
110
- ctx.strokeStyle = '#f3f4f6';
111
- for (let i = 1; i < 10; i++) {
112
- const y = 20 + (height - 70) * i / 10;
113
- ctx.beginPath();
114
- ctx.moveTo(50, y);
115
- ctx.lineTo(width - 20, y);
116
- ctx.stroke();
117
- }
118
-
119
- // Draw data
120
- const maxMem = Math.max(...noWarmupData);
121
- const drawLine = (data, color) => {
122
- ctx.strokeStyle = color;
123
- ctx.lineWidth = 3;
124
- ctx.beginPath();
125
- for (let i = 0; i < data.length; i++) {
126
- const x = 50 + (width - 70) * i / (data.length - 1);
127
- const y = height - 50 - (height - 70) * data[i] / maxMem;
128
- if (i === 0) ctx.moveTo(x, y);
129
- else ctx.lineTo(x, y);
130
- }
131
- ctx.stroke();
132
- };
133
-
134
- drawLine(noWarmupData, '#ef4444'); // Red for no warmup
135
- drawLine(warmupData, '#22c55e'); // Green for warmup
136
-
137
- // Add labels
138
- ctx.fillStyle = '#374151';
139
- ctx.font = '14px sans-serif';
140
- ctx.fillText('Memory (MiB)', 10, height / 2);
141
- ctx.fillText('Time (seconds)', width / 2 - 50, height - 10);
142
-
143
- // Add legend
144
- ctx.fillStyle = '#ef4444';
145
- ctx.fillRect(width - 200, 30, 15, 15);
146
- ctx.fillStyle = '#374151';
147
- ctx.fillText('📈 Warmup OFF (Standard)', width - 180, 42);
148
-
149
- ctx.fillStyle = '#22c55e';
150
- ctx.fillRect(width - 200, 50, 15, 15);
151
- ctx.fillStyle = '#374151';
152
- ctx.fillText('🚀 Warmup ON (Optimized)', width - 180, 62);
153
-
154
- // Show statistics
155
- const peakWarmup = Math.max(...warmupData);
156
- const peakNoWarmup = Math.max(...noWarmupData);
157
- const savings = ((peakNoWarmup - peakWarmup) / peakNoWarmup * 100);
158
-
159
- resultsDiv.innerHTML = `
160
- <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem;">
161
- <div>
162
- <strong>Peak Memory (Warmup OFF):</strong> ${peakNoWarmup.toFixed(0)} MiB<br>
163
- <strong>Peak Memory (Warmup ON):</strong> ${peakWarmup.toFixed(0)} MiB
164
- </div>
165
- <div>
166
- <strong>Memory Savings:</strong> ${savings.toFixed(1)}%<br>
167
- <strong>Model:</strong> ${model}
168
- </div>
169
- </div>
170
- `;
171
- statsDiv.style.display = 'block';
172
-
173
- profileBtn.disabled = false;
174
- profileBtn.textContent = '🔥 Profile Memory';
175
- }, 3000);
176
- });
177
- });
178
- </script>
 
1
+ <div class=interactive-demo>
2
+ <div class=demo-header>
3
+ <h3>🚀 CUDA Warmup Efficiency Benchmark</h3>
 
 
 
4
  </div>
5
+ <div class=demo-content>
6
+ <iframe src=https://molbap-cuda-warmup-transformers.hf.space width=100% height=800px frameborder=0 style="border-radius: 8px; background: white;"></iframe>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  </div>
8
+ <div class=demo-footer>
9
+ Real CUDA warmup benchmarking with actual Transformers models. Measure the performance impact of the <code>caching_allocator_warmup</code> function at <code>transformers/src/transformers/modeling_utils.py:6186</code>. This interactive tool loads models twice - once with warmup disabled and once with warmup enabled - to demonstrate the significant loading time improvements.
 
 
10
  </div>
11
+ </div>