Spaces:

transformers-community
/

Transformers-tenets

Running

App Files Community

Molbap HF Staff committed on Sep 26

Commit

bcdb26d

1 Parent(s): 077bf87

better fragment

Browse files

Files changed (1) hide show

src/fragments/memory-profiler.html +8 -175

src/fragments/memory-profiler.html CHANGED Viewed

@@ -1,178 +1,11 @@
-<div style="border: 1px solid #e2e8f0; border-radius: 8px; background: white; margin: 1.5rem 0;">
-    <div style="padding: 1rem; border-bottom: 1px solid #e2e8f0; background: #f8f9fa;">
-        <h4 style="margin: 0 0 0.5rem 0; color: #495057;">🚀 CUDA Warmup Efficiency Benchmark</h4>
-        <p style="margin: 0; font-size: 0.9em; color: #6c757d;">
-            Compare model loading with and without transformers' CUDA warmup via `caching_allocator_warmup`. This demonstrates the loading time and memory efficiency improvements.
-        </p>
     </div>
-    <div style="padding: 1rem;">
-        <div style="display: grid; grid-template-columns: 1fr auto; gap: 1rem; align-items: end; margin-bottom: 1.5rem;">
-            <div>
-                <label style="display: block; font-weight: 600; margin-bottom: 0.5rem; color: #374151;">Model to Profile:</label>
-                <select id="memory-model-select" style="width: 100%; padding: 0.5rem; border: 1px solid #d1d5db; border-radius: 6px; background: white;">
-                    <option value="openai-community/gpt2">openai-community/gpt2</option>
-                    <option value="google/gemma-2-2b">google/gemma-2-2b</option>
-                    <option value="microsoft/DialoGPT-small">microsoft/DialoGPT-small</option>
-                    <option value="facebook/opt-125m">facebook/opt-125m</option>
-                </select>
-                <div style="font-size: 0.8em; color: #6c757d; margin-top: 0.25rem;">
-                    Select a model or enter a custom HuggingFace model ID
-                </div>
-            </div>
-            <div>
-                <button id="memory-profile-btn" style="padding: 0.75rem 1.5rem; background: #dc2626; color: white; border: none; border-radius: 6px; cursor: pointer; font-weight: 500;">
-                    🔥 Profile Memory
-                </button>
-            </div>
-        </div>
-        <div id="memory-chart-container" style="width: 100%; height: 400px; border: 1px solid #e2e8f0; border-radius: 6px; background: #f8f9fa; position: relative;">
-            <div id="memory-placeholder" style="position: absolute; top: 50%; left: 50%; transform: translate(-50%, -50%); text-align: center; color: #6c757d; font-style: italic;">
-                Click "Profile Memory" to generate memory allocation timeline
-            </div>
-            <canvas id="memory-chart" width="100%" height="400" style="display: none;"></canvas>
-        </div>
-        <div id="memory-stats" style="margin-top: 1rem; padding: 1rem; background: #f1f5f9; border-radius: 6px; display: none;">
-            <h5 style="margin: 0 0 0.5rem 0; color: #374151;">Memory Statistics</h5>
-            <div id="memory-results"></div>
-        </div>
     </div>
-    <div style="padding: 1rem; border-top: 1px solid #e2e8f0; background: #f8f9fa; font-size: 0.9em; color: #6c757d;">
-        <strong>Note:</strong> This demo requires GPU access. The warmup feature reduces peak memory usage during model loading.
-        In the original app, this uses ZeroGPU to measure actual memory allocation timelines.
     </div>
-</div>
-<script>
-document.addEventListener('DOMContentLoaded', function() {
-    const modelSelect = document.getElementById('memory-model-select');
-    const profileBtn = document.getElementById('memory-profile-btn');
-    const chartContainer = document.getElementById('memory-chart-container');
-    const placeholder = document.getElementById('memory-placeholder');
-    const canvas = document.getElementById('memory-chart');
-    const statsDiv = document.getElementById('memory-stats');
-    const resultsDiv = document.getElementById('memory-results');
-    profileBtn.addEventListener('click', function() {
-        const model = modelSelect.value;
-        // Show loading state
-        profileBtn.disabled = true;
-        profileBtn.textContent = 'Profiling...';
-        placeholder.innerHTML = '<div style="color: #6c757d;"><em>Loading model and measuring memory usage...</em><br><div style="margin-top: 0.5rem;">This may take a few moments</div></div>';
-        statsDiv.style.display = 'none';
-        // Simulate profiling time
-        setTimeout(() => {
-            // Generate mock data
-            const timePoints = [];
-            const warmupData = [];
-            const noWarmupData = [];
-            // Generate realistic-looking memory allocation curves
-            for (let i = 0; i <= 50; i++) {
-                const time = i * 0.1; // 5 seconds total
-                timePoints.push(time);
-                // Warmup curve (more efficient)
-                const warmupMem = Math.max(0, 500 + Math.pow(i, 1.5) * 15 + Math.random() * 50);
-                warmupData.push(warmupMem);
-                // No warmup curve (less efficient, higher peak)
-                const noWarmupMem = Math.max(0, 600 + Math.pow(i, 1.8) * 18 + Math.random() * 80);
-                noWarmupData.push(noWarmupMem);
-            }
-            // Clear placeholder and show results
-            placeholder.style.display = 'none';
-            canvas.style.display = 'block';
-            // Draw simple chart
-            const ctx = canvas.getContext('2d');
-            const width = canvas.width = chartContainer.offsetWidth - 2;
-            const height = canvas.height = 400;
-            ctx.clearRect(0, 0, width, height);
-            // Draw axes
-            ctx.strokeStyle = '#d1d5db';
-            ctx.beginPath();
-            ctx.moveTo(50, 20);
-            ctx.lineTo(50, height - 50);
-            ctx.lineTo(width - 20, height - 50);
-            ctx.stroke();
-            // Draw grid
-            ctx.strokeStyle = '#f3f4f6';
-            for (let i = 1; i < 10; i++) {
-                const y = 20 + (height - 70) * i / 10;
-                ctx.beginPath();
-                ctx.moveTo(50, y);
-                ctx.lineTo(width - 20, y);
-                ctx.stroke();
-            }
-            // Draw data
-            const maxMem = Math.max(...noWarmupData);
-            const drawLine = (data, color) => {
-                ctx.strokeStyle = color;
-                ctx.lineWidth = 3;
-                ctx.beginPath();
-                for (let i = 0; i < data.length; i++) {
-                    const x = 50 + (width - 70) * i / (data.length - 1);
-                    const y = height - 50 - (height - 70) * data[i] / maxMem;
-                    if (i === 0) ctx.moveTo(x, y);
-                    else ctx.lineTo(x, y);
-                }
-                ctx.stroke();
-            };
-            drawLine(noWarmupData, '#ef4444'); // Red for no warmup
-            drawLine(warmupData, '#22c55e');   // Green for warmup
-            // Add labels
-            ctx.fillStyle = '#374151';
-            ctx.font = '14px sans-serif';
-            ctx.fillText('Memory (MiB)', 10, height / 2);
-            ctx.fillText('Time (seconds)', width / 2 - 50, height - 10);
-            // Add legend
-            ctx.fillStyle = '#ef4444';
-            ctx.fillRect(width - 200, 30, 15, 15);
-            ctx.fillStyle = '#374151';
-            ctx.fillText('📈 Warmup OFF (Standard)', width - 180, 42);
-            ctx.fillStyle = '#22c55e';
-            ctx.fillRect(width - 200, 50, 15, 15);
-            ctx.fillStyle = '#374151';
-            ctx.fillText('🚀 Warmup ON (Optimized)', width - 180, 62);
-            // Show statistics
-            const peakWarmup = Math.max(...warmupData);
-            const peakNoWarmup = Math.max(...noWarmupData);
-            const savings = ((peakNoWarmup - peakWarmup) / peakNoWarmup * 100);
-            resultsDiv.innerHTML = `
-                <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 1rem;">
-                    <div>
-                        <strong>Peak Memory (Warmup OFF):</strong> ${peakNoWarmup.toFixed(0)} MiB<br>
-                        <strong>Peak Memory (Warmup ON):</strong> ${peakWarmup.toFixed(0)} MiB
-                    </div>
-                    <div>
-                        <strong>Memory Savings:</strong> ${savings.toFixed(1)}%<br>
-                        <strong>Model:</strong> ${model}
-                    </div>
-                </div>
-            `;
-            statsDiv.style.display = 'block';
-            profileBtn.disabled = false;
-            profileBtn.textContent = '🔥 Profile Memory';
-        }, 3000);
-    });
-});
-</script>

+<div class=interactive-demo>
+    <div class=demo-header>
+        <h3>🚀 CUDA Warmup Efficiency Benchmark</h3>
     </div>
+    <div class=demo-content>
+        <iframe src=https://molbap-cuda-warmup-transformers.hf.space width=100% height=800px frameborder=0 style="border-radius: 8px; background: white;"></iframe>
     </div>
+    <div class=demo-footer>
+        Real CUDA warmup benchmarking with actual Transformers models. Measure the performance impact of the <code>caching_allocator_warmup</code> function at <code>transformers/src/transformers/modeling_utils.py:6186</code>. This interactive tool loads models twice - once with warmup disabled and once with warmup enabled - to demonstrate the significant loading time improvements.
     </div>
+</div>