Update index.html
index.html · +199 −72
@@ -40,6 +40,22 @@
 .reveal h3 { font-size: 1.4rem; line-height: 1.2; }
 .reveal p, .reveal li { font-size: 1.7rem; line-height: 1.35; }
 .reveal pre code { font-size: 0.67em; }
+/* Make <strong> more vibrant and aligned with the accent */
+.reveal strong {
+  color: var(--accent-secondary); /* orange highlight */
+  font-weight: 800;
+}
+
+/* Make <code> more obvious: change background, font, and padding */
+.reveal code {
+  background: rgba(255, 255, 255, 0.1);
+  color: #ffd080;
+  padding: 0.15em 0.4em;
+  border-radius: 0.3em;
+  font-family: 'Fira Code', monospace;
+  font-size: 0.95em;
+}
+
 @media (max-width: 1024px) { .reveal h1{font-size:2.2rem;} .reveal h2{font-size:1.6rem;} }
 .reveal table td, .reveal table th { font-size: 0.85rem; padding: 4px 8px; }
 body::after {
@@ -77,15 +93,17 @@
 " /> <!-- 1 · Opening -->
 </section>
 <section data-auto-animate>
-<
-
-
-
+<div style="display: flex; align-items: center; justify-content: center; gap: 1.2rem; margin-bottom: 1rem;" class="animate__animated animate__fadeInDown">
+  <img src="assets/torchlogo.png" alt="PyTorch Logo" style="height: 48px;" />
+  <span style="color: white; font-size: 2.4rem; font-weight: 700;">×</span>
+  <img src="assets/head_logo.svg" alt="Transformers Logo" style="height: 48px;" />
+</div>
+
 <h1 class="animate__animated animate__fadeInDown">PyTorch × Transformers Journey</h1>
 <h3 class="animate__animated animate__fadeInDown animate__delay-1s">Pythonicity, Autodiff & Modularity in Modern AI</h3>
 <p class="animate__animated animate__fadeInUp animate__delay-2s">Pablo Montalvo‑Leroux · ML Engineer @ Hugging Face</p>
 </section>
-
+
 <section>
 <h2>2016‑2018: Backprop & Birth Pangs</h2>
 <p>The journey began with uncertainty: back in 2016, machine learning was far from standardized. Tools like Theano and CNTK were fading, and many of us—myself included—were jumping framework to framework. It was a time of raw experimentation.</p>
@@ -100,11 +118,27 @@
 <section>
 <h2>Transformers × PyTorch: Reproducibility</h2>
 <p>That all changed with <code>pytorch-pretrained-bert</code>, the predecessor to Transformers. Suddenly, the magic of BERT was available in an interface that made sense.</p>
-
-
-<
-
-
+
+<div style="display: flex; gap: 2rem; justify-content: space-between; margin-top: 2rem;">
+  <div style="flex: 1; background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.6rem;">
+      🧩 Simpler Interface
+    </p>
+    <p>No static graphs, just Python functions and PyTorch modules.</p>
+  </div>
+  <div style="flex: 1; background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.6rem;">
+      ✨ Hackability
+    </p>
+    <p>Readable, hackable code meant results could be shared, reproduced, improved.</p>
+  </div>
+  <div style="flex: 1; background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.6rem;">
+      🚀 Community Shift
+    </p>
+    <p>This shifted the research community towards PyTorch.</p>
+  </div>
+</div>
 </section>
 
 
@@ -129,12 +163,24 @@
 
 <section>
 <h2>Clone the Paper Tonight → Tweak Tomorrow</h2>
-<p>PyTorch lowered the barrier to implementation
-
-
-<
-
-
+<p>PyTorch lowered the barrier to implementation — Transformers built on top of that simplicity.</p>
+
+<div style="display: flex; gap: 1.5rem; margin-top: 2rem;">
+  <div style="flex: 1; background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.5rem;">🔍 Live Debugging</p>
+    <p>2018: BERT fine-tunes meant <code>print(tensor)</code>, not <em>recompile & hope</em>.</p>
+  </div>
+
+  <div style="flex: 1; background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.5rem;">🤝 Fast Review</p>
+    <p>Patches were understandable and reproducible — merged quickly, verified quickly.</p>
+  </div>
+
+  <div style="flex: 1; background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.5rem;">⚡ Fast Iteration</p>
+    <p>Experiments shifted from <em>weeks</em> to <strong>hours</strong> — feedback cycles accelerated.</p>
+  </div>
+</div>
 </section>
 
 <!-- 6 · One Model · One File -->
@@ -169,43 +215,80 @@ class BertModel(PreTrainedModel):
 
 <section>
 <h2>Beyond Transformers: Ecosystem Reuse</h2>
-<p>
+<p><strong>Transformers</strong> makes modeling easy. <strong>vLLM</strong> makes inference fast.</p>
+
+<div style="display: flex; gap: 2rem; margin-top: 2rem;">
+  <div style="flex: 1;">
+    <p><strong>🔧 Prototype with Transformers:</strong></p>
+    <pre><code class="language-python" data-trim data-noescape>
+from transformers import pipeline
+
+pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B")
+print(pipe("The future of AI is")[0]["generated_text"])
+    </code></pre>
+  </div>
+  <div style="flex: 1;">
+    <img src="assets/vLLM-Full-Logo.png" alt="vLLM Illustration" style="border-radius: 1rem; box-shadow: 0 0 12px #000; width: 100%;" />
+  </div>
+</div>
+</section>
+<section>
+<h2>Deploy with vLLM — No Rewrite Needed</h2>
+<p><strong>vLLM</strong> supports <code>transformers</code> models out of the box. Just specify <code>model_impl="transformers"</code> if needed:</p>
 
 <pre><code class="language-python" data-trim data-noescape>
-from datasets import load_dataset
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from trl import DPOConfig, DPOTrainer
-
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
-dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
-training_args = DPOConfig(output_dir="Qwen2.5-0.5B-DPO")
-trainer = DPOTrainer(
-    model=model,
-    args=training_args,
-    train_dataset=dataset,
-    processing_class=tokenizer
-)
-trainer.train()
-</code></pre>
+from vllm import LLM, SamplingParams
 
-
+llm = LLM(model="meta-llama/Llama-3.2-1B", model_impl="transformers")
+params = SamplingParams(max_tokens=20)
+outputs = llm.generate("The future of AI is", sampling_params=params)
+print(outputs[0].outputs[0].text)
+</code></pre>
+<p class="fragment">We also support SGLang now, along with thousands of other libraries!</p>
+
+</section>
+<section>
+<h2 style="margin-bottom: 1rem;">
+  Transformers × PyTorch — Enabling the Community
+</h2>
+<img src="assets/transformers_as_ref.png" alt="Transformers as Reference"
+     style="
+       width: 120%;
+       height: 110%;
+       object-fit: cover;
+       margin-left: -2.5%;
+       margin-top: -2.5%;
+     " />
 </section>
 
+
 
 <!-- 8 · Paradigms come at a cost -->
 <section>
-<h2>Paradigms
-<
-<
-
-
-
-
-
-
+<h2>Paradigms Come at a Cost</h2>
+<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 1.5rem; margin-top: 2rem;">
+  <div style="background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.5rem;">📈 Community Growth</p>
+    <p>The scientific and engineering ML community thrived with Transformers.</p>
+  </div>
+
+  <div style="background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.5rem;">🔥 PyTorch Synergy</p>
+    <p>Transformers and PyTorch grew together — adoption fed back into both ecosystems.</p>
+  </div>
+
+  <div style="background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.5rem;">🛠️ Maintenance Pressure</p>
+    <p>We duplicate code on purpose — to preserve clarity, portability, and hackability.</p>
+  </div>
+
+  <div class="fragment" style="background: #2d2d2d; padding: 1.2rem; border-radius: 1rem; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">
+    <p style="font-weight: 800; color: var(--accent-primary); margin-bottom: 0.5rem;">🧬 Pythonic Modularity</p>
+    <p>The <strong>modularity</strong> of Python is never far :)</p>
+  </div>
+</div>
 </section>
-
+
 <!-- 8 · Back to Python: Mary Shelley Mode -->
 <section>
 <h2>Back to Python: Modular “Mary Shelley” Mode</h2>
@@ -500,7 +583,7 @@ class GlmForCausalLM(LlamaForCausalLM):
 "layer.*.self_attn.v_proj": "colwise",
 "layer.*.self_attn.o_proj": "rowwise"
 }</code></pre>
-<p
+<p>Translated to</p>
 
 <pre><code class="language-python" data-trim data-noescape>
 def translate_to_torch_parallel_style(style: str):
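The body of translate_to_torch_parallel_style lies outside this hunk. As a rough sketch of what such a helper plausibly does, assuming the built-in ColwiseParallel and RowwiseParallel styles from torch.distributed.tensor.parallel (not necessarily the repository's exact implementation):

    # Sketch only: map a declarative tp_plan entry ("colwise"/"rowwise")
    # to a torch.distributed tensor-parallel style object.
    from torch.distributed.tensor.parallel import ColwiseParallel, RowwiseParallel

    def translate_to_torch_parallel_style(style: str):
        if style == "colwise":
            return ColwiseParallel()
        if style == "rowwise":
            return RowwiseParallel()
        raise ValueError(f"Unsupported tensor parallel style: {style}")

With a plan like the JSON above, every module whose name matches a pattern is sharded in the declared direction, so the model definition itself never has to mention sharding.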
@@ -567,23 +650,45 @@ print(y)
 <p class="fragment">Same Transformer code — now with a <strong>3× faster</strong> GELU on A100s.</p>
 </section>
 
-
-<!-- 18 · API design lessons -->
 <section>
 <h2>API Design Lessons</h2>
-
-
-<
-
-
-
-
-
-
-
+
+<div style="display: flex; gap: 1.2rem; margin-top: 1.2rem;">
+  <div style="flex: 1; background: #2c2c2c; padding: 0.9rem; border-radius: 0.6rem; box-shadow: 0 3px 10px rgba(0,0,0,0.25); font-size: 1.35rem;">
+    <p style="font-weight: 700; color: var(--accent-primary); margin-bottom: 0.4rem;">🔍 Make Easy Things Obvious</p>
+    <p style="margin-bottom: 0.4rem;">Models load in <code>one line</code> — no boilerplate.</p>
+    <pre><code class="language-python" style="font-size: 0.75em;">model = AutoModel.from_pretrained("bert-base-uncased")</code></pre>
+  </div>
+
+  <div style="flex: 1; background: #2c2c2c; padding: 0.9rem; border-radius: 0.6rem; box-shadow: 0 3px 10px rgba(0,0,0,0.25); font-size: 1.35rem;">
+    <p style="font-weight: 700; color: var(--accent-primary); margin-bottom: 0.4rem;">📄 Paper-to-Repo Diff ≈ 0</p>
+    <p style="margin-bottom: 0.4rem;">Code reflects architecture directly.</p>
+    <pre><code class="language-python" style="font-size: 0.75em;">class LlamaAttention(nn.Module): ...</code></pre>
+  </div>
+</div>
+
+<div style="display: flex; gap: 1.2rem; margin-top: 1.2rem;">
+  <div style="flex: 1; background: #2c2c2c; padding: 0.9rem; border-radius: 0.6rem; box-shadow: 0 3px 10px rgba(0,0,0,0.25); font-size: 1.35rem;">
+    <p style="font-weight: 700; color: var(--accent-primary); margin-bottom: 0.4rem;">🚀 Prototyping → Production</p>
+    <p style="margin-bottom: 0.4rem;">Same model runs in vLLM for deployment:</p>
+    <pre><code class="language-python" style="font-size: 0.75em;">LLM(model="llama", model_impl="transformers")</code></pre>
+  </div>
+
+  <div style="flex: 1; background: #2c2c2c; padding: 0.9rem; border-radius: 0.6rem; box-shadow: 0 3px 10px rgba(0,0,0,0.25); font-size: 1.35rem;">
+    <p style="font-weight: 700; color: var(--accent-primary); margin-bottom: 0.4rem;">🎛️ Hide Sharding, Show Intent</p>
+    <p style="margin-bottom: 0.4rem;">Declarative TP via config:</p>
+    <pre><code class="language-json" style="font-size: 0.75em;">"q_proj": "colwise"</code></pre>
+  </div>
+</div>
+
+<p style="font-size: 1.35rem; margin-top: 1.6rem;">
+  We tune radios without building RF amps. ML should feel the same.
+</p>
+<p class="fragment" style="font-size: 1.35rem;">
+  …while empowering those who do build the amps.
+</p>
 </section>
-
-
+
 <!-- 14 · Rise of Multimodality -->
 <section>
 <h2>Rise of Multimodality</h2>
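The kernel behind the 3× GELU figure also sits outside this hunk. Purely as an illustration, assuming a tanh-approximation GELU compiled with PyTorch 2's torch.compile rather than the exact kernel shown in the talk:

    # Illustration: torch.compile can fuse this tanh-approximation GELU
    # into a single kernel; real speedups depend on hardware and shapes.
    import math
    import torch

    @torch.compile
    def gelu_tanh(x: torch.Tensor) -> torch.Tensor:
        inner = math.sqrt(2.0 / math.pi) * (x + 0.044715 * x.pow(3))
        return 0.5 * x * (1.0 + torch.tanh(inner))

    x = torch.randn(4096, 4096, device="cuda" if torch.cuda.is_available() else "cpu")
    y = gelu_tanh(x)
    print(y)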
@@ -609,21 +714,43 @@ model = AutoModelForConditionalGeneration.from_pretrained("Qwen/Qwen3-8B")
 <iframe src="assets/model_growth.html" width="80%" height="600" style="border:none;"></iframe>
 </section>
 
-<!-- 20 · Takeaways -->
 <section>
 <h2>Takeaways & The Future</h2>
-<
-<
-<
-
-
-
-
-
-
-
-
-
+<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 1rem; margin-top: 1.5rem;">
+  <div style="background: #2d2d2d; padding: 1rem; border-radius: 0.8rem;">
+    <p style="font-weight: 700; font-size: 1.4rem; color: var(--accent-primary); margin-bottom: 0.4rem;">
+      🤝 Symbiotic Growth
+    </p>
+    <p style="display: flex; align-items: center; gap: 0.4rem; font-size: 1.4rem;">
+      <img src="assets/torchlogo.png" alt="PyTorch" style="height: 1.4rem;" />
+      PyTorch & <code>transformers</code> grow together
+      <img src="assets/head_logo.svg" alt="Transformers" style="height: 1.4rem;" />
+    </p>
+  </div>
+
+  <div style="background: #2d2d2d; padding: 1rem; border-radius: 0.8rem;">
+    <p style="font-weight: 700; font-size: 1.4rem; color: var(--accent-primary); margin-bottom: 0.4rem;">
+      🧠 Pythonicity × Pragmatism
+    </p>
+    <p style="font-size: 1.4rem;">High-level code, low-level control — a winning combination for fast iteration.</p>
+  </div>
+
+  <div style="background: #2d2d2d; padding: 1rem; border-radius: 0.8rem;">
+    <p style="font-weight: 700; font-size: 1.4rem; color: var(--accent-primary); margin-bottom: 0.4rem;">
+      🚢 Models Ship Faster
+    </p>
+    <p style="font-size: 1.4rem;">Open-source models are scaling up — and landing in users' hands faster than ever.</p>
+  </div>
+
+  <div style="background: #2d2d2d; padding: 1rem; border-radius: 0.8rem;">
+    <p style="font-weight: 700; font-size: 1.4rem; color: var(--accent-primary); margin-bottom: 0.4rem;">
+      📚 Source of Truth for Model Definitions
+    </p>
+    <p style="font-size: 1.4rem;">We aim to be the canonical reference — while enabling the community to build, remix, and deploy at scale.</p>
+  </div>
+</div>
+
+<p style="margin-top: 1.5rem; font-size: 1.3rem;">
 <a href="https://huggingface.co/transformers/contribute" target="_blank">
   hf.co/transformers/contribute
 </a>