ror HF Staff committed on
Commit b52e342 · verified · 1 Parent(s): c1a3d27

stable-dash (#2)


- Added skipped count (793d7ea8a06b4867ffa923aab290aea0c3bee9a8)
- Added hardware specs (b38ad0fcb553cc42ea2f032872e00515ccc667f2)
- Added visual hints on summary page (ed6addfe7669661f964dfb6c14a604cb791cddb4)
- Added red text for failing models (746981fce24934b467d6b4f7574db19110186414)
- Added global failure rate (1b5e076ca21569847059dc8417282ea926707fc6)

Files changed (7)
  1. .gitignore +2 -1
  2. app.py +53 -13
  3. data.py +17 -8
  4. sample_amd.json +496 -366
  5. sample_nvidia.json +159 -460
  6. styles.css +11 -0
  7. summary_page.py +51 -1
.gitignore CHANGED
@@ -1 +1,2 @@
-__pycache__
+__pycache__
+__ignore*
app.py CHANGED
@@ -23,6 +23,51 @@ Ci_results.load_data()
 Ci_results.schedule_data_reload()
 
 
+# Function to check if a model has failures
+def model_has_failures(model_name):
+    """Check if a model has any failures (AMD or NVIDIA)."""
+    if Ci_results.df is None or Ci_results.df.empty:
+        return False
+
+    # Normalize model name to match DataFrame index
+    model_name_lower = model_name.lower()
+
+    # Check if model exists in DataFrame
+    if model_name_lower not in Ci_results.df.index:
+        return False
+
+    try:
+        row = Ci_results.df.loc[model_name_lower]
+
+        # Check for failures in both AMD and NVIDIA
+        amd_multi_failures = row.get('failed_multi_no_amd', 0) or 0
+        amd_single_failures = row.get('failed_single_no_amd', 0) or 0
+        nvidia_multi_failures = row.get('failed_multi_no_nvidia', 0) or 0
+        nvidia_single_failures = row.get('failed_single_no_nvidia', 0) or 0
+
+        total_failures = amd_multi_failures + amd_single_failures + nvidia_multi_failures + nvidia_single_failures
+        return total_failures > 0
+
+    except Exception:
+        return False
+
+
+# Function to get current description text
+def get_description_text():
+    """Get description text with integrated last update time."""
+    msg = [
+        "Transformer CI Dashboard",
+        "-",
+        "AMD runs on MI325",
+        "NVIDIA runs on A10",
+    ]
+    msg = ["**" + x + "**" for x in msg] + [""]
+    if Ci_results.last_update_time:
+        msg.append(f"*Result overview by model and hardware (last updated: {Ci_results.last_update_time})*")
+    else:
+        msg.append("*Result overview by model and hardware (loading...)*")
+    return "<br>".join(msg)
+
 # Load CSS from external file
 def load_css():
     try:
@@ -42,10 +87,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
     gr.Markdown("# 🤖 TCID", elem_classes=["sidebar-title"])
 
     # Description with integrated last update time
-    if Ci_results.last_update_time:
-        description_text = f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (last updated: {Ci_results.last_update_time})*\n"
-    else:
-        description_text = f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (loading...)*\n"
+    description_text = get_description_text()
     description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
 
     # Summary button at the top
@@ -66,11 +108,17 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
     model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
 
     for model_name in model_choices:
+        # Check if model has failures to determine styling
+        has_failures = model_has_failures(model_name)
+        button_classes = ["model-button"]
+        if has_failures:
+            button_classes.append("model-button-failed")
+
         btn = gr.Button(
             model_name,
            variant="secondary",
            size="sm",
-            elem_classes=["model-button"]
+            elem_classes=button_classes
        )
        model_buttons.append(btn)
 
@@ -143,14 +191,6 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
        outputs=[summary_display, detail_view]
    )
 
-    # Function to get current description text
-    def get_description_text():
-        """Get description text with integrated last update time."""
-        if Ci_results.last_update_time:
-            return f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (last updated: {Ci_results.last_update_time})*\n"
-        else:
-            return f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (loading...)*\n"
-
    # Function to get CI job links
    def get_ci_links():
        """Get CI job links from the most recent data."""
data.py CHANGED
@@ -86,6 +86,8 @@ def get_sample_data() -> pd.DataFrame:
     [
         "success_amd",
         "success_nvidia",
+        "skipped_amd",
+        "skipped_nvidia",
         "failed_multi_no_amd",
         "failed_multi_no_nvidia",
         "failed_single_no_amd",
@@ -104,15 +106,22 @@ def get_sample_data() -> pd.DataFrame:
     filtered_joined.index = "sample_" + filtered_joined.index
     return filtered_joined
 
+def safe_extract(row: pd.DataFrame, key: str) -> int:
+    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
+
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe
-    success_amd = int(row.get('success_amd', 0)) if pd.notna(row.get('success_amd', 0)) else 0
-    success_nvidia = int(row.get('success_nvidia', 0)) if pd.notna(row.get('success_nvidia', 0)) else 0
-    failed_multi_amd = int(row.get('failed_multi_no_amd', 0)) if pd.notna(row.get('failed_multi_no_amd', 0)) else 0
-    failed_multi_nvidia = int(row.get('failed_multi_no_nvidia', 0)) if pd.notna(row.get('failed_multi_no_nvidia', 0)) else 0
-    failed_single_amd = int(row.get('failed_single_no_amd', 0)) if pd.notna(row.get('failed_single_no_amd', 0)) else 0
-    failed_single_nvidia = int(row.get('failed_single_no_nvidia', 0)) if pd.notna(row.get('failed_single_no_nvidia', 0)) else 0
+    success_nvidia = safe_extract(row, "success_nvidia")
+    success_amd = safe_extract(row, "success_amd")
+
+    skipped_nvidia = safe_extract(row, "skipped_nvidia")
+    skipped_amd = safe_extract(row, "skipped_amd")
+
+    failed_multi_amd = safe_extract(row, 'failed_multi_no_amd')
+    failed_multi_nvidia = safe_extract(row, 'failed_multi_no_nvidia')
+    failed_single_amd = safe_extract(row, 'failed_single_no_amd')
+    failed_single_nvidia = safe_extract(row, 'failed_single_no_nvidia')
     # Calculate total failures
     total_failed_amd = failed_multi_amd + failed_single_amd
     total_failed_nvidia = failed_multi_nvidia + failed_single_nvidia
@@ -120,13 +129,13 @@ def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int],
     amd_stats = {
         'passed': success_amd,
         'failed': total_failed_amd,
-        'skipped': 0,  # Not available in this dataset
+        'skipped': skipped_amd,
         'error': 0  # Not available in this dataset
     }
     nvidia_stats = {
         'passed': success_nvidia,
         'failed': total_failed_nvidia,
-        'skipped': 0,  # Not available in this dataset
+        'skipped': skipped_nvidia,
         'error': 0  # Not available in this dataset
     }
     return amd_stats, nvidia_stats, failed_multi_amd, failed_single_amd, failed_multi_nvidia, failed_single_nvidia
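
`safe_extract` collapses the repeated NaN/missing-value guard into one helper. A quick illustration with a synthetic row (the committed annotation says `pd.DataFrame`, but what is actually passed is a `pd.Series` row, used below):

```python
import pandas as pd

def safe_extract(row: pd.Series, key: str) -> int:
    # Missing keys fall back to 0; NaN values (e.g. from sparse joins) also map to 0.
    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0

row = pd.Series({"success_amd": 80, "skipped_amd": float("nan")})
print(safe_extract(row, "success_amd"))     # 80
print(safe_extract(row, "skipped_amd"))     # 0 (NaN guarded)
print(safe_extract(row, "skipped_nvidia"))  # 0 (column absent)
```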
sample_amd.json CHANGED
@@ -52,20 +52,22 @@
52
  "multi": 0
53
  }
54
  },
 
55
  "success": 80,
56
- "time_spent": "5.47, 1.74, ",
 
57
  "failures": {},
58
  "job_link": {
59
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409911",
60
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410689"
61
  }
62
  },
63
  "models_bert": {
64
  "failed": {
65
  "PyTorch": {
66
  "unclassified": 0,
67
- "single": 2,
68
- "multi": 2
69
  },
70
  "TensorFlow": {
71
  "unclassified": 0,
@@ -113,33 +115,14 @@
113
  "multi": 0
114
  }
115
  },
 
116
  "success": 239,
117
- "time_spent": "0:02:22, 0:02:16, ",
118
- "failures": {
119
- "single": [
120
- {
121
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
122
- "trace": "(line 4140) KeyError: 'eager'"
123
- },
124
- {
125
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
126
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
127
- }
128
- ],
129
- "multi": [
130
- {
131
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
132
- "trace": "(line 4140) KeyError: 'eager'"
133
- },
134
- {
135
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
136
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
137
- }
138
- ]
139
- },
140
  "job_link": {
141
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409938",
142
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410716"
143
  }
144
  },
145
  "models_clip": {
@@ -195,12 +178,14 @@
195
  "multi": 0
196
  }
197
  },
 
198
  "success": 288,
199
- "time_spent": "0:03:29, 0:03:30, ",
 
200
  "failures": {},
201
  "job_link": {
202
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410734",
203
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409965"
204
  }
205
  },
206
  "models_detr": {
@@ -256,20 +241,22 @@
256
  "multi": 0
257
  }
258
  },
 
259
  "success": 77,
260
- "time_spent": "0:01:13, 0:01:49, ",
 
261
  "failures": {},
262
  "job_link": {
263
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410020",
264
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410868"
265
  }
266
  },
267
  "models_gemma3": {
268
  "failed": {
269
  "PyTorch": {
270
  "unclassified": 0,
271
- "single": 12,
272
- "multi": 13
273
  },
274
  "TensorFlow": {
275
  "unclassified": 0,
@@ -317,34 +304,12 @@
317
  "multi": 0
318
  }
319
  },
320
- "success": 341,
321
- "time_spent": "0:07:52, 0:09:43, ",
 
 
322
  "failures": {
323
  "single": [
324
- {
325
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
326
- "trace": "(line 4140) KeyError: 'eager'"
327
- },
328
- {
329
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
330
- "trace": "(line 4219) AssertionError: Tensor-likes are not close!"
331
- },
332
- {
333
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
334
- "trace": "(line 4140) KeyError: 'eager'"
335
- },
336
- {
337
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
338
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
339
- },
340
- {
341
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
342
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
343
- },
344
- {
345
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
346
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
347
- },
348
  {
349
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
350
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
@@ -355,7 +320,7 @@
355
  },
356
  {
357
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
358
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
359
  },
360
  {
361
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
@@ -371,34 +336,10 @@
371
  }
372
  ],
373
  "multi": [
374
- {
375
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
376
- "trace": "(line 4140) KeyError: 'eager'"
377
- },
378
- {
379
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
380
- "trace": "(line 4219) AssertionError: Tensor-likes are not close!"
381
- },
382
- {
383
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
384
- "trace": "(line 4140) KeyError: 'eager'"
385
- },
386
  {
387
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
388
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
389
  },
390
- {
391
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
392
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
393
- },
394
- {
395
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
396
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
397
- },
398
- {
399
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
400
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
401
- },
402
  {
403
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
404
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
@@ -409,7 +350,7 @@
409
  },
410
  {
411
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
412
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
413
  },
414
  {
415
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
@@ -426,8 +367,8 @@
426
  ]
427
  },
428
  "job_link": {
429
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410076",
430
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410943"
431
  }
432
  },
433
  "models_gemma3n": {
@@ -483,20 +424,22 @@
483
  "multi": 0
484
  }
485
  },
486
- "success": 0,
487
- "time_spent": ".56, .97, ",
 
 
488
  "failures": {},
489
  "job_link": {
490
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410944",
491
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410122"
492
  }
493
  },
494
  "models_got_ocr2": {
495
  "failed": {
496
  "PyTorch": {
497
  "unclassified": 0,
498
- "single": 1,
499
- "multi": 2
500
  },
501
  "TensorFlow": {
502
  "unclassified": 0,
@@ -544,37 +487,22 @@
544
  "multi": 0
545
  }
546
  },
547
- "success": 146,
548
- "time_spent": "0:01:56, 0:01:39, ",
549
- "failures": {
550
- "multi": [
551
- {
552
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
553
- "trace": "(line 4140) KeyError: 'eager'"
554
- },
555
- {
556
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_multi_gpu_data_parallel_forward",
557
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
558
- }
559
- ],
560
- "single": [
561
- {
562
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
563
- "trace": "(line 4140) KeyError: 'eager'"
564
- }
565
- ]
566
- },
567
  "job_link": {
568
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410969",
569
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410123"
570
  }
571
  },
572
  "models_gpt2": {
573
  "failed": {
574
  "PyTorch": {
575
  "unclassified": 0,
576
- "single": 1,
577
- "multi": 1
578
  },
579
  "TensorFlow": {
580
  "unclassified": 0,
@@ -622,33 +550,22 @@
622
  "multi": 0
623
  }
624
  },
 
625
  "success": 249,
626
- "time_spent": "0:04:53, 0:02:05, ",
627
- "failures": {
628
- "multi": [
629
- {
630
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
631
- "trace": "(line 4140) KeyError: 'eager'"
632
- }
633
- ],
634
- "single": [
635
- {
636
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
637
- "trace": "(line 4140) KeyError: 'eager'"
638
- }
639
- ]
640
- },
641
  "job_link": {
642
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410990",
643
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410088"
644
  }
645
  },
646
  "models_internvl": {
647
  "failed": {
648
  "PyTorch": {
649
  "unclassified": 0,
650
- "single": 2,
651
- "multi": 3
652
  },
653
  "TensorFlow": {
654
  "unclassified": 0,
@@ -696,28 +613,18 @@
696
  "multi": 0
697
  }
698
  },
699
- "success": 252,
700
- "time_spent": "0:02:54, 0:02:55, ",
 
 
701
  "failures": {
702
  "multi": [
703
- {
704
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
705
- "trace": "(line 4140) KeyError: 'eager'"
706
- },
707
- {
708
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_multi_gpu_data_parallel_forward",
709
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
710
- },
711
  {
712
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
713
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
714
  }
715
  ],
716
  "single": [
717
- {
718
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
719
- "trace": "(line 4140) KeyError: 'eager'"
720
- },
721
  {
722
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
723
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
@@ -725,16 +632,16 @@
725
  ]
726
  },
727
  "job_link": {
728
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527411014",
729
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410165"
730
  }
731
  },
732
  "models_llama": {
733
  "failed": {
734
  "PyTorch": {
735
  "unclassified": 0,
736
- "single": 2,
737
- "multi": 3
738
  },
739
  "TensorFlow": {
740
  "unclassified": 0,
@@ -782,28 +689,18 @@
782
  "multi": 0
783
  }
784
  },
785
- "success": 232,
786
- "time_spent": "0:10:51, 0:23:47, ",
 
 
787
  "failures": {
788
  "multi": [
789
- {
790
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
791
- "trace": "(line 4140) KeyError: 'eager'"
792
- },
793
- {
794
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_multi_gpu_data_parallel_forward",
795
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
796
- },
797
  {
798
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
799
  "trace": "(line 727) AssertionError: False is not true"
800
  }
801
  ],
802
  "single": [
803
- {
804
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
805
- "trace": "(line 4140) KeyError: 'eager'"
806
- },
807
  {
808
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
809
  "trace": "(line 727) AssertionError: False is not true"
@@ -811,16 +708,16 @@
811
  ]
812
  },
813
  "job_link": {
814
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527411041",
815
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410199"
816
  }
817
  },
818
  "models_llava": {
819
  "failed": {
820
  "PyTorch": {
821
  "unclassified": 0,
822
- "single": 4,
823
- "multi": 5
824
  },
825
  "TensorFlow": {
826
  "unclassified": 0,
@@ -868,44 +765,18 @@
868
  "multi": 0
869
  }
870
  },
871
- "success": 202,
872
- "time_spent": "0:02:38, 0:02:51, ",
 
 
873
  "failures": {
874
  "multi": [
875
- {
876
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
877
- "trace": "(line 4140) KeyError: 'eager'"
878
- },
879
- {
880
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
881
- "trace": "(line 727) AssertionError: False is not true"
882
- },
883
- {
884
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
885
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
886
- },
887
- {
888
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
889
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
890
- },
891
  {
892
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
893
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
894
  }
895
  ],
896
  "single": [
897
- {
898
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
899
- "trace": "(line 4140) KeyError: 'eager'"
900
- },
901
- {
902
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
903
- "trace": "(line 727) AssertionError: False is not true"
904
- },
905
- {
906
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
907
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
908
- },
909
  {
910
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
911
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
@@ -913,16 +784,16 @@
913
  ]
914
  },
915
  "job_link": {
916
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527411134",
917
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410218"
918
  }
919
  },
920
  "models_mistral3": {
921
  "failed": {
922
  "PyTorch": {
923
  "unclassified": 0,
924
- "single": 2,
925
- "multi": 3
926
  },
927
  "TensorFlow": {
928
  "unclassified": 0,
@@ -970,28 +841,18 @@
970
  "multi": 0
971
  }
972
  },
973
- "success": 198,
974
- "time_spent": "0:14:37, 0:05:43, ",
 
 
975
  "failures": {
976
  "single": [
977
- {
978
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
979
- "trace": "(line 4140) KeyError: 'eager'"
980
- },
981
  {
982
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
983
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
984
  }
985
  ],
986
  "multi": [
987
- {
988
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
989
- "trace": "(line 4140) KeyError: 'eager'"
990
- },
991
- {
992
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_multi_gpu_data_parallel_forward",
993
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
994
- },
995
  {
996
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
997
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
@@ -999,16 +860,16 @@
999
  ]
1000
  },
1001
  "job_link": {
1002
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409417",
1003
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410265"
1004
  }
1005
  },
1006
  "models_modernbert": {
1007
  "failed": {
1008
  "PyTorch": {
1009
  "unclassified": 0,
1010
- "single": 5,
1011
- "multi": 5
1012
  },
1013
  "TensorFlow": {
1014
  "unclassified": 0,
@@ -1056,65 +917,22 @@
1056
  "multi": 0
1057
  }
1058
  },
1059
- "success": 132,
1060
- "time_spent": "0:02:22, 0:01:49, ",
1061
- "failures": {
1062
- "multi": [
1063
- {
1064
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1065
- "trace": "(line 715) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1066
- },
1067
- {
1068
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1069
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1070
- },
1071
- {
1072
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1073
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1074
- },
1075
- {
1076
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1077
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1078
- },
1079
- {
1080
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1081
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1082
- }
1083
- ],
1084
- "single": [
1085
- {
1086
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1087
- "trace": "(line 715) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1088
- },
1089
- {
1090
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1091
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1092
- },
1093
- {
1094
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1095
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1096
- },
1097
- {
1098
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1099
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1100
- },
1101
- {
1102
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1103
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1104
- }
1105
- ]
1106
- },
1107
  "job_link": {
1108
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410294",
1109
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409446"
1110
  }
1111
  },
1112
  "models_qwen2": {
1113
  "failed": {
1114
  "PyTorch": {
1115
  "unclassified": 0,
1116
- "single": 2,
1117
- "multi": 3
1118
  },
1119
  "TensorFlow": {
1120
  "unclassified": 0,
@@ -1162,45 +980,35 @@
1162
  "multi": 0
1163
  }
1164
  },
1165
- "success": 214,
1166
- "time_spent": "0:02:23, 0:02:39, ",
 
 
1167
  "failures": {
1168
  "multi": [
1169
- {
1170
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1171
- "trace": "(line 4140) KeyError: 'eager'"
1172
- },
1173
- {
1174
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_multi_gpu_data_parallel_forward",
1175
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1176
- },
1177
  {
1178
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1179
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1180
  }
1181
  ],
1182
  "single": [
1183
- {
1184
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1185
- "trace": "(line 4140) KeyError: 'eager'"
1186
- },
1187
  {
1188
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1189
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1190
  }
1191
  ]
1192
  },
1193
  "job_link": {
1194
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410392",
1195
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409572"
1196
  }
1197
  },
1198
  "models_qwen2_5_omni": {
1199
  "failed": {
1200
  "PyTorch": {
1201
  "unclassified": 0,
1202
- "single": 1,
1203
- "multi": 3
1204
  },
1205
  "TensorFlow": {
1206
  "unclassified": 0,
@@ -1248,33 +1056,35 @@
1248
  "multi": 0
1249
  }
1250
  },
 
1251
  "success": 167,
1252
- "time_spent": "0:06:59, 0:02:55, ",
 
1253
  "failures": {
1254
  "multi": [
1255
  {
1256
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
1257
  "trace": "(line 715) AssertionError: Items in the second set but not the first:"
1258
  },
1259
- {
1260
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
1261
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1262
- },
1263
  {
1264
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1265
- "trace": "(line 715) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1266
  }
1267
  ],
1268
  "single": [
 
 
 
 
1269
  {
1270
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1271
- "trace": "(line 715) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1272
  }
1273
  ]
1274
  },
1275
  "job_link": {
1276
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410407",
1277
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409568"
1278
  }
1279
  },
1280
  "models_qwen2_5_vl": {
@@ -1282,7 +1092,7 @@
1282
  "PyTorch": {
1283
  "unclassified": 0,
1284
  "single": 1,
1285
- "multi": 2
1286
  },
1287
  "TensorFlow": {
1288
  "unclassified": 0,
@@ -1330,14 +1140,12 @@
1330
  "multi": 0
1331
  }
1332
  },
1333
- "success": 204,
1334
- "time_spent": "0:03:59, 0:03:58, ",
 
 
1335
  "failures": {
1336
  "multi": [
1337
- {
1338
- "line": "tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
1339
- "trace": "(line 406) AssertionError: Tensor-likes are not equal!"
1340
- },
1341
  {
1342
  "line": "tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions",
1343
  "trace": "(line 715) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']"
@@ -1351,16 +1159,16 @@
1351
  ]
1352
  },
1353
  "job_link": {
1354
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410397",
1355
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409587"
1356
  }
1357
  },
1358
  "models_smolvlm": {
1359
  "failed": {
1360
  "PyTorch": {
1361
  "unclassified": 0,
1362
- "single": 1,
1363
- "multi": 1
1364
  },
1365
  "TensorFlow": {
1366
  "unclassified": 0,
@@ -1408,33 +1216,22 @@
1408
  "multi": 0
1409
  }
1410
  },
 
1411
  "success": 323,
1412
- "time_spent": "0:02:49, 0:02:35, ",
1413
- "failures": {
1414
- "single": [
1415
- {
1416
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1417
- "trace": "(line 4140) KeyError: 'eager'"
1418
- }
1419
- ],
1420
- "multi": [
1421
- {
1422
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1423
- "trace": "(line 4140) KeyError: 'eager'"
1424
- }
1425
- ]
1426
- },
1427
  "job_link": {
1428
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409653",
1429
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410495"
1430
  }
1431
  },
1432
  "models_t5": {
1433
  "failed": {
1434
  "PyTorch": {
1435
  "unclassified": 0,
1436
- "single": 3,
1437
- "multi": 4
1438
  },
1439
  "TensorFlow": {
1440
  "unclassified": 0,
@@ -1482,14 +1279,12 @@
1482
  "multi": 0
1483
  }
1484
  },
 
1485
  "success": 254,
1486
- "time_spent": "0:05:05, 0:03:30, ",
 
1487
  "failures": {
1488
  "multi": [
1489
- {
1490
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1491
- "trace": "(line 4140) KeyError: 'eager'"
1492
- },
1493
  {
1494
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
1495
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
@@ -1504,10 +1299,6 @@
1504
  }
1505
  ],
1506
  "single": [
1507
- {
1508
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1509
- "trace": "(line 4140) KeyError: 'eager'"
1510
- },
1511
  {
1512
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
1513
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"
@@ -1519,8 +1310,8 @@
1519
  ]
1520
  },
1521
  "job_link": {
1522
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410524",
1523
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409705"
1524
  }
1525
  },
1526
  "models_vit": {
@@ -1576,12 +1367,14 @@
1576
  "multi": 0
1577
  }
1578
  },
 
1579
  "success": 135,
1580
- "time_spent": "0:02:19, 0:01:21, ",
 
1581
  "failures": {},
1582
  "job_link": {
1583
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410589",
1584
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409755"
1585
  }
1586
  },
1587
  "models_wav2vec2": {
@@ -1637,20 +1430,22 @@
1637
  "multi": 0
1638
  }
1639
  },
1640
- "success": 0,
1641
- "time_spent": "0.96, .03, ",
 
 
1642
  "failures": {},
1643
  "job_link": {
1644
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410594",
1645
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409797"
1646
  }
1647
  },
1648
  "models_whisper": {
1649
  "failed": {
1650
  "PyTorch": {
1651
  "unclassified": 0,
1652
- "single": 0,
1653
- "multi": 0
1654
  },
1655
  "TensorFlow": {
1656
  "unclassified": 0,
@@ -1698,12 +1493,347 @@
1698
  "multi": 0
1699
  }
1700
  },
1701
- "success": 0,
1702
- "time_spent": ".19, .20, ",
1703
- "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1704
  "job_link": {
1705
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409794",
1706
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410606"
1707
  }
1708
  }
1709
- }
 
52
  "multi": 0
53
  }
54
  },
55
+ "errors": 0,
56
  "success": 80,
57
+ "skipped": 2,
58
+ "time_spent": "0.99, 2.41, ",
59
  "failures": {},
60
  "job_link": {
61
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329937",
62
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330183"
63
  }
64
  },
65
  "models_bert": {
66
  "failed": {
67
  "PyTorch": {
68
  "unclassified": 0,
69
+ "single": 0,
70
+ "multi": 0
71
  },
72
  "TensorFlow": {
73
  "unclassified": 0,
 
115
  "multi": 0
116
  }
117
  },
118
+ "errors": 0,
119
  "success": 239,
120
+ "skipped": 111,
121
+ "time_spent": "8.85, 0:01:00, ",
122
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  "job_link": {
124
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329946",
125
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330199"
126
  }
127
  },
128
  "models_clip": {
 
178
  "multi": 0
179
  }
180
  },
181
+ "errors": 0,
182
  "success": 288,
183
+ "skipped": 590,
184
+ "time_spent": "0:01:55, 0:01:58, ",
185
  "failures": {},
186
  "job_link": {
187
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330217",
188
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329991"
189
  }
190
  },
191
  "models_detr": {
 
241
  "multi": 0
242
  }
243
  },
244
+ "errors": 0,
245
  "success": 77,
246
+ "skipped": 159,
247
+ "time_spent": "4.40, 6.77, ",
248
  "failures": {},
249
  "job_link": {
250
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330035",
251
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330267"
252
  }
253
  },
254
  "models_gemma3": {
255
  "failed": {
256
  "PyTorch": {
257
  "unclassified": 0,
258
+ "single": 6,
259
+ "multi": 7
260
  },
261
  "TensorFlow": {
262
  "unclassified": 0,
 
304
  "multi": 0
305
  }
306
  },
307
+ "errors": 0,
308
+ "success": 349,
309
+ "skipped": 260,
310
+ "time_spent": "0:11:14, 0:11:08, ",
311
  "failures": {
312
  "single": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  {
314
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
315
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
 
320
  },
321
  {
322
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
323
+ "trace": "(line 715) AssertionError: Lists differ: [\"user\\nYou are a helpful assistant.\\n\\nHe[678 chars]h a'] != ['user\\nYou are a helpful assistant.\\n\\nHe[658 chars]h a']"
324
  },
325
  {
326
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
 
336
  }
337
  ],
338
  "multi": [
 
 
 
 
 
 
 
 
 
 
 
 
339
  {
340
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
341
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
342
  },
 
 
 
 
 
 
 
 
 
 
 
 
343
  {
344
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
345
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
 
350
  },
351
  {
352
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
353
+ "trace": "(line 715) AssertionError: Lists differ: [\"user\\nYou are a helpful assistant.\\n\\nHe[678 chars]h a'] != ['user\\nYou are a helpful assistant.\\n\\nHe[658 chars]h a']"
354
  },
355
  {
356
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
 
367
  ]
368
  },
369
  "job_link": {
370
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330061",
371
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330319"
372
  }
373
  },
374
  "models_gemma3n": {
 
424
  "multi": 0
425
  }
426
  },
427
+ "errors": 0,
428
+ "success": 197,
429
+ "skipped": 635,
430
+ "time_spent": "0:01:06, 0:01:08, ",
431
  "failures": {},
432
  "job_link": {
433
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330294",
434
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330077"
435
  }
436
  },
437
  "models_got_ocr2": {
438
  "failed": {
439
  "PyTorch": {
440
  "unclassified": 0,
441
+ "single": 0,
442
+ "multi": 0
443
  },
444
  "TensorFlow": {
445
  "unclassified": 0,
 
487
  "multi": 0
488
  }
489
  },
490
+ "errors": 0,
491
+ "success": 147,
492
+ "skipped": 163,
493
+ "time_spent": "0:01:03, 0:01:01, ",
494
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  "job_link": {
496
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330314",
497
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330094"
498
  }
499
  },
500
  "models_gpt2": {
501
  "failed": {
502
  "PyTorch": {
503
  "unclassified": 0,
504
+ "single": 0,
505
+ "multi": 0
506
  },
507
  "TensorFlow": {
508
  "unclassified": 0,
 
550
  "multi": 0
551
  }
552
  },
553
+ "errors": 0,
554
  "success": 249,
555
+ "skipped": 99,
556
+ "time_spent": "0:02:01, 0:01:46, ",
557
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
558
  "job_link": {
559
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330311",
560
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330113"
561
  }
562
  },
563
  "models_internvl": {
564
  "failed": {
565
  "PyTorch": {
566
  "unclassified": 0,
567
+ "single": 1,
568
+ "multi": 1
569
  },
570
  "TensorFlow": {
571
  "unclassified": 0,
 
613
  "multi": 0
614
  }
615
  },
616
+ "errors": 0,
617
+ "success": 253,
618
+ "skipped": 107,
619
+ "time_spent": "0:01:50, 0:02:00, ",
620
  "failures": {
621
  "multi": [
 
 
 
 
 
 
 
 
622
  {
623
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
624
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
625
  }
626
  ],
627
  "single": [
 
 
 
 
628
  {
629
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
630
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
 
632
  ]
633
  },
634
  "job_link": {
635
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330361",
636
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330105"
637
  }
638
  },
639
  "models_llama": {
640
  "failed": {
641
  "PyTorch": {
642
  "unclassified": 0,
643
+ "single": 1,
644
+ "multi": 1
645
  },
646
  "TensorFlow": {
647
  "unclassified": 0,
 
689
  "multi": 0
690
  }
691
  },
692
+ "errors": 0,
693
+ "success": 235,
694
+ "skipped": 101,
695
+ "time_spent": "0:03:15, 0:02:51, ",
696
  "failures": {
697
  "multi": [
 
 
 
 
 
 
 
 
698
  {
699
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
700
  "trace": "(line 727) AssertionError: False is not true"
701
  }
702
  ],
703
  "single": [
 
 
 
 
704
  {
705
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
706
  "trace": "(line 727) AssertionError: False is not true"
 
708
  ]
709
  },
710
  "job_link": {
711
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330531",
712
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330138"
713
  }
714
  },
715
  "models_llava": {
716
  "failed": {
717
  "PyTorch": {
718
  "unclassified": 0,
719
+ "single": 1,
720
+ "multi": 1
721
  },
722
  "TensorFlow": {
723
  "unclassified": 0,
 
765
  "multi": 0
766
  }
767
  },
768
+ "errors": 0,
769
+ "success": 206,
770
+ "skipped": 124,
771
+ "time_spent": "0:03:58, 0:04:34, ",
772
  "failures": {
773
  "multi": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
774
  {
775
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
776
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
777
  }
778
  ],
779
  "single": [
 
 
 
 
 
 
 
 
 
 
 
 
780
  {
781
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
782
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
 
784
  ]
785
  },
786
  "job_link": {
787
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330406",
788
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330161"
789
  }
790
  },
791
  "models_mistral3": {
792
  "failed": {
793
  "PyTorch": {
794
  "unclassified": 0,
795
+ "single": 1,
796
+ "multi": 1
797
  },
798
  "TensorFlow": {
799
  "unclassified": 0,
 
841
  "multi": 0
842
  }
843
  },
844
+ "errors": 0,
845
+ "success": 199,
846
+ "skipped": 105,
847
+ "time_spent": "0:04:34, 0:04:39, ",
848
  "failures": {
849
  "single": [
 
 
 
 
850
  {
851
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
852
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
853
  }
854
  ],
855
  "multi": [
 
 
 
 
 
 
 
 
856
  {
857
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
858
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
 
860
  ]
861
  },
862
  "job_link": {
863
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330418",
864
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329678"
865
  }
866
  },
867
  "models_modernbert": {
868
  "failed": {
869
  "PyTorch": {
870
  "unclassified": 0,
871
+ "single": 0,
872
+ "multi": 0
873
  },
874
  "TensorFlow": {
875
  "unclassified": 0,
 
917
  "multi": 0
918
  }
919
  },
920
+ "errors": 0,
921
+ "success": 142,
922
+ "skipped": 102,
923
+ "time_spent": "0:01:03, 9.02, ",
924
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
925
  "job_link": {
926
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329712",
927
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330429"
928
  }
929
  },
930
  "models_qwen2": {
931
  "failed": {
932
  "PyTorch": {
933
  "unclassified": 0,
934
+ "single": 1,
935
+ "multi": 1
936
  },
937
  "TensorFlow": {
938
  "unclassified": 0,
 
980
  "multi": 0
981
  }
982
  },
983
+ "errors": 0,
984
+ "success": 217,
985
+ "skipped": 113,
986
+ "time_spent": "0:01:08, 0:01:05, ",
987
  "failures": {
988
  "multi": [
 
 
 
 
 
 
 
 
989
  {
990
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
991
+ "trace": "(line 715) AssertionError: Lists differ: ['My [35 chars], organic, gluten free, vegan, and vegetarian. I love to use'] != ['My [35 chars], organic, gluten free, vegan, and free from preservatives. I']"
992
  }
993
  ],
994
  "single": [
 
 
 
 
995
  {
996
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
997
+ "trace": "(line 715) AssertionError: Lists differ: ['My [35 chars], organic, gluten free, vegan, and vegetarian. I love to use'] != ['My [35 chars], organic, gluten free, vegan, and free from preservatives. I']"
998
  }
999
  ]
1000
  },
1001
  "job_link": {
1002
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329761",
1003
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330508"
1004
  }
1005
  },
  "models_qwen2_5_omni": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 2,
+ "multi": 2
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 167,
+ "skipped": 141,
+ "time_spent": "0:02:23, 0:01:53, ",
  "failures": {
  "multi": [
  {
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
  "trace": "(line 715) AssertionError: Items in the second set but not the first:"
  },

  {
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
+ "trace": "(line 715) AssertionError: Lists differ: [\"sys[293 chars]s shattering, and the dog appears to be a Labrador Retriever.\"] != [\"sys[293 chars]s shattering, and the dog is a Labrador Retriever.\"]"
  }
  ],
  "single": [
+ {
+ "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test",
+ "trace": "(line 700) requests.exceptions.ConnectionError: HTTPSConnectionPool(host='qianwen-res.oss-accelerate-overseas.aliyuncs.com', port=443): Max retries exceeded with url: /Qwen2-VL/demo_small.jpg (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7cb8c91d02f0>: Failed to establish a new connection: [Errno -2] Name or service not known'))"
+ },
  {
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
+ "trace": "(line 715) AssertionError: Lists differ: [\"sys[109 chars]d is a glass shattering, and the dog is a Labr[187 chars]er.\"] != [\"sys[109 chars]d is glass shattering, and the dog is a Labrad[185 chars]er.\"]"
  }
  ]
  },
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329806",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330503"
  }
  },
  "models_qwen2_5_vl": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
  "single": 1,
+ "multi": 1
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
+ "success": 205,
+ "skipped": 113,
+ "time_spent": "0:02:32, 0:02:29, ",
  "failures": {
  "multi": [

  {
  "line": "tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions",
  "trace": "(line 715) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']"

  ]
  },
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329760",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330498"
  }
  },
  "models_smolvlm": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 0,
+ "multi": 0
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 323,
+ "skipped": 231,
+ "time_spent": "0:01:08, 0:01:13, ",
+ "failures": {},

  "job_link": {
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330553",
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329835"
  }
  },
  "models_t5": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 2,
+ "multi": 3
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 254,
+ "skipped": 325,
+ "time_spent": "0:01:50, 0:01:40, ",
  "failures": {
  "multi": [

  {
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"

  }
  ],
  "single": [

  {
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"

  ]
  },
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329815",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330559"
  }
  },
  "models_vit": {

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 135,
+ "skipped": 93,
+ "time_spent": "9.85, 7.74, ",
  "failures": {},
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329875",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330596"
  }
  },
  "models_wav2vec2": {

  "multi": 0
  }
  },
+ "errors": 0,
+ "success": 292,
+ "skipped": 246,
+ "time_spent": "0:01:56, 0:01:54, ",
  "failures": {},
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329877",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330632"
  }
  },
  "models_whisper": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 40,
+ "multi": 42
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
+ "success": 537,
+ "skipped": 337,
+ "time_spent": "0:03:23, 0:03:02, ",
+ "failures": {
+ "single": [
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_distil_token_timestamp_generation",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_forced_decoder_ids",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids_task_language",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_language_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_en_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_non_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_specaugment_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation_long_form",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_batch_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_empty_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_no_speech_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_prompt_ids",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_beam",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ }
+ ],
+ "multi": [
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_multi_gpu_data_parallel_forward",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_distil_token_timestamp_generation",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_forced_decoder_ids",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids",
+ "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids_task_language",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_language_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_en_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_non_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_specaugment_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation_long_form",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_batch_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_empty_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_empty_longform_multi_gpu",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_no_speech_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_prompt_ids",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_beam",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ }
+ ]
+ },
  "job_link": {
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330636",
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329883"
  }
  }
+ }
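Every model block in these sample files shares the same shape: per-framework `failed` counts split into `single`/`multi`, `errors`/`success`/`skipped` totals, a `failures` list of `line`/`trace` entries per GPU setup, and `job_link` URLs. As a minimal sketch of how a report like this can be rolled up into the global failure rate the dashboard now surfaces — assuming the top level of the file maps `models_*` keys to blocks shaped like the ones above; the file name and the `failure_rate` helper are illustrative, not part of the app:

```python
import json

def failure_rate(report: dict) -> float:
    """Global failure rate over one report: failed / (failed + success).

    Skipped tests are left out of the denominator, and the
    "unclassified" bucket is ignored for simplicity.
    """
    failed = succeeded = 0
    for block in report.values():
        if not isinstance(block, dict) or "failed" not in block:
            continue  # tolerate non-model metadata keys, if any
        for counts in block["failed"].values():
            failed += counts.get("single", 0) + counts.get("multi", 0)
        succeeded += block.get("success", 0)
    total = failed + succeeded
    return failed / total if total else 0.0

# Example usage against the sample above:
with open("sample_amd.json") as f:
    print(f"global failure rate: {failure_rate(json.load(f)):.2%}")
```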
sample_nvidia.json CHANGED
@@ -52,20 +52,22 @@
52
  "multi": 0
53
  }
54
  },
 
55
  "success": 226,
56
- "time_spent": "4.66, 6.10, ",
 
57
  "failures": {},
58
  "job_link": {
59
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561673",
60
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561472"
61
  }
62
  },
63
  "models_bert": {
64
  "failed": {
65
  "PyTorch": {
66
  "unclassified": 0,
67
- "single": 2,
68
- "multi": 2
69
  },
70
  "TensorFlow": {
71
  "unclassified": 0,
@@ -113,33 +115,14 @@
113
  "multi": 0
114
  }
115
  },
 
116
  "success": 527,
117
- "time_spent": "0:01:58, 0:02:00, ",
118
- "failures": {
119
- "single": [
120
- {
121
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
122
- "trace": "(line 4140) KeyError: 'eager'"
123
- },
124
- {
125
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
126
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
127
- }
128
- ],
129
- "multi": [
130
- {
131
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
132
- "trace": "(line 4140) KeyError: 'eager'"
133
- },
134
- {
135
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
136
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
137
- }
138
- ]
139
- },
140
  "job_link": {
141
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561709",
142
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561482"
143
  }
144
  },
145
  "models_clip": {
@@ -195,12 +178,14 @@
195
  "multi": 0
196
  }
197
  },
 
198
  "success": 660,
199
- "time_spent": "0:02:24, 0:02:20, ",
 
200
  "failures": {},
201
  "job_link": {
202
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561994",
203
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562125"
204
  }
205
  },
206
  "models_detr": {
@@ -256,20 +241,22 @@
256
  "multi": 0
257
  }
258
  },
 
259
  "success": 177,
260
- "time_spent": "0:01:14, 0:01:19, ",
 
261
  "failures": {},
262
  "job_link": {
263
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562517",
264
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562397"
265
  }
266
  },
267
  "models_gemma3": {
268
  "failed": {
269
  "PyTorch": {
270
  "unclassified": 0,
271
- "single": 7,
272
- "multi": 8
273
  },
274
  "TensorFlow": {
275
  "unclassified": 0,
@@ -317,77 +304,21 @@
317
  "multi": 0
318
  }
319
  },
320
- "success": 499,
321
- "time_spent": "0:07:50, 0:07:52, ",
 
 
322
  "failures": {
323
- "single": [
324
- {
325
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
326
- "trace": "(line 4140) KeyError: 'eager'"
327
- },
328
- {
329
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
330
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
331
- },
332
- {
333
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
334
- "trace": "(line 4140) KeyError: 'eager'"
335
- },
336
- {
337
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
338
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
339
- },
340
- {
341
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
342
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
343
- },
344
- {
345
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
346
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
347
- },
348
- {
349
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
350
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
351
- }
352
- ],
353
  "multi": [
354
- {
355
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
356
- "trace": "(line 4140) KeyError: 'eager'"
357
- },
358
- {
359
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
360
- "trace": "(line 4219) AssertionError: Tensor-likes are not close!"
361
- },
362
- {
363
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
364
- "trace": "(line 4140) KeyError: 'eager'"
365
- },
366
  {
367
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
368
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
369
- },
370
- {
371
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
372
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
373
- },
374
- {
375
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
376
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
377
- },
378
- {
379
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
380
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
381
- },
382
- {
383
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
384
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
385
  }
386
  ]
387
  },
388
  "job_link": {
389
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563053",
390
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562857"
391
  }
392
  },
393
  "models_gemma3n": {
@@ -395,7 +326,7 @@
395
  "PyTorch": {
396
  "unclassified": 0,
397
  "single": 1,
398
- "multi": 2
399
  },
400
  "TensorFlow": {
401
  "unclassified": 0,
@@ -443,37 +374,29 @@
443
  "multi": 0
444
  }
445
  },
446
- "success": 286,
447
- "time_spent": "0:02:29, 0:02:32, ",
 
 
448
  "failures": {
449
- "multi": [
450
- {
451
- "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
452
- "trace": "(line 4140) KeyError: 'eager'"
453
- },
454
- {
455
- "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_multi_gpu_data_parallel_forward",
456
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
457
- }
458
- ],
459
  "single": [
460
  {
461
- "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
462
- "trace": "(line 4140) KeyError: 'eager'"
463
  }
464
  ]
465
  },
466
  "job_link": {
467
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562955",
468
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563061"
469
  }
470
  },
471
  "models_got_ocr2": {
472
  "failed": {
473
  "PyTorch": {
474
  "unclassified": 0,
475
- "single": 1,
476
- "multi": 2
477
  },
478
  "TensorFlow": {
479
  "unclassified": 0,
@@ -521,37 +444,22 @@
521
  "multi": 0
522
  }
523
  },
524
- "success": 254,
525
- "time_spent": "0:02:02, 0:02:15, ",
526
- "failures": {
527
- "multi": [
528
- {
529
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
530
- "trace": "(line 4140) KeyError: 'eager'"
531
- },
532
- {
533
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_multi_gpu_data_parallel_forward",
534
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
535
- }
536
- ],
537
- "single": [
538
- {
539
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
540
- "trace": "(line 4140) KeyError: 'eager'"
541
- }
542
- ]
543
- },
544
  "job_link": {
545
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562995",
546
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563212"
547
  }
548
  },
549
  "models_gpt2": {
550
  "failed": {
551
  "PyTorch": {
552
  "unclassified": 0,
553
- "single": 1,
554
- "multi": 1
555
  },
556
  "TensorFlow": {
557
  "unclassified": 0,
@@ -599,33 +507,22 @@
599
  "multi": 0
600
  }
601
  },
 
602
  "success": 487,
603
- "time_spent": "0:02:23, 0:02:38, ",
604
- "failures": {
605
- "multi": [
606
- {
607
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
608
- "trace": "(line 4140) KeyError: 'eager'"
609
- }
610
- ],
611
- "single": [
612
- {
613
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
614
- "trace": "(line 4140) KeyError: 'eager'"
615
- }
616
- ]
617
- },
618
  "job_link": {
619
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563001",
620
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563255"
621
  }
622
  },
623
  "models_internvl": {
624
  "failed": {
625
  "PyTorch": {
626
  "unclassified": 0,
627
- "single": 2,
628
- "multi": 3
629
  },
630
  "TensorFlow": {
631
  "unclassified": 0,
@@ -673,28 +570,18 @@
673
  "multi": 0
674
  }
675
  },
676
- "success": 356,
677
- "time_spent": "0:05:48, 0:04:49, ",
 
 
678
  "failures": {
679
  "multi": [
680
- {
681
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
682
- "trace": "(line 4140) KeyError: 'eager'"
683
- },
684
  {
685
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
686
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
687
- },
688
- {
689
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_multi_gpu_data_parallel_forward",
690
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
691
  }
692
  ],
693
  "single": [
694
- {
695
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
696
- "trace": "(line 4140) KeyError: 'eager'"
697
- },
698
  {
699
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
700
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
@@ -702,16 +589,16 @@
702
  ]
703
  },
704
  "job_link": {
705
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563553",
706
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563712"
707
  }
708
  },
709
  "models_llama": {
710
  "failed": {
711
  "PyTorch": {
712
  "unclassified": 0,
713
- "single": 1,
714
- "multi": 2
715
  },
716
  "TensorFlow": {
717
  "unclassified": 0,
@@ -759,37 +646,22 @@
759
  "multi": 0
760
  }
761
  },
762
- "success": 478,
763
- "time_spent": "0:04:05, 0:03:53, ",
764
- "failures": {
765
- "multi": [
766
- {
767
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
768
- "trace": "(line 4140) KeyError: 'eager'"
769
- },
770
- {
771
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_multi_gpu_data_parallel_forward",
772
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
773
- }
774
- ],
775
- "single": [
776
- {
777
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
778
- "trace": "(line 4140) KeyError: 'eager'"
779
- }
780
- ]
781
- },
782
  "job_link": {
783
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563871",
784
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564103"
785
  }
786
  },
787
  "models_llava": {
788
  "failed": {
789
  "PyTorch": {
790
  "unclassified": 0,
791
- "single": 3,
792
- "multi": 4
793
  },
794
  "TensorFlow": {
795
  "unclassified": 0,
@@ -837,53 +709,22 @@
837
  "multi": 0
838
  }
839
  },
840
- "success": 346,
841
- "time_spent": "0:10:11, 0:09:28, ",
842
- "failures": {
843
- "multi": [
844
- {
845
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
846
- "trace": "(line 4140) KeyError: 'eager'"
847
- },
848
- {
849
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
850
- "trace": "(line 687) AssertionError: False is not true"
851
- },
852
- {
853
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
854
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
855
- },
856
- {
857
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
858
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
859
- }
860
- ],
861
- "single": [
862
- {
863
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
864
- "trace": "(line 4140) KeyError: 'eager'"
865
- },
866
- {
867
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
868
- "trace": "(line 687) AssertionError: False is not true"
869
- },
870
- {
871
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
872
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
873
- }
874
- ]
875
- },
876
  "job_link": {
877
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564002",
878
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564108"
879
  }
880
  },
881
  "models_mistral3": {
882
  "failed": {
883
  "PyTorch": {
884
  "unclassified": 0,
885
- "single": 1,
886
- "multi": 2
887
  },
888
  "TensorFlow": {
889
  "unclassified": 0,
@@ -931,37 +772,22 @@
931
  "multi": 0
932
  }
933
  },
934
- "success": 286,
935
- "time_spent": "0:10:06, 0:09:57, ",
936
- "failures": {
937
- "single": [
938
- {
939
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
940
- "trace": "(line 4140) KeyError: 'eager'"
941
- }
942
- ],
943
- "multi": [
944
- {
945
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
946
- "trace": "(line 4140) KeyError: 'eager'"
947
- },
948
- {
949
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_multi_gpu_data_parallel_forward",
950
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
951
- }
952
- ]
953
- },
954
  "job_link": {
955
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561480",
956
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561618"
957
  }
958
  },
959
  "models_modernbert": {
960
  "failed": {
961
  "PyTorch": {
962
  "unclassified": 0,
963
- "single": 5,
964
- "multi": 5
965
  },
966
  "TensorFlow": {
967
  "unclassified": 0,
@@ -1009,65 +835,22 @@
1009
  "multi": 0
1010
  }
1011
  },
1012
- "success": 164,
1013
- "time_spent": "0:01:29, 0:01:27, ",
1014
- "failures": {
1015
- "multi": [
1016
- {
1017
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1018
- "trace": "(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1019
- },
1020
- {
1021
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1022
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1023
- },
1024
- {
1025
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1026
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1027
- },
1028
- {
1029
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1030
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1031
- },
1032
- {
1033
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1034
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1035
- }
1036
- ],
1037
- "single": [
1038
- {
1039
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1040
- "trace": "(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1041
- },
1042
- {
1043
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1044
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1045
- },
1046
- {
1047
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1048
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1049
- },
1050
- {
1051
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1052
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1053
- },
1054
- {
1055
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1056
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1057
- }
1058
- ]
1059
- },
1060
  "job_link": {
1061
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561668",
1062
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561515"
1063
  }
1064
  },
1065
  "models_qwen2": {
1066
  "failed": {
1067
  "PyTorch": {
1068
  "unclassified": 0,
1069
- "single": 2,
1070
- "multi": 3
1071
  },
1072
  "TensorFlow": {
1073
  "unclassified": 0,
@@ -1115,45 +898,22 @@
1115
  "multi": 0
1116
  }
1117
  },
1118
- "success": 438,
1119
- "time_spent": "0:02:17, 0:02:18, ",
1120
- "failures": {
1121
- "multi": [
1122
- {
1123
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1124
- "trace": "(line 4140) KeyError: 'eager'"
1125
- },
1126
- {
1127
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_multi_gpu_data_parallel_forward",
1128
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1129
- },
1130
- {
1131
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1132
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1133
- }
1134
- ],
1135
- "single": [
1136
- {
1137
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1138
- "trace": "(line 4140) KeyError: 'eager'"
1139
- },
1140
- {
1141
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1142
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1143
- }
1144
- ]
1145
- },
1146
  "job_link": {
1147
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562376",
1148
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562270"
1149
  }
1150
  },
1151
  "models_qwen2_5_omni": {
1152
  "failed": {
1153
  "PyTorch": {
1154
  "unclassified": 0,
1155
- "single": 1,
1156
- "multi": 5
1157
  },
1158
  "TensorFlow": {
1159
  "unclassified": 0,
@@ -1201,41 +961,21 @@
1201
  "multi": 0
1202
  }
1203
  },
1204
- "success": 277,
1205
- "time_spent": "0:03:01, 0:03:21, ",
 
 
1206
  "failures": {
1207
  "multi": [
1208
  {
1209
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
1210
  "trace": "(line 675) AssertionError: Items in the second set but not the first:"
1211
- },
1212
- {
1213
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
1214
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1215
- },
1216
- {
1217
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1218
- "trace": "(line 675) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1219
- },
1220
- {
1221
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_multiturn",
1222
- "trace": "(line 849) torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 1 has a total capacity of 22.18 GiB of which 6.50 MiB is free. Process 51940 has 22.17 GiB memory in use. Of the allocated memory 21.74 GiB is allocated by PyTorch, and 27.83 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
1223
- },
1224
- {
1225
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_w_audio",
1226
- "trace": "(line 1000) torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 1 has a total capacity of 22.18 GiB of which 8.50 MiB is free. Process 51940 has 22.17 GiB memory in use. Of the allocated memory 21.75 GiB is allocated by PyTorch, and 17.78 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
1227
- }
1228
- ],
1229
- "single": [
1230
- {
1231
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1232
- "trace": "(line 675) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1233
  }
1234
  ]
1235
  },
1236
  "job_link": {
1237
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562375",
1238
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562289"
1239
  }
1240
  },
1241
  "models_qwen2_5_vl": {
@@ -1291,8 +1031,10 @@
1291
  "multi": 0
1292
  }
1293
  },
1294
- "success": 311,
1295
- "time_spent": "0:03:25, 0:03:29, ",
 
 
1296
  "failures": {
1297
  "multi": [
1298
  {
@@ -1308,16 +1050,16 @@
1308
  ]
1309
  },
1310
  "job_link": {
1311
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562382",
1312
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562290"
1313
  }
1314
  },
1315
  "models_smolvlm": {
1316
  "failed": {
1317
  "PyTorch": {
1318
  "unclassified": 0,
1319
- "single": 1,
1320
- "multi": 1
1321
  },
1322
  "TensorFlow": {
1323
  "unclassified": 0,
@@ -1365,33 +1107,22 @@
1365
  "multi": 0
1366
  }
1367
  },
1368
- "success": 499,
1369
- "time_spent": "0:01:55, 0:01:47, ",
1370
- "failures": {
1371
- "single": [
1372
- {
1373
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1374
- "trace": "(line 4140) KeyError: 'eager'"
1375
- }
1376
- ],
1377
- "multi": [
1378
- {
1379
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1380
- "trace": "(line 4140) KeyError: 'eager'"
1381
- }
1382
- ]
1383
- },
1384
  "job_link": {
1385
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562675",
1386
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562798"
1387
  }
1388
  },
1389
  "models_t5": {
1390
  "failed": {
1391
  "PyTorch": {
1392
  "unclassified": 0,
1393
- "single": 2,
1394
- "multi": 3
1395
  },
1396
  "TensorFlow": {
1397
  "unclassified": 0,
@@ -1439,14 +1170,12 @@
1439
  "multi": 0
1440
  }
1441
  },
 
1442
  "success": 592,
1443
- "time_spent": "0:03:34, 0:03:41, ",
 
1444
  "failures": {
1445
  "multi": [
1446
- {
1447
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1448
- "trace": "(line 4140) KeyError: 'eager'"
1449
- },
1450
  {
1451
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
1452
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
@@ -1457,10 +1186,6 @@
1457
  }
1458
  ],
1459
  "single": [
1460
- {
1461
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1462
- "trace": "(line 4140) KeyError: 'eager'"
1463
- },
1464
  {
1465
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
1466
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"
@@ -1468,8 +1193,8 @@
1468
  ]
1469
  },
1470
  "job_link": {
1471
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563047",
1472
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562939"
1473
  }
1474
  },
1475
  "models_vit": {
@@ -1525,12 +1250,14 @@
1525
  "multi": 0
1526
  }
1527
  },
1528
  "success": 217,
1529
- "time_spent": "7.34, 0:01:09, ",
1530
  "failures": {},
1531
  "job_link": {
1532
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563537",
1533
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563397"
1534
  }
1535
  },
1536
  "models_wav2vec2": {
@@ -1586,8 +1313,10 @@
1586
  "multi": 0
1587
  }
1588
  },
1589
  "success": 672,
1590
- "time_spent": "0:04:46, 0:04:23, ",
1591
  "failures": {
1592
  "multi": [
1593
  {
@@ -1627,16 +1356,16 @@
1627
  ]
1628
  },
1629
  "job_link": {
1630
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563711",
1631
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563582"
1632
  }
1633
  },
1634
  "models_whisper": {
1635
  "failed": {
1636
  "PyTorch": {
1637
  "unclassified": 0,
1638
- "single": 8,
1639
- "multi": 11
1640
  },
1641
  "TensorFlow": {
1642
  "unclassified": 0,
@@ -1684,14 +1413,12 @@
1684
  "multi": 0
1685
  }
1686
  },
1687
- "success": 1010,
1688
- "time_spent": "0:12:29, 0:14:19, ",
1689
  "failures": {
1690
  "single": [
1691
- {
1692
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1693
- "trace": "(line 4140) KeyError: 'eager'"
1694
- },
1695
  {
1696
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1697
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
@@ -1708,32 +1435,16 @@
1708
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1709
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1710
  },
1711
- {
1712
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
1713
- "trace": "(line 675) AssertionError: Lists differ: [\" Fo[422 chars]to a fisher shows in lip-nitsky attack that cu[7903 chars]le!\"] != [\" Fo[422 chars]to a Fisher shows in lip-nitsky attack that cu[7918 chars]le.\"]"
1714
- },
1715
  {
1716
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1717
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1718
- },
1719
- {
1720
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1721
- "trace": "(line 4140) KeyError: 'eager'"
1722
  }
1723
  ],
1724
  "multi": [
1725
- {
1726
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1727
- "trace": "(line 4140) KeyError: 'eager'"
1728
- },
1729
  {
1730
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_multi_gpu_data_parallel_forward",
1731
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
1732
  },
1733
- {
1734
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_forced_decoder_ids",
1735
- "trace": "(line 713) requests.exceptions.ReadTimeout: (ReadTimeoutError(\"HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)\"), '(Request ID: 13cb0b08-c261-4ca3-a58f-91a2f3e327ed)')"
1736
- },
1737
  {
1738
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1739
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
@@ -1750,27 +1461,15 @@
1750
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1751
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1752
  },
1753
- {
1754
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
1755
- "trace": "(line 675) AssertionError: Lists differ: [\" Fo[422 chars]to a fisher shows in lip-nitsky attack that cu[7903 chars]le!\"] != [\" Fo[422 chars]to a Fisher shows in lip-nitsky attack that cu[7918 chars]le.\"]"
1756
- },
1757
  {
1758
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1759
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1760
- },
1761
- {
1762
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1763
- "trace": "(line 4140) KeyError: 'eager'"
1764
- },
1765
- {
1766
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_multi_gpu_data_parallel_forward",
1767
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1768
  }
1769
  ]
1770
  },
1771
  "job_link": {
1772
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563737",
1773
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563862"
1774
  }
1775
  }
1776
- }
 
52
  "multi": 0
53
  }
54
  },
55
+ "errors": 0,
56
  "success": 226,
57
+ "skipped": 10,
58
+ "time_spent": "3.79, 5.93, ",
59
  "failures": {},
60
  "job_link": {
61
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215208",
62
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215147"
63
  }
64
  },
65
  "models_bert": {
66
  "failed": {
67
  "PyTorch": {
68
  "unclassified": 0,
69
+ "single": 0,
70
+ "multi": 0
71
  },
72
  "TensorFlow": {
73
  "unclassified": 0,
 
115
  "multi": 0
116
  }
117
  },
118
+ "errors": 0,
119
  "success": 527,
120
+ "skipped": 211,
121
+ "time_spent": "0:01:47, 0:01:50, ",
122
+ "failures": {},
123
  "job_link": {
124
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215196",
125
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215175"
126
  }
127
  },
128
  "models_clip": {
 
178
  "multi": 0
179
  }
180
  },
181
+ "errors": 0,
182
  "success": 660,
183
+ "skipped": 934,
184
+ "time_spent": "0:02:15, 0:02:11, ",
185
  "failures": {},
186
  "job_link": {
187
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215674",
188
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215699"
189
  }
190
  },
191
  "models_detr": {
 
241
  "multi": 0
242
  }
243
  },
244
+ "errors": 0,
245
  "success": 177,
246
+ "skipped": 271,
247
+ "time_spent": "0:01:07, 0:01:11, ",
248
  "failures": {},
249
  "job_link": {
250
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216030",
251
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216008"
252
  }
253
  },
254
  "models_gemma3": {
255
  "failed": {
256
  "PyTorch": {
257
  "unclassified": 0,
258
+ "single": 0,
259
+ "multi": 1
260
  },
261
  "TensorFlow": {
262
  "unclassified": 0,
 
304
  "multi": 0
305
  }
306
  },
307
+ "errors": 0,
308
+ "success": 507,
309
+ "skipped": 320,
310
+ "time_spent": "0:09:30, 0:09:28, ",
311
  "failures": {
312
  "multi": [
313
  {
314
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
315
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
316
  }
317
  ]
318
  },
319
  "job_link": {
320
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216642",
321
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216593"
322
  }
323
  },
324
  "models_gemma3n": {
 
326
  "PyTorch": {
327
  "unclassified": 0,
328
  "single": 1,
329
+ "multi": 0
330
  },
331
  "TensorFlow": {
332
  "unclassified": 0,
 
374
  "multi": 0
375
  }
376
  },
377
+ "errors": 0,
378
+ "success": 288,
379
+ "skipped": 703,
380
+ "time_spent": "0:02:15, 0:02:15, ",
381
  "failures": {
382
  "single": [
383
  {
384
+ "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
385
+ "trace": "(line 4243) AssertionError: Tensor-likes are not close!"
386
  }
387
  ]
388
  },
389
  "job_link": {
390
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216605",
391
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216660"
392
  }
393
  },
394
  "models_got_ocr2": {
395
  "failed": {
396
  "PyTorch": {
397
  "unclassified": 0,
398
+ "single": 0,
399
+ "multi": 0
400
  },
401
  "TensorFlow": {
402
  "unclassified": 0,
 
444
  "multi": 0
445
  }
446
  },
447
+ "errors": 0,
448
+ "success": 257,
449
+ "skipped": 333,
450
+ "time_spent": "0:01:49, 0:01:49, ",
451
+ "failures": {},
452
  "job_link": {
453
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216911",
454
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216742"
455
  }
456
  },
457
  "models_gpt2": {
458
  "failed": {
459
  "PyTorch": {
460
  "unclassified": 0,
461
+ "single": 0,
462
+ "multi": 0
463
  },
464
  "TensorFlow": {
465
  "unclassified": 0,
 
507
  "multi": 0
508
  }
509
  },
510
+ "errors": 0,
511
  "success": 487,
512
+ "skipped": 229,
513
+ "time_spent": "0:02:11, 0:02:01, ",
514
+ "failures": {},
515
  "job_link": {
516
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216717",
517
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216759"
518
  }
519
  },
520
  "models_internvl": {
521
  "failed": {
522
  "PyTorch": {
523
  "unclassified": 0,
524
+ "single": 1,
525
+ "multi": 1
526
  },
527
  "TensorFlow": {
528
  "unclassified": 0,
 
570
  "multi": 0
571
  }
572
  },
573
+ "errors": 0,
574
+ "success": 355,
575
+ "skipped": 241,
576
+ "time_spent": "0:04:33, 0:04:31, ",
577
  "failures": {
578
  "multi": [
579
  {
580
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
581
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
582
  }
583
  ],
584
  "single": [
585
  {
586
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
587
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
588
  }
589
  ]
590
  },
591
  "job_link": {
592
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217017",
593
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217056"
594
  }
595
  },
596
  "models_llama": {
597
  "failed": {
598
  "PyTorch": {
599
  "unclassified": 0,
600
+ "single": 0,
601
+ "multi": 0
602
  },
603
  "TensorFlow": {
604
  "unclassified": 0,
 
646
  "multi": 0
647
  }
648
  },
649
+ "errors": 0,
650
+ "success": 481,
651
+ "skipped": 253,
652
+ "time_spent": "0:03:43, 0:03:37, ",
653
+ "failures": {},
654
  "job_link": {
655
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217239",
656
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217242"
657
  }
658
  },
659
  "models_llava": {
660
  "failed": {
661
  "PyTorch": {
662
  "unclassified": 0,
663
+ "single": 0,
664
+ "multi": 0
665
  },
666
  "TensorFlow": {
667
  "unclassified": 0,
 
709
  "multi": 0
710
  }
711
  },
712
+ "errors": 0,
713
+ "success": 349,
714
+ "skipped": 159,
715
+ "time_spent": "0:08:59, 0:09:11, ",
716
+ "failures": {},
717
  "job_link": {
718
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217250",
719
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217263"
720
  }
721
  },
722
  "models_mistral3": {
723
  "failed": {
724
  "PyTorch": {
725
  "unclassified": 0,
726
+ "single": 0,
727
+ "multi": 0
728
  },
729
  "TensorFlow": {
730
  "unclassified": 0,
 
772
  "multi": 0
773
  }
774
  },
775
+ "errors": 0,
776
+ "success": 283,
777
+ "skipped": 267,
778
+ "time_spent": "0:09:53, 0:09:40, ",
779
+ "failures": {},
780
  "job_link": {
781
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215108",
782
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215124"
783
  }
784
  },
785
  "models_modernbert": {
786
  "failed": {
787
  "PyTorch": {
788
  "unclassified": 0,
789
+ "single": 0,
790
+ "multi": 0
791
  },
792
  "TensorFlow": {
793
  "unclassified": 0,
 
835
  "multi": 0
836
  }
837
  },
838
+ "errors": 0,
839
+ "success": 174,
840
+ "skipped": 218,
841
+ "time_spent": "0:01:27, 0:01:24, ",
842
+ "failures": {},
843
  "job_link": {
844
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215158",
845
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215123"
846
  }
847
  },
848
  "models_qwen2": {
849
  "failed": {
850
  "PyTorch": {
851
  "unclassified": 0,
852
+ "single": 0,
853
+ "multi": 0
854
  },
855
  "TensorFlow": {
856
  "unclassified": 0,
 
898
  "multi": 0
899
  }
900
  },
901
+ "errors": 0,
902
+ "success": 443,
903
+ "skipped": 251,
904
+ "time_spent": "0:02:16, 0:02:16, ",
905
+ "failures": {},
906
  "job_link": {
907
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215909",
908
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215891"
909
  }
910
  },
911
  "models_qwen2_5_omni": {
912
  "failed": {
913
  "PyTorch": {
914
  "unclassified": 0,
915
+ "single": 0,
916
+ "multi": 1
917
  },
918
  "TensorFlow": {
919
  "unclassified": 0,
 
961
  "multi": 0
962
  }
963
  },
964
+ "errors": 0,
965
+ "success": 278,
966
+ "skipped": 159,
967
+ "time_spent": "0:02:55, 0:03:00, ",
968
  "failures": {
969
  "multi": [
970
  {
971
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
972
  "trace": "(line 675) AssertionError: Items in the second set but not the first:"
973
  }
974
  ]
975
  },
976
  "job_link": {
977
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215907",
978
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215896"
979
  }
980
  },
981
  "models_qwen2_5_vl": {
 
1031
  "multi": 0
1032
  }
1033
  },
1034
+ "errors": 0,
1035
+ "success": 309,
1036
+ "skipped": 141,
1037
+ "time_spent": "0:03:13, 0:03:14, ",
1038
  "failures": {
1039
  "multi": [
1040
  {
 
1050
  ]
1051
  },
1052
  "job_link": {
1053
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215945",
1054
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215911"
1055
  }
1056
  },
1057
  "models_smolvlm": {
1058
  "failed": {
1059
  "PyTorch": {
1060
  "unclassified": 0,
1061
+ "single": 0,
1062
+ "multi": 0
1063
  },
1064
  "TensorFlow": {
1065
  "unclassified": 0,
 
1107
  "multi": 0
1108
  }
1109
  },
1110
+ "errors": 0,
1111
+ "success": 497,
1112
+ "skipped": 269,
1113
+ "time_spent": "0:01:33, 0:01:36, ",
1114
+ "failures": {},
1115
  "job_link": {
1116
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216282",
1117
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216321"
1118
  }
1119
  },
1120
  "models_t5": {
1121
  "failed": {
1122
  "PyTorch": {
1123
  "unclassified": 0,
1124
+ "single": 1,
1125
+ "multi": 2
1126
  },
1127
  "TensorFlow": {
1128
  "unclassified": 0,
 
1170
  "multi": 0
1171
  }
1172
  },
1173
+ "errors": 0,
1174
  "success": 592,
1175
+ "skipped": 535,
1176
+ "time_spent": "0:03:13, 0:02:52, ",
1177
  "failures": {
1178
  "multi": [
1179
  {
1180
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
1181
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
 
1186
  }
1187
  ],
1188
  "single": [
1189
  {
1190
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
1191
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"
1192
  }
1193
  ]
1194
  },
1195
  "job_link": {
1196
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216565",
1197
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216464"
1198
  }
1199
  },
1200
  "models_vit": {
 
1250
  "multi": 0
1251
  }
1252
  },
1253
+ "errors": 0,
1254
  "success": 217,
1255
+ "skipped": 199,
1256
+ "time_spent": "2.03, 1.28, ",
1257
  "failures": {},
1258
  "job_link": {
1259
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216869",
1260
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216833"
1261
  }
1262
  },
1263
  "models_wav2vec2": {
 
1313
  "multi": 0
1314
  }
1315
  },
1316
+ "errors": 0,
1317
  "success": 672,
1318
+ "skipped": 438,
1319
+ "time_spent": "0:03:37, 0:03:36, ",
1320
  "failures": {
1321
  "multi": [
1322
  {
 
1356
  ]
1357
  },
1358
  "job_link": {
1359
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216956",
1360
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216929"
1361
  }
1362
  },
1363
  "models_whisper": {
1364
  "failed": {
1365
  "PyTorch": {
1366
  "unclassified": 0,
1367
+ "single": 5,
1368
+ "multi": 6
1369
  },
1370
  "TensorFlow": {
1371
  "unclassified": 0,
 
1413
  "multi": 0
1414
  }
1415
  },
1416
+ "errors": 0,
1417
+ "success": 1014,
1418
+ "skipped": 475,
1419
+ "time_spent": "0:11:09, 0:11:47, ",
1420
  "failures": {
1421
  "single": [
1422
  {
1423
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1424
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
 
1435
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1436
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1437
  },
1438
  {
1439
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1440
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1441
  }
1442
  ],
1443
  "multi": [
1444
  {
1445
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_multi_gpu_data_parallel_forward",
1446
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
1447
  },
1448
  {
1449
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1450
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
 
1461
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1462
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1463
  },
1464
  {
1465
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1466
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1467
  }
1468
  ]
1469
  },
1470
  "job_link": {
1471
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216943",
1472
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217012"
1473
  }
1474
  }
1475
+ }
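
Note on the schema change visible in both sample files: every per-model entry now carries an `errors` and a `skipped` counter next to the existing `success`, `time_spent`, `failures`, and `job_link` fields. A minimal sketch of consuming one of these reports, assuming the flat `models_*` mapping shown above (the `failure_rate` helper and the flat-layout assumption are illustrative, not part of the dashboard code):

import json

def failure_rate(entry: dict) -> float:
    # Failed counts are nested per framework (PyTorch, TensorFlow, ...) and
    # per config (unclassified/single/multi); skipped tests are excluded
    # from the denominator since they never ran.
    failed = sum(n for framework in entry["failed"].values() for n in framework.values())
    ran = entry["success"] + entry["errors"] + failed
    return failed / ran if ran else 0.0

with open("sample_nvidia.json") as f:  # assumed flat layout
    report = json.load(f)

for name, entry in report.items():
    print(f"{name}: {failure_rate(entry):.1%} failed, {entry['skipped']} skipped")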
styles.css CHANGED
@@ -184,6 +184,17 @@ div[data-testid="column"]:has(.model-container) {
184
  box-shadow: 0 2px 8px rgba(116, 185, 255, 0.2) !important;
185
  }
186
 
187
+ /* Model buttons with failures - fuzzy red border with inner glow */
188
+ .model-button-failed {
189
+ border: 1px solid #712626 !important;
190
+ box-shadow: inset 0 0 8px rgba(204, 68, 68, 0.4) !important;
191
+ }
192
+
193
+ .model-button-failed:hover {
194
+ border-color: #712626 !important;
195
+ box-shadow: 0 0 12px rgba(255, 107, 107, 0.5) !important;
196
+ }
197
+
198
  /*
199
  .model-button:active {
200
  background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
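
The two added rules mark failing model buttons with a red inner glow; using `box-shadow` instead of a thicker border keeps the button footprint unchanged, so the layout does not shift when a model starts failing. A minimal sketch of toggling the class from the Gradio side, assuming a `has_failures` flag computed elsewhere (the helper name is illustrative):

import gradio as gr

def make_model_button(name: str, has_failures: bool) -> gr.Button:
    # elem_classes attaches the CSS classes that styles.css targets.
    classes = ["model-button"] + (["model-button-failed"] if has_failures else [])
    return gr.Button(name, variant="secondary", size="sm", elem_classes=classes)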
summary_page.py CHANGED
@@ -35,6 +35,42 @@ COLORS = {
35
  MODEL_NAME_FONT_SIZE = 16
36
  LABEL_FONT_SIZE = 14
37
  LABEL_OFFSET = 1 # Distance of label from bar
38
+ FAILURE_RATE_FONT_SIZE = 28
39
+
40
+
41
+ def calculate_overall_failure_rates(df: pd.DataFrame, available_models: list[str]) -> tuple[float, float]:
42
+ """Calculate overall failure rates for AMD and NVIDIA across all models."""
43
+ if df.empty or not available_models:
44
+ return 0.0, 0.0
45
+
46
+ total_amd_tests = 0
47
+ total_amd_failures = 0
48
+ total_nvidia_tests = 0
49
+ total_nvidia_failures = 0
50
+
51
+ for model_name in available_models:
52
+ if model_name not in df.index:
53
+ continue
54
+
55
+ row = df.loc[model_name]
56
+ amd_stats, nvidia_stats = extract_model_data(row)[:2]
57
+
58
+ # AMD totals
59
+ amd_total = sum(amd_stats.values())
60
+ if amd_total > 0:
61
+ total_amd_tests += amd_total
62
+ total_amd_failures += amd_stats['failed'] + amd_stats['error']
63
+
64
+ # NVIDIA totals
65
+ nvidia_total = sum(nvidia_stats.values())
66
+ if nvidia_total > 0:
67
+ total_nvidia_tests += nvidia_total
68
+ total_nvidia_failures += nvidia_stats['failed'] + nvidia_stats['error']
69
+
70
+ amd_failure_rate = (total_amd_failures / total_amd_tests * 100) if total_amd_tests > 0 else 0.0
71
+ nvidia_failure_rate = (total_nvidia_failures / total_nvidia_tests * 100) if total_nvidia_tests > 0 else 0.0
72
+
73
+ return amd_failure_rate, nvidia_failure_rate
74
 
75
 
76
  def draw_text_and_bar(
@@ -48,9 +84,14 @@ def draw_text_and_bar(
84
  """Draw a horizontal bar chart for given stats and its label on the left."""
85
  # Text
86
  label_x = column_left_position - LABEL_OFFSET
87
+ failures_present = any(stats[category] > 0 for category in ['failed', 'error'])
88
+ if failures_present:
89
+ props = dict(boxstyle='round', facecolor=COLORS['failed'], alpha=0.35)
90
+ else:
91
+ props = dict(alpha=0)
92
  ax.text(
93
  label_x, y_bar, label, ha='right', va='center', color='#CCCCCC', fontsize=LABEL_FONT_SIZE,
53
- fontfamily='monospace', fontweight='normal'
94
+ fontfamily='monospace', fontweight='normal', bbox=props
95
  )
96
  # Bar
97
  total = sum(stats.values())
@@ -75,6 +116,9 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
116
  fontfamily='monospace', weight='normal')
117
  ax.axis('off')
118
  return fig
119
+
120
+ # Calculate overall failure rates
121
+ amd_failure_rate, nvidia_failure_rate = calculate_overall_failure_rates(df, available_models)
122
 
123
  # Calculate dimensions for N-column layout
124
  model_count = len(available_models)
@@ -86,6 +130,12 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
130
 
131
  fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, figure_height), facecolor='#000000')
132
  ax.set_facecolor('#000000')
133
+
134
+ # Add overall failure rates at the top as a proper title
135
+ failure_text = f"Overall Failure Rates: AMD {amd_failure_rate:.1f}% | NVIDIA {nvidia_failure_rate:.1f}%"
136
+ ax.text(50, -1.25, failure_text, ha='center', va='top',
137
+ color='#FFFFFF', fontsize=FAILURE_RATE_FONT_SIZE,
138
+ fontfamily='monospace', fontweight='bold')
139
 
140
  visible_model_count = 0
141
  max_y = 0
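
calculate_overall_failure_rates weights each model by its test count rather than averaging per-model rates, so large suites dominate the headline number. A worked micro-example with made-up stats, assuming the per-hardware dicts use the 'failed' and 'error' keys the function reads (the other keys stand in for whatever categories the summary bars track):

amd_stats_by_model = {
    "bert": {"passed": 90, "failed": 8, "error": 2, "skipped": 0},  # 100 tests, 10 bad
    "t5": {"passed": 50, "failed": 0, "error": 0, "skipped": 0},    # 50 tests, 0 bad
}
total_tests = sum(sum(s.values()) for s in amd_stats_by_model.values())              # 150
total_failures = sum(s["failed"] + s["error"] for s in amd_stats_by_model.values())  # 10
print(f"AMD {total_failures / total_tests * 100:.1f}%")  # AMD 6.7%, not the 5.0% a per-model average would give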