ror HF Staff committed on
Commit b52e342 · verified · 1 Parent(s): c1a3d27

stable-dash (#2)


- Added skipped count (793d7ea8a06b4867ffa923aab290aea0c3bee9a8)
- Added hardware specs (b38ad0fcb553cc42ea2f032872e00515ccc667f2)
- Added visual hints on summary page (ed6addfe7669661f964dfb6c14a604cb791cddb4)
- Added red text for failing models (746981fce24934b467d6b4f7574db19110186414)
- Added global failure rate (1b5e076ca21569847059dc8417282ea926707fc6)

Files changed (7)
  1. .gitignore +2 -1
  2. app.py +53 -13
  3. data.py +17 -8
  4. sample_amd.json +496 -366
  5. sample_nvidia.json +159 -460
  6. styles.css +11 -0
  7. summary_page.py +51 -1
.gitignore CHANGED
@@ -1 +1,2 @@
-__pycache__
+__pycache__
+__ignore*
app.py CHANGED
@@ -23,6 +23,51 @@ Ci_results.load_data()
 Ci_results.schedule_data_reload()
 
 
+# Function to check if a model has failures
+def model_has_failures(model_name):
+    """Check if a model has any failures (AMD or NVIDIA)."""
+    if Ci_results.df is None or Ci_results.df.empty:
+        return False
+
+    # Normalize model name to match DataFrame index
+    model_name_lower = model_name.lower()
+
+    # Check if model exists in DataFrame
+    if model_name_lower not in Ci_results.df.index:
+        return False
+
+    try:
+        row = Ci_results.df.loc[model_name_lower]
+
+        # Check for failures in both AMD and NVIDIA
+        amd_multi_failures = row.get('failed_multi_no_amd', 0) or 0
+        amd_single_failures = row.get('failed_single_no_amd', 0) or 0
+        nvidia_multi_failures = row.get('failed_multi_no_nvidia', 0) or 0
+        nvidia_single_failures = row.get('failed_single_no_nvidia', 0) or 0
+
+        total_failures = amd_multi_failures + amd_single_failures + nvidia_multi_failures + nvidia_single_failures
+        return total_failures > 0
+
+    except Exception:
+        return False
+
+
+# Function to get current description text
+def get_description_text():
+    """Get description text with integrated last update time."""
+    msg = [
+        "Transformer CI Dashboard",
+        "-",
+        "AMD runs on MI325",
+        "NVIDIA runs on A10",
+    ]
+    msg = ["**" + x + "**" for x in msg] + [""]
+    if Ci_results.last_update_time:
+        msg.append(f"*Result overview by model and hardware (last updated: {Ci_results.last_update_time})*")
+    else:
+        msg.append("*Result overview by model and hardware (loading...)*")
+    return "<br>".join(msg)
+
 # Load CSS from external file
 def load_css():
     try:
@@ -42,10 +87,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
     gr.Markdown("# 🤖 TCID", elem_classes=["sidebar-title"])
 
     # Description with integrated last update time
-    if Ci_results.last_update_time:
-        description_text = f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (last updated: {Ci_results.last_update_time})*\n"
-    else:
-        description_text = f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (loading...)*\n"
+    description_text = get_description_text()
     description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
 
     # Summary button at the top
@@ -66,11 +108,17 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
     model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
 
     for model_name in model_choices:
+        # Check if model has failures to determine styling
+        has_failures = model_has_failures(model_name)
+        button_classes = ["model-button"]
+        if has_failures:
+            button_classes.append("model-button-failed")
+
         btn = gr.Button(
             model_name,
            variant="secondary",
            size="sm",
-            elem_classes=["model-button"]
+            elem_classes=button_classes
        )
        model_buttons.append(btn)
 
@@ -143,14 +191,6 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
        outputs=[summary_display, detail_view]
    )
 
-    # Function to get current description text
-    def get_description_text():
-        """Get description text with integrated last update time."""
-        if Ci_results.last_update_time:
-            return f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (last updated: {Ci_results.last_update_time})*\n"
-        else:
-            return f"**Transformer CI Dashboard**\n\n*Result overview by model and hardware (loading...)*\n"
-
    # Function to get CI job links
    def get_ci_links():
        """Get CI job links from the most recent data."""
data.py CHANGED
@@ -86,6 +86,8 @@ def get_sample_data() -> pd.DataFrame:
     [
         "success_amd",
         "success_nvidia",
+        "skipped_amd",
+        "skipped_nvidia",
         "failed_multi_no_amd",
         "failed_multi_no_nvidia",
         "failed_single_no_amd",
@@ -104,15 +106,22 @@ def get_sample_data() -> pd.DataFrame:
     filtered_joined.index = "sample_" + filtered_joined.index
     return filtered_joined
 
+def safe_extract(row: pd.DataFrame, key: str) -> int:
+    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
+
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe
-    success_amd = int(row.get('success_amd', 0)) if pd.notna(row.get('success_amd', 0)) else 0
-    success_nvidia = int(row.get('success_nvidia', 0)) if pd.notna(row.get('success_nvidia', 0)) else 0
-    failed_multi_amd = int(row.get('failed_multi_no_amd', 0)) if pd.notna(row.get('failed_multi_no_amd', 0)) else 0
-    failed_multi_nvidia = int(row.get('failed_multi_no_nvidia', 0)) if pd.notna(row.get('failed_multi_no_nvidia', 0)) else 0
-    failed_single_amd = int(row.get('failed_single_no_amd', 0)) if pd.notna(row.get('failed_single_no_amd', 0)) else 0
-    failed_single_nvidia = int(row.get('failed_single_no_nvidia', 0)) if pd.notna(row.get('failed_single_no_nvidia', 0)) else 0
+    success_nvidia = safe_extract(row, "success_nvidia")
+    success_amd = safe_extract(row, "success_amd")
+
+    skipped_nvidia = safe_extract(row, "skipped_nvidia")
+    skipped_amd = safe_extract(row, "skipped_amd")
+
+    failed_multi_amd = safe_extract(row, 'failed_multi_no_amd')
+    failed_multi_nvidia = safe_extract(row, 'failed_multi_no_nvidia')
+    failed_single_amd = safe_extract(row, 'failed_single_no_amd')
+    failed_single_nvidia = safe_extract(row, 'failed_single_no_nvidia')
     # Calculate total failures
     total_failed_amd = failed_multi_amd + failed_single_amd
     total_failed_nvidia = failed_multi_nvidia + failed_single_nvidia
@@ -120,13 +129,13 @@ def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int],
     amd_stats = {
         'passed': success_amd,
         'failed': total_failed_amd,
-        'skipped': 0,  # Not available in this dataset
+        'skipped': skipped_amd,
         'error': 0  # Not available in this dataset
     }
     nvidia_stats = {
         'passed': success_nvidia,
         'failed': total_failed_nvidia,
-        'skipped': 0,  # Not available in this dataset
+        'skipped': skipped_nvidia,
         'error': 0  # Not available in this dataset
     }
     return amd_stats, nvidia_stats, failed_multi_amd, failed_single_amd, failed_multi_nvidia, failed_single_nvidia
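
`safe_extract` collapses the repeated NaN/missing-value guard into one helper. A quick illustration with a synthetic row (the committed annotation says `pd.DataFrame`, but what is actually passed is a `pd.Series` row, used below):

```python
import pandas as pd

def safe_extract(row: pd.Series, key: str) -> int:
    # Missing keys fall back to 0; NaN values (e.g. from sparse joins) also map to 0.
    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0

row = pd.Series({"success_amd": 80, "skipped_amd": float("nan")})
print(safe_extract(row, "success_amd"))     # 80
print(safe_extract(row, "skipped_amd"))     # 0 (NaN guarded)
print(safe_extract(row, "skipped_nvidia"))  # 0 (column absent)
```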
sample_amd.json CHANGED
@@ -52,20 +52,22 @@
52
  "multi": 0
53
  }
54
  },
 
55
  "success": 80,
56
- "time_spent": "5.47, 1.74, ",
 
57
  "failures": {},
58
  "job_link": {
59
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409911",
60
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410689"
61
  }
62
  },
63
  "models_bert": {
64
  "failed": {
65
  "PyTorch": {
66
  "unclassified": 0,
67
- "single": 2,
68
- "multi": 2
69
  },
70
  "TensorFlow": {
71
  "unclassified": 0,
@@ -113,33 +115,14 @@
113
  "multi": 0
114
  }
115
  },
 
116
  "success": 239,
117
- "time_spent": "0:02:22, 0:02:16, ",
118
- "failures": {
119
- "single": [
120
- {
121
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
122
- "trace": "(line 4140) KeyError: 'eager'"
123
- },
124
- {
125
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
126
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
127
- }
128
- ],
129
- "multi": [
130
- {
131
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
132
- "trace": "(line 4140) KeyError: 'eager'"
133
- },
134
- {
135
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
136
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
137
- }
138
- ]
139
- },
140
  "job_link": {
141
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409938",
142
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410716"
143
  }
144
  },
145
  "models_clip": {
@@ -195,12 +178,14 @@
195
  "multi": 0
196
  }
197
  },
 
198
  "success": 288,
199
- "time_spent": "0:03:29, 0:03:30, ",
 
200
  "failures": {},
201
  "job_link": {
202
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410734",
203
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409965"
204
  }
205
  },
206
  "models_detr": {
@@ -256,20 +241,22 @@
256
  "multi": 0
257
  }
258
  },
 
259
  "success": 77,
260
- "time_spent": "0:01:13, 0:01:49, ",
 
261
  "failures": {},
262
  "job_link": {
263
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410020",
264
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410868"
265
  }
266
  },
267
  "models_gemma3": {
268
  "failed": {
269
  "PyTorch": {
270
  "unclassified": 0,
271
- "single": 12,
272
- "multi": 13
273
  },
274
  "TensorFlow": {
275
  "unclassified": 0,
@@ -317,34 +304,12 @@
317
  "multi": 0
318
  }
319
  },
320
- "success": 341,
321
- "time_spent": "0:07:52, 0:09:43, ",
 
 
322
  "failures": {
323
  "single": [
324
- {
325
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
326
- "trace": "(line 4140) KeyError: 'eager'"
327
- },
328
- {
329
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
330
- "trace": "(line 4219) AssertionError: Tensor-likes are not close!"
331
- },
332
- {
333
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
334
- "trace": "(line 4140) KeyError: 'eager'"
335
- },
336
- {
337
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
338
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
339
- },
340
- {
341
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
342
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
343
- },
344
- {
345
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
346
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
347
- },
348
  {
349
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
350
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
@@ -355,7 +320,7 @@
355
  },
356
  {
357
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
358
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
359
  },
360
  {
361
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
@@ -371,34 +336,10 @@
371
  }
372
  ],
373
  "multi": [
374
- {
375
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
376
- "trace": "(line 4140) KeyError: 'eager'"
377
- },
378
- {
379
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
380
- "trace": "(line 4219) AssertionError: Tensor-likes are not close!"
381
- },
382
- {
383
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
384
- "trace": "(line 4140) KeyError: 'eager'"
385
- },
386
  {
387
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
388
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
389
  },
390
- {
391
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
392
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
393
- },
394
- {
395
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
396
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
397
- },
398
- {
399
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
400
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
401
- },
402
  {
403
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
404
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
@@ -409,7 +350,7 @@
409
  },
410
  {
411
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
412
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
413
  },
414
  {
415
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
@@ -426,8 +367,8 @@
426
  ]
427
  },
428
  "job_link": {
429
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410076",
430
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410943"
431
  }
432
  },
433
  "models_gemma3n": {
@@ -483,20 +424,22 @@
483
  "multi": 0
484
  }
485
  },
486
- "success": 0,
487
- "time_spent": ".56, .97, ",
 
 
488
  "failures": {},
489
  "job_link": {
490
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410944",
491
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410122"
492
  }
493
  },
494
  "models_got_ocr2": {
495
  "failed": {
496
  "PyTorch": {
497
  "unclassified": 0,
498
- "single": 1,
499
- "multi": 2
500
  },
501
  "TensorFlow": {
502
  "unclassified": 0,
@@ -544,37 +487,22 @@
544
  "multi": 0
545
  }
546
  },
547
- "success": 146,
548
- "time_spent": "0:01:56, 0:01:39, ",
549
- "failures": {
550
- "multi": [
551
- {
552
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
553
- "trace": "(line 4140) KeyError: 'eager'"
554
- },
555
- {
556
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_multi_gpu_data_parallel_forward",
557
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
558
- }
559
- ],
560
- "single": [
561
- {
562
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
563
- "trace": "(line 4140) KeyError: 'eager'"
564
- }
565
- ]
566
- },
567
  "job_link": {
568
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410969",
569
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410123"
570
  }
571
  },
572
  "models_gpt2": {
573
  "failed": {
574
  "PyTorch": {
575
  "unclassified": 0,
576
- "single": 1,
577
- "multi": 1
578
  },
579
  "TensorFlow": {
580
  "unclassified": 0,
@@ -622,33 +550,22 @@
622
  "multi": 0
623
  }
624
  },
 
625
  "success": 249,
626
- "time_spent": "0:04:53, 0:02:05, ",
627
- "failures": {
628
- "multi": [
629
- {
630
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
631
- "trace": "(line 4140) KeyError: 'eager'"
632
- }
633
- ],
634
- "single": [
635
- {
636
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
637
- "trace": "(line 4140) KeyError: 'eager'"
638
- }
639
- ]
640
- },
641
  "job_link": {
642
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410990",
643
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410088"
644
  }
645
  },
646
  "models_internvl": {
647
  "failed": {
648
  "PyTorch": {
649
  "unclassified": 0,
650
- "single": 2,
651
- "multi": 3
652
  },
653
  "TensorFlow": {
654
  "unclassified": 0,
@@ -696,28 +613,18 @@
696
  "multi": 0
697
  }
698
  },
699
- "success": 252,
700
- "time_spent": "0:02:54, 0:02:55, ",
 
 
701
  "failures": {
702
  "multi": [
703
- {
704
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
705
- "trace": "(line 4140) KeyError: 'eager'"
706
- },
707
- {
708
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_multi_gpu_data_parallel_forward",
709
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
710
- },
711
  {
712
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
713
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
714
  }
715
  ],
716
  "single": [
717
- {
718
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
719
- "trace": "(line 4140) KeyError: 'eager'"
720
- },
721
  {
722
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
723
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
@@ -725,16 +632,16 @@
725
  ]
726
  },
727
  "job_link": {
728
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527411014",
729
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410165"
730
  }
731
  },
732
  "models_llama": {
733
  "failed": {
734
  "PyTorch": {
735
  "unclassified": 0,
736
- "single": 2,
737
- "multi": 3
738
  },
739
  "TensorFlow": {
740
  "unclassified": 0,
@@ -782,28 +689,18 @@
782
  "multi": 0
783
  }
784
  },
785
- "success": 232,
786
- "time_spent": "0:10:51, 0:23:47, ",
 
 
787
  "failures": {
788
  "multi": [
789
- {
790
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
791
- "trace": "(line 4140) KeyError: 'eager'"
792
- },
793
- {
794
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_multi_gpu_data_parallel_forward",
795
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
796
- },
797
  {
798
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
799
  "trace": "(line 727) AssertionError: False is not true"
800
  }
801
  ],
802
  "single": [
803
- {
804
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
805
- "trace": "(line 4140) KeyError: 'eager'"
806
- },
807
  {
808
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
809
  "trace": "(line 727) AssertionError: False is not true"
@@ -811,16 +708,16 @@
811
  ]
812
  },
813
  "job_link": {
814
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527411041",
815
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410199"
816
  }
817
  },
818
  "models_llava": {
819
  "failed": {
820
  "PyTorch": {
821
  "unclassified": 0,
822
- "single": 4,
823
- "multi": 5
824
  },
825
  "TensorFlow": {
826
  "unclassified": 0,
@@ -868,44 +765,18 @@
868
  "multi": 0
869
  }
870
  },
871
- "success": 202,
872
- "time_spent": "0:02:38, 0:02:51, ",
 
 
873
  "failures": {
874
  "multi": [
875
- {
876
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
877
- "trace": "(line 4140) KeyError: 'eager'"
878
- },
879
- {
880
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
881
- "trace": "(line 727) AssertionError: False is not true"
882
- },
883
- {
884
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
885
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
886
- },
887
- {
888
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
889
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
890
- },
891
  {
892
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
893
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
894
  }
895
  ],
896
  "single": [
897
- {
898
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
899
- "trace": "(line 4140) KeyError: 'eager'"
900
- },
901
- {
902
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
903
- "trace": "(line 727) AssertionError: False is not true"
904
- },
905
- {
906
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
907
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
908
- },
909
  {
910
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
911
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
@@ -913,16 +784,16 @@
913
  ]
914
  },
915
  "job_link": {
916
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527411134",
917
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410218"
918
  }
919
  },
920
  "models_mistral3": {
921
  "failed": {
922
  "PyTorch": {
923
  "unclassified": 0,
924
- "single": 2,
925
- "multi": 3
926
  },
927
  "TensorFlow": {
928
  "unclassified": 0,
@@ -970,28 +841,18 @@
970
  "multi": 0
971
  }
972
  },
973
- "success": 198,
974
- "time_spent": "0:14:37, 0:05:43, ",
 
 
975
  "failures": {
976
  "single": [
977
- {
978
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
979
- "trace": "(line 4140) KeyError: 'eager'"
980
- },
981
  {
982
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
983
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
984
  }
985
  ],
986
  "multi": [
987
- {
988
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
989
- "trace": "(line 4140) KeyError: 'eager'"
990
- },
991
- {
992
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_multi_gpu_data_parallel_forward",
993
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
994
- },
995
  {
996
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
997
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
@@ -999,16 +860,16 @@
999
  ]
1000
  },
1001
  "job_link": {
1002
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409417",
1003
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410265"
1004
  }
1005
  },
1006
  "models_modernbert": {
1007
  "failed": {
1008
  "PyTorch": {
1009
  "unclassified": 0,
1010
- "single": 5,
1011
- "multi": 5
1012
  },
1013
  "TensorFlow": {
1014
  "unclassified": 0,
@@ -1056,65 +917,22 @@
1056
  "multi": 0
1057
  }
1058
  },
1059
- "success": 132,
1060
- "time_spent": "0:02:22, 0:01:49, ",
1061
- "failures": {
1062
- "multi": [
1063
- {
1064
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1065
- "trace": "(line 715) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1066
- },
1067
- {
1068
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1069
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1070
- },
1071
- {
1072
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1073
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1074
- },
1075
- {
1076
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1077
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1078
- },
1079
- {
1080
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1081
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1082
- }
1083
- ],
1084
- "single": [
1085
- {
1086
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1087
- "trace": "(line 715) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1088
- },
1089
- {
1090
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1091
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1092
- },
1093
- {
1094
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1095
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1096
- },
1097
- {
1098
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1099
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1100
- },
1101
- {
1102
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1103
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1104
- }
1105
- ]
1106
- },
1107
  "job_link": {
1108
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410294",
1109
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409446"
1110
  }
1111
  },
1112
  "models_qwen2": {
1113
  "failed": {
1114
  "PyTorch": {
1115
  "unclassified": 0,
1116
- "single": 2,
1117
- "multi": 3
1118
  },
1119
  "TensorFlow": {
1120
  "unclassified": 0,
@@ -1162,45 +980,35 @@
1162
  "multi": 0
1163
  }
1164
  },
1165
- "success": 214,
1166
- "time_spent": "0:02:23, 0:02:39, ",
 
 
1167
  "failures": {
1168
  "multi": [
1169
- {
1170
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1171
- "trace": "(line 4140) KeyError: 'eager'"
1172
- },
1173
- {
1174
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_multi_gpu_data_parallel_forward",
1175
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1176
- },
1177
  {
1178
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1179
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1180
  }
1181
  ],
1182
  "single": [
1183
- {
1184
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1185
- "trace": "(line 4140) KeyError: 'eager'"
1186
- },
1187
  {
1188
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1189
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1190
  }
1191
  ]
1192
  },
1193
  "job_link": {
1194
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410392",
1195
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409572"
1196
  }
1197
  },
1198
  "models_qwen2_5_omni": {
1199
  "failed": {
1200
  "PyTorch": {
1201
  "unclassified": 0,
1202
- "single": 1,
1203
- "multi": 3
1204
  },
1205
  "TensorFlow": {
1206
  "unclassified": 0,
@@ -1248,33 +1056,35 @@
1248
  "multi": 0
1249
  }
1250
  },
 
1251
  "success": 167,
1252
- "time_spent": "0:06:59, 0:02:55, ",
 
1253
  "failures": {
1254
  "multi": [
1255
  {
1256
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
1257
  "trace": "(line 715) AssertionError: Items in the second set but not the first:"
1258
  },
1259
- {
1260
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
1261
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1262
- },
1263
  {
1264
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1265
- "trace": "(line 715) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1266
  }
1267
  ],
1268
  "single": [
 
 
 
 
1269
  {
1270
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1271
- "trace": "(line 715) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1272
  }
1273
  ]
1274
  },
1275
  "job_link": {
1276
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410407",
1277
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409568"
1278
  }
1279
  },
1280
  "models_qwen2_5_vl": {
@@ -1282,7 +1092,7 @@
1282
  "PyTorch": {
1283
  "unclassified": 0,
1284
  "single": 1,
1285
- "multi": 2
1286
  },
1287
  "TensorFlow": {
1288
  "unclassified": 0,
@@ -1330,14 +1140,12 @@
1330
  "multi": 0
1331
  }
1332
  },
1333
- "success": 204,
1334
- "time_spent": "0:03:59, 0:03:58, ",
 
 
1335
  "failures": {
1336
  "multi": [
1337
- {
1338
- "line": "tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
1339
- "trace": "(line 406) AssertionError: Tensor-likes are not equal!"
1340
- },
1341
  {
1342
  "line": "tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions",
1343
  "trace": "(line 715) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']"
@@ -1351,16 +1159,16 @@
1351
  ]
1352
  },
1353
  "job_link": {
1354
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410397",
1355
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409587"
1356
  }
1357
  },
1358
  "models_smolvlm": {
1359
  "failed": {
1360
  "PyTorch": {
1361
  "unclassified": 0,
1362
- "single": 1,
1363
- "multi": 1
1364
  },
1365
  "TensorFlow": {
1366
  "unclassified": 0,
@@ -1408,33 +1216,22 @@
1408
  "multi": 0
1409
  }
1410
  },
 
1411
  "success": 323,
1412
- "time_spent": "0:02:49, 0:02:35, ",
1413
- "failures": {
1414
- "single": [
1415
- {
1416
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1417
- "trace": "(line 4140) KeyError: 'eager'"
1418
- }
1419
- ],
1420
- "multi": [
1421
- {
1422
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1423
- "trace": "(line 4140) KeyError: 'eager'"
1424
- }
1425
- ]
1426
- },
1427
  "job_link": {
1428
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409653",
1429
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410495"
1430
  }
1431
  },
1432
  "models_t5": {
1433
  "failed": {
1434
  "PyTorch": {
1435
  "unclassified": 0,
1436
- "single": 3,
1437
- "multi": 4
1438
  },
1439
  "TensorFlow": {
1440
  "unclassified": 0,
@@ -1482,14 +1279,12 @@
1482
  "multi": 0
1483
  }
1484
  },
 
1485
  "success": 254,
1486
- "time_spent": "0:05:05, 0:03:30, ",
 
1487
  "failures": {
1488
  "multi": [
1489
- {
1490
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1491
- "trace": "(line 4140) KeyError: 'eager'"
1492
- },
1493
  {
1494
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
1495
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
@@ -1504,10 +1299,6 @@
1504
  }
1505
  ],
1506
  "single": [
1507
- {
1508
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1509
- "trace": "(line 4140) KeyError: 'eager'"
1510
- },
1511
  {
1512
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
1513
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"
@@ -1519,8 +1310,8 @@
1519
  ]
1520
  },
1521
  "job_link": {
1522
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410524",
1523
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409705"
1524
  }
1525
  },
1526
  "models_vit": {
@@ -1576,12 +1367,14 @@
1576
  "multi": 0
1577
  }
1578
  },
 
1579
  "success": 135,
1580
- "time_spent": "0:02:19, 0:01:21, ",
 
1581
  "failures": {},
1582
  "job_link": {
1583
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410589",
1584
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409755"
1585
  }
1586
  },
1587
  "models_wav2vec2": {
@@ -1637,20 +1430,22 @@
1637
  "multi": 0
1638
  }
1639
  },
1640
- "success": 0,
1641
- "time_spent": "0.96, .03, ",
 
 
1642
  "failures": {},
1643
  "job_link": {
1644
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410594",
1645
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409797"
1646
  }
1647
  },
1648
  "models_whisper": {
1649
  "failed": {
1650
  "PyTorch": {
1651
  "unclassified": 0,
1652
- "single": 0,
1653
- "multi": 0
1654
  },
1655
  "TensorFlow": {
1656
  "unclassified": 0,
@@ -1698,12 +1493,347 @@
1698
  "multi": 0
1699
  }
1700
  },
1701
- "success": 0,
1702
- "time_spent": ".19, .20, ",
1703
- "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1704
  "job_link": {
1705
- "single": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527409794",
1706
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460430974/job/46527410606"
1707
  }
1708
  }
1709
- }
 
52
  "multi": 0
53
  }
54
  },
55
+ "errors": 0,
56
  "success": 80,
57
+ "skipped": 2,
58
+ "time_spent": "0.99, 2.41, ",
59
  "failures": {},
60
  "job_link": {
61
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329937",
62
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330183"
63
  }
64
  },
65
  "models_bert": {
66
  "failed": {
67
  "PyTorch": {
68
  "unclassified": 0,
69
+ "single": 0,
70
+ "multi": 0
71
  },
72
  "TensorFlow": {
73
  "unclassified": 0,
 
115
  "multi": 0
116
  }
117
  },
118
+ "errors": 0,
119
  "success": 239,
120
+ "skipped": 111,
121
+ "time_spent": "8.85, 0:01:00, ",
122
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  "job_link": {
124
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329946",
125
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330199"
126
  }
127
  },
128
  "models_clip": {
 
178
  "multi": 0
179
  }
180
  },
181
+ "errors": 0,
182
  "success": 288,
183
+ "skipped": 590,
184
+ "time_spent": "0:01:55, 0:01:58, ",
185
  "failures": {},
186
  "job_link": {
187
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330217",
188
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329991"
189
  }
190
  },
191
  "models_detr": {
 
241
  "multi": 0
242
  }
243
  },
244
+ "errors": 0,
245
  "success": 77,
246
+ "skipped": 159,
247
+ "time_spent": "4.40, 6.77, ",
248
  "failures": {},
249
  "job_link": {
250
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330035",
251
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330267"
252
  }
253
  },
254
  "models_gemma3": {
255
  "failed": {
256
  "PyTorch": {
257
  "unclassified": 0,
258
+ "single": 6,
259
+ "multi": 7
260
  },
261
  "TensorFlow": {
262
  "unclassified": 0,
 
304
  "multi": 0
305
  }
306
  },
307
+ "errors": 0,
308
+ "success": 349,
309
+ "skipped": 260,
310
+ "time_spent": "0:11:14, 0:11:08, ",
311
  "failures": {
312
  "single": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  {
314
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
315
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
 
320
  },
321
  {
322
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
323
+ "trace": "(line 715) AssertionError: Lists differ: [\"user\\nYou are a helpful assistant.\\n\\nHe[678 chars]h a'] != ['user\\nYou are a helpful assistant.\\n\\nHe[658 chars]h a']"
324
  },
325
  {
326
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
 
336
  }
337
  ],
338
  "multi": [
 
 
 
 
 
 
 
 
 
 
 
 
339
  {
340
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
341
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
342
  },
 
 
 
 
 
 
 
 
 
 
 
 
343
  {
344
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_1b_text_only",
345
  "trace": "(line 715) AssertionError: Lists differ: ['Wri[57 chars]s, a silent stream,\\nInto the neural net, a wa[42 chars],\\n'] != ['Wri[57 chars]s, a river deep,\\nWith patterns hidden, secret[46 chars]ing']"
 
350
  },
351
  {
352
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
353
+ "trace": "(line 715) AssertionError: Lists differ: [\"user\\nYou are a helpful assistant.\\n\\nHe[678 chars]h a'] != ['user\\nYou are a helpful assistant.\\n\\nHe[658 chars]h a']"
354
  },
355
  {
356
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_bf16",
 
367
  ]
368
  },
369
  "job_link": {
370
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330061",
371
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330319"
372
  }
373
  },
374
  "models_gemma3n": {
 
424
  "multi": 0
425
  }
426
  },
427
+ "errors": 0,
428
+ "success": 197,
429
+ "skipped": 635,
430
+ "time_spent": "0:01:06, 0:01:08, ",
431
  "failures": {},
432
  "job_link": {
433
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330294",
434
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330077"
435
  }
436
  },
437
  "models_got_ocr2": {
438
  "failed": {
439
  "PyTorch": {
440
  "unclassified": 0,
441
+ "single": 0,
442
+ "multi": 0
443
  },
444
  "TensorFlow": {
445
  "unclassified": 0,
 
487
  "multi": 0
488
  }
489
  },
490
+ "errors": 0,
491
+ "success": 147,
492
+ "skipped": 163,
493
+ "time_spent": "0:01:03, 0:01:01, ",
494
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  "job_link": {
496
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330314",
497
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330094"
498
  }
499
  },
500
  "models_gpt2": {
501
  "failed": {
502
  "PyTorch": {
503
  "unclassified": 0,
504
+ "single": 0,
505
+ "multi": 0
506
  },
507
  "TensorFlow": {
508
  "unclassified": 0,
 
550
  "multi": 0
551
  }
552
  },
553
+ "errors": 0,
554
  "success": 249,
555
+ "skipped": 99,
556
+ "time_spent": "0:02:01, 0:01:46, ",
557
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
558
  "job_link": {
559
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330311",
560
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330113"
561
  }
562
  },
563
  "models_internvl": {
564
  "failed": {
565
  "PyTorch": {
566
  "unclassified": 0,
567
+ "single": 1,
568
+ "multi": 1
569
  },
570
  "TensorFlow": {
571
  "unclassified": 0,
 
613
  "multi": 0
614
  }
615
  },
616
+ "errors": 0,
617
+ "success": 253,
618
+ "skipped": 107,
619
+ "time_spent": "0:01:50, 0:02:00, ",
620
  "failures": {
621
  "multi": [
 
 
 
 
 
 
 
 
622
  {
623
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
624
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
625
  }
626
  ],
627
  "single": [
 
 
 
 
628
  {
629
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLLlamaIntegrationTest::test_llama_small_model_integration_forward",
630
  "trace": "(line 727) AssertionError: False is not true : Actual logits: tensor([ -9.8750, -0.4885, 1.4668, -10.3359, -10.3359], dtype=torch.float16)"
 
632
  ]
633
  },
634
  "job_link": {
635
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330361",
636
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330105"
637
  }
638
  },
639
  "models_llama": {
640
  "failed": {
641
  "PyTorch": {
642
  "unclassified": 0,
643
+ "single": 1,
644
+ "multi": 1
645
  },
646
  "TensorFlow": {
647
  "unclassified": 0,
 
689
  "multi": 0
690
  }
691
  },
692
+ "errors": 0,
693
+ "success": 235,
694
+ "skipped": 101,
695
+ "time_spent": "0:03:15, 0:02:51, ",
696
  "failures": {
697
  "multi": [
 
 
 
 
 
 
 
 
698
  {
699
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
700
  "trace": "(line 727) AssertionError: False is not true"
701
  }
702
  ],
703
  "single": [
 
 
 
 
704
  {
705
  "line": "tests/models/llama/test_modeling_llama.py::LlamaIntegrationTest::test_model_7b_logits_bf16",
706
  "trace": "(line 727) AssertionError: False is not true"
 
708
  ]
709
  },
710
  "job_link": {
711
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330531",
712
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330138"
713
  }
714
  },
715
  "models_llava": {
716
  "failed": {
717
  "PyTorch": {
718
  "unclassified": 0,
719
+ "single": 1,
720
+ "multi": 1
721
  },
722
  "TensorFlow": {
723
  "unclassified": 0,
 
765
  "multi": 0
766
  }
767
  },
768
+ "errors": 0,
769
+ "success": 206,
770
+ "skipped": 124,
771
+ "time_spent": "0:03:58, 0:04:34, ",
772
  "failures": {
773
  "multi": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
774
  {
775
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
776
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
777
  }
778
  ],
779
  "single": [
 
 
 
 
 
 
 
 
 
 
 
 
780
  {
781
  "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationIntegrationTest::test_batched_generation",
782
  "trace": "(line 399) importlib.metadata.PackageNotFoundError: No package metadata was found for bitsandbytes"
 
784
  ]
785
  },
786
  "job_link": {
787
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330406",
788
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330161"
789
  }
790
  },
791
  "models_mistral3": {
792
  "failed": {
793
  "PyTorch": {
794
  "unclassified": 0,
795
+ "single": 1,
796
+ "multi": 1
797
  },
798
  "TensorFlow": {
799
  "unclassified": 0,
 
841
  "multi": 0
842
  }
843
  },
844
+ "errors": 0,
845
+ "success": 199,
846
+ "skipped": 105,
847
+ "time_spent": "0:04:34, 0:04:39, ",
848
  "failures": {
849
  "single": [
 
 
 
 
850
  {
851
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
852
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
853
  }
854
  ],
855
  "multi": [
 
 
 
 
 
 
 
 
856
  {
857
  "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3IntegrationTest::test_mistral3_integration_generate",
858
  "trace": "(line 715) AssertionError: 'The [14 chars] two cats lying on a pink surface, which appea[21 chars] bed' != 'The [14 chars] two tabby cats lying on a pink surface, which[23 chars]n or'"
 
860
  ]
861
  },
862
  "job_link": {
863
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330418",
864
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329678"
865
  }
866
  },
867
  "models_modernbert": {
868
  "failed": {
869
  "PyTorch": {
870
  "unclassified": 0,
871
+ "single": 0,
872
+ "multi": 0
873
  },
874
  "TensorFlow": {
875
  "unclassified": 0,
 
917
  "multi": 0
918
  }
919
  },
920
+ "errors": 0,
921
+ "success": 142,
922
+ "skipped": 102,
923
+ "time_spent": "0:01:03, 9.02, ",
924
+ "failures": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
925
  "job_link": {
926
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329712",
927
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330429"
928
  }
929
  },
930
  "models_qwen2": {
931
  "failed": {
932
  "PyTorch": {
933
  "unclassified": 0,
934
+ "single": 1,
935
+ "multi": 1
936
  },
937
  "TensorFlow": {
938
  "unclassified": 0,
 
980
  "multi": 0
981
  }
982
  },
983
+ "errors": 0,
984
+ "success": 217,
985
+ "skipped": 113,
986
+ "time_spent": "0:01:08, 0:01:05, ",
987
  "failures": {
988
  "multi": [
 
 
 
 
 
 
 
 
989
  {
990
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
991
+ "trace": "(line 715) AssertionError: Lists differ: ['My [35 chars], organic, gluten free, vegan, and vegetarian. I love to use'] != ['My [35 chars], organic, gluten free, vegan, and free from preservatives. I']"
992
  }
993
  ],
994
  "single": [
 
 
 
 
995
  {
996
  "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
997
+ "trace": "(line 715) AssertionError: Lists differ: ['My [35 chars], organic, gluten free, vegan, and vegetarian. I love to use'] != ['My [35 chars], organic, gluten free, vegan, and free from preservatives. I']"
998
  }
999
  ]
1000
  },
1001
  "job_link": {
1002
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329761",
1003
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330508"
1004
  }
1005
  },
  "models_qwen2_5_omni": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 2,
+ "multi": 2
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 167,
+ "skipped": 141,
+ "time_spent": "0:02:23, 0:01:53, ",
  "failures": {
  "multi": [
  {
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
  "trace": "(line 715) AssertionError: Items in the second set but not the first:"
  },

  {
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
+ "trace": "(line 715) AssertionError: Lists differ: [\"sys[293 chars]s shattering, and the dog appears to be a Labrador Retriever.\"] != [\"sys[293 chars]s shattering, and the dog is a Labrador Retriever.\"]"
  }
  ],
  "single": [
+ {
+ "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test",
+ "trace": "(line 700) requests.exceptions.ConnectionError: HTTPSConnectionPool(host='qianwen-res.oss-accelerate-overseas.aliyuncs.com', port=443): Max retries exceeded with url: /Qwen2-VL/demo_small.jpg (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7cb8c91d02f0>: Failed to establish a new connection: [Errno -2] Name or service not known'))"
+ },
  {
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
+ "trace": "(line 715) AssertionError: Lists differ: [\"sys[109 chars]d is a glass shattering, and the dog is a Labr[187 chars]er.\"] != [\"sys[109 chars]d is glass shattering, and the dog is a Labrad[185 chars]er.\"]"
  }
  ]
  },
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329806",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330503"
  }
  },
  "models_qwen2_5_vl": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
  "single": 1,
+ "multi": 1
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
+ "success": 205,
+ "skipped": 113,
+ "time_spent": "0:02:32, 0:02:29, ",
  "failures": {
  "multi": [

  {
  "line": "tests/models/qwen2_5_vl/test_modeling_qwen2_5_vl.py::Qwen2_5_VLIntegrationTest::test_small_model_integration_test_batch_different_resolutions",
  "trace": "(line 715) AssertionError: Lists differ: ['sys[314 chars]ion\\n addCriterion\\n\\n addCriterion\\n\\n addCri[75 chars]n\\n'] != ['sys[314 chars]ion\\nThe dog in the picture appears to be a La[81 chars] is']"

  ]
  },
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329760",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330498"
  }
  },
  "models_smolvlm": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 0,
+ "multi": 0
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 323,
+ "skipped": 231,
+ "time_spent": "0:01:08, 0:01:13, ",
+ "failures": {},

  "job_link": {
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330553",
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329835"
  }
  },
  "models_t5": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 2,
+ "multi": 3
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 254,
+ "skipped": 325,
+ "time_spent": "0:01:50, 0:01:40, ",
  "failures": {
  "multi": [

  {
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"

  }
  ],
  "single": [

  {
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"

  ]
  },
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329815",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330559"
  }
  },
  "models_vit": {

  "multi": 0
  }
  },
+ "errors": 0,
  "success": 135,
+ "skipped": 93,
+ "time_spent": "9.85, 7.74, ",
  "failures": {},
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329875",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330596"
  }
  },
  "models_wav2vec2": {

  "multi": 0
  }
  },
+ "errors": 0,
+ "success": 292,
+ "skipped": 246,
+ "time_spent": "0:01:56, 0:01:54, ",
  "failures": {},
  "job_link": {
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329877",
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330632"
  }
  },
  "models_whisper": {
  "failed": {
  "PyTorch": {
  "unclassified": 0,
+ "single": 40,
+ "multi": 42
  },
  "TensorFlow": {
  "unclassified": 0,

  "multi": 0
  }
  },
+ "errors": 0,
+ "success": 537,
+ "skipped": 337,
+ "time_spent": "0:03:23, 0:03:02, ",
+ "failures": {
+ "single": [
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_distil_token_timestamp_generation",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_forced_decoder_ids",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids_task_language",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_language_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_en_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_non_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_specaugment_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation_long_form",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_batch_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_empty_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_no_speech_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_prompt_ids",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_beam",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ }
+ ],
+ "multi": [
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_multi_gpu_data_parallel_forward",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_distil_token_timestamp_generation",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_forced_decoder_ids",
+ "trace": "(line 2938) Failed: (subprocess)"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids",
+ "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_prompt_ids_task_language",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_language_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_generation_multilingual",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_en_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_small_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_speculative_decoding_non_distil",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_batched_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_en_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_logits_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_longform_timestamps_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_specaugment_librispeech",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_static_generation_long_form",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_batch_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_tiny_token_timestamp_generation_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_empty_longform",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_empty_longform_multi_gpu",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_no_speech_detection",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_prompt_ids",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_beam",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_multi_batch_hard_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ },
+ {
+ "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
+ "trace": "(line 172) ImportError: To support decoding audio data, please install 'torchcodec'."
+ }
+ ]
+ },
  "job_link": {
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301330636",
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712966867/job/47301329883"
  }
  }
+ }
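Every model block in these sample files shares the same shape: per-framework `failed` counts split into `single`/`multi`, `errors`/`success`/`skipped` totals, a `failures` list of `line`/`trace` entries per GPU setup, and `job_link` URLs. As a minimal sketch of how a report like this can be rolled up into the global failure rate the dashboard now surfaces — assuming the top level of the file maps `models_*` keys to blocks shaped like the ones above; the file name and the `failure_rate` helper are illustrative, not part of the app:

```python
import json

def failure_rate(report: dict) -> float:
    """Global failure rate over one report: failed / (failed + success).

    Skipped tests are left out of the denominator, and the
    "unclassified" bucket is ignored for simplicity.
    """
    failed = succeeded = 0
    for block in report.values():
        if not isinstance(block, dict) or "failed" not in block:
            continue  # tolerate non-model metadata keys, if any
        for counts in block["failed"].values():
            failed += counts.get("single", 0) + counts.get("multi", 0)
        succeeded += block.get("success", 0)
    total = failed + succeeded
    return failed / total if total else 0.0

# Example usage against the sample above:
with open("sample_amd.json") as f:
    print(f"global failure rate: {failure_rate(json.load(f)):.2%}")
```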
sample_nvidia.json CHANGED
@@ -52,20 +52,22 @@
52
  "multi": 0
53
  }
54
  },
 
55
  "success": 226,
56
- "time_spent": "4.66, 6.10, ",
 
57
  "failures": {},
58
  "job_link": {
59
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561673",
60
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561472"
61
  }
62
  },
63
  "models_bert": {
64
  "failed": {
65
  "PyTorch": {
66
  "unclassified": 0,
67
- "single": 2,
68
- "multi": 2
69
  },
70
  "TensorFlow": {
71
  "unclassified": 0,
@@ -113,33 +115,14 @@
113
  "multi": 0
114
  }
115
  },
 
116
  "success": 527,
117
- "time_spent": "0:01:58, 0:02:00, ",
118
- "failures": {
119
- "single": [
120
- {
121
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
122
- "trace": "(line 4140) KeyError: 'eager'"
123
- },
124
- {
125
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
126
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
127
- }
128
- ],
129
- "multi": [
130
- {
131
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_eager_padding_matches_padding_free_with_position_ids",
132
- "trace": "(line 4140) KeyError: 'eager'"
133
- },
134
- {
135
- "line": "tests/models/bert/test_modeling_bert.py::BertModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
136
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
137
- }
138
- ]
139
- },
140
  "job_link": {
141
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561709",
142
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561482"
143
  }
144
  },
145
  "models_clip": {
@@ -195,12 +178,14 @@
195
  "multi": 0
196
  }
197
  },
 
198
  "success": 660,
199
- "time_spent": "0:02:24, 0:02:20, ",
 
200
  "failures": {},
201
  "job_link": {
202
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561994",
203
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562125"
204
  }
205
  },
206
  "models_detr": {
@@ -256,20 +241,22 @@
256
  "multi": 0
257
  }
258
  },
 
259
  "success": 177,
260
- "time_spent": "0:01:14, 0:01:19, ",
 
261
  "failures": {},
262
  "job_link": {
263
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562517",
264
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562397"
265
  }
266
  },
267
  "models_gemma3": {
268
  "failed": {
269
  "PyTorch": {
270
  "unclassified": 0,
271
- "single": 7,
272
- "multi": 8
273
  },
274
  "TensorFlow": {
275
  "unclassified": 0,
@@ -317,77 +304,21 @@
317
  "multi": 0
318
  }
319
  },
320
- "success": 499,
321
- "time_spent": "0:07:50, 0:07:52, ",
 
 
322
  "failures": {
323
- "single": [
324
- {
325
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
326
- "trace": "(line 4140) KeyError: 'eager'"
327
- },
328
- {
329
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
330
- "trace": "(line 4216) AssertionError: Tensor-likes are not equal!"
331
- },
332
- {
333
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
334
- "trace": "(line 4140) KeyError: 'eager'"
335
- },
336
- {
337
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
338
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
339
- },
340
- {
341
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
342
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
343
- },
344
- {
345
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
346
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
347
- },
348
- {
349
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
350
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
351
- }
352
- ],
353
  "multi": [
354
- {
355
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
356
- "trace": "(line 4140) KeyError: 'eager'"
357
- },
358
- {
359
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3ModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
360
- "trace": "(line 4219) AssertionError: Tensor-likes are not close!"
361
- },
362
- {
363
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
364
- "trace": "(line 4140) KeyError: 'eager'"
365
- },
366
  {
367
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
368
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
369
- },
370
- {
371
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_export_text_only_with_hybrid_cache",
372
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function scaled_dot_product_attention>(*(FakeTensor(..., size=(1, 4, 1, 256), grad_fn=<AddBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>), FakeTensor(..., size=(1, 4, 4096, 256), grad_fn=<CloneBackward0>)), **{'attn_mask': FakeTensor(..., size=(1, 1, 1, 512), dtype=torch.bool), 'dropout_p': 0.0, 'scale': 0.0625, 'is_causal': False}): got RuntimeError('Attempting to broadcast a dimension of length 512 at -1! Mismatching argument at index 1 had torch.Size([1, 1, 1, 512]); but expected shape should be broadcastable to [1, 4, 1, 4096]')"
373
- },
374
- {
375
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_1_sdpa",
376
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (4826) must match the existing size (4807) at non-singleton dimension 3. Target sizes: [2, 4, 4807, 4826]. Tensor sizes: [2, 1, 4807, 4807]"
377
- },
378
- {
379
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_generation_beyond_sliding_window_2_eager",
380
- "trace": "(line 265) RuntimeError: The size of tensor a (4826) must match the size of tensor b (4807) at non-singleton dimension 3"
381
- },
382
- {
383
- "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3IntegrationTest::test_model_4b_batch_crops",
384
- "trace": "(line 81) RuntimeError: The expanded size of the tensor (1646) must match the existing size (1617) at non-singleton dimension 3. Target sizes: [2, 8, 1617, 1646]. Tensor sizes: [2, 1, 1617, 1617]"
385
  }
386
  ]
387
  },
388
  "job_link": {
389
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563053",
390
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562857"
391
  }
392
  },
393
  "models_gemma3n": {
@@ -395,7 +326,7 @@
395
  "PyTorch": {
396
  "unclassified": 0,
397
  "single": 1,
398
- "multi": 2
399
  },
400
  "TensorFlow": {
401
  "unclassified": 0,
@@ -443,37 +374,29 @@
443
  "multi": 0
444
  }
445
  },
446
- "success": 286,
447
- "time_spent": "0:02:29, 0:02:32, ",
 
 
448
  "failures": {
449
- "multi": [
450
- {
451
- "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
452
- "trace": "(line 4140) KeyError: 'eager'"
453
- },
454
- {
455
- "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_multi_gpu_data_parallel_forward",
456
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
457
- }
458
- ],
459
  "single": [
460
  {
461
- "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_eager_padding_matches_padding_free_with_position_ids",
462
- "trace": "(line 4140) KeyError: 'eager'"
463
  }
464
  ]
465
  },
466
  "job_link": {
467
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562955",
468
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563061"
469
  }
470
  },
471
  "models_got_ocr2": {
472
  "failed": {
473
  "PyTorch": {
474
  "unclassified": 0,
475
- "single": 1,
476
- "multi": 2
477
  },
478
  "TensorFlow": {
479
  "unclassified": 0,
@@ -521,37 +444,22 @@
521
  "multi": 0
522
  }
523
  },
524
- "success": 254,
525
- "time_spent": "0:02:02, 0:02:15, ",
526
- "failures": {
527
- "multi": [
528
- {
529
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
530
- "trace": "(line 4140) KeyError: 'eager'"
531
- },
532
- {
533
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_multi_gpu_data_parallel_forward",
534
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
535
- }
536
- ],
537
- "single": [
538
- {
539
- "line": "tests/models/got_ocr2/test_modeling_got_ocr2.py::GotOcr2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
540
- "trace": "(line 4140) KeyError: 'eager'"
541
- }
542
- ]
543
- },
544
  "job_link": {
545
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562995",
546
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563212"
547
  }
548
  },
549
  "models_gpt2": {
550
  "failed": {
551
  "PyTorch": {
552
  "unclassified": 0,
553
- "single": 1,
554
- "multi": 1
555
  },
556
  "TensorFlow": {
557
  "unclassified": 0,
@@ -599,33 +507,22 @@
599
  "multi": 0
600
  }
601
  },
 
602
  "success": 487,
603
- "time_spent": "0:02:23, 0:02:38, ",
604
- "failures": {
605
- "multi": [
606
- {
607
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
608
- "trace": "(line 4140) KeyError: 'eager'"
609
- }
610
- ],
611
- "single": [
612
- {
613
- "line": "tests/models/gpt2/test_modeling_gpt2.py::GPT2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
614
- "trace": "(line 4140) KeyError: 'eager'"
615
- }
616
- ]
617
- },
618
  "job_link": {
619
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563001",
620
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563255"
621
  }
622
  },
623
  "models_internvl": {
624
  "failed": {
625
  "PyTorch": {
626
  "unclassified": 0,
627
- "single": 2,
628
- "multi": 3
629
  },
630
  "TensorFlow": {
631
  "unclassified": 0,
@@ -673,28 +570,18 @@
673
  "multi": 0
674
  }
675
  },
676
- "success": 356,
677
- "time_spent": "0:05:48, 0:04:49, ",
 
 
678
  "failures": {
679
  "multi": [
680
- {
681
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
682
- "trace": "(line 4140) KeyError: 'eager'"
683
- },
684
  {
685
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
686
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
687
- },
688
- {
689
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_multi_gpu_data_parallel_forward",
690
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
691
  }
692
  ],
693
  "single": [
694
- {
695
- "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_eager_padding_matches_padding_free_with_position_ids",
696
- "trace": "(line 4140) KeyError: 'eager'"
697
- },
698
  {
699
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
700
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
@@ -702,16 +589,16 @@
702
  ]
703
  },
704
  "job_link": {
705
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563553",
706
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563712"
707
  }
708
  },
709
  "models_llama": {
710
  "failed": {
711
  "PyTorch": {
712
  "unclassified": 0,
713
- "single": 1,
714
- "multi": 2
715
  },
716
  "TensorFlow": {
717
  "unclassified": 0,
@@ -759,37 +646,22 @@
759
  "multi": 0
760
  }
761
  },
762
- "success": 478,
763
- "time_spent": "0:04:05, 0:03:53, ",
764
- "failures": {
765
- "multi": [
766
- {
767
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
768
- "trace": "(line 4140) KeyError: 'eager'"
769
- },
770
- {
771
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_multi_gpu_data_parallel_forward",
772
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
773
- }
774
- ],
775
- "single": [
776
- {
777
- "line": "tests/models/llama/test_modeling_llama.py::LlamaModelTest::test_eager_padding_matches_padding_free_with_position_ids",
778
- "trace": "(line 4140) KeyError: 'eager'"
779
- }
780
- ]
781
- },
782
  "job_link": {
783
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563871",
784
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564103"
785
  }
786
  },
787
  "models_llava": {
788
  "failed": {
789
  "PyTorch": {
790
  "unclassified": 0,
791
- "single": 3,
792
- "multi": 4
793
  },
794
  "TensorFlow": {
795
  "unclassified": 0,
@@ -837,53 +709,22 @@
837
  "multi": 0
838
  }
839
  },
840
- "success": 346,
841
- "time_spent": "0:10:11, 0:09:28, ",
842
- "failures": {
843
- "multi": [
844
- {
845
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
846
- "trace": "(line 4140) KeyError: 'eager'"
847
- },
848
- {
849
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
850
- "trace": "(line 687) AssertionError: False is not true"
851
- },
852
- {
853
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
854
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
855
- },
856
- {
857
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
858
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
859
- }
860
- ],
861
- "single": [
862
- {
863
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
864
- "trace": "(line 4140) KeyError: 'eager'"
865
- },
866
- {
867
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_flex_attention_with_grads",
868
- "trace": "(line 687) AssertionError: False is not true"
869
- },
870
- {
871
- "line": "tests/models/llava/test_modeling_llava.py::LlavaForConditionalGenerationModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
872
- "trace": "(line 4197) IndexError: The shape of the mask [3, 23] at index 1 does not match the shape of the indexed tensor [3, 3, 8, 8] at index 1"
873
- }
874
- ]
875
- },
876
  "job_link": {
877
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564002",
878
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526564108"
879
  }
880
  },
881
  "models_mistral3": {
882
  "failed": {
883
  "PyTorch": {
884
  "unclassified": 0,
885
- "single": 1,
886
- "multi": 2
887
  },
888
  "TensorFlow": {
889
  "unclassified": 0,
@@ -931,37 +772,22 @@
931
  "multi": 0
932
  }
933
  },
934
- "success": 286,
935
- "time_spent": "0:10:06, 0:09:57, ",
936
- "failures": {
937
- "single": [
938
- {
939
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
940
- "trace": "(line 4140) KeyError: 'eager'"
941
- }
942
- ],
943
- "multi": [
944
- {
945
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
946
- "trace": "(line 4140) KeyError: 'eager'"
947
- },
948
- {
949
- "line": "tests/models/mistral3/test_modeling_mistral3.py::Mistral3ModelTest::test_multi_gpu_data_parallel_forward",
950
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
951
- }
952
- ]
953
- },
954
  "job_link": {
955
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561480",
956
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561618"
957
  }
958
  },
959
  "models_modernbert": {
960
  "failed": {
961
  "PyTorch": {
962
  "unclassified": 0,
963
- "single": 5,
964
- "multi": 5
965
  },
966
  "TensorFlow": {
967
  "unclassified": 0,
@@ -1009,65 +835,22 @@
1009
  "multi": 0
1010
  }
1011
  },
1012
- "success": 164,
1013
- "time_spent": "0:01:29, 0:01:27, ",
1014
- "failures": {
1015
- "multi": [
1016
- {
1017
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1018
- "trace": "(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1019
- },
1020
- {
1021
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1022
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1023
- },
1024
- {
1025
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1026
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1027
- },
1028
- {
1029
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1030
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1031
- },
1032
- {
1033
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1034
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1035
- }
1036
- ],
1037
- "single": [
1038
- {
1039
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_export",
1040
- "trace": "(line 675) AssertionError: Lists differ: ['mechanic', 'lawyer', 'teacher', 'waiter', 'doctor'] != ['lawyer', 'mechanic', 'teacher', 'doctor', 'waiter']"
1041
- },
1042
- {
1043
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_masked_lm",
1044
- "trace": "(line 401) AssertionError: Tensor-likes are not close!"
1045
- },
1046
- {
1047
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_no_head",
1048
- "trace": "(line 423) AssertionError: Tensor-likes are not close!"
1049
- },
1050
- {
1051
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_sequence_classification",
1052
- "trace": "(line 469) AssertionError: Tensor-likes are not close!"
1053
- },
1054
- {
1055
- "line": "tests/models/modernbert/test_modeling_modernbert.py::ModernBertModelIntegrationTest::test_inference_token_classification",
1056
- "trace": "(line 446) AssertionError: Tensor-likes are not close!"
1057
- }
1058
- ]
1059
- },
1060
  "job_link": {
1061
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561668",
1062
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526561515"
1063
  }
1064
  },
1065
  "models_qwen2": {
1066
  "failed": {
1067
  "PyTorch": {
1068
  "unclassified": 0,
1069
- "single": 2,
1070
- "multi": 3
1071
  },
1072
  "TensorFlow": {
1073
  "unclassified": 0,
@@ -1115,45 +898,22 @@
1115
  "multi": 0
1116
  }
1117
  },
1118
- "success": 438,
1119
- "time_spent": "0:02:17, 0:02:18, ",
1120
- "failures": {
1121
- "multi": [
1122
- {
1123
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1124
- "trace": "(line 4140) KeyError: 'eager'"
1125
- },
1126
- {
1127
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_multi_gpu_data_parallel_forward",
1128
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1129
- },
1130
- {
1131
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1132
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1133
- }
1134
- ],
1135
- "single": [
1136
- {
1137
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1138
- "trace": "(line 4140) KeyError: 'eager'"
1139
- },
1140
- {
1141
- "line": "tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_export_static_cache",
1142
- "trace": "(line 1642) torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_method index_copy_(*(FakeTensor(..., size=(1, 2, 26, 64), dtype=torch.bfloat16), 2, FakeTensor(..., device='cuda:0', size=(1,), dtype=torch.int64), FakeTensor(..., device='cuda:0', size=(1, 2, 1, 64), dtype=torch.bfloat16,"
1143
- }
1144
- ]
1145
- },
1146
  "job_link": {
1147
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562376",
1148
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562270"
1149
  }
1150
  },
1151
  "models_qwen2_5_omni": {
1152
  "failed": {
1153
  "PyTorch": {
1154
  "unclassified": 0,
1155
- "single": 1,
1156
- "multi": 5
1157
  },
1158
  "TensorFlow": {
1159
  "unclassified": 0,
@@ -1201,41 +961,21 @@
1201
  "multi": 0
1202
  }
1203
  },
1204
- "success": 277,
1205
- "time_spent": "0:03:01, 0:03:21, ",
 
 
1206
  "failures": {
1207
  "multi": [
1208
  {
1209
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
1210
  "trace": "(line 675) AssertionError: Items in the second set but not the first:"
1211
- },
1212
- {
1213
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_multi_gpu_data_parallel_forward",
1214
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1215
- },
1216
- {
1217
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1218
- "trace": "(line 675) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1219
- },
1220
- {
1221
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_multiturn",
1222
- "trace": "(line 849) torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 1 has a total capacity of 22.18 GiB of which 6.50 MiB is free. Process 51940 has 22.17 GiB memory in use. Of the allocated memory 21.74 GiB is allocated by PyTorch, and 27.83 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
1223
- },
1224
- {
1225
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_w_audio",
1226
- "trace": "(line 1000) torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 1 has a total capacity of 22.18 GiB of which 8.50 MiB is free. Process 51940 has 22.17 GiB memory in use. Of the allocated memory 21.75 GiB is allocated by PyTorch, and 17.78 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
1227
- }
1228
- ],
1229
- "single": [
1230
- {
1231
- "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniModelIntegrationTest::test_small_model_integration_test_batch",
1232
- "trace": "(line 675) AssertionError: Lists differ: [\"sys[96 chars]ant\\nsystem\\nYou are a helpful assistant.\\nuse[129 chars]er.\"] != [\"sys[96 chars]ant\\nThe sound is glass shattering, and the do[198 chars]er.\"]"
1233
  }
1234
  ]
1235
  },
1236
  "job_link": {
1237
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562375",
1238
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562289"
1239
  }
1240
  },
1241
  "models_qwen2_5_vl": {
@@ -1291,8 +1031,10 @@
1291
  "multi": 0
1292
  }
1293
  },
1294
- "success": 311,
1295
- "time_spent": "0:03:25, 0:03:29, ",
 
 
1296
  "failures": {
1297
  "multi": [
1298
  {
@@ -1308,16 +1050,16 @@
1308
  ]
1309
  },
1310
  "job_link": {
1311
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562382",
1312
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562290"
1313
  }
1314
  },
1315
  "models_smolvlm": {
1316
  "failed": {
1317
  "PyTorch": {
1318
  "unclassified": 0,
1319
- "single": 1,
1320
- "multi": 1
1321
  },
1322
  "TensorFlow": {
1323
  "unclassified": 0,
@@ -1365,33 +1107,22 @@
1365
  "multi": 0
1366
  }
1367
  },
1368
- "success": 499,
1369
- "time_spent": "0:01:55, 0:01:47, ",
1370
- "failures": {
1371
- "single": [
1372
- {
1373
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1374
- "trace": "(line 4140) KeyError: 'eager'"
1375
- }
1376
- ],
1377
- "multi": [
1378
- {
1379
- "line": "tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForConditionalGenerationModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1380
- "trace": "(line 4140) KeyError: 'eager'"
1381
- }
1382
- ]
1383
- },
1384
  "job_link": {
1385
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562675",
1386
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562798"
1387
  }
1388
  },
1389
  "models_t5": {
1390
  "failed": {
1391
  "PyTorch": {
1392
  "unclassified": 0,
1393
- "single": 2,
1394
- "multi": 3
1395
  },
1396
  "TensorFlow": {
1397
  "unclassified": 0,
@@ -1439,14 +1170,12 @@
1439
  "multi": 0
1440
  }
1441
  },
 
1442
  "success": 592,
1443
- "time_spent": "0:03:34, 0:03:41, ",
 
1444
  "failures": {
1445
  "multi": [
1446
- {
1447
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1448
- "trace": "(line 4140) KeyError: 'eager'"
1449
- },
1450
  {
1451
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
1452
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
@@ -1457,10 +1186,6 @@
1457
  }
1458
  ],
1459
  "single": [
1460
- {
1461
- "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1462
- "trace": "(line 4140) KeyError: 'eager'"
1463
- },
1464
  {
1465
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
1466
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"
@@ -1468,8 +1193,8 @@
1468
  ]
1469
  },
1470
  "job_link": {
1471
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563047",
1472
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526562939"
1473
  }
1474
  },
1475
  "models_vit": {
@@ -1525,12 +1250,14 @@
1525
  "multi": 0
1526
  }
1527
  },
1528
  "success": 217,
1529
- "time_spent": "7.34, 0:01:09, ",
1530
  "failures": {},
1531
  "job_link": {
1532
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563537",
1533
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563397"
1534
  }
1535
  },
1536
  "models_wav2vec2": {
@@ -1586,8 +1313,10 @@
1586
  "multi": 0
1587
  }
1588
  },
1589
  "success": 672,
1590
- "time_spent": "0:04:46, 0:04:23, ",
1591
  "failures": {
1592
  "multi": [
1593
  {
@@ -1627,16 +1356,16 @@
1627
  ]
1628
  },
1629
  "job_link": {
1630
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563711",
1631
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563582"
1632
  }
1633
  },
1634
  "models_whisper": {
1635
  "failed": {
1636
  "PyTorch": {
1637
  "unclassified": 0,
1638
- "single": 8,
1639
- "multi": 11
1640
  },
1641
  "TensorFlow": {
1642
  "unclassified": 0,
@@ -1684,14 +1413,12 @@
1684
  "multi": 0
1685
  }
1686
  },
1687
- "success": 1010,
1688
- "time_spent": "0:12:29, 0:14:19, ",
1689
  "failures": {
1690
  "single": [
1691
- {
1692
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1693
- "trace": "(line 4140) KeyError: 'eager'"
1694
- },
1695
  {
1696
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1697
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
@@ -1708,32 +1435,16 @@
1708
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1709
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1710
  },
1711
- {
1712
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
1713
- "trace": "(line 675) AssertionError: Lists differ: [\" Fo[422 chars]to a fisher shows in lip-nitsky attack that cu[7903 chars]le!\"] != [\" Fo[422 chars]to a Fisher shows in lip-nitsky attack that cu[7918 chars]le.\"]"
1714
- },
1715
  {
1716
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1717
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1718
- },
1719
- {
1720
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1721
- "trace": "(line 4140) KeyError: 'eager'"
1722
  }
1723
  ],
1724
  "multi": [
1725
- {
1726
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1727
- "trace": "(line 4140) KeyError: 'eager'"
1728
- },
1729
  {
1730
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_multi_gpu_data_parallel_forward",
1731
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
1732
  },
1733
- {
1734
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_generate_with_forced_decoder_ids",
1735
- "trace": "(line 713) requests.exceptions.ReadTimeout: (ReadTimeoutError(\"HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)\"), '(Request ID: 13cb0b08-c261-4ca3-a58f-91a2f3e327ed)')"
1736
- },
1737
  {
1738
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1739
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
@@ -1750,27 +1461,15 @@
1750
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1751
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1752
  },
1753
- {
1754
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard_prev_cond",
1755
- "trace": "(line 675) AssertionError: Lists differ: [\" Fo[422 chars]to a fisher shows in lip-nitsky attack that cu[7903 chars]le!\"] != [\" Fo[422 chars]to a Fisher shows in lip-nitsky attack that cu[7918 chars]le.\"]"
1756
- },
1757
  {
1758
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1759
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1760
- },
1761
- {
1762
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_eager_padding_matches_padding_free_with_position_ids",
1763
- "trace": "(line 4140) KeyError: 'eager'"
1764
- },
1765
- {
1766
- "line": "tests/models/whisper/test_modeling_whisper.py::WhisperStandaloneDecoderModelTest::test_multi_gpu_data_parallel_forward",
1767
- "trace": "(line 1305) AttributeError: 'DynamicCache' object has no attribute 'layers'"
1768
  }
1769
  ]
1770
  },
1771
  "job_link": {
1772
- "single": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563737",
1773
- "multi": "https://github.com/huggingface/transformers/actions/runs/16460401119/job/46526563862"
1774
  }
1775
  }
1776
- }
 
52
  "multi": 0
53
  }
54
  },
55
+ "errors": 0,
56
  "success": 226,
57
+ "skipped": 10,
58
+ "time_spent": "3.79, 5.93, ",
59
  "failures": {},
60
  "job_link": {
61
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215208",
62
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215147"
63
  }
64
  },
65
  "models_bert": {
66
  "failed": {
67
  "PyTorch": {
68
  "unclassified": 0,
69
+ "single": 0,
70
+ "multi": 0
71
  },
72
  "TensorFlow": {
73
  "unclassified": 0,
 
115
  "multi": 0
116
  }
117
  },
118
+ "errors": 0,
119
  "success": 527,
120
+ "skipped": 211,
121
+ "time_spent": "0:01:47, 0:01:50, ",
122
+ "failures": {},
123
  "job_link": {
124
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215196",
125
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215175"
126
  }
127
  },
128
  "models_clip": {
 
178
  "multi": 0
179
  }
180
  },
181
+ "errors": 0,
182
  "success": 660,
183
+ "skipped": 934,
184
+ "time_spent": "0:02:15, 0:02:11, ",
185
  "failures": {},
186
  "job_link": {
187
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215674",
188
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215699"
189
  }
190
  },
191
  "models_detr": {
 
241
  "multi": 0
242
  }
243
  },
244
+ "errors": 0,
245
  "success": 177,
246
+ "skipped": 271,
247
+ "time_spent": "0:01:07, 0:01:11, ",
248
  "failures": {},
249
  "job_link": {
250
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216030",
251
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216008"
252
  }
253
  },
254
  "models_gemma3": {
255
  "failed": {
256
  "PyTorch": {
257
  "unclassified": 0,
258
+ "single": 0,
259
+ "multi": 1
260
  },
261
  "TensorFlow": {
262
  "unclassified": 0,
 
304
  "multi": 0
305
  }
306
  },
307
+ "errors": 0,
308
+ "success": 507,
309
+ "skipped": 320,
310
+ "time_spent": "0:09:30, 0:09:28, ",
311
  "failures": {
312
  "multi": [
313
  {
314
  "line": "tests/models/gemma3/test_modeling_gemma3.py::Gemma3Vision2TextModelTest::test_model_parallelism",
315
  "trace": "(line 925) RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!"
316
  }
317
  ]
318
  },
319
  "job_link": {
320
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216642",
321
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216593"
322
  }
323
  },
324
  "models_gemma3n": {
 
326
  "PyTorch": {
327
  "unclassified": 0,
328
  "single": 1,
329
+ "multi": 0
330
  },
331
  "TensorFlow": {
332
  "unclassified": 0,
 
374
  "multi": 0
375
  }
376
  },
377
+ "errors": 0,
378
+ "success": 288,
379
+ "skipped": 703,
380
+ "time_spent": "0:02:15, 0:02:15, ",
381
  "failures": {
382
  "single": [
383
  {
384
+ "line": "tests/models/gemma3n/test_modeling_gemma3n.py::Gemma3nTextModelTest::test_sdpa_padding_matches_padding_free_with_position_ids",
385
+ "trace": "(line 4243) AssertionError: Tensor-likes are not close!"
386
  }
387
  ]
388
  },
389
  "job_link": {
390
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216605",
391
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216660"
392
  }
393
  },
394
  "models_got_ocr2": {
395
  "failed": {
396
  "PyTorch": {
397
  "unclassified": 0,
398
+ "single": 0,
399
+ "multi": 0
400
  },
401
  "TensorFlow": {
402
  "unclassified": 0,
 
444
  "multi": 0
445
  }
446
  },
447
+ "errors": 0,
448
+ "success": 257,
449
+ "skipped": 333,
450
+ "time_spent": "0:01:49, 0:01:49, ",
451
+ "failures": {},
452
  "job_link": {
453
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216911",
454
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216742"
455
  }
456
  },
457
  "models_gpt2": {
458
  "failed": {
459
  "PyTorch": {
460
  "unclassified": 0,
461
+ "single": 0,
462
+ "multi": 0
463
  },
464
  "TensorFlow": {
465
  "unclassified": 0,
 
507
  "multi": 0
508
  }
509
  },
510
+ "errors": 0,
511
  "success": 487,
512
+ "skipped": 229,
513
+ "time_spent": "0:02:11, 0:02:01, ",
514
+ "failures": {},
515
  "job_link": {
516
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216717",
517
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216759"
518
  }
519
  },
520
  "models_internvl": {
521
  "failed": {
522
  "PyTorch": {
523
  "unclassified": 0,
524
+ "single": 1,
525
+ "multi": 1
526
  },
527
  "TensorFlow": {
528
  "unclassified": 0,
 
570
  "multi": 0
571
  }
572
  },
573
+ "errors": 0,
574
+ "success": 355,
575
+ "skipped": 241,
576
+ "time_spent": "0:04:33, 0:04:31, ",
577
  "failures": {
578
  "multi": [
579
  {
580
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
581
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
582
  }
583
  ],
584
  "single": [
585
  {
586
  "line": "tests/models/internvl/test_modeling_internvl.py::InternVLModelTest::test_flex_attention_with_grads",
587
  "trace": "(line 439) torch._inductor.exc.InductorError: RuntimeError: No valid triton configs. OutOfResources: out of resource: shared memory, Required: 106496, Hardware limit: 101376. Reducing block sizes or `num_stages` may help."
588
  }
589
  ]
590
  },
591
  "job_link": {
592
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217017",
593
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217056"
594
  }
595
  },
596
  "models_llama": {
597
  "failed": {
598
  "PyTorch": {
599
  "unclassified": 0,
600
+ "single": 0,
601
+ "multi": 0
602
  },
603
  "TensorFlow": {
604
  "unclassified": 0,
 
646
  "multi": 0
647
  }
648
  },
649
+ "errors": 0,
650
+ "success": 481,
651
+ "skipped": 253,
652
+ "time_spent": "0:03:43, 0:03:37, ",
653
+ "failures": {},
654
  "job_link": {
655
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217239",
656
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217242"
657
  }
658
  },
659
  "models_llava": {
660
  "failed": {
661
  "PyTorch": {
662
  "unclassified": 0,
663
+ "single": 0,
664
+ "multi": 0
665
  },
666
  "TensorFlow": {
667
  "unclassified": 0,
 
709
  "multi": 0
710
  }
711
  },
712
+ "errors": 0,
713
+ "success": 349,
714
+ "skipped": 159,
715
+ "time_spent": "0:08:59, 0:09:11, ",
716
+ "failures": {},
717
  "job_link": {
718
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217250",
719
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217263"
720
  }
721
  },
722
  "models_mistral3": {
723
  "failed": {
724
  "PyTorch": {
725
  "unclassified": 0,
726
+ "single": 0,
727
+ "multi": 0
728
  },
729
  "TensorFlow": {
730
  "unclassified": 0,
 
772
  "multi": 0
773
  }
774
  },
775
+ "errors": 0,
776
+ "success": 283,
777
+ "skipped": 267,
778
+ "time_spent": "0:09:53, 0:09:40, ",
779
+ "failures": {},
780
  "job_link": {
781
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215108",
782
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215124"
783
  }
784
  },
785
  "models_modernbert": {
786
  "failed": {
787
  "PyTorch": {
788
  "unclassified": 0,
789
+ "single": 0,
790
+ "multi": 0
791
  },
792
  "TensorFlow": {
793
  "unclassified": 0,
 
835
  "multi": 0
836
  }
837
  },
838
+ "errors": 0,
839
+ "success": 174,
840
+ "skipped": 218,
841
+ "time_spent": "0:01:27, 0:01:24, ",
842
+ "failures": {},
843
  "job_link": {
844
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215158",
845
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215123"
846
  }
847
  },
848
  "models_qwen2": {
849
  "failed": {
850
  "PyTorch": {
851
  "unclassified": 0,
852
+ "single": 0,
853
+ "multi": 0
854
  },
855
  "TensorFlow": {
856
  "unclassified": 0,
 
898
  "multi": 0
899
  }
900
  },
901
+ "errors": 0,
902
+ "success": 443,
903
+ "skipped": 251,
904
+ "time_spent": "0:02:16, 0:02:16, ",
905
+ "failures": {},
906
  "job_link": {
907
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215909",
908
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215891"
909
  }
910
  },
911
  "models_qwen2_5_omni": {
912
  "failed": {
913
  "PyTorch": {
914
  "unclassified": 0,
915
+ "single": 0,
916
+ "multi": 1
917
  },
918
  "TensorFlow": {
919
  "unclassified": 0,
 
961
  "multi": 0
962
  }
963
  },
964
+ "errors": 0,
965
+ "success": 278,
966
+ "skipped": 159,
967
+ "time_spent": "0:02:55, 0:03:00, ",
968
  "failures": {
969
  "multi": [
970
  {
971
  "line": "tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py::Qwen2_5OmniThinkerForConditionalGenerationModelTest::test_model_parallelism",
972
  "trace": "(line 675) AssertionError: Items in the second set but not the first:"
973
  }
974
  ]
975
  },
976
  "job_link": {
977
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215907",
978
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215896"
979
  }
980
  },
981
  "models_qwen2_5_vl": {
 
1031
  "multi": 0
1032
  }
1033
  },
1034
+ "errors": 0,
1035
+ "success": 309,
1036
+ "skipped": 141,
1037
+ "time_spent": "0:03:13, 0:03:14, ",
1038
  "failures": {
1039
  "multi": [
1040
  {
 
1050
  ]
1051
  },
1052
  "job_link": {
1053
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215945",
1054
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301215911"
1055
  }
1056
  },
1057
  "models_smolvlm": {
1058
  "failed": {
1059
  "PyTorch": {
1060
  "unclassified": 0,
1061
+ "single": 0,
1062
+ "multi": 0
1063
  },
1064
  "TensorFlow": {
1065
  "unclassified": 0,
 
1107
  "multi": 0
1108
  }
1109
  },
1110
+ "errors": 0,
1111
+ "success": 497,
1112
+ "skipped": 269,
1113
+ "time_spent": "0:01:33, 0:01:36, ",
1114
+ "failures": {},
1115
  "job_link": {
1116
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216282",
1117
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216321"
1118
  }
1119
  },
1120
  "models_t5": {
1121
  "failed": {
1122
  "PyTorch": {
1123
  "unclassified": 0,
1124
+ "single": 1,
1125
+ "multi": 2
1126
  },
1127
  "TensorFlow": {
1128
  "unclassified": 0,
 
1170
  "multi": 0
1171
  }
1172
  },
1173
+ "errors": 0,
1174
  "success": 592,
1175
+ "skipped": 535,
1176
+ "time_spent": "0:03:13, 0:02:52, ",
1177
  "failures": {
1178
  "multi": [
1179
  {
1180
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelTest::test_multi_gpu_data_parallel_forward",
1181
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
 
1186
  }
1187
  ],
1188
  "single": [
1189
  {
1190
  "line": "tests/models/t5/test_modeling_t5.py::T5ModelIntegrationTests::test_export_t5_summarization",
1191
  "trace": "(line 687) AttributeError: 'dict' object has no attribute 'batch_size'"
1192
  }
1193
  ]
1194
  },
1195
  "job_link": {
1196
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216565",
1197
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216464"
1198
  }
1199
  },
1200
  "models_vit": {
 
1250
  "multi": 0
1251
  }
1252
  },
1253
+ "errors": 0,
1254
  "success": 217,
1255
+ "skipped": 199,
1256
+ "time_spent": "2.03, 1.28, ",
1257
  "failures": {},
1258
  "job_link": {
1259
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216869",
1260
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216833"
1261
  }
1262
  },
1263
  "models_wav2vec2": {
 
1313
  "multi": 0
1314
  }
1315
  },
1316
+ "errors": 0,
1317
  "success": 672,
1318
+ "skipped": 438,
1319
+ "time_spent": "0:03:37, 0:03:36, ",
1320
  "failures": {
1321
  "multi": [
1322
  {
 
1356
  ]
1357
  },
1358
  "job_link": {
1359
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216956",
1360
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216929"
1361
  }
1362
  },
1363
  "models_whisper": {
1364
  "failed": {
1365
  "PyTorch": {
1366
  "unclassified": 0,
1367
+ "single": 5,
1368
+ "multi": 6
1369
  },
1370
  "TensorFlow": {
1371
  "unclassified": 0,
 
1413
  "multi": 0
1414
  }
1415
  },
1416
+ "errors": 0,
1417
+ "success": 1014,
1418
+ "skipped": 475,
1419
+ "time_spent": "0:11:09, 0:11:47, ",
1420
  "failures": {
1421
  "single": [
1422
  {
1423
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1424
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
 
1435
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1436
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1437
  },
1438
  {
1439
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1440
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1441
  }
1442
  ],
1443
  "multi": [
1444
  {
1445
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelTest::test_multi_gpu_data_parallel_forward",
1446
  "trace": "(line 131) TypeError: EncoderDecoderCache.__init__() missing 1 required positional argument: 'cross_attention_cache'"
1447
  },
1448
  {
1449
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_large_batched_generation_multilingual",
1450
  "trace": "(line 756) RuntimeError: The frame has 0 channels, expected 1. If you are hitting this, it may be because you are using a buggy FFmpeg version. FFmpeg4 is known to fail here in some valid scenarios. Try to upgrade FFmpeg?"
 
1461
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_longform_multi_batch_hard",
1462
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[272 chars]ting of classics, Sicilian, nade door variatio[8147 chars]le!'] != [\" Fo[272 chars]ting a classic Sicilian, nade door variation o[8150 chars]le!']"
1463
  },
1464
  {
1465
  "line": "tests/models/whisper/test_modeling_whisper.py::WhisperModelIntegrationTests::test_whisper_shortform_single_batch_prev_cond",
1466
  "trace": "(line 675) AssertionError: Lists differ: [\" Fo[268 chars]ating, so soft, it would make JD power and her[196 chars]ke.\"] != [\" Fo[268 chars]ating so soft, it would make JD power and her [195 chars]ke.\"]"
1467
  }
1468
  ]
1469
  },
1470
  "job_link": {
1471
+ "single": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301216943",
1472
+ "multi": "https://github.com/huggingface/transformers/actions/runs/16712955100/job/47301217012"
1473
  }
1474
  }
1475
+ }
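
Note on the schema change visible in both sample files: every per-model entry now carries an `errors` and a `skipped` counter next to the existing `success`, `time_spent`, `failures`, and `job_link` fields. A minimal sketch of consuming one of these reports, assuming the flat `models_*` mapping shown above (the `failure_rate` helper and the flat-layout assumption are illustrative, not part of the dashboard code):

import json

def failure_rate(entry: dict) -> float:
    # Failed counts are nested per framework (PyTorch, TensorFlow, ...) and
    # per config (unclassified/single/multi); skipped tests are excluded
    # from the denominator since they never ran.
    failed = sum(n for framework in entry["failed"].values() for n in framework.values())
    ran = entry["success"] + entry["errors"] + failed
    return failed / ran if ran else 0.0

with open("sample_nvidia.json") as f:  # assumed flat layout
    report = json.load(f)

for name, entry in report.items():
    print(f"{name}: {failure_rate(entry):.1%} failed, {entry['skipped']} skipped")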
styles.css CHANGED
@@ -184,6 +184,17 @@ div[data-testid="column"]:has(.model-container) {
184
  box-shadow: 0 2px 8px rgba(116, 185, 255, 0.2) !important;
185
  }
186
 
187
+ /* Model buttons with failures - fuzzy red border with inner glow */
188
+ .model-button-failed {
189
+ border: 1px solid #712626 !important;
190
+ box-shadow: inset 0 0 8px rgba(204, 68, 68, 0.4) !important;
191
+ }
192
+
193
+ .model-button-failed:hover {
194
+ border-color: #712626 !important;
195
+ box-shadow: 0 0 12px rgba(255, 107, 107, 0.5) !important;
196
+ }
197
+
198
  /*
199
  .model-button:active {
200
  background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
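
The two added rules mark failing model buttons with a red inner glow; using `box-shadow` instead of a thicker border keeps the button footprint unchanged, so the layout does not shift when a model starts failing. A minimal sketch of toggling the class from the Gradio side, assuming a `has_failures` flag computed elsewhere (the helper name is illustrative):

import gradio as gr

def make_model_button(name: str, has_failures: bool) -> gr.Button:
    # elem_classes attaches the CSS classes that styles.css targets.
    classes = ["model-button"] + (["model-button-failed"] if has_failures else [])
    return gr.Button(name, variant="secondary", size="sm", elem_classes=classes)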
summary_page.py CHANGED
@@ -35,6 +35,42 @@ COLORS = {
35
  MODEL_NAME_FONT_SIZE = 16
36
  LABEL_FONT_SIZE = 14
37
  LABEL_OFFSET = 1 # Distance of label from bar
38
+ FAILURE_RATE_FONT_SIZE = 28
39
+
40
+
41
+ def calculate_overall_failure_rates(df: pd.DataFrame, available_models: list[str]) -> tuple[float, float]:
42
+ """Calculate overall failure rates for AMD and NVIDIA across all models."""
43
+ if df.empty or not available_models:
44
+ return 0.0, 0.0
45
+
46
+ total_amd_tests = 0
47
+ total_amd_failures = 0
48
+ total_nvidia_tests = 0
49
+ total_nvidia_failures = 0
50
+
51
+ for model_name in available_models:
52
+ if model_name not in df.index:
53
+ continue
54
+
55
+ row = df.loc[model_name]
56
+ amd_stats, nvidia_stats = extract_model_data(row)[:2]
57
+
58
+ # AMD totals
59
+ amd_total = sum(amd_stats.values())
60
+ if amd_total > 0:
61
+ total_amd_tests += amd_total
62
+ total_amd_failures += amd_stats['failed'] + amd_stats['error']
63
+
64
+ # NVIDIA totals
65
+ nvidia_total = sum(nvidia_stats.values())
66
+ if nvidia_total > 0:
67
+ total_nvidia_tests += nvidia_total
68
+ total_nvidia_failures += nvidia_stats['failed'] + nvidia_stats['error']
69
+
70
+ amd_failure_rate = (total_amd_failures / total_amd_tests * 100) if total_amd_tests > 0 else 0.0
71
+ nvidia_failure_rate = (total_nvidia_failures / total_nvidia_tests * 100) if total_nvidia_tests > 0 else 0.0
72
+
73
+ return amd_failure_rate, nvidia_failure_rate
74
 
75
 
76
  def draw_text_and_bar(
@@ -48,9 +84,14 @@ def draw_text_and_bar(
84
  """Draw a horizontal bar chart for given stats and its label on the left."""
85
  # Text
86
  label_x = column_left_position - LABEL_OFFSET
87
+ failures_present = any(stats[category] > 0 for category in ['failed', 'error'])
88
+ if failures_present:
89
+ props = dict(boxstyle='round', facecolor=COLORS['failed'], alpha=0.35)
90
+ else:
91
+ props = dict(alpha=0)
92
  ax.text(
93
  label_x, y_bar, label, ha='right', va='center', color='#CCCCCC', fontsize=LABEL_FONT_SIZE,
53
- fontfamily='monospace', fontweight='normal'
94
+ fontfamily='monospace', fontweight='normal', bbox=props
95
  )
96
  # Bar
97
  total = sum(stats.values())
@@ -75,6 +116,9 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
116
  fontfamily='monospace', weight='normal')
117
  ax.axis('off')
118
  return fig
119
+
120
+ # Calculate overall failure rates
121
+ amd_failure_rate, nvidia_failure_rate = calculate_overall_failure_rates(df, available_models)
122
 
123
  # Calculate dimensions for N-column layout
124
  model_count = len(available_models)
@@ -86,6 +130,12 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
130
 
131
  fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, figure_height), facecolor='#000000')
132
  ax.set_facecolor('#000000')
133
+
134
+ # Add overall failure rates at the top as a proper title
135
+ failure_text = f"Overall Failure Rates: AMD {amd_failure_rate:.1f}% | NVIDIA {nvidia_failure_rate:.1f}%"
136
+ ax.text(50, -1.25, failure_text, ha='center', va='top',
137
+ color='#FFFFFF', fontsize=FAILURE_RATE_FONT_SIZE,
138
+ fontfamily='monospace', fontweight='bold')
139
 
140
  visible_model_count = 0
141
  max_y = 0
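
calculate_overall_failure_rates weights each model by its test count rather than averaging per-model rates, so large suites dominate the headline number. A worked micro-example with made-up stats, assuming the per-hardware dicts use the 'failed' and 'error' keys the function reads (the other keys stand in for whatever categories the summary bars track):

amd_stats_by_model = {
    "bert": {"passed": 90, "failed": 8, "error": 2, "skipped": 0},  # 100 tests, 10 bad
    "t5": {"passed": 50, "failed": 0, "error": 0, "skipped": 0},    # 50 tests, 0 bad
}
total_tests = sum(sum(s.values()) for s in amd_stats_by_model.values())              # 150
total_failures = sum(s["failed"] + s["error"] for s in amd_stats_by_model.values())  # 10
print(f"AMD {total_failures / total_tests * 100:.1f}%")  # AMD 6.7%, not the 5.0% a per-model average would give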