ror HF Staff badaoui HF Staff committed on
Commit
721e588
·
verified ·
1 Parent(s): 78473e2

Add historical data visualization features (#7)

Browse files

- Add historical data visualization features (5309153902e48908d06ecd8f760b8d7faa23b08f)
- Remove logo images and use simple text labels (871d3046dfced20ab97c6f9a3c22fb9fcc5346c9)
- Merge main branch - resolve conflicts in app.py (76e62763f6cc45ecfb4a17b2b3d4cfa50ee71359)
- Remove unused files to reduce code size (a9eacdca256f51b6083daa882686822c64e6ef7d)
- small fix (6fc74fec4d3efa8b4771e2728e116159129b9f71)
- some code optimization (941f5e07ec8c7de6a868307f2f173fde66bebd4f)
- some more code factorization :) (fe596850370a2a4ce597340384555fa17ffb018b)
- remove some unused functions (2122146467c7c5fd5316bd7eee391b9abde7e26e)
- one function for has_failures (63c305fd30cf61b2aee31616a20e8adf8876dddd)
- improve filter failing models (69d6e2e5856e33d75100ac0ee9c338b3606fa6cc)
- improve encore (f3f4c775fc02f6d67a42069e65ce9cd8f5b374a8)
- more factorization (0f8d3a81fe5811984035c2a37251366206a1ae08)
- fix data loading (646bbcb774acbb8fc658b9e43ef7dae1ea1877fa)
- fix first seen date feat (c3ed9cd51a4ed2d4b961e051e5c3b1331fa83467)


Co-authored-by: ABDENNACER BADAOUI <badaoui@users.noreply.huggingface.co>

Files changed (8) hide show
  1. app.py +450 -97
  2. data.py +408 -38
  3. model_page.py +46 -30
  4. requirements.txt +2 -0
  5. styles.css +416 -41
  6. summary_page.py +63 -61
  7. time_series_gradio.py +150 -0
  8. utils.py +12 -0
app.py CHANGED
@@ -2,11 +2,16 @@ import matplotlib.pyplot as plt
2
  import matplotlib
3
  import pandas as pd
4
  import gradio as gr
 
5
 
6
- from data import CIResults
7
  from utils import logger
8
  from summary_page import create_summary_page
9
  from model_page import plot_model_stats
 
 
 
 
10
 
11
 
12
  # Configure matplotlib to prevent memory warnings and set dark background
@@ -19,35 +24,36 @@ plt.ioff() # Turn off interactive mode to prevent figure accumulation
19
  # Load data once at startup
20
  Ci_results = CIResults()
21
  Ci_results.load_data()
 
 
 
 
 
 
22
  # Start the auto-reload scheduler
23
  Ci_results.schedule_data_reload()
24
 
25
 
26
  # Function to check if a model has failures
27
- def model_has_failures(model_name):
28
- """Check if a model has any failures (AMD or NVIDIA)."""
29
  if Ci_results.df is None or Ci_results.df.empty:
30
  return False
31
 
32
- # Normalize model name to match DataFrame index
33
  model_name_lower = model_name.lower()
34
-
35
- # Check if model exists in DataFrame
36
  if model_name_lower not in Ci_results.df.index:
37
  return False
 
38
  row = Ci_results.df.loc[model_name_lower]
39
 
40
- # Check for failures in both AMD and NVIDIA
41
- amd_multi_failures = row.get('failed_multi_no_amd', 0)
42
- amd_single_failures = row.get('failed_single_no_amd', 0)
43
- nvidia_multi_failures = row.get('failed_multi_no_nvidia', 0)
44
- nvidia_single_failures = row.get('failed_single_no_nvidia', 0)
45
- return any([
46
- amd_multi_failures > 0,
47
- amd_single_failures > 0,
48
- nvidia_multi_failures > 0,
49
- nvidia_single_failures > 0,
50
- ])
51
 
52
 
53
  # Function to get current description text
@@ -66,6 +72,46 @@ def get_description_text():
66
  msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
67
  return "<br>".join(msg)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # Load CSS from external file
70
  def load_css():
71
  try:
@@ -77,9 +123,19 @@ def load_css():
77
  logger.warning("styles.css not found, using minimal default styles")
78
  return "body { background: #000; color: #fff; }"
79
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # Create the Gradio interface with sidebar and dark theme
82
- with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cache=(3600, 3600)) as demo:
83
 
84
 
85
  with gr.Row():
@@ -91,7 +147,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
91
  description_text = get_description_text()
92
  description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
93
 
94
- # Summary button at the top
95
  summary_button = gr.Button(
96
  "summary\n📊",
97
  variant="primary",
@@ -99,6 +155,14 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
99
  elem_classes=["summary-button"]
100
  )
101
 
 
 
 
 
 
 
 
 
102
  # Model selection header (clickable toggle)
103
  model_toggle_button = gr.Button(
104
  f"► Select model ({len(Ci_results.available_models)})",
@@ -108,83 +172,248 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
108
 
109
  # Model buttons container (collapsible) - start folded
110
  with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # Create individual buttons for each model
112
  model_buttons = []
113
  model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
114
 
 
 
 
 
 
 
115
  print(f"Creating {len(model_choices)} model buttons: {model_choices}")
116
 
117
  for model_name in model_choices:
118
- # Check if model has failures to determine styling
119
- has_failures = model_has_failures(model_name)
120
- button_classes = ["model-button"]
121
- if has_failures:
122
- button_classes.append("model-button-failed")
123
 
124
- btn = gr.Button(
125
- model_name,
126
- variant="secondary",
127
- size="sm",
128
- elem_classes=button_classes
129
- )
130
- model_buttons.append(btn)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  # CI job links at bottom of sidebar
133
  ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
134
 
135
  # Main content area
136
  with gr.Column(scale=4, elem_classes=["main-content"]):
137
- # Summary display (default view)
138
- summary_display = gr.Plot(
139
- value=create_summary_page(Ci_results.df, Ci_results.available_models),
140
- label="",
141
- format="png",
142
- elem_classes=["plot-container"],
143
- visible=True
144
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- # Detailed view components (hidden by default)
147
- with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
 
 
 
 
 
 
148
 
149
- # Create the plot output
150
- plot_output = gr.Plot(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  label="",
152
- format="png",
153
  elem_classes=["plot-container"]
154
  )
155
 
156
- # Create two separate failed tests displays in a row layout
157
- with gr.Row():
158
- with gr.Column(scale=1):
159
- amd_failed_tests_output = gr.Textbox(
160
- value="",
161
- lines=8,
162
- max_lines=8,
163
- interactive=False,
164
- container=False,
165
- elem_classes=["failed-tests"]
166
- )
167
- with gr.Column(scale=1):
168
- nvidia_failed_tests_output = gr.Textbox(
169
- value="",
170
- lines=8,
171
- max_lines=8,
172
- interactive=False,
173
- container=False,
174
- elem_classes=["failed-tests"]
175
- )
176
 
177
- # Set up click handlers for model buttons
178
- for i, btn in enumerate(model_buttons):
179
- model_name = model_choices[i]
180
- btn.click(
181
- fn=lambda selected_model=model_name: plot_model_stats(Ci_results.df, selected_model),
182
- outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
183
- ).then(
184
- fn=lambda: [gr.update(visible=False), gr.update(visible=True)],
185
- outputs=[summary_display, detail_view]
 
 
 
 
 
 
 
 
 
 
 
186
  )
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  # Model toggle functionality
189
  def toggle_model_list(current_visible):
190
  """Toggle the visibility of the model list."""
@@ -203,6 +432,10 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
203
 
204
  # Track model list visibility state
205
  model_list_visible = gr.State(False)
 
 
 
 
206
 
207
  model_toggle_button.click(
208
  fn=toggle_model_list,
@@ -210,17 +443,41 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
210
  outputs=[model_toggle_button, model_list_container, model_list_visible]
211
  )
212
 
213
- # Summary button click handler
214
- def show_summary_and_update_links():
215
- """Show summary page and update CI links."""
216
- return create_summary_page(Ci_results.df, Ci_results.available_models), get_description_text(), get_ci_links()
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
  summary_button.click(
219
- fn=show_summary_and_update_links,
220
- outputs=[summary_display, description_display, ci_links_display]
221
- ).then(
222
- fn=lambda: [gr.update(visible=True), gr.update(visible=False)],
223
- outputs=[summary_display, detail_view]
 
 
 
 
 
 
 
 
 
 
 
 
224
  )
225
 
226
  # Function to get CI job links
@@ -270,25 +527,19 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
270
  # AMD links
271
  if amd_multi_link or amd_single_link:
272
  links_md += "**AMD:**\n"
273
- if amd_multi_link == amd_single_link:
274
- links_md += f"• [Single and Multi GPU]({amd_multi_link})\n"
275
- else:
276
- if amd_multi_link:
277
- links_md += f"• [Multi GPU]({amd_multi_link})\n"
278
- if amd_single_link:
279
- links_md += f"• [Single GPU]({amd_single_link})\n"
280
  links_md += "\n"
281
 
282
  # NVIDIA links
283
  if nvidia_multi_link or nvidia_single_link:
284
  links_md += "**NVIDIA:**\n"
285
- if nvidia_single_link == nvidia_multi_link:
286
- links_md += f"• [Single and Multi GPU]({nvidia_multi_link})\n"
287
- else:
288
- if nvidia_multi_link:
289
- links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
290
- if nvidia_single_link:
291
- links_md += f"• [Single GPU]({nvidia_single_link})\n"
292
 
293
  if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
294
  links_md += "*No links available*"
@@ -299,10 +550,112 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
299
  return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
300
 
301
 
302
- # Auto-update summary, description, and CI links when the interface loads
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  demo.load(
304
  fn=show_summary_and_update_links,
305
  outputs=[summary_display, description_display, ci_links_display]
 
 
 
306
  )
307
 
308
 
 
2
  import matplotlib
3
  import pandas as pd
4
  import gradio as gr
5
+ from gradio_toggle import Toggle
6
 
7
+ from data import CIResults, find_new_regressions
8
  from utils import logger
9
  from summary_page import create_summary_page
10
  from model_page import plot_model_stats
11
+ from time_series_gradio import (
12
+ create_time_series_summary_gradio,
13
+ create_model_time_series_gradio,
14
+ )
15
 
16
 
17
  # Configure matplotlib to prevent memory warnings and set dark background
 
24
  # Load data once at startup
25
  Ci_results = CIResults()
26
  Ci_results.load_data()
27
+ # Preload historical data at startup
28
+ if Ci_results.available_dates:
29
+ start_date_val = Ci_results.available_dates[-1] # Last date (oldest)
30
+ end_date_val = Ci_results.available_dates[0] # First date (newest)
31
+ Ci_results.load_historical_data(start_date_val, end_date_val)
32
+ logger.info(f"Preloaded historical data: {len(Ci_results.historical_df)} records")
33
  # Start the auto-reload scheduler
34
  Ci_results.schedule_data_reload()
35
 
36
 
37
  # Function to check if a model has failures
38
+ def model_has_failures_by_device(model_name, device='both'):
 
39
  if Ci_results.df is None or Ci_results.df.empty:
40
  return False
41
 
 
42
  model_name_lower = model_name.lower()
 
 
43
  if model_name_lower not in Ci_results.df.index:
44
  return False
45
+
46
  row = Ci_results.df.loc[model_name_lower]
47
 
48
+ if device in ('amd', 'both'):
49
+ if row.get('failed_multi_no_amd', 0) > 0 or row.get('failed_single_no_amd', 0) > 0:
50
+ return True
51
+
52
+ if device in ('nvidia', 'both'):
53
+ if row.get('failed_multi_no_nvidia', 0) > 0 or row.get('failed_single_no_nvidia', 0) > 0:
54
+ return True
55
+
56
+ return False
 
 
57
 
58
 
59
  # Function to get current description text
 
72
  msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
73
  return "<br>".join(msg)
74
 
75
+ # Function to format new regressions for display
76
+ def get_regressions_text():
77
+ """Get formatted text for new regressions panel."""
78
+ try:
79
+ regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)
80
+
81
+ if not regressions:
82
+ return "### 🎉 No New Regressions\nAll failures were present in the previous run."
83
+
84
+ # Group by model and device
85
+ grouped = {}
86
+ for reg in regressions:
87
+ model = reg['model']
88
+ device = reg['device'].upper()
89
+ gpu_type = reg['gpu_type']
90
+ test = reg['test']
91
+
92
+ key = f"{model} ({device} {gpu_type})"
93
+ if key not in grouped:
94
+ grouped[key] = []
95
+ grouped[key].append(test)
96
+
97
+ # Format output
98
+ lines = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)"]
99
+ lines.append("")
100
+
101
+ for key in sorted(grouped.keys()):
102
+ tests = grouped[key]
103
+ lines.append(f"**{key}:**")
104
+ for test in tests[:5]: # Limit to 5 tests per model
105
+ lines.append(f" • {test}")
106
+ if len(tests) > 5:
107
+ lines.append(f" • ... and {len(tests) - 5} more")
108
+ lines.append("")
109
+
110
+ return "\n".join(lines)
111
+ except Exception as e:
112
+ logger.error(f"Error getting regressions: {e}")
113
+ return "### ⚠️ New Regressions\n*Unable to load regression data*"
114
+
115
  # Load CSS from external file
116
  def load_css():
117
  try:
 
123
  logger.warning("styles.css not found, using minimal default styles")
124
  return "body { background: #000; color: #fff; }"
125
 
126
+ js_func = """
127
+ function refresh() {
128
+ const url = new URL(window.location);
129
+
130
+ if (url.searchParams.get('__theme') !== 'dark') {
131
+ url.searchParams.set('__theme', 'dark');
132
+ window.location.href = url.href;
133
+ }
134
+ }
135
+ """
136
 
137
  # Create the Gradio interface with sidebar and dark theme
138
+ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func) as demo:
139
 
140
 
141
  with gr.Row():
 
147
  description_text = get_description_text()
148
  description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
149
 
150
+ # Summary button (for current view)
151
  summary_button = gr.Button(
152
  "summary\n📊",
153
  variant="primary",
 
155
  elem_classes=["summary-button"]
156
  )
157
 
158
+ history_view_button = Toggle(
159
+ label="History view",
160
+ value=False,
161
+ interactive=True,
162
+ elem_classes=["history-view-button"]
163
+ )
164
+
165
+
166
  # Model selection header (clickable toggle)
167
  model_toggle_button = gr.Button(
168
  f"► Select model ({len(Ci_results.available_models)})",
 
172
 
173
  # Model buttons container (collapsible) - start folded
174
  with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
175
+ # Toggles for filtering failing models by device
176
+ with gr.Row(elem_classes=["failing-models-filter-row"]):
177
+ show_amd_failures = gr.Checkbox(
178
+ label="Failing on AMD",
179
+ value=False,
180
+ interactive=True,
181
+ elem_classes=["failing-models-toggle", "amd-toggle"]
182
+ )
183
+ show_nvidia_failures = gr.Checkbox(
184
+ label="Failing on NVIDIA",
185
+ value=False,
186
+ interactive=True,
187
+ elem_classes=["failing-models-toggle", "nvidia-toggle"]
188
+ )
189
  # Create individual buttons for each model
190
  model_buttons = []
191
  model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
192
 
193
+ # Categorize models by failure type
194
+ amd_failing_models = []
195
+ nvidia_failing_models = []
196
+ both_failing_models = []
197
+ passing_models = []
198
+
199
  print(f"Creating {len(model_choices)} model buttons: {model_choices}")
200
 
201
  for model_name in model_choices:
202
+ has_amd = model_has_failures_by_device(model_name, 'amd')
203
+ has_nvidia = model_has_failures_by_device(model_name, 'nvidia')
 
 
 
204
 
205
+ if has_amd and has_nvidia:
206
+ both_failing_models.append(model_name)
207
+ elif has_amd:
208
+ amd_failing_models.append(model_name)
209
+ elif has_nvidia:
210
+ nvidia_failing_models.append(model_name)
211
+ else:
212
+ passing_models.append(model_name)
213
+
214
+ # Container for all models (visible by default)
215
+ with gr.Column(visible=True, elem_classes=["all-models-container"]) as all_models_container:
216
+ for model_name in model_choices:
217
+ has_failures = model_has_failures_by_device(model_name, 'both')
218
+ button_classes = ["model-button"]
219
+ if has_failures:
220
+ button_classes.append("model-button-failed")
221
+
222
+ btn = gr.Button(
223
+ model_name,
224
+ variant="secondary",
225
+ size="sm",
226
+ elem_classes=button_classes
227
+ )
228
+ model_buttons.append(btn)
229
+
230
+ # Container for AMD failures (hidden by default)
231
+ amd_buttons = []
232
+ with gr.Column(visible=False, elem_classes=["amd-failures-container"]) as amd_failures_container:
233
+ amd_models_to_show = amd_failing_models + both_failing_models
234
+ for model_name in sorted(amd_models_to_show):
235
+ btn = gr.Button(
236
+ model_name,
237
+ variant="secondary",
238
+ size="sm",
239
+ elem_classes=["model-button", "model-button-failed"]
240
+ )
241
+ amd_buttons.append(btn)
242
+
243
+ # Container for NVIDIA failures (hidden by default)
244
+ nvidia_buttons = []
245
+ with gr.Column(visible=False, elem_classes=["nvidia-failures-container"]) as nvidia_failures_container:
246
+ nvidia_models_to_show = nvidia_failing_models + both_failing_models
247
+ for model_name in sorted(nvidia_models_to_show):
248
+ btn = gr.Button(
249
+ model_name,
250
+ variant="secondary",
251
+ size="sm",
252
+ elem_classes=["model-button", "model-button-failed"]
253
+ )
254
+ nvidia_buttons.append(btn)
255
+
256
+ # Container for both AMD and NVIDIA failures (hidden by default)
257
+ both_buttons = []
258
+ with gr.Column(visible=False, elem_classes=["both-failures-container"]) as both_failures_container:
259
+ all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
260
+ for model_name in sorted(all_failing):
261
+ btn = gr.Button(
262
+ model_name,
263
+ variant="secondary",
264
+ size="sm",
265
+ elem_classes=["model-button", "model-button-failed"]
266
+ )
267
+ both_buttons.append(btn)
268
 
269
  # CI job links at bottom of sidebar
270
  ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
271
 
272
  # Main content area
273
  with gr.Column(scale=4, elem_classes=["main-content"]):
274
+ # Current view components
275
+ with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
276
+ # Summary view (contains summary plot and regressions panel)
277
+ with gr.Column(visible=True, elem_classes=["summary-view"]) as summary_view:
278
+ # Summary display (default view)
279
+ summary_display = gr.Plot(
280
+ value=create_summary_page(Ci_results.df, Ci_results.available_models),
281
+ label="",
282
+ format="png",
283
+ elem_classes=["plot-container"],
284
+ visible=True
285
+ )
286
+
287
+ # New Regressions section (at the bottom, collapsible)
288
+ regressions_toggle_button = gr.Button(
289
+ "► New Regressions",
290
+ variant="secondary",
291
+ elem_classes=["regressions-header"]
292
+ )
293
+
294
+ with gr.Column(elem_classes=["regressions-content", "regressions-content-hidden"]) as regressions_content:
295
+ regressions_panel = gr.Markdown(
296
+ value=get_regressions_text(),
297
+ elem_classes=["regressions-panel"]
298
+ )
299
 
300
+ # Detailed view components (hidden by default)
301
+ with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
302
+ # Create the plot output
303
+ plot_output = gr.Plot(
304
+ label="",
305
+ format="png",
306
+ elem_classes=["plot-container"]
307
+ )
308
 
309
+ # Create two separate failed tests displays in a row layout
310
+ with gr.Row():
311
+ with gr.Column(scale=1):
312
+ amd_failed_tests_output = gr.Textbox(
313
+ value="",
314
+ lines=8,
315
+ max_lines=8,
316
+ interactive=False,
317
+ container=False,
318
+ elem_classes=["failed-tests"]
319
+ )
320
+ with gr.Column(scale=1):
321
+ nvidia_failed_tests_output = gr.Textbox(
322
+ value="",
323
+ lines=8,
324
+ max_lines=8,
325
+ interactive=False,
326
+ container=False,
327
+ elem_classes=["failed-tests"]
328
+ )
329
+
330
+ # Historical view components (hidden by default)
331
+ with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:
332
+
333
+
334
+ # Time-series summary displays (multiple Gradio plots)
335
+ time_series_failure_rates = gr.Plot(
336
+ label="",
337
+ elem_classes=["plot-container"]
338
+ )
339
+
340
+ time_series_amd_tests = gr.Plot(
341
+ label="",
342
+ elem_classes=["plot-container"]
343
+ )
344
+
345
+ time_series_nvidia_tests = gr.Plot(
346
  label="",
 
347
  elem_classes=["plot-container"]
348
  )
349
 
350
+ # Time-series model view (hidden by default)
351
+ with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
352
+ # Time-series plots for specific model (with spacing)
353
+ time_series_amd_model_plot = gr.Plot(
354
+ label="",
355
+ elem_classes=["plot-container"]
356
+ )
357
+
358
+ time_series_nvidia_model_plot = gr.Plot(
359
+ label="",
360
+ elem_classes=["plot-container"]
361
+ )
 
 
 
 
 
 
 
 
362
 
363
+ # Failing models filter functionality
364
+ def filter_failing_models(show_amd, show_nvidia):
365
+ """Filter models based on AMD and/or NVIDIA failures.
366
+
367
+ Logic:
368
+ - Neither checked: show all models
369
+ - AMD only: show models with AMD failures (including those with both)
370
+ - NVIDIA only: show models with NVIDIA failures (including those with both)
371
+ - Both checked: show all models with any failures
372
+ """
373
+ show_all = not show_amd and not show_nvidia
374
+ show_amd_only = show_amd and not show_nvidia
375
+ show_nvidia_only = not show_amd and show_nvidia
376
+ show_all_failures = show_amd and show_nvidia
377
+
378
+ return (
379
+ gr.update(visible=show_all), # all_models_container
380
+ gr.update(visible=show_amd_only), # amd_failures_container
381
+ gr.update(visible=show_nvidia_only), # nvidia_failures_container
382
+ gr.update(visible=show_all_failures), # both_failures_container
383
  )
384
 
385
+ for checkbox in [show_amd_failures, show_nvidia_failures]:
386
+ checkbox.change(
387
+ fn=filter_failing_models,
388
+ inputs=[show_amd_failures, show_nvidia_failures],
389
+ outputs=[all_models_container, amd_failures_container, nvidia_failures_container, both_failures_container]
390
+ )
391
+
392
+ # Regressions panel toggle functionality
393
+ def toggle_regressions_panel(current_visible):
394
+ """Toggle the visibility of the regressions panel."""
395
+ new_visible = not current_visible
396
+ arrow = "▼" if new_visible else "►"
397
+ button_text = f"{arrow} New Regressions"
398
+
399
+ # Use CSS classes instead of Gradio visibility
400
+ css_classes = ["regressions-content"]
401
+ if new_visible:
402
+ css_classes.append("regressions-content-visible")
403
+ else:
404
+ css_classes.append("regressions-content-hidden")
405
+
406
+ return gr.update(value=button_text), gr.update(elem_classes=css_classes), new_visible
407
+
408
+ # Track regressions panel visibility state
409
+ regressions_visible = gr.State(False)
410
+
411
+ regressions_toggle_button.click(
412
+ fn=toggle_regressions_panel,
413
+ inputs=[regressions_visible],
414
+ outputs=[regressions_toggle_button, regressions_content, regressions_visible]
415
+ )
416
+
417
  # Model toggle functionality
418
  def toggle_model_list(current_visible):
419
  """Toggle the visibility of the model list."""
 
432
 
433
  # Track model list visibility state
434
  model_list_visible = gr.State(False)
435
+ # Track last selected model for mode switches
436
+ selected_model_state = gr.State(None)
437
+ # Track whether current view is model detail (True) or summary (False)
438
+ in_model_view_state = gr.State(False)
439
 
440
  model_toggle_button.click(
441
  fn=toggle_model_list,
 
443
  outputs=[model_toggle_button, model_list_container, model_list_visible]
444
  )
445
 
446
+
447
+ # Unified summary handler: respects History toggle
448
+ def handle_summary_click(history_mode: bool):
449
+ description = get_description_text()
450
+ links = get_ci_links()
451
+ if history_mode:
452
+ fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
453
+ return (description, links, gr.update(visible=False), gr.update(visible=True),
454
+ gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
455
+ fr_plot, amd_plot, nvidia_plot, gr.update(visible=False), False, "")
456
+ else:
457
+ fig = create_summary_page(Ci_results.df, Ci_results.available_models)
458
+ return (description, links, gr.update(visible=True), gr.update(visible=False),
459
+ gr.update(visible=True), gr.update(value=fig, visible=True), gr.update(visible=False),
460
+ gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
461
+ gr.update(visible=False), False, "")
462
 
463
  summary_button.click(
464
+ fn=handle_summary_click,
465
+ inputs=[history_view_button],
466
+ outputs=[
467
+ description_display,
468
+ ci_links_display,
469
+ current_view,
470
+ historical_view,
471
+ summary_view,
472
+ summary_display,
473
+ detail_view,
474
+ time_series_failure_rates,
475
+ time_series_amd_tests,
476
+ time_series_nvidia_tests,
477
+ time_series_detail_view,
478
+ in_model_view_state,
479
+ selected_model_state,
480
+ ],
481
  )
482
 
483
  # Function to get CI job links
 
527
  # AMD links
528
  if amd_multi_link or amd_single_link:
529
  links_md += "**AMD:**\n"
530
+ if amd_multi_link:
531
+ links_md += f"• [Multi GPU]({amd_multi_link})\n"
532
+ if amd_single_link:
533
+ links_md += f"• [Single GPU]({amd_single_link})\n"
 
 
 
534
  links_md += "\n"
535
 
536
  # NVIDIA links
537
  if nvidia_multi_link or nvidia_single_link:
538
  links_md += "**NVIDIA:**\n"
539
+ if nvidia_multi_link:
540
+ links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
541
+ if nvidia_single_link:
542
+ links_md += f"• [Single GPU]({nvidia_single_link})\n"
 
 
 
543
 
544
  if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
545
  links_md += "*No links available*"
 
550
  return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
551
 
552
 
553
+
554
+ # Constants for Gradio updates
555
+ HIDDEN = gr.update(visible=False)
556
+ SHOWN = gr.update(visible=True)
557
+ NOOP = gr.update()
558
+
559
+ def get_historical_summary_plots():
560
+ """Get historical summary plots from preloaded data."""
561
+ plots = create_time_series_summary_gradio(Ci_results.historical_df)
562
+ return (
563
+ gr.update(value=plots['failure_rates'], visible=True),
564
+ gr.update(value=plots['amd_tests'], visible=True),
565
+ gr.update(value=plots['nvidia_tests'], visible=True),
566
+ )
567
+
568
+ def show_time_series_model(selected_model):
569
+ """Show time-series view for a specific model."""
570
+ plots = create_model_time_series_gradio(Ci_results.historical_df, selected_model)
571
+ return (
572
+ gr.update(value=plots['amd_plot'], visible=True),
573
+ gr.update(value=plots['nvidia_plot'], visible=True),
574
+ )
575
+
576
+ def handle_history_toggle(history_mode, last_selected_model, in_model_view):
577
+ """Handle toggling between current and historical view."""
578
+ if history_mode:
579
+ # Historical mode: show model detail if in model view, otherwise summary
580
+ if in_model_view and last_selected_model:
581
+ amd_ts, nvidia_ts = show_time_series_model(last_selected_model)
582
+ return (HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN, HIDDEN, HIDDEN, HIDDEN,
583
+ amd_ts, nvidia_ts, SHOWN, NOOP, NOOP, NOOP, True)
584
+
585
+ fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
586
+ return (HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN, fr_plot, amd_plot, nvidia_plot,
587
+ NOOP, NOOP, HIDDEN, NOOP, NOOP, NOOP, False)
588
+ else:
589
+ # Current mode: show model detail if available, otherwise summary
590
+ if last_selected_model and Ci_results.df is not None and not Ci_results.df.empty and last_selected_model in Ci_results.df.index:
591
+ fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, last_selected_model, Ci_results.all_historical_data)
592
+ return (SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
593
+ NOOP, NOOP, HIDDEN, fig, amd_txt, nvidia_txt, True)
594
+
595
+ fig = create_summary_page(Ci_results.df, Ci_results.available_models)
596
+ return (SHOWN, HIDDEN, SHOWN, gr.update(value=fig, visible=True), HIDDEN,
597
+ HIDDEN, HIDDEN, HIDDEN, NOOP, NOOP, HIDDEN, NOOP, NOOP, NOOP, False)
598
+
599
+ def handle_model_click(selected_model: str, history_mode: bool):
600
+ """Handle clicking on a model button."""
601
+ if history_mode:
602
+ amd_ts, nvidia_ts = show_time_series_model(selected_model)
603
+ return (NOOP, NOOP, NOOP, HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN, HIDDEN, HIDDEN,
604
+ HIDDEN, amd_ts, nvidia_ts, SHOWN, selected_model, True)
605
+
606
+ fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model, Ci_results.all_historical_data)
607
+ return (fig, amd_txt, nvidia_txt, SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN, NOOP, NOOP,
608
+ NOOP, NOOP, NOOP, HIDDEN, selected_model, True)
609
+
610
+ # Wire up history toggle
611
+ history_view_button.change(
612
+ fn=handle_history_toggle,
613
+ inputs=[history_view_button, selected_model_state, in_model_view_state],
614
+ outputs=[
615
+ current_view, historical_view, summary_view, summary_display, detail_view,
616
+ time_series_failure_rates, time_series_amd_tests, time_series_nvidia_tests,
617
+ time_series_amd_model_plot, time_series_nvidia_model_plot, time_series_detail_view,
618
+ plot_output, amd_failed_tests_output, nvidia_failed_tests_output, in_model_view_state,
619
+ ],
620
+ )
621
+
622
+ # Define common outputs for model click handlers
623
+ model_click_outputs = [
624
+ plot_output, amd_failed_tests_output, nvidia_failed_tests_output,
625
+ current_view, historical_view, summary_view, summary_display, detail_view,
626
+ time_series_failure_rates, time_series_amd_tests, time_series_nvidia_tests,
627
+ time_series_amd_model_plot, time_series_nvidia_model_plot, time_series_detail_view,
628
+ selected_model_state, in_model_view_state,
629
+ ]
630
+
631
+ # Helper function to connect button clicks
632
+ def connect_model_buttons(buttons, models):
633
+ """Connect a list of buttons to their corresponding models."""
634
+ for btn, model_name in zip(buttons, models):
635
+ btn.click(
636
+ fn=lambda history_mode, m=model_name: handle_model_click(m, history_mode),
637
+ inputs=[history_view_button],
638
+ outputs=model_click_outputs,
639
+ )
640
+
641
+ # Wire up all button groups
642
+ connect_model_buttons(model_buttons, model_choices)
643
+ connect_model_buttons(amd_buttons, sorted(amd_failing_models + both_failing_models))
644
+ connect_model_buttons(nvidia_buttons, sorted(nvidia_failing_models + both_failing_models))
645
+ connect_model_buttons(both_buttons, sorted(list(set(amd_failing_models + nvidia_failing_models + both_failing_models))))
646
+
647
+ # Summary button click handler
648
+ def show_summary_and_update_links():
649
+ """Show summary page and update CI links."""
650
+ return create_summary_page(Ci_results.df, Ci_results.available_models), get_description_text(), get_ci_links()
651
+
652
+ # Auto-update summary, description, CI links, and regressions when the interface loads
653
  demo.load(
654
  fn=show_summary_and_update_links,
655
  outputs=[summary_display, description_display, ci_links_display]
656
+ ).then(
657
+ fn=get_regressions_text,
658
+ outputs=[regressions_panel]
659
  )
660
 
661
 
data.py CHANGED
@@ -1,10 +1,13 @@
1
  from huggingface_hub import HfFileSystem
2
  import pandas as pd
3
  from utils import logger
 
4
  import threading
5
  import traceback
6
  import json
7
  import re
 
 
8
 
9
  # NOTE: if caching is an issue, try adding `use_listings_cache=False`
10
  fs = HfFileSystem()
@@ -54,12 +57,57 @@ KEYS_TO_KEEP = [
54
  "job_link_nvidia",
55
  ]
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def log_dataframe_link(link: str) -> str:
59
  """
60
  Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
61
  report.
62
  """
 
 
63
  logger.info(f"Reading df located at {link}")
64
  # Make sure the link starts with an http address
65
  if link.startswith("hf://"):
@@ -102,26 +150,148 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
102
  df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
103
  return df, df_upload_date
104
 
105
- def get_first_working_df(file_list: list[str]) -> str:
106
- for file in file_list:
107
- job_links = file.rsplit('/', 1)[0] + "/job_links.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  try:
109
- links = pd.read_json(f"hf://{job_links}", typ="series")
110
- has_one_working_link = any(links.values)
111
  except Exception as e:
112
- logger.error(f"Could not read job links from {job_links}: {e}")
113
- has_one_working_link = False
114
- if has_one_working_link:
115
- return file
116
- logger.warning(f"Skipping {file} as it has no working job links.")
117
- raise RuntimeError("Could not find any working dataframe in the provided list.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  def get_distant_data() -> tuple[pd.DataFrame, str]:
120
  # Retrieve AMD dataframe
121
  amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
122
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
123
- file_amd = get_first_working_df(files_amd)
124
- df_amd, date_df_amd = read_one_dataframe(f"hf://{file_amd}", "amd")
125
  # Retrieve NVIDIA dataframe, which pattern should be:
126
  # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
127
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
@@ -161,39 +331,173 @@ def get_sample_data() -> tuple[pd.DataFrame, str]:
161
  filtered_joined.index = "sample_" + filtered_joined.index
162
  return filtered_joined, "sample data was loaded"
163
 
164
- def safe_extract(row: pd.DataFrame, key: str) -> int:
165
- return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
168
  """Extract and process model data from DataFrame row."""
169
- # Handle missing values and get counts directly from dataframe
170
- success_nvidia = safe_extract(row, "success_nvidia")
171
- success_amd = safe_extract(row, "success_amd")
172
-
173
- skipped_nvidia = safe_extract(row, "skipped_nvidia")
174
- skipped_amd = safe_extract(row, "skipped_amd")
175
 
176
- failed_multi_amd = safe_extract(row, 'failed_multi_no_amd')
177
- failed_multi_nvidia = safe_extract(row, 'failed_multi_no_nvidia')
178
- failed_single_amd = safe_extract(row, 'failed_single_no_amd')
179
- failed_single_nvidia = safe_extract(row, 'failed_single_no_nvidia')
180
- # Calculate total failures
181
- total_failed_amd = failed_multi_amd + failed_single_amd
182
- total_failed_nvidia = failed_multi_nvidia + failed_single_nvidia
183
- # Create stats dictionaries directly from dataframe values
184
  amd_stats = {
185
- 'passed': success_amd,
186
- 'failed': total_failed_amd,
187
- 'skipped': skipped_amd,
188
- 'error': 0 # Not available in this dataset
189
  }
190
  nvidia_stats = {
191
- 'passed': success_nvidia,
192
- 'failed': total_failed_nvidia,
193
- 'skipped': skipped_nvidia,
194
- 'error': 0 # Not available in this dataset
195
  }
196
- return amd_stats, nvidia_stats, failed_multi_amd, failed_single_amd, failed_multi_nvidia, failed_single_nvidia
 
 
 
197
 
198
 
199
 
@@ -203,6 +507,10 @@ class CIResults:
203
  self.df = pd.DataFrame()
204
  self.available_models = []
205
  self.latest_update_msg = ""
 
 
 
 
206
 
207
  def load_data(self) -> None:
208
  """Load data from the data source."""
@@ -211,6 +519,13 @@ class CIResults:
211
  logger.info("Loading distant data...")
212
  new_df, latest_update_msg = get_distant_data()
213
  self.latest_update_msg = latest_update_msg
 
 
 
 
 
 
 
214
  except Exception as e:
215
  error_msg = [
216
  "Loading data failed:",
@@ -220,11 +535,19 @@ class CIResults:
220
  "Falling back on sample data."
221
  ]
222
  logger.error("\n".join(error_msg))
 
223
  new_df, latest_update_msg = get_sample_data()
224
  self.latest_update_msg = latest_update_msg
 
 
 
225
  # Update attributes
226
  self.df = new_df
227
  self.available_models = new_df.index.tolist()
 
 
 
 
228
  # Log and return distant load status
229
  logger.info(f"Data loaded successfully: {len(self.available_models)} models")
230
  logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
@@ -242,6 +565,53 @@ class CIResults:
242
  msg[model][col] = value
243
  logger.info(json.dumps(msg, indent=4))
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  def schedule_data_reload(self):
246
  """Schedule the next data reload."""
247
  def reload_data():
 
1
  from huggingface_hub import HfFileSystem
2
  import pandas as pd
3
  from utils import logger
4
+ from datetime import datetime, timedelta
5
  import threading
6
  import traceback
7
  import json
8
  import re
9
+ import random
10
+ from typing import List, Tuple, Optional, Dict
11
 
12
  # NOTE: if caching is an issue, try adding `use_listings_cache=False`
13
  fs = HfFileSystem()
 
57
  "job_link_nvidia",
58
  ]
59
 
60
+ # ============================================================================
61
+ # HELPER FUNCTIONS
62
+ # ============================================================================
63
+
64
def generate_fake_dates(num_days: int = 7, end_date: Optional[datetime] = None) -> List[str]:
    """Generate ``YYYY-MM-DD`` date strings for the last N days, newest first.

    Args:
        num_days: Number of consecutive days to produce. Zero or a negative
            value yields an empty list.
        end_date: Most recent day of the window. When omitted, the current
            local time is used, which is what the zero-argument call always did.

    Returns:
        ``num_days`` date strings starting at ``end_date`` and stepping back
        one day at a time.
    """
    newest = datetime.now() if end_date is None else end_date
    return [(newest - timedelta(days=offset)).strftime("%Y-%m-%d") for offset in range(num_days)]
68
+
69
def parse_json_field(value) -> dict:
    """Coerce a field that may be a dict, a JSON string, or missing into a dict.

    Args:
        value: A dict, a JSON-encoded string, ``None``/NaN, or any object that
            ``dict()`` can consume (e.g. a sequence of pairs or a pandas Series).

    Returns:
        The dict itself, the decoded JSON object, or ``{}`` when the value is
        missing, is not valid JSON, decodes to something other than an object,
        or cannot be converted.
    """
    if isinstance(value, dict):
        return value
    if value is None:
        return {}
    if isinstance(value, str):
        try:
            parsed = json.loads(value)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}
        # The declared return type is dict: discard JSON arrays, numbers, etc.
        return parsed if isinstance(parsed, dict) else {}
    # pd.isna() on a list-like returns an array whose truth value is ambiguous,
    # so the missing-value test is restricted to scalars.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return {}
    try:
        return dict(value)
    except (TypeError, ValueError):
        return {}
86
+
87
def extract_date_from_path(path: str, pattern: str) -> Optional[str]:
    """Return the first capture group of ``pattern`` found in ``path``.

    Args:
        path: File path to search.
        pattern: Regular expression whose first capturing group is the date.

    Returns:
        The text of group 1, or ``None`` when the pattern does not match.
    """
    found = re.search(pattern, path)
    if found is None:
        return None
    return found.group(1)
91
+
92
def get_test_names(tests: list) -> set:
    """Collect the ``'line'`` value of every test dictionary in ``tests``.

    Args:
        tests: List of test dictionaries; ``None`` or an empty list is accepted.

    Returns:
        The set of ``'line'`` values. A dictionary without a ``'line'`` key
        contributes the empty string. Entries that are not dictionaries are
        ignored rather than raising ``AttributeError``.
    """
    if not tests:
        return set()
    return {entry.get('line', '') for entry in tests if isinstance(entry, dict)}
95
+
96
def safe_extract(row: pd.Series, key: str) -> int:
    """Read ``row[key]`` as an integer, falling back to 0.

    Args:
        row: A DataFrame row (any object with a dict-style ``get`` works).
        key: Column name to read.

    Returns:
        The value converted with ``int()``, or 0 when the key is absent, the
        value is missing (NaN/None), or the value cannot be converted.
    """
    value = row.get(key, 0)  # single lookup instead of two
    try:
        if pd.isna(value):
            return 0
        return int(value)
    except (TypeError, ValueError):
        # Non-numeric content, or a list-like whose isna() result has no single truth value.
        return 0
99
+
100
+ # ============================================================================
101
+ # DATA LOADING FUNCTIONS
102
+ # ============================================================================
103
 
104
  def log_dataframe_link(link: str) -> str:
105
  """
106
  Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
107
  report.
108
  """
109
+ if link.startswith("sample_"):
110
+ return "9999-99-99"
111
  logger.info(f"Reading df located at {link}")
112
  # Make sure the link starts with an http address
113
  if link.startswith("hf://"):
 
150
  df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
151
  return df, df_upload_date
152
 
153
def get_available_dates(max_dates: int = 30) -> List[str]:
    """Return the dates for which both the AMD and NVIDIA datasets hold results.

    Args:
        max_dates: Maximum number of dates to return, counted from the newest.
            Defaults to 30, the limit that used to be hard-coded.

    Returns:
        ``YYYY-MM-DD`` strings sorted newest first. An empty list is returned
        when the two datasets share no date or when listing the files fails,
        so that the caller can fall back to sample data.
    """
    try:
        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
        nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"

        # The listings only feed sets below, so their order is irrelevant.
        files_amd = fs.glob(amd_src, refresh=True)
        files_nvidia = fs.glob(nvidia_src, refresh=True)
        logger.info(f"Found {len(files_amd)} AMD files, {len(files_nvidia)} NVIDIA files")

        # The date is the directory right after the dataset name.
        amd_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
        nvidia_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'

        amd_dates = {extract_date_from_path(f, amd_pattern) for f in files_amd}
        amd_dates.discard(None)  # paths that did not match the pattern
        nvidia_dates = {extract_date_from_path(f, nvidia_pattern) for f in files_nvidia}
        nvidia_dates.discard(None)

        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")
        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")

        # Keep only the dates present in both datasets.
        common_dates = sorted(amd_dates & nvidia_dates, reverse=True)
        logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")

        if common_dates:
            return common_dates[:max_dates]

        logger.warning("No common dates found between AMD and NVIDIA datasets")
        return []

    except Exception as e:
        logger.error(f"Error getting available dates: {e}")
        return []
193
+
194
+
195
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
    """Load the AMD and NVIDIA CI results recorded for one date.

    Each platform is read independently: when one of them cannot be read, the
    result is built from the other one alone.

    Args:
        target_date: Date in ``YYYY-MM-DD`` form, used as a directory name in
            both dataset layouts.

    Returns:
        A ``(dataframe, target_date)`` pair. The dataframe is restricted to
        ``KEYS_TO_KEEP`` columns and to ``IMPORTANT_MODELS`` rows. It is empty
        when neither platform could be loaded or when any other error occurs
        (historical views never fall back to sample data).
    """
    try:
        # AMD layout: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
        amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/runs/*/ci_results_run_models_gpu/model_results.json"
        amd_files = fs.glob(amd_src, refresh=True)

        if not amd_files:
            raise FileNotFoundError(f"No AMD data found for date {target_date}")

        # Take the greatest path, mirroring get_distant_data(), which sorts the
        # listing in descending order before picking the first entry.
        # NOTE(review): assumes run ids sort chronologically - confirm.
        amd_file = max(amd_files)
        if not amd_file.startswith("hf://"):
            amd_file = f"hf://{amd_file}"

        # NVIDIA layout: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
        nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"

        df_amd = pd.DataFrame()
        df_nvidia = pd.DataFrame()

        try:
            df_amd, _ = read_one_dataframe(amd_file, "amd")
            logger.info(f"Successfully loaded AMD data for {target_date}")
        except Exception as e:
            logger.warning(f"Failed to load AMD data for {target_date}: {e}")

        try:
            df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
            logger.info(f"Successfully loaded NVIDIA data for {target_date}")
        except Exception as e:
            logger.warning(f"Failed to load NVIDIA data for {target_date}: {e}")

        if df_amd.empty and df_nvidia.empty:
            logger.warning(f"No data available for either platform on {target_date}")
            return pd.DataFrame(), target_date

        if not df_amd.empty and not df_nvidia.empty:
            joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
            joined = joined[KEYS_TO_KEEP]
        else:
            # Without a join no column receives its device suffix, so selecting
            # KEYS_TO_KEEP would raise KeyError and discard the platform that
            # did load. Suffix the lone frame and let the other platform's
            # columns come out as NaN.
            if not df_amd.empty:
                lone = _suffix_device_columns(df_amd, "amd")
            else:
                lone = _suffix_device_columns(df_nvidia, "nvidia")
            joined = lone.reindex(columns=KEYS_TO_KEEP)

        joined.index = joined.index.str.replace("^models_", "", regex=True)

        # Keep only the important models (case-insensitive match).
        important_models_lower = [model.lower() for model in IMPORTANT_MODELS]
        filtered_joined = joined[joined.index.str.lower().isin(important_models_lower)]

        return filtered_joined, target_date

    except Exception as e:
        logger.error(f"Error getting data for date {target_date}: {e}")
        return pd.DataFrame(), target_date


def _suffix_device_columns(df: pd.DataFrame, device_label: str) -> pd.DataFrame:
    """Return ``df`` with ``_<device_label>`` appended to every column lacking it."""
    suffix = f"_{device_label}"
    return df.rename(columns=lambda col: col if str(col).endswith(suffix) else f"{col}{suffix}")
257
+
258
+
259
def get_historical_data(start_date: str, end_date: str, sample_data: bool = False) -> pd.DataFrame:
    """Collect the per-day results for every day in an inclusive date range.

    Args:
        start_date: First day, ``YYYY-MM-DD``.
        end_date: Last day, ``YYYY-MM-DD``.
        sample_data: When true, skip the remote datasets and return generated
            fake data for the range instead.

    Returns:
        The daily frames stacked on top of each other, with a ``date`` column
        naming the day each row belongs to. Days without data are skipped.
        An empty frame is returned when no day produced data. If the range
        itself cannot be processed, fake data is returned.
    """
    if sample_data:
        return get_fake_historical_data(start_date, end_date)

    try:
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
        daily_frames = []

        current_dt = start_dt
        while current_dt <= end_dt:
            date_str = current_dt.strftime("%Y-%m-%d")
            try:
                df, _ = get_data_for_date(date_str)
                if not df.empty:
                    # assign() builds a new frame, so the filtered frame handed
                    # back by get_data_for_date() is never mutated in place.
                    daily_frames.append(df.assign(date=date_str))
                    logger.info(f"Loaded data for {date_str}")
            except Exception as e:
                logger.warning(f"Could not load data for {date_str}: {e}")
            current_dt += timedelta(days=1)

        # ignore_index=False keeps the model names as the index.
        return pd.concat(daily_frames, ignore_index=False) if daily_frames else pd.DataFrame()

    except Exception as e:
        logger.error(f"Error getting historical data: {e}")
        return get_fake_historical_data(start_date, end_date)
288
+
289
 
290
  def get_distant_data() -> tuple[pd.DataFrame, str]:
291
  # Retrieve AMD dataframe
292
  amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
293
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
294
+ df_amd, date_df_amd = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
 
295
  # Retrieve NVIDIA dataframe, which pattern should be:
296
  # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
297
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
 
331
  filtered_joined.index = "sample_" + filtered_joined.index
332
  return filtered_joined, "sample data was loaded"
333
 
334
+
335
def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
    """Generate fake historical data for a date range from the sample data.

    One copy of the sample frame is produced per day, with its count columns
    randomly perturbed so the series is not flat. Only the numeric count
    columns are changed; every other column is copied unmodified.

    Args:
        start_date: First day, ``YYYY-MM-DD``.
        end_date: Last day (inclusive), ``YYYY-MM-DD``.

    Returns:
        The per-day frames stacked together with a ``date`` column, or an
        empty frame when the range is empty or anything fails.
    """
    steady_cols = ('success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia')
    failure_cols = ('failed_multi_no_amd', 'failed_multi_no_nvidia',
                    'failed_single_no_amd', 'failed_single_no_nvidia')
    try:
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
        sample_df, _ = get_sample_data()

        # Column presence does not change from day to day: resolve it once.
        steady_present = [col for col in steady_cols if col in sample_df.columns]
        failure_present = [col for col in failure_cols if col in sample_df.columns]

        historical_data = []
        current_dt = start_dt
        while current_dt <= end_dt:
            date_df = sample_df.copy()
            date_df['date'] = current_dt.strftime("%Y-%m-%d")

            for idx in date_df.index:
                # Success/skipped counts: scaled by a factor in [0.8, 1.2]; zeros are left alone.
                for col in steady_present:
                    val = date_df.loc[idx, col]
                    if pd.notna(val) and val > 0:
                        date_df.loc[idx, col] = max(0, int(val * random.uniform(0.8, 1.2)))

                # Failure counts vary more: scaled by a factor in [0.5, 2.0].
                for col in failure_present:
                    val = date_df.loc[idx, col]
                    if pd.notna(val):
                        date_df.loc[idx, col] = max(0, int(val * random.uniform(0.5, 2.0)))

            historical_data.append(date_df)
            current_dt += timedelta(days=1)

        if not historical_data:
            return pd.DataFrame()

        combined_df = pd.concat(historical_data, ignore_index=False)
        logger.info(f"Generated fake historical data: {len(combined_df)} records from {start_date} to {end_date}")
        return combined_df

    except Exception as e:
        logger.error(f"Error generating fake historical data: {e}")
        return pd.DataFrame()
377
+
378
def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
    """Find the earliest date on which a given test failure is recorded.

    Args:
        historical_df: Historical frame indexed by model name, with a ``date``
            column and ``failures_<device>`` columns.
        model_name: Model to look up (compared case-insensitively).
        test_name: Value of the failure's ``'line'`` field to search for.
        device: Suffix of the failures column to read, e.g. ``'amd'``.
        gpu_type: Key inside the failures dict, e.g. ``'single'`` or ``'multi'``.

    Returns:
        The ``date`` value of the oldest row listing the failure, or ``None``
        when it is never listed, the inputs are unusable, or an error occurs.
    """
    if historical_df is None or historical_df.empty:
        return None

    try:
        if 'date' not in historical_df.columns:
            return None

        model_rows = historical_df[historical_df.index.str.lower() == model_name.lower()]
        if model_rows.empty:
            return None

        failures_column = f'failures_{device}'
        # Oldest first, so the first hit is the first appearance.
        for _, row in model_rows.sort_values('date').iterrows():
            # parse_json_field() already maps None/NaN to {}; no separate
            # pd.isna() test is needed (it is unsafe on list-like values).
            failures = parse_json_field(row.get(failures_column))
            if not isinstance(failures, dict) or gpu_type not in failures:
                continue

            # `or []` keeps an explicit None entry from aborting the search.
            for test in failures.get(gpu_type) or []:
                if isinstance(test, dict) and test.get('line', '') == test_name:
                    return row.get('date') or None

        return None

    except Exception as e:
        logger.error(f"Error finding first seen date for {test_name}: {e}")
        return None
416
+
417
+
418
def _find_device_regressions(model_name: str, current_failures: dict, yesterday_failures: dict, device: str) -> list[dict]:
    """List the tests failing now that were not failing in the previous data.

    Args:
        model_name: Model the failures belong to; copied into each record.
        current_failures: Failures dict with optional ``'single'``/``'multi'`` lists.
        yesterday_failures: Same structure for the previous day.
        device: Device label copied into each record.

    Returns:
        One record per new failure with keys ``model``, ``test`` (the part
        after the last ``::``), ``test_full``, ``device`` and ``gpu_type``.
        Records come out grouped by gpu type and sorted by test name, so the
        result is the same from one run to the next.
    """
    regressions = []
    for gpu_type in ['single', 'multi']:
        current_tests = get_test_names(current_failures.get(gpu_type, []))
        yesterday_tests = get_test_names(yesterday_failures.get(gpu_type, []))

        # New failures: failing now but not yesterday. Empty names are dropped
        # and set iteration order is replaced by a sorted one.
        new_tests = sorted((name for name in current_tests - yesterday_tests if name), key=str)
        for test_name in new_tests:
            regressions.append({
                'model': model_name,
                'test': str(test_name).split('::')[-1],  # short name
                'test_full': test_name,  # full name
                'device': device,
                'gpu_type': gpu_type,
            })
    return regressions
437
+
438
def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
    """Compare current failures with the latest historical day to find new ones.

    Args:
        current_df: Current results, indexed by model name.
        historical_df: Historical results with a ``date`` column, indexed by
            model name.

    Returns:
        Regression records as produced by ``_find_device_regressions`` for the
        AMD and NVIDIA failures of every model in ``current_df``. Empty when
        either frame is empty or the historical frame has no ``date`` column.
    """
    if current_df.empty or historical_df.empty or 'date' not in historical_df.columns:
        return []

    # The reference day is the greatest date present in the historical frame.
    # NOTE(review): if the caller's historical frame includes the same day as
    # current_df, nothing will ever be reported - confirm against the caller.
    available_dates = sorted(historical_df['date'].unique(), reverse=True)
    if not available_dates:
        return []

    yesterday_data = historical_df[historical_df['date'] == available_dates[0]]
    # Lower-case both sides of the comparison, as find_failure_first_seen does.
    yesterday_index_lower = yesterday_data.index.str.lower()
    new_regressions = []

    for model_name in current_df.index:
        current_row = current_df.loc[model_name]
        yesterday_row = yesterday_data[yesterday_index_lower == model_name.lower()]

        current_amd = parse_json_field(current_row.get('failures_amd', {}))
        current_nvidia = parse_json_field(current_row.get('failures_nvidia', {}))

        # A model absent from the reference day has no previous failures.
        yesterday_amd = {}
        yesterday_nvidia = {}
        if not yesterday_row.empty:
            yesterday_row = yesterday_row.iloc[0]
            yesterday_amd = parse_json_field(yesterday_row.get('failures_amd', {}))
            yesterday_nvidia = parse_json_field(yesterday_row.get('failures_nvidia', {}))

        new_regressions.extend(_find_device_regressions(model_name, current_amd, yesterday_amd, 'amd'))
        new_regressions.extend(_find_device_regressions(model_name, current_nvidia, yesterday_nvidia, 'nvidia'))

    return new_regressions
473
+
474
 
475
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
476
  """Extract and process model data from DataFrame row."""
477
+ # Extract all counts
478
+ counts = {key: safe_extract(row, key) for key in [
479
+ 'success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia',
480
+ 'failed_multi_no_amd', 'failed_multi_no_nvidia',
481
+ 'failed_single_no_amd', 'failed_single_no_nvidia'
482
+ ]}
483
 
484
+ # Create stats dictionaries
 
 
 
 
 
 
 
485
  amd_stats = {
486
+ 'passed': counts['success_amd'],
487
+ 'failed': counts['failed_multi_no_amd'] + counts['failed_single_no_amd'],
488
+ 'skipped': counts['skipped_amd'],
489
+ 'error': 0
490
  }
491
  nvidia_stats = {
492
+ 'passed': counts['success_nvidia'],
493
+ 'failed': counts['failed_multi_no_nvidia'] + counts['failed_single_no_nvidia'],
494
+ 'skipped': counts['skipped_nvidia'],
495
+ 'error': 0
496
  }
497
+
498
+ return (amd_stats, nvidia_stats, counts['failed_multi_no_amd'],
499
+ counts['failed_single_no_amd'], counts['failed_multi_no_nvidia'],
500
+ counts['failed_single_no_nvidia'])
501
 
502
 
503
 
 
507
  self.df = pd.DataFrame()
508
  self.available_models = []
509
  self.latest_update_msg = ""
510
+ self.available_dates = []
511
+ self.historical_df = pd.DataFrame()
512
+ self.all_historical_data = pd.DataFrame() # Store all historical data at startup
513
+ self.sample_data = False
514
 
515
  def load_data(self) -> None:
516
  """Load data from the data source."""
 
519
  logger.info("Loading distant data...")
520
  new_df, latest_update_msg = get_distant_data()
521
  self.latest_update_msg = latest_update_msg
522
+ self.available_dates = get_available_dates()
523
+ logger.info(f"Available dates: {len(self.available_dates)} dates")
524
+ if self.available_dates:
525
+ logger.info(f"Date range: {self.available_dates[-1]} to {self.available_dates[0]}")
526
+ else:
527
+ logger.warning("No available dates found")
528
+ self.available_dates = []
529
  except Exception as e:
530
  error_msg = [
531
  "Loading data failed:",
 
535
  "Falling back on sample data."
536
  ]
537
  logger.error("\n".join(error_msg))
538
+ self.sample_data = True
539
  new_df, latest_update_msg = get_sample_data()
540
  self.latest_update_msg = latest_update_msg
541
+ # Generate fake dates for sample data historical functionality
542
+ self.available_dates = generate_fake_dates()
543
+
544
  # Update attributes
545
  self.df = new_df
546
  self.available_models = new_df.index.tolist()
547
+
548
+ # Load all historical data at startup
549
+ self.load_all_historical_data()
550
+
551
  # Log and return distant load status
552
  logger.info(f"Data loaded successfully: {len(self.available_models)} models")
553
  logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
 
565
  msg[model][col] = value
566
  logger.info(json.dumps(msg, indent=4))
567
 
568
def load_all_historical_data(self) -> None:
    """Load the historical data for the whole span of ``self.available_dates``.

    The result is stored in ``self.all_historical_data``. It is set to an
    empty frame when there are no available dates or when loading fails.
    """
    try:
        if not self.available_dates:
            logger.warning("No available dates found, skipping historical data load")
            self.all_historical_data = pd.DataFrame()
            return

        logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
        # The last entry is used as the range start and the first as its end.
        start_date, end_date = self.available_dates[-1], self.available_dates[0]
        self.all_historical_data = get_historical_data(start_date, end_date, self.sample_data)
        logger.info(f"All historical data loaded: {len(self.all_historical_data)} records")
    except Exception as e:
        logger.error(f"Error loading all historical data: {e}")
        self.all_historical_data = pd.DataFrame()
583
+
584
def load_historical_data(self, start_date: str, end_date: str) -> None:
    """Select a date range out of the pre-loaded historical data.

    The rows of ``self.all_historical_data`` whose ``date`` lies between
    ``start_date`` and ``end_date`` (both inclusive, ``YYYY-MM-DD``) are stored
    in ``self.historical_df``, in their existing row order. It is set to an
    empty frame when nothing is pre-loaded, nothing falls in the range, or
    filtering fails.
    """
    try:
        logger.info(f"Filtering historical data from {start_date} to {end_date}")

        if self.all_historical_data.empty:
            logger.warning("No pre-loaded historical data available")
            self.historical_df = pd.DataFrame()
            return

        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")

        # One boolean mask over the whole frame replaces a full-frame
        # comparison per distinct date followed by a concat.
        row_dates = pd.to_datetime(self.all_historical_data['date'], format="%Y-%m-%d")
        in_range = (row_dates >= start_dt) & (row_dates <= end_dt)

        if in_range.any():
            self.historical_df = self.all_historical_data[in_range]
            logger.info(f"Historical data filtered: {len(self.historical_df)} records for {start_date} to {end_date}")
        else:
            self.historical_df = pd.DataFrame()
            logger.warning(f"No historical data found for date range {start_date} to {end_date}")

    except Exception as e:
        logger.error(f"Error filtering historical data: {e}")
        self.historical_df = pd.DataFrame()
614
+
615
  def schedule_data_reload(self):
616
  """Schedule the next data reload."""
617
  def reload_data():
model_page.py CHANGED
@@ -1,19 +1,13 @@
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
- from utils import generate_underlined_line
4
- from data import extract_model_data
5
 
6
  # Figure dimensions
7
  FIGURE_WIDTH_DUAL = 18
8
  FIGURE_HEIGHT_DUAL = 9
9
 
10
- # Colors
11
- COLORS = {
12
- 'passed': '#4CAF50', # Medium green
13
- 'failed': '#E53E3E', # More red
14
- 'skipped': '#FFD54F', # Medium yellow
15
- 'error': '#8B0000' # Dark red
16
- }
17
 
18
  # Styling constants
19
  BLACK = '#000000'
@@ -42,11 +36,11 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
42
  """Create a pie chart for device statistics."""
43
  if not filtered_stats:
44
  ax.text(0.5, 0.5, 'No test results',
45
- horizontalalignment='center', verticalalignment='center',
46
- transform=ax.transAxes, fontsize=14, color='#888888',
47
- fontfamily='monospace', weight='normal')
48
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
49
- pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
50
  ax.axis('off')
51
  return
52
 
@@ -63,7 +57,7 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
63
  shadow=False,
64
  wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH), # Minimal borders
65
  textprops={'fontsize': 12, 'weight': 'normal',
66
- 'color': LABEL_COLOR, 'fontfamily': 'monospace'}
67
  )
68
 
69
  # Enhanced percentage text styling for better readability
@@ -82,10 +76,10 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
82
 
83
  # Device label closer to chart and bigger
84
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
85
- pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
86
 
87
 
88
- def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str, str]:
89
  """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
90
  # Handle case where the dataframe is empty or the model name could not be found in it
91
  if df.empty or model_name not in df.index:
@@ -124,25 +118,25 @@ def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str
124
  # Add subtle separation line between charts - stops at device labels level
125
  line_x = 0.5
126
  fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
127
- color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
128
- alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
129
 
130
  # Add central shared title for model name
131
  fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
132
- color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
133
 
134
  # Clean layout with padding and space for central title
135
  plt.tight_layout()
136
  plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
137
 
138
- amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered))
139
- nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered))
140
 
141
  return fig, amd_failed_info, nvidia_failed_info
142
 
143
 
144
- def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool) -> str:
145
- """Extract failure information from failures object."""
146
  # Catch the case where there is no data
147
  if not data_available:
148
  return generate_underlined_line(f"No data for {device}")
@@ -160,21 +154,43 @@ def prepare_textbox_content(failures: dict[str, list], device: str, data_availab
160
  ""
161
  ]
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  # Add single-gpu failures
164
  if single_failures:
165
  info_lines.append(generate_underlined_line("Single GPU failures:"))
166
  for test in single_failures:
167
- name = test.get("line", "::*could not find name*")
168
- name = name.split("::")[-1]
169
- info_lines.append(name)
170
  info_lines.append("\n")
171
 
172
  # Add multi-gpu failures
173
  if multi_failures:
174
  info_lines.append(generate_underlined_line("Multi GPU failures:"))
175
  for test in multi_failures:
176
- name = test.get("line", "::*could not find name*")
177
- name = name.split("::")[-1]
178
- info_lines.append(name)
179
 
180
- return "\n".join(info_lines)
 
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
+ from utils import generate_underlined_line, COLORS
4
+ from data import extract_model_data, find_failure_first_seen
5
 
6
  # Figure dimensions
7
  FIGURE_WIDTH_DUAL = 18
8
  FIGURE_HEIGHT_DUAL = 9
9
 
10
+ # Colors imported from utils
 
 
 
 
 
 
11
 
12
  # Styling constants
13
  BLACK = '#000000'
 
36
  """Create a pie chart for device statistics."""
37
  if not filtered_stats:
38
  ax.text(0.5, 0.5, 'No test results',
39
+ horizontalalignment='center', verticalalignment='center',
40
+ transform=ax.transAxes, fontsize=14, color='#888888',
41
+ fontfamily='monospace', weight='normal')
42
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
43
+ pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
44
  ax.axis('off')
45
  return
46
 
 
57
  shadow=False,
58
  wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH), # Minimal borders
59
  textprops={'fontsize': 12, 'weight': 'normal',
60
+ 'color': LABEL_COLOR, 'fontfamily': 'monospace'}
61
  )
62
 
63
  # Enhanced percentage text styling for better readability
 
76
 
77
  # Device label closer to chart and bigger
78
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
79
+ pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
80
 
81
 
82
+ def plot_model_stats(df: pd.DataFrame, model_name: str, historical_df: pd.DataFrame = None) -> tuple[plt.Figure, str, str]:
83
  """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
84
  # Handle case where the dataframe is empty or the model name could not be found in it
85
  if df.empty or model_name not in df.index:
 
118
  # Add subtle separation line between charts - stops at device labels level
119
  line_x = 0.5
120
  fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
121
+ color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
122
+ alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
123
 
124
  # Add central shared title for model name
125
  fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
126
+ color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
127
 
128
  # Clean layout with padding and space for central title
129
  plt.tight_layout()
130
  plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
131
 
132
+ amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered), model_name, historical_df)
133
+ nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered), model_name, historical_df)
134
 
135
  return fig, amd_failed_info, nvidia_failed_info
136
 
137
 
138
+ def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool, model_name: str = None, historical_df: pd.DataFrame = None) -> str:
139
+ """Extract failure information from failures object with first seen dates."""
140
  # Catch the case where there is no data
141
  if not data_available:
142
  return generate_underlined_line(f"No data for {device}")
 
154
  ""
155
  ]
156
 
157
+ # Helper function to format failure line with first seen date
158
+ def format_failure_line(test: dict, gpu_type: str) -> str:
159
+ full_name = test.get("line", "::*could not find name*")
160
+ short_name = full_name.split("::")[-1]
161
+
162
+ # Try to find first seen date if historical data is available
163
+ if historical_df is not None and model_name is not None and not historical_df.empty:
164
+ first_seen = find_failure_first_seen(
165
+ historical_df,
166
+ model_name,
167
+ full_name,
168
+ device.lower(),
169
+ gpu_type
170
+ )
171
+ if first_seen:
172
+ # Format date as MM-DD-YYYY
173
+ try:
174
+ from datetime import datetime
175
+ date_obj = datetime.strptime(first_seen, "%Y-%m-%d")
176
+ formatted_date = date_obj.strftime("%m-%d-%Y")
177
+ return f"{short_name} (First seen: {formatted_date})"
178
+ except:
179
+ return f"{short_name} (First seen: {first_seen})"
180
+
181
+ return short_name
182
+
183
  # Add single-gpu failures
184
  if single_failures:
185
  info_lines.append(generate_underlined_line("Single GPU failures:"))
186
  for test in single_failures:
187
+ info_lines.append(format_failure_line(test, "single"))
 
 
188
  info_lines.append("\n")
189
 
190
  # Add multi-gpu failures
191
  if multi_failures:
192
  info_lines.append(generate_underlined_line("Multi GPU failures:"))
193
  for test in multi_failures:
194
+ info_lines.append(format_failure_line(test, "multi"))
 
 
195
 
196
+ return "\n".join(info_lines)
requirements.txt CHANGED
@@ -1 +1,3 @@
1
  matplotlib>=3.8
 
 
 
1
  matplotlib>=3.8
2
+ gradio_toggle
3
+ plotly>=5.0
styles.css CHANGED
@@ -3,6 +3,8 @@
3
  --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
4
  }
5
 
 
 
6
  .gradio-container {
7
  background-color: #000000 !important;
8
  color: white !important;
@@ -173,6 +175,96 @@ div[data-testid="column"]:has(.sidebar) {
173
  transition: max-height 0.3s ease !important;
174
  }
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  /* Model button styling */
178
  .model-button {
@@ -371,52 +463,28 @@ div[data-testid="column"]:has(.sidebar) {
371
 
372
  /* Plot container with smooth transitions and controlled scrolling */
373
  .plot-container {
374
- background-color: #000000 !important;
375
  border: none !important;
376
  transition: opacity 0.6s ease-in-out !important;
377
  flex: 1 1 auto !important;
378
  min-height: 0 !important;
379
  overflow-y: auto !important;
380
  scrollbar-width: thin !important;
381
- scrollbar-color: #333333 #000000 !important;
382
  }
383
 
384
  /* Custom scrollbar for plot container */
385
  .plot-container::-webkit-scrollbar {
386
  width: 8px !important;
387
- background: #000000 !important;
388
- }
389
-
390
- .plot-container::-webkit-scrollbar-track {
391
- background: #000000 !important;
392
- }
393
-
394
- .plot-container::-webkit-scrollbar-thumb {
395
- background-color: #333333 !important;
396
- border-radius: 4px !important;
397
- }
398
-
399
- .plot-container::-webkit-scrollbar-thumb:hover {
400
- background-color: #555555 !important;
401
  }
402
 
403
- /* Gradio plot component styling */
404
- .gr-plot {
405
- background-color: #000000 !important;
406
- transition: opacity 0.6s ease-in-out !important;
407
- }
408
 
409
- .gr-plot .gradio-plot {
410
- background-color: #000000 !important;
411
- transition: opacity 0.6s ease-in-out !important;
412
- }
413
 
414
  .gr-plot img {
415
  transition: opacity 0.6s ease-in-out !important;
416
  }
417
 
418
  /* Target the plot wrapper */
419
- div[data-testid="plot"] {
420
  background-color: #000000 !important;
421
  }
422
 
@@ -427,11 +495,6 @@ div[data-testid="plot"] {
427
  background-color: #000000 !important;
428
  }
429
 
430
- /* Ensure plot area background */
431
- .gr-plot > div,
432
- .plot-container > div {
433
- background-color: #000000 !important;
434
- }
435
 
436
  /* Prevent white flash during plot updates */
437
  .plot-container::before {
@@ -445,24 +508,26 @@ div[data-testid="plot"] {
445
  z-index: -1;
446
  }
447
 
448
- /* Force all plot elements to have black background */
449
- .plot-container *,
450
- .gr-plot *,
451
- div[data-testid="plot"] * {
452
- background-color: #000000 !important;
453
  }
454
 
455
- /* Override any white backgrounds in matplotlib */
456
- .plot-container canvas,
457
- .gr-plot canvas {
458
- background-color: #000000 !important;
459
- }
460
 
461
  /* Text elements */
462
  h1, h2, h3, p, .markdown {
463
  color: white !important;
464
  }
465
 
 
 
 
 
 
 
 
 
 
 
466
  /* Sidebar header enhancement */
467
  .sidebar h1 {
468
  background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
@@ -529,6 +594,116 @@ h1, h2, h3, p, .markdown {
529
  flex-direction: column !important;
530
  }
531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  /* Custom scrollbar for main content */
533
  .main-content {
534
  scrollbar-width: thin !important;
@@ -667,3 +842,203 @@ h1, h2, h3, p, .markdown {
667
  100% { scroll-behavior: auto; }
668
  }
669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
4
  }
5
 
6
+
7
+
8
  .gradio-container {
9
  background-color: #000000 !important;
10
  color: white !important;
 
175
  transition: max-height 0.3s ease !important;
176
  }
177
 
178
+ .history-view-button {
179
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
180
+ color: white !important;
181
+ margin: 0px 0px !important;
182
+ padding: 8px 12px !important;
183
+ font-weight: 600 !important;
184
+ font-size: 14px !important;
185
+ text-transform: uppercase !important;
186
+ letter-spacing: 0.3px !important;
187
+ font-family: monospace !important;
188
+ width: 100% !important;
189
+ max-width: 100% !important;
190
+ white-space: nowrap !important;
191
+ text-overflow: ellipsis !important;
192
+ display: block !important;
193
+ cursor: pointer !important;
194
+ transition: all 0.3s ease !important;
195
+ }
196
+
197
+ /* Failing models filter row */
198
+ .failing-models-filter-row {
199
+ background: linear-gradient(145deg, #1a1a1a, #0f0f0f) !important;
200
+ border: 1px solid #333 !important;
201
+ border-radius: 6px !important;
202
+ padding: 8px 8px !important;
203
+ margin: 0px 0px 12px 0px !important;
204
+ gap: 8px !important;
205
+ }
206
+
207
+ /* Failing models toggle styling */
208
+ .failing-models-toggle {
209
+ background: transparent !important;
210
+ border: none !important;
211
+ padding: 4px 6px !important;
212
+ margin: 0 !important;
213
+ flex: 1 !important;
214
+ }
215
+
216
+ .failing-models-toggle:hover {
217
+ background: rgba(255, 255, 255, 0.05) !important;
218
+ border-radius: 4px !important;
219
+ }
220
+
221
+ .failing-models-toggle label {
222
+ color: #FFFFFF !important;
223
+ font-family: monospace !important;
224
+ font-size: 11px !important;
225
+ font-weight: 600 !important;
226
+ text-transform: uppercase !important;
227
+ letter-spacing: 0.5px !important;
228
+ cursor: pointer !important;
229
+ display: flex !important;
230
+ align-items: center !important;
231
+ white-space: nowrap !important;
232
+ }
233
+
234
+ /* Override specific colors for AMD and NVIDIA to white */
235
+ .amd-toggle label,
236
+ .amd-toggle label span {
237
+ color: #FFFFFF !important;
238
+ }
239
+
240
+ .nvidia-toggle label,
241
+ .nvidia-toggle label span {
242
+ color: #FFFFFF !important;
243
+ }
244
+
245
+ .failing-models-toggle input[type="checkbox"] {
246
+ cursor: pointer !important;
247
+ width: 16px !important;
248
+ height: 16px !important;
249
+ margin-right: 6px !important;
250
+ }
251
+
252
+ .amd-toggle input[type="checkbox"] {
253
+ accent-color: #FF6B6B !important;
254
+ }
255
+
256
+ .nvidia-toggle input[type="checkbox"] {
257
+ accent-color: #76B900 !important;
258
+ }
259
+
260
+ .amd-toggle input[type="checkbox"]:checked {
261
+ accent-color: #FF8888 !important;
262
+ }
263
+
264
+ .nvidia-toggle input[type="checkbox"]:checked {
265
+ accent-color: #8BD918 !important;
266
+ }
267
+
268
 
269
  /* Model button styling */
270
  .model-button {
 
463
 
464
  /* Plot container with smooth transitions and controlled scrolling */
465
  .plot-container {
 
466
  border: none !important;
467
  transition: opacity 0.6s ease-in-out !important;
468
  flex: 1 1 auto !important;
469
  min-height: 0 !important;
470
  overflow-y: auto !important;
471
  scrollbar-width: thin !important;
472
+ padding: 0 !important;
473
  }
474
 
475
  /* Custom scrollbar for plot container */
476
  .plot-container::-webkit-scrollbar {
477
  width: 8px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  }
479
 
 
 
 
 
 
480
 
 
 
 
 
481
 
482
  .gr-plot img {
483
  transition: opacity 0.6s ease-in-out !important;
484
  }
485
 
486
  /* Target the plot wrapper */
487
+ div[data-testid="matplotlib"] {
488
  background-color: #000000 !important;
489
  }
490
 
 
495
  background-color: #000000 !important;
496
  }
497
 
 
 
 
 
 
498
 
499
  /* Prevent white flash during plot updates */
500
  .plot-container::before {
 
508
  z-index: -1;
509
  }
510
 
511
+ .vega-embed {
512
+ position: absolute !important;
 
 
 
513
  }
514
 
 
 
 
 
 
515
 
516
  /* Text elements */
517
  h1, h2, h3, p, .markdown {
518
  color: white !important;
519
  }
520
 
521
+ .toggle {
522
+ margin: 0 auto !important;
523
+ }
524
+
525
+ .toggle-label {
526
+ color: white !important;
527
+ font-family: monospace !important;
528
+ font-size: 14px !important;
529
+ }
530
+
531
  /* Sidebar header enhancement */
532
  .sidebar h1 {
533
  background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
 
594
  flex-direction: column !important;
595
  }
596
 
597
+ /* Summary view - position content slightly higher (not fully centered) */
598
+ .summary-view {
599
+ display: flex !important;
600
+ flex-direction: column !important;
601
+ align-items: center !important;
602
+ justify-content: flex-start !important;
603
+ gap: 10px !important;
604
+ padding-top: 20px !important;
605
+ }
606
+
607
+ /* Keep the summary display centered */
608
+ .summary-view .plot-container {
609
+ width: 100% !important;
610
+ }
611
+
612
+ /* Regressions components stay with the summary as a group */
613
+ .regressions-header {
614
+ margin: 0px 0px 10px 0px !important;
615
+ width: 100% !important;
616
+ max-width: 100% !important;
617
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
618
+ color: white !important;
619
+ border: 1px solid #8B4513 !important;
620
+ border-radius: 5px !important;
621
+ font-weight: 600 !important;
622
+ font-size: 14px !important;
623
+ font-family: monospace !important;
624
+ text-align: left !important;
625
+ width: 100% !important;
626
+ transition: all 0.3s ease !important;
627
+ }
628
+
629
+ .regressions-header:hover {
630
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
631
+ border-color: #B8621B !important;
632
+ }
633
+
634
+ /* Collapsible regressions content */
635
+ .regressions-content-visible {
636
+ max-height: 800px !important;
637
+ overflow-y: auto !important;
638
+ transition: max-height 0.3s ease !important;
639
+ scrollbar-width: thin !important;
640
+ -ms-overflow-style: none !important;
641
+ }
642
+
643
+ .regressions-content-visible::-webkit-scrollbar {
644
+ width: 8px !important;
645
+ background: transparent !important;
646
+ }
647
+
648
+ .regressions-content-visible::-webkit-scrollbar-thumb {
649
+ background-color: #333333 !important;
650
+ border-radius: 4px !important;
651
+ }
652
+
653
+ .regressions-content-hidden {
654
+ max-height: 0 !important;
655
+ overflow: hidden !important;
656
+ transition: max-height 0.3s ease !important;
657
+ }
658
+
659
+ /* New Regressions Panel */
660
+ .regressions-panel {
661
+ background: linear-gradient(145deg, #2a1a1a, #1a0f0f) !important;
662
+ border: 2px solid #8B4513 !important;
663
+ border-radius: 8px !important;
664
+ padding: 15px 20px !important;
665
+ margin: 0px 0px 15px 0px !important;
666
+ box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2) !important;
667
+ animation: pulse-border 2s ease-in-out infinite !important;
668
+ }
669
+
670
+ .regressions-panel h3 {
671
+ color: #FFB86C !important;
672
+ font-family: monospace !important;
673
+ font-size: 16px !important;
674
+ font-weight: bold !important;
675
+ margin: 0 0 10px 0 !important;
676
+ display: flex !important;
677
+ align-items: center !important;
678
+ }
679
+
680
+ .regressions-panel p,
681
+ .regressions-panel ul,
682
+ .regressions-panel li {
683
+ color: #FFFFFF !important;
684
+ font-family: monospace !important;
685
+ font-size: 13px !important;
686
+ line-height: 1.6 !important;
687
+ margin: 4px 0 !important;
688
+ }
689
+
690
+ .regressions-panel strong {
691
+ color: #FF6B6B !important;
692
+ font-weight: 600 !important;
693
+ }
694
+
695
+ /* Pulse animation for new regressions */
696
+ @keyframes pulse-border {
697
+ 0%, 100% {
698
+ border-color: #8B4513;
699
+ box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2);
700
+ }
701
+ 50% {
702
+ border-color: #B8621B;
703
+ box-shadow: 0 4px 16px rgba(255, 107, 107, 0.4);
704
+ }
705
+ }
706
+
707
  /* Custom scrollbar for main content */
708
  .main-content {
709
  scrollbar-width: thin !important;
 
842
  100% { scroll-behavior: auto; }
843
  }
844
 
845
+ /* View toggle buttons */
846
+ .view-toggle-row {
847
+ display: flex !important;
848
+ gap: 5px !important;
849
+ margin-bottom: 15px !important;
850
+ }
851
+
852
+ .view-toggle-button {
853
+ flex: 1 !important;
854
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
855
+ color: white !important;
856
+ border: 1px solid #333 !important;
857
+ border-radius: 5px !important;
858
+ padding: 8px 6px !important;
859
+ transition: all 0.3s ease !important;
860
+ font-weight: 600 !important;
861
+ font-size: 12px !important;
862
+ text-transform: uppercase !important;
863
+ letter-spacing: 0.3px !important;
864
+ font-family: monospace !important;
865
+ height: 50px !important;
866
+ display: flex !important;
867
+ flex-direction: column !important;
868
+ justify-content: center !important;
869
+ align-items: center !important;
870
+ line-height: 1.2 !important;
871
+ cursor: pointer !important;
872
+ }
873
+
874
+ .view-toggle-button:hover {
875
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
876
+ border-color: #555 !important;
877
+ }
878
+
879
+ .view-toggle-active {
880
+ background: linear-gradient(135deg, #4a4a4a, #3e3e3e) !important;
881
+ border: 2px solid #555555 !important;
882
+ box-shadow:
883
+ 0 4px 15px rgba(0, 0, 0, 0.3),
884
+ inset 0 1px 0 rgba(255, 255, 255, 0.2) !important;
885
+ }
886
+
887
+ /* Date selection styling */
888
+ .date-selection {
889
+ flex-grow: 0 !important;
890
+ background: linear-gradient(145deg, #0f0f0f, #1a1a1a) !important;
891
+ border: 1px solid #333 !important;
892
+ border-radius: 8px !important;
893
+ padding: 15px !important;
894
+ margin-bottom: 15px !important;
895
+ transition: all 0.3s ease !important;
896
+ overflow: hidden !important;
897
+ }
898
+
899
+ .date-selection-hidden {
900
+ max-height: 0 !important;
901
+ padding: 0 15px !important;
902
+ margin-bottom: 0 !important;
903
+ border: none !important;
904
+ }
905
+
906
+ .date-selection-visible {
907
+ max-height: 500px !important;
908
+ }
909
+
910
+ .date-header {
911
+ margin-bottom: 10px !important;
912
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
913
+ color: white !important;
914
+ border: 1px solid #333 !important;
915
+ border-radius: 5px !important;
916
+ padding: 8px 12px !important;
917
+ transition: all 0.3s ease !important;
918
+ font-family: monospace !important;
919
+ font-size: 12px !important;
920
+ text-align: left !important;
921
+ cursor: pointer !important;
922
+ width: 100% !important;
923
+ box-sizing: border-box !important;
924
+ }
925
+
926
+ .date-header:hover {
927
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
928
+ border-color: #444 !important;
929
+ transform: translateY(-1px) !important;
930
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3) !important;
931
+ }
932
+
933
+ .date-dropdown {
934
+ background-color: #222222 !important;
935
+ color: white !important;
936
+ border: 1px solid #444444 !important;
937
+ border-radius: 5px !important;
938
+ font-family: monospace !important;
939
+ font-size: 12px !important;
940
+ }
941
+
942
+ .date-dropdown .gr-dropdown {
943
+ background-color: #222222 !important;
944
+ color: white !important;
945
+ border: 1px solid #444444 !important;
946
+ }
947
+
948
+ .load-historical-button {
949
+ background: linear-gradient(135deg, #2d5aa0, #1e3f73) !important;
950
+ color: white !important;
951
+ border: 1px solid #3a6bc7 !important;
952
+ border-radius: 5px !important;
953
+ padding: 8px 12px !important;
954
+ transition: all 0.3s ease !important;
955
+ font-weight: 500 !important;
956
+ font-size: 12px !important;
957
+ text-transform: uppercase !important;
958
+ letter-spacing: 0.1px !important;
959
+ font-family: monospace !important;
960
+ width: 100% !important;
961
+ margin-top: 10px !important;
962
+ }
963
+
964
+ .load-historical-button:hover {
965
+ background: linear-gradient(135deg, #3a6bc7, #2d5aa0) !important;
966
+ border-color: #4a7bd9 !important;
967
+ }
968
+
969
+ /* Historical view styling */
970
+ .historical-view {
971
+ background-color: #000000 !important;
972
+ padding: 30px 20px !important;
973
+ }
974
+
975
+ .time-series-detail-view {
976
+ background-color: #000000 !important;
977
+ padding: 30px 20px !important;
978
+ }
979
+
980
+ /* Plotly chart styling for historical view */
981
+ .historical-view .plot-container,
982
+ .time-series-detail-view .plot-container {
983
+ background-color: #000000 !important;
984
+ }
985
+
986
+ /* Plotly specific text styling */
987
+ .historical-view .js-plotly-plot .plotly,
988
+ .time-series-detail-view .js-plotly-plot .plotly {
989
+ background-color: #000000 !important;
990
+ }
991
+
992
+ /* Plotly legend text */
993
+ .historical-view .js-plotly-plot .legend text,
994
+ .time-series-detail-view .js-plotly-plot .legend text {
995
+ font-size: 16px !important;
996
+ fill: #CCCCCC !important;
997
+ }
998
+
999
+ /* Plotly axis titles */
1000
+ .historical-view .js-plotly-plot .g-xtitle text,
1001
+ .historical-view .js-plotly-plot .g-ytitle text,
1002
+ .time-series-detail-view .js-plotly-plot .g-xtitle text,
1003
+ .time-series-detail-view .js-plotly-plot .g-ytitle text {
1004
+ font-size: 16px !important;
1005
+ fill: #CCCCCC !important;
1006
+ }
1007
+
1008
+ /* Plotly axis tick labels */
1009
+ .historical-view .js-plotly-plot .xtick text,
1010
+ .historical-view .js-plotly-plot .ytick text,
1011
+ .time-series-detail-view .js-plotly-plot .xtick text,
1012
+ .time-series-detail-view .js-plotly-plot .ytick text {
1013
+ font-size: 14px !important;
1014
+ fill: #CCCCCC !important;
1015
+ }
1016
+
1017
+ /* Plotly title */
1018
+ .historical-view .js-plotly-plot .g-gtitle text,
1019
+ .time-series-detail-view .js-plotly-plot .g-gtitle text {
1020
+ font-size: 20px !important;
1021
+ fill: #FFFFFF !important;
1022
+ font-weight: 600 !important;
1023
+ }
1024
+
1025
+ /* Back button styling */
1026
+ .back-button {
1027
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
1028
+ color: white !important;
1029
+ border: 1px solid #333 !important;
1030
+ border-radius: 5px !important;
1031
+ padding: 8px 12px !important;
1032
+ transition: all 0.3s ease !important;
1033
+ font-weight: 500 !important;
1034
+ font-size: 12px !important;
1035
+ font-family: monospace !important;
1036
+ margin-bottom: 15px !important;
1037
+ width: 100% !important;
1038
+ }
1039
+
1040
+ .back-button:hover {
1041
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
1042
+ border-color: #555 !important;
1043
+ color: #74b9ff !important;
1044
+ }
summary_page.py CHANGED
@@ -1,54 +1,47 @@
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
  from data import extract_model_data
 
4
 
5
  # Layout parameters
6
  COLUMNS = 3
7
 
8
  # Derived constants
9
- COLUMN_WIDTH = 100 / COLUMNS # Each column takes 25% of width
10
- BAR_WIDTH = COLUMN_WIDTH * 0.8 # 80% of column width for bars
11
- BAR_MARGIN = COLUMN_WIDTH * 0.1 # 10% margin on each side
12
 
13
  # Figure dimensions
14
- FIGURE_WIDTH = 22 # Wider to accommodate columns and legend
15
- MAX_HEIGHT = 14 # Maximum height in inches
16
  MIN_HEIGHT_PER_ROW = 2.8
17
  FIGURE_PADDING = 1
18
 
19
  # Bar styling
20
- BAR_HEIGHT_RATIO = 0.22 # Bar height as ratio of vertical spacing
21
- VERTICAL_SPACING_RATIO = 0.2 # Base vertical position ratio
22
- AMD_BAR_OFFSET = 0.25 # AMD bar offset ratio
23
- NVIDIA_BAR_OFFSET = 0.54 # NVIDIA bar offset ratio
24
-
25
- # Colors
26
- COLORS = {
27
- 'passed': '#4CAF50',
28
- 'failed': '#E53E3E',
29
- 'skipped': '#FFD54F',
30
- 'error': '#8B0000',
31
- 'empty': "#5B5B5B"
32
- }
33
 
34
  # Font styling
35
  MODEL_NAME_FONT_SIZE = 16
36
  LABEL_FONT_SIZE = 14
37
- LABEL_OFFSET = 1 # Distance of label from bar
38
  FAILURE_RATE_FONT_SIZE = 28
39
 
40
 
41
- def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[list[int], list[int]]:
 
42
  """Calculate overall failure rates for AMD and NVIDIA across all models."""
43
  if df.empty or not available_models:
44
  return 0.0, 0.0
45
 
46
- total_amd_passed = 0
47
- total_amd_failed = 0
48
- total_amd_skipped = 0
49
- total_nvidia_passed = 0
50
- total_nvidia_failed = 0
51
- total_nvidia_skipped = 0
52
 
53
  for model_name in available_models:
54
  if model_name not in df.index:
@@ -58,16 +51,21 @@ def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[li
58
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
59
 
60
  # AMD totals
61
- total_amd_passed += amd_stats['passed']
62
- total_amd_failed += amd_stats['failed'] + amd_stats['error']
63
- total_amd_skipped += amd_stats['skipped']
64
-
 
65
  # NVIDIA totals
66
- total_nvidia_passed += nvidia_stats['passed']
67
- total_nvidia_failed += nvidia_stats['failed'] + nvidia_stats['error']
68
- total_nvidia_skipped += nvidia_stats['skipped']
 
 
 
 
69
 
70
- return [total_amd_passed, total_amd_failed, total_amd_skipped], [total_nvidia_passed, total_nvidia_failed, total_nvidia_skipped]
71
 
72
 
73
  def draw_text_and_bar(
@@ -115,14 +113,7 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
115
  return fig
116
 
117
  # Calculate overall failure rates
118
- amd_counts, nvidia_counts = get_overall_stats(df, available_models)
119
-
120
- amd_non_skipped = amd_counts[0] + amd_counts[1]
121
- amd_failure_rate = (amd_counts[1] / amd_non_skipped) if amd_non_skipped > 0 else 0.0
122
- amd_failure_rate *= 100
123
- nvidia_non_skipped = nvidia_counts[0] + nvidia_counts[1]
124
- nvidia_failure_rate = (nvidia_counts[1] / nvidia_non_skipped) if nvidia_non_skipped > 0 else 0.0
125
- nvidia_failure_rate *= 100
126
 
127
  # Calculate dimensions for N-column layout
128
  model_count = len(available_models)
@@ -143,6 +134,10 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
143
 
144
  visible_model_count = 0
145
  max_y = 0
 
 
 
 
146
 
147
  for i, model_name in enumerate(available_models):
148
  if model_name not in df.index:
@@ -152,6 +147,15 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
152
 
153
  # Extract and process model data
154
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
 
 
 
 
 
 
 
 
 
155
 
156
  # Calculate position in 4-column grid
157
  col = visible_model_count % COLUMNS
@@ -176,44 +180,42 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
176
 
177
  # AMD label and bar in this column
178
  bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
179
- # Draw AMD bar
180
  draw_text_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
181
- # Draw NVIDIA bar
182
  draw_text_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
183
 
184
  # Increment counter for next visible model
185
  visible_model_count += 1
186
 
187
 
 
 
 
 
 
 
 
 
 
188
  # Add AMD and NVIDIA test totals in the bottom left
189
  # Calculate line spacing to align middle with legend
190
  line_height = 0.4 # Height between lines
191
- legend_y = max_y + 1
192
-
193
  # Position the two lines so their middle aligns with legend_y
194
  amd_y = legend_y - line_height / 2
195
  nvidia_y = legend_y + line_height / 2
196
-
197
- amd_totals_text = f"AMD Tests - Passed: {amd_counts[0]}, Failed: {amd_counts[1]}, Skipped: {amd_counts[2]}"
198
- nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_counts[0]}, Failed: {nvidia_counts[1]}, Skipped: {nvidia_counts[2]}"
199
-
200
  ax.text(0, amd_y, amd_totals_text,
201
  ha='left', va='bottom', color='#CCCCCC',
202
  fontsize=14, fontfamily='monospace')
203
-
204
  ax.text(0, nvidia_y, nvidia_totals_text,
205
  ha='left', va='bottom', color='#CCCCCC',
206
  fontsize=14, fontfamily='monospace')
207
-
208
- # Add legend horizontally in bottom right corner
209
- patch_height = 0.3
210
- patch_width = 3
211
-
212
- legend_start_x = 68.7
213
- legend_y = max_y + 1
214
- legend_spacing = 10
215
- legend_font_size = 15
216
-
217
  # Legend entries
218
  legend_items = [
219
  ('passed', 'Passed'),
 
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
  from data import extract_model_data
4
+ from utils import COLORS
5
 
6
  # Layout parameters
7
  COLUMNS = 3
8
 
9
  # Derived constants
10
+ COLUMN_WIDTH = 100 / COLUMNS
11
+ BAR_WIDTH = COLUMN_WIDTH * 0.8
12
+ BAR_MARGIN = COLUMN_WIDTH * 0.1
13
 
14
  # Figure dimensions
15
+ FIGURE_WIDTH = 22
16
+ MAX_HEIGHT = 14
17
  MIN_HEIGHT_PER_ROW = 2.8
18
  FIGURE_PADDING = 1
19
 
20
  # Bar styling
21
+ BAR_HEIGHT_RATIO = 0.22
22
+ VERTICAL_SPACING_RATIO = 0.2
23
+ AMD_BAR_OFFSET = 0.25
24
+ NVIDIA_BAR_OFFSET = 0.54
25
+
26
+ # Colors imported from utils
 
 
 
 
 
 
 
27
 
28
  # Font styling
29
  MODEL_NAME_FONT_SIZE = 16
30
  LABEL_FONT_SIZE = 14
31
+ LABEL_OFFSET = 1
32
  FAILURE_RATE_FONT_SIZE = 28
33
 
34
 
35
+
36
+ def calculate_overall_failure_rates(df: pd.DataFrame, available_models: list[str]) -> tuple[float, float]:
37
  """Calculate overall failure rates for AMD and NVIDIA across all models."""
38
  if df.empty or not available_models:
39
  return 0.0, 0.0
40
 
41
+ total_amd_tests = 0
42
+ total_amd_failures = 0
43
+ total_nvidia_tests = 0
44
+ total_nvidia_failures = 0
 
 
45
 
46
  for model_name in available_models:
47
  if model_name not in df.index:
 
51
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
52
 
53
  # AMD totals
54
+ amd_total = amd_stats['passed'] + amd_stats['failed'] + amd_stats['error']
55
+ if amd_total > 0:
56
+ total_amd_tests += amd_total
57
+ total_amd_failures += amd_stats['failed'] + amd_stats['error']
58
+
59
  # NVIDIA totals
60
+ nvidia_total = nvidia_stats['passed'] + nvidia_stats['failed'] + nvidia_stats['error']
61
+ if nvidia_total > 0:
62
+ total_nvidia_tests += nvidia_total
63
+ total_nvidia_failures += nvidia_stats['failed'] + nvidia_stats['error']
64
+
65
+ amd_failure_rate = (total_amd_failures / total_amd_tests * 100) if total_amd_tests > 0 else 0.0
66
+ nvidia_failure_rate = (total_nvidia_failures / total_nvidia_tests * 100) if total_nvidia_tests > 0 else 0.0
67
 
68
+ return amd_failure_rate, nvidia_failure_rate
69
 
70
 
71
  def draw_text_and_bar(
 
113
  return fig
114
 
115
  # Calculate overall failure rates
116
+ amd_failure_rate, nvidia_failure_rate = calculate_overall_failure_rates(df, available_models)
 
 
 
 
 
 
 
117
 
118
  # Calculate dimensions for N-column layout
119
  model_count = len(available_models)
 
134
 
135
  visible_model_count = 0
136
  max_y = 0
137
+
138
+ # Initialize counters for total tests
139
+ amd_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
140
+ nvidia_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
141
 
142
  for i, model_name in enumerate(available_models):
143
  if model_name not in df.index:
 
147
 
148
  # Extract and process model data
149
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
150
+
151
+ # Accumulate totals
152
+ amd_totals['passed'] += amd_stats['passed']
153
+ amd_totals['failed'] += amd_stats['failed'] + amd_stats['error']
154
+ amd_totals['skipped'] += amd_stats['skipped']
155
+
156
+ nvidia_totals['passed'] += nvidia_stats['passed']
157
+ nvidia_totals['failed'] += nvidia_stats['failed'] + nvidia_stats['error']
158
+ nvidia_totals['skipped'] += nvidia_stats['skipped']
159
 
160
  # Calculate position in 4-column grid
161
  col = visible_model_count % COLUMNS
 
180
 
181
  # AMD label and bar in this column
182
  bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
 
183
  draw_text_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
184
+
185
  draw_text_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
186
 
187
  # Increment counter for next visible model
188
  visible_model_count += 1
189
 
190
 
191
+ # Add legend horizontally in bottom right corner
192
+ patch_height = 0.3
193
+ patch_width = 3
194
+
195
+ legend_start_x = 68.7
196
+ legend_y = max_y + 1
197
+ legend_spacing = 10
198
+ legend_font_size = 15
199
+
200
  # Add AMD and NVIDIA test totals in the bottom left
201
  # Calculate line spacing to align middle with legend
202
  line_height = 0.4 # Height between lines
203
+
 
204
  # Position the two lines so their middle aligns with legend_y
205
  amd_y = legend_y - line_height / 2
206
  nvidia_y = legend_y + line_height / 2
207
+
208
+ amd_totals_text = f"AMD Tests - Passed: {amd_totals['passed']}, Failed: {amd_totals['failed']}, Skipped: {amd_totals['skipped']}"
209
+ nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_totals['passed']}, Failed: {nvidia_totals['failed']}, Skipped: {nvidia_totals['skipped']}"
210
+
211
  ax.text(0, amd_y, amd_totals_text,
212
  ha='left', va='bottom', color='#CCCCCC',
213
  fontsize=14, fontfamily='monospace')
214
+
215
  ax.text(0, nvidia_y, nvidia_totals_text,
216
  ha='left', va='bottom', color='#CCCCCC',
217
  fontsize=14, fontfamily='monospace')
218
+
 
 
 
 
 
 
 
 
 
219
  # Legend entries
220
  legend_items = [
221
  ('passed', 'Passed'),
time_series_gradio.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime
4
+ from data import extract_model_data
5
+ from utils import COLORS
6
+ import gradio as gr
7
+ import plotly.express as px
8
+ import plotly.graph_objects as go
9
+
10
def create_time_series_summary_gradio(historical_df: pd.DataFrame) -> dict:
    """Build the summary time-series figures for the historical CI data.

    Args:
        historical_df: Historical results with a ``date`` column. Every row is
            passed to ``extract_model_data``; the first two items it returns
            are used as the AMD and NVIDIA stats dicts (keys ``passed``,
            ``failed``, ``error`` and ``skipped``).

    Returns:
        A dict with three Plotly figures under the keys ``failure_rates``,
        ``amd_tests`` and ``nvidia_tests``. When there is no usable data each
        key holds its own titled placeholder figure.
    """
    platforms = ('amd', 'nvidia')
    test_types = ('passed', 'failed', 'skipped')

    # Dark theme shared by every figure built here.
    base_layout = dict(height=500, font=dict(size=16, color='#CCCCCC'),
                       paper_bgcolor='#000000', plot_bgcolor='#1a1a1a',
                       margin=dict(b=130))
    chart_layout = dict(base_layout, title_font_size=20, hovermode='x unified',
                        legend=dict(font=dict(size=16), bgcolor='rgba(0,0,0,0.5)',
                                    orientation="h", yanchor="bottom", y=-0.4,
                                    xanchor="center", x=0.5))
    axis_style = dict(title_font_size=16, tickfont_size=14,
                      gridcolor='#333333', showgrid=True)

    def empty_fig(title):
        """Return a fresh themed figure that only carries a title."""
        return go.Figure().update_layout(title=title, **base_layout)

    if historical_df is None or historical_df.empty or 'date' not in historical_df.columns:
        message = "No historical data available"
        # One figure per key so that the outputs never share mutable state.
        return {'failure_rates': empty_fig(message),
                'amd_tests': empty_fig(message),
                'nvidia_tests': empty_fig(message)}

    # Aggregate per-day totals for both platforms.
    daily_stats = []
    for date in sorted(historical_df['date'].unique()):
        day_rows = historical_df[historical_df['date'] == date]
        totals = {platform: dict.fromkeys(test_types, 0) for platform in platforms}

        for _, row in day_rows.iterrows():
            # Extract once per row; index 0 holds the AMD stats, index 1 the NVIDIA stats.
            model_stats = extract_model_data(row)
            for position, platform in enumerate(platforms):
                stats = model_stats[position]
                failures = stats['failed'] + stats['error']
                # Rows without any executed test are ignored entirely
                # (their skipped count included).
                if stats['passed'] + failures > 0:
                    totals[platform]['passed'] += stats['passed']
                    totals[platform]['failed'] += failures
                    totals[platform]['skipped'] += stats['skipped']

        day = {'date': date}
        for platform in platforms:
            failed = totals[platform]['failed']
            executed = totals[platform]['passed'] + failed
            day[f'{platform}_failure_rate'] = (failed / executed * 100) if executed > 0 else 0
            for test_type in test_types:
                day[f'{platform}_{test_type}'] = totals[platform][test_type]
        daily_stats.append(day)

    def delta(index, key):
        """Return the day-over-day difference for ``key`` (0 for the first day)."""
        if index == 0:
            return 0
        return daily_stats[index][key] - daily_stats[index - 1][key]

    fr_df = pd.DataFrame([
        {'date': day['date'],
         'failure_rate': day[f'{platform}_failure_rate'],
         'platform': platform.upper(),
         'change': delta(index, f'{platform}_failure_rate')}
        for index, day in enumerate(daily_stats)
        for platform in platforms
    ])

    def build_test_data(platform):
        """Return the long-format passed/failed/skipped counts for one platform."""
        return pd.DataFrame([
            {'date': day['date'],
             'count': day[f'{platform}_{test_type}'],
             'test_type': test_type.capitalize(),
             'change': delta(index, f'{platform}_{test_type}')}
            for index, day in enumerate(daily_stats)
            for test_type in test_types
        ])

    fig_fr = go.Figure()
    for label, line_color, marker_color in [('NVIDIA', '#76B900', '#FFFFFF'),
                                            ('AMD', '#ED1C24', '#404040')]:
        series = fr_df[fr_df['platform'] == label]
        if series.empty:
            continue
        fig_fr.add_trace(go.Scatter(
            x=series['date'], y=series['failure_rate'], mode='lines+markers',
            name=label, line=dict(color=line_color, width=3),
            marker=dict(size=12, color=marker_color, line=dict(color=line_color, width=2)),
            hovertemplate=f'<b>{label}</b><br>Date: %{{x}}<br>Failure Rate: %{{y:.2f}}%<extra></extra>'))

    # `title` must come before `title_font_size` (inside chart_layout/axis_style):
    # assigning a title string afterwards would replace the title object and
    # drop the font size that was just set.
    fig_fr.update_layout(title="Overall Failure Rates Over Time",
                         xaxis=dict(title='Date', **axis_style),
                         yaxis=dict(title='Failure Rate (%)', **axis_style),
                         **chart_layout)

    def create_line_fig(df, title):
        """Return the themed passed/failed/skipped line chart for one platform."""
        fig = px.line(df, x='date', y='count', color='test_type',
                      color_discrete_map={"Passed": COLORS['passed'],
                                          "Failed": COLORS['failed'],
                                          "Skipped": COLORS['skipped']},
                      title=title,
                      labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'})
        fig.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
        fig.update_layout(xaxis=dict(axis_style), yaxis=dict(axis_style), **chart_layout)
        return fig

    return {'failure_rates': fig_fr,
            'amd_tests': create_line_fig(build_test_data('amd'), "AMD Test Results Over Time"),
            'nvidia_tests': create_line_fig(build_test_data('nvidia'), "NVIDIA Test Results Over Time")}
91
+
92
def create_model_time_series_gradio(historical_df: pd.DataFrame, model_name: str) -> dict:
    """Build the per-platform time-series figures for a single model.

    Args:
        historical_df: Historical results indexed by model name, with a
            ``date`` column and the per-platform count columns
            ``success_<platform>``, ``failed_multi_no_<platform>``,
            ``failed_single_no_<platform>`` and ``skipped_<platform>``
            (a missing column counts as 0).
        model_name: Model to plot; matched against the index case-insensitively.

    Returns:
        A dict with two Plotly figures under the keys ``amd_plot`` and
        ``nvidia_plot``. Titled placeholder figures are returned when the
        model has no usable historical rows.
    """
    # Dark theme shared by every figure built here.
    base_layout = dict(height=500, font=dict(size=16, color='#CCCCCC'),
                       paper_bgcolor='#000000', plot_bgcolor='#1a1a1a',
                       margin=dict(b=130))
    chart_layout = dict(base_layout, title_font_size=20, hovermode='x unified',
                        legend=dict(font=dict(size=16), bgcolor='rgba(0,0,0,0.5)',
                                    orientation="h", yanchor="bottom", y=-0.4,
                                    xanchor="center", x=0.5))
    axis_style = dict(title_font_size=16, tickfont_size=14,
                      gridcolor='#333333', showgrid=True)

    def chart_title(platform_label):
        return f"{model_name.upper()} - {platform_label} Results Over Time"

    def empty_fig(platform_label):
        """Return a fresh themed figure that only carries the chart title."""
        return go.Figure().update_layout(title=chart_title(platform_label), **base_layout)

    def empty_figs():
        return {'amd_plot': empty_fig('AMD'), 'nvidia_plot': empty_fig('NVIDIA')}

    if historical_df is None or historical_df.empty or 'date' not in historical_df.columns:
        return empty_figs()

    md = historical_df[historical_df.index.str.lower() == model_name.lower()]
    if md.empty:
        return empty_figs()

    # Resolve the first row of every date once; both platforms reuse it.
    snapshots = []
    for date in sorted(md['date'].unique()):
        rows = md[md['date'] == date]
        if not rows.empty:
            snapshots.append((date, rows.iloc[0]))

    def counts_for(row, platform):
        """Return the passed/failed/skipped counts of one row, keyed by label."""
        return {
            'Passed': row.get(f'success_{platform}', 0),
            'Failed': (row.get(f'failed_multi_no_{platform}', 0)
                       + row.get(f'failed_single_no_{platform}', 0)),
            'Skipped': row.get(f'skipped_{platform}', 0),
        }

    def build_data(platform):
        """Return the long-format counts (with day-over-day change) for one platform."""
        records = []
        previous = None
        for date, row in snapshots:
            current = counts_for(row, platform)
            for test_type, count in current.items():
                # The first snapshot has no baseline, so its change is 0,
                # as in the summary charts.
                change = 0 if previous is None else count - previous[test_type]
                records.append({'date': date, 'count': count,
                                'test_type': test_type, 'change': change})
            previous = current
        return pd.DataFrame(records, columns=['date', 'count', 'test_type', 'change'])

    def create_fig(df, platform_label):
        """Return the themed line chart, or a placeholder when there is no data."""
        if df.empty:
            return empty_fig(platform_label)
        fig = px.line(df, x='date', y='count', color='test_type',
                      color_discrete_map={"Passed": COLORS['passed'],
                                          "Failed": COLORS['failed'],
                                          "Skipped": COLORS['skipped']},
                      title=chart_title(platform_label),
                      labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'})
        fig.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
        fig.update_layout(xaxis=dict(axis_style), yaxis=dict(axis_style), **chart_layout)
        return fig

    return {'amd_plot': create_fig(build_data('amd'), 'AMD'),
            'nvidia_plot': create_fig(build_data('nvidia'), 'NVIDIA')}
utils.py CHANGED
@@ -49,3 +49,15 @@ logger = setup_logger()
49
 
50
  def generate_underlined_line(text: str) -> str:
51
  return text + "\n" + "─" * len(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
def generate_underlined_line(text: str) -> str:
    """Return ``text`` followed by a newline and a '─' rule of the same length."""
    underline = "─" * len(text)
    return f"{text}\n{underline}"
52
+
53
+
54
# Shared color scheme across all visualization modules
COLORS = {
    # Colors keyed by test outcome
    "passed": "#4CAF50",
    "failed": "#E53E3E",
    "skipped": "#FFD54F",
    "error": "#8B0000",
    # NOTE(review): presumably the fill used when there is nothing to show — confirm
    "empty": "#5B5B5B",
    # Colors keyed by platform
    "amd": "#ED1C24",
    "nvidia": "#76B900",
}