Add historical data visualization features

#7
by badaoui HF Staff - opened
Files changed (10) hide show
  1. app.py +692 -84
  2. data.py +466 -13
  3. logos/amd_logo.png +0 -0
  4. logos/nvidia_logo.png +0 -0
  5. model_page.py +44 -22
  6. requirements.txt +2 -0
  7. styles.css +416 -41
  8. summary_page.py +145 -64
  9. time_series.py +316 -0
  10. time_series_gradio.py +556 -0
app.py CHANGED
@@ -2,11 +2,16 @@ import matplotlib.pyplot as plt
2
  import matplotlib
3
  import pandas as pd
4
  import gradio as gr
 
5
 
6
- from data import CIResults
7
  from utils import logger
8
  from summary_page import create_summary_page
9
  from model_page import plot_model_stats
 
 
 
 
10
 
11
 
12
  # Configure matplotlib to prevent memory warnings and set dark background
@@ -19,6 +24,12 @@ plt.ioff() # Turn off interactive mode to prevent figure accumulation
19
  # Load data once at startup
20
  Ci_results = CIResults()
21
  Ci_results.load_data()
 
 
 
 
 
 
22
  # Start the auto-reload scheduler
23
  Ci_results.schedule_data_reload()
24
 
@@ -49,6 +60,34 @@ def model_has_failures(model_name):
49
  nvidia_single_failures > 0,
50
  ])
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Function to get current description text
54
  def get_description_text():
@@ -66,6 +105,46 @@ def get_description_text():
66
  msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
67
  return "<br>".join(msg)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # Load CSS from external file
70
  def load_css():
71
  try:
@@ -77,9 +156,19 @@ def load_css():
77
  logger.warning("styles.css not found, using minimal default styles")
78
  return "body { background: #000; color: #fff; }"
79
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # Create the Gradio interface with sidebar and dark theme
82
- with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cache=(3600, 3600)) as demo:
83
 
84
 
85
  with gr.Row():
@@ -91,7 +180,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
91
  description_text = get_description_text()
92
  description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
93
 
94
- # Summary button at the top
95
  summary_button = gr.Button(
96
  "summary\n📊",
97
  variant="primary",
@@ -99,6 +188,14 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
99
  elem_classes=["summary-button"]
100
  )
101
 
 
 
 
 
 
 
 
 
102
  # Model selection header (clickable toggle)
103
  model_toggle_button = gr.Button(
104
  f"► Select model ({len(Ci_results.available_models)})",
@@ -108,82 +205,273 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
108
 
109
  # Model buttons container (collapsible) - start folded
110
  with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # Create individual buttons for each model
112
  model_buttons = []
113
  model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
114
 
 
 
 
 
 
 
115
  print(f"Creating {len(model_choices)} model buttons: {model_choices}")
116
 
117
  for model_name in model_choices:
118
- # Check if model has failures to determine styling
119
- has_failures = model_has_failures(model_name)
120
- button_classes = ["model-button"]
121
- if has_failures:
122
- button_classes.append("model-button-failed")
123
 
124
- btn = gr.Button(
125
- model_name,
126
- variant="secondary",
127
- size="sm",
128
- elem_classes=button_classes
129
- )
130
- model_buttons.append(btn)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  # CI job links at bottom of sidebar
133
  ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
134
 
135
  # Main content area
136
  with gr.Column(scale=4, elem_classes=["main-content"]):
137
- # Summary display (default view)
138
- summary_display = gr.Plot(
139
- value=create_summary_page(Ci_results.df, Ci_results.available_models),
140
- label="",
141
- format="png",
142
- elem_classes=["plot-container"],
143
- visible=True
144
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- # Detailed view components (hidden by default)
147
- with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
 
 
 
 
 
 
148
 
149
- # Create the plot output
150
- plot_output = gr.Plot(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  label="",
152
- format="png",
153
  elem_classes=["plot-container"]
154
  )
155
 
156
- # Create two separate failed tests displays in a row layout
157
- with gr.Row():
158
- with gr.Column(scale=1):
159
- amd_failed_tests_output = gr.Textbox(
160
- value="",
161
- lines=8,
162
- max_lines=8,
163
- interactive=False,
164
- container=False,
165
- elem_classes=["failed-tests"]
166
- )
167
- with gr.Column(scale=1):
168
- nvidia_failed_tests_output = gr.Textbox(
169
- value="",
170
- lines=8,
171
- max_lines=8,
172
- interactive=False,
173
- container=False,
174
- elem_classes=["failed-tests"]
175
- )
176
 
177
- # Set up click handlers for model buttons
178
- for i, btn in enumerate(model_buttons):
179
- model_name = model_choices[i]
180
- btn.click(
181
- fn=lambda selected_model=model_name: plot_model_stats(Ci_results.df, selected_model),
182
- outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
183
- ).then(
184
- fn=lambda: [gr.update(visible=False), gr.update(visible=True)],
185
- outputs=[summary_display, detail_view]
186
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  # Model toggle functionality
189
  def toggle_model_list(current_visible):
@@ -203,6 +491,10 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
203
 
204
  # Track model list visibility state
205
  model_list_visible = gr.State(False)
 
 
 
 
206
 
207
  model_toggle_button.click(
208
  fn=toggle_model_list,
@@ -210,17 +502,64 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
210
  outputs=[model_toggle_button, model_list_container, model_list_visible]
211
  )
212
 
213
- # Summary button click handler
214
- def show_summary_and_update_links():
215
- """Show summary page and update CI links."""
216
- return create_summary_page(Ci_results.df, Ci_results.available_models), get_description_text(), get_ci_links()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
  summary_button.click(
219
- fn=show_summary_and_update_links,
220
- outputs=[summary_display, description_display, ci_links_display]
221
- ).then(
222
- fn=lambda: [gr.update(visible=True), gr.update(visible=False)],
223
- outputs=[summary_display, detail_view]
 
 
 
 
 
 
 
 
 
 
 
 
224
  )
225
 
226
  # Function to get CI job links
@@ -270,25 +609,19 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
270
  # AMD links
271
  if amd_multi_link or amd_single_link:
272
  links_md += "**AMD:**\n"
273
- if amd_multi_link == amd_single_link:
274
- links_md += f"• [Single and Multi GPU]({amd_multi_link})\n"
275
- else:
276
- if amd_multi_link:
277
- links_md += f"• [Multi GPU]({amd_multi_link})\n"
278
- if amd_single_link:
279
- links_md += f"• [Single GPU]({amd_single_link})\n"
280
  links_md += "\n"
281
 
282
  # NVIDIA links
283
  if nvidia_multi_link or nvidia_single_link:
284
  links_md += "**NVIDIA:**\n"
285
- if nvidia_single_link == nvidia_multi_link:
286
- links_md += f"• [Single and Multi GPU]({nvidia_multi_link})\n"
287
- else:
288
- if nvidia_multi_link:
289
- links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
290
- if nvidia_single_link:
291
- links_md += f"• [Single GPU]({nvidia_single_link})\n"
292
 
293
  if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
294
  links_md += "*No links available*"
@@ -299,10 +632,285 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
299
  return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
300
 
301
 
302
- # Auto-update CI links when the interface loads
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  demo.load(
304
- fn=get_ci_links,
305
- outputs=[ci_links_display]
306
  )
307
 
308
 
 
2
  import matplotlib
3
  import pandas as pd
4
  import gradio as gr
5
+ from gradio_toggle import Toggle
6
 
7
+ from data import CIResults, find_new_regressions
8
  from utils import logger
9
  from summary_page import create_summary_page
10
  from model_page import plot_model_stats
11
+ from time_series_gradio import (
12
+ create_time_series_summary_gradio,
13
+ create_model_time_series_gradio,
14
+ )
15
 
16
 
17
  # Configure matplotlib to prevent memory warnings and set dark background
 
24
  # Load data once at startup
25
  Ci_results = CIResults()
26
  Ci_results.load_data()
27
+ # Preload historical data at startup
28
+ if Ci_results.available_dates:
29
+ start_date_val = Ci_results.available_dates[-1] # Last date (oldest)
30
+ end_date_val = Ci_results.available_dates[0] # First date (newest)
31
+ Ci_results.load_historical_data(start_date_val, end_date_val)
32
+ logger.info(f"Preloaded historical data: {len(Ci_results.historical_df)} records")
33
  # Start the auto-reload scheduler
34
  Ci_results.schedule_data_reload()
35
 
 
60
  nvidia_single_failures > 0,
61
  ])
62
 
63
def model_has_amd_failures(model_name):
    """Tell whether the current CI results list any AMD failure for a model.

    The lookup is done on the lower-cased model name against the index of
    ``Ci_results.df``. Returns ``False`` when no results are loaded, when the
    frame is empty, or when the model is not in the index.
    """
    results = Ci_results.df
    if results is None or results.empty:
        return False

    key = model_name.lower()
    if key not in results.index:
        return False

    stats = results.loc[key]
    # Missing columns count as zero failures.
    multi_failed = stats.get('failed_multi_no_amd', 0) > 0
    return multi_failed or stats.get('failed_single_no_amd', 0) > 0
76
+
77
def model_has_nvidia_failures(model_name):
    """Tell whether the current CI results list any NVIDIA failure for a model.

    The lookup is done on the lower-cased model name against the index of
    ``Ci_results.df``. Returns ``False`` when no results are loaded, when the
    frame is empty, or when the model is not in the index.
    """
    results = Ci_results.df
    if results is None or results.empty:
        return False

    key = model_name.lower()
    if key not in results.index:
        return False

    stats = results.loc[key]
    # Missing columns count as zero failures.
    multi_failed = stats.get('failed_multi_no_nvidia', 0) > 0
    return multi_failed or stats.get('failed_single_no_nvidia', 0) > 0
90
+
91
 
92
  # Function to get current description text
93
  def get_description_text():
 
105
  msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
106
  return "<br>".join(msg)
107
 
108
+ # Function to format new regressions for display
109
def get_regressions_text():
    """Build the Markdown shown in the "New Regressions" panel.

    Regressions come from ``find_new_regressions`` applied to the current
    results and ``Ci_results.all_historical_data``. Each entry is read through
    its ``'model'``, ``'device'``, ``'gpu_type'`` and ``'test'`` keys and
    grouped under a "model (DEVICE gpu_type)" heading. At most five tests are
    listed per heading, followed by a count of the remainder.

    Any exception is logged and replaced by a fallback message so the panel
    still renders.
    """
    try:
        regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)

        if not regressions:
            return "### 🎉 No New Regressions\nAll failures were present in the previous run."

        # Bucket test names by "model (DEVICE gpu_type)".
        tests_by_heading = {}
        for entry in regressions:
            model = entry['model']
            device = entry['device'].upper()
            gpu_type = entry['gpu_type']
            test = entry['test']
            tests_by_heading.setdefault(f"{model} ({device} {gpu_type})", []).append(test)

        # Header line followed by a blank separator.
        out = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)", ""]

        for heading in sorted(tests_by_heading):
            failing = tests_by_heading[heading]
            out.append(f"**{heading}:**")
            # Only the first five tests are spelled out per heading.
            out.extend(f" • {name}" for name in failing[:5])
            if len(failing) > 5:
                out.append(f" • ... and {len(failing) - 5} more")
            out.append("")

        return "\n".join(out)
    except Exception as e:
        logger.error(f"Error getting regressions: {e}")
        return "### ⚠️ New Regressions\n*Unable to load regression data*"
147
+
148
  # Load CSS from external file
149
  def load_css():
150
  try:
 
156
  logger.warning("styles.css not found, using minimal default styles")
157
  return "body { background: #000; color: #fff; }"
158
 
159
+ js_func = """
160
+ function refresh() {
161
+ const url = new URL(window.location);
162
+
163
+ if (url.searchParams.get('__theme') !== 'dark') {
164
+ url.searchParams.set('__theme', 'dark');
165
+ window.location.href = url.href;
166
+ }
167
+ }
168
+ """
169
 
170
  # Create the Gradio interface with sidebar and dark theme
171
+ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func) as demo:
172
 
173
 
174
  with gr.Row():
 
180
  description_text = get_description_text()
181
  description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
182
 
183
+ # Summary button (for current view)
184
  summary_button = gr.Button(
185
  "summary\n📊",
186
  variant="primary",
 
188
  elem_classes=["summary-button"]
189
  )
190
 
191
+ history_view_button = Toggle(
192
+ label="History view",
193
+ value=False,
194
+ interactive=True,
195
+ elem_classes=["history-view-button"]
196
+ )
197
+
198
+
199
  # Model selection header (clickable toggle)
200
  model_toggle_button = gr.Button(
201
  f"► Select model ({len(Ci_results.available_models)})",
 
205
 
206
  # Model buttons container (collapsible) - start folded
207
  with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
208
+ # Toggles for filtering failing models by device
209
+ with gr.Row(elem_classes=["failing-models-filter-row"]):
210
+ show_amd_failures = gr.Checkbox(
211
+ label="AMD failures",
212
+ value=False,
213
+ interactive=True,
214
+ elem_classes=["failing-models-toggle", "amd-toggle"]
215
+ )
216
+ show_nvidia_failures = gr.Checkbox(
217
+ label="NVIDIA failures",
218
+ value=False,
219
+ interactive=True,
220
+ elem_classes=["failing-models-toggle", "nvidia-toggle"]
221
+ )
222
  # Create individual buttons for each model
223
  model_buttons = []
224
  model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
225
 
226
+ # Categorize models by failure type
227
+ amd_failing_models = []
228
+ nvidia_failing_models = []
229
+ both_failing_models = []
230
+ passing_models = []
231
+
232
  print(f"Creating {len(model_choices)} model buttons: {model_choices}")
233
 
234
  for model_name in model_choices:
235
+ has_amd = model_has_amd_failures(model_name)
236
+ has_nvidia = model_has_nvidia_failures(model_name)
 
 
 
237
 
238
+ if has_amd and has_nvidia:
239
+ both_failing_models.append(model_name)
240
+ elif has_amd:
241
+ amd_failing_models.append(model_name)
242
+ elif has_nvidia:
243
+ nvidia_failing_models.append(model_name)
244
+ else:
245
+ passing_models.append(model_name)
246
+
247
+ # Container for all models (visible by default)
248
+ with gr.Column(visible=True, elem_classes=["all-models-container"]) as all_models_container:
249
+ for model_name in model_choices:
250
+ has_failures = model_has_failures(model_name)
251
+ button_classes = ["model-button"]
252
+ if has_failures:
253
+ button_classes.append("model-button-failed")
254
+
255
+ btn = gr.Button(
256
+ model_name,
257
+ variant="secondary",
258
+ size="sm",
259
+ elem_classes=button_classes
260
+ )
261
+ model_buttons.append(btn)
262
+
263
+ # Container for AMD failures (hidden by default)
264
+ amd_buttons = []
265
+ with gr.Column(visible=False, elem_classes=["amd-failures-container"]) as amd_failures_container:
266
+ amd_models_to_show = amd_failing_models + both_failing_models
267
+ for model_name in sorted(amd_models_to_show):
268
+ btn = gr.Button(
269
+ model_name,
270
+ variant="secondary",
271
+ size="sm",
272
+ elem_classes=["model-button", "model-button-failed"]
273
+ )
274
+ amd_buttons.append(btn)
275
+
276
+ # Container for NVIDIA failures (hidden by default)
277
+ nvidia_buttons = []
278
+ with gr.Column(visible=False, elem_classes=["nvidia-failures-container"]) as nvidia_failures_container:
279
+ nvidia_models_to_show = nvidia_failing_models + both_failing_models
280
+ for model_name in sorted(nvidia_models_to_show):
281
+ btn = gr.Button(
282
+ model_name,
283
+ variant="secondary",
284
+ size="sm",
285
+ elem_classes=["model-button", "model-button-failed"]
286
+ )
287
+ nvidia_buttons.append(btn)
288
+
289
+ # Container for both AMD and NVIDIA failures (hidden by default)
290
+ both_buttons = []
291
+ with gr.Column(visible=False, elem_classes=["both-failures-container"]) as both_failures_container:
292
+ all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
293
+ for model_name in sorted(all_failing):
294
+ btn = gr.Button(
295
+ model_name,
296
+ variant="secondary",
297
+ size="sm",
298
+ elem_classes=["model-button", "model-button-failed"]
299
+ )
300
+ both_buttons.append(btn)
301
 
302
  # CI job links at bottom of sidebar
303
  ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
304
 
305
  # Main content area
306
  with gr.Column(scale=4, elem_classes=["main-content"]):
307
+ # Current view components
308
+ with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
309
+ # Summary view (contains summary plot and regressions panel)
310
+ with gr.Column(visible=True, elem_classes=["summary-view"]) as summary_view:
311
+ # Summary display (default view)
312
+ summary_display = gr.Plot(
313
+ value=create_summary_page(Ci_results.df, Ci_results.available_models),
314
+ label="",
315
+ format="png",
316
+ elem_classes=["plot-container"],
317
+ visible=True
318
+ )
319
+
320
+ # New Regressions section (at the bottom, collapsible)
321
+ regressions_toggle_button = gr.Button(
322
+ "► New Regressions",
323
+ variant="secondary",
324
+ elem_classes=["regressions-header"]
325
+ )
326
+
327
+ with gr.Column(elem_classes=["regressions-content", "regressions-content-hidden"]) as regressions_content:
328
+ regressions_panel = gr.Markdown(
329
+ value=get_regressions_text(),
330
+ elem_classes=["regressions-panel"]
331
+ )
332
 
333
+ # Detailed view components (hidden by default)
334
+ with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
335
+ # Create the plot output
336
+ plot_output = gr.Plot(
337
+ label="",
338
+ format="png",
339
+ elem_classes=["plot-container"]
340
+ )
341
 
342
+ # Create two separate failed tests displays in a row layout
343
+ with gr.Row():
344
+ with gr.Column(scale=1):
345
+ amd_failed_tests_output = gr.Textbox(
346
+ value="",
347
+ lines=8,
348
+ max_lines=8,
349
+ interactive=False,
350
+ container=False,
351
+ elem_classes=["failed-tests"]
352
+ )
353
+ with gr.Column(scale=1):
354
+ nvidia_failed_tests_output = gr.Textbox(
355
+ value="",
356
+ lines=8,
357
+ max_lines=8,
358
+ interactive=False,
359
+ container=False,
360
+ elem_classes=["failed-tests"]
361
+ )
362
+
363
+ # Historical view components (hidden by default)
364
+ with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:
365
+
366
+
367
+ # Time-series summary displays (multiple Gradio plots)
368
+ time_series_failure_rates = gr.Plot(
369
+ label="",
370
+ elem_classes=["plot-container"]
371
+ )
372
+
373
+ time_series_amd_tests = gr.Plot(
374
+ label="",
375
+ elem_classes=["plot-container"]
376
+ )
377
+
378
+ time_series_nvidia_tests = gr.Plot(
379
  label="",
 
380
  elem_classes=["plot-container"]
381
  )
382
 
383
+ # Time-series model view (hidden by default)
384
+ with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
385
+ # Time-series plots for specific model (with spacing)
386
+ time_series_amd_model_plot = gr.Plot(
387
+ label="",
388
+ elem_classes=["plot-container"]
389
+ )
390
+
391
+ time_series_nvidia_model_plot = gr.Plot(
392
+ label="",
393
+ elem_classes=["plot-container"]
394
+ )
 
 
 
 
 
 
 
 
395
 
396
+ # Failing models filter functionality
397
def filter_failing_models(show_amd, show_nvidia):
    """Choose which of the four model-button containers is visible.

    Exactly one container is shown for any checkbox combination:

    - neither checked: the container with every model
    - AMD only: models with AMD failures (including those failing on both)
    - NVIDIA only: models with NVIDIA failures (including those failing on both)
    - both checked: every model with any failure

    Returns four ``gr.update`` objects in the order all-models, AMD,
    NVIDIA, both.
    """
    amd_on = bool(show_amd)
    nvidia_on = bool(show_nvidia)

    return (
        gr.update(visible=not amd_on and not nvidia_on),  # all_models_container
        gr.update(visible=amd_on and not nvidia_on),      # amd_failures_container
        gr.update(visible=nvidia_on and not amd_on),      # nvidia_failures_container
        gr.update(visible=amd_on and nvidia_on),          # both_failures_container
    )
438
+
439
+ # Connect both checkboxes to the filter function
440
+ show_amd_failures.change(
441
+ fn=filter_failing_models,
442
+ inputs=[show_amd_failures, show_nvidia_failures],
443
+ outputs=[all_models_container, amd_failures_container, nvidia_failures_container, both_failures_container]
444
+ )
445
+ show_nvidia_failures.change(
446
+ fn=filter_failing_models,
447
+ inputs=[show_amd_failures, show_nvidia_failures],
448
+ outputs=[all_models_container, amd_failures_container, nvidia_failures_container, both_failures_container]
449
+ )
450
+
451
+ # Regressions panel toggle functionality
452
def toggle_regressions_panel(current_visible):
    """Flip the regressions panel between expanded and collapsed.

    Returns a 3-tuple: an update for the header button text (arrow plus
    title), an update for the content column's CSS classes, and the new
    visibility flag to store in state.
    """
    now_visible = not current_visible

    if now_visible:
        arrow = "▼"
        state_class = "regressions-content-visible"
    else:
        arrow = "►"
        state_class = "regressions-content-hidden"

    # The panel is shown/hidden through CSS classes, not Gradio's `visible`.
    header_update = gr.update(value=f"{arrow} New Regressions")
    content_update = gr.update(elem_classes=["regressions-content", state_class])
    return header_update, content_update, now_visible
466
+
467
+ # Track regressions panel visibility state
468
+ regressions_visible = gr.State(False)
469
+
470
+ regressions_toggle_button.click(
471
+ fn=toggle_regressions_panel,
472
+ inputs=[regressions_visible],
473
+ outputs=[regressions_toggle_button, regressions_content, regressions_visible]
474
+ )
475
 
476
  # Model toggle functionality
477
  def toggle_model_list(current_visible):
 
491
 
492
  # Track model list visibility state
493
  model_list_visible = gr.State(False)
494
+ # Track last selected model for mode switches
495
+ selected_model_state = gr.State(None)
496
+ # Track whether current view is model detail (True) or summary (False)
497
+ in_model_view_state = gr.State(False)
498
 
499
  model_toggle_button.click(
500
  fn=toggle_model_list,
 
502
  outputs=[model_toggle_button, model_list_container, model_list_visible]
503
  )
504
 
505
+
506
+ # Unified summary handler: respects History toggle
507
def handle_summary_click(history_mode: bool):
    """Show the summary page for whichever mode the History toggle selects.

    Args:
        history_mode: Value of the History toggle. When true the historical
            summary plots are shown, otherwise the current summary figure.

    Returns:
        A 13-tuple matching the outputs wired to ``summary_button.click``:
        description text, CI links text, then updates for ``current_view``,
        ``historical_view``, ``summary_view``, ``summary_display``,
        ``detail_view``, the three time-series summary plots,
        ``time_series_detail_view``, and finally the new values for
        ``in_model_view_state`` (always ``False``) and
        ``selected_model_state`` (cleared to ``""``).
    """
    description = get_description_text()
    links = get_ci_links()

    if history_mode:
        # Historical plots are built only when they will be displayed, so a
        # click in current mode neither pays for them nor depends on
        # historical data having been loaded.
        fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
        return (
            description,
            links,
            gr.update(visible=False),  # current_view
            gr.update(visible=True),   # historical_view
            gr.update(visible=False),  # summary_view
            gr.update(visible=False),  # summary_display
            gr.update(visible=False),  # detail_view
            fr_plot,                   # time_series_failure_rates
            amd_plot,                  # time_series_amd_tests
            nvidia_plot,               # time_series_nvidia_tests
            gr.update(visible=False),  # time_series_detail_view
            False,                     # in_model_view_state
            "",                        # selected_model_state (clear it)
        )

    fig = create_summary_page(Ci_results.df, Ci_results.available_models)
    return (
        description,
        links,
        gr.update(visible=True),             # current_view
        gr.update(visible=False),            # historical_view
        gr.update(visible=True),             # summary_view
        gr.update(value=fig, visible=True),  # summary_display
        gr.update(visible=False),            # detail_view
        gr.update(visible=False),            # time_series_failure_rates
        gr.update(visible=False),            # time_series_amd_tests
        gr.update(visible=False),            # time_series_nvidia_tests
        gr.update(visible=False),            # time_series_detail_view
        False,                               # in_model_view_state
        "",                                  # selected_model_state (clear it)
    )
544
 
545
  summary_button.click(
546
+ fn=handle_summary_click,
547
+ inputs=[history_view_button],
548
+ outputs=[
549
+ description_display,
550
+ ci_links_display,
551
+ current_view,
552
+ historical_view,
553
+ summary_view,
554
+ summary_display,
555
+ detail_view,
556
+ time_series_failure_rates,
557
+ time_series_amd_tests,
558
+ time_series_nvidia_tests,
559
+ time_series_detail_view,
560
+ in_model_view_state,
561
+ selected_model_state,
562
+ ],
563
  )
564
 
565
  # Function to get CI job links
 
609
  # AMD links
610
  if amd_multi_link or amd_single_link:
611
  links_md += "**AMD:**\n"
612
+ if amd_multi_link:
613
+ links_md += f"• [Multi GPU]({amd_multi_link})\n"
614
+ if amd_single_link:
615
+ links_md += f"• [Single GPU]({amd_single_link})\n"
 
 
 
616
  links_md += "\n"
617
 
618
  # NVIDIA links
619
  if nvidia_multi_link or nvidia_single_link:
620
  links_md += "**NVIDIA:**\n"
621
+ if nvidia_multi_link:
622
+ links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
623
+ if nvidia_single_link:
624
+ links_md += f"• [Single GPU]({nvidia_single_link})\n"
 
 
 
625
 
626
  if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
627
  links_md += "*No links available*"
 
632
  return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
633
 
634
 
635
+
636
+
637
def get_historical_summary_plots():
    """Return visible updates for the three historical summary plots.

    The plots are produced by ``create_time_series_summary_gradio`` from
    ``Ci_results.historical_df`` and returned in the order failure rates,
    AMD tests, NVIDIA tests.
    """
    plots = create_time_series_summary_gradio(Ci_results.historical_df)
    plot_keys = ('failure_rates', 'amd_tests', 'nvidia_tests')
    return tuple(gr.update(value=plots[key], visible=True) for key in plot_keys)
645
+
646
def handle_history_toggle(history_mode, last_selected_model, in_model_view):
    """Switch the main area between the current and historical views.

    Four outcomes are possible:

    - history on, a model view is active and a model is remembered:
      show that model's time-series plots
    - history on otherwise: show the historical summary plots
    - history off and the remembered model is in the current results index:
      show that model's current detail view
    - history off otherwise: show the current summary figure

    Returns a 15-tuple in the order wired to ``history_view_button.change``:
    ``current_view``, ``historical_view``, ``summary_view``,
    ``summary_display``, ``detail_view``, the three time-series summary
    plots, the two per-model time-series plots, ``time_series_detail_view``,
    ``plot_output``, the AMD and NVIDIA failed-test boxes, and the new
    ``in_model_view_state``.
    """
    def show():
        return gr.update(visible=True)

    def hide():
        return gr.update(visible=False)

    def keep():
        # Empty update: leaves the component as it is.
        return gr.update()

    if history_mode and in_model_view and last_selected_model:
        # Historical detail for the model that was being viewed.
        amd_ts, nvidia_ts = show_time_series_model(last_selected_model)
        return (
            hide(),     # current_view
            show(),     # historical_view
            hide(),     # summary_view
            hide(),     # summary_display
            hide(),     # detail_view
            hide(),     # time_series_failure_rates
            hide(),     # time_series_amd_tests
            hide(),     # time_series_nvidia_tests
            amd_ts,     # time_series_amd_model_plot
            nvidia_ts,  # time_series_nvidia_model_plot
            show(),     # time_series_detail_view
            keep(),     # plot_output
            keep(),     # amd_failed_tests_output
            keep(),     # nvidia_failed_tests_output
            True,       # in_model_view_state (still in model view)
        )

    if history_mode:
        # Historical summary.
        fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
        return (
            hide(),       # current_view
            show(),       # historical_view
            hide(),       # summary_view
            hide(),       # summary_display
            hide(),       # detail_view
            fr_plot,      # time_series_failure_rates
            amd_plot,     # time_series_amd_tests
            nvidia_plot,  # time_series_nvidia_tests
            keep(),       # time_series_amd_model_plot
            keep(),       # time_series_nvidia_model_plot
            hide(),       # time_series_detail_view
            keep(),       # plot_output
            keep(),       # amd_failed_tests_output
            keep(),       # nvidia_failed_tests_output
            False,        # in_model_view_state
        )

    # Current mode: show the remembered model if it is still known,
    # otherwise fall back to the summary.
    current_df = Ci_results.df
    model_known = (
        last_selected_model
        and current_df is not None
        and not current_df.empty
        and last_selected_model in current_df.index
    )

    if model_known:
        fig, amd_txt, nvidia_txt = plot_model_stats(
            Ci_results.df, last_selected_model, Ci_results.all_historical_data
        )
        return (
            show(),      # current_view
            hide(),      # historical_view
            hide(),      # summary_view
            hide(),      # summary_display
            show(),      # detail_view
            hide(),      # time_series_failure_rates
            hide(),      # time_series_amd_tests
            hide(),      # time_series_nvidia_tests
            keep(),      # time_series_amd_model_plot
            keep(),      # time_series_nvidia_model_plot
            hide(),      # time_series_detail_view
            fig,         # plot_output
            amd_txt,     # amd_failed_tests_output
            nvidia_txt,  # nvidia_failed_tests_output
            True,        # in_model_view_state
        )

    fig = create_summary_page(Ci_results.df, Ci_results.available_models)
    return (
        show(),                              # current_view
        hide(),                              # historical_view
        show(),                              # summary_view
        gr.update(value=fig, visible=True),  # summary_display
        hide(),                              # detail_view
        hide(),                              # time_series_failure_rates
        hide(),                              # time_series_amd_tests
        hide(),                              # time_series_nvidia_tests
        keep(),                              # time_series_amd_model_plot
        keep(),                              # time_series_nvidia_model_plot
        hide(),                              # time_series_detail_view
        keep(),                              # plot_output
        keep(),                              # amd_failed_tests_output
        keep(),                              # nvidia_failed_tests_output
        False,                               # in_model_view_state
    )
727
+
728
# Components refreshed when the History toggle flips. The order mirrors the
# tuple returned by handle_history_toggle (current_view first,
# in_model_view_state last).
history_toggle_outputs = [
    current_view,
    historical_view,
    summary_view,
    summary_display,
    detail_view,
    time_series_failure_rates,
    time_series_amd_tests,
    time_series_nvidia_tests,
    time_series_amd_model_plot,
    time_series_nvidia_model_plot,
    time_series_detail_view,
    plot_output,
    amd_failed_tests_output,
    nvidia_failed_tests_output,
    in_model_view_state,
]

history_view_button.change(
    fn=handle_history_toggle,
    inputs=[history_view_button, selected_model_state, in_model_view_state],
    outputs=history_toggle_outputs,
)
749
+
750
+
751
+ # Time-series model selection functionality
752
def show_time_series_model(selected_model):
    """Build the AMD and NVIDIA time-series plot updates for one model.

    Calls create_model_time_series_gradio on Ci_results.historical_df and
    returns two gr.update objects (AMD first, NVIDIA second), each carrying
    the corresponding plot as its value and marked visible.
    """
    model_plots = create_model_time_series_gradio(Ci_results.historical_df, selected_model)
    amd_update = gr.update(value=model_plots['amd_plot'], visible=True)
    nvidia_update = gr.update(value=model_plots['nvidia_plot'], visible=True)
    return amd_update, nvidia_update
759
+
760
+ # Unified model click handler: respects History toggle
761
def handle_model_click(selected_model: str, history_mode: bool):
    """Route a model-button click to the current or the historical detail view.

    Returns a 16-tuple ordered like the model buttons' ``outputs`` list:
    plot_output, amd_failed_tests_output, nvidia_failed_tests_output,
    current_view, historical_view, summary_view, summary_display,
    detail_view, time_series_failure_rates, time_series_amd_tests,
    time_series_nvidia_tests, time_series_amd_model_plot,
    time_series_nvidia_model_plot, time_series_detail_view,
    selected_model_state, in_model_view_state.
    """
    def _show():
        return gr.update(visible=True)

    def _hide():
        return gr.update(visible=False)

    if not history_mode:
        # Current-results mode: draw the pie charts and failed-test lists.
        fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model, Ci_results.all_historical_data)
        return (
            fig,                # plot_output
            amd_txt,            # amd_failed_tests_output
            nvidia_txt,         # nvidia_failed_tests_output
            _show(),            # current_view
            _hide(),            # historical_view
            _hide(),            # summary_view
            _hide(),            # summary_display
            _show(),            # detail_view
            gr.update(),        # time_series_failure_rates
            gr.update(),        # time_series_amd_tests
            gr.update(),        # time_series_nvidia_tests
            gr.update(),        # time_series_amd_model_plot
            gr.update(),        # time_series_nvidia_model_plot
            _hide(),            # time_series_detail_view
            selected_model,     # selected_model_state
            True,               # in_model_view_state
        )

    # History mode: show the per-model time-series plots instead.
    amd_ts, nvidia_ts = show_time_series_model(selected_model)
    return (
        gr.update(),        # plot_output
        gr.update(),        # amd_failed_tests_output
        gr.update(),        # nvidia_failed_tests_output
        _hide(),            # current_view
        _show(),            # historical_view
        _hide(),            # summary_view
        _hide(),            # summary_display
        _hide(),            # detail_view
        _hide(),            # time_series_failure_rates
        _hide(),            # time_series_amd_tests
        _hide(),            # time_series_nvidia_tests
        amd_ts,             # time_series_amd_model_plot
        nvidia_ts,          # time_series_nvidia_model_plot
        _show(),            # time_series_detail_view
        selected_model,     # selected_model_state
        True,               # in_model_view_state
    )
798
+
799
# Wire up all model buttons.
# Every model button (the main list and the three "failing" groups) uses the
# same handler and refreshes the same components, so the outputs list and the
# click wiring are defined once. The order must match the tuple returned by
# handle_model_click.
model_click_outputs = [
    plot_output,
    amd_failed_tests_output,
    nvidia_failed_tests_output,
    current_view,
    historical_view,
    summary_view,
    summary_display,
    detail_view,
    time_series_failure_rates,
    time_series_amd_tests,
    time_series_nvidia_tests,
    time_series_amd_model_plot,
    time_series_nvidia_model_plot,
    time_series_detail_view,
    selected_model_state,
    in_model_view_state,
]


def _wire_model_buttons(buttons, model_names):
    """Attach handle_model_click to each button, pairing buttons and names by position."""
    for i, btn in enumerate(buttons):
        # Index rather than zip: a button without a matching name still fails
        # loudly with IndexError instead of being silently left unwired.
        model_name = model_names[i]
        btn.click(
            # Bind the name as a default argument so every button keeps its
            # own model (avoids the late-binding closure pitfall).
            fn=lambda history_mode, m=model_name: handle_model_click(m, history_mode),
            inputs=[history_view_button],
            outputs=model_click_outputs,
        )


_wire_model_buttons(model_buttons, model_choices)

# Wire up AMD failing model buttons (names sorted once, not once per button)
amd_models_to_show = amd_failing_models + both_failing_models
_wire_model_buttons(amd_buttons, sorted(amd_models_to_show))

# Wire up NVIDIA failing model buttons
nvidia_models_to_show = nvidia_failing_models + both_failing_models
_wire_model_buttons(nvidia_buttons, sorted(nvidia_models_to_show))

# Wire up both failures model buttons
# NOTE(review): this group is built from the union of all failing models, not
# only the models failing on both devices - confirm it matches how
# both_buttons were created.
all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
_wire_model_buttons(both_buttons, sorted(all_failing))
905
+
906
+ # Auto-update CI links and regressions when the interface loads
907
def load_dashboard_data():
    """Return the CI links text and the regressions text, in that order."""
    ci_links = get_ci_links()
    regressions = get_regressions_text()
    return ci_links, regressions
910
+
911
  demo.load(
912
+ fn=load_dashboard_data,
913
+ outputs=[ci_links_display, regressions_panel]
914
  )
915
 
916
 
data.py CHANGED
@@ -1,10 +1,12 @@
1
  from huggingface_hub import HfFileSystem
2
  import pandas as pd
3
  from utils import logger
 
4
  import threading
5
  import traceback
6
  import json
7
  import re
 
8
 
9
  # NOTE: if caching is an issue, try adding `use_listings_cache=False`
10
  fs = HfFileSystem()
@@ -60,6 +62,8 @@ def log_dataframe_link(link: str) -> str:
60
  Adds the link to the dataset in the logs, modifies it to get a clockable link and then returns the date of the
61
  report.
62
  """
 
 
63
  logger.info(f"Reading df located at {link}")
64
  # Make sure the links starts with an http adress
65
  if link.startswith("hf://"):
@@ -102,26 +106,181 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
102
  df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
103
  return df, df_upload_date
104
 
105
- def get_first_working_df(file_list: list[str]) -> str:
106
- for file in file_list:
107
- job_links = file.rsplit('/', 1)[0] + "/job_links.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  try:
109
- links = pd.read_json(f"hf://{job_links}", typ="series")
110
- has_one_working_link = any(links.values)
111
  except Exception as e:
112
- logger.error(f"Could not read job links from {job_links}: {e}")
113
- has_one_working_link = False
114
- if has_one_working_link:
115
- return file
116
- logger.warning(f"Skipping {file} as it has no working job links.")
117
- raise RuntimeError("Could not find any working dataframe in the provided list.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  def get_distant_data() -> tuple[pd.DataFrame, str]:
120
  # Retrieve AMD dataframe
121
  amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
122
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
123
- file_amd = get_first_working_df(files_amd)
124
- df_amd, date_df_amd = read_one_dataframe(f"hf://{file_amd}", "amd")
125
  # Retrieve NVIDIA dataframe, which pattern should be:
126
  # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
127
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
@@ -161,9 +320,229 @@ def get_sample_data() -> tuple[pd.DataFrame, str]:
161
  filtered_joined.index = "sample_" + filtered_joined.index
162
  return filtered_joined, "sample data was loaded"
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  def safe_extract(row: pd.DataFrame, key: str) -> int:
165
  return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
168
  """Extract and process model data from DataFrame row."""
169
  # Handle missing values and get counts directly from dataframe
@@ -203,6 +582,10 @@ class CIResults:
203
  self.df = pd.DataFrame()
204
  self.available_models = []
205
  self.latest_update_msg = ""
 
 
 
 
206
 
207
  def load_data(self) -> None:
208
  """Load data from the data source."""
@@ -211,6 +594,13 @@ class CIResults:
211
  logger.info("Loading distant data...")
212
  new_df, latest_update_msg = get_distant_data()
213
  self.latest_update_msg = latest_update_msg
 
 
 
 
 
 
 
214
  except Exception as e:
215
  error_msg = [
216
  "Loading data failed:",
@@ -220,11 +610,18 @@ class CIResults:
220
  "Falling back on sample data."
221
  ]
222
  logger.error("\n".join(error_msg))
 
223
  new_df, latest_update_msg = get_sample_data()
224
  self.latest_update_msg = latest_update_msg
 
 
225
  # Update attributes
226
  self.df = new_df
227
  self.available_models = new_df.index.tolist()
 
 
 
 
228
  # Log and return distant load status
229
  logger.info(f"Data loaded successfully: {len(self.available_models)} models")
230
  logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
@@ -242,6 +639,62 @@ class CIResults:
242
  msg[model][col] = value
243
  logger.info(json.dumps(msg, indent=4))
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  def schedule_data_reload(self):
246
  """Schedule the next data reload."""
247
  def reload_data():
 
1
  from huggingface_hub import HfFileSystem
2
  import pandas as pd
3
  from utils import logger
4
+ from datetime import datetime, timedelta
5
  import threading
6
  import traceback
7
  import json
8
  import re
9
+ from typing import List, Tuple, Optional
10
 
11
  # NOTE: if caching is an issue, try adding `use_listings_cache=False`
12
  fs = HfFileSystem()
 
62
  Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
63
  report.
64
  """
65
+ if link.startswith("sample_"):
66
+ return "9999-99-99"
67
  logger.info(f"Reading df located at {link}")
68
  # Make sure the link starts with an http address
69
  if link.startswith("hf://"):
 
106
  df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
107
  return df, df_upload_date
108
 
109
def _fake_recent_dates(days: int = 7) -> List[str]:
    """Return `days` placeholder dates (YYYY-MM-DD) ending today, newest first."""
    today = datetime.now()
    return [(today - timedelta(days=offset)).strftime("%Y-%m-%d") for offset in range(days)]


def get_available_dates() -> List[str]:
    """Get list of available dates from both AMD and NVIDIA datasets.

    Returns:
        Up to 30 dates (YYYY-MM-DD, newest first) for which both datasets
        contain a model_results.json file. When no common date exists, or
        when listing the datasets fails, 7 placeholder dates ending today
        are returned instead so the dashboard can still run on demo data.
    """
    # Compiled once; each pattern captures the date folder of a result file.
    amd_pattern = re.compile(
        r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
    )
    nvidia_pattern = re.compile(
        r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
    )

    try:
        # AMD path structure: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
        logger.info(f"Found {len(files_amd)} AMD files")

        # NVIDIA path structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
        nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
        files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
        logger.info(f"Found {len(files_nvidia)} NVIDIA files")

        amd_dates = set()
        for file_path in files_amd:
            match = amd_pattern.search(file_path)
            if match:
                amd_dates.add(match.group(1))
            else:
                # Log unmatched paths for debugging
                logger.debug(f"AMD file path didn't match pattern: {file_path}")

        # Log a few example AMD file paths for debugging
        if files_amd:
            logger.info(f"Example AMD file paths: {files_amd[:3]}")

        nvidia_dates = {
            match.group(1)
            for match in map(nvidia_pattern.search, files_nvidia)
            if match
        }

        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")  # Show first 5
        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")  # Show first 5

        # Keep only the dates where both datasets have data
        common_dates = sorted(amd_dates & nvidia_dates, reverse=True)
        logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")

        if common_dates:
            return common_dates[:30]  # Limit to last 30 days for performance

        # If no real dates available, generate fake dates for the last 7 days
        logger.warning("No real dates available, generating fake dates for demo purposes")
        return _fake_recent_dates()

    except Exception as e:
        logger.error(f"Error getting available dates: {e}")
        # Generate fake dates when there's an error
        logger.info("Generating fake dates due to error")
        return _fake_recent_dates()
175
+
176
+
177
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
    """Get data for a specific date.

    Args:
        target_date: Date folder to load, formatted YYYY-MM-DD.

    Returns:
        A tuple ``(dataframe, target_date)``. The dataframe holds the joined
        AMD/NVIDIA results restricted to KEYS_TO_KEEP and IMPORTANT_MODELS;
        it is empty when nothing could be loaded or any step failed (errors
        are logged, never raised).
    """
    try:
        # For AMD, we need to find the specific run file for the date
        # AMD structure: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
        amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/runs/*/ci_results_run_models_gpu/model_results.json"
        amd_files = fs.glob(amd_src, refresh=True)

        if not amd_files:
            # NOTE(review): this aborts the whole date (caught below) even if
            # NVIDIA data exists for it - confirm that is intended.
            raise FileNotFoundError(f"No AMD data found for date {target_date}")

        # Pick the last run in descending path order, the same rule
        # get_distant_data uses for "latest". The glob result is not
        # guaranteed to be in that order, so sort explicitly.
        # Assumes run ids sort chronologically - TODO confirm.
        amd_file = sorted(amd_files, reverse=True)[0]
        # Ensure the AMD file path has the hf:// prefix
        if not amd_file.startswith("hf://"):
            amd_file = f"hf://{amd_file}"

        # NVIDIA structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
        nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"

        # Read dataframes - try each platform independently
        df_amd = pd.DataFrame()
        df_nvidia = pd.DataFrame()

        try:
            df_amd, _ = read_one_dataframe(amd_file, "amd")
            logger.info(f"Successfully loaded AMD data for {target_date}")
        except Exception as e:
            logger.warning(f"Failed to load AMD data for {target_date}: {e}")

        try:
            df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
            logger.info(f"Successfully loaded NVIDIA data for {target_date}")
        except Exception as e:
            logger.warning(f"Failed to load NVIDIA data for {target_date}: {e}")

        # If both failed, return empty dataframe
        if df_amd.empty and df_nvidia.empty:
            logger.warning(f"No data available for either platform on {target_date}")
            return pd.DataFrame(), target_date

        # Join both dataframes (outer join to include data from either platform)
        if not df_amd.empty and not df_nvidia.empty:
            joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
        elif not df_amd.empty:
            joined = df_amd.copy()
        else:
            joined = df_nvidia.copy()

        # NOTE(review): in the single-platform branches no suffixes are added,
        # so this selection may raise KeyError (caught below) if KEYS_TO_KEEP
        # lists suffixed columns of both platforms - verify.
        joined = joined[KEYS_TO_KEEP]
        joined.index = joined.index.str.replace("^models_", "", regex=True)

        # Filter out all but important models
        important_models_lower = [model.lower() for model in IMPORTANT_MODELS]
        filtered_joined = joined[joined.index.str.lower().isin(important_models_lower)]

        return filtered_joined, target_date

    except Exception as e:
        logger.error(f"Error getting data for date {target_date}: {e}")
        # Return empty dataframe instead of sample data for historical functionality
        return pd.DataFrame(), target_date
239
+
240
+
241
def get_historical_data(start_date: str, end_date: str, sample_data = False) -> pd.DataFrame:
    """Get historical data for a date range.

    Args:
        start_date: First day to load (inclusive), formatted YYYY-MM-DD.
        end_date: Last day to load (inclusive), formatted YYYY-MM-DD.
        sample_data: When true, skip remote loading and return generated
            fake data for the range.

    Returns:
        The per-day frames from get_data_for_date concatenated, each with a
        ``date`` column holding its day. Falls back to
        get_fake_historical_data when no day yields data or an error occurs.
    """
    if sample_data:
        return get_fake_historical_data(start_date, end_date)
    try:
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")

        historical_data = []
        current_dt = start_dt

        while current_dt <= end_dt:
            date_str = current_dt.strftime("%Y-%m-%d")
            try:
                df, _ = get_data_for_date(date_str)
                # Only add non-empty dataframes
                if not df.empty:
                    # assign() returns a new frame; the frame coming back from
                    # get_data_for_date is a filtered slice, and writing a
                    # column into it in place can trigger SettingWithCopy
                    # warnings.
                    historical_data.append(df.assign(date=date_str))
                    logger.info(f"Loaded data for {date_str}")
                else:
                    logger.warning(f"No data available for {date_str}")
            except Exception as e:
                logger.warning(f"Could not load data for {date_str}: {e}")

            current_dt += timedelta(days=1)

        if not historical_data:
            # pd.concat([]) raises ValueError; previously that exception was
            # what triggered the fake-data fallback. Same outcome, made explicit.
            logger.warning(f"No historical data loaded from {start_date} to {end_date}")
            logger.info("Falling back to fake historical data due to error")
            return get_fake_historical_data(start_date, end_date)

        # Combine all dataframes
        return pd.concat(historical_data, ignore_index=False)

    except Exception as e:
        logger.error(f"Error getting historical data: {e}")
        # Fall back to fake data when there's an error
        logger.info("Falling back to fake historical data due to error")
        return get_fake_historical_data(start_date, end_date)
277
+
278
 
279
  def get_distant_data() -> tuple[pd.DataFrame, str]:
280
  # Retrieve AMD dataframe
281
  amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
282
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
283
+ df_amd, date_df_amd = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
 
284
  # Retrieve NVIDIA dataframe, which pattern should be:
285
  # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
286
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
 
320
  filtered_joined.index = "sample_" + filtered_joined.index
321
  return filtered_joined, "sample data was loaded"
322
 
323
+
324
def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
    """Generate fake historical data for a date range when real data loading fails.

    One randomly perturbed copy of the sample data is produced per day in
    ``[start_date, end_date]`` (both YYYY-MM-DD, inclusive) and tagged with a
    ``date`` column.

    Returns:
        The concatenated per-day frames, or an empty DataFrame when the range
        is empty or generation fails (the error is logged, not raised).
    """
    import random  # imported once per call instead of once per generated day

    # (columns, low factor, high factor, only vary strictly positive values)
    variation_rules = (
        # Vary the success/skipped counts slightly (±20%)
        (['success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia'], 0.8, 1.2, True),
        # Vary failure counts more dramatically to show trends
        (['failed_multi_no_amd', 'failed_multi_no_nvidia', 'failed_single_no_amd', 'failed_single_no_nvidia'], 0.5, 2.0, False),
    )

    try:
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")

        # Get base sample data to use as template
        sample_df, _ = get_sample_data()

        # The template's columns never change, so resolve which of the
        # configured columns exist once rather than for every cell.
        active_rules = [
            ([col for col in cols if col in sample_df.columns], low, high, positive_only)
            for cols, low, high, positive_only in variation_rules
        ]

        historical_data = []
        current_dt = start_dt
        while current_dt <= end_dt:
            # Create a copy of sample data for this date with some random variations
            date_df = sample_df.copy()
            date_df['date'] = current_dt.strftime("%Y-%m-%d")

            for idx in date_df.index:
                for cols, low, high, positive_only in active_rules:
                    for col in cols:
                        original_val = date_df.loc[idx, col]
                        if pd.isna(original_val):
                            continue
                        if positive_only and not original_val > 0:
                            continue
                        variation = random.uniform(low, high)
                        date_df.loc[idx, col] = max(0, int(original_val * variation))

            historical_data.append(date_df)
            current_dt += timedelta(days=1)

        if not historical_data:
            logger.warning("No fake historical data generated")
            return pd.DataFrame()

        # Combine all dataframes
        combined_df = pd.concat(historical_data, ignore_index=False)
        logger.info(f"Generated fake historical data: {len(combined_df)} records from {start_date} to {end_date}")
        return combined_df

    except Exception as e:
        logger.error(f"Error generating fake historical data: {e}")
        return pd.DataFrame()
379
+
380
def safe_extract(row: pd.Series, key: str) -> int:
    """Return ``row[key]`` as an int, or 0 when the key is missing or the value is NaN/None.

    ``row`` is any mapping-like object with ``.get`` (a DataFrame row as a
    Series, or a plain dict). A whole DataFrame does not work here: ``.get``
    would return a column and ``pd.notna`` a Series.
    """
    value = row.get(key, 0)  # single lookup instead of two
    return int(value) if pd.notna(value) else 0
382
 
383
+
384
def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
    """
    Find the first date when a specific test failure appeared in historical data.

    Args:
        historical_df: Frame indexed by model name with a ``date`` column and
            ``failures_<device>`` columns (dict, or the same dict as JSON text).
        model_name: Model to look up; lower-cased before matching the index.
        test_name: Value compared against each failure entry's ``line`` field.
        device: Suffix of the failures column to read (``failures_<device>``).
        gpu_type: Key inside the failures dict whose test list is searched.

    Returns:
        The ``date`` of the earliest row containing the failure, or None when
        it never appears, the model is absent, or an unexpected error occurs
        (logged, not raised).
    """
    if historical_df.empty:
        return None

    try:
        # Normalize model name to match DataFrame index
        model_data = historical_df[historical_df.index == model_name.lower()]
        if model_data.empty:
            return None

        failures_column = f'failures_{device}'

        # Walk the rows oldest first so the first hit is the first occurrence
        for _, row in model_data.sort_values('date').iterrows():
            failures = row.get(failures_column, None)

            # Failures may be stored as JSON text
            if isinstance(failures, str):
                try:
                    failures = json.loads(failures)
                except json.JSONDecodeError:
                    continue

            # Skip missing cells (None/NaN) and anything that is not a mapping
            if not isinstance(failures, dict):
                continue

            for test in failures.get(gpu_type) or []:
                if isinstance(test, dict) and test.get('line', '') == test_name:
                    return row.get('date', None)

        return None

    except Exception as e:
        logger.error(f"Error finding first seen date for {test_name}: {e}")
        return None
432
+
433
+
434
def _as_failure_dict(raw) -> dict:
    """Coerce a failures cell (dict, JSON text, or missing/NaN) into a dict."""
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except json.JSONDecodeError:
            return {}
    return raw if isinstance(raw, dict) else {}


def _failed_test_names(failures: dict, gpu_type: str) -> set:
    """Return the set of ``line`` values of the failures listed under gpu_type."""
    return {
        test.get('line', '')
        for test in (failures.get(gpu_type) or [])
        if isinstance(test, dict)
    }


def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
    """
    Compare CURRENT failures against PREVIOUS day's failures to find NEW regressions.

    A regression is a test that:
    - Is failing in the CURRENT/LATEST run (current_df)
    - Was NOT failing in the PREVIOUS run (most recent date in historical_df)

    Args:
        current_df: Latest results, indexed by model name, with
            ``failures_amd`` / ``failures_nvidia`` cells (dict or JSON text).
        historical_df: Historical results with the same failure columns plus a
            ``date`` column; its index is matched against the lower-cased
            model name.

    Returns:
        One dict per new failure with keys ``model``, ``test`` (part after the
        last ``::``), ``test_full``, ``device`` and ``gpu_type``. Entries are
        ordered by model (current_df order), device (amd, nvidia), gpu type
        (single, multi) and test name. A model with no row on the reference
        date has all of its current failures reported.
    """
    if current_df.empty or historical_df.empty:
        return []

    # The most recent date in the historical data serves as "yesterday".
    # NOTE(review): if historical_df already contains the same run as
    # current_df, nothing will ever be reported - confirm the caller excludes it.
    available_dates = sorted(historical_df['date'].unique(), reverse=True)
    if not available_dates:
        # No history to compare against
        return []

    yesterday_data = historical_df[historical_df['date'] == available_dates[0]]

    new_regressions = []
    for model_name, current_row in current_df.iterrows():
        yesterday_rows = yesterday_data[yesterday_data.index == model_name.lower()]
        # An empty dict stands in for "no row yesterday": every .get() misses
        yesterday_row = yesterday_rows.iloc[0] if not yesterday_rows.empty else {}

        for device in ('amd', 'nvidia'):
            column = f'failures_{device}'
            current_failures = _as_failure_dict(current_row.get(column, {}))
            yesterday_failures = _as_failure_dict(yesterday_row.get(column, {}))

            for gpu_type in ('single', 'multi'):
                # Failing NOW but NOT yesterday
                new_tests = (
                    _failed_test_names(current_failures, gpu_type)
                    - _failed_test_names(yesterday_failures, gpu_type)
                )
                # Sorted so the output order is deterministic
                for test_name in sorted(new_tests):
                    if not test_name:  # Skip empty names
                        continue
                    new_regressions.append({
                        'model': model_name,
                        'test': test_name.split('::')[-1],  # Short name
                        'test_full': test_name,  # Full name
                        'device': device,
                        'gpu_type': gpu_type,
                    })

    return new_regressions
544
+
545
+
546
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
547
  """Extract and process model data from DataFrame row."""
548
  # Handle missing values and get counts directly from dataframe
 
582
  self.df = pd.DataFrame()
583
  self.available_models = []
584
  self.latest_update_msg = ""
585
+ self.available_dates = []
586
+ self.historical_df = pd.DataFrame()
587
+ self.all_historical_data = pd.DataFrame() # Store all historical data at startup
588
+ self.sample_data = False
589
 
590
  def load_data(self) -> None:
591
  """Load data from the data source."""
 
594
  logger.info("Loading distant data...")
595
  new_df, latest_update_msg = get_distant_data()
596
  self.latest_update_msg = latest_update_msg
597
+ self.available_dates = get_available_dates()
598
+ logger.info(f"Available dates: {len(self.available_dates)} dates")
599
+ if self.available_dates:
600
+ logger.info(f"Date range: {self.available_dates[-1]} to {self.available_dates[0]}")
601
+ else:
602
+ logger.warning("No available dates found")
603
+ self.available_dates = []
604
  except Exception as e:
605
  error_msg = [
606
  "Loading data failed:",
 
610
  "Falling back on sample data."
611
  ]
612
  logger.error("\n".join(error_msg))
613
+ self.sample_data = True
614
  new_df, latest_update_msg = get_sample_data()
615
  self.latest_update_msg = latest_update_msg
616
+ self.available_dates = None
617
+
618
  # Update attributes
619
  self.df = new_df
620
  self.available_models = new_df.index.tolist()
621
+
622
+ # Load all historical data at startup
623
+ self.load_all_historical_data()
624
+
625
  # Log and return distant load status
626
  logger.info(f"Data loaded successfully: {len(self.available_models)} models")
627
  logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
 
639
  msg[model][col] = value
640
  logger.info(json.dumps(msg, indent=4))
641
 
642
def load_all_historical_data(self) -> None:
    """Populate ``self.all_historical_data`` for the whole available date span.

    When ``self.available_dates`` is empty or None, seven placeholder dates
    ending today are generated first. The span runs from the last entry of
    ``available_dates`` (oldest) to the first (newest). On any error the
    attribute is reset to an empty DataFrame and the error is logged.
    """
    try:
        if not self.available_dates:
            # Generate fake dates when no real dates are available
            now = datetime.now()
            self.available_dates = [
                (now - timedelta(days=offset)).strftime("%Y-%m-%d")
                for offset in range(7)
            ]
            logger.info(f"No available dates found, generated {len(self.available_dates)} sample dates.")

        logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
        oldest = self.available_dates[-1]
        newest = self.available_dates[0]

        self.all_historical_data = get_historical_data(oldest, newest, self.sample_data)
        logger.info(f"All historical data loaded: {len(self.all_historical_data)} records")
    except Exception as e:
        logger.error(f"Error loading all historical data: {e}")
        self.all_historical_data = pd.DataFrame()
664
+
665
def load_historical_data(self, start_date: str, end_date: str) -> None:
    """Set ``self.historical_df`` to the pre-loaded rows dated within the range.

    Both bounds are YYYY-MM-DD strings and inclusive. Rows are taken from
    ``self.all_historical_data``, grouped per distinct ``date`` value. The
    result is an empty DataFrame when nothing is pre-loaded, nothing falls in
    the range, or an error occurs (logged, not raised).
    """
    try:
        logger.info(f"Filtering historical data from {start_date} to {end_date}")

        if self.all_historical_data.empty:
            logger.warning("No pre-loaded historical data available")
            self.historical_df = pd.DataFrame()
            return

        # Parse the bounds once, then keep every day that falls between them
        range_start = datetime.strptime(start_date, "%Y-%m-%d")
        range_end = datetime.strptime(end_date, "%Y-%m-%d")

        frames_in_range = [
            self.all_historical_data[self.all_historical_data['date'] == day]
            for day in self.all_historical_data['date'].unique()
            if range_start <= datetime.strptime(day, "%Y-%m-%d") <= range_end
        ]

        if not frames_in_range:
            self.historical_df = pd.DataFrame()
            logger.warning(f"No historical data found for date range {start_date} to {end_date}")
            return

        self.historical_df = pd.concat(frames_in_range, ignore_index=False)
        logger.info(f"Historical data filtered: {len(self.historical_df)} records for {start_date} to {end_date}")

    except Exception as e:
        logger.error(f"Error filtering historical data: {e}")
        self.historical_df = pd.DataFrame()
697
+
698
  def schedule_data_reload(self):
699
  """Schedule the next data reload."""
700
  def reload_data():
logos/amd_logo.png ADDED
logos/nvidia_logo.png ADDED
model_page.py CHANGED
@@ -1,7 +1,7 @@
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
  from utils import generate_underlined_line
4
- from data import extract_model_data
5
 
6
  # Figure dimensions
7
  FIGURE_WIDTH_DUAL = 18
@@ -42,11 +42,11 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
42
  """Create a pie chart for device statistics."""
43
  if not filtered_stats:
44
  ax.text(0.5, 0.5, 'No test results',
45
- horizontalalignment='center', verticalalignment='center',
46
- transform=ax.transAxes, fontsize=14, color='#888888',
47
- fontfamily='monospace', weight='normal')
48
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
49
- pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
50
  ax.axis('off')
51
  return
52
 
@@ -63,7 +63,7 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
63
  shadow=False,
64
  wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH), # Minimal borders
65
  textprops={'fontsize': 12, 'weight': 'normal',
66
- 'color': LABEL_COLOR, 'fontfamily': 'monospace'}
67
  )
68
 
69
  # Enhanced percentage text styling for better readability
@@ -82,10 +82,10 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
82
 
83
  # Device label closer to chart and bigger
84
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
85
- pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
86
 
87
 
88
- def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str, str]:
89
  """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
90
  # Handle case where the dataframe is empty or the model name could not be found in it
91
  if df.empty or model_name not in df.index:
@@ -124,25 +124,25 @@ def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str
124
  # Add subtle separation line between charts - stops at device labels level
125
  line_x = 0.5
126
  fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
127
- color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
128
- alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
129
 
130
  # Add central shared title for model name
131
  fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
132
- color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
133
 
134
  # Clean layout with padding and space for central title
135
  plt.tight_layout()
136
  plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
137
 
138
- amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered))
139
- nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered))
140
 
141
  return fig, amd_failed_info, nvidia_failed_info
142
 
143
 
144
- def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool) -> str:
145
- """Extract failure information from failures object."""
146
  # Catch the case where there is no data
147
  if not data_available:
148
  return generate_underlined_line(f"No data for {device}")
@@ -160,21 +160,43 @@ def prepare_textbox_content(failures: dict[str, list], device: str, data_availab
160
  ""
161
  ]
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  # Add single-gpu failures
164
  if single_failures:
165
  info_lines.append(generate_underlined_line("Single GPU failures:"))
166
  for test in single_failures:
167
- name = test.get("line", "::*could not find name*")
168
- name = name.split("::")[-1]
169
- info_lines.append(name)
170
  info_lines.append("\n")
171
 
172
  # Add multi-gpu failures
173
  if multi_failures:
174
  info_lines.append(generate_underlined_line("Multi GPU failures:"))
175
  for test in multi_failures:
176
- name = test.get("line", "::*could not find name*")
177
- name = name.split("::")[-1]
178
- info_lines.append(name)
179
 
180
- return "\n".join(info_lines)
 
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
  from utils import generate_underlined_line
4
+ from data import extract_model_data, find_failure_first_seen
5
 
6
  # Figure dimensions
7
  FIGURE_WIDTH_DUAL = 18
 
42
  """Create a pie chart for device statistics."""
43
  if not filtered_stats:
44
  ax.text(0.5, 0.5, 'No test results',
45
+ horizontalalignment='center', verticalalignment='center',
46
+ transform=ax.transAxes, fontsize=14, color='#888888',
47
+ fontfamily='monospace', weight='normal')
48
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
49
+ pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
50
  ax.axis('off')
51
  return
52
 
 
63
  shadow=False,
64
  wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH), # Minimal borders
65
  textprops={'fontsize': 12, 'weight': 'normal',
66
+ 'color': LABEL_COLOR, 'fontfamily': 'monospace'}
67
  )
68
 
69
  # Enhanced percentage text styling for better readability
 
82
 
83
  # Device label closer to chart and bigger
84
  ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
85
+ pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
86
 
87
 
88
+ def plot_model_stats(df: pd.DataFrame, model_name: str, historical_df: pd.DataFrame = None) -> tuple[plt.Figure, str, str]:
89
  """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
90
  # Handle case where the dataframe is empty or the model name could not be found in it
91
  if df.empty or model_name not in df.index:
 
124
  # Add subtle separation line between charts - stops at device labels level
125
  line_x = 0.5
126
  fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
127
+ color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
128
+ alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
129
 
130
  # Add central shared title for model name
131
  fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
132
+ color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
133
 
134
  # Clean layout with padding and space for central title
135
  plt.tight_layout()
136
  plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
137
 
138
+ amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered), model_name, historical_df)
139
+ nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered), model_name, historical_df)
140
 
141
  return fig, amd_failed_info, nvidia_failed_info
142
 
143
 
144
+ def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool, model_name: str = None, historical_df: pd.DataFrame = None) -> str:
145
+ """Extract failure information from failures object with first seen dates."""
146
  # Catch the case where there is no data
147
  if not data_available:
148
  return generate_underlined_line(f"No data for {device}")
 
160
  ""
161
  ]
162
 
163
+ # Helper function to format failure line with first seen date
164
def format_failure_line(test: dict, gpu_type: str) -> str:
    """Build the display line for one failing test.

    Args:
        test: Failure record; its ``"line"`` entry holds the full test id.
        gpu_type: GPU configuration label forwarded to
            ``find_failure_first_seen`` (callers pass "single" or "multi").

    Returns:
        The last ``::``-separated segment of the test id, followed by
        ``(First seen: MM-DD-YYYY)`` when a first-seen date is found in the
        historical data. If that date is not in ``YYYY-MM-DD`` form it is
        appended unchanged.
    """
    from datetime import datetime

    full_name = test.get("line", "::*could not find name*")
    short_name = full_name.split("::")[-1]

    # Without usable historical data there is nothing to look up.
    if historical_df is None or model_name is None or historical_df.empty:
        return short_name

    first_seen = find_failure_first_seen(
        historical_df,
        model_name,
        full_name,
        device.lower(),
        gpu_type
    )
    if not first_seen:
        return short_name

    # Format date as MM-DD-YYYY; only parsing failures fall back to the raw
    # value, so interrupts and unrelated errors are no longer swallowed.
    try:
        formatted_date = datetime.strptime(first_seen, "%Y-%m-%d").strftime("%m-%d-%Y")
    except (ValueError, TypeError):
        return f"{short_name} (First seen: {first_seen})"
    return f"{short_name} (First seen: {formatted_date})"
188
+
189
  # Add single-gpu failures
190
  if single_failures:
191
  info_lines.append(generate_underlined_line("Single GPU failures:"))
192
  for test in single_failures:
193
+ info_lines.append(format_failure_line(test, "single"))
 
 
194
  info_lines.append("\n")
195
 
196
  # Add multi-gpu failures
197
  if multi_failures:
198
  info_lines.append(generate_underlined_line("Multi GPU failures:"))
199
  for test in multi_failures:
200
+ info_lines.append(format_failure_line(test, "multi"))
 
 
201
 
202
+ return "\n".join(info_lines)
requirements.txt CHANGED
@@ -1 +1,3 @@
1
  matplotlib>=3.8
 
 
 
1
  matplotlib>=3.8
2
+ gradio_toggle
3
+ plotly>=5.0
styles.css CHANGED
@@ -3,6 +3,8 @@
3
  --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
4
  }
5
 
 
 
6
  .gradio-container {
7
  background-color: #000000 !important;
8
  color: white !important;
@@ -173,6 +175,96 @@ div[data-testid="column"]:has(.sidebar) {
173
  transition: max-height 0.3s ease !important;
174
  }
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  /* Model button styling */
178
  .model-button {
@@ -371,52 +463,28 @@ div[data-testid="column"]:has(.sidebar) {
371
 
372
  /* Plot container with smooth transitions and controlled scrolling */
373
  .plot-container {
374
- background-color: #000000 !important;
375
  border: none !important;
376
  transition: opacity 0.6s ease-in-out !important;
377
  flex: 1 1 auto !important;
378
  min-height: 0 !important;
379
  overflow-y: auto !important;
380
  scrollbar-width: thin !important;
381
- scrollbar-color: #333333 #000000 !important;
382
  }
383
 
384
  /* Custom scrollbar for plot container */
385
  .plot-container::-webkit-scrollbar {
386
  width: 8px !important;
387
- background: #000000 !important;
388
- }
389
-
390
- .plot-container::-webkit-scrollbar-track {
391
- background: #000000 !important;
392
- }
393
-
394
- .plot-container::-webkit-scrollbar-thumb {
395
- background-color: #333333 !important;
396
- border-radius: 4px !important;
397
- }
398
-
399
- .plot-container::-webkit-scrollbar-thumb:hover {
400
- background-color: #555555 !important;
401
  }
402
 
403
- /* Gradio plot component styling */
404
- .gr-plot {
405
- background-color: #000000 !important;
406
- transition: opacity 0.6s ease-in-out !important;
407
- }
408
 
409
- .gr-plot .gradio-plot {
410
- background-color: #000000 !important;
411
- transition: opacity 0.6s ease-in-out !important;
412
- }
413
 
414
  .gr-plot img {
415
  transition: opacity 0.6s ease-in-out !important;
416
  }
417
 
418
  /* Target the plot wrapper */
419
- div[data-testid="plot"] {
420
  background-color: #000000 !important;
421
  }
422
 
@@ -427,11 +495,6 @@ div[data-testid="plot"] {
427
  background-color: #000000 !important;
428
  }
429
 
430
- /* Ensure plot area background */
431
- .gr-plot > div,
432
- .plot-container > div {
433
- background-color: #000000 !important;
434
- }
435
 
436
  /* Prevent white flash during plot updates */
437
  .plot-container::before {
@@ -445,24 +508,26 @@ div[data-testid="plot"] {
445
  z-index: -1;
446
  }
447
 
448
- /* Force all plot elements to have black background */
449
- .plot-container *,
450
- .gr-plot *,
451
- div[data-testid="plot"] * {
452
- background-color: #000000 !important;
453
  }
454
 
455
- /* Override any white backgrounds in matplotlib */
456
- .plot-container canvas,
457
- .gr-plot canvas {
458
- background-color: #000000 !important;
459
- }
460
 
461
  /* Text elements */
462
  h1, h2, h3, p, .markdown {
463
  color: white !important;
464
  }
465
 
 
 
 
 
 
 
 
 
 
 
466
  /* Sidebar header enhancement */
467
  .sidebar h1 {
468
  background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
@@ -529,6 +594,116 @@ h1, h2, h3, p, .markdown {
529
  flex-direction: column !important;
530
  }
531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  /* Custom scrollbar for main content */
533
  .main-content {
534
  scrollbar-width: thin !important;
@@ -667,3 +842,203 @@ h1, h2, h3, p, .markdown {
667
  100% { scroll-behavior: auto; }
668
  }
669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
4
  }
5
 
6
+
7
+
8
  .gradio-container {
9
  background-color: #000000 !important;
10
  color: white !important;
 
175
  transition: max-height 0.3s ease !important;
176
  }
177
 
178
+ .history-view-button {
179
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
180
+ color: white !important;
181
+ margin: 0px 0px !important;
182
+ padding: 8px 12px !important;
183
+ font-weight: 600 !important;
184
+ font-size: 14px !important;
185
+ text-transform: uppercase !important;
186
+ letter-spacing: 0.3px !important;
187
+ font-family: monospace !important;
188
+ width: 100% !important;
189
+ max-width: 100% !important;
190
+ white-space: nowrap !important;
191
+ text-overflow: ellipsis !important;
192
+ display: block !important;
193
+ cursor: pointer !important;
194
+ transition: all 0.3s ease !important;
195
+ }
196
+
197
+ /* Failing models filter row */
198
+ .failing-models-filter-row {
199
+ background: linear-gradient(145deg, #1a1a1a, #0f0f0f) !important;
200
+ border: 1px solid #333 !important;
201
+ border-radius: 6px !important;
202
+ padding: 8px 8px !important;
203
+ margin: 0px 0px 12px 0px !important;
204
+ gap: 8px !important;
205
+ }
206
+
207
+ /* Failing models toggle styling */
208
+ .failing-models-toggle {
209
+ background: transparent !important;
210
+ border: none !important;
211
+ padding: 4px 6px !important;
212
+ margin: 0 !important;
213
+ flex: 1 !important;
214
+ }
215
+
216
+ .failing-models-toggle:hover {
217
+ background: rgba(255, 255, 255, 0.05) !important;
218
+ border-radius: 4px !important;
219
+ }
220
+
221
+ .failing-models-toggle label {
222
+ color: #FFFFFF !important;
223
+ font-family: monospace !important;
224
+ font-size: 11px !important;
225
+ font-weight: 600 !important;
226
+ text-transform: uppercase !important;
227
+ letter-spacing: 0.5px !important;
228
+ cursor: pointer !important;
229
+ display: flex !important;
230
+ align-items: center !important;
231
+ white-space: nowrap !important;
232
+ }
233
+
234
+ /* Override specific colors for AMD and NVIDIA to white */
235
+ .amd-toggle label,
236
+ .amd-toggle label span {
237
+ color: #FFFFFF !important;
238
+ }
239
+
240
+ .nvidia-toggle label,
241
+ .nvidia-toggle label span {
242
+ color: #FFFFFF !important;
243
+ }
244
+
245
+ .failing-models-toggle input[type="checkbox"] {
246
+ cursor: pointer !important;
247
+ width: 16px !important;
248
+ height: 16px !important;
249
+ margin-right: 6px !important;
250
+ }
251
+
252
+ .amd-toggle input[type="checkbox"] {
253
+ accent-color: #FF6B6B !important;
254
+ }
255
+
256
+ .nvidia-toggle input[type="checkbox"] {
257
+ accent-color: #76B900 !important;
258
+ }
259
+
260
+ .amd-toggle input[type="checkbox"]:checked {
261
+ accent-color: #FF8888 !important;
262
+ }
263
+
264
+ .nvidia-toggle input[type="checkbox"]:checked {
265
+ accent-color: #8BD918 !important;
266
+ }
267
+
268
 
269
  /* Model button styling */
270
  .model-button {
 
463
 
464
  /* Plot container with smooth transitions and controlled scrolling */
465
  .plot-container {
 
466
  border: none !important;
467
  transition: opacity 0.6s ease-in-out !important;
468
  flex: 1 1 auto !important;
469
  min-height: 0 !important;
470
  overflow-y: auto !important;
471
  scrollbar-width: thin !important;
472
+ padding: 0 !important;
473
  }
474
 
475
  /* Custom scrollbar for plot container */
476
  .plot-container::-webkit-scrollbar {
477
  width: 8px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  }
479
 
 
 
 
 
 
480
 
 
 
 
 
481
 
482
  .gr-plot img {
483
  transition: opacity 0.6s ease-in-out !important;
484
  }
485
 
486
  /* Target the plot wrapper */
487
+ div[data-testid="matplotlib"] {
488
  background-color: #000000 !important;
489
  }
490
 
 
495
  background-color: #000000 !important;
496
  }
497
 
 
 
 
 
 
498
 
499
  /* Prevent white flash during plot updates */
500
  .plot-container::before {
 
508
  z-index: -1;
509
  }
510
 
511
+ .vega-embed {
512
+ position: absolute !important;
 
 
 
513
  }
514
 
 
 
 
 
 
515
 
516
  /* Text elements */
517
  h1, h2, h3, p, .markdown {
518
  color: white !important;
519
  }
520
 
521
+ .toggle {
522
+ margin: 0 auto !important;
523
+ }
524
+
525
+ .toggle-label {
526
+ color: white !important;
527
+ font-family: monospace !important;
528
+ font-size: 14px !important;
529
+ }
530
+
531
  /* Sidebar header enhancement */
532
  .sidebar h1 {
533
  background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
 
594
  flex-direction: column !important;
595
  }
596
 
597
+ /* Summary view - position content slightly higher (not fully centered) */
598
+ .summary-view {
599
+ display: flex !important;
600
+ flex-direction: column !important;
601
+ align-items: center !important;
602
+ justify-content: flex-start !important;
603
+ gap: 10px !important;
604
+ padding-top: 20px !important;
605
+ }
606
+
607
+ /* Keep the summary display centered */
608
+ .summary-view .plot-container {
609
+ width: 100% !important;
610
+ }
611
+
612
+ /* Regressions components stay with the summary as a group */
613
+ .regressions-header {
614
+ margin: 0px 0px 10px 0px !important;
615
+ width: 100% !important;
616
+ max-width: 100% !important;
617
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
618
+ color: white !important;
619
+ border: 1px solid #8B4513 !important;
620
+ border-radius: 5px !important;
621
+ font-weight: 600 !important;
622
+ font-size: 14px !important;
623
+ font-family: monospace !important;
624
+ text-align: left !important;
625
+ width: 100% !important;
626
+ transition: all 0.3s ease !important;
627
+ }
628
+
629
+ .regressions-header:hover {
630
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
631
+ border-color: #B8621B !important;
632
+ }
633
+
634
+ /* Collapsible regressions content */
635
+ .regressions-content-visible {
636
+ max-height: 800px !important;
637
+ overflow-y: auto !important;
638
+ transition: max-height 0.3s ease !important;
639
+ scrollbar-width: thin !important;
640
+ -ms-overflow-style: none !important;
641
+ }
642
+
643
+ .regressions-content-visible::-webkit-scrollbar {
644
+ width: 8px !important;
645
+ background: transparent !important;
646
+ }
647
+
648
+ .regressions-content-visible::-webkit-scrollbar-thumb {
649
+ background-color: #333333 !important;
650
+ border-radius: 4px !important;
651
+ }
652
+
653
+ .regressions-content-hidden {
654
+ max-height: 0 !important;
655
+ overflow: hidden !important;
656
+ transition: max-height 0.3s ease !important;
657
+ }
658
+
659
+ /* New Regressions Panel */
660
+ .regressions-panel {
661
+ background: linear-gradient(145deg, #2a1a1a, #1a0f0f) !important;
662
+ border: 2px solid #8B4513 !important;
663
+ border-radius: 8px !important;
664
+ padding: 15px 20px !important;
665
+ margin: 0px 0px 15px 0px !important;
666
+ box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2) !important;
667
+ animation: pulse-border 2s ease-in-out infinite !important;
668
+ }
669
+
670
+ .regressions-panel h3 {
671
+ color: #FFB86C !important;
672
+ font-family: monospace !important;
673
+ font-size: 16px !important;
674
+ font-weight: bold !important;
675
+ margin: 0 0 10px 0 !important;
676
+ display: flex !important;
677
+ align-items: center !important;
678
+ }
679
+
680
+ .regressions-panel p,
681
+ .regressions-panel ul,
682
+ .regressions-panel li {
683
+ color: #FFFFFF !important;
684
+ font-family: monospace !important;
685
+ font-size: 13px !important;
686
+ line-height: 1.6 !important;
687
+ margin: 4px 0 !important;
688
+ }
689
+
690
+ .regressions-panel strong {
691
+ color: #FF6B6B !important;
692
+ font-weight: 600 !important;
693
+ }
694
+
695
+ /* Pulse animation for new regressions */
696
+ @keyframes pulse-border {
697
+ 0%, 100% {
698
+ border-color: #8B4513;
699
+ box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2);
700
+ }
701
+ 50% {
702
+ border-color: #B8621B;
703
+ box-shadow: 0 4px 16px rgba(255, 107, 107, 0.4);
704
+ }
705
+ }
706
+
707
  /* Custom scrollbar for main content */
708
  .main-content {
709
  scrollbar-width: thin !important;
 
842
  100% { scroll-behavior: auto; }
843
  }
844
 
845
+ /* View toggle buttons */
846
+ .view-toggle-row {
847
+ display: flex !important;
848
+ gap: 5px !important;
849
+ margin-bottom: 15px !important;
850
+ }
851
+
852
+ .view-toggle-button {
853
+ flex: 1 !important;
854
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
855
+ color: white !important;
856
+ border: 1px solid #333 !important;
857
+ border-radius: 5px !important;
858
+ padding: 8px 6px !important;
859
+ transition: all 0.3s ease !important;
860
+ font-weight: 600 !important;
861
+ font-size: 12px !important;
862
+ text-transform: uppercase !important;
863
+ letter-spacing: 0.3px !important;
864
+ font-family: monospace !important;
865
+ height: 50px !important;
866
+ display: flex !important;
867
+ flex-direction: column !important;
868
+ justify-content: center !important;
869
+ align-items: center !important;
870
+ line-height: 1.2 !important;
871
+ cursor: pointer !important;
872
+ }
873
+
874
+ .view-toggle-button:hover {
875
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
876
+ border-color: #555 !important;
877
+ }
878
+
879
+ .view-toggle-active {
880
+ background: linear-gradient(135deg, #4a4a4a, #3e3e3e) !important;
881
+ border: 2px solid #555555 !important;
882
+ box-shadow:
883
+ 0 4px 15px rgba(0, 0, 0, 0.3),
884
+ inset 0 1px 0 rgba(255, 255, 255, 0.2) !important;
885
+ }
886
+
887
+ /* Date selection styling */
888
+ .date-selection {
889
+ flex-grow: 0 !important;
890
+ background: linear-gradient(145deg, #0f0f0f, #1a1a1a) !important;
891
+ border: 1px solid #333 !important;
892
+ border-radius: 8px !important;
893
+ padding: 15px !important;
894
+ margin-bottom: 15px !important;
895
+ transition: all 0.3s ease !important;
896
+ overflow: hidden !important;
897
+ }
898
+
899
+ .date-selection-hidden {
900
+ max-height: 0 !important;
901
+ padding: 0 15px !important;
902
+ margin-bottom: 0 !important;
903
+ border: none !important;
904
+ }
905
+
906
+ .date-selection-visible {
907
+ max-height: 500px !important;
908
+ }
909
+
910
+ .date-header {
911
+ margin-bottom: 10px !important;
912
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
913
+ color: white !important;
914
+ border: 1px solid #333 !important;
915
+ border-radius: 5px !important;
916
+ padding: 8px 12px !important;
917
+ transition: all 0.3s ease !important;
918
+ font-family: monospace !important;
919
+ font-size: 12px !important;
920
+ text-align: left !important;
921
+ cursor: pointer !important;
922
+ width: 100% !important;
923
+ box-sizing: border-box !important;
924
+ }
925
+
926
+ .date-header:hover {
927
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
928
+ border-color: #444 !important;
929
+ transform: translateY(-1px) !important;
930
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3) !important;
931
+ }
932
+
933
+ .date-dropdown {
934
+ background-color: #222222 !important;
935
+ color: white !important;
936
+ border: 1px solid #444444 !important;
937
+ border-radius: 5px !important;
938
+ font-family: monospace !important;
939
+ font-size: 12px !important;
940
+ }
941
+
942
+ .date-dropdown .gr-dropdown {
943
+ background-color: #222222 !important;
944
+ color: white !important;
945
+ border: 1px solid #444444 !important;
946
+ }
947
+
948
+ .load-historical-button {
949
+ background: linear-gradient(135deg, #2d5aa0, #1e3f73) !important;
950
+ color: white !important;
951
+ border: 1px solid #3a6bc7 !important;
952
+ border-radius: 5px !important;
953
+ padding: 8px 12px !important;
954
+ transition: all 0.3s ease !important;
955
+ font-weight: 500 !important;
956
+ font-size: 12px !important;
957
+ text-transform: uppercase !important;
958
+ letter-spacing: 0.1px !important;
959
+ font-family: monospace !important;
960
+ width: 100% !important;
961
+ margin-top: 10px !important;
962
+ }
963
+
964
+ .load-historical-button:hover {
965
+ background: linear-gradient(135deg, #3a6bc7, #2d5aa0) !important;
966
+ border-color: #4a7bd9 !important;
967
+ }
968
+
969
+ /* Historical view styling */
970
+ .historical-view {
971
+ background-color: #000000 !important;
972
+ padding: 30px 20px !important;
973
+ }
974
+
975
+ .time-series-detail-view {
976
+ background-color: #000000 !important;
977
+ padding: 30px 20px !important;
978
+ }
979
+
980
+ /* Plotly chart styling for historical view */
981
+ .historical-view .plot-container,
982
+ .time-series-detail-view .plot-container {
983
+ background-color: #000000 !important;
984
+ }
985
+
986
+ /* Plotly specific text styling */
987
+ .historical-view .js-plotly-plot .plotly,
988
+ .time-series-detail-view .js-plotly-plot .plotly {
989
+ background-color: #000000 !important;
990
+ }
991
+
992
+ /* Plotly legend text */
993
+ .historical-view .js-plotly-plot .legend text,
994
+ .time-series-detail-view .js-plotly-plot .legend text {
995
+ font-size: 16px !important;
996
+ fill: #CCCCCC !important;
997
+ }
998
+
999
+ /* Plotly axis titles */
1000
+ .historical-view .js-plotly-plot .g-xtitle text,
1001
+ .historical-view .js-plotly-plot .g-ytitle text,
1002
+ .time-series-detail-view .js-plotly-plot .g-xtitle text,
1003
+ .time-series-detail-view .js-plotly-plot .g-ytitle text {
1004
+ font-size: 16px !important;
1005
+ fill: #CCCCCC !important;
1006
+ }
1007
+
1008
+ /* Plotly axis tick labels */
1009
+ .historical-view .js-plotly-plot .xtick text,
1010
+ .historical-view .js-plotly-plot .ytick text,
1011
+ .time-series-detail-view .js-plotly-plot .xtick text,
1012
+ .time-series-detail-view .js-plotly-plot .ytick text {
1013
+ font-size: 14px !important;
1014
+ fill: #CCCCCC !important;
1015
+ }
1016
+
1017
+ /* Plotly title */
1018
+ .historical-view .js-plotly-plot .g-gtitle text,
1019
+ .time-series-detail-view .js-plotly-plot .g-gtitle text {
1020
+ font-size: 20px !important;
1021
+ fill: #FFFFFF !important;
1022
+ font-weight: 600 !important;
1023
+ }
1024
+
1025
+ /* Back button styling */
1026
+ .back-button {
1027
+ background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
1028
+ color: white !important;
1029
+ border: 1px solid #333 !important;
1030
+ border-radius: 5px !important;
1031
+ padding: 8px 12px !important;
1032
+ transition: all 0.3s ease !important;
1033
+ font-weight: 500 !important;
1034
+ font-size: 12px !important;
1035
+ font-family: monospace !important;
1036
+ margin-bottom: 15px !important;
1037
+ width: 100% !important;
1038
+ }
1039
+
1040
+ .back-button:hover {
1041
+ background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
1042
+ border-color: #555 !important;
1043
+ color: #74b9ff !important;
1044
+ }
summary_page.py CHANGED
@@ -1,26 +1,30 @@
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
 
 
 
 
3
  from data import extract_model_data
4
 
5
  # Layout parameters
6
  COLUMNS = 3
7
 
8
  # Derived constants
9
- COLUMN_WIDTH = 100 / COLUMNS # Each column takes 25% of width
10
- BAR_WIDTH = COLUMN_WIDTH * 0.8 # 80% of column width for bars
11
- BAR_MARGIN = COLUMN_WIDTH * 0.1 # 10% margin on each side
12
 
13
  # Figure dimensions
14
- FIGURE_WIDTH = 22 # Wider to accommodate columns and legend
15
- MAX_HEIGHT = 14 # Maximum height in inches
16
  MIN_HEIGHT_PER_ROW = 2.8
17
  FIGURE_PADDING = 1
18
 
19
  # Bar styling
20
- BAR_HEIGHT_RATIO = 0.22 # Bar height as ratio of vertical spacing
21
- VERTICAL_SPACING_RATIO = 0.2 # Base vertical position ratio
22
- AMD_BAR_OFFSET = 0.25 # AMD bar offset ratio
23
- NVIDIA_BAR_OFFSET = 0.54 # NVIDIA bar offset ratio
24
 
25
  # Colors
26
  COLORS = {
@@ -34,21 +38,35 @@ COLORS = {
34
  # Font styling
35
  MODEL_NAME_FONT_SIZE = 16
36
  LABEL_FONT_SIZE = 14
37
- LABEL_OFFSET = 1 # Distance of label from bar
38
  FAILURE_RATE_FONT_SIZE = 28
39
 
 
 
 
 
40
 
41
- def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[list[int], list[int]]:
 
 
 
 
 
 
 
 
 
 
 
 
42
  """Calculate overall failure rates for AMD and NVIDIA across all models."""
43
  if df.empty or not available_models:
44
  return 0.0, 0.0
45
 
46
- total_amd_passed = 0
47
- total_amd_failed = 0
48
- total_amd_skipped = 0
49
- total_nvidia_passed = 0
50
- total_nvidia_failed = 0
51
- total_nvidia_skipped = 0
52
 
53
  for model_name in available_models:
54
  if model_name not in df.index:
@@ -58,19 +76,24 @@ def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[li
58
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
59
 
60
  # AMD totals
61
- total_amd_passed += amd_stats['passed']
62
- total_amd_failed += amd_stats['failed'] + amd_stats['error']
63
- total_amd_skipped += amd_stats['skipped']
64
-
 
65
  # NVIDIA totals
66
- total_nvidia_passed += nvidia_stats['passed']
67
- total_nvidia_failed += nvidia_stats['failed'] + nvidia_stats['error']
68
- total_nvidia_skipped += nvidia_stats['skipped']
 
 
 
 
69
 
70
- return [total_amd_passed, total_amd_failed, total_amd_skipped], [total_nvidia_passed, total_nvidia_failed, total_nvidia_skipped]
71
 
72
 
73
- def draw_text_and_bar(
74
  label: str,
75
  stats: dict[str, int],
76
  y_bar: float,
@@ -78,19 +101,72 @@ def draw_text_and_bar(
78
  bar_height: float,
79
  ax: plt.Axes,
80
  ) -> None:
81
- """Draw a horizontal bar chart for given stats and its label on the left."""
82
- # Text
83
- label_x = column_left_position - LABEL_OFFSET
84
  failures_present = any(stats[category] > 0 for category in ['failed', 'error'])
 
 
 
 
 
 
 
 
 
85
  if failures_present:
86
- props = dict(boxstyle='round', facecolor=COLORS['failed'], alpha=0.35)
 
87
  else:
88
- props = dict(alpha=0)
89
- ax.text(
90
- label_x, y_bar, label, ha='right', va='center', color='#CCCCCC', fontsize=LABEL_FONT_SIZE,
91
- fontfamily='monospace', fontweight='normal', bbox=props
 
 
 
 
 
 
 
 
92
  )
93
- # Bar
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  total = sum(stats.values())
95
  if total > 0:
96
  left = column_left_position
@@ -115,14 +191,7 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
115
  return fig
116
 
117
  # Calculate overall failure rates
118
- amd_counts, nvidia_counts = get_overall_stats(df, available_models)
119
-
120
- amd_non_skipped = amd_counts[0] + amd_counts[1]
121
- amd_failure_rate = (amd_counts[1] / amd_non_skipped) if amd_non_skipped > 0 else 0.0
122
- amd_failure_rate *= 100
123
- nvidia_non_skipped = nvidia_counts[0] + nvidia_counts[1]
124
- nvidia_failure_rate = (nvidia_counts[1] / nvidia_non_skipped) if nvidia_non_skipped > 0 else 0.0
125
- nvidia_failure_rate *= 100
126
 
127
  # Calculate dimensions for N-column layout
128
  model_count = len(available_models)
@@ -143,6 +212,10 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
143
 
144
  visible_model_count = 0
145
  max_y = 0
 
 
 
 
146
 
147
  for i, model_name in enumerate(available_models):
148
  if model_name not in df.index:
@@ -152,6 +225,15 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
152
 
153
  # Extract and process model data
154
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
 
 
 
 
 
 
 
 
 
155
 
156
  # Calculate position in 4-column grid
157
  col = visible_model_count % COLUMNS
@@ -176,44 +258,43 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
176
 
177
  # AMD label and bar in this column
178
  bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
179
- # Draw AMD bar
180
- draw_text_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
181
- # Draw NVIDIA bar
182
- draw_text_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
183
 
184
  # Increment counter for next visible model
185
  visible_model_count += 1
186
 
187
 
 
 
 
 
 
 
 
 
 
188
  # Add AMD and NVIDIA test totals in the bottom left
189
  # Calculate line spacing to align middle with legend
190
  line_height = 0.4 # Height between lines
191
- legend_y = max_y + 1
192
-
193
  # Position the two lines so their middle aligns with legend_y
194
  amd_y = legend_y - line_height / 2
195
  nvidia_y = legend_y + line_height / 2
196
-
197
- amd_totals_text = f"AMD Tests - Passed: {amd_counts[0]}, Failed: {amd_counts[1]}, Skipped: {amd_counts[2]}"
198
- nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_counts[0]}, Failed: {nvidia_counts[1]}, Skipped: {nvidia_counts[2]}"
199
-
200
  ax.text(0, amd_y, amd_totals_text,
201
  ha='left', va='bottom', color='#CCCCCC',
202
  fontsize=14, fontfamily='monospace')
203
-
204
  ax.text(0, nvidia_y, nvidia_totals_text,
205
  ha='left', va='bottom', color='#CCCCCC',
206
  fontsize=14, fontfamily='monospace')
207
-
208
- # Add legend horizontally in bottom right corner
209
- patch_height = 0.3
210
- patch_width = 3
211
-
212
- legend_start_x = 68.7
213
- legend_y = max_y + 1
214
- legend_spacing = 10
215
- legend_font_size = 15
216
-
217
  # Legend entries
218
  legend_items = [
219
  ('passed', 'Passed'),
 
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
+ from matplotlib.offsetbox import OffsetImage, AnnotationBbox
4
+ from matplotlib.patches import FancyBboxPatch
5
+ import matplotlib.image as mpimg
6
+ import os
7
  from data import extract_model_data
8
 
9
  # Layout parameters
10
  COLUMNS = 3
11
 
12
  # Derived constants
13
+ COLUMN_WIDTH = 100 / COLUMNS
14
+ BAR_WIDTH = COLUMN_WIDTH * 0.8
15
+ BAR_MARGIN = COLUMN_WIDTH * 0.1
16
 
17
  # Figure dimensions
18
+ FIGURE_WIDTH = 22
19
+ MAX_HEIGHT = 14
20
  MIN_HEIGHT_PER_ROW = 2.8
21
  FIGURE_PADDING = 1
22
 
23
  # Bar styling
24
+ BAR_HEIGHT_RATIO = 0.22
25
+ VERTICAL_SPACING_RATIO = 0.2
26
+ AMD_BAR_OFFSET = 0.25
27
+ NVIDIA_BAR_OFFSET = 0.54
28
 
29
  # Colors
30
  COLORS = {
 
38
  # Font styling
39
  MODEL_NAME_FONT_SIZE = 16
40
  LABEL_FONT_SIZE = 14
41
+ LABEL_OFFSET = 1
42
  FAILURE_RATE_FONT_SIZE = 28
43
 
44
+ # Logo settings
45
+ LOGO_BOX_WIDTH = 4.5
46
+ LOGO_BOX_HEIGHT = 0.43
47
+ LOGO_ZOOM = 0.09
48
 
49
+ # Load logos once at module level
50
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
51
+ try:
52
+ AMD_LOGO = mpimg.imread(os.path.join(SCRIPT_DIR, 'logos/amd_logo.png'))
53
+ except:
54
+ AMD_LOGO = None
55
+ try:
56
+ NVIDIA_LOGO = mpimg.imread(os.path.join(SCRIPT_DIR, 'logos/nvidia_logo.png'))
57
+ except:
58
+ NVIDIA_LOGO = None
59
+
60
+
61
+ def calculate_overall_failure_rates(df: pd.DataFrame, available_models: list[str]) -> tuple[float, float]:
62
  """Calculate overall failure rates for AMD and NVIDIA across all models."""
63
  if df.empty or not available_models:
64
  return 0.0, 0.0
65
 
66
+ total_amd_tests = 0
67
+ total_amd_failures = 0
68
+ total_nvidia_tests = 0
69
+ total_nvidia_failures = 0
 
 
70
 
71
  for model_name in available_models:
72
  if model_name not in df.index:
 
76
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
77
 
78
  # AMD totals
79
+ amd_total = amd_stats['passed'] + amd_stats['failed'] + amd_stats['error']
80
+ if amd_total > 0:
81
+ total_amd_tests += amd_total
82
+ total_amd_failures += amd_stats['failed'] + amd_stats['error']
83
+
84
  # NVIDIA totals
85
+ nvidia_total = nvidia_stats['passed'] + nvidia_stats['failed'] + nvidia_stats['error']
86
+ if nvidia_total > 0:
87
+ total_nvidia_tests += nvidia_total
88
+ total_nvidia_failures += nvidia_stats['failed'] + nvidia_stats['error']
89
+
90
+ amd_failure_rate = (total_amd_failures / total_amd_tests * 100) if total_amd_tests > 0 else 0.0
91
+ nvidia_failure_rate = (total_nvidia_failures / total_nvidia_tests * 100) if total_nvidia_tests > 0 else 0.0
92
 
93
+ return amd_failure_rate, nvidia_failure_rate
94
 
95
 
96
+ def draw_logo_and_bar(
97
  label: str,
98
  stats: dict[str, int],
99
  y_bar: float,
 
101
  bar_height: float,
102
  ax: plt.Axes,
103
  ) -> None:
104
+ """Draw a horizontal bar chart for given stats with a logo box on the left."""
105
+ # Determine if there are failures
 
106
  failures_present = any(stats[category] > 0 for category in ['failed', 'error'])
107
+
108
+ # Select the appropriate logo
109
+ logo = AMD_LOGO if label.lower() == "amd" else NVIDIA_LOGO
110
+
111
+ # Calculate box position (centered on the bar vertically)
112
+ box_x = column_left_position - LABEL_OFFSET - LOGO_BOX_WIDTH
113
+ box_y = y_bar - LOGO_BOX_HEIGHT / 2
114
+
115
+ # Draw the colored box
116
  if failures_present:
117
+ box_color = COLORS['failed'] # Red for failures
118
+ box_alpha = 0.6
119
  else:
120
+ box_color = '#2a2a2a' # Dark gray for no failures
121
+ box_alpha = 0.5
122
+
123
+ box = FancyBboxPatch(
124
+ (box_x, box_y),
125
+ LOGO_BOX_WIDTH,
126
+ LOGO_BOX_HEIGHT,
127
+ boxstyle="round,pad=0.05",
128
+ facecolor=box_color,
129
+ edgecolor='#444444',
130
+ linewidth=1,
131
+ alpha=box_alpha
132
  )
133
+ ax.add_patch(box)
134
+
135
+ # Add logo image inside the box if available
136
+ if logo is not None:
137
+ try:
138
+ imagebox = OffsetImage(logo, zoom=LOGO_ZOOM)
139
+ ab = AnnotationBbox(
140
+ imagebox,
141
+ (box_x + LOGO_BOX_WIDTH / 2, y_bar),
142
+ frameon=False,
143
+ box_alignment=(0.5, 0.5)
144
+ )
145
+ ax.add_artist(ab)
146
+ except:
147
+ # Fallback to text if logo doesn't work
148
+ ax.text(
149
+ box_x + LOGO_BOX_WIDTH / 2, y_bar,
150
+ label.upper(),
151
+ ha='center', va='center',
152
+ color='#FFFFFF',
153
+ fontsize=10,
154
+ fontfamily='monospace',
155
+ fontweight='bold'
156
+ )
157
+ else:
158
+ # Fallback to text if logo not loaded
159
+ ax.text(
160
+ box_x + LOGO_BOX_WIDTH / 2, y_bar,
161
+ label.upper(),
162
+ ha='center', va='center',
163
+ color='#FFFFFF',
164
+ fontsize=10,
165
+ fontfamily='monospace',
166
+ fontweight='bold'
167
+ )
168
+
169
+ # Draw the bar
170
  total = sum(stats.values())
171
  if total > 0:
172
  left = column_left_position
 
191
  return fig
192
 
193
  # Calculate overall failure rates
194
+ amd_failure_rate, nvidia_failure_rate = calculate_overall_failure_rates(df, available_models)
 
 
 
 
 
 
 
195
 
196
  # Calculate dimensions for N-column layout
197
  model_count = len(available_models)
 
212
 
213
  visible_model_count = 0
214
  max_y = 0
215
+
216
+ # Initialize counters for total tests
217
+ amd_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
218
+ nvidia_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
219
 
220
  for i, model_name in enumerate(available_models):
221
  if model_name not in df.index:
 
225
 
226
  # Extract and process model data
227
  amd_stats, nvidia_stats = extract_model_data(row)[:2]
228
+
229
+ # Accumulate totals
230
+ amd_totals['passed'] += amd_stats['passed']
231
+ amd_totals['failed'] += amd_stats['failed'] + amd_stats['error']
232
+ amd_totals['skipped'] += amd_stats['skipped']
233
+
234
+ nvidia_totals['passed'] += nvidia_stats['passed']
235
+ nvidia_totals['failed'] += nvidia_stats['failed'] + nvidia_stats['error']
236
+ nvidia_totals['skipped'] += nvidia_stats['skipped']
237
 
238
  # Calculate position in 4-column grid
239
  col = visible_model_count % COLUMNS
 
258
 
259
  # AMD label and bar in this column
260
  bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
261
+ # Draw AMD bar with logo
262
+ draw_logo_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
263
+ # Draw NVIDIA bar with logo
264
+ draw_logo_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
265
 
266
  # Increment counter for next visible model
267
  visible_model_count += 1
268
 
269
 
270
+ # Add legend horizontally in bottom right corner
271
+ patch_height = 0.3
272
+ patch_width = 3
273
+
274
+ legend_start_x = 68.7
275
+ legend_y = max_y + 1
276
+ legend_spacing = 10
277
+ legend_font_size = 15
278
+
279
  # Add AMD and NVIDIA test totals in the bottom left
280
  # Calculate line spacing to align middle with legend
281
  line_height = 0.4 # Height between lines
282
+
 
283
  # Position the two lines so their middle aligns with legend_y
284
  amd_y = legend_y - line_height / 2
285
  nvidia_y = legend_y + line_height / 2
286
+
287
+ amd_totals_text = f"AMD Tests - Passed: {amd_totals['passed']}, Failed: {amd_totals['failed']}, Skipped: {amd_totals['skipped']}"
288
+ nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_totals['passed']}, Failed: {nvidia_totals['failed']}, Skipped: {nvidia_totals['skipped']}"
289
+
290
  ax.text(0, amd_y, amd_totals_text,
291
  ha='left', va='bottom', color='#CCCCCC',
292
  fontsize=14, fontfamily='monospace')
293
+
294
  ax.text(0, nvidia_y, nvidia_totals_text,
295
  ha='left', va='bottom', color='#CCCCCC',
296
  fontsize=14, fontfamily='monospace')
297
+
 
 
 
 
 
 
 
 
 
298
  # Legend entries
299
  legend_items = [
300
  ('passed', 'Passed'),
time_series.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ import numpy as np
4
+ from datetime import datetime
5
+ from data import extract_model_data
6
+
7
+ COLORS = {
8
+ 'passed': '#4CAF50',
9
+ 'failed': '#E53E3E',
10
+ 'skipped': '#FFD54F',
11
+ 'error': '#8B0000',
12
+ 'amd': '#ED1C24',
13
+ 'nvidia': '#76B900'
14
+ }
15
+
16
+ FIGURE_WIDTH = 20
17
+ FIGURE_HEIGHT = 12
18
+
19
+ BLACK = '#000000'
20
+ LABEL_COLOR = '#CCCCCC'
21
+ TITLE_COLOR = '#FFFFFF'
22
+ GRID_COLOR = '#333333'
23
+
24
+ TITLE_FONT_SIZE = 24
25
+ LABEL_FONT_SIZE = 14
26
+ LEGEND_FONT_SIZE = 12
27
+
28
+
29
def _summary_placeholder_figure(message: str) -> plt.Figure:
    """Return a blank dark figure that shows ``message`` centered on the canvas."""
    fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
    ax.set_facecolor(BLACK)
    ax.text(0.5, 0.5, message,
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, fontsize=20, color='#888888',
            fontfamily='monospace', weight='normal')
    ax.axis('off')
    # Drop the figure from pyplot's registry so repeated calls do not pile up
    # open figures; the returned object can still be rendered by the caller.
    plt.close(fig)
    return fig


def _summary_daily_totals(date_data: pd.DataFrame) -> dict:
    """Sum pass/fail/skip counts over every row of one date and derive failure rates."""
    platforms = ('amd', 'nvidia')
    totals = {
        f'{platform}_{outcome}': 0
        for platform in platforms
        for outcome in ('passed', 'failed', 'skipped')
    }
    for _, row in date_data.iterrows():
        amd_stats, nvidia_stats = extract_model_data(row)[:2]
        for platform, stats in (('amd', amd_stats), ('nvidia', nvidia_stats)):
            totals[f'{platform}_passed'] += stats['passed']
            # Errors count as failures, as in summary_page.py (failed + error).
            # NOTE(review): presumes the stats dicts expose an 'error' key, as
            # summary_page.py reads it; .get keeps this safe if it is absent.
            totals[f'{platform}_failed'] += stats['failed'] + stats.get('error', 0)
            totals[f'{platform}_skipped'] += stats['skipped']

    for platform in platforms:
        failed = totals[f'{platform}_failed']
        # Skipped tests are excluded from the failure-rate denominator.
        executed = totals[f'{platform}_passed'] + failed
        totals[f'{platform}_failure_rate'] = (failed / executed * 100) if executed > 0 else 0
    return totals


def create_time_series_summary(historical_df: pd.DataFrame) -> plt.Figure:
    """Build the matplotlib overview of AMD/NVIDIA test results over time.

    The figure has four panels: overall failure rates (with a linear trend
    line once more than two dates exist), stacked AMD counts, stacked NVIDIA
    counts, and a text summary of the most recent date.

    Args:
        historical_df: One row per (model, date) with a ``date`` column; each
            row is passed to ``extract_model_data``. The frame is not modified.

    Returns:
        The rendered figure, already closed in pyplot's registry. When the
        frame is empty or has no ``date`` column, a placeholder figure with a
        "No historical data available" message is returned instead.
    """
    if historical_df.empty or 'date' not in historical_df.columns:
        return _summary_placeholder_figure('No historical data available')

    # assign() returns a new frame, so the caller's DataFrame does not gain a
    # stray 'date_dt' column.
    dated_df = historical_df.assign(
        date_dt=pd.to_datetime(historical_df['date'])
    ).sort_values('date_dt')

    daily_stats = []
    dates = []
    for date in dated_df['date_dt'].unique():
        date_data = dated_df[dated_df['date_dt'] == date]
        daily_stats.append(_summary_daily_totals(date_data))
        dates.append(date)

    fig = plt.figure(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT + 4), facecolor=BLACK)
    gs = fig.add_gridspec(3, 2, height_ratios=[1.2, 1, 1], width_ratios=[2, 1],
                          hspace=0.3, wspace=0.25)

    ax1 = fig.add_subplot(gs[0, :])
    ax2 = fig.add_subplot(gs[1, 0])
    ax3 = fig.add_subplot(gs[2, 0])
    ax4 = fig.add_subplot(gs[1:, 1])

    for ax in [ax1, ax2, ax3, ax4]:
        ax.set_facecolor(BLACK)

    dates_array = np.array(dates)
    amd_rates = [stat['amd_failure_rate'] for stat in daily_stats]
    nvidia_rates = [stat['nvidia_failure_rate'] for stat in daily_stats]

    # --- Panel 1: failure rates -------------------------------------------
    ax1.fill_between(dates_array, 0, amd_rates, color=COLORS['amd'], alpha=0.15)
    ax1.fill_between(dates_array, 0, nvidia_rates, color=COLORS['nvidia'], alpha=0.15)
    ax1.plot(dates_array, amd_rates, color=COLORS['amd'], linewidth=3,
             label='AMD', marker='o', markersize=7, markeredgewidth=2, markeredgecolor=BLACK)
    ax1.plot(dates_array, nvidia_rates, color=COLORS['nvidia'], linewidth=3,
             label='NVIDIA', marker='s', markersize=7, markeredgewidth=2, markeredgecolor=BLACK)

    if len(amd_rates) > 2:
        # Linear trend fitted against the sample index (not the calendar
        # distance between dates), drawn as a dashed line per platform.
        sample_index = range(len(amd_rates))
        for rates, color_key in ((amd_rates, 'amd'), (nvidia_rates, 'nvidia')):
            trend = np.poly1d(np.polyfit(sample_index, rates, 1))
            ax1.plot(dates_array, trend(sample_index),
                     color=COLORS[color_key], linestyle='--', alpha=0.5, linewidth=2)

    ax1.set_title('Overall Failure Rates Over Time', fontsize=TITLE_FONT_SIZE,
                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=20)
    ax1.set_ylabel('Failure Rate (%)', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
    ax1.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
    ax1.legend(fontsize=LEGEND_FONT_SIZE, loc='upper right', frameon=False,
               labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
    ax1.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE, axis='x', rotation=45)

    # --- Panel 2: AMD stacked counts --------------------------------------
    amd_passed = [stat['amd_passed'] for stat in daily_stats]
    amd_failed = [stat['amd_failed'] for stat in daily_stats]
    amd_skipped = [stat['amd_skipped'] for stat in daily_stats]

    ax2.stackplot(dates_array, amd_passed, amd_failed, amd_skipped,
                  colors=[COLORS['passed'], COLORS['failed'], COLORS['skipped']],
                  alpha=0.8, labels=['Passed', 'Failed', 'Skipped'])

    ax2.set_title('AMD Test Results', fontsize=TITLE_FONT_SIZE - 2,
                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=15)
    ax2.set_ylabel('Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
    ax2.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
    ax2.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE - 1, axis='x', rotation=45)

    # --- Panel 3: NVIDIA stacked counts -----------------------------------
    nvidia_passed = [stat['nvidia_passed'] for stat in daily_stats]
    nvidia_failed = [stat['nvidia_failed'] for stat in daily_stats]
    nvidia_skipped = [stat['nvidia_skipped'] for stat in daily_stats]

    ax3.stackplot(dates_array, nvidia_passed, nvidia_failed, nvidia_skipped,
                  colors=[COLORS['passed'], COLORS['failed'], COLORS['skipped']],
                  alpha=0.8, labels=['Passed', 'Failed', 'Skipped'])

    ax3.set_title('NVIDIA Test Results', fontsize=TITLE_FONT_SIZE - 2,
                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=15)
    ax3.set_ylabel('Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
    ax3.set_xlabel('Date', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
    ax3.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
    ax3.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE - 1, axis='x', rotation=45)

    # --- Panel 4: text summary of the most recent date --------------------
    latest = daily_stats[-1]
    metrics = [
        ('Latest AMD Failure Rate', f"{latest['amd_failure_rate']:.1f}%", COLORS['amd']),
        ('Latest NVIDIA Failure Rate', f"{latest['nvidia_failure_rate']:.1f}%", COLORS['nvidia']),
        ('', '', None),  # blank spacer row
        ('Total AMD Tests', str(latest['amd_passed'] + latest['amd_failed'] + latest['amd_skipped']), '#888888'),
        ('Total NVIDIA Tests', str(latest['nvidia_passed'] + latest['nvidia_failed'] + latest['nvidia_skipped']), '#888888'),
    ]

    ax4.axis('off')
    y_pos = 0.9
    ax4.text(0.5, 0.95, 'SUMMARY', ha='center', va='top', fontsize=TITLE_FONT_SIZE - 2,
             color=TITLE_COLOR, fontfamily='monospace', fontweight='bold',
             transform=ax4.transAxes)

    for label, value, color in metrics:
        if label:
            ax4.text(0.1, y_pos, label, ha='left', va='center', fontsize=LABEL_FONT_SIZE,
                     color=LABEL_COLOR, fontfamily='monospace', transform=ax4.transAxes)
            ax4.text(0.9, y_pos, value, ha='right', va='center', fontsize=LABEL_FONT_SIZE + 2,
                     color=color or LABEL_COLOR, fontfamily='monospace', fontweight='bold',
                     transform=ax4.transAxes)
        # The spacer row still advances the cursor, leaving a visual gap.
        y_pos -= 0.15

    handles = [plt.Rectangle((0, 0), 1, 1, fc=COLORS['passed'], alpha=0.8),
               plt.Rectangle((0, 0), 1, 1, fc=COLORS['failed'], alpha=0.8),
               plt.Rectangle((0, 0), 1, 1, fc=COLORS['skipped'], alpha=0.8)]
    ax4.legend(handles, ['Passed', 'Failed', 'Skipped'],
               loc='lower center', fontsize=LEGEND_FONT_SIZE,
               frameon=False, labelcolor=LABEL_COLOR, prop={'family': 'monospace'})

    # Close only this figure: plt.close('all') would also close figures that
    # other callers in the same process are still building.
    plt.close(fig)
    return fig
184
+
185
+
186
def _model_placeholder_figure(message: str) -> plt.Figure:
    """Return a blank dark figure that shows ``message`` centered on the canvas."""
    fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
    ax.set_facecolor(BLACK)
    ax.text(0.5, 0.5, message,
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, fontsize=20, color='#888888',
            fontfamily='monospace', weight='normal')
    ax.axis('off')
    # Drop the figure from pyplot's registry so repeated calls do not pile up
    # open figures; the returned object can still be rendered by the caller.
    plt.close(fig)
    return fig


def _model_outcome_series(stats_list: list) -> tuple[list, list, list]:
    """Split per-date stats dicts into parallel passed / failed / skipped lists."""
    passed = [stats['passed'] for stats in stats_list]
    # Errors count as failures, as in summary_page.py (failed + error).
    # NOTE(review): presumes the stats dicts expose an 'error' key, as
    # summary_page.py reads it; .get keeps this safe if it is absent.
    failed = [stats['failed'] + stats.get('error', 0) for stats in stats_list]
    skipped = [stats['skipped'] for stats in stats_list]
    return passed, failed, skipped


def create_model_time_series(historical_df: pd.DataFrame, model_name: str) -> plt.Figure:
    """Build the matplotlib history of one model's AMD and NVIDIA test results.

    The figure stacks passed/failed/skipped counts per date for AMD (top) and
    NVIDIA (bottom), overlays the failure count as a line, and lists the most
    recent pass rate, fail rate and total for each platform in a side panel.

    Args:
        historical_df: Frame indexed by model name with a ``date`` column;
            each row is passed to ``extract_model_data``.
        model_name: Model to plot; matched against the index case-insensitively.

    Returns:
        The rendered figure, already closed in pyplot's registry. A
        placeholder figure with an explanatory message is returned when there
        is no historical data or no row for ``model_name``.
    """
    if historical_df.empty or 'date' not in historical_df.columns:
        return _model_placeholder_figure(f'No historical data available for {model_name}')

    # astype(str) keeps the .str accessor usable even if the index is not
    # made of strings.
    matches = historical_df.index.astype(str).str.lower() == model_name.lower()
    model_data = historical_df[matches]

    if model_data.empty:
        return _model_placeholder_figure(f'No data found for model: {model_name}')

    model_data = model_data.copy()
    model_data['date_dt'] = pd.to_datetime(model_data['date'])
    model_data = model_data.sort_values('date_dt')

    dates = model_data['date_dt'].values
    amd_stats_list = []
    nvidia_stats_list = []

    for _, row in model_data.iterrows():
        amd_stats, nvidia_stats = extract_model_data(row)[:2]
        amd_stats_list.append(amd_stats)
        nvidia_stats_list.append(nvidia_stats)

    amd_passed, amd_failed, amd_skipped = _model_outcome_series(amd_stats_list)
    nvidia_passed, nvidia_failed, nvidia_skipped = _model_outcome_series(nvidia_stats_list)

    fig = plt.figure(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
    gs = fig.add_gridspec(2, 2, height_ratios=[1, 1], width_ratios=[3, 1],
                          hspace=0.3, wspace=0.2)

    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[1, 0])
    ax3 = fig.add_subplot(gs[:, 1])

    for ax in [ax1, ax2, ax3]:
        ax.set_facecolor(BLACK)

    # --- AMD panel ---------------------------------------------------------
    ax1.stackplot(dates, amd_passed, amd_failed, amd_skipped,
                  colors=[COLORS['passed'], COLORS['failed'], COLORS['skipped']],
                  alpha=0.7, labels=['Passed', 'Failed', 'Skipped'])

    # Failure count overlaid as a line; '_nolegend_' keeps it out of the legend.
    ax1.plot(dates, amd_failed, color=COLORS['failed'], linewidth=2.5,
             marker='o', markersize=7, markeredgewidth=2, markeredgecolor=BLACK,
             linestyle='-', label='_nolegend_')

    ax1.set_title(f'{model_name.upper()} - AMD Results', fontsize=TITLE_FONT_SIZE,
                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=20)
    ax1.set_ylabel('Number of Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
    ax1.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
    ax1.legend(fontsize=LEGEND_FONT_SIZE, loc='upper left', frameon=False,
               labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
    ax1.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE, axis='x', rotation=45)

    # --- NVIDIA panel ------------------------------------------------------
    ax2.stackplot(dates, nvidia_passed, nvidia_failed, nvidia_skipped,
                  colors=[COLORS['passed'], COLORS['failed'], COLORS['skipped']],
                  alpha=0.7, labels=['Passed', 'Failed', 'Skipped'])

    ax2.plot(dates, nvidia_failed, color=COLORS['failed'], linewidth=2.5,
             marker='s', markersize=7, markeredgewidth=2, markeredgecolor=BLACK,
             linestyle='-', label='_nolegend_')

    ax2.set_title(f'{model_name.upper()} - NVIDIA Results', fontsize=TITLE_FONT_SIZE,
                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=20)
    ax2.set_ylabel('Number of Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
    ax2.set_xlabel('Date', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
    ax2.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
    ax2.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE, axis='x', rotation=45)

    # --- Side panel: latest results ---------------------------------------
    ax3.axis('off')
    ax3.text(0.5, 0.95, 'LATEST RESULTS', ha='center', va='top',
             fontsize=TITLE_FONT_SIZE - 4, color=TITLE_COLOR, fontfamily='monospace',
             fontweight='bold', transform=ax3.transAxes)

    sections = []
    latest_by_platform = (
        ('AMD', amd_passed[-1], amd_failed[-1], amd_skipped[-1]),
        ('NVIDIA', nvidia_passed[-1], nvidia_failed[-1], nvidia_skipped[-1]),
    )
    for section_name, passed, failed, skipped in latest_by_platform:
        # Rates are relative to executed tests only; skipped tests are excluded.
        executed = passed + failed
        pass_rate = (passed / executed * 100) if executed > 0 else 0
        fail_rate = (failed / executed * 100) if executed > 0 else 0
        sections.append((section_name, [
            ('Pass Rate', f"{pass_rate:.1f}%", COLORS['passed']),
            ('Fail Rate', f"{fail_rate:.1f}%", COLORS['failed']),
            ('Total', str(passed + failed + skipped), '#888888'),
        ]))

    y = 0.80
    for section_name, metrics in sections:
        ax3.text(0.5, y, section_name, ha='center', va='center',
                 fontsize=LABEL_FONT_SIZE + 2, color=TITLE_COLOR,
                 fontfamily='monospace', fontweight='bold', transform=ax3.transAxes)
        y -= 0.08

        for label, value, color in metrics:
            ax3.text(0.15, y, label, ha='left', va='center',
                     fontsize=LABEL_FONT_SIZE - 1, color=LABEL_COLOR,
                     fontfamily='monospace', transform=ax3.transAxes)
            ax3.text(0.85, y, value, ha='right', va='center',
                     fontsize=LABEL_FONT_SIZE, color=color,
                     fontfamily='monospace', fontweight='bold', transform=ax3.transAxes)
            y -= 0.07
        # Extra gap between the AMD and NVIDIA sections.
        y -= 0.05

    # Close only this figure: plt.close('all') would also close figures that
    # other callers in the same process are still building.
    plt.close(fig)
    return fig
time_series_gradio.py ADDED
@@ -0,0 +1,556 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime
4
+ from data import extract_model_data
5
+ import gradio as gr
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+
9
+ COLORS = {
10
+ 'passed': '#4CAF50',
11
+ 'failed': '#E53E3E',
12
+ 'skipped': '#FFD54F',
13
+ 'error': '#8B0000',
14
+ 'amd': '#ED1C24',
15
+ 'nvidia': '#76B900'
16
+ }
17
+
18
def _sum_present_columns(frame: pd.DataFrame, columns: list[str]):
    """Sum every listed column that exists in ``frame``; absent columns count as 0."""
    return sum(frame[name].sum() for name in columns if name in frame.columns)


def get_time_series_summary_dfs(historical_df: pd.DataFrame) -> dict:
    """Build long-format per-date summary tables for both platforms.

    Args:
        historical_df: One row per (model, date) with a ``date`` column and
            the per-platform count columns ``success_*``,
            ``failed_multi_no_*``, ``failed_single_no_*`` and ``skipped_*``.
            Any count column that is missing is treated as 0.

    Returns:
        A dict with three DataFrames:

        * ``failure_rates_df`` - columns ``date``, ``failure_rate``,
          ``platform`` (``'AMD'``/``'NVIDIA'``), ``change``.
        * ``amd_tests_df`` / ``nvidia_tests_df`` - columns ``date``,
          ``count``, ``test_type`` (``'Passed'``/``'Failed'``/``'Skipped'``),
          ``change``.

        ``change`` is the difference from the previous date and 0 on the
        first date. With an empty frame or no ``date`` column, all three
        tables are empty but still carry their columns.
    """
    rate_columns = ['date', 'failure_rate', 'platform', 'change']
    count_columns = ['date', 'count', 'test_type', 'change']
    platforms = (('amd', 'AMD'), ('nvidia', 'NVIDIA'))
    outcomes = (('passed', 'Passed'), ('failed', 'Failed'), ('skipped', 'Skipped'))

    daily_stats = []
    if not historical_df.empty and 'date' in historical_df.columns:
        for date in sorted(historical_df['date'].unique()):
            date_data = historical_df[historical_df['date'] == date]
            stat = {'date': date}
            for key, _ in platforms:
                passed = _sum_present_columns(date_data, [f'success_{key}'])
                # Each failure column is checked on its own, so a frame that
                # has only one of the two no longer raises KeyError.
                failed = _sum_present_columns(
                    date_data, [f'failed_multi_no_{key}', f'failed_single_no_{key}']
                )
                skipped = _sum_present_columns(date_data, [f'skipped_{key}'])
                # NOTE(review): this denominator includes skipped tests, while
                # summary_page.py excludes them from its failure rate - confirm
                # which definition the consumers of this table expect.
                total = passed + failed + skipped
                stat[f'{key}_passed'] = passed
                stat[f'{key}_failed'] = failed
                stat[f'{key}_skipped'] = skipped
                stat[f'{key}_failure_rate'] = (failed / total * 100) if total > 0 else 0
            daily_stats.append(stat)

    rate_rows = []
    count_rows = {key: [] for key, _ in platforms}
    previous = None  # stats of the preceding date; None on the first date
    for stat in daily_stats:
        for key, platform_label in platforms:
            rate_key = f'{key}_failure_rate'
            rate_rows.append({
                'date': stat['date'],
                'failure_rate': stat[rate_key],
                'platform': platform_label,
                'change': stat[rate_key] - previous[rate_key] if previous is not None else 0,
            })
            for suffix, test_type in outcomes:
                count_key = f'{key}_{suffix}'
                count_rows[key].append({
                    'date': stat['date'],
                    'count': stat[count_key],
                    'test_type': test_type,
                    'change': stat[count_key] - previous[count_key] if previous is not None else 0,
                })
        previous = stat

    # Passing columns= keeps the schema stable even when there are no rows.
    return {
        'failure_rates_df': pd.DataFrame(rate_rows, columns=rate_columns),
        'amd_tests_df': pd.DataFrame(count_rows['amd'], columns=count_columns),
        'nvidia_tests_df': pd.DataFrame(count_rows['nvidia'], columns=count_columns),
    }
86
+
87
def _model_platform_counts(row: pd.Series, platform: str) -> dict:
    """Return one row's Passed / Failed / Skipped counts for ``platform``."""
    return {
        'Passed': row.get(f'success_{platform}', 0),
        # Failures are the sum of the multi-GPU and single-GPU failure columns.
        'Failed': row.get(f'failed_multi_no_{platform}', 0)
        + row.get(f'failed_single_no_{platform}', 0),
        'Skipped': row.get(f'skipped_{platform}', 0),
    }


def get_model_time_series_dfs(historical_df: pd.DataFrame, model_name: str) -> dict:
    """Build long-format per-date test-count tables for a single model.

    Args:
        historical_df: Frame indexed by model name with a ``date`` column and
            the per-platform count columns ``success_*``,
            ``failed_multi_no_*``, ``failed_single_no_*`` and ``skipped_*``.
        model_name: Model to extract; matched against the index
            case-insensitively.

    Returns:
        ``{'amd_df': ..., 'nvidia_df': ...}``. Each DataFrame has columns
        ``date``, ``count``, ``test_type`` (``'Passed'``/``'Failed'``/
        ``'Skipped'``) and ``change``, with three rows per date in ascending
        date order. ``change`` is the difference from the previous date and 0
        on the first date. Both frames are empty (with columns) when there is
        no history or no row for ``model_name``.
    """
    columns = ['date', 'count', 'test_type', 'change']

    def empty_result() -> dict:
        empty_df = pd.DataFrame({name: [] for name in columns})
        return {'amd_df': empty_df.copy(), 'nvidia_df': empty_df.copy()}

    if historical_df.empty or 'date' not in historical_df.columns:
        return empty_result()

    # astype(str) keeps the .str accessor usable even if the index is not
    # made of strings.
    matches = historical_df.index.astype(str).str.lower() == model_name.lower()
    model_data = historical_df[matches]
    if model_data.empty:
        return empty_result()

    # One row per date (the first occurrence wins), in ascending date order.
    # This replaces re-filtering the whole frame for every date and again for
    # every previous date.
    per_date = model_data.drop_duplicates(subset='date', keep='first').sort_values('date')

    rows = {'amd': [], 'nvidia': []}
    previous = {'amd': None, 'nvidia': None}
    for _, row in per_date.iterrows():
        for platform in rows:
            counts = _model_platform_counts(row, platform)
            before = previous[platform]
            for test_type, count in counts.items():
                rows[platform].append({
                    'date': row['date'],
                    'count': count,
                    'test_type': test_type,
                    # With no earlier date to compare against, report 0
                    # instead of the full count, as the summary tables do.
                    'change': count - before[test_type] if before is not None else 0,
                })
            previous[platform] = counts

    return {
        'amd_df': pd.DataFrame(rows['amd'], columns=columns),
        'nvidia_df': pd.DataFrame(rows['nvidia'], columns=columns),
    }
130
+
131
def create_time_series_summary_gradio(historical_df: pd.DataFrame) -> dict:
    """Build the dashboard-wide history figures.

    Args:
        historical_df: Historical results with a ``date`` column. Every row
            is passed to ``extract_model_data``; its first two return values
            are read as the AMD and NVIDIA stats mappings (keys ``passed``,
            ``failed``, ``error`` and ``skipped``).

    Returns:
        A dict of Plotly figures keyed ``failure_rates``, ``amd_tests`` and
        ``nvidia_tests``. When the frame is empty or has no ``date`` column,
        all three keys refer to one shared placeholder figure.
    """

    # The style helpers return fresh dicts on every call so that no two
    # figures ever share a layout object.
    def _canvas():
        return dict(
            height=500,
            font=dict(size=16, color='#CCCCCC'),
            paper_bgcolor='#000000',
            plot_bgcolor='#1a1a1a',
        )

    def _legend():
        return dict(
            font=dict(size=16),
            bgcolor='rgba(0,0,0,0.5)',
            orientation="h",
            yanchor="bottom",
            y=-0.4,
            xanchor="center",
            x=0.5,
        )

    def _axis(title=None):
        grid = dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True)
        return grid if title is None else dict(title=title, **grid)

    if historical_df.empty or 'date' not in historical_df.columns:
        placeholder = go.Figure()
        placeholder.update_layout(
            title="No historical data available",
            **_canvas(),
            margin=dict(b=130),
        )
        return {
            'failure_rates': placeholder,
            'amd_tests': placeholder,
            'nvidia_tests': placeholder,
        }

    # ---- Aggregate every model's results into one record per date ----
    daily_stats = []
    for day in sorted(historical_df['date'].unique()):
        tally = {
            'amd': dict(tests=0, failures=0, passed=0, failed=0, skipped=0),
            'nvidia': dict(tests=0, failures=0, passed=0, failed=0, skipped=0),
        }
        for _, model_row in historical_df[historical_df['date'] == day].iterrows():
            amd_stats, nvidia_stats = extract_model_data(model_row)[:2]
            for platform, stats in (('amd', amd_stats), ('nvidia', nvidia_stats)):
                bucket = tally[platform]

                # Failure rate: errors count as failures and skipped tests
                # are left out of the denominator (the original author notes
                # this mirrors summary_page.py).
                executed = stats['passed'] + stats['failed'] + stats['error']
                if executed > 0:
                    bucket['tests'] += executed
                    bucket['failures'] += stats['failed'] + stats['error']

                # Count series, which additionally track skipped tests.
                # NOTE(review): the indentation of the captured source was
                # lost; these accumulators are assumed to sit outside the
                # guard above -- confirm against the original file.
                bucket['passed'] += stats['passed']
                bucket['failed'] += stats['failed'] + stats['error']
                bucket['skipped'] += stats['skipped']

        entry = {'date': day}
        for platform, bucket in tally.items():
            has_tests = bucket['tests'] > 0
            entry[platform] = {
                'rate': (bucket['failures'] / bucket['tests'] * 100) if has_tests else 0,
                'passed': bucket['passed'],
                'failed': bucket['failed'],
                'skipped': bucket['skipped'],
            }
        daily_stats.append(entry)

    # ---- Long-form frames; the earliest date always has change == 0 ----
    rate_records = []
    for idx, entry in enumerate(daily_stats):
        for label, platform in (('AMD', 'amd'), ('NVIDIA', 'nvidia')):
            rate = entry[platform]['rate']
            delta = rate - daily_stats[idx - 1][platform]['rate'] if idx > 0 else 0
            rate_records.append(
                {'date': entry['date'], 'failure_rate': rate, 'platform': label, 'change': delta}
            )
    failure_rate_df = pd.DataFrame(rate_records)

    def _count_frame(platform):
        rows = []
        for idx, entry in enumerate(daily_stats):
            today = entry[platform]
            earlier = daily_stats[idx - 1][platform] if idx > 0 else None
            for label, field in (('Passed', 'passed'), ('Failed', 'failed'), ('Skipped', 'skipped')):
                delta = today[field] - earlier[field] if earlier is not None else 0
                rows.append(
                    {'date': entry['date'], 'count': today[field], 'test_type': label, 'change': delta}
                )
        return pd.DataFrame(rows)

    amd_df = _count_frame('amd')
    nvidia_df = _count_frame('nvidia')

    # ---- Failure-rate figure: NVIDIA trace is added first, then AMD ----
    fig_failure_rates = go.Figure()
    trace_styles = (
        # (platform, line colour, marker fill, hover template)
        ('NVIDIA', '#76B900', '#FFFFFF',
         '<b>NVIDIA</b><br>Date: %{x}<br>Failure Rate: %{y:.2f}%<extra></extra>'),
        ('AMD', '#ED1C24', '#404040',
         '<b>AMD</b><br>Date: %{x}<br>Failure Rate: %{y:.2f}%<extra></extra>'),
    )
    for label, line_color, marker_fill, hover in trace_styles:
        series = failure_rate_df[failure_rate_df['platform'] == label]
        if series.empty:
            continue
        fig_failure_rates.add_trace(go.Scatter(
            x=series['date'],
            y=series['failure_rate'],
            mode='lines+markers',
            name=label,
            line=dict(color=line_color, width=3),
            # Marker border reuses the line colour.
            marker=dict(size=12, color=marker_fill, line=dict(color=line_color, width=2)),
            hovertemplate=hover,
        ))

    # Keep ``title`` ahead of ``title_font_size``, as in the original call.
    fig_failure_rates.update_layout(
        title="Overall Failure Rates Over Time",
        **_canvas(),
        title_font_size=20,
        legend=_legend(),
        xaxis=_axis('Date'),
        yaxis=_axis('Failure Rate (%)'),
        hovermode='x unified',
        margin=dict(b=130),
    )

    # ---- Per-platform test-count figures ----
    def _count_figure(frame, title):
        fig = px.line(
            frame,
            x='date',
            y='count',
            color='test_type',
            color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
            title=title,
            labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'},
        )
        fig.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
        fig.update_layout(
            **_canvas(),
            title_font_size=20,
            legend=_legend(),
            xaxis=_axis(),
            yaxis=_axis(),
            hovermode='x unified',
            margin=dict(b=130),
        )
        return fig

    fig_amd = _count_figure(amd_df, "AMD Test Results Over Time")
    fig_nvidia = _count_figure(nvidia_df, "NVIDIA Test Results Over Time")

    return {
        'failure_rates': fig_failure_rates,
        'amd_tests': fig_amd,
        'nvidia_tests': fig_nvidia,
    }
374
+
375
+
376
def create_model_time_series_gradio(historical_df: pd.DataFrame, model_name: str) -> dict:
    """Build the AMD and NVIDIA history figures for a single model.

    Args:
        historical_df: Historical results indexed by model name, with a
            ``date`` column and per-platform count columns
            (``success_*``, ``failed_multi_no_*``, ``failed_single_no_*``,
            ``skipped_*``). Count columns that are absent are read as 0.
        model_name: Model to plot; matched case-insensitively against the
            index and upper-cased in the figure titles.

    Returns:
        ``{'amd_plot': Figure, 'nvidia_plot': Figure}``. Both are titled,
        trace-less placeholders when the frame is empty, has no ``date``
        column, or holds no rows for the model.
    """

    def _canvas():
        # Fresh dict per call so figures never share layout objects.
        return dict(
            height=500,
            font=dict(size=16, color='#CCCCCC'),
            paper_bgcolor='#000000',
            plot_bgcolor='#1a1a1a',
        )

    def _placeholders():
        amd_blank = go.Figure()
        amd_blank.update_layout(
            title=f"{model_name.upper()} - AMD Results Over Time",
            **_canvas(),
            margin=dict(b=130),
        )
        nvidia_blank = go.Figure()
        nvidia_blank.update_layout(
            title=f"{model_name.upper()} - NVIDIA Results Over Time",
            **_canvas(),
            margin=dict(b=130),
        )
        return {'amd_plot': amd_blank, 'nvidia_plot': nvidia_blank}

    if historical_df.empty or 'date' not in historical_df.columns:
        return _placeholders()

    model_data = historical_df[historical_df.index.str.lower() == model_name.lower()]
    if model_data.empty:
        return _placeholders()

    def _tally(row, passed_col, multi_col, single_col, skipped_col):
        # (passed, failed, skipped); failed sums the multi- and single-GPU columns.
        return (
            row.get(passed_col, 0),
            row.get(multi_col, 0) + row.get(single_col, 0),
            row.get(skipped_col, 0),
        )

    amd_records = []
    nvidia_records = []
    platforms = (
        (amd_records, ('success_amd', 'failed_multi_no_amd', 'failed_single_no_amd', 'skipped_amd')),
        (nvidia_records, ('success_nvidia', 'failed_multi_no_nvidia', 'failed_single_no_nvidia', 'skipped_nvidia')),
    )

    timeline = sorted(model_data['date'].unique())
    for pos, day in enumerate(timeline):
        day_rows = model_data[model_data['date'] == day]
        if day_rows.empty:
            continue
        # Only the first row recorded for a date is used.
        current = day_rows.iloc[0]

        # Row of the preceding date, if there is one; otherwise change stays 0.
        previous = None
        if pos > 0:
            earlier_rows = model_data[model_data['date'] == timeline[pos - 1]]
            if not earlier_rows.empty:
                previous = earlier_rows.iloc[0]

        for sink, columns in platforms:
            now = _tally(current, *columns)
            before = _tally(previous, *columns) if previous is not None else None
            for slot, label in enumerate(('Passed', 'Failed', 'Skipped')):
                delta = now[slot] - before[slot] if before is not None else 0
                sink.append({'date': day, 'count': now[slot], 'test_type': label, 'change': delta})

    amd_df = pd.DataFrame(amd_records)
    nvidia_df = pd.DataFrame(nvidia_records)

    def _history_figure(frame, title):
        fig = px.line(
            frame,
            x='date',
            y='count',
            color='test_type',
            color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
            title=title,
            labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'},
        )
        fig.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
        fig.update_layout(
            **_canvas(),
            title_font_size=20,
            legend=dict(
                font=dict(size=16),
                bgcolor='rgba(0,0,0,0.5)',
                orientation="h",
                yanchor="bottom",
                y=-0.4,
                xanchor="center",
                x=0.5,
            ),
            xaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
            yaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
            hovermode='x unified',
            margin=dict(b=130),
        )
        return fig

    fig_amd = _history_figure(amd_df, f"{model_name.upper()} - AMD Results Over Time")
    fig_nvidia = _history_figure(nvidia_df, f"{model_name.upper()} - NVIDIA Results Over Time")

    return {
        'amd_plot': fig_amd,
        'nvidia_plot': fig_nvidia,
    }