manueldeprada HF Staff committed on
Commit
0c04c7f
·
verified ·
1 Parent(s): be26939

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. data.py +36 -27
app.py CHANGED
@@ -395,11 +395,11 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
395
  try:
396
  Ci_results.load_historical_data(start_date, end_date)
397
  if Ci_results.historical_df.empty:
398
- return gr.update(), "No historical data found for the selected date range."
399
 
400
  # Create time-series summary plot
401
  time_series_plot = create_time_series_summary(Ci_results.historical_df)
402
- return time_series_plot, f"Loaded historical data from {start_date} to {end_date}"
403
  except Exception as e:
404
  logger.error(f"Error loading historical data: {e}")
405
  return gr.update(), f"Error loading historical data: {str(e)}"
@@ -414,7 +414,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
414
  def show_time_series_model(selected_model):
415
  """Show time-series view for a specific model."""
416
  if Ci_results.historical_df.empty:
417
- return gr.update(), "No historical data loaded. Please load historical data first."
418
 
419
  try:
420
  time_series_plot = create_model_time_series(Ci_results.historical_df, selected_model)
 
395
  try:
396
  Ci_results.load_historical_data(start_date, end_date)
397
  if Ci_results.historical_df.empty:
398
+ return gr.update(), f"No historical data found for the selected date range ({start_date} to {end_date}). Please try a different date range."
399
 
400
  # Create time-series summary plot
401
  time_series_plot = create_time_series_summary(Ci_results.historical_df)
402
+ return time_series_plot, f"Loaded historical data from {start_date} to {end_date} ({len(Ci_results.historical_df)} records)"
403
  except Exception as e:
404
  logger.error(f"Error loading historical data: {e}")
405
  return gr.update(), f"Error loading historical data: {str(e)}"
 
414
  def show_time_series_model(selected_model):
415
  """Show time-series view for a specific model."""
416
  if Ci_results.historical_df.empty:
417
+ return gr.update()
418
 
419
  try:
420
  time_series_plot = create_model_time_series(Ci_results.historical_df, selected_model)
data.py CHANGED
@@ -102,57 +102,57 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
102
  def get_available_dates() -> List[str]:
103
  """Get list of available dates from both AMD and NVIDIA datasets."""
104
  try:
105
- # Get AMD dates
106
- amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
107
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
 
108
 
109
- # Get NVIDIA dates
110
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
111
  files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
 
112
 
113
  # Extract dates from file paths
114
  amd_dates = set()
115
  for file_path in files_amd:
116
- pattern = r'transformers_daily_ci(.*?)/(\d{4}-\d{2}-\d{2})'
 
117
  match = re.search(pattern, file_path)
118
  if match:
119
- amd_dates.add(match.group(2))
120
 
121
  nvidia_dates = set()
122
  for file_path in files_nvidia:
123
- pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})'
 
124
  match = re.search(pattern, file_path)
125
  if match:
126
  nvidia_dates.add(match.group(1))
127
 
 
 
 
128
  # Return intersection of both datasets (dates where both have data)
129
  common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
 
130
  return common_dates[:30] # Limit to last 30 days for performance
131
 
132
  except Exception as e:
133
  logger.error(f"Error getting available dates: {e}")
134
- # Return sample dates for fallback
135
- today = datetime.now()
136
- return [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(7)]
137
 
138
 
139
  def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
140
  """Get data for a specific date."""
141
  try:
142
- # Construct paths for specific date
143
- amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/**/runs/{target_date}/**/ci_results_run_models_gpu/model_results.json"
144
  nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
145
 
146
- # Find matching files
147
- files_amd = fs.glob(amd_src, refresh=True)
148
- files_nvidia = fs.glob(nvidia_src, refresh=True)
149
-
150
- if not files_amd or not files_nvidia:
151
- raise FileNotFoundError(f"No data found for date {target_date}")
152
-
153
- # Use the first matching file for each
154
- df_amd, _ = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
155
- df_nvidia, _ = read_one_dataframe(f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/main/{target_date}/ci_results_run_models_gpu/model_results.json", "nvidia")
156
 
157
  # Join both dataframes
158
  joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
@@ -167,8 +167,8 @@ def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
167
 
168
  except Exception as e:
169
  logger.error(f"Error getting data for date {target_date}: {e}")
170
- # Fallback to sample data
171
- return get_sample_data()
172
 
173
 
174
  def get_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
@@ -184,16 +184,21 @@ def get_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
184
  date_str = current_dt.strftime("%Y-%m-%d")
185
  try:
186
  df, _ = get_data_for_date(date_str)
187
- df['date'] = date_str
188
- historical_data.append(df)
189
- logger.info(f"Loaded data for {date_str}")
 
 
 
 
190
  except Exception as e:
191
  logger.warning(f"Could not load data for {date_str}: {e}")
192
 
193
  current_dt += timedelta(days=1)
194
 
195
  if not historical_data:
196
- raise ValueError("No historical data found for the specified range")
 
197
 
198
  # Combine all dataframes
199
  combined_df = pd.concat(historical_data, ignore_index=False)
@@ -317,6 +322,10 @@ class CIResults:
317
  try:
318
  self.available_dates = get_available_dates()
319
  logger.info(f"Available dates: {len(self.available_dates)} dates")
 
 
 
 
320
  except Exception as e:
321
  logger.error(f"Error loading available dates: {e}")
322
  self.available_dates = []
 
102
  def get_available_dates() -> List[str]:
103
  """Get list of available dates from both AMD and NVIDIA datasets."""
104
  try:
105
+ # Get AMD dates - the path structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
106
+ amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
107
  files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
108
+ logger.info(f"Found {len(files_amd)} AMD files")
109
 
110
+ # Get NVIDIA dates - same structure
111
  nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
112
  files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
113
+ logger.info(f"Found {len(files_nvidia)} NVIDIA files")
114
 
115
  # Extract dates from file paths
116
  amd_dates = set()
117
  for file_path in files_amd:
118
+ # Pattern to match the date in the path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
119
+ pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
120
  match = re.search(pattern, file_path)
121
  if match:
122
+ amd_dates.add(match.group(1))
123
 
124
  nvidia_dates = set()
125
  for file_path in files_nvidia:
126
+ # Same pattern for NVIDIA
127
+ pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
128
  match = re.search(pattern, file_path)
129
  if match:
130
  nvidia_dates.add(match.group(1))
131
 
132
+ logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...") # Show first 5
133
+ logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...") # Show first 5
134
+
135
  # Return intersection of both datasets (dates where both have data)
136
  common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
137
+ logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
138
  return common_dates[:30] # Limit to last 30 days for performance
139
 
140
  except Exception as e:
141
  logger.error(f"Error getting available dates: {e}")
142
+ # Return empty list if no data available
143
+ return []
 
144
 
145
 
146
  def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
147
  """Get data for a specific date."""
148
  try:
149
+ # Construct paths for specific date - correct structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
150
+ amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
151
  nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
152
 
153
+ # Read dataframes directly
154
+ df_amd, _ = read_one_dataframe(amd_src, "amd")
155
+ df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
 
 
 
 
 
 
 
156
 
157
  # Join both dataframes
158
  joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
 
167
 
168
  except Exception as e:
169
  logger.error(f"Error getting data for date {target_date}: {e}")
170
+ # Return empty dataframe instead of sample data for historical functionality
171
+ return pd.DataFrame(), target_date
172
 
173
 
174
  def get_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
 
184
  date_str = current_dt.strftime("%Y-%m-%d")
185
  try:
186
  df, _ = get_data_for_date(date_str)
187
+ # Only add non-empty dataframes
188
+ if not df.empty:
189
+ df['date'] = date_str
190
+ historical_data.append(df)
191
+ logger.info(f"Loaded data for {date_str}")
192
+ else:
193
+ logger.warning(f"No data available for {date_str}")
194
  except Exception as e:
195
  logger.warning(f"Could not load data for {date_str}: {e}")
196
 
197
  current_dt += timedelta(days=1)
198
 
199
  if not historical_data:
200
+ logger.warning("No historical data found for the specified range")
201
+ return pd.DataFrame()
202
 
203
  # Combine all dataframes
204
  combined_df = pd.concat(historical_data, ignore_index=False)
 
322
  try:
323
  self.available_dates = get_available_dates()
324
  logger.info(f"Available dates: {len(self.available_dates)} dates")
325
+ if self.available_dates:
326
+ logger.info(f"Date range: {self.available_dates[-1]} to {self.available_dates[0]}")
327
+ else:
328
+ logger.warning("No available dates found")
329
  except Exception as e:
330
  logger.error(f"Error loading available dates: {e}")
331
  self.available_dates = []