Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -395,11 +395,11 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
|
|
| 395 |
try:
|
| 396 |
Ci_results.load_historical_data(start_date, end_date)
|
| 397 |
if Ci_results.historical_df.empty:
|
| 398 |
-
return gr.update(), "No historical data found for the selected date range."
|
| 399 |
|
| 400 |
# Create time-series summary plot
|
| 401 |
time_series_plot = create_time_series_summary(Ci_results.historical_df)
|
| 402 |
-
return time_series_plot, f"Loaded historical data from {start_date} to {end_date}"
|
| 403 |
except Exception as e:
|
| 404 |
logger.error(f"Error loading historical data: {e}")
|
| 405 |
return gr.update(), f"Error loading historical data: {str(e)}"
|
|
@@ -414,7 +414,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
|
|
| 414 |
def show_time_series_model(selected_model):
|
| 415 |
"""Show time-series view for a specific model."""
|
| 416 |
if Ci_results.historical_df.empty:
|
| 417 |
-
return gr.update()
|
| 418 |
|
| 419 |
try:
|
| 420 |
time_series_plot = create_model_time_series(Ci_results.historical_df, selected_model)
|
|
|
|
| 395 |
try:
|
| 396 |
Ci_results.load_historical_data(start_date, end_date)
|
| 397 |
if Ci_results.historical_df.empty:
|
| 398 |
+
return gr.update(), f"No historical data found for the selected date range ({start_date} to {end_date}). Please try a different date range."
|
| 399 |
|
| 400 |
# Create time-series summary plot
|
| 401 |
time_series_plot = create_time_series_summary(Ci_results.historical_df)
|
| 402 |
+
return time_series_plot, f"Loaded historical data from {start_date} to {end_date} ({len(Ci_results.historical_df)} records)"
|
| 403 |
except Exception as e:
|
| 404 |
logger.error(f"Error loading historical data: {e}")
|
| 405 |
return gr.update(), f"Error loading historical data: {str(e)}"
|
|
|
|
| 414 |
def show_time_series_model(selected_model):
|
| 415 |
"""Show time-series view for a specific model."""
|
| 416 |
if Ci_results.historical_df.empty:
|
| 417 |
+
return gr.update()
|
| 418 |
|
| 419 |
try:
|
| 420 |
time_series_plot = create_model_time_series(Ci_results.historical_df, selected_model)
|
data.py
CHANGED
|
@@ -102,57 +102,57 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
|
|
| 102 |
def get_available_dates() -> List[str]:
|
| 103 |
"""Get list of available dates from both AMD and NVIDIA datasets."""
|
| 104 |
try:
|
| 105 |
-
# Get AMD dates
|
| 106 |
-
amd_src = "hf://datasets/optimum-amd/transformers_daily_ci
|
| 107 |
files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
|
|
|
|
| 108 |
|
| 109 |
-
# Get NVIDIA dates
|
| 110 |
nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
|
| 111 |
files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
|
|
|
|
| 112 |
|
| 113 |
# Extract dates from file paths
|
| 114 |
amd_dates = set()
|
| 115 |
for file_path in files_amd:
|
| 116 |
-
|
|
|
|
| 117 |
match = re.search(pattern, file_path)
|
| 118 |
if match:
|
| 119 |
-
amd_dates.add(match.group(
|
| 120 |
|
| 121 |
nvidia_dates = set()
|
| 122 |
for file_path in files_nvidia:
|
| 123 |
-
pattern
|
|
|
|
| 124 |
match = re.search(pattern, file_path)
|
| 125 |
if match:
|
| 126 |
nvidia_dates.add(match.group(1))
|
| 127 |
|
|
|
|
|
|
|
|
|
|
| 128 |
# Return intersection of both datasets (dates where both have data)
|
| 129 |
common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
|
|
|
|
| 130 |
return common_dates[:30] # Limit to last 30 days for performance
|
| 131 |
|
| 132 |
except Exception as e:
|
| 133 |
logger.error(f"Error getting available dates: {e}")
|
| 134 |
-
# Return
|
| 135 |
-
|
| 136 |
-
return [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(7)]
|
| 137 |
|
| 138 |
|
| 139 |
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
|
| 140 |
"""Get data for a specific date."""
|
| 141 |
try:
|
| 142 |
-
# Construct paths for specific date
|
| 143 |
-
amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci
|
| 144 |
nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
|
| 145 |
|
| 146 |
-
#
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
if not files_amd or not files_nvidia:
|
| 151 |
-
raise FileNotFoundError(f"No data found for date {target_date}")
|
| 152 |
-
|
| 153 |
-
# Use the first matching file for each
|
| 154 |
-
df_amd, _ = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
|
| 155 |
-
df_nvidia, _ = read_one_dataframe(f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/main/{target_date}/ci_results_run_models_gpu/model_results.json", "nvidia")
|
| 156 |
|
| 157 |
# Join both dataframes
|
| 158 |
joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
|
|
@@ -167,8 +167,8 @@ def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
|
|
| 167 |
|
| 168 |
except Exception as e:
|
| 169 |
logger.error(f"Error getting data for date {target_date}: {e}")
|
| 170 |
-
#
|
| 171 |
-
return
|
| 172 |
|
| 173 |
|
| 174 |
def get_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
|
|
@@ -184,16 +184,21 @@ def get_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
|
|
| 184 |
date_str = current_dt.strftime("%Y-%m-%d")
|
| 185 |
try:
|
| 186 |
df, _ = get_data_for_date(date_str)
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
except Exception as e:
|
| 191 |
logger.warning(f"Could not load data for {date_str}: {e}")
|
| 192 |
|
| 193 |
current_dt += timedelta(days=1)
|
| 194 |
|
| 195 |
if not historical_data:
|
| 196 |
-
|
|
|
|
| 197 |
|
| 198 |
# Combine all dataframes
|
| 199 |
combined_df = pd.concat(historical_data, ignore_index=False)
|
|
@@ -317,6 +322,10 @@ class CIResults:
|
|
| 317 |
try:
|
| 318 |
self.available_dates = get_available_dates()
|
| 319 |
logger.info(f"Available dates: {len(self.available_dates)} dates")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
except Exception as e:
|
| 321 |
logger.error(f"Error loading available dates: {e}")
|
| 322 |
self.available_dates = []
|
|
|
|
def get_available_dates() -> List[str]:
    """Get list of available dates from both AMD and NVIDIA datasets.

    Returns:
        Up to 30 most-recent dates (``YYYY-MM-DD``, newest first) for which
        BOTH the AMD and NVIDIA daily-CI datasets contain a
        ``model_results.json``. Returns an empty list when either dataset
        cannot be listed (e.g. network/auth failure).
    """
    try:
        # Get AMD dates - the path structure is: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
        logger.info(f"Found {len(files_amd)} AMD files")

        # Get NVIDIA dates - same structure
        nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
        files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
        logger.info(f"Found {len(files_nvidia)} NVIDIA files")

        # The date pattern is identical for both datasets, so compile it once
        # here instead of re-assigning the literal on every loop iteration.
        date_pattern = re.compile(
            r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
        )

        def _extract_dates(file_paths) -> set:
            # Collect the YYYY-MM-DD component from each matching path.
            dates = set()
            for file_path in file_paths:
                match = date_pattern.search(file_path)
                if match:
                    dates.add(match.group(1))
            return dates

        amd_dates = _extract_dates(files_amd)
        nvidia_dates = _extract_dates(files_nvidia)

        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")  # Show first 5
        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")  # Show first 5

        # Return intersection of both datasets (dates where both have data)
        common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
        logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
        return common_dates[:30]  # Limit to last 30 days for performance

    except Exception as e:
        logger.error(f"Error getting available dates: {e}")
        # Return empty list if no data available
        return []
|
|
|
|
| 144 |
|
| 145 |
|
| 146 |
def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
|
| 147 |
"""Get data for a specific date."""
|
| 148 |
try:
|
| 149 |
+
# Construct paths for specific date - correct structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
|
| 150 |
+
amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
|
| 151 |
nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
|
| 152 |
|
| 153 |
+
# Read dataframes directly
|
| 154 |
+
df_amd, _ = read_one_dataframe(amd_src, "amd")
|
| 155 |
+
df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
# Join both dataframes
|
| 158 |
joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
|
|
|
|
| 167 |
|
| 168 |
except Exception as e:
|
| 169 |
logger.error(f"Error getting data for date {target_date}: {e}")
|
| 170 |
+
# Return empty dataframe instead of sample data for historical functionality
|
| 171 |
+
return pd.DataFrame(), target_date
|
| 172 |
|
| 173 |
|
| 174 |
def get_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
|
|
|
|
| 184 |
date_str = current_dt.strftime("%Y-%m-%d")
|
| 185 |
try:
|
| 186 |
df, _ = get_data_for_date(date_str)
|
| 187 |
+
# Only add non-empty dataframes
|
| 188 |
+
if not df.empty:
|
| 189 |
+
df['date'] = date_str
|
| 190 |
+
historical_data.append(df)
|
| 191 |
+
logger.info(f"Loaded data for {date_str}")
|
| 192 |
+
else:
|
| 193 |
+
logger.warning(f"No data available for {date_str}")
|
| 194 |
except Exception as e:
|
| 195 |
logger.warning(f"Could not load data for {date_str}: {e}")
|
| 196 |
|
| 197 |
current_dt += timedelta(days=1)
|
| 198 |
|
| 199 |
if not historical_data:
|
| 200 |
+
logger.warning("No historical data found for the specified range")
|
| 201 |
+
return pd.DataFrame()
|
| 202 |
|
| 203 |
# Combine all dataframes
|
| 204 |
combined_df = pd.concat(historical_data, ignore_index=False)
|
|
|
|
| 322 |
try:
|
| 323 |
self.available_dates = get_available_dates()
|
| 324 |
logger.info(f"Available dates: {len(self.available_dates)} dates")
|
| 325 |
+
if self.available_dates:
|
| 326 |
+
logger.info(f"Date range: {self.available_dates[-1]} to {self.available_dates[0]}")
|
| 327 |
+
else:
|
| 328 |
+
logger.warning("No available dates found")
|
| 329 |
except Exception as e:
|
| 330 |
logger.error(f"Error loading available dates: {e}")
|
| 331 |
self.available_dates = []
|