badaoui HF Staff committed on
Commit
c3ed9cd
·
1 Parent(s): 646bbcb

fix first seen date feat

Browse files
Files changed (1) hide show
  1. data.py +33 -8
data.py CHANGED
@@ -68,12 +68,21 @@ def generate_fake_dates(num_days: int = 7) -> List[str]:
68
 
69
  def parse_json_field(value) -> dict:
70
  """Safely parse a JSON field that might be a string or dict."""
 
 
71
  if isinstance(value, str):
72
  try:
73
  return json.loads(value)
74
  except:
75
  return {}
76
- return value if isinstance(value, dict) else {}
 
 
 
 
 
 
 
77
 
78
  def extract_date_from_path(path: str, pattern: str) -> Optional[str]:
79
  """Extract date from file path using regex pattern."""
@@ -368,21 +377,37 @@ def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
368
 
369
  def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
370
  """Find the first date when a specific test failure appeared in historical data."""
371
- if historical_df.empty:
372
  return None
373
 
374
  try:
375
- model_data = historical_df[historical_df.index == model_name.lower()].copy()
 
 
376
  if model_data.empty:
377
  return None
378
 
 
 
 
 
379
  # Check each date (oldest first) for this failure
380
  for _, row in model_data.sort_values('date').iterrows():
381
- failures = parse_json_field(row.get(f'failures_{device}'))
382
- if gpu_type in failures:
383
- for test in failures[gpu_type]:
384
- if test.get('line', '') == test_name:
385
- return row.get('date')
 
 
 
 
 
 
 
 
 
 
386
  return None
387
 
388
  except Exception as e:
 
68
 
69
  def parse_json_field(value) -> dict:
70
  """Safely parse a JSON field that might be a string or dict."""
71
+ if value is None or pd.isna(value):
72
+ return {}
73
  if isinstance(value, str):
74
  try:
75
  return json.loads(value)
76
  except:
77
  return {}
78
+ # Handle dict-like objects (including pandas Series/dict)
79
+ if isinstance(value, dict):
80
+ return value
81
+ # Try to convert to dict if possible
82
+ try:
83
+ return dict(value) if hasattr(value, '__iter__') else {}
84
+ except:
85
+ return {}
86
 
87
  def extract_date_from_path(path: str, pattern: str) -> Optional[str]:
88
  """Extract date from file path using regex pattern."""
 
377
 
378
  def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
379
  """Find the first date when a specific test failure appeared in historical data."""
380
+ if historical_df is None or historical_df.empty:
381
  return None
382
 
383
  try:
384
+ model_name_lower = model_name.lower()
385
+ # Filter by model name (case-insensitive)
386
+ model_data = historical_df[historical_df.index.str.lower() == model_name_lower].copy()
387
  if model_data.empty:
388
  return None
389
 
390
+ # Ensure we have a 'date' column
391
+ if 'date' not in model_data.columns:
392
+ return None
393
+
394
  # Check each date (oldest first) for this failure
395
  for _, row in model_data.sort_values('date').iterrows():
396
+ failures_raw = row.get(f'failures_{device}')
397
+ if failures_raw is None or pd.isna(failures_raw):
398
+ continue
399
+
400
+ # Parse failures (could be dict, string, or already parsed)
401
+ failures = parse_json_field(failures_raw)
402
+ if not isinstance(failures, dict) or gpu_type not in failures:
403
+ continue
404
+
405
+ # Check each test in this gpu_type
406
+ for test in failures.get(gpu_type, []):
407
+ if isinstance(test, dict) and test.get('line', '') == test_name:
408
+ date_value = row.get('date')
409
+ return date_value if date_value else None
410
+
411
  return None
412
 
413
  except Exception as e: