Spaces:

wuhp
/

test-detr

Sleeping

App Files Files Community

wuhp committed on Sep 18

Commit

0257e16

verified ·

1 Parent(s): c54a7a8

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -5

app.py CHANGED Viewed

@@ -133,6 +133,38 @@ def get_latest_version(api_key, workspace, project):
         return None
 def download_dataset(api_key, workspace, project, version):
     """Downloads a single dataset from Roboflow (yolov8 format works fine for RT-DETR)."""
     try:
@@ -145,7 +177,15 @@ def download_dataset(api_key, workspace, project, version):
         with open(data_yaml_path, 'r') as f:
             data_yaml = yaml.safe_load(f)
-        class_names = data_yaml.get('names', [])
         splits = [s for s in ['train', 'valid', 'test']
                   if os.path.exists(os.path.join(dataset.location, s))]
@@ -358,7 +398,8 @@ def load_datasets_handler(api_key, url_file, progress=gr.Progress()):
         msg = "No datasets were loaded successfully.\n" + "\n".join([f"- {u}: {why}" for u, why in failures[:10]])
         raise gr.Error(msg)
-    all_names = sorted(list(set(n for _, names, _, _ in dataset_info for n in names)))
     class_map = {name: name for name in all_names}
     # Initial preview uses "keep all" mapping
@@ -448,9 +489,6 @@ def finalize_handler(dataset_info, class_df, progress=gr.Progress()):
         # Sum limits for final_name over any merged originals
         class_limits[final_name] = class_limits.get(final_name, 0) + int(row["Max Images"])
-    # Any original not present in mapping will map to itself (keep behavior)
-    # BUT we do not want to include classes with 0 limit in the final dataset
-    # finalize_merged_dataset uses the limits dict to decide active classes.
     status, path = finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress)
     return status, path

         return None
+# --- NEW: normalize class names from data.yaml ---
+def _extract_class_names(data_yaml):
+    """
+    Return a list[str] of class names in index order.
+    Handles:
+      - list (possibly containing non-str types)
+      - dict with numeric keys (e.g., {0: 'cat', 1: 'dog'})
+      - fallback to ['class_0', ..., f'class_{nc-1}'] if names missing
+    """
+    names = data_yaml.get('names', None)
+    if isinstance(names, dict):
+        def _k(x):
+            try:
+                return int(x)
+            except Exception:
+                return str(x)
+        ordered_keys = sorted(names.keys(), key=_k)
+        names_list = [names[k] for k in ordered_keys]
+    elif isinstance(names, list):
+        names_list = names
+    else:
+        nc = data_yaml.get('nc', 0)
+        try:
+            nc = int(nc)
+        except Exception:
+            nc = 0
+        names_list = [f"class_{i}" for i in range(nc)]
+    return [str(x) for x in names_list]
 def download_dataset(api_key, workspace, project, version):
     """Downloads a single dataset from Roboflow (yolov8 format works fine for RT-DETR)."""
     try:
         with open(data_yaml_path, 'r') as f:
             data_yaml = yaml.safe_load(f)
+        # --- UPDATED: use normalized names and optional sanity log ---
+        class_names = _extract_class_names(data_yaml)
+        try:
+            nc = int(data_yaml.get('nc', len(class_names)))
+        except Exception:
+            nc = len(class_names)
+        if len(class_names) != nc:
+            logging.warning(f"[{project}-v{version}] names length ({len(class_names)}) != nc ({nc}); using normalized names.")
         splits = [s for s in ['train', 'valid', 'test']
                   if os.path.exists(os.path.join(dataset.location, s))]
         msg = "No datasets were loaded successfully.\n" + "\n".join([f"- {u}: {why}" for u, why in failures[:10]])
         raise gr.Error(msg)
+    # --- UPDATED: ensure all names are strings before sorting
+    all_names = sorted({str(n) for _, names, _, _ in dataset_info for n in names})
     class_map = {name: name for name in all_names}
     # Initial preview uses "keep all" mapping
         # Sum limits for final_name over any merged originals
         class_limits[final_name] = class_limits.get(final_name, 0) + int(row["Max Images"])
     status, path = finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress)
     return status, path