Spaces:

stvnnnnnn
/

nl2sql-tapex-backend

Sleeping

stvnnnnnn committed on Oct 13

Commit

79b7bce

verified ·

1 Parent(s): 619ce97

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -44,12 +44,29 @@ def get_model_and_tokenizer():
 @lru_cache(maxsize=32)
 def get_table(split: str, index: int, max_rows: int) -> pd.DataFrame:
-    ds = load_dataset("Salesforce/wikisql", split=split)  # avoids the parquet revision
-    if index < 0 or index >= len(ds):
-        raise IndexError(f"TABLE_INDEX fuera de rango (0..{len(ds)-1}).")
-    ex = ds[index]
-    header = [str(h) for h in ex["table"]["header"]]
-    rows = ex["table"]["rows"][:max_rows]
     df = pd.DataFrame(rows, columns=header)
     df.columns = [str(c) for c in df.columns]
     return df

 @lru_cache(maxsize=32)
 def get_table(split: str, index: int, max_rows: int) -> pd.DataFrame:
+    """
+    Load a WikiSQL table using parquet files (no local script).
+    Compatible with free Spaces (CPU).
+    """
+    import pyarrow.parquet as pq
+    import tempfile  # NOTE(review): not used anywhere in the visible body
+    from huggingface_hub import hf_hub_download
+    # Direct download of the dataset in parquet format
+    parquet_path = hf_hub_download(
+        repo_id="Salesforce/wikisql",
+        filename=f"data/{split}-00000-of-00001.parquet",
+        repo_type="dataset"
+    )
+    # Read the parquet file with pyarrow/pandas
+    table = pq.read_table(parquet_path)
+    df_full = table.to_pandas()
+    # Each record has a "table" column with a complex structure → parse it
+    example = df_full.iloc[index]  # NOTE(review): no explicit range check on index here, unlike the removed version
+    header = [str(h) for h in example["table"]["header"]]
+    rows = example["table"]["rows"][:max_rows]
     df = pd.DataFrame(rows, columns=header)
     df.columns = [str(c) for c in df.columns]
     return df