stvnnnnnn committed on
Commit
79b7bce
·
verified ·
1 Parent(s): 619ce97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -44,12 +44,29 @@ def get_model_and_tokenizer():
44
 
@lru_cache(maxsize=32)
def get_table(split: str, index: int, max_rows: int) -> pd.DataFrame:
    """Return the table of one WikiSQL example as a DataFrame.

    Args:
        split: Dataset split passed through to ``load_dataset``.
        index: Zero-based position of the example within the split.
        max_rows: Maximum number of table rows to keep.

    Returns:
        A DataFrame whose columns are the table headers coerced to ``str``.

    Raises:
        IndexError: If ``index`` falls outside ``0..len(dataset)-1``.
    """
    # Original author's note: loading this way avoids the parquet revision.
    dataset = load_dataset("Salesforce/wikisql", split=split)

    if not 0 <= index < len(dataset):
        raise IndexError(f"TABLE_INDEX fuera de rango (0..{len(dataset)-1}).")

    table = dataset[index]["table"]
    column_names = list(map(str, table["header"]))
    frame = pd.DataFrame(table["rows"][:max_rows], columns=column_names)
    # Re-normalise the labels to plain strings after construction.
    frame.columns = [str(label) for label in frame.columns]
    return frame
 
44
 
@lru_cache(maxsize=32)
def get_table(split: str, index: int, max_rows: int) -> pd.DataFrame:
    """Load one WikiSQL table from the dataset's parquet file.

    The parquet shard is fetched with ``hf_hub_download`` instead of going
    through a local dataset loading script (the original author notes this is
    meant to work on free CPU Spaces). Only the ``table`` column is read.

    Args:
        split: Split name, interpolated into the shard filename
            ``data/{split}-00000-of-00001.parquet``.
        index: Zero-based position of the example whose table is returned.
        max_rows: Maximum number of table rows to keep.

    Returns:
        A DataFrame with one column per table header (names coerced to
        ``str``). Results are memoised by ``lru_cache``, so repeated calls
        with the same arguments return the same object; copy it before
        mutating.

    Raises:
        IndexError: If ``index`` is negative or past the last example.
    """
    import pyarrow.parquet as pq
    from huggingface_hub import hf_hub_download

    # Direct download of the dataset shard in parquet format.
    parquet_path = hf_hub_download(
        repo_id="Salesforce/wikisql",
        filename=f"data/{split}-00000-of-00001.parquet",
        repo_type="dataset",
    )

    # Only the "table" column is used below; reading just that column avoids
    # converting the whole split to pandas to fetch a single row.
    examples = pq.read_table(parquet_path, columns=["table"]).column("table")

    # Reject negative indices explicitly: a positional lookup would otherwise
    # wrap around and silently return a table counted from the end.
    if index < 0 or index >= len(examples):
        raise IndexError(f"TABLE_INDEX fuera de rango (0..{len(examples)-1}).")

    # as_py() turns the nested struct into plain Python dicts and lists.
    record = examples[index].as_py()
    header = [str(h) for h in record["header"]]
    rows = record["rows"][:max_rows]
    df = pd.DataFrame(rows, columns=header)
    df.columns = [str(c) for c in df.columns]
    return df