Spaces:
Sleeping
Sleeping
File size: 3,331 Bytes
f1a5ea7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
"""Utility functions for the Gradio GUI."""
from __future__ import annotations
import csv
from pathlib import Path
try:
import gradio as gr
except ImportError as exc:
raise ImportError("Gradio is required for GUI utilities") from exc
def validate_csv(file_path: str | None) -> tuple[bool, str]:
    """Check that a CSV file has the expected ``seg_id,text`` layout.

    The first row must be exactly the two-cell header ``seg_id``, ``text``,
    and at least one non-blank data row must follow it.

    Args:
        file_path: Path to the CSV file. ``None`` or an empty string is
            reported as "No file provided".

    Returns:
        A tuple ``(is_valid, message)``. ``is_valid`` is True only when the
        header matches and a data row exists; ``message`` describes the
        outcome. Errors while opening or parsing the file are not raised;
        they are reported as ``(False, "Error reading file: ...")``.
    """
    if not file_path:
        return False, "No file provided"

    try:
        # utf-8-sig drops a leading byte-order mark (often written by
        # spreadsheet exports) that would otherwise become part of the first
        # header cell and fail the comparison below. newline="" is what the
        # csv module requires so line breaks inside quoted fields parse
        # correctly.
        with open(file_path, "r", encoding="utf-8-sig", newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)
            if not header:
                return False, "Empty file"

            # The header must be exactly two columns: seg_id and text.
            if len(header) != 2:
                return False, f"Expected 2 columns, found {len(header)}"
            if header[0] != "seg_id" or header[1] != "text":
                return False, f"Expected columns 'seg_id' and 'text', found {header}"

            # csv.reader yields [] for blank lines; look past them so a blank
            # line right after the header is not mistaken for "no data".
            if not any(reader):
                return False, "No data rows found"

        return True, "Valid CSV format"
    except Exception as e:
        # Deliberately broad: this feeds a UI notification, so any failure is
        # turned into a message rather than propagated.
        return False, f"Error reading file: {e}"
def load_csv_preview(file_path: str | None, max_rows: int | None = None) -> dict:
    """Build a Gradio update that previews the rows of a CSV file.

    The first row of the file is used as the column headers; the following
    non-blank rows become the preview data.

    Args:
        file_path: Path to the CSV file. ``None`` or an empty string hides
            the preview.
        max_rows: Maximum number of data rows to load (None = load all rows).

    Returns:
        The result of ``gr.update(...)``: the rows and headers with
        ``visible=True`` when at least one data row was read, otherwise
        ``visible=False`` (no path, empty file, no data rows, or any error
        while reading).
    """
    if not file_path:
        return gr.update(visible=False)

    try:
        # utf-8-sig keeps a leading byte-order mark out of the first header
        # cell; newline="" is what the csv module requires so line breaks
        # inside quoted fields parse correctly.
        with open(file_path, "r", encoding="utf-8-sig", newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)
            if not header:
                return gr.update(visible=False)

            rows = []
            for row in reader:
                if max_rows is not None and len(rows) >= max_rows:
                    break
                # csv.reader yields [] for blank lines; they carry no data,
                # so they are neither shown nor counted against max_rows.
                if row:
                    rows.append(row)

            if not rows:
                return gr.update(visible=False)

            return gr.update(value=rows, visible=True, headers=header)
    except Exception:
        # Best-effort preview: any failure simply hides the component.
        return gr.update(visible=False)
def validate_and_notify(file_path: str | None, doc_type: str = "Document") -> str | None:
    """Validate an uploaded CSV and show a Gradio notification with the result.

    Calls ``validate_csv`` on the file, then raises a ``gr.Info`` toast when
    it is valid or a ``gr.Warning`` toast carrying the validation message
    when it is not.

    Args:
        file_path: Path to the CSV file.
        doc_type: Label used in the notification text
            (e.g., "Query document", "Source document").

    Returns:
        ``None`` when no file was provided. Otherwise ``file_path`` is
        returned unchanged, whether or not validation succeeded; validity is
        reported only through the notification.
    """
    if not file_path:
        return None

    is_valid, message = validate_csv(file_path)
    if is_valid:
        gr.Info(f"{doc_type} is valid!")
    else:
        gr.Warning(f"{doc_type} is invalid: {message}")
    return file_path
|