julian-schelb committed on
Commit
f1a5ea7
·
verified ·
1 Parent(s): dc017a9

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +106 -0
utils.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for the Gradio GUI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ from pathlib import Path
7
+
8
+ try:
9
+ import gradio as gr
10
+ except ImportError as exc:
11
+ raise ImportError("Gradio is required for GUI utilities") from exc
12
+
13
+
14
+ def validate_csv(file_path: str | None) -> tuple[bool, str]:
15
+ """Validate that a CSV file has the required format with 'seg_id' and 'text' columns.
16
+
17
+ Returns:
18
+ A tuple of (is_valid, message) where is_valid is True if valid, False otherwise.
19
+ """
20
+ if not file_path:
21
+ return False, "No file provided"
22
+
23
+ try:
24
+ with open(file_path, 'r', encoding='utf-8') as f:
25
+ reader = csv.reader(f)
26
+ header = next(reader, None)
27
+
28
+ if not header:
29
+ return False, "Empty file"
30
+
31
+ # Check if header has exactly 2 columns: seg_id and text
32
+ if len(header) != 2:
33
+ return False, f"Expected 2 columns, found {len(header)}"
34
+
35
+ if header[0] != "seg_id" or header[1] != "text":
36
+ return False, f"Expected columns 'seg_id' and 'text', found {header}"
37
+
38
+ # Check if there's at least one data row
39
+ first_row = next(reader, None)
40
+ if not first_row:
41
+ return False, "No data rows found"
42
+
43
+ return True, "Valid CSV format"
44
+ except Exception as e:
45
+ return False, f"Error reading file: {str(e)}"
46
+
47
+
48
def load_csv_preview(file_path: str | None, max_rows: int | None = None) -> dict:
    """Load the rows of a CSV file for display in a Gradio component.

    Args:
        file_path: Path to the CSV file.
        max_rows: Maximum number of data rows to load (None = load all rows).

    Returns:
        A Gradio update that sets the loaded rows as the value, the first CSV
        row as the headers, and makes the component visible. When no path is
        given, the file has no header or no data rows, or reading fails, the
        update only hides the component.
    """
    if not file_path:
        return gr.update(visible=False)

    try:
        # 'utf-8-sig' keeps a byte-order mark out of the first header label;
        # newline='' lets the csv module do its own newline handling.
        with open(file_path, encoding="utf-8-sig", newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)

            if not header:
                return gr.update(visible=False)

            rows = []
            for row in reader:
                if max_rows is not None and len(rows) >= max_rows:
                    break
                # csv.reader yields [] for blank lines; they carry no cells,
                # so they are left out of the preview.
                if row:
                    rows.append(row)

            if not rows:
                return gr.update(visible=False)

            return gr.update(value=rows, visible=True, headers=header)
    except Exception:
        # Best-effort preview: any failure hides the component instead of
        # surfacing an error to the UI.
        return gr.update(visible=False)
83
+
84
+
85
def validate_and_notify(file_path: str | None, doc_type: str = "Document") -> str | None:
    """Validate an uploaded CSV and show a Gradio notification with the result.

    Args:
        file_path: Path to the CSV file.
        doc_type: Label used in the notification text (e.g., "Query document",
            "Source document").

    Returns:
        None when no file path is given; otherwise the unchanged file path,
        whether or not validation succeeded.
    """
    if not file_path:
        return None

    is_valid, message = validate_csv(file_path)

    if is_valid:
        gr.Info(f"{doc_type} is valid!")
    else:
        gr.Warning(f"{doc_type} is invalid: {message}")

    # NOTE(review): the path is returned even for an invalid file, although an
    # earlier docstring promised None in that case — confirm which is intended.
    return file_path