File size: 3,331 Bytes
f1a5ea7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""Utility functions for the Gradio GUI."""

from __future__ import annotations

import csv
from pathlib import Path

try:
    import gradio as gr
except ImportError as exc:
    raise ImportError("Gradio is required for GUI utilities") from exc


def validate_csv(file_path: str | None) -> tuple[bool, str]:
    """Check that a CSV file has the header ``seg_id,text`` and at least one data row.

    The file is decoded as UTF-8. A leading byte-order mark is ignored so it
    cannot end up glued to the first column name, and blank lines between the
    header and the data are skipped when looking for a data row.

    Args:
        file_path: Path to the CSV file; None or an empty string means no file.

    Returns:
        A tuple ``(is_valid, message)``. ``is_valid`` is True only when the
        header is exactly ``seg_id``, ``text`` and at least one non-blank row
        follows it; ``message`` describes the outcome either way.
    """
    if not file_path:
        return False, "No file provided"

    try:
        # "utf-8-sig" strips an optional BOM; newline="" leaves line-ending
        # handling to the csv module (needed for newlines inside quoted fields).
        with open(file_path, "r", encoding="utf-8-sig", newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)

            # None means the file has no lines; [] means the first line is blank.
            if not header:
                return False, "Empty file"

            if len(header) != 2:
                return False, f"Expected 2 columns, found {len(header)}"

            if header != ["seg_id", "text"]:
                return False, f"Expected columns 'seg_id' and 'text', found {header}"

            # csv.reader yields [] for a blank line, so search for the first
            # non-empty row instead of inspecting only the very next line.
            if not any(reader):
                return False, "No data rows found"

            return True, "Valid CSV format"
    except Exception as e:
        # Validation boundary: any open/decode/parse failure is reported to
        # the caller as a message instead of propagating.
        return False, f"Error reading file: {e}"


def load_csv_preview(file_path: str | None, max_rows: int | None = None) -> dict:
    """Build a Gradio update that previews the rows of a CSV file.

    The first CSV row is used as the column headers and the following rows
    as the preview data. The file is decoded as UTF-8 and a leading
    byte-order mark is ignored so it does not show up in the first header.

    Args:
        file_path: Path to the CSV file; None or an empty string means no file.
        max_rows: Maximum number of data rows to load (None = load all rows).
            Zero or a negative value loads nothing.

    Returns:
        ``gr.update(value=rows, visible=True, headers=header)`` when a header
        and at least one row were read; ``gr.update(visible=False)`` when no
        file was given, the header or rows are missing, or reading failed.
    """
    if not file_path:
        return gr.update(visible=False)

    from itertools import islice

    # islice rejects negative stops; clamp so a non-positive limit simply
    # produces an empty (and therefore hidden) preview.
    limit = None if max_rows is None else max(max_rows, 0)

    try:
        # "utf-8-sig" strips an optional BOM; newline="" leaves line-ending
        # handling to the csv module.
        with open(file_path, "r", encoding="utf-8-sig", newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)

            if not header:
                return gr.update(visible=False)

            # Read lazily so at most `limit` rows are pulled from the file.
            rows = list(islice(reader, limit))

            if not rows:
                return gr.update(visible=False)

            return gr.update(value=rows, visible=True, headers=header)
    except Exception:
        # Best-effort preview: on any failure the component is hidden
        # rather than raising into the UI callback.
        return gr.update(visible=False)


def validate_and_notify(file_path: str | None, doc_type: str = "Document") -> str | None:
    """Validate an uploaded CSV and show a Gradio notification with the result.

    Args:
        file_path: Path to the CSV file; None or an empty string means no file.
        doc_type: Label used in the notification text
            (e.g., "Query document", "Source document").

    Returns:
        None when no file was provided. Otherwise ``file_path`` unchanged,
        whether or not validation succeeded: an invalid file is reported
        only through the warning notification.
    """
    if not file_path:
        return None

    is_valid, message = validate_csv(file_path)

    if is_valid:
        gr.Info(f"{doc_type} is valid!")
    else:
        gr.Warning(f"{doc_type} is invalid: {message}")

    return file_path