alfakat committed on
Commit
8d465b0
·
verified ·
1 Parent(s): bbdad52

initial commit for app.py

Browse files
Files changed (1) hide show
  1. app.py +243 -0
app.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import gradio as gr
4
+
5
# Environment switches set before the optional `transformers` import that
# appears later in this file.
# NOTE(review): presumably these disable the TensorFlow/Flax backends and
# Hugging Face Hub telemetry -- confirm against the library documentation.
os.environ.update(
    {
        "TRANSFORMERS_NO_TF": "1",
        "TRANSFORMERS_NO_FLAX": "1",
        "HF_HUB_DISABLE_TELEMETRY": "1",
    }
)

# Past-tense verb used in the generated summary for each supported command.
CRUD_VERB = dict(
    INSERT="inserted",
    UPDATE="updated",
    DELETE="deleted",
    SELECT="retrieved",
)
15
+
16
def detect_command(sql_text: str) -> str:
    """Return the first CRUD keyword found in *sql_text*.

    SQL comments (``-- ...`` and ``/* ... */``) and single-quoted string
    literals are blanked out before searching, so a keyword that only
    appears inside a comment or a quoted value is not mistaken for the
    statement's command.

    Returns:
        ``"INSERT"``, ``"UPDATE"``, ``"DELETE"`` or ``"SELECT"`` (upper
        case), or ``"OTHER"`` when none of them is present.
    """
    # Leftmost match wins, so a `--` inside a string literal is consumed as
    # part of the literal and a quote inside a comment as part of the comment.
    code_only = re.sub(
        r"--[^\n]*|/\*.*?\*/|'(?:[^']|'')*'",
        " ",
        sql_text,
        flags=re.DOTALL,
    )
    m = re.search(
        r"\b(INSERT|UPDATE|DELETE|SELECT)\b", code_only, flags=re.IGNORECASE
    )
    return m.group(1).upper() if m else "OTHER"
19
+
20
def parse_table_name(sql: str) -> str:
    """Extract the target table name from an INSERT/UPDATE/DELETE/SELECT.

    The patterns are tried in order (INSERT INTO, UPDATE, DELETE FROM, then
    a bare FROM) and the first match wins. Each keyword is anchored with a
    word boundary so that identifiers which merely end in a keyword (for
    example a column called ``last_update``) are not treated as the keyword.

    Returns:
        The matched identifier exactly as written (it may contain dots and
        square brackets), or ``""`` when no pattern matches.
    """
    ident = r"([A-Za-z0-9\.\[\]_]+)"
    for pat in (
        r"\bINSERT\s+INTO\s+" + ident,
        r"\bUPDATE\s+" + ident,
        r"\bDELETE\s+FROM\s+" + ident,
        r"\bFROM\s+" + ident,
    ):
        m = re.search(pat, sql, flags=re.IGNORECASE)
        if m:
            return m.group(1)
    return ""
31
+
32
def clean_statement(text: str) -> str:
    """Normalise *text* into a single capitalised, punctuated statement.

    Steps:
      1. Drop a question-style lead-in that starts with What/Which/How/
         Give/Provide and ends at the first colon (e.g. ``"What is it: "``).
         The keyword must be a whole word, so text that merely starts with
         those letters ("how_many ...", "Given ...") is left untouched.
      2. Append a full stop unless the text already ends in ``.``, ``!``
         or ``?``.
      3. Upper-case the first character.

    Returns:
        The cleaned statement, or ``""`` when nothing is left after trimming.
    """
    text = re.sub(
        r"^(?:What|Which|How|Give|Provide)\b[^:]*:\s*",
        "",
        text.strip(),
        flags=re.IGNORECASE,
    ).strip()
    if not text:
        return text
    if text[-1] not in ".!?":
        text += "."
    return text[0].upper() + text[1:]
37
+
38
def _find_closing_paren(text: str, open_idx: int) -> int:
    """Return the index of the ')' matching the '(' at *open_idx*, or -1."""
    depth = 0
    quote = None
    for i in range(open_idx, len(text)):
        ch = text[i]
        if quote:
            # Inside a quoted value: only the matching quote ends it.
            if ch == quote:
                quote = None
        elif ch in "'\"":
            quote = ch
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return i
    return -1


def _read_value_tuples(text: str) -> list:
    """Read the comma-separated ``(...)`` row tuples at the start of *text*."""
    rows = []
    pos = 0
    while True:
        opener = re.compile(r"\s*\(").match(text, pos)
        if not opener:
            break
        open_idx = opener.end() - 1
        close_idx = _find_closing_paren(text, open_idx)
        if close_idx < 0:
            break
        rows.append(text[open_idx + 1:close_idx].strip())
        # Another row follows only if the tuple is directly followed by a
        # comma; anything else (";", ON CONFLICT ..., end of text) ends the list.
        separator = re.compile(r"\s*,").match(text, close_idx + 1)
        if not separator:
            break
        pos = separator.end()
    return rows


def _split_top_level_commas(text: str) -> list:
    """Split *text* on commas that are outside parentheses and quotes."""
    parts = []
    buf = []
    depth = 0
    quote = None
    for ch in text:
        if quote:
            if ch == quote:
                quote = None
        elif ch in "'\"":
            quote = ch
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth = max(depth - 1, 0)
        elif ch == "," and depth == 0:
            parts.append("".join(buf).strip())
            buf = []
            continue
        buf.append(ch)
    parts.append("".join(buf).strip())
    return parts


def summarize_insert(sql: str) -> str:
    """Build a one-sentence summary of an INSERT statement.

    The summary reports how many row tuples follow ``VALUES`` and the target
    table. When the column list contains ``user_id`` (compared
    case-insensitively) and the corresponding values are integer literals,
    the rows are additionally grouped per user id.

    Row tuples are read with a parenthesis- and quote-aware scanner, so
    multi-row inserts and values such as ``GETDATE()`` or ``'a, b'`` are
    counted and split correctly.

    Returns:
        A sentence of the form ``"<n> record(s) inserted into <table>[ (...)]."``.
    """
    table = parse_table_name(sql)

    cols = []
    cols_match = re.search(
        r"\(\s*([^)]+?)\s*\)\s*VALUES", sql, flags=re.IGNORECASE | re.DOTALL
    )
    if cols_match:
        cols = [c.strip().strip("[]") for c in cols_match.group(1).split(",")]
    uid_idx = next(
        (i for i, c in enumerate(cols) if c.lower() == "user_id"), None
    )

    values_kw = re.search(r"\bVALUES\b", sql, flags=re.IGNORECASE)
    tuples = _read_value_tuples(sql[values_kw.end():]) if values_kw else []
    if not tuples:
        # No VALUES list found: fall back to every parenthesised group.
        tuples = re.findall(r"\(\s*([^)]+?)\s*\)", sql, flags=re.IGNORECASE)

    user_ids = []
    if uid_idx is not None:
        for tup in tuples:
            parts = _split_top_level_commas(tup)
            if uid_idx < len(parts):
                uid_raw = parts[uid_idx].strip().strip("'").strip('"')
                if re.fullmatch(r"-?\d+", uid_raw):
                    user_ids.append(int(uid_raw))

    count = len(tuples)
    verb = CRUD_VERB["INSERT"]
    if not user_ids:
        return f"{count} record(s) {verb} into {table}."

    groups = {}
    for u in user_ids:
        groups[u] = groups.get(u, 0) + 1
    if len(groups) == 1:
        uid = next(iter(groups))
        return f"{count} record(s) {verb} into {table} (user_id {uid})."
    per_user = [f"{n} with user_id {uid}" for uid, n in sorted(groups.items())]
    return f"{count} record(s) {verb} into {table} ({', '.join(per_user)})."
76
+
77
def summarize_update(sql: str) -> str:
    """Build a one-sentence summary of an UPDATE statement.

    The sentence names the table, the assigned columns (listed when there
    are at most three, otherwise only counted) and the WHERE condition, if
    any.

    Column names are read from a copy of the statement in which string
    literals are blanked and parenthesised expressions removed, so commas
    inside values such as ``'a, b'`` or ``ROUND(price, 2)`` are not mistaken
    for assignment separators.

    Returns:
        A sentence of the form
        ``"Record(s) updated in <table>[ (changed: ...)][ where ...]."``.
    """
    table = parse_table_name(sql)

    # Work on a simplified copy for column detection only; the WHERE text
    # shown to the user is still taken from the original statement.
    simplified = re.sub(r"'(?:[^']|'')*'", "''", sql)
    removed = 1
    while removed:
        # Strip innermost (...) groups until none are left.
        simplified, removed = re.subn(r"\([^()]*\)", "", simplified)

    changed_cols = []
    set_match = re.search(
        r"\bSET\b\s+(.+?)(\bWHERE\b|;|$)",
        simplified,
        flags=re.IGNORECASE | re.DOTALL,
    )
    if set_match:
        for assignment in set_match.group(1).split(","):
            target, equals, _ = assignment.partition("=")
            # rstrip drops the operator of compound assignments like "+=".
            col = target.strip().rstrip("+-*/%&|^").strip().strip("[]")
            if equals and col:
                changed_cols.append(col)

    where = ""
    w = re.search(r"\bWHERE\b\s+(.+?)(;|$)", sql, flags=re.IGNORECASE | re.DOTALL)
    if w:
        where = re.sub(r"\s+", " ", w.group(1)).strip()

    verb = CRUD_VERB["UPDATE"]
    base = f"Record(s) {verb} in {table}"
    if changed_cols:
        if len(changed_cols) <= 3:
            base += f" (changed: {', '.join(changed_cols)})"
        else:
            base += f" ({len(changed_cols)} columns changed)"
    if where:
        base += f" where {where}"
    return base + "."
102
+
103
def summarize_delete(sql: str) -> str:
    """Build a one-sentence summary of a DELETE statement.

    Returns:
        ``"Record(s) deleted from <table>[ where <condition>]."`` where the
        condition is the text after WHERE (up to ``;`` or the end of input)
        with runs of whitespace collapsed to single spaces.
    """
    sentence = f"Record(s) {CRUD_VERB['DELETE']} from {parse_table_name(sql)}"

    condition_match = re.search(
        r"\bWHERE\b\s+(.+?)(;|$)", sql, flags=re.IGNORECASE | re.DOTALL
    )
    if condition_match is not None:
        condition = re.sub(r"\s+", " ", condition_match.group(1)).strip()
        if condition:
            sentence = f"{sentence} where {condition}"

    return f"{sentence}."
114
+
115
def summarize_select(sql: str) -> str:
    """Build a one-sentence summary of a SELECT statement.

    The selected columns are the text between SELECT and FROM with
    whitespace collapsed; ``*`` is rendered as "all columns" and a statement
    without a FROM clause falls back to the word "data".

    Returns:
        ``"<columns> will be retrieved from <table>[ where <condition>]."``.
    """
    search_flags = re.IGNORECASE | re.DOTALL

    column_match = re.search(r"\bSELECT\b\s+(.+?)\bFROM\b", sql, flags=search_flags)
    if column_match is None:
        columns = "data"
    else:
        raw_columns = column_match.group(1).strip()
        if raw_columns == "*":
            columns = "all columns"
        else:
            columns = re.sub(r"\s+", " ", raw_columns)

    sentence = f"{columns} will be {CRUD_VERB['SELECT']} from {parse_table_name(sql)}"

    condition_match = re.search(r"\bWHERE\b\s+(.+?)(;|$)", sql, flags=search_flags)
    if condition_match is not None:
        condition = re.sub(r"\s+", " ", condition_match.group(1)).strip()
        if condition:
            sentence = f"{sentence} where {condition}"

    return f"{sentence}."
131
+
132
def deterministic_summary(sql_text: str) -> str:
    """Route *sql_text* to the summariser for its detected command.

    Returns:
        The summary sentence from the matching ``summarize_*`` function, or
        ``"Unrecognized SQL command."`` when ``detect_command`` reports
        anything other than INSERT/UPDATE/DELETE/SELECT.
    """
    summarizers = {
        "INSERT": summarize_insert,
        "UPDATE": summarize_update,
        "DELETE": summarize_delete,
        "SELECT": summarize_select,
    }
    summarizer = summarizers.get(detect_command(sql_text))
    if summarizer is None:
        return "Unrecognized SQL command."
    return summarizer(sql_text)
143
+
144
# Lazily populated tokenizer/model singletons; filled in by load_t5().
_T5_TOKENIZER = None
_T5_MODEL = None

# `transformers` is optional: if importing it fails for any reason the app
# keeps working with the deterministic summaries only.
try:
    from transformers import T5Tokenizer, T5ForConditionalGeneration
except Exception:
    _HAS_T5 = False
else:
    _HAS_T5 = True

# Instruction text placed in the model prompt, keyed by detected command.
CRUD_PROMPT = dict(
    INSERT="Rewrite as a clear statement that new records will be added. Keep numbers the same.",
    UPDATE="Rewrite as a clear statement that existing records will be updated. Keep names and conditions.",
    DELETE="Rewrite as a clear statement that records will be deleted. Keep conditions if present.",
    SELECT="Rewrite as a clear statement that data will be retrieved. Keep table/filters.",
    OTHER="Rewrite as a short, clear statement for non-technical users.",
)
161
+
162
def load_t5():
    """Load the T5 tokenizer and model into the module-level caches.

    Does nothing when both caches are already populated; otherwise both
    objects are (re)loaded from the same pretrained checkpoint.
    """
    global _T5_TOKENIZER, _T5_MODEL
    if _T5_TOKENIZER is not None and _T5_MODEL is not None:
        return
    checkpoint = "mrm8488/t5-base-finetuned-wikiSQL-sql-to-en"
    _T5_TOKENIZER = T5Tokenizer.from_pretrained(checkpoint)
    _T5_MODEL = T5ForConditionalGeneration.from_pretrained(checkpoint)
167
+
168
def rephrase_with_t5(summary: str, cmd: str) -> str:
    """Rephrase *summary* with the T5 model, if `transformers` is available.

    Args:
        summary: The deterministic summary sentence to rephrase.
        cmd: Detected command; selects the instruction from ``CRUD_PROMPT``
            (unknown commands use the ``"OTHER"`` instruction).

    Returns:
        The model output passed through ``clean_statement``, or *summary*
        unchanged when the optional import failed.
    """
    if not _HAS_T5:
        return summary

    load_t5()
    instruction = CRUD_PROMPT[cmd] if cmd in CRUD_PROMPT else CRUD_PROMPT["OTHER"]
    # The prompt asks for a statement so the model does not open with a question.
    input_text = f"explain sql in plain english statement: {instruction} {summary} </s>"
    encoded = _T5_TOKENIZER([input_text], return_tensors="pt")
    generated = _T5_MODEL.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=64,
        do_sample=False,  # greedy decoding
    )
    first_sequence = generated[0]
    return clean_statement(
        _T5_TOKENIZER.decode(first_sequence, skip_special_tokens=True)
    )
184
+
185
+
186
# Sample statements offered in the UI, one per supported command.
_INSERT_EXAMPLE = """INSERT INTO demo_database..user_records (record_id, person_id, created_at)
VALUES (101, 5, GETDATE()), (102, 5, GETDATE()), (103, 5, GETDATE());"""

_UPDATE_EXAMPLE = """UPDATE users
SET status = 'active', last_login = GETDATE()
WHERE user_id IN (101, 102, 103);"""

_DELETE_EXAMPLE = """DELETE FROM orders
WHERE order_date < '2024-01-01' AND status = 'cancelled';"""

_SELECT_EXAMPLE = """SELECT user_id, email, created_at
FROM accounts
WHERE email LIKE '%@example.com' AND created_at >= '2025-01-01';"""

EXAMPLES = [
    _INSERT_EXAMPLE,
    _UPDATE_EXAMPLE,
    _DELETE_EXAMPLE,
    _SELECT_EXAMPLE,
]
202
+
203
# Gradio UI: one SQL input box, a toggle for the optional T5 rephrasing, and
# three outputs (detected command, deterministic summary, final text).
with gr.Blocks(title="SQL → Human: CRUD Explainer") as demo:
    gr.HTML("<h1 style='text-align:center;'>SQL → Human: CRUD Explainer</h1>")
    gr.Markdown(
        "Paste any SQL (INSERT/UPDATE/DELETE/SELECT). "
        "The app will detect the command, create a deterministic summary, "
        "and optionally rephrase it using a SQL→English model."
    )

    with gr.Row():
        sql_in = gr.Textbox(label="SQL input", lines=14, placeholder="Paste your SQL here...")
    with gr.Row():
        use_t5 = gr.Checkbox(label="Use T5 rephrase (mrm8488/t5-base-finetuned-wikiSQL-sql-to-en)", value=True)

    with gr.Row():
        btn = gr.Button("Explain SQL", variant="primary")

    detected_out = gr.Label(label="Detected Command")
    deterministic_out = gr.Markdown(label="Deterministic Summary")
    final_out = gr.Markdown(label="Final Explanation")

    def explain(sql_text: str, want_t5: bool):
        """Click handler: summarise *sql_text* and optionally rephrase it.

        Args:
            sql_text: Raw contents of the SQL textbox (``None`` is treated
                as an empty string).
            want_t5: State of the "Use T5 rephrase" checkbox.

        Returns:
            A ``(command, deterministic_summary, final_text)`` tuple matching
            the three output components wired up in ``btn.click``.
        """
        sql_text = (sql_text or "").strip()
        cmd = detect_command(sql_text)
        deterministic = deterministic_summary(sql_text)
        if want_t5 and _HAS_T5 and cmd != "OTHER":
            final = rephrase_with_t5(deterministic, cmd)
        else:
            final = clean_statement(deterministic)
        # gr.Label takes either a plain string label or a dict mapping labels
        # to float confidences; the command name is not a confidence, so it
        # is returned as a string.
        return cmd, deterministic, final

    btn.click(explain, inputs=[sql_in, use_t5], outputs=[detected_out, deterministic_out, final_out])

    gr.Examples(
        examples=EXAMPLES,
        inputs=[sql_in],
        label="Try examples",
        examples_per_page=4,
    )

if __name__ == "__main__":
    demo.launch()