"""Gradio app: translate a natural-language question to SQL and run it on an uploaded CSV."""

import re
import sqlite3

import gradio as gr
import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load model and tokenizer
model_name = "mrm8488/t5-base-finetuned-wikiSQL"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Name of the SQLite table the uploaded CSV is loaded into.
_TABLE_NAME = "data_table"

# The WikiSQL fine-tune is trained on single-table data and typically emits the
# literal placeholder `FROM table` (see the model card's sample output).
# `table` is a reserved word in SQLite, so such a query can never run as-is;
# rewriting the placeholder can therefore only turn a guaranteed error into a
# runnable query.
_PLACEHOLDER_TABLE = re.compile(r"\bFROM\s+table\b", re.IGNORECASE)


def _uploaded_path(data):
    """Return the filesystem path of a Gradio file upload.

    Depending on the Gradio version / `gr.File(type=...)` setting, the handler
    receives either a path string or a file-like object exposing `.name`;
    both are accepted.
    """
    return data if isinstance(data, str) else data.name


def generate_sql_query(natural_language, data):
    """Translate a question into SQL and execute it against the uploaded CSV.

    Args:
        natural_language: The user's question in plain English.
        data: The uploaded CSV as delivered by `gr.File` (a path string, a
            file-like object with a `.name` path, or `None` when nothing was
            uploaded).

    Returns:
        A `(sql_query, result)` tuple. `sql_query` is the SQL string that was
        executed (empty when no file was uploaded). `result` is a DataFrame
        with at most the first five rows of the query result, or a one-row
        DataFrame with a single "Error" column describing what went wrong.
    """
    if data is None:
        # Previously this crashed with AttributeError on `data.name`.
        return "", pd.DataFrame({"Error": ["Please upload a CSV file."]})

    # Load uploaded CSV
    df = pd.read_csv(_uploaded_path(data))

    # Create schema description
    schema = ", ".join(str(col) for col in df.columns)

    # Combine user query and schema
    input_text = f"translate English to SQL: {natural_language} | table columns: {schema}"

    # Generate SQL query
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=256)
    sql_query = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Point the model's placeholder table at the table that actually exists.
    sql_query = _PLACEHOLDER_TABLE.sub(f"FROM {_TABLE_NAME}", sql_query)

    # Create in-memory SQLite DB; `finally` guarantees the connection is
    # released even if loading the table raises.
    conn = sqlite3.connect(":memory:")
    try:
        df.to_sql(_TABLE_NAME, conn, index=False, if_exists="replace")
        try:
            # Execute the generated SQL query
            result_df = pd.read_sql_query(sql_query, conn)
        except Exception as e:
            # Deliberately broad: the SQL is model-generated, so any failure
            # is surfaced to the user in the result table instead of crashing
            # the request.
            result_df = pd.DataFrame({"Error": [str(e)]})
    finally:
        conn.close()

    return sql_query, result_df.head()


# Gradio UI
iface = gr.Interface(
    fn=generate_sql_query,
    inputs=[
        gr.Textbox(
            label="Enter your question (Natural Language)",
            placeholder="e.g., Show customers with age > 30",
        ),
        gr.File(label="Upload CSV dataset"),
    ],
    outputs=[
        gr.Textbox(label="Generated SQL Query"),
        gr.Dataframe(label="Query Result"),
    ],
    title="🧠 Natural Language to SQL Generator",
    description="Upload a CSV file and ask questions in plain English. The app converts them into SQL and shows the result.",
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()