PD03 committed on
Commit
31e4035
·
verified ·
1 Parent(s): 49a4733

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -74
app.py CHANGED
@@ -15,32 +15,20 @@ class SALTAnalytics:
15
  self.schema_info = ""
16
  self.openai_client = None
17
 
18
- def setup_openai(self, api_key: str):
19
- """Setup OpenAI client with API key"""
20
- try:
21
- self.openai_client = openai.OpenAI(api_key=api_key)
22
- return True
23
- except Exception as e:
24
- return False
25
-
26
  def load_salt_dataset(self):
27
  """Load SAP SALT dataset from Hugging Face into DuckDB"""
28
  if self.data_loaded:
29
  return "Dataset already loaded!"
30
 
31
  try:
32
- # Load dataset with error handling for HF Spaces
33
  dataset = load_dataset("SAP/SALT", "joined_table", split="train", streaming=False)
34
  df = dataset.to_pandas()
35
 
36
- # Sample data for demo if dataset is too large
37
- if len(df) > 100000: # Limit for HF Spaces memory
38
  df = df.sample(n=50000, random_state=42)
39
 
40
- # Load into DuckDB
41
  self.con.execute("CREATE TABLE salt_data AS SELECT * FROM df")
42
 
43
- # Get schema information
44
  schema_result = self.con.execute("DESCRIBE salt_data").fetchall()
45
  self.schema_info = "\n".join([f"{col[0]}: {col[1]}" for col in schema_result])
46
 
@@ -58,7 +46,6 @@ class SALTAnalytics:
58
  try:
59
  insights = {}
60
 
61
- # Sales Office Performance
62
  insights['Sales Office Performance'] = self.con.execute("""
63
  SELECT SALESOFFICE,
64
  COUNT(*) as total_orders,
@@ -69,7 +56,6 @@ class SALTAnalytics:
69
  LIMIT 10
70
  """).fetchdf()
71
 
72
- # Payment Terms Distribution
73
  insights['Payment Terms Distribution'] = self.con.execute("""
74
  SELECT CUSTOMERPAYMENTTERMS,
75
  COUNT(*) as frequency,
@@ -79,7 +65,6 @@ class SALTAnalytics:
79
  ORDER BY frequency DESC
80
  """).fetchdf()
81
 
82
- # Shipping Conditions Analysis
83
  insights['Shipping Conditions'] = self.con.execute("""
84
  SELECT SHIPPINGCONDITION,
85
  COUNT(*) as order_count,
@@ -95,20 +80,20 @@ class SALTAnalytics:
95
  return f"Error generating insights: {str(e)}"
96
 
97
  def clean_sql_response(self, sql_query: str) -> str:
98
- """Safely clean SQL response from OpenAI - COMPLETELY FIXED VERSION"""
99
- # Define markers as variables to avoid syntax errors
100
- triple_backticks = "```
101
- sql_marker = "```sql"
102
 
103
- # Remove markdown code blocks safely
104
  if sql_query.startswith(sql_marker):
105
- sql_query = sql_query[len(sql_marker):]
106
- elif sql_query.startswith(triple_backticks):
107
- sql_query = sql_query[len(triple_backticks):]
108
 
109
- # Remove trailing backticks
110
- if sql_query.endswith(triple_backticks):
111
- sql_query = sql_query[:-len(triple_backticks)]
112
 
113
  return sql_query.strip()
114
 
@@ -121,7 +106,6 @@ class SALTAnalytics:
121
  return "Please provide OpenAI API key"
122
 
123
  try:
124
- # Setup OpenAI client
125
  client = openai.OpenAI(api_key=api_key)
126
 
127
  prompt = f"""
@@ -129,16 +113,7 @@ class SALTAnalytics:
129
 
130
  {self.schema_info}
131
 
132
- The SALT dataset contains SAP ERP sales order data. Key fields:
133
- - SALESOFFICE, SALESGROUP: Sales organization
134
- - CUSTOMERID: Customer identifier
135
- - CUSTOMERPAYMENTTERMS: Payment terms (Net30, Net45, etc.)
136
- - SHIPPINGCONDITION, SHIPPINGPOINT: Shipping logistics
137
- - PLANT: Manufacturing location
138
- - HEADERINCOTERMSCLASSIFICATION, ITEMINCOTERMSCLASSIFICATION: Trade terms
139
-
140
  Convert this question to a DuckDB SQL query: "{question}"
141
-
142
  Return ONLY the SQL query, no explanation. Limit results to 20 rows.
143
  """
144
 
@@ -148,15 +123,11 @@ class SALTAnalytics:
148
  temperature=0.1
149
  )
150
 
151
- sql_query = response.choices[0].message.content.strip()
152
-
153
- # Clean SQL query using safe method
154
  sql_query = self.clean_sql_response(sql_query)
155
 
156
- # Execute query
157
  result_df = self.con.execute(sql_query).fetchdf()
158
 
159
- # Get explanation
160
  explanation_prompt = f"""
161
  Question: {question}
162
  Results: {result_df.head(10).to_string()}
@@ -170,13 +141,11 @@ class SALTAnalytics:
170
  temperature=0.3
171
  )
172
 
173
- explanation = explanation_response.choices[0].message.content
174
 
175
- # Use variables for backticks in output
176
- code_start = "```
177
- code_end = "\n```"
178
-
179
- return f"**SQL Query:**\n{code_start}{sql_query}{code_end}\n\n**Results:**\n{result_df.to_string(index=False)}\n\n**Explanation:**\n{explanation}"
180
 
181
  except Exception as e:
182
  return f"Error: {str(e)}"
@@ -185,12 +154,9 @@ class SALTAnalytics:
185
  analytics = SALTAnalytics()
186
 
187
  def load_dataset_interface():
188
- """Interface for loading dataset"""
189
- result = analytics.load_salt_dataset()
190
- return result
191
 
192
  def show_insights_interface():
193
- """Interface for showing insights"""
194
  insights = analytics.get_predefined_insights()
195
 
196
  if isinstance(insights, str):
@@ -206,14 +172,10 @@ def show_insights_interface():
206
  return output
207
 
208
  def qa_interface(question: str, api_key: str):
209
- """Interface for Q&A functionality"""
210
  if not question.strip():
211
  return "Please enter a question"
212
-
213
- result = analytics.natural_language_query(question, api_key)
214
- return result
215
 
216
- # Sample questions for the interface
217
  sample_questions = [
218
  "Which sales office has the most customers?",
219
  "What are the most common payment terms?",
@@ -222,7 +184,6 @@ sample_questions = [
222
  "What's the distribution of sales groups?"
223
  ]
224
 
225
- # Create Gradio interface
226
  with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
227
 
228
  gr.Markdown("""
@@ -238,10 +199,7 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
238
  load_btn = gr.Button("Load SALT Dataset", variant="primary")
239
  load_output = gr.Textbox(label="Status", lines=3)
240
 
241
- load_btn.click(
242
- fn=load_dataset_interface,
243
- outputs=load_output
244
- )
245
 
246
  with gr.Tab("📈 Insights"):
247
  gr.Markdown("### Pre-built Analytics Insights")
@@ -249,10 +207,7 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
249
  insights_btn = gr.Button("Generate Insights", variant="primary")
250
  insights_output = gr.Markdown()
251
 
252
- insights_btn.click(
253
- fn=show_insights_interface,
254
- outputs=insights_output
255
- )
256
 
257
  with gr.Tab("🤖 AI Q&A"):
258
  gr.Markdown("### Ask Questions in Natural Language")
@@ -282,7 +237,6 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
282
  with gr.Column(scale=4):
283
  qa_output = gr.Markdown()
284
 
285
- # Update question input when sample is selected
286
  sample_dropdown.change(
287
  fn=lambda x: x if x else "",
288
  inputs=sample_dropdown,
@@ -308,19 +262,13 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
308
  - **DuckDB**: High-performance analytics database
309
  - **OpenAI GPT-4**: Natural language to SQL conversion
310
  - **Hugging Face**: Dataset hosting and deployment
311
- - **Gradio 5**: Secure interactive web interface
312
 
313
  **Business Value**:
314
  - Automate sales order completion (70-80% accuracy)
315
  - Optimize customer-to-sales office assignments
316
  - Predict shipping and payment preferences
317
  - Generate actionable business insights
318
-
319
- **Open Source Benefits**:
320
- - Zero licensing costs vs. proprietary SAP analytics
321
- - Full customization and control
322
- - Community-driven improvements
323
- - Easy integration with existing systems
324
  """)
325
 
326
  if __name__ == "__main__":
 
15
  self.schema_info = ""
16
  self.openai_client = None
17
 
 
 
 
 
 
 
 
 
18
  def load_salt_dataset(self):
19
  """Load SAP SALT dataset from Hugging Face into DuckDB"""
20
  if self.data_loaded:
21
  return "Dataset already loaded!"
22
 
23
  try:
 
24
  dataset = load_dataset("SAP/SALT", "joined_table", split="train", streaming=False)
25
  df = dataset.to_pandas()
26
 
27
+ if len(df) > 100000:
 
28
  df = df.sample(n=50000, random_state=42)
29
 
 
30
  self.con.execute("CREATE TABLE salt_data AS SELECT * FROM df")
31
 
 
32
  schema_result = self.con.execute("DESCRIBE salt_data").fetchall()
33
  self.schema_info = "\n".join([f"{col[0]}: {col[1]}" for col in schema_result])
34
 
 
46
  try:
47
  insights = {}
48
 
 
49
  insights['Sales Office Performance'] = self.con.execute("""
50
  SELECT SALESOFFICE,
51
  COUNT(*) as total_orders,
 
56
  LIMIT 10
57
  """).fetchdf()
58
 
 
59
  insights['Payment Terms Distribution'] = self.con.execute("""
60
  SELECT CUSTOMERPAYMENTTERMS,
61
  COUNT(*) as frequency,
 
65
  ORDER BY frequency DESC
66
  """).fetchdf()
67
 
 
68
  insights['Shipping Conditions'] = self.con.execute("""
69
  SELECT SHIPPINGCONDITION,
70
  COUNT(*) as order_count,
 
80
  return f"Error generating insights: {str(e)}"
81
 
82
  def clean_sql_response(self, sql_query: str) -> str:
83
+ """Clean SQL response - COMPLETELY FIXED"""
84
+ # Use string concatenation to avoid syntax errors
85
+ backticks = "`" + "`" + "`"
86
+ sql_marker = backticks + "sql"
87
 
88
+ # Remove start markers
89
  if sql_query.startswith(sql_marker):
90
+ sql_query = sql_query[6:] # Remove ```
91
+ elif sql_query.startswith(backticks):
92
+ sql_query = sql_query[3:] # Remove ```
93
 
94
+ # Remove end markers
95
+ if sql_query.endswith(backticks):
96
+ sql_query = sql_query[:-3] # Remove trailing ```
97
 
98
  return sql_query.strip()
99
 
 
106
  return "Please provide OpenAI API key"
107
 
108
  try:
 
109
  client = openai.OpenAI(api_key=api_key)
110
 
111
  prompt = f"""
 
113
 
114
  {self.schema_info}
115
 
 
 
 
 
 
 
 
 
116
  Convert this question to a DuckDB SQL query: "{question}"
 
117
  Return ONLY the SQL query, no explanation. Limit results to 20 rows.
118
  """
119
 
 
123
  temperature=0.1
124
  )
125
 
126
+ sql_query = response.choices.message.content.strip()
 
 
127
  sql_query = self.clean_sql_response(sql_query)
128
 
 
129
  result_df = self.con.execute(sql_query).fetchdf()
130
 
 
131
  explanation_prompt = f"""
132
  Question: {question}
133
  Results: {result_df.head(10).to_string()}
 
141
  temperature=0.3
142
  )
143
 
144
+ explanation = explanation_response.choices.message.content
145
 
146
+ # Safe output formatting
147
+ code_block = "`" + "`" + "`"
148
+ return f"**SQL Query:**\n{code_block}sql\n{sql_query}\n{code_block}\n\n**Results:**\n{result_df.to_string(index=False)}\n\n**Explanation:**\n{explanation}"
 
 
149
 
150
  except Exception as e:
151
  return f"Error: {str(e)}"
 
154
  analytics = SALTAnalytics()
155
 
156
  def load_dataset_interface():
157
+ return analytics.load_salt_dataset()
 
 
158
 
159
  def show_insights_interface():
 
160
  insights = analytics.get_predefined_insights()
161
 
162
  if isinstance(insights, str):
 
172
  return output
173
 
174
  def qa_interface(question: str, api_key: str):
 
175
  if not question.strip():
176
  return "Please enter a question"
177
+ return analytics.natural_language_query(question, api_key)
 
 
178
 
 
179
  sample_questions = [
180
  "Which sales office has the most customers?",
181
  "What are the most common payment terms?",
 
184
  "What's the distribution of sales groups?"
185
  ]
186
 
 
187
  with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
188
 
189
  gr.Markdown("""
 
199
  load_btn = gr.Button("Load SALT Dataset", variant="primary")
200
  load_output = gr.Textbox(label="Status", lines=3)
201
 
202
+ load_btn.click(fn=load_dataset_interface, outputs=load_output)
 
 
 
203
 
204
  with gr.Tab("📈 Insights"):
205
  gr.Markdown("### Pre-built Analytics Insights")
 
207
  insights_btn = gr.Button("Generate Insights", variant="primary")
208
  insights_output = gr.Markdown()
209
 
210
+ insights_btn.click(fn=show_insights_interface, outputs=insights_output)
 
 
 
211
 
212
  with gr.Tab("🤖 AI Q&A"):
213
  gr.Markdown("### Ask Questions in Natural Language")
 
237
  with gr.Column(scale=4):
238
  qa_output = gr.Markdown()
239
 
 
240
  sample_dropdown.change(
241
  fn=lambda x: x if x else "",
242
  inputs=sample_dropdown,
 
262
  - **DuckDB**: High-performance analytics database
263
  - **OpenAI GPT-4**: Natural language to SQL conversion
264
  - **Hugging Face**: Dataset hosting and deployment
265
+ - **Gradio 4.44**: Secure interactive web interface
266
 
267
  **Business Value**:
268
  - Automate sales order completion (70-80% accuracy)
269
  - Optimize customer-to-sales office assignments
270
  - Predict shipping and payment preferences
271
  - Generate actionable business insights
 
 
 
 
 
 
272
  """)
273
 
274
  if __name__ == "__main__":