Spaces:

PD03
/

Salt

Sleeping

App Files Files Community

PD03 committed on Sep 1

Commit

31e4035

verified ·

1 Parent(s): 49a4733

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -74

app.py CHANGED Viewed

@@ -15,32 +15,20 @@ class SALTAnalytics:
         self.schema_info = ""
         self.openai_client = None
-    def setup_openai(self, api_key: str):
-        """Setup OpenAI client with API key"""
-        try:
-            self.openai_client = openai.OpenAI(api_key=api_key)
-            return True
-        except Exception as e:
-            return False
     def load_salt_dataset(self):
         """Load SAP SALT dataset from Hugging Face into DuckDB"""
         if self.data_loaded:
             return "Dataset already loaded!"
         try:
-            # Load dataset with error handling for HF Spaces
             dataset = load_dataset("SAP/SALT", "joined_table", split="train", streaming=False)
             df = dataset.to_pandas()
-            # Sample data for demo if dataset is too large
-            if len(df) > 100000:  # Limit for HF Spaces memory
                 df = df.sample(n=50000, random_state=42)
-            # Load into DuckDB
             self.con.execute("CREATE TABLE salt_data AS SELECT * FROM df")
-            # Get schema information
             schema_result = self.con.execute("DESCRIBE salt_data").fetchall()
             self.schema_info = "\n".join([f"{col[0]}: {col[1]}" for col in schema_result])
@@ -58,7 +46,6 @@ class SALTAnalytics:
         try:
             insights = {}
-            # Sales Office Performance
             insights['Sales Office Performance'] = self.con.execute("""
                 SELECT SALESOFFICE,
                        COUNT(*) as total_orders,
@@ -69,7 +56,6 @@ class SALTAnalytics:
                 LIMIT 10
             """).fetchdf()
-            # Payment Terms Distribution
             insights['Payment Terms Distribution'] = self.con.execute("""
                 SELECT CUSTOMERPAYMENTTERMS,
                        COUNT(*) as frequency,
@@ -79,7 +65,6 @@ class SALTAnalytics:
                 ORDER BY frequency DESC
             """).fetchdf()
-            # Shipping Conditions Analysis
             insights['Shipping Conditions'] = self.con.execute("""
                 SELECT SHIPPINGCONDITION,
                        COUNT(*) as order_count,
@@ -95,20 +80,20 @@ class SALTAnalytics:
             return f"Error generating insights: {str(e)}"
     def clean_sql_response(self, sql_query: str) -> str:
-        """Safely clean SQL response from OpenAI - COMPLETELY FIXED VERSION"""
-        # Define markers as variables to avoid syntax errors
-        triple_backticks = "```
-        sql_marker = "```sql"
-        # Remove markdown code blocks safely
         if sql_query.startswith(sql_marker):
-            sql_query = sql_query[len(sql_marker):]
-        elif sql_query.startswith(triple_backticks):
-            sql_query = sql_query[len(triple_backticks):]
-        # Remove trailing backticks
-        if sql_query.endswith(triple_backticks):
-            sql_query = sql_query[:-len(triple_backticks)]
         return sql_query.strip()
@@ -121,7 +106,6 @@ class SALTAnalytics:
             return "Please provide OpenAI API key"
         try:
-            # Setup OpenAI client
             client = openai.OpenAI(api_key=api_key)
             prompt = f"""
@@ -129,16 +113,7 @@ class SALTAnalytics:
             {self.schema_info}
-            The SALT dataset contains SAP ERP sales order data. Key fields:
-            - SALESOFFICE, SALESGROUP: Sales organization
-            - CUSTOMERID: Customer identifier
-            - CUSTOMERPAYMENTTERMS: Payment terms (Net30, Net45, etc.)
-            - SHIPPINGCONDITION, SHIPPINGPOINT: Shipping logistics
-            - PLANT: Manufacturing location
-            - HEADERINCOTERMSCLASSIFICATION, ITEMINCOTERMSCLASSIFICATION: Trade terms
             Convert this question to a DuckDB SQL query: "{question}"
             Return ONLY the SQL query, no explanation. Limit results to 20 rows.
             """
@@ -148,15 +123,11 @@ class SALTAnalytics:
                 temperature=0.1
             )
-            sql_query = response.choices[0].message.content.strip()
-            # Clean SQL query using safe method
             sql_query = self.clean_sql_response(sql_query)
-            # Execute query
             result_df = self.con.execute(sql_query).fetchdf()
-            # Get explanation
             explanation_prompt = f"""
             Question: {question}
             Results: {result_df.head(10).to_string()}
@@ -170,13 +141,11 @@ class SALTAnalytics:
                 temperature=0.3
             )
-            explanation = explanation_response.choices[0].message.content
-            # Use variables for backticks in output
-            code_start = "```
-            code_end = "\n```"
-            return f"**SQL Query:**\n{code_start}{sql_query}{code_end}\n\n**Results:**\n{result_df.to_string(index=False)}\n\n**Explanation:**\n{explanation}"
         except Exception as e:
             return f"Error: {str(e)}"
@@ -185,12 +154,9 @@ class SALTAnalytics:
 analytics = SALTAnalytics()
 def load_dataset_interface():
-    """Interface for loading dataset"""
-    result = analytics.load_salt_dataset()
-    return result
 def show_insights_interface():
-    """Interface for showing insights"""
     insights = analytics.get_predefined_insights()
     if isinstance(insights, str):
@@ -206,14 +172,10 @@ def show_insights_interface():
     return output
 def qa_interface(question: str, api_key: str):
-    """Interface for Q&A functionality"""
     if not question.strip():
         return "Please enter a question"
-    result = analytics.natural_language_query(question, api_key)
-    return result
-# Sample questions for the interface
 sample_questions = [
     "Which sales office has the most customers?",
     "What are the most common payment terms?",
@@ -222,7 +184,6 @@ sample_questions = [
     "What's the distribution of sales groups?"
 ]
-# Create Gradio interface
 with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
@@ -238,10 +199,7 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
         load_btn = gr.Button("Load SALT Dataset", variant="primary")
         load_output = gr.Textbox(label="Status", lines=3)
-        load_btn.click(
-            fn=load_dataset_interface,
-            outputs=load_output
-        )
     with gr.Tab("📈 Insights"):
         gr.Markdown("### Pre-built Analytics Insights")
@@ -249,10 +207,7 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
         insights_btn = gr.Button("Generate Insights", variant="primary")
         insights_output = gr.Markdown()
-        insights_btn.click(
-            fn=show_insights_interface,
-            outputs=insights_output
-        )
     with gr.Tab("🤖 AI Q&A"):
         gr.Markdown("### Ask Questions in Natural Language")
@@ -282,7 +237,6 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
             with gr.Column(scale=4):
                 qa_output = gr.Markdown()
-        # Update question input when sample is selected
         sample_dropdown.change(
             fn=lambda x: x if x else "",
             inputs=sample_dropdown,
@@ -308,19 +262,13 @@ with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
         - **DuckDB**: High-performance analytics database
         - **OpenAI GPT-4**: Natural language to SQL conversion
         - **Hugging Face**: Dataset hosting and deployment
-        - **Gradio 5**: Secure interactive web interface
         **Business Value**:
         - Automate sales order completion (70-80% accuracy)
         - Optimize customer-to-sales office assignments
         - Predict shipping and payment preferences
         - Generate actionable business insights
-        **Open Source Benefits**:
-        - Zero licensing costs vs. proprietary SAP analytics
-        - Full customization and control
-        - Community-driven improvements
-        - Easy integration with existing systems
         """)
 if __name__ == "__main__":

         self.schema_info = ""
         self.openai_client = None
     def load_salt_dataset(self):
         """Load SAP SALT dataset from Hugging Face into DuckDB"""
         if self.data_loaded:
             return "Dataset already loaded!"
         try:
             dataset = load_dataset("SAP/SALT", "joined_table", split="train", streaming=False)
             df = dataset.to_pandas()
+            if len(df) > 100000:
                 df = df.sample(n=50000, random_state=42)
             self.con.execute("CREATE TABLE salt_data AS SELECT * FROM df")
             schema_result = self.con.execute("DESCRIBE salt_data").fetchall()
             self.schema_info = "\n".join([f"{col[0]}: {col[1]}" for col in schema_result])
         try:
             insights = {}
             insights['Sales Office Performance'] = self.con.execute("""
                 SELECT SALESOFFICE,
                        COUNT(*) as total_orders,
                 LIMIT 10
             """).fetchdf()
             insights['Payment Terms Distribution'] = self.con.execute("""
                 SELECT CUSTOMERPAYMENTTERMS,
                        COUNT(*) as frequency,
                 ORDER BY frequency DESC
             """).fetchdf()
             insights['Shipping Conditions'] = self.con.execute("""
                 SELECT SHIPPINGCONDITION,
                        COUNT(*) as order_count,
             return f"Error generating insights: {str(e)}"
     def clean_sql_response(self, sql_query: str) -> str:
         """Strip a Markdown code fence from a model-generated SQL string.

         Surrounding whitespace is removed before the fence checks, so a
         fence preceded or followed by spaces or blank lines is still
         recognised. An opening fence may carry an ``sql`` language tag in
         any letter case.

         Args:
             sql_query: Raw text returned by the model; it may or may not be
                 wrapped in a fenced code block.

         Returns:
             The query text with any enclosing fence and surrounding
             whitespace removed.
         """
         # Built from a single character rather than written as a literal
         # run of three backticks, matching the approach of the original.
         backticks = "`" * 3
         sql_marker = backticks + "sql"

         # Normalise first: the checks below are anchored at both ends.
         sql_query = sql_query.strip()

         # Opening fence: try the tagged form before the bare one, and take
         # slice lengths from the markers instead of hard-coded numbers.
         if sql_query[:len(sql_marker)].lower() == sql_marker:
             sql_query = sql_query[len(sql_marker):]
         elif sql_query.startswith(backticks):
             sql_query = sql_query[len(backticks):]

         # Closing fence.
         if sql_query.endswith(backticks):
             sql_query = sql_query[:-len(backticks)]

         return sql_query.strip()
             return "Please provide OpenAI API key"
         try:
             client = openai.OpenAI(api_key=api_key)
             prompt = f"""
             {self.schema_info}
             Convert this question to a DuckDB SQL query: "{question}"
             Return ONLY the SQL query, no explanation. Limit results to 20 rows.
             """
                 temperature=0.1
             )
+            sql_query = response.choices.message.content.strip()
             sql_query = self.clean_sql_response(sql_query)
             result_df = self.con.execute(sql_query).fetchdf()
             explanation_prompt = f"""
             Question: {question}
             Results: {result_df.head(10).to_string()}
                 temperature=0.3
             )
+            explanation = explanation_response.choices.message.content
+            # Safe output formatting
+            code_block = "`" + "`" + "`"
+            return f"**SQL Query:**\n{code_block}sql\n{sql_query}\n{code_block}\n\n**Results:**\n{result_df.to_string(index=False)}\n\n**Explanation:**\n{explanation}"
         except Exception as e:
             return f"Error: {str(e)}"
 analytics = SALTAnalytics()
 def load_dataset_interface():
     """Button callback: trigger the dataset load and return its result."""
     status = analytics.load_salt_dataset()
     return status
 def show_insights_interface():
     insights = analytics.get_predefined_insights()
     if isinstance(insights, str):
     return output
 def qa_interface(question: str, api_key: str):
     """Validate the question and forward it to the analytics engine.

     Args:
         question: Natural-language question entered by the user. ``None``
             and whitespace-only values are treated as empty.
         api_key: OpenAI API key, passed through unchanged.

     Returns:
         A prompt asking for a question when none was supplied; otherwise
         whatever ``analytics.natural_language_query`` returns.
     """
     # Check for None as well as blank text: calling .strip() on None would
     # raise AttributeError instead of returning the friendly message.
     if not question or not question.strip():
         return "Please enter a question"
     return analytics.natural_language_query(question, api_key)
 sample_questions = [
     "Which sales office has the most customers?",
     "What are the most common payment terms?",
     "What's the distribution of sales groups?"
 ]
 with gr.Blocks(title="SAP SALT Analytics Demo", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
         load_btn = gr.Button("Load SALT Dataset", variant="primary")
         load_output = gr.Textbox(label="Status", lines=3)
+        load_btn.click(fn=load_dataset_interface, outputs=load_output)
     with gr.Tab("📈 Insights"):
         gr.Markdown("### Pre-built Analytics Insights")
         insights_btn = gr.Button("Generate Insights", variant="primary")
         insights_output = gr.Markdown()
+        insights_btn.click(fn=show_insights_interface, outputs=insights_output)
     with gr.Tab("🤖 AI Q&A"):
         gr.Markdown("### Ask Questions in Natural Language")
             with gr.Column(scale=4):
                 qa_output = gr.Markdown()
         sample_dropdown.change(
             fn=lambda x: x if x else "",
             inputs=sample_dropdown,
         - **DuckDB**: High-performance analytics database
         - **OpenAI GPT-4**: Natural language to SQL conversion
         - **Hugging Face**: Dataset hosting and deployment
+        - **Gradio 4.44**: Secure interactive web interface
         **Business Value**:
         - Automate sales order completion (70-80% accuracy)
         - Optimize customer-to-sales office assignments
         - Predict shipping and payment preferences
         - Generate actionable business insights
         """)
 if __name__ == "__main__":