Spaces:

TheLoopStudio
/

Mathematics-AI

Paused

App Files Files Community

Khoi1234210 committed on Sep 22

Commit

52adc97

verified ·

1 Parent(s): 85ea4ec

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -18

app.py CHANGED Viewed

@@ -8,43 +8,55 @@ import re
 math_samples = None
 def load_sample_problems():
-    """Load sample problems from ALL datasets"""
     global math_samples
     if math_samples is not None:
         return math_samples
     samples = []
     try:
         # GSM8K (math problems)
         gsm8k = load_dataset("openai/gsm8k", "main", streaming=True)
         for i, item in enumerate(gsm8k["train"]):
             samples.append(item["question"])
-            if i >= 50:
                 break
         # Fineweb-edu (educational text - extract math-like questions)
         fw = load_dataset("HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True)
         fw_count = 0
         for item in fw:
-            # Filter for math-related content (simple keyword match)
-            if any(word in item['text'].lower() for word in ['math', 'calculate', 'solve', 'derivative', 'integral', 'triangle', 'equation']):
-                samples.append(item['text'][:200] + " (Solve this math problem.)")  # Truncate for brevity
-                fw_count += 1
-                if fw_count >= 20:
-                    break
         # Ultrachat_200k (chat-like math queries)
         ds = load_dataset("HuggingFaceH4/ultrachat_200k", streaming=True)
         ds_count = 0
         for item in ds:
-            if 'math' in item['messages'][0]['content'].lower() or 'calculate' in item['messages'][0]['content'].lower():
-                user_msg = item['messages'][0]['content']
-                samples.append(user_msg)
-                ds_count += 1
-                if ds_count >= 20:
-                    break
-        print(f"✅ Loaded {len(samples)} samples: GSM8K ({50}), Fineweb-edu ({fw_count}), Ultrachat ({ds_count})")
         math_samples = samples
         return samples
@@ -60,7 +72,7 @@ def load_sample_problems():
             "What is the probability of rolling a 6 on a die 3 times in a row?"
         ]
         return math_samples
 def create_math_system_message():
     """Specialized system prompt for mathematics with LaTeX"""
     return """You are Mathetics AI, an advanced mathematics tutor and problem solver.
@@ -175,11 +187,11 @@ def get_random_sample():
     """Get a random sample problem - loads datasets if needed"""
     global math_samples
     if math_samples is None:
-        math_samples = load_datasets_lazy()
     if math_samples:
         return random.choice(math_samples)
     return "Solve for x: 2x² + 5x - 3 = 0"
 def insert_sample_to_chat(difficulty):
     """Insert random sample into chat input"""
     sample = get_random_sample()

 math_samples = None
 def load_sample_problems():
+    """Load sample problems from ALL datasets - FIXED VERSION"""
     global math_samples
     if math_samples is not None:
         return math_samples
     samples = []
     try:
+        print("🔄 Loading GSM8K...")
         # GSM8K (math problems)
         gsm8k = load_dataset("openai/gsm8k", "main", streaming=True)
+        gsm_count = 0
         for i, item in enumerate(gsm8k["train"]):
             samples.append(item["question"])
+            gsm_count += 1
+            if gsm_count >= 50:
                 break
+        print("🔄 Loading Fineweb-edu...")
         # Fineweb-edu (educational text - extract math-like questions)
         fw = load_dataset("HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True)
         fw_count = 0
         for item in fw:
+            # Filter for math-related content
+            text_lower = item['text'].lower()
+            if any(word in text_lower for word in ['math', 'calculate', 'solve', 'derivative', 'integral', 'triangle', 'equation', 'area', 'volume', 'probability']):
+                # Truncate and format as question
+                question = item['text'][:150].strip()
+                if len(question) > 20:  # Ensure it's substantial
+                    samples.append(question + " (Solve this math problem.)")
+                    fw_count += 1
+                    if fw_count >= 20:
+                        break
+        print("🔄 Loading Ultrachat...")
         # Ultrachat_200k (chat-like math queries)
         ds = load_dataset("HuggingFaceH4/ultrachat_200k", streaming=True)
         ds_count = 0
         for item in ds:
+            if len(item['messages']) > 0:
+                content = item['messages'][0]['content'].lower()
+                if any(word in content for word in ['math', 'calculate', 'solve', 'problem', 'equation', 'derivative', 'integral']):
+                    user_msg = item['messages'][0]['content']
+                    if len(user_msg) > 10:  # Valid length
+                        samples.append(user_msg)
+                        ds_count += 1
+                        if ds_count >= 20:
+                            break
+        print(f"✅ Loaded {len(samples)} samples: GSM8K ({gsm_count}), Fineweb-edu ({fw_count}), Ultrachat ({ds_count})")
         math_samples = samples
         return samples
             "What is the probability of rolling a 6 on a die 3 times in a row?"
         ]
         return math_samples
 def create_math_system_message():
     """Specialized system prompt for mathematics with LaTeX"""
     return """You are Mathetics AI, an advanced mathematics tutor and problem solver.
     """Get a random sample problem - loads datasets if needed"""
     global math_samples
     if math_samples is None:
+        math_samples = load_sample_problems()  # ✅ FIXED: Now calls the right function
     if math_samples:
         return random.choice(math_samples)
     return "Solve for x: 2x² + 5x - 3 = 0"
 def insert_sample_to_chat(difficulty):
     """Insert random sample into chat input"""
     sample = get_random_sample()