Khoi1234210 committed on
Commit
52adc97
·
verified ·
1 Parent(s): 85ea4ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -8,43 +8,55 @@ import re
8
  math_samples = None
9
 
10
  def load_sample_problems():
11
- """Load sample problems from ALL datasets"""
12
  global math_samples
13
  if math_samples is not None:
14
  return math_samples
15
 
16
  samples = []
17
  try:
 
18
  # GSM8K (math problems)
19
  gsm8k = load_dataset("openai/gsm8k", "main", streaming=True)
 
20
  for i, item in enumerate(gsm8k["train"]):
21
  samples.append(item["question"])
22
- if i >= 50:
 
23
  break
24
 
 
25
  # Fineweb-edu (educational text - extract math-like questions)
26
  fw = load_dataset("HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True)
27
  fw_count = 0
28
  for item in fw:
29
- # Filter for math-related content (simple keyword match)
30
- if any(word in item['text'].lower() for word in ['math', 'calculate', 'solve', 'derivative', 'integral', 'triangle', 'equation']):
31
- samples.append(item['text'][:200] + " (Solve this math problem.)") # Truncate for brevity
32
- fw_count += 1
33
- if fw_count >= 20:
34
- break
 
 
 
 
35
 
 
36
  # Ultrachat_200k (chat-like math queries)
37
  ds = load_dataset("HuggingFaceH4/ultrachat_200k", streaming=True)
38
  ds_count = 0
39
  for item in ds:
40
- if 'math' in item['messages'][0]['content'].lower() or 'calculate' in item['messages'][0]['content'].lower():
41
- user_msg = item['messages'][0]['content']
42
- samples.append(user_msg)
43
- ds_count += 1
44
- if ds_count >= 20:
45
- break
 
 
 
46
 
47
- print(f"✅ Loaded {len(samples)} samples: GSM8K ({50}), Fineweb-edu ({fw_count}), Ultrachat ({ds_count})")
48
  math_samples = samples
49
  return samples
50
 
@@ -60,7 +72,7 @@ def load_sample_problems():
60
  "What is the probability of rolling a 6 on a die 3 times in a row?"
61
  ]
62
  return math_samples
63
-
64
  def create_math_system_message():
65
  """Specialized system prompt for mathematics with LaTeX"""
66
  return """You are Mathetics AI, an advanced mathematics tutor and problem solver.
@@ -175,11 +187,11 @@ def get_random_sample():
175
  """Get a random sample problem - loads datasets if needed"""
176
  global math_samples
177
  if math_samples is None:
178
- math_samples = load_datasets_lazy()
179
  if math_samples:
180
  return random.choice(math_samples)
181
  return "Solve for x: 2x² + 5x - 3 = 0"
182
-
183
  def insert_sample_to_chat(difficulty):
184
  """Insert random sample into chat input"""
185
  sample = get_random_sample()
 
8
  math_samples = None
9
 
10
  def load_sample_problems():
11
+ """Load sample problems from ALL datasets - FIXED VERSION"""
12
  global math_samples
13
  if math_samples is not None:
14
  return math_samples
15
 
16
  samples = []
17
  try:
18
+ print("🔄 Loading GSM8K...")
19
  # GSM8K (math problems)
20
  gsm8k = load_dataset("openai/gsm8k", "main", streaming=True)
21
+ gsm_count = 0
22
  for i, item in enumerate(gsm8k["train"]):
23
  samples.append(item["question"])
24
+ gsm_count += 1
25
+ if gsm_count >= 50:
26
  break
27
 
28
+ print("🔄 Loading Fineweb-edu...")
29
  # Fineweb-edu (educational text - extract math-like questions)
30
  fw = load_dataset("HuggingFaceFW/fineweb-edu", name="sample-10BT", split="train", streaming=True)
31
  fw_count = 0
32
  for item in fw:
33
+ # Filter for math-related content
34
+ text_lower = item['text'].lower()
35
+ if any(word in text_lower for word in ['math', 'calculate', 'solve', 'derivative', 'integral', 'triangle', 'equation', 'area', 'volume', 'probability']):
36
+ # Truncate and format as question
37
+ question = item['text'][:150].strip()
38
+ if len(question) > 20: # Ensure it's substantial
39
+ samples.append(question + " (Solve this math problem.)")
40
+ fw_count += 1
41
+ if fw_count >= 20:
42
+ break
43
 
44
+ print("🔄 Loading Ultrachat...")
45
  # Ultrachat_200k (chat-like math queries)
46
  ds = load_dataset("HuggingFaceH4/ultrachat_200k", streaming=True)
47
  ds_count = 0
48
  for item in ds:
49
+ if len(item['messages']) > 0:
50
+ content = item['messages'][0]['content'].lower()
51
+ if any(word in content for word in ['math', 'calculate', 'solve', 'problem', 'equation', 'derivative', 'integral']):
52
+ user_msg = item['messages'][0]['content']
53
+ if len(user_msg) > 10: # Valid length
54
+ samples.append(user_msg)
55
+ ds_count += 1
56
+ if ds_count >= 20:
57
+ break
58
 
59
+ print(f"✅ Loaded {len(samples)} samples: GSM8K ({gsm_count}), Fineweb-edu ({fw_count}), Ultrachat ({ds_count})")
60
  math_samples = samples
61
  return samples
62
 
 
72
  "What is the probability of rolling a 6 on a die 3 times in a row?"
73
  ]
74
  return math_samples
75
+
76
  def create_math_system_message():
77
  """Specialized system prompt for mathematics with LaTeX"""
78
  return """You are Mathetics AI, an advanced mathematics tutor and problem solver.
 
187
  """Get a random sample problem - loads datasets if needed"""
188
  global math_samples
189
  if math_samples is None:
190
+ math_samples = load_sample_problems() # ✅ FIXED: Now calls the right function
191
  if math_samples:
192
  return random.choice(math_samples)
193
  return "Solve for x: 2x² + 5x - 3 = 0"
194
+
195
  def insert_sample_to_chat(difficulty):
196
  """Insert random sample into chat input"""
197
  sample = get_random_sample()