import os
import random
import re
import threading

import gradio as gr
import sympy as sp
from datasets import load_dataset
from huggingface_hub import InferenceClient

# Module-level sample cache, filled asynchronously by a daemon thread so the
# UI can start immediately while datasets stream in the background.
math_samples = []
loading_status = {"loaded": False, "error": None}


def load_sample_problems():
    """Load sample problems asynchronously.

    Streams three Hugging Face datasets and collects math-flavored prompts
    into the module-level ``math_samples`` list, then flips
    ``loading_status["loaded"]``.  On any error it falls back to the
    built-in problem list so the "Random" button always works.
    """
    global math_samples, loading_status
    samples = []
    try:
        # GSM8K: first 30 grade-school word problems.
        gsm8k = load_dataset("openai/gsm8k", "main", streaming=True, split="train")
        for i, item in enumerate(gsm8k):
            if i >= 30:
                break
            samples.append(item["question"])

        # Fineweb-edu: up to 15 math-flavored text snippets.
        # The scan is capped so a stream with few keyword matches cannot
        # stall the loader indefinitely (the original loop only broke on
        # match count).
        fw = load_dataset("HuggingFaceFW/fineweb-edu", name="sample-10BT",
                          split="train", streaming=True)
        fw_count = 0
        for scanned, item in enumerate(fw):
            if fw_count >= 15 or scanned >= 2000:
                break
            text_lower = item['text'].lower()
            if any(w in text_lower for w in ['math', 'calculate', 'solve', 'equation']):
                q = item['text'][:120].strip()
                if len(q) > 20:
                    samples.append(q + "...")
                    fw_count += 1

        # Ultrachat: up to 15 math-related opening user messages (same
        # scan cap rationale as above).
        ds = load_dataset("HuggingFaceH4/ultrachat_200k", streaming=True, split="train_sft")
        ds_count = 0
        for scanned, item in enumerate(ds):
            if ds_count >= 15 or scanned >= 2000:
                break
            if len(item['messages']) > 0:
                content = item['messages'][0]['content'].lower()
                if any(w in content for w in ['math', 'solve', 'equation', 'calculate']):
                    samples.append(item['messages'][0]['content'])
                    ds_count += 1

        math_samples = samples if samples else get_fallback_samples()
        loading_status["loaded"] = True
        print(f"✅ Loaded {len(math_samples)} samples")
    except Exception as e:
        # Best-effort loader: never crash the app over a dataset hiccup.
        print(f"⚠️ Dataset error: {e}")
        math_samples = get_fallback_samples()
        loading_status["error"] = str(e)
        loading_status["loaded"] = True


def get_fallback_samples():
    """Extended fallback problems used when dataset loading fails or is slow."""
    return [
        "Find the derivative of f(x) = 3x² + 2x - 1",
        "A triangle has sides 5, 12, and 13. What is its area?",
        "If log₂(x) + log₂(x+6) = 4, find x",
        "Calculate lim(x→0) sin(x)/x",
        "Solve: x + 2y = 7, 3x - y = 4",
        "Integrate sin(x) from 0 to π",
        "What is P(rolling three 6s in a row)?",
        "Simplify: (x² - 4)/(x - 2)",
        "Find the sum of 1 + 2 + 3 + ... + 100",
        "What is the 10th Fibonacci number?",
        "Calculate the area of a circle with radius 5",
        "Factor x² + 5x + 6",
        "Solve x² - 4x + 4 = 0",
        "Find tan(π/4)",
        "What is 15% of 240?",
    ]


# Start background loading; daemon=True so the thread never blocks exit.
threading.Thread(target=load_sample_problems, daemon=True).start()


def create_math_system_message():
    """Return the system prompt used for math tutoring."""
    return r"""You are Mathetics AI, an expert mathematics tutor.

**Teaching Approach:**
1. Understand the problem clearly
2. Show step-by-step solutions with LaTeX
3. Verify answers when possible
4. Suggest alternative methods

**LaTeX Formatting:**
- Inline: $x^2 + y^2 = r^2$
- Display: $$\int_0^\pi \sin(x)\,dx = 2$$
- Box answers: $\boxed{42}$
- Fractions: $\frac{a}{b}$
- Limits: $\lim_{x \to 0}$

Be clear, precise, and educational."""


def render_latex(text):
    """Clean and normalize LaTeX so Gradio's KaTeX delimiters pick it up.

    Converts ``\\[...\\]`` / ``\\(...\\)`` into ``$$...$$`` / ``$...$`` and
    wraps bare ``\\boxed{...}`` answers in inline math.
    """
    if not text:
        return text
    # Convert LaTeX brackets to dollars (the chatbot only registers $ / $$).
    text = re.sub(r'\\\[(.*?)\\\]', r'$$\1$$', text, flags=re.DOTALL)
    text = re.sub(r'\\\((.*?)\\\)', r'$\1$', text, flags=re.DOTALL)
    # Fix boxed answers emitted outside math mode.
    # NOTE(review): this regex was reconstructed — the original source was
    # garbled at this point; confirm against the intended rendering rules.
    text = re.sub(r'(?<!\$)(\\boxed\{[^{}]*\})(?!\$)', r'$\1$', text)
    return text


def try_sympy_compute(message):
    """Best-effort symbolic verification of the user's problem with SymPy.

    Returns a LaTeX string for recognized problem shapes (limits, triangle
    areas via Heron's formula) or ``None`` when nothing matches or SymPy
    fails.  NOTE(review): the original source was garbled in this function;
    the limit-parsing branch below is a reconstruction — verify the regex
    against real user phrasings.
    """
    try:
        from sympy.parsing.sympy_parser import (
            parse_expr,
            standard_transformations,
            implicit_multiplication_application,
        )
        transforms = standard_transformations + (implicit_multiplication_application,)
        msg_lower = message.lower()
        x = sp.Symbol('x')

        # Limits, e.g. "lim(x→0) sin(x)/x" or "lim x -> 0 sin(x)/x".
        match = re.search(
            r'lim\s*\(?\s*x\s*(?:→|->|to)\s*([^)\s]+)\s*\)?\s*(.+?)(?:\?|$)',
            msg_lower,
        )
        if match:
            to_val, expr_str = match.groups()
            expr = parse_expr(expr_str.replace('^', '**'), transformations=transforms)
            result = sp.limit(expr, x, sp.sympify(to_val.strip()))
            return sp.latex(result)

        # Triangle area via Heron's formula when three side lengths appear.
        match = re.search(r'(\d+)[,\s-]+(\d+)[,\s-]+(\d+)', message)
        if match and 'triangle' in msg_lower:
            a, b, c = map(float, match.groups())
            s = (a + b + c) / 2
            area = sp.sqrt(s * (s - a) * (s - b) * (s - c))
            return sp.latex(area.evalf())
    except Exception as e:
        # Verification is optional; never let it break the chat response.
        print(f"⚠️ SymPy error: {e}")
    return None


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a model response for ``message``.

    ``history`` is the prior conversation (role/content dicts) WITHOUT the
    current user message — this function appends it itself.  Yields
    progressively longer LaTeX-normalized text, then appends a SymPy
    verification line when one can be computed.
    """
    client = InferenceClient(model="Qwen/Qwen2.5-Math-7B-Instruct")

    messages = [{"role": "system", "content": system_message}]
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})

    try:
        response_text = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        ):
            if chunk.choices[0].delta.content:
                response_text += chunk.choices[0].delta.content
                yield render_latex(response_text)

        # Append an optional SymPy verification of the final answer.
        sympy_result = try_sympy_compute(message)
        if sympy_result:
            response_text += f"\n\n**✓ Verified with SymPy:** $${sympy_result}$$"
            yield render_latex(response_text)
    except Exception as e:
        error_msg = f"❌ **Error:** {str(e)}\n\nTry:\n- Simpler wording\n- Breaking into steps\n- Checking notation"
        yield error_msg


def get_random_sample():
    """Return a random sample problem, or a loading notice while datasets stream."""
    if not loading_status["loaded"]:
        return "⏳ Loading samples..."
    if not math_samples:
        return get_fallback_samples()[0]
    return random.choice(math_samples)


# ---------------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------------
with gr.Blocks(title="🧮 Mathetics AI", theme=gr.themes.Soft(), css="""
    .katex { font-size: 1.1em; }
    .katex-display { margin: 1em 0; }
""") as demo:
    gr.Markdown("# 🧮 **Mathetics AI**\n*Advanced Math Tutor powered by Qwen2.5-Math*")

    chatbot = gr.Chatbot(
        height=500,
        type='messages',
        label="💬 Conversation",
        latex_delimiters=[
            {"left": "$$", "right": "$$", "display": True},
            {"left": "$", "right": "$", "display": False},
        ],
        show_copy_button=True,
    )

    msg = gr.Textbox(placeholder="Ask any math problem...", show_label=False, scale=4)

    with gr.Row():
        submit = gr.Button("🚀 Solve", variant="primary", scale=1)
        clear = gr.Button("🗑️ Clear", variant="secondary", scale=1)
        sample = gr.Button("🎲 Random", variant="secondary", scale=1)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        temp_slider = gr.Slider(0.1, 1.0, value=0.3, step=0.1,
                                label="Temperature (creativity)")
        tokens_slider = gr.Slider(256, 2048, value=1024, step=128,
                                  label="Max Tokens")
        top_p_slider = gr.Slider(0.1, 1.0, value=0.85, step=0.05,
                                 label="Top-p (nucleus sampling)")

    with gr.Accordion("💡 Help & Examples", open=False):
        gr.Markdown("""
**Tips:**
- Be specific: "derivative of sin(2x)" not "help with calculus"
- Request steps: "show step-by-step"
- Use clear notation: `x^2` for powers, `lim x->0` for limits

**Examples:**
- Calculus: "Find ∫x² dx from 0 to 5"
- Algebra: "Solve x² + 5x + 6 = 0"
- Geometry: "Area of triangle with sides 3, 4, 5"
- Limits: "Calculate lim(x→0) sin(x)/x"
""")

    gr.Examples(
        examples=[
            ["Find the derivative of x² sin(x)"],
            ["Calculate the area of a triangle with sides 5, 12, 13"],
            ["Integrate x² from 0 to 2"],
            ["What is lim(x→0) sin(x)/x?"],
            ["Solve the equation x² - 5x + 6 = 0"],
        ],
        inputs=msg,
    )

    # Hidden system message (passed to respond).
    system_msg = gr.State(create_math_system_message())

    def chat_response(message, history, sys_msg, max_tok, temp, top_p):
        """Handle one chat turn with streaming updates to the chatbot."""
        history.append({"role": "user", "content": message})
        # Create assistant message slot that streaming fills in.
        history.append({"role": "assistant", "content": ""})

        # Pass history WITHOUT the just-added user turn and assistant slot:
        # respond() appends the user message itself, so passing history[:-1]
        # (as the original did) sent the current question to the model twice.
        for response in respond(message, history[:-2], sys_msg, max_tok, temp, top_p):
            history[-1]["content"] = response
            yield history, ""
        return history, ""

    def clear_chat():
        """Reset the conversation and the input box."""
        return [], ""

    msg.submit(
        chat_response,
        [msg, chatbot, system_msg, tokens_slider, temp_slider, top_p_slider],
        [chatbot, msg],
    )
    submit.click(
        chat_response,
        [msg, chatbot, system_msg, tokens_slider, temp_slider, top_p_slider],
        [chatbot, msg],
    )
    clear.click(clear_chat, outputs=[chatbot, msg])
    sample.click(get_random_sample, outputs=msg)

demo.launch()