import os
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

"""
🧮 Root_Math fine-tuned model chat app for Hugging Face Spaces.
Supports both Gradio UI and API access via `/chat`.
"""

# ✅ Load Hugging Face API token securely
api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not api_token:
    raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")

# ✅ Define model names
base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
peft_model_name = "Hrushi02/Root_Math"  # fine-tuned PEFT adapter on the Hub

# ✅ Load base model
print("🔄 Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=api_token  # `use_auth_token` is deprecated in recent transformers
)

# ✅ Load your fine-tuned PEFT adapter
print("🔄 Loading fine-tuned adapter...")
model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)

# ✅ Load tokenizer
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
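
# Some causal-LM tokenizers ship without a pad token; falling back to EOS is
# a common safeguard (whether this checkpoint actually needs it depends on
# the tokenizer config, so treat this as a defensive default).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token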


# ✅ Define the response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate responses from your fine-tuned model."""
    full_prompt = system_message + "\n\n"
    # history is assumed in Gradio tuple format, i.e. (user, assistant)
    # pairs; the API tab passes None via gr.State, so guard against that.
    for user_msg, bot_msg in history or []:
        if user_msg:
            full_prompt += f"User: {user_msg}\n"
        if bot_msg:
            full_prompt += f"Assistant: {bot_msg}\n"
    full_prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True
        )

    # Decode only the newly generated tokens; the prompt itself contains
    # "Assistant:", so slicing by prompt length is more robust than
    # splitting the full decoded string.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # If the model continues the dialogue, keep only its first turn.
    response = response.split("User:")[0].strip()

    return response
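
# Quick local sanity check (hypothetical call; kept commented out so it does
# not run at import time on Spaces):
#   print(respond("What is 12 * 9?", [],
#                 "You are a helpful math assistant.", 64, 0.7, 0.95))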


# ✅ Create Gradio Chat Interface
chat_ui = gr.ChatInterface(
    fn=respond,  # the pass-through lambda added nothing; call respond directly
    additional_inputs=[
        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="๐Ÿงฎ Root Math Assistant",
    description="A fine-tuned math reasoning model by Hrushi02 using Unsloth + PEFT."
)


# ✅ Add API endpoint `/chat` (for gradio_client access)
api_chat = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Message"),
        gr.State(),  # placeholder for chat history (can be None)
        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    outputs="text",
    api_name="/chat"
)


# ✅ Combine UI + API
demo = gr.TabbedInterface([chat_ui, api_chat], ["Chat", "API"])


# ✅ Launch app
if __name__ == "__main__":
    demo.launch()
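
# Example client call (a sketch): once the Space is running, gradio_client
# can hit the `/chat` endpoint. The Space ID below is hypothetical, so
# substitute the real one; recent gradio_client versions manage gr.State
# inputs per session, so the history placeholder is omitted here.
#
#   from gradio_client import Client
#
#   client = Client("Hrushi02/Root_Math")  # hypothetical Space ID
#   answer = client.predict(
#       "Solve x^2 - 5x + 6 = 0",             # message
#       "You are a helpful math assistant.",  # system message
#       256,                                  # max new tokens
#       0.7,                                  # temperature
#       0.95,                                 # top-p
#       api_name="/chat",
#   )
#   print(answer)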