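# app.py for a Hugging Face Space: a Gradio demo of PhysicsWallahAI/Aryabhata-1.0,
# a math-reasoning model, run on GPU via the `spaces` (ZeroGPU) package.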
import spaces
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
model_id = "PhysicsWallahAI/Aryabhata-1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
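
# Generation halts on any of these strings. Besides the canonical chat-template
# markers, the list keeps a malformed variant ("<im_start|>", missing the leading
# pipe) and stray fragments the model apparently emits.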
stop_strings = [
    "<|im_end|>",
    "<|end|>",
    "<im_start|>",
    "```python\n",
    "<|im_start|>",
    "]}}]}}]",
]


def strip_bad_tokens(s, stop_strings):
    """Strip a single trailing stop string from the decoded text, if present."""
    for suffix in stop_strings:
        if s.endswith(suffix):
            return s[: -len(suffix)]
    return s


generation_config = GenerationConfig(max_new_tokens=4096, stop_strings=stop_strings)
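

# On ZeroGPU Spaces, @spaces.GPU attaches a GPU to the process only for the
# duration of each decorated call.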
@spaces.GPU
def greet(prompt: str):
    messages = [
        {
            "role": "system",
            "content": "Think step-by-step; put only the final answer inside \\boxed{}.",
        },
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs, generation_config=generation_config, tokenizer=tokenizer
    )
    # Decode only the newly generated tokens so the echoed prompt is not
    # returned, then trim any trailing stop string that survived decoding.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    return strip_bad_tokens(
        tokenizer.decode(generated, skip_special_tokens=True), stop_strings
    )
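

# Minimal Gradio UI: a single text box in, the model's worked solution out.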
demo = gr.Interface(fn=greet, inputs="text", outputs="text", title="Aryabhata Demo")
demo.launch()