Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,8 @@ import random
|
|
| 7 |
import time
|
| 8 |
import re
|
| 9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
# Set an environment variable
|
|
@@ -45,10 +47,10 @@ def inference(query):
|
|
| 45 |
|
| 46 |
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
| 47 |
outputs = model.generate(tokenized_chat, **generation_params)
|
| 48 |
-
decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 49 |
assistant_response = decoded_outputs[0].split("<|im_start|>assistant\n")[-1].strip()
|
| 50 |
-
response_ = assistant_response.replace('<|im_end|>', "")
|
| 51 |
-
return response_
|
| 52 |
# outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
|
| 53 |
# return outputs
|
| 54 |
|
|
|
|
| 7 |
import time
|
| 8 |
import re
|
| 9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
|
| 10 |
+
import transformers
|
| 11 |
+
|
| 12 |
|
| 13 |
|
| 14 |
# Set an environment variable
|
|
|
|
| 47 |
|
| 48 |
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
| 49 |
outputs = model.generate(tokenized_chat, **generation_params)
|
| 50 |
+
decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
|
| 51 |
assistant_response = decoded_outputs[0].split("<|im_start|>assistant\n")[-1].strip()
|
| 52 |
+
# response_ = assistant_response.replace('<|im_end|>', "")
|
| 53 |
+
return assistant_response
|
| 54 |
# outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
|
| 55 |
# return outputs
|
| 56 |
|