Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,5 @@
|
|
| 1 |
import subprocess
|
| 2 |
|
| 3 |
-
# Installing flash_attn
|
| 4 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
|
| 5 |
-
shell=True)
|
| 6 |
-
|
| 7 |
from threading import Thread
|
| 8 |
import spaces
|
| 9 |
import gradio as gr
|
|
@@ -65,7 +61,9 @@ def predict(history, prompt, max_length, top_p, temperature):
|
|
| 65 |
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
| 66 |
t.start()
|
| 67 |
for new_token in streamer:
|
| 68 |
-
if new_token and '<|user|>' … [removed line; the rest of this condition appears to have been lost in extraction]
|
|
|
|
|
|
|
| 69 |
history[-1][1] += new_token
|
| 70 |
yield history
|
| 71 |
|
|
|
|
| 1 |
import subprocess
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from threading import Thread
|
| 4 |
import spaces
|
| 5 |
import gradio as gr
|
|
|
|
| 61 |
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
| 62 |
t.start()
|
| 63 |
for new_token in streamer:
|
| 64 |
+
if new_token and '<|user|>' in new_token:
|
| 65 |
+
new_token = new_token.split('<|user|>')[0]
|
| 66 |
+
if new_token:
|
| 67 |
history[-1][1] += new_token
|
| 68 |
yield history
|
| 69 |
|