Spaces:
Runtime error
Runtime error
answer speed optimizations
Browse files
app.py
CHANGED
|
@@ -7,12 +7,12 @@ from huggingface_hub import hf_hub_download # load from huggingfaces
|
|
| 7 |
CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
|
| 8 |
CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q4_1.bin"
|
| 9 |
|
| 10 |
-
N_CTX =
|
| 11 |
|
| 12 |
llm = Llama(model_path=hf_hub_download(
|
| 13 |
repo_id=CONST_REPO_ID,
|
| 14 |
filename=CONST_FILENAME),
|
| 15 |
-
n_ctx=
|
| 16 |
)
|
| 17 |
history = N_CTX
|
| 18 |
|
|
@@ -29,7 +29,7 @@ def generate_text(input_text, history):
|
|
| 29 |
else:
|
| 30 |
input_text_with_history = f"{history[-1][1]}" + "\n"
|
| 31 |
input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
|
| 32 |
-
output = llm(input_text_with_history, max_tokens=
|
| 33 |
"<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
|
| 34 |
"ASSISTANT:", "USER:", "SYSTEM:"], stream=True
|
| 35 |
)
|
|
|
|
| 7 |
CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
|
| 8 |
CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q4_1.bin"
|
| 9 |
|
| 10 |
+
N_CTX = 1024
|
| 11 |
|
| 12 |
llm = Llama(model_path=hf_hub_download(
|
| 13 |
repo_id=CONST_REPO_ID,
|
| 14 |
filename=CONST_FILENAME),
|
| 15 |
+
n_ctx=N_CTX
|
| 16 |
)
|
| 17 |
history = N_CTX
|
| 18 |
|
|
|
|
| 29 |
else:
|
| 30 |
input_text_with_history = f"{history[-1][1]}" + "\n"
|
| 31 |
input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
|
| 32 |
+
output = llm(input_text_with_history, max_tokens=1024, stop=[
|
| 33 |
"<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
|
| 34 |
"ASSISTANT:", "USER:", "SYSTEM:"], stream=True
|
| 35 |
)
|