Update main.py
Browse files
main.py
CHANGED
|
@@ -5,6 +5,7 @@ import prompt_style
|
|
| 5 |
import os
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
from llama_cpp import Llama
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3-GGUF"
|
|
@@ -40,14 +41,17 @@ def format_prompt(item: Item):
|
|
| 40 |
def generate(item: Item):
|
| 41 |
formatted_prompt = format_prompt(item)
|
| 42 |
output = model.create_chat_completion(messages=formatted_prompt, seed=item.seed,
|
| 43 |
-
temperature=item.temperature,
|
| 44 |
|
| 45 |
out = output['choices'][0]['message']['content']
|
| 46 |
return out
|
| 47 |
|
| 48 |
@app.post("/generate/")
async def generate_text(item: Item):
    """POST endpoint: run text generation for *item* and wrap it in a JSON envelope.

    The heavy lifting is delegated to generate(); this handler only shapes
    the HTTP response.
    """
    return {"response": generate(item)}
|
| 52 |
|
| 53 |
|
|
|
|
| 5 |
import os
|
| 6 |
from huggingface_hub import hf_hub_download
|
| 7 |
from llama_cpp import Llama
|
| 8 |
+
import time
|
| 9 |
|
| 10 |
|
| 11 |
model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3-GGUF"  # Hugging Face Hub repo id of the GGUF model — presumably passed to hf_hub_download (imported above); verify against the download call
|
|
|
|
| 41 |
def generate(item: Item):
    """Run a chat completion for *item* and return the generated text."""
    messages = format_prompt(item)
    # Sampling parameters are taken directly from the request item.
    completion = model.create_chat_completion(
        messages=messages,
        seed=item.seed,
        temperature=item.temperature,
        max_tokens=item.max_new_tokens,
    )
    # llama-cpp-python returns an OpenAI-style response dict;
    # extract the text of the first (only) choice.
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
|
| 48 |
|
| 49 |
@app.post("/generate/")
async def generate_text(item: Item):
    """POST endpoint: generate a completion for *item* and return it as JSON.

    Also prints the generated text and the elapsed time of the call as a
    lightweight server-side trace.
    """
    # perf_counter() is the correct clock for measuring elapsed time
    # (monotonic, highest resolution); time.time() is wall-clock and can
    # jump backwards/forwards if the system clock is adjusted.
    t_start = time.perf_counter()
    ans = generate(item)
    print(ans)
    # f-strings convert values to str automatically — the original's
    # str(...) wrapper was redundant.
    print(f"time: {time.perf_counter() - t_start}")
    return {"response": ans}
|
| 56 |
|
| 57 |
|