Update app.py
app.py CHANGED
@@ -52,12 +52,11 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
 )
 model.to("cuda") # Move the model to GPU
 tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")
-
 llm = transformers.pipeline(
     task="text-generation",
     model=model,
     tokenizer=tokenizer,
-    device=
+    device="cuda",
 )
 
 
@@ -102,9 +101,7 @@ def reply(message: str, history: list[str]) -> str:
 
     # Generate a response from the language model
     response = llm(
-        rag_prompt,
-        max_new_tokens=512,
-        return_full_text=False,
+        rag_prompt, max_new_tokens=512, return_full_text=False, device="cuda"
     )
 
     # Return the generated response
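
For context, this is roughly how the setup section of app.py reads after the commit. The removed dangling `device=` left the `transformers.pipeline(...)` call syntactically incomplete, which is a SyntaxError at import time and would put the Space into a runtime-error state. The `from_pretrained` arguments above line 52 are cut off by the hunk header, so the minimal load call below is an assumption; the rest follows the diff.

import transformers

# Load the AWQ-quantized Qwen model. The real from_pretrained kwargs are
# truncated by the hunk header, so this minimal load call is an assumption.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-7B-Instruct-AWQ"
)
model.to("cuda")  # Move the model to GPU
tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct-AWQ")

llm = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device="cuda",  # was a dangling `device=` before this commit
)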
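The generation call inside reply() then looks roughly like the sketch below. Only the lines shown in the second hunk are in the diff, so the rag_prompt placeholder and the return expression (the standard list-of-dicts output shape of a text-generation pipeline) are assumptions. Device placement is already handled at pipeline construction, so the extra device="cuda" added to the call itself is redundant at best; the sketch omits it.

def reply(message: str, history: list[str]) -> str:
    # Assemble the retrieval-augmented prompt. The real construction is not
    # part of the diff; using the raw message is a placeholder assumption.
    # history is unused in this sketch.
    rag_prompt = message

    # Generate a response from the language model
    response = llm(
        rag_prompt,
        max_new_tokens=512,      # cap the length of the generated reply
        return_full_text=False,  # return only the new text, not the prompt
    )

    # Return the generated response. Text-generation pipelines return a list
    # of dicts with a "generated_text" key; the diff cuts off before the
    # return statement, so this line is an assumption.
    return response[0]["generated_text"]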