fixed bug for llama-2 auth token handling
app_modules/llm_loader.py  (+14 -14)

@@ -356,20 +356,20 @@ class LLMLoader:
             model = MODEL_NAME_OR_PATH

             pipe = pipeline(
-                (removed argument list, 14 lines, not captured in this view)
+                task,
+                model=model,
+                tokenizer=tokenizer,
+                streamer=self.streamer,
+                return_full_text=return_full_text,  # langchain expects the full text
+                device=hf_pipeline_device_type,
+                torch_dtype=torch_dtype,
+                max_new_tokens=2048,
+                trust_remote_code=True,
+                temperature=temperature,
+                top_p=0.95,
+                top_k=0,  # select from top 0 tokens (because zero, relies on top_p)
+                repetition_penalty=1.115,
+            )

             self.llm = HuggingFacePipeline(pipeline=pipe, callbacks=callbacks)
         elif self.llm_model_type == "mosaicml":
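For context, below is a minimal, self-contained sketch of how a gated Llama-2 checkpoint is typically loaded with a Hugging Face access token and wired into the same pipeline(...) / HuggingFacePipeline shape shown in the diff. It is not the repository's code: the model id, the HUGGINGFACE_AUTH_TOKEN environment variable, and the generation values are placeholder assumptions, and the removed lines that held the original token handling are not visible in this view.

```python
# Illustrative sketch only -- not the loader's actual implementation.
# Assumes a gated Llama-2 checkpoint and an access token exposed via the
# (hypothetical) HUGGINGFACE_AUTH_TOKEN environment variable.
import os

import torch
from transformers import AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline

model_name_or_path = "meta-llama/Llama-2-7b-chat-hf"   # placeholder model id
hf_auth_token = os.environ.get("HUGGINGFACE_AUTH_TOKEN")  # placeholder env var

# Gated Llama-2 repos reject anonymous downloads, so the token must reach
# every from_pretrained call: the tokenizer as well as the pipeline itself.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_auth_token=hf_auth_token)

# Same argument shape as the pipeline() call in the diff above,
# with placeholder values where the loader uses its own config.
pipe = pipeline(
    "text-generation",
    model=model_name_or_path,
    tokenizer=tokenizer,
    use_auth_token=hf_auth_token,
    device=0,                       # first GPU; use -1 to run on CPU
    torch_dtype=torch.float16,
    return_full_text=True,          # langchain expects the full text
    max_new_tokens=2048,
    temperature=0.01,
    top_p=0.95,
    top_k=0,                        # 0 disables top-k, so sampling relies on top_p
    repetition_penalty=1.115,
)

llm = HuggingFacePipeline(pipeline=pipe)
print(llm("What does PCI DSS require for password length?"))
```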