fixed bug

Changed files:
- app_modules/llm_loader.py  +2 -2
- tgi.sh  +1 -3
app_modules/llm_loader.py  CHANGED

@@ -298,7 +298,7 @@ class LLMLoader:
                 config=config,
                 quantization_config=double_quant_config,
                 trust_remote_code=True,
-
+                use_auth_token=token,
             )
             if is_t5
             else AutoModelForCausalLM.from_pretrained(
@@ -306,7 +306,7 @@ class LLMLoader:
                 config=config,
                 quantization_config=double_quant_config,
                 trust_remote_code=True,
-
+                use_auth_token=token,
             )
         )
 
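The fix passes the Hugging Face access token through to both from_pretrained calls; without it, gated checkpoints such as meta-llama/Llama-2-7b-chat-hf cannot be downloaded even when a token is configured elsewhere in the app. A minimal sketch of the surrounding loader logic, assuming the is_t5 branch uses AutoModelForSeq2SeqLM and that token is read from an environment variable (neither detail is shown in the diff):

import os

from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,  # assumed class behind the is_t5 branch
    BitsAndBytesConfig,
)

# Assumptions for illustration: the diff only shows the keyword arguments.
model_name = "meta-llama/Llama-2-7b-chat-hf"      # gated repo, needs a token
token = os.environ.get("HUGGINGFACE_AUTH_TOKEN")  # hypothetical source of `token`
is_t5 = "t5" in model_name.lower()

config = AutoConfig.from_pretrained(
    model_name, trust_remote_code=True, use_auth_token=token
)
double_quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,  # "double quant", as the variable name suggests
)

model = (
    AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        config=config,
        quantization_config=double_quant_config,
        trust_remote_code=True,
        use_auth_token=token,  # the fix: authenticate the gated download
    )
    if is_t5
    else AutoModelForCausalLM.from_pretrained(
        model_name,
        config=config,
        quantization_config=double_quant_config,
        trust_remote_code=True,
        use_auth_token=token,  # same fix on the causal-LM branch
    )
)
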
tgi.sh  CHANGED

@@ -9,9 +9,7 @@ uname -a
 
 . env/tgi.conf
 
-export MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
-export QUANTIZE="--quantize bitsandbytes-fp4"
-
 echo Running $MODEL_ID with TGI
 
 text-generation-launcher --model-id $MODEL_ID --port $PORT --max-input-length 2048 --max-total-tokens 4096 --ngrok --ngrok-authtoken $NGROK_AUTHTOKEN --ngrok-edge $NGROK_EDGE $QUANTIZE
+
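With the two export lines removed, MODEL_ID and QUANTIZE must now come from env/tgi.conf, which the script already sources and which presumably also supplies PORT and the ngrok settings. That file is not part of this commit; a hypothetical sketch of what it would need to contain (the MODEL_ID and QUANTIZE values are taken from the lines removed above, the rest are placeholders):

# env/tgi.conf -- hypothetical contents, sourced by tgi.sh
export MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
export QUANTIZE="--quantize bitsandbytes-fp4"
export PORT=8080              # assumed port
export NGROK_AUTHTOKEN="..."  # secret, intentionally elided
export NGROK_EDGE="..."       # secret, intentionally elided

Keeping these values in the sourced config file means the model or quantization mode can be swapped without editing tgi.sh itself.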