Update run_vllm.sh (#20)

- Update run_vllm.sh (8c6ccbc03601c74bfae5594c3d130a36c0a2b630)

Co-authored-by: Fred Reiss <frreiss@users.noreply.huggingface.co>

Files changed (1)

run_vllm.sh CHANGED

@@ -5,16 +5,17 @@
 # available LoRA adapters in this repository.
 #
 # To run this script:
-# 1. Install an appropriate build of vLLM for your machine
-# 2. Install the Hugging Face CLI (`hf`)
 # 3. Download the intrinsics library by running:
-#    hf download ibm-granite/intrinsics-lib --local-dir ./intrinsics-lib
-# 4. Edit the constants BASE_MODEL_NAME and BASE_MODEL_ORG as needed
-# 5. Run this script from the root of your local copy of intrinsics-lib.
 ################################################################################
 BASE_MODEL_NAME=granite-3.3-8b-instruct
 BASE_MODEL_ORG=ibm-granite
 export VLLM_API_KEY=rag_intrinsics_1234
@@ -30,7 +31,7 @@ done
 CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
-    --port 55555 \
     --gpu-memory-utilization 0.45 \
     --max-model-len 8192 \
     --enable-lora \

 # available LoRA adapters in this repository.
 #
 # To run this script:
+# 1. Install an appropriate build of vLLM for your machine (`pip install vllm`)
+# 2. Install the Hugging Face CLI (`pip install -U "huggingface_hub[cli]"`)
 # 3. Download the intrinsics library by running:
+#    hf download ibm-granite/rag-intrinsics-lib --local-dir ./rag-intrinsics-lib
+# 4. Edit the constants BASE_MODEL_NAME, BASE_MODEL_ORG, and PORT as needed
+# 5. Run this script from the root of your local copy of rag-intrinsics-lib.
 ################################################################################
 BASE_MODEL_NAME=granite-3.3-8b-instruct
 BASE_MODEL_ORG=ibm-granite
+PORT=55555
 export VLLM_API_KEY=rag_intrinsics_1234
 CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
+    --port ${PORT} \
     --gpu-memory-utilization 0.45 \
     --max-model-len 8192 \
     --enable-lora \