cguna frreiss committed on
Commit
7156925
·
verified ·
1 Parent(s): 74ef57e

Update run_vllm.sh (#20)

Browse files

- Update run_vllm.sh (8c6ccbc03601c74bfae5594c3d130a36c0a2b630)


Co-authored-by: Fred Reiss <frreiss@users.noreply.huggingface.co>

Files changed (1) hide show
  1. run_vllm.sh +7 -6
run_vllm.sh CHANGED
@@ -5,16 +5,17 @@
5
  # available LoRA adapters in this repository.
6
  #
7
  # To run this script:
8
- # 1. Install an appropriate build of vLLM for your machine
9
- # 2. Install the Hugging Face CLI (`hf`)
10
  # 3. Download the intrinsics library by running:
11
- # hf download ibm-granite/intrinsics-lib --local-dir ./intrinsics-lib
12
- # 4. Edit the constants BASE_MODEL_NAME and BASE_MODEL_ORG as needed
13
- # 5. Run this script from the root of your local copy of intrinsics-lib.
14
  ################################################################################
15
 
16
  BASE_MODEL_NAME=granite-3.3-8b-instruct
17
  BASE_MODEL_ORG=ibm-granite
 
18
 
19
  export VLLM_API_KEY=rag_intrinsics_1234
20
 
@@ -30,7 +31,7 @@ done
30
 
31
 
32
  CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
33
- --port 55555 \
34
  --gpu-memory-utilization 0.45 \
35
  --max-model-len 8192 \
36
  --enable-lora \
 
5
  # available LoRA adapters in this repository.
6
  #
7
  # To run this script:
8
+ # 1. Install an appropriate build of vLLM for your machine (`pip install vllm`)
9
+ # 2. Install the Hugging Face CLI (`pip install -U "huggingface_hub[cli]"`)
10
  # 3. Download the intrinsics library by running:
11
+ # hf download ibm-granite/rag-intrinsics-lib --local-dir ./rag-intrinsics-lib
12
+ # 4. Edit the constants BASE_MODEL_NAME, BASE_MODEL_ORG, and PORT as needed
13
+ # 5. Run this script from the root of your local copy of rag-intrinsics-lib.
14
  ################################################################################
15
 
16
  BASE_MODEL_NAME=granite-3.3-8b-instruct
17
  BASE_MODEL_ORG=ibm-granite
18
+ PORT=55555
19
 
20
  export VLLM_API_KEY=rag_intrinsics_1234
21
 
 
31
 
32
 
33
  CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
34
+ --port ${PORT} \
35
  --gpu-memory-utilization 0.45 \
36
  --max-model-len 8192 \
37
  --enable-lora \