Qwen3-0.6B / run_qwen3_0.6b_int8_ctx_axcl_aarch64.sh
wli1995's picture
Upload folder using huggingface_hub
a06f9c6 verified
raw
history blame contribute delete
596 Bytes
# Runs ./main_axcl_aarch64 against the Qwen3-0.6B model files stored under
# qwen3-0.6b-ax650/.
#
# The binary and every model path are relative, so launch this script from the
# directory that contains both ./main_axcl_aarch64 and qwen3-0.6b-ax650/.
#
# NOTE(review): --url_tokenizer_model points at http://127.0.0.1:12345, so a
# tokenizer service is presumably expected to be listening there before this
# is started -- confirm against the runtime's documentation.
# NOTE(review): --axmodel_num 28 presumably matches the number of files the
# "%d" in --template_filename_axmodel expands to -- confirm.
#
# Options that are currently disabled (append them to the command to enable):
#   --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi
#   --tokenizer_type 2

# Directory holding the compiled .axmodel files and the token embedding table.
model_dir="qwen3-0.6b-ax650"

./main_axcl_aarch64 \
  --system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
  --template_filename_axmodel "${model_dir}/qwen3_p128_l%d_together.axmodel" \
  --axmodel_num 28 \
  --url_tokenizer_model "http://127.0.0.1:12345" \
  --filename_post_axmodel "${model_dir}/qwen3_post.axmodel" \
  --filename_tokens_embed "${model_dir}/model.embed_tokens.weight.bfloat16.bin" \
  --tokens_embed_num 151936 \
  --tokens_embed_size 1024 \
  --use_mmap_load_embed 1 \
  --live_print 1 \
  --devices 0