Upload folder using huggingface_hub
Test_RAG.py CHANGED (+1 -8)
@@ -282,13 +282,6 @@ print(f"Loading model from {model_dir}")
 
 ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
 
-if "GPU" in llm_device and "qwen2-7b-instruct" in llm_model_id:
-    ov_config["GPU_ENABLE_SDPA_OPTIMIZATION"] = "NO"
-
-# On a GPU device a model is executed in FP16 precision. For the red-pajama-3b-chat model there are known accuracy
-# issues caused by this, which we avoid by setting the precision hint to "f32".
-if llm_model_id == "red-pajama-3b-chat" and "GPU" in core.available_devices and llm_device in ["GPU", "AUTO"]:
-    ov_config["INFERENCE_PRECISION_HINT"] = "f32"
 
 # llm = HuggingFacePipeline.from_model_id(
 # model_id= "meta-llama/Meta-Llama-3-8B",
@@ -304,7 +297,7 @@ if llm_model_id == "red-pajama-3b-chat" and "GPU" in core.available_devices and
 # )
 from optimum.intel.openvino import OVModelForCausalLM
 from transformers import pipeline
-
+print("starting setting llm model")
 
 model_id = "meta-llama/Meta-Llama-3-8B"
 ov_config = {"PERFORMANCE_HINT": "LATENCY"}  # This is an example; check your actual ov_config
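The diff cuts off right after the new ov_config line, so the rest of the loading code is not shown in this commit. Below is a minimal sketch of how the imports and ov_config above are typically wired together with optimum-intel: the device string, max_new_tokens, and the sample prompt are assumptions for illustration, not lines from Test_RAG.py.

from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

model_id = "meta-llama/Meta-Llama-3-8B"
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

# Convert the checkpoint to OpenVINO IR on the fly and apply the ov_config.
# The device ("CPU"/"GPU"/"AUTO") is an assumption; the script selects it via llm_device.
ov_model = OVModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    device="CPU",
    ov_config=ov_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Wrap the OpenVINO model in a standard transformers text-generation pipeline.
generator = pipeline(
    "text-generation",
    model=ov_model,
    tokenizer=tokenizer,
    max_new_tokens=256,
)
print(generator("What is retrieval-augmented generation?")[0]["generated_text"])

In a LangChain RAG script like this one, the resulting pipeline object is usually handed to HuggingFacePipeline(pipeline=generator) rather than the commented-out HuggingFacePipeline.from_model_id call, but that step is not part of this commit.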