Spaces: clementsan (Running)

clementsan committed · Commit 067316d · 1 Parent(s): 146ca67
Add trust_remote_code condition for phi2 model
app.py CHANGED

@@ -71,7 +71,7 @@ def load_db():
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     progress(0.1, desc="Initializing HF tokenizer...")
     # HuggingFacePipeline uses local model
-    #
+    # Note: it will download model locally...
     # tokenizer=AutoTokenizer.from_pretrained(llm_model)
     # progress(0.5, desc="Initializing HF pipeline...")
     # pipeline=transformers.pipeline(
@@ -92,11 +92,20 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
 
     # HuggingFaceHub uses HF inference endpoints
     progress(0.5, desc="Initializing HF Hub...")
-    llm = HuggingFaceHub(
-        repo_id=llm_model,
-        # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-        model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    )
+    # Use of trust_remote_code as model_kwargs
+    # Warning: langchain issue
+    # URL: https://github.com/langchain-ai/langchain/issues/6080
+    if llm_model == "microsoft/phi-2":
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+        )
+    else:
+        llm = HuggingFaceHub(
+            repo_id=llm_model,
+            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+        )
 
     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
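
For reference, the pattern this commit introduces can be collapsed into a single call site. langchain's HuggingFaceHub wrapper exposes no first-class trust_remote_code parameter (the linked issue #6080), so the flag has to travel inside model_kwargs, and only for models such as microsoft/phi-2 that require remote code. Below is a minimal sketch under that assumption; build_llm and TRUST_REMOTE_CODE_MODELS are illustrative names, not part of app.py:

from langchain.llms import HuggingFaceHub

# Models known to require remote code execution; in app.py the
# condition is written inline inside initialize_llmchain().
TRUST_REMOTE_CODE_MODELS = {"microsoft/phi-2"}

def build_llm(llm_model, temperature, max_tokens, top_k):
    model_kwargs = {
        "temperature": temperature,
        "max_new_tokens": max_tokens,
        "top_k": top_k,
    }
    if llm_model in TRUST_REMOTE_CODE_MODELS:
        # trust_remote_code is not a HuggingFaceHub constructor argument
        # (https://github.com/langchain-ai/langchain/issues/6080), so it
        # is forwarded to the endpoint via model_kwargs instead.
        model_kwargs.update({"trust_remote_code": True, "torch_dtype": "auto"})
    return HuggingFaceHub(repo_id=llm_model, model_kwargs=model_kwargs)

Supporting another remote-code model later would then only mean extending the set, rather than duplicating the HuggingFaceHub call as the if/else branch in the diff does.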