Change the model file and use ctransformers directly instead of langchain.
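For context on what "using ctransformers directly" means here: langchain's CTransformers class is a wrapper around the same ctransformers loader, while the new code calls ctransformers itself and gets back an object that is directly callable on a prompt. A minimal before/after sketch, assuming the model id and file name shown in the diff below (the commented langchain call is an assumption about the pre-commit shape, since the removed lines did not survive):

# Before (removed in this commit): langchain's wrapper around ctransformers.
# from langchain.llms import CTransformers
# llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGUF",
#                     model_file="llama-2-7b-chat.Q8_0.gguf",
#                     model_type="llama")

# After: ctransformers directly; without hf=True the returned LLM is callable.
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q8_0.gguf",
    model_type="llama",
)
print(llm("Q: What is a vector database? A:", max_new_tokens=64))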
app.py CHANGED

@@ -8,8 +8,8 @@ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 # from langchain.llms import LlamaCpp
 from langchain.vectorstores import Qdrant
 from qdrant_client.http import models
-from langchain.llms import CTransformers
-
+# from langchain.llms import CTransformers
+from ctransformers import AutoModelForCausalLM
 
 
 
@@ -36,13 +36,13 @@ print("loading the LLM......................................")
 # verbose=True,
 # )
 
-llm =
-
-
-
-
-
-
+llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                           model_file="llama-2-7b-chat.Q8_0.gguf",
+                                           model_type="llama",
+                                           # config = ctransformers.hub.AutoConfig,
+                                           hf = True
+                                           )
+
 
 
 print("LLM loaded........................................")
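Because the commit passes hf=True, the object returned is not the plain callable LLM but a transformers-compatible model. A sketch of how it would then be driven, using ctransformers' documented AutoTokenizer integration and a standard transformers pipeline (the pipeline call and prompt are illustrative, not from the Space's code):

from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

# hf=True makes ctransformers return a model that plugs into transformers.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q8_0.gguf",
    model_type="llama",
    hf=True,
)
# ctransformers builds the matching tokenizer from the loaded model.
tokenizer = AutoTokenizer.from_pretrained(model)

generate = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(generate("What is a vector database?", max_new_tokens=64)[0]["generated_text"])

A plausible reason for hf=True is that the rest of app.py still uses langchain (the Qdrant vectorstore and streaming callbacks), and a transformers pipeline can be re-wrapped for langchain via its HuggingFacePipeline class if needed.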