Change the model file and use ctransformers directly instead of langchain.
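For context on what "using ctransformers directly" means here: langchain's CTransformers class is a wrapper around the same ctransformers loader, while the new code calls ctransformers itself and gets back an object that is directly callable on a prompt. A minimal before/after sketch, assuming the model id and file name shown in the diff below (the commented langchain call is an assumption about the pre-commit shape, since the removed lines did not survive):

# Before (removed in this commit): langchain's wrapper around ctransformers.
# from langchain.llms import CTransformers
# llm = CTransformers(model="TheBloke/Llama-2-7B-Chat-GGUF",
#                     model_file="llama-2-7b-chat.Q8_0.gguf",
#                     model_type="llama")

# After: ctransformers directly; without hf=True the returned LLM is callable.
from ctransformers import AutoModelForCausalLM

llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q8_0.gguf",
    model_type="llama",
)
print(llm("Q: What is a vector database? A:", max_new_tokens=64))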
app.py CHANGED

@@ -8,8 +8,8 @@ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 # from langchain.llms import LlamaCpp
 from langchain.vectorstores import Qdrant
 from qdrant_client.http import models
-from langchain.llms import CTransformers
-
+# from langchain.llms import CTransformers
+from ctransformers import AutoModelForCausalLM
 
 
 
@@ -36,13 +36,13 @@ print("loading the LLM......................................")
 # verbose=True,
 # )
 
-llm =
-
-
-
-
-
-
+llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                           model_file="llama-2-7b-chat.Q8_0.gguf",
+                                           model_type="llama",
+                                           # config = ctransformers.hub.AutoConfig,
+                                           hf = True
+                                           )
+
 
 
 print("LLM loaded........................................")
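Because the commit passes hf=True, the object returned is not the plain callable LLM but a transformers-compatible model. A sketch of how it would then be driven, using ctransformers' documented AutoTokenizer integration and a standard transformers pipeline (the pipeline call and prompt are illustrative, not from the Space's code):

from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

# hf=True makes ctransformers return a model that plugs into transformers.
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q8_0.gguf",
    model_type="llama",
    hf=True,
)
# ctransformers builds the matching tokenizer from the loaded model.
tokenizer = AutoTokenizer.from_pretrained(model)

generate = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(generate("What is a vector database?", max_new_tokens=64)[0]["generated_text"])

A plausible reason for hf=True is that the rest of app.py still uses langchain (the Qdrant vectorstore and streaming callbacks), and a transformers pipeline can be re-wrapped for langchain via its HuggingFacePipeline class if needed.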