fixed bugs for tgi

- .env.example +1 -1
- app_modules/llm_inference.py +15 -13
- app_modules/llm_loader.py +0 -3
.env.example
CHANGED
@@ -30,7 +30,7 @@ DISABLE_MODEL_PRELOADING=true
 CHAT_HISTORY_ENABLED=true
 SHOW_PARAM_SETTINGS=false
 SHARE_GRADIO_APP=false
-PDF_FILE_BASE_URL=https://
+PDF_FILE_BASE_URL=https://chat-with-llama-2.netlify.app/pdfs/books/
 
 # if unset, default to "hkunlp/instructor-xl"
 HF_EMBEDDINGS_MODEL_NAME="hkunlp/instructor-large"
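The placeholder `https://` is replaced with a real base URL. A hypothetical sketch of how such a setting is typically consumed, assuming the app joins the base URL with a document filename (`pdf_url` is illustrative, not the repo's API):

```python
import os

def pdf_url(filename: str) -> str:
    # PDF_FILE_BASE_URL comes from the .env file; the trailing slash in
    # the new value lets simple concatenation produce a valid link.
    base = os.environ.get("PDF_FILE_BASE_URL", "")
    return base + filename

# pdf_url("some-book.pdf")
# -> "https://chat-with-llama-2.netlify.app/pdfs/books/some-book.pdf"
```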
app_modules/llm_inference.py
CHANGED
@@ -51,7 +51,6 @@ class LLMInference(metaclass=abc.ABCMeta):
                 streaming_handler,
             )
             if streaming_handler is not None
-            and self.llm_loader.streamer.for_huggingface
             else chain(inputs)
         )
 
@@ -82,20 +81,23 @@ class LLMInference(metaclass=abc.ABCMeta):
         )
         t.start()
 
-
-
-
+        if self.llm_loader.streamer.for_huggingface:
+            count = (
+                2
+                if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
+                else 1
+            )
 
-
-
-
-
+            while count > 0:
+                try:
+                    for token in self.llm_loader.streamer:
+                        streaming_handler.on_llm_new_token(token)
 
-
-
-
-
-
+                    self.llm_loader.streamer.reset()
+                    count -= 1
+                except Exception:
+                    print("nothing generated yet - retry in 0.5s")
+                    time.sleep(0.5)
 
         t.join()
         return que.get()
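The removed condition referenced `self.llm_loader.streamer.for_huggingface`, which at that point was still a method (see the `app_modules/llm_loader.py` hunk below). Without parentheses, a bound method is always truthy, so the old check could never evaluate to false. A minimal sketch of the pitfall, with an illustrative class name:

```python
class Streamer:
    def for_huggingface(self) -> bool:
        return False  # what an actual call would return

s = Streamer()

# Referencing the method without parentheses yields the bound method
# object, which is always truthy -- a bare `and s.for_huggingface`
# in a condition can never be False:
assert bool(s.for_huggingface) is True
assert s.for_huggingface() is False
```

The new drain loop runs once per expected generation: presumably two passes when `chat_history` is non-empty (a question-condensing pass plus the answer pass in a conversational chain) and one otherwise, retrying every 0.5 s until the streamer starts yielding tokens. Note that `time.sleep` assumes `time` is imported in this module.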
app_modules/llm_loader.py
CHANGED
@@ -66,9 +66,6 @@ class TextIteratorStreamer(TextStreamer, StreamingStdOutCallbackHandler):
         self.text_queue.put("\n", timeout=self.timeout)
         self.text_queue.put(self.stop_signal, timeout=self.timeout)
 
-    def for_huggingface(self) -> bool:
-        return self.tokenizer != ""
-
     def __iter__(self):
         return self
 
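With the method gone, the call site above reads `streamer.for_huggingface` as a plain attribute, so the flag is presumably now set once at construction (e.g. from whether a tokenizer was supplied). A minimal sketch of that pattern, where only `for_huggingface`, `text_queue`, and `stop_signal` mirror the repo and the rest is illustrative:

```python
from queue import Queue

class MinimalStreamer:
    """Illustrative stand-in for TextIteratorStreamer; only the
    for_huggingface flag and the iterator protocol mirror the repo."""

    def __init__(self, tokenizer="", timeout=None):
        self.text_queue: Queue = Queue()
        self.stop_signal = None
        self.timeout = timeout
        # Plain attribute instead of a method: `streamer.for_huggingface`
        # now evaluates to a real boolean at the call site.
        self.for_huggingface = tokenizer != ""

    def __iter__(self):
        return self

    def __next__(self):
        value = self.text_queue.get(timeout=self.timeout)
        if value == self.stop_signal:
            raise StopIteration()
        return value
```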