Donghao Huang committed
Commit · bf1e59b
Parent(s): 6b469d2

clean up code

- app_modules/llm_inference.py +7 -11
- test.py +0 -1
app_modules/llm_inference.py CHANGED

@@ -5,7 +5,6 @@ import urllib
 from queue import Queue
 from threading import Thread
 
-from langchain.callbacks.tracers import LangChainTracer
 from langchain.chains.base import Chain
 
 from app_modules.llm_loader import LLMLoader, TextIteratorStreamer
@@ -24,12 +23,8 @@ class LLMInference(metaclass=abc.ABCMeta):
     def create_chain(self) -> Chain:
         pass
 
-    def get_chain(self, tracing: bool = False) -> Chain:
+    def get_chain(self) -> Chain:
         if self.chain is None:
-            if tracing:
-                tracer = LangChainTracer()
-                tracer.load_default_session()
-
             self.chain = self.create_chain()
 
         return self.chain
@@ -39,7 +34,6 @@ class LLMInference(metaclass=abc.ABCMeta):
         inputs,
         streaming_handler,
         q: Queue = None,
-        tracing: bool = False,
         testing: bool = False,
     ):
         print(inputs)
@@ -49,7 +43,7 @@ class LLMInference(metaclass=abc.ABCMeta):
         try:
             self.llm_loader.streamer.reset(q)
 
-            chain = self.get_chain(tracing)
+            chain = self.get_chain()
             result = (
                 self._run_chain(chain, inputs, streaming_handler, testing)
                 if streaming_handler is not None
@@ -84,7 +78,7 @@ class LLMInference(metaclass=abc.ABCMeta):
             )
             t.start()
 
-            if self.llm_loader.streamer.for_huggingface
+            if self.llm_loader.streamer.for_huggingface:
                 count = (
                     2
                     if "chat_history" in inputs and len(inputs.get("chat_history")) > 0
@@ -94,12 +88,14 @@ class LLMInference(metaclass=abc.ABCMeta):
                 while count > 0:
                     try:
                         for token in self.llm_loader.streamer:
-                            streaming_handler.on_llm_new_token(token)
+                            if not testing:
+                                streaming_handler.on_llm_new_token(token)
 
                         self.llm_loader.streamer.reset()
                         count -= 1
                     except Exception:
-                        print("nothing generated yet - retry in 0.5s")
+                        if not testing:
+                            print("nothing generated yet - retry in 0.5s")
                         time.sleep(0.5)
 
             t.join()
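Net effect in app_modules/llm_inference.py: the LangChainTracer import and the tracing flag threaded through get_chain()/call_chain() are removed, and both the token forwarding and the retry message are now gated on the existing testing flag. A minimal runnable sketch of the resulting behavior, assuming simplified stand-ins (FakeStreamer and SketchInference are hypothetical; only the lazy get_chain() and the `if not testing` guards mirror the commit):

# Hypothetical, simplified sketch -- not the real app_modules classes.
from queue import Queue


class FakeStreamer:
    """Stand-in for TextIteratorStreamer: iterates over queued tokens."""

    def __init__(self):
        self.queue = Queue()

    def put(self, token):
        self.queue.put(token)

    def __iter__(self):
        while not self.queue.empty():
            yield self.queue.get()

    def reset(self, q=None):
        self.queue = q if q is not None else Queue()


class SketchInference:
    def __init__(self):
        self.chain = None
        self.streamer = FakeStreamer()

    def create_chain(self):
        return object()  # placeholder for the real langchain Chain

    def get_chain(self):
        # Lazy one-time construction; the old tracing branch is gone.
        if self.chain is None:
            self.chain = self.create_chain()
        return self.chain

    def drain_tokens(self, testing=False):
        # Tokens are always consumed so the stream finishes cleanly,
        # but they are only surfaced outside of test runs.
        for token in self.streamer:
            if not testing:
                print(token, end="", flush=True)
        self.streamer.reset()


inference = SketchInference()
inference.get_chain()
for tok in ["he", "llo"]:
    inference.streamer.put(tok)
inference.drain_tokens(testing=True)  # silent under testing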
test.py CHANGED

@@ -72,7 +72,6 @@ while True:
         {"question": query, "chat_history": chat_history},
         custom_handler,
         None,
-        False,
         True,
     )
     end = timer()
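With the tracing flag gone, call_chain() now takes inputs, streaming_handler, q, and testing, so the call in test.py drops its fourth positional False. Written with a keyword for clarity (the receiver name llm_inference is illustrative; the actual variable in test.py is not shown here):

result = llm_inference.call_chain(
    {"question": query, "chat_history": chat_history},
    custom_handler,  # streaming handler
    None,            # q: optional token queue
    testing=True,
)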