support Qwen chat model
Browse files
- app_modules/llm_loader.py +39 -10
- requirements.txt +1 -0
- server.py +3 -0
app_modules/llm_loader.py
CHANGED

```diff
@@ -207,6 +207,7 @@ class LLMLoader:
             0.01
             if "gpt4all-j" in MODEL_NAME_OR_PATH
             or "dolly" in MODEL_NAME_OR_PATH
+            or "Qwen" in MODEL_NAME_OR_PATH
             else 0
         )
         use_fast = (
@@ -216,11 +217,29 @@ class LLMLoader:
         )
         padding_side = "left"  # if "dolly" in MODEL_NAME_OR_PATH else None

-        config = AutoConfig.from_pretrained(
-            MODEL_NAME_OR_PATH,
-            trust_remote_code=True,
-            token=token,
+        config = (
+            AutoConfig.from_pretrained(
+                MODEL_NAME_OR_PATH,
+                trust_remote_code=True,
+                token=token,
+                fp32=hf_pipeline_device_type == "cpu",
+                bf16=(
+                    hf_pipeline_device_type != "cpu"
+                    and torch_dtype == torch.bfloat16
+                ),
+                fp16=(
+                    hf_pipeline_device_type != "cpu"
+                    and torch_dtype != torch.bfloat16
+                ),
+            )
+            if "Qwen" in MODEL_NAME_OR_PATH
+            else AutoConfig.from_pretrained(
+                MODEL_NAME_OR_PATH,
+                trust_remote_code=True,
+                token=token,
+            )
         )
+
         # config.attn_config["attn_impl"] = "triton"
         # config.max_seq_len = 4096
         config.init_device = hf_pipeline_device_type
@@ -360,16 +379,26 @@ class LLMLoader:
                     config=config,
                     trust_remote_code=True,
                 )
-                if token is None
-                else AutoModelForCausalLM.from_pretrained(
-                    MODEL_NAME_OR_PATH,
-                    config=config,
-                    trust_remote_code=True,
-                    token=token,
+                if "Qwen" in MODEL_NAME_OR_PATH
+                else (
+                    AutoModelForCausalLM.from_pretrained(
+                        MODEL_NAME_OR_PATH,
+                        config=config,
+                        trust_remote_code=True,
+                    )
+                    if token is None
+                    else AutoModelForCausalLM.from_pretrained(
+                        MODEL_NAME_OR_PATH,
+                        config=config,
+                        trust_remote_code=True,
+                        token=token,
+                    )
                 )
             )
         )
         print(f"Model memory footprint: {model.get_memory_footprint()}")
+        model = model.eval()
+        # print(f"Model memory footprint: {model.get_memory_footprint()}")
     else:
         model = MODEL_NAME_OR_PATH
```
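Taken together, the two hunks give Qwen models their own loading path: Qwen's remote config expresses precision through explicit fp32/bf16/fp16 flags (fp32 on CPU, otherwise bf16 or fp16 depending on torch_dtype), the model is loaded with trust_remote_code, and it is put into eval mode afterwards. The sketch below condenses that path into a standalone script; the checkpoint ID and the device/dtype defaults are illustrative assumptions, not values taken from this repo.

```python
# Minimal sketch of the Qwen loading path introduced above, with hypothetical
# stand-ins for this repo's variables (MODEL_NAME_OR_PATH,
# hf_pipeline_device_type, torch_dtype). Not the repo's actual entry point.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

MODEL_NAME_OR_PATH = "Qwen/Qwen-7B-Chat"  # assumed checkpoint, for illustration
hf_pipeline_device_type = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

# Qwen's remote config takes fp32/bf16/fp16 flags instead of a torch_dtype
# argument: fp32 on CPU, otherwise bf16 or fp16 depending on torch_dtype.
config = AutoConfig.from_pretrained(
    MODEL_NAME_OR_PATH,
    trust_remote_code=True,
    fp32=hf_pipeline_device_type == "cpu",
    bf16=hf_pipeline_device_type != "cpu" and torch_dtype == torch.bfloat16,
    fp16=hf_pipeline_device_type != "cpu" and torch_dtype != torch.bfloat16,
)
# Mirrors the loader; init_device is consumed by some remote-code models
# (e.g. MPT) and is harmlessly stored on the config otherwise.
config.init_device = hf_pipeline_device_type

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME_OR_PATH,
    config=config,
    trust_remote_code=True,
).eval()  # eval() disables dropout and other train-time behavior
print(f"Model memory footprint: {model.get_memory_footprint()}")
```

The else branch for non-Qwen models is unchanged: a plain AutoConfig/AutoModelForCausalLM load, with the access token passed through only when one is set.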
requirements.txt
CHANGED

```diff
@@ -32,3 +32,4 @@ gevent
 pydantic >= 1.10.11
 pypdf
 python-telegram-bot
+transformers_stream_generator
```
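The new dependency exists because Qwen's trust_remote_code modeling file imports transformers_stream_generator for streamed generation, so the model class cannot even be loaded without it. Below is a hedged sketch of the interface that relies on it, assuming the public Qwen/Qwen-7B-Chat checkpoint; chat() and chat_stream() are methods defined by Qwen's published remote code, not by this repo.

```python
# Sketch only: exercises the Qwen remote-code chat API whose import pulls in
# transformers_stream_generator. Checkpoint and prompts are illustrative.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen-7B-Chat"  # assumed public checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval()

# Single-turn chat; `history` carries prior turns for follow-up questions.
response, history = model.chat(tokenizer, "What's generative AI?", history=None)
print(response)

# Streaming variant: yields the progressively decoded response. This is the
# code path that breaks at import time if transformers_stream_generator is absent.
for partial in model.chat_stream(tokenizer, "more on finance", history=history):
    pass
print(partial)  # final accumulated response
```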
server.py
CHANGED

```diff
@@ -86,6 +86,9 @@ if __name__ == "__main__":
     chat_start = timer()
     chat_sync("What's generative AI?", chat_id="test_user")
     chat_sync("more on finance", chat_id="test_user")
+    # chat_sync("给我讲一个年轻人奋斗创业最终取得成功的故事。", chat_id="test_user")
+    # chat_sync("给这个故事起一个标题", chat_id="test_user")
+    # chat_sync("Write the game 'snake' in python", chat_id="test_user")
     chat_end = timer()
     total_time = chat_end - chat_start
     print(f"Total time used: {total_time:.3f} s")
```

(The two commented-out Chinese prompts ask for a story about a young person who works hard at a startup and ultimately succeeds, and then for a title for that story; together with the snake-game prompt they are optional multi-turn smoke tests for the Qwen chat path.)