Spaces:

likewendy
/

phi-4

Runtime error

App Files Community

likewendy committed on Jan 6

Commit

07f9f12

1 Parent(s): b9692bd

code

Browse files

Files changed (1) hide show

app.py +14 -62

app.py CHANGED Viewed

@@ -1,56 +1,14 @@
 import spaces
 import gradio as gr
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import os
-if not os.path.exists("./phi-4"):
-    os.system('huggingface-cli download matteogeniaccio/phi-4 --local-dir ./phi-4 --include "phi-4/*"')
-from safetensors.torch import load_file, save_file
-@spaces.GPU
-def merge_safetensors(input_dir, output_file):
-    # 获取所有分片文件
-    files = sorted([f for f in os.listdir(input_dir) if f.startswith('model-') and f.endswith('.safetensors')])
-    # 合并所有张量
-    merged_state_dict = {}
-    for file in files:
-        file_path = os.path.join(input_dir, file)
-        print(f"Loading {file}...")
-        state_dict = load_file(file_path)
-        merged_state_dict.update(state_dict)
-    # 保存合并后的文件
-    print(f"Saving merged model to {output_file}...")
-    save_file(merged_state_dict, output_file)
-    print("Done!")
-# 使用示例
-input_dir = "./phi-4/phi-4"  # 包含分片文件的目录
-output_file = "./phi-4/phi-4/model.safetensors"  # 合并后的文件路径
-if not os.path.exists(output_file):
-    merge_safetensors(input_dir, output_file)
-# 加载 phi-4 模型和 tokenizer
-torch.random.manual_seed(0)
-model = AutoModelForCausalLM.from_pretrained(
-    "./phi-4/phi-4",  # 模型路径
-    device_map="cuda",  # 使用 GPU
-    torch_dtype="auto",  # 自动选择数据类型
-    trust_remote_code=True,  # 允许远程代码加载
-)
-tokenizer = AutoTokenizer.from_pretrained("./phi-4/phi-4")
-# 设置 pipeline
-pipe = pipeline(
     "text-generation",
-    model=model,
-    tokenizer=tokenizer,
 )
 # 响应函数
@@ -72,21 +30,15 @@ def respond(
             messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
-    # 将消息转换为字符串格式（适用于 text-generation）
-    input_text = "\n".join(
-        f"{msg['role']}: {msg['content']}" for msg in messages
-    )
     # 生成响应
-    generation_args = {
-        "max_new_tokens": max_tokens,
-        "temperature": temperature,
-        "top_p": top_p,
-        "do_sample": temperature > 0,
-        "return_full_text": False,
-    }
-    output = pipe(input_text, **generation_args)
-    response = output[0]["generated_text"]
     # 返回流式响应
     for token in response:

 import spaces
 import gradio as gr
+import transformers
 import os
+# 初始化pipeline
+pipeline = transformers.pipeline(
     "text-generation",
+    model="microsoft/phi-4",
+    model_kwargs={"torch_dtype": "auto"},
+    device_map="auto",
 )
 # 响应函数
             messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
     # 生成响应
+    outputs = pipeline(
+        messages,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        do_sample=(temperature > 0),
+    )
+    response = outputs[0]["generated_text"]
     # 返回流式响应
     for token in response: