Spaces:

Eliot0110
/

Travel_Assistant

Sleeping

App Files Community

Eliot0110 committed on Aug 5

Commit

8d69a10

1 Parent(s): 96512ae

fix: decode

Browse files

Files changed (1) hide show

modules/ai_model.py +7 -18

modules/ai_model.py CHANGED Viewed

@@ -163,23 +163,16 @@ class AIModel:
                     return_tensors="pt"
                 ).to(self.model.device, dtype=torch.bfloat16)
-            # 截断过长的 token
-            if hasattr(inputs, 'input_ids') and inputs.input_ids.shape[-1] > 512:
-                log.warning(f"⚠️ 截断过长输入: {inputs.input_ids.shape[-1]} -> 512")
-                inputs.input_ids = inputs.input_ids[:, :512]
-                if hasattr(inputs, 'attention_mask'):
-                    inputs.attention_mask = inputs.attention_mask[:, :512]
-            # --- 这是关键的修改 ---
             with torch.inference_mode():
                 generation_args = {
-                    "max_new_tokens": 256,
                     "pad_token_id": self.processor.tokenizer.eos_token_id,
                     "use_cache": True
                 }
                 # 如果 temperature 接近0，使用贪心解码 (用于分类等确定性任务)
-                if temperature < 1e-6: # 使用一个很小的数来比较浮点数
                     log.info("▶️ 使用贪心解码 (do_sample=False) 以获得确定性输出。")
                     generation_args["do_sample"] = False
                 # 否则，使用采样解码 (用于创造性生成任务)
@@ -194,15 +187,11 @@ class AIModel:
                     **inputs,
                     **generation_args
                 )
-            decoded = self.processor.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
-        # 移除prompt部分
-            if prompt in decoded:
-                decoded = decoded.replace(prompt, "").strip()
             return decoded if decoded else "我理解了您的问题，请告诉我更多具体信息。"
         except RuntimeError as e:
             if "shape" in str(e):
                 log.error(f"❌ Tensor形状错误: {e}")

                     return_tensors="pt"
                 ).to(self.model.device, dtype=torch.bfloat16)
+            input_len = inputs.input_ids.shape[-1]
             with torch.inference_mode():
                 generation_args = {
+                    "max_new_tokens": 512,
                     "pad_token_id": self.processor.tokenizer.eos_token_id,
                     "use_cache": True
                 }
                 # 如果 temperature 接近0，使用贪心解码 (用于分类等确定性任务)
+                if temperature < 1e-6:
                     log.info("▶️ 使用贪心解码 (do_sample=False) 以获得确定性输出。")
                     generation_args["do_sample"] = False
                 # 否则，使用采样解码 (用于创造性生成任务)
                     **inputs,
                     **generation_args
                 )
+            generated_tokens = outputs[0][input_len:]
+            decoded = self.processor.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
             return decoded if decoded else "我理解了您的问题，请告诉我更多具体信息。"
         except RuntimeError as e:
             if "shape" in str(e):
                 log.error(f"❌ Tensor形状错误: {e}")