Spaces:
Sleeping
Sleeping
| import torch | |
| from AI_Model_architecture import BertLSTM_CNN_Classifier | |
| from transformers import BertTokenizer | |
| import re | |
| import os | |
| import requests | |
# Run on the GPU when CUDA is available; otherwise fall back to the CPU
# (e.g. when deployed on Hugging Face).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Location recommended for Hugging Face deployments (avoids cache errors).
model_path = "/tmp/model.pth"
model_url = "https://huggingface.co/jerrynnms/scam-model/resolve/main/model.pth"

# Cache the checkpoint locally: it is downloaded only when model_path is absent.
if not os.path.exists(model_path):
    print("📦 下載 model.pth 中...")
    # Download into a sibling ".part" file and move it into place only after
    # the transfer finished, so an interrupted download can never leave a
    # truncated file at model_path that later runs would treat as a valid cache.
    partial_path = model_path + ".part"
    # stream=True avoids holding the whole checkpoint in memory; the timeout
    # (connect, read) keeps a stalled connection from hanging start-up forever.
    with requests.get(model_url, stream=True, timeout=(10, 300)) as response:
        if response.status_code != 200:
            raise FileNotFoundError("❌ 無法下載 model.pth,請檢查網址")
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    os.replace(partial_path, model_path)
    print("✅ 模型下載完成")

# Module-level (shared) model and tokenizer, loaded once at import time.
model = BertLSTM_CNN_Classifier()
# NOTE(review): torch.load unpickles the downloaded file; if the installed
# torch version supports it, consider passing weights_only=True.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()
tokenizer = BertTokenizer.from_pretrained("ckiplab/bert-base-chinese")
# Predict a single piece of text.
def predict_single_sentence(text: str, max_len=256):
    """Clean *text*, run it through the module-level model and classify it.

    The text is stripped of all whitespace and of every character that is
    not a CJK ideograph (U+4E00-U+9FFF), an ASCII letter or digit, or one of
    the punctuation marks listed in the pattern below. It is then tokenized
    with the module-level ``tokenizer`` (truncated / padded to *max_len*) and
    passed to the module-level ``model`` with gradients disabled.

    Args:
        text: Raw input text.
        max_len: Token length the input is truncated or padded to.

    Returns:
        A ``(label, prob)`` tuple: ``prob`` is the model output converted to
        a Python number with ``.item()`` and ``label`` is ``1`` when
        ``prob > 0.5``, else ``0``.
    """
    # Drop whitespace first, then anything outside the allowed character set.
    no_space = re.sub(r"\s+", "", text)
    cleaned = re.sub(r"[^\u4e00-\u9fffA-Za-z0-9。,!?:/.\-]", "", no_space)

    batch = tokenizer(
        cleaned,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_len,
    )
    # Move the three model inputs to the same device as the model.
    ids, mask, segments = (
        batch[key].to(device)
        for key in ("input_ids", "attention_mask", "token_type_ids")
    )

    with torch.no_grad():
        score = model(ids, mask, segments).item()

    return int(score > 0.5), score
# Wrap the prediction in a format the API can return directly.
def analyze_text(text: str):
    """Classify *text* and package the result as a response dictionary.

    Calls ``predict_single_sentence`` and maps its score to a risk tier:
    above 0.9 is high risk, above 0.5 is medium risk, anything else is low
    risk.

    Args:
        text: Raw input text to analyze.

    Returns:
        A dict with ``"status"`` (scam / normal wording chosen from the
        predicted label), ``"confidence"`` (the score as a percentage rounded
        to two decimals) and ``"suspicious_keywords"`` (currently a
        one-element list holding the risk-tier description).
    """
    label, prob = predict_single_sentence(text)

    # Tiers ordered from the highest threshold down; the first match wins.
    risk_tiers = (
        (0.9, "🔴 高風險(極可能是詐騙)"),
        (0.5, "🟡 中風險(可疑)"),
    )
    risk = next(
        (description for floor, description in risk_tiers if prob > floor),
        "🟢 低風險(正常)",
    )

    if label == 1:
        status = "詐騙"
    else:
        status = "正常"

    result = {
        "status": status,
        "confidence": round(prob * 100, 2),
        # Placeholder: keyword-level annotation may be added here later.
        "suspicious_keywords": [risk],
    }
    return result