selfconstruct3d
/

AttackGroup-MPNET

Feature Extraction

Model card Files Files and versions

selfconstruct3d committed on Mar 8

Commit

e899270

·

verified ·

1 Parent(s): f2c6a2d

Update README.md

Files changed (1) hide show

README.md +36 -7

README.md CHANGED Viewed

@@ -172,12 +172,41 @@ To be announced...
 | Classification Accuracy (Test) | 0.7161 |
 | Weighted F1 Score      | [More Information Needed] |
 ### Single Prediction Example
 ```python
-# Create explicit mapping from numeric labels to original GroupIDs
-label_to_groupid = dict(enumerate(train_df["GroupID"].astype("category").cat.categories))
 def predict_group(sentence):
     classifier_model.eval()
@@ -192,17 +221,17 @@ def predict_group(sentence):
     attention_mask = encoding["attention_mask"].to(device)
     with torch.no_grad():
-        logits = classifier_model(input_ids, attention_mask)
         predicted_label = torch.argmax(logits, dim=1).cpu().item()
-    # Explicitly convert numeric label to original GroupID
-    predicted_groupid = label_to_groupid[predicted_label]
     return predicted_groupid
 sentence = "APT38 has used phishing emails with malicious links to distribute malware."
 predicted_class = predict_group(sentence)
-print(f"Predicted GroupID: {predicted_class}")  # e.g., Predicted GroupID: G0081
 ```
 ## Environmental Impact

 | Classification Accuracy (Test) | 0.7161 |
 | Weighted F1 Score      | [More Information Needed] |
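+
+| Model                           | Embedding Variability | Accuracy |
+|---------------------------------|-----------------------|----------|
+| Original MPNet                  | 0.092721              | 0.998611 |
+| MLM Fine-tuned MPNet            | 0.034983              | 0.653611 |
+| Classification Fine-tuned MPNet | 0.193065              | 0.950833 |
+| SecBERT                         | 0.591303              | 0.988611 |
+| ATTACK-BERT                     | 0.096108              | 0.967778 |
+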
 ### Single Prediction Example
 ```python
+import torch
+import torch.nn as nn
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch.optim as optim
+import numpy as np
+from huggingface_hub import hf_hub_download
+import json
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the fine-tuned MPNet classification model
+classifier_model = AutoModelForSequenceClassification.from_pretrained("selfconstruct3d/AttackGroup-MPNET").to(device)
+# Load the matching tokenizer
+tokenizer = AutoTokenizer.from_pretrained("selfconstruct3d/AttackGroup-MPNET")
+label_to_groupid_file = hf_hub_download(
+    repo_id="selfconstruct3d/AttackGroup-MPNET",
+    filename="label_to_groupid.json"
+)
+with open(label_to_groupid_file, "r") as f:
+    label_to_groupid = json.load(f)
 def predict_group(sentence):
     classifier_model.eval()
     attention_mask = encoding["attention_mask"].to(device)
     with torch.no_grad():
+        outputs = classifier_model(input_ids=input_ids, attention_mask=attention_mask)
+        logits = outputs.logits
         predicted_label = torch.argmax(logits, dim=1).cpu().item()
+    predicted_groupid = label_to_groupid[str(predicted_label)]
     return predicted_groupid
+# Example usage:
 sentence = "APT38 has used phishing emails with malicious links to distribute malware."
 predicted_class = predict_group(sentence)
+print(f"Predicted GroupID: {predicted_class}")
 ```
 ## Environmental Impact