selfconstruct3d
/

cybersec_classifier

Model card Files Files and versions

selfconstruct3d committed on Jul 29

Commit

c791818

·

verified ·

1 Parent(s): a967b13

Update README.md

Files changed (1) hide show

README.md +17 -28

README.md CHANGED Viewed

@@ -26,39 +26,28 @@ The model was trained on a multilingual dataset of cybersecurity and non-cyberse
 ```python
 from sentence_transformers import SentenceTransformer
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import pandas as pd
-import joblib
 from huggingface_hub import hf_hub_download
-# Load your cleaned dataset
-df = pd.read_csv("your_dataset.csv")  # Requires 'clean_text' and 'label' columns
-# Load the sentence transformer
 embedder = SentenceTransformer("intfloat/multilingual-e5-large")
-# Train-test split
-X_train, X_test, y_train, y_test = train_test_split(
-    df["clean_text"],
-    df["label"],
-    test_size=0.05,
-    stratify=df["label"],
-    random_state=42
-)
-# Encode labels
-label_encoder = LabelEncoder()
-y_train_enc = label_encoder.fit_transform(y_train)
-y_test_enc = label_encoder.transform(y_test)
-# Generate sentence embeddings
-X_train_emb = embedder.encode(X_train.tolist(), convert_to_numpy=True, show_progress_bar=True)
-X_test_emb = embedder.encode(X_test.tolist(), convert_to_numpy=True, show_progress_bar=True)
-# Load the trained classifier
-model_path = hf_hub_download(repo_id="selfconstruct3d/cybersec_classifier", filename="cybersec_classifier.pkl")
-model = joblib.load(model_path)
-# Predict
-y_pred = model.predict(X_test_emb)

 ```python
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import hf_hub_download
+import joblib
+# 1. Load the embedding model
 embedder = SentenceTransformer("intfloat/multilingual-e5-large")
+# 2. Load the pretrained MLP classifier from Hugging Face Hub
+model_path = hf_hub_download(repo_id="selfconstruct3d/cybersec_classifier", filename="cybersec_classifier.pkl")
+model = joblib.load(model_path)
+# 3. Example input texts (can be in English or German)
+texts = [
+    "A new ransomware attack has affected critical infrastructure in Germany.",
+    "The local sports club hosted its annual summer festival this weekend."
+]
+# 4. Generate embeddings
+embeddings = embedder.encode(texts, convert_to_numpy=True, show_progress_bar=False)
+# 5. Predict cybersecurity relevance
+predictions = model.predict(embeddings)
+# 6. Output results
+for text, label in zip(texts, predictions):
+    print(f"Text: {text}\nPrediction: {label}\n")