Spaces:

jeevitha-app
/

Fake_news_detector_app

Sleeping

App Files Files Community

jeevitha-app committed on Jul 19

Commit

7c6e40f

verified ·

1 Parent(s): c7ba381

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -92

app.py CHANGED Viewed

@@ -1,112 +1,41 @@
-# Step 1: import
-import pandas as pd
-import numpy as np
-import string
-import re
 import gradio as gr
-import matplotlib.pyplot as plt
-import seaborn as sns
 from nltk.stem import PorterStemmer
 from nltk.corpus import stopwords
 import nltk
-nltk.download('stopwords')
-from sklearn.model_selection import train_test_split
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.svm import LinearSVC
-from sklearn.metrics import accuracy_score, confusion_matrix
-# Step 2: Load data
-true = pd.read_csv('True.csv', on_bad_lines='skip')
-fake = pd.read_csv('Fake.csv', on_bad_lines='skip')
-true['label'] = 1  # real
-fake['label'] = 0  # fake
-df = pd.concat([true, fake]).sample(frac=1).reset_index(drop=True)
-df = df[['title', 'text', 'label']]
-# Combine title and text
-df['content'] = df['title'] + " " + df['text']
-# Step 3: NLP Cleaning
-stop_words = set(stopwords.words('english'))
 stemmer = PorterStemmer()
 def clean_text(text):
     text = text.lower()
-    text = re.sub(r'\[.*?\]', '', text)  # remove brackets
-    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # remove links
-    text = re.sub(r'<.*?>+', '', text)  # remove html tags
-    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)  # remove punctuation
-    text = re.sub(r'\n', '', text)  # remove newlines
-    text = re.sub(r'\w*\d\w*', '', text)  # remove words with digits
-    words = text.split()
-    words = [stemmer.stem(word) for word in words if word not in stop_words]
-    return ' '.join(words)
-df['cleaned'] = df['content'].apply(clean_text)
-# Step 4: Train-Test Split
-X = df['cleaned']
-y = df['label']
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# Step 5: TF-IDF Vectorizer
-vectorizer = TfidfVectorizer(max_df=0.7)
-X_train_tfidf = vectorizer.fit_transform(X_train)
-X_test_tfidf = vectorizer.transform(X_test)
-# Step 6: Models
-models = {
-    "Logistic Regression": LogisticRegression(),
-    "Random Forest": RandomForestClassifier(n_estimators=100),
-    "SVM": LinearSVC()
-}
-# Train and evaluate
-results = {}
-for name, model in models.items():
-    model.fit(X_train_tfidf, y_train)
-    preds = model.predict(X_test_tfidf)
-    acc = accuracy_score(y_test, preds)
-    results[name] = {"model": model, "accuracy": acc}
-    print(f"{name} Accuracy: {acc:.4f}")
-# Plot confusion matrix for best model
-best_model_name = max(results, key=lambda x: results[x]['accuracy'])
-best_model = results[best_model_name]['model']
-y_pred = best_model.predict(X_test_tfidf)
-cm = confusion_matrix(y_test, y_pred)
-plt.figure(figsize=(5, 4))
-sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
-plt.title(f"Confusion Matrix - {best_model_name}")
-plt.xlabel('Predicted')
-plt.ylabel('Actual')
-plt.show()
-# Step 7: Gradio Web App
 def predict_news(text):
-    cleaned_text = clean_text(text)
-    vectorized = vectorizer.transform([cleaned_text])
-    prediction = best_model.predict(vectorized)[0]
-    return "Real News 🟢" if prediction == 1 else "Fake News 🔴"
-iface = gr.Interface(
     fn=predict_news,
-    inputs="text",
     outputs="text",
     title="📰 Fake News Detector",
-    description="Enter a news headline or content to check if it's real or fake."
 )
-iface.launch()

 import gradio as gr
+import pickle
+import string
 from nltk.stem import PorterStemmer
 from nltk.corpus import stopwords
 import nltk
+nltk.download("stopwords")
+# Load model and vectorizer
+with open("model.pkl", "rb") as f:
+    model = pickle.load(f)
+with open("vectorizer.pkl", "rb") as f:
+    vectorizer = pickle.load(f)
 stemmer = PorterStemmer()
+stop_words = stopwords.words("english")
 def clean_text(text):
     text = text.lower()
+    text = "".join([c for c in text if c not in string.punctuation])
+    tokens = text.split()
+    tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
+    return " ".join(tokens)
 def predict_news(text):
+    cleaned = clean_text(text)
+    vectorized = vectorizer.transform([cleaned])
+    prediction = model.predict(vectorized)[0]
+    return "✅ Real News" if prediction == 1 else "🚨 Fake News"
+demo = gr.Interface(
     fn=predict_news,
+    inputs=gr.Textbox(lines=10, placeholder="Paste a news article here..."),
     outputs="text",
     title="📰 Fake News Detector",
+    description="Paste any news content to classify it as Real or Fake."
 )
+demo.launch()