jeevitha-app committed on
Commit
7cca5b0
·
verified ·
1 Parent(s): 6c8e288

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Step 1: Install and import
# NOTE: the original line here was `!pip install gradio --quiet`. The leading
# `!` is IPython/Jupyter shell-escape syntax, which is a SyntaxError when this
# file is executed as a regular Python script. Install the dependency outside
# the script instead (for example `pip install gradio`, or list it in a
# requirements file).
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ import string
7
+ import re
8
+ import gradio as gr
9
+
10
+ import matplotlib.pyplot as plt
11
+ import seaborn as sns
12
+
13
+ from nltk.stem import PorterStemmer
14
+ from nltk.corpus import stopwords
15
+ import nltk
16
+ nltk.download('stopwords')
17
+
18
+ from sklearn.model_selection import train_test_split
19
+ from sklearn.feature_extraction.text import TfidfVectorizer
20
+
21
+ from sklearn.linear_model import LogisticRegression
22
+ from sklearn.ensemble import RandomForestClassifier
23
+ from sklearn.svm import LinearSVC
24
+
25
+ from sklearn.metrics import accuracy_score, confusion_matrix
26
+
27
# Step 2: Load data
# Rows the CSV parser cannot handle are skipped instead of aborting the load.
true = pd.read_csv('True.csv', on_bad_lines='skip')
fake = pd.read_csv('Fake.csv', on_bad_lines='skip')

true['label'] = 1  # real
fake['label'] = 0  # fake

# Shuffle with a fixed seed so the train/test split below (which is itself
# seeded) really is reproducible from run to run.
df = (
    pd.concat([true, fake], ignore_index=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Keep only the columns used downstream; take an explicit copy so that adding
# columns below operates on an independent frame.
df = df[['title', 'text', 'label']].copy()

# Combine title and text.
# Missing values are replaced with empty strings first: concatenating a
# string with NaN yields NaN, and clean_text() would then be handed a float.
df['content'] = df['title'].fillna('') + " " + df['text'].fillna('')
39
+
40
# Step 3: NLP Cleaning
stop_words = set(stopwords.words('english'))
stemmer = PorterStemmer()

# Patterns are compiled once here because clean_text() runs once per document.
_BRACKETED_RE = re.compile(r'\[.*?\]')
_URL_RE = re.compile(r'https?://\S+|www\.\S+')
_HTML_TAG_RE = re.compile(r'<.*?>+')
_PUNCTUATION_RE = re.compile(r'[%s]' % re.escape(string.punctuation))
_WORD_WITH_DIGIT_RE = re.compile(r'\w*\d\w*')


def clean_text(text):
    """Normalize raw news text into a space-separated string of stemmed tokens.

    Steps, in order: lowercase; drop ``[...]`` spans, URLs and HTML tags;
    strip punctuation; turn newlines into spaces; drop tokens that contain a
    digit; remove English stop words; stem what is left.

    Args:
        text: Raw text. Anything that is not a ``str`` (e.g. ``None`` or a
            NaN coming from a missing CSV cell) is treated as empty.

    Returns:
        The cleaned text as a single string; ``''`` when nothing survives.
    """
    if not isinstance(text, str):
        return ''

    text = text.lower()
    text = _BRACKETED_RE.sub('', text)      # remove brackets
    text = _URL_RE.sub('', text)            # remove links
    text = _HTML_TAG_RE.sub('', text)       # remove html tags
    text = _PUNCTUATION_RE.sub('', text)    # remove punctuation
    # Newlines become spaces rather than being deleted: deleting them glued
    # the last word of one line to the first word of the next, and the
    # digit filter below could then discard the fused token as a whole.
    text = text.replace('\n', ' ')
    text = _WORD_WITH_DIGIT_RE.sub('', text)  # remove words with digits

    words = [stemmer.stem(word) for word in text.split() if word not in stop_words]
    return ' '.join(words)
55
+
56
# Run every combined title+text document through clean_text().
df['cleaned'] = df['content'].apply(clean_text)

# Step 4: Train-Test Split
# Features are the cleaned documents, targets are the 0/1 labels;
# 20% of the rows are held out for evaluation.
X, y = df['cleaned'], df['label']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: TF-IDF Vectorizer
# The vectorizer is fitted on the training documents only; the held-out
# documents are transformed with that already-fitted vocabulary.
vectorizer = TfidfVectorizer(max_df=0.7)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)
67
+
68
# Step 6: Models
# Candidate classifiers, keyed by the display name used in the printed report.
models = {}
models["Logistic Regression"] = LogisticRegression()
models["Random Forest"] = RandomForestClassifier(n_estimators=100)
models["SVM"] = LinearSVC()

# Train and evaluate
# Each candidate is fitted on the training matrix and scored on the held-out
# matrix; the fitted estimator and its accuracy are stored under its name.
results = {}
for name in models:
    model = models[name]
    model.fit(X_train_tfidf, y_train)
    preds = model.predict(X_test_tfidf)
    acc = accuracy_score(y_test, preds)
    results[name] = dict(model=model, accuracy=acc)
    print(f"{name} Accuracy: {acc:.4f}")
83
+
84
# Plot confusion matrix for best model
# "Best" is the candidate with the highest held-out accuracy.
best_model_name = max(results, key=lambda candidate: results[candidate]['accuracy'])
best_model = results[best_model_name]['model']
y_pred = best_model.predict(X_test_tfidf)
cm = confusion_matrix(y_test, y_pred)

# Draw the matrix as an annotated heatmap on an explicit Axes object.
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title(f"Confusion Matrix - {best_model_name}")
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()
96
+
97
+ # Step 7: Gradio Web App
98
def predict_news(text):
    """Classify a piece of news text as real or fake.

    The text is cleaned with ``clean_text``, vectorized with the fitted
    ``vectorizer`` and classified by ``best_model``.

    Args:
        text: Headline or article body entered by the user.

    Returns:
        ``"Real News 🟢"`` when the model predicts label 1,
        ``"Fake News 🔴"`` otherwise. If the input is missing, blank, or has
        no tokens left after cleaning, a message asking for text is returned
        instead of a prediction.
    """
    empty_message = "Please enter some news text to classify."

    # Nothing to classify: avoid labelling an empty submission.
    if not isinstance(text, str) or not text.strip():
        return empty_message

    cleaned_text = clean_text(text)
    # Input made up only of stop words, digits or punctuation cleans to '',
    # which would be vectorized to an all-zero row.
    if not cleaned_text:
        return empty_message

    vectorized = vectorizer.transform([cleaned_text])
    prediction = best_model.predict(vectorized)[0]
    return "Real News 🟢" if prediction == 1 else "Fake News 🔴"
103
+
104
# Settings for the single-textbox demo: free text in, verdict string out.
interface_options = {
    "fn": predict_news,
    "inputs": "text",
    "outputs": "text",
    "title": "📰 Fake News Detector",
    "description": "Enter a news headline or content to check if it's real or fake.",
}
iface = gr.Interface(**interface_options)

# Start the web app.
iface.launch()