jeevitha-app committed on
Commit
7c6e40f
·
verified ·
1 Parent(s): c7ba381

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -92
app.py CHANGED
@@ -1,112 +1,41 @@
1
- # Step 1: import
2
-
3
-
4
- import pandas as pd
5
- import numpy as np
6
- import string
7
- import re
8
  import gradio as gr
9
-
10
- import matplotlib.pyplot as plt
11
- import seaborn as sns
12
-
13
  from nltk.stem import PorterStemmer
14
  from nltk.corpus import stopwords
15
  import nltk
16
- nltk.download('stopwords')
17
-
18
- from sklearn.model_selection import train_test_split
19
- from sklearn.feature_extraction.text import TfidfVectorizer
20
-
21
- from sklearn.linear_model import LogisticRegression
22
- from sklearn.ensemble import RandomForestClassifier
23
- from sklearn.svm import LinearSVC
24
-
25
- from sklearn.metrics import accuracy_score, confusion_matrix
26
 
27
- # Step 2: Load data
28
- true = pd.read_csv('True.csv', on_bad_lines='skip')
29
- fake = pd.read_csv('Fake.csv', on_bad_lines='skip')
30
 
31
- true['label'] = 1 # real
32
- fake['label'] = 0 # fake
 
33
 
34
- df = pd.concat([true, fake]).sample(frac=1).reset_index(drop=True)
35
- df = df[['title', 'text', 'label']]
36
 
37
- # Combine title and text
38
- df['content'] = df['title'] + " " + df['text']
39
-
40
- # Step 3: NLP Cleaning
41
- stop_words = set(stopwords.words('english'))
42
  stemmer = PorterStemmer()
 
43
 
44
  def clean_text(text):
45
  text = text.lower()
46
- text = re.sub(r'\[.*?\]', '', text) # remove brackets
47
- text = re.sub(r'https?://\S+|www\.\S+', '', text) # remove links
48
- text = re.sub(r'<.*?>+', '', text) # remove html tags
49
- text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text) # remove punctuation
50
- text = re.sub(r'\n', '', text) # remove newlines
51
- text = re.sub(r'\w*\d\w*', '', text) # remove words with digits
52
- words = text.split()
53
- words = [stemmer.stem(word) for word in words if word not in stop_words]
54
- return ' '.join(words)
55
-
56
- df['cleaned'] = df['content'].apply(clean_text)
57
-
58
- # Step 4: Train-Test Split
59
- X = df['cleaned']
60
- y = df['label']
61
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
62
-
63
- # Step 5: TF-IDF Vectorizer
64
- vectorizer = TfidfVectorizer(max_df=0.7)
65
- X_train_tfidf = vectorizer.fit_transform(X_train)
66
- X_test_tfidf = vectorizer.transform(X_test)
67
-
68
- # Step 6: Models
69
- models = {
70
- "Logistic Regression": LogisticRegression(),
71
- "Random Forest": RandomForestClassifier(n_estimators=100),
72
- "SVM": LinearSVC()
73
- }
74
-
75
- # Train and evaluate
76
- results = {}
77
- for name, model in models.items():
78
- model.fit(X_train_tfidf, y_train)
79
- preds = model.predict(X_test_tfidf)
80
- acc = accuracy_score(y_test, preds)
81
- results[name] = {"model": model, "accuracy": acc}
82
- print(f"{name} Accuracy: {acc:.4f}")
83
-
84
- # Plot confusion matrix for best model
85
- best_model_name = max(results, key=lambda x: results[x]['accuracy'])
86
- best_model = results[best_model_name]['model']
87
- y_pred = best_model.predict(X_test_tfidf)
88
- cm = confusion_matrix(y_test, y_pred)
89
-
90
- plt.figure(figsize=(5, 4))
91
- sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
92
- plt.title(f"Confusion Matrix - {best_model_name}")
93
- plt.xlabel('Predicted')
94
- plt.ylabel('Actual')
95
- plt.show()
96
 
97
- # Step 7: Gradio Web App
98
  def predict_news(text):
99
- cleaned_text = clean_text(text)
100
- vectorized = vectorizer.transform([cleaned_text])
101
- prediction = best_model.predict(vectorized)[0]
102
- return "Real News 🟢" if prediction == 1 else "Fake News 🔴"
103
 
104
- iface = gr.Interface(
105
  fn=predict_news,
106
- inputs="text",
107
  outputs="text",
108
  title="📰 Fake News Detector",
109
- description="Enter a news headline or content to check if it's real or fake."
110
  )
111
 
112
- iface.launch()
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pickle
3
+ import string
 
 
4
  from nltk.stem import PorterStemmer
5
  from nltk.corpus import stopwords
6
  import nltk
 
 
 
 
 
 
 
 
 
 
7
 
8
+ nltk.download("stopwords")
 
 
9
 
10
+ # Load model and vectorizer
11
+ with open("model.pkl", "rb") as f:
12
+ model = pickle.load(f)
13
 
14
+ with open("vectorizer.pkl", "rb") as f:
15
+ vectorizer = pickle.load(f)
16
 
 
 
 
 
 
17
  stemmer = PorterStemmer()
18
+ stop_words = stopwords.words("english")
19
 
20
  def clean_text(text):
21
  text = text.lower()
22
+ text = "".join([c for c in text if c not in string.punctuation])
23
+ tokens = text.split()
24
+ tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
25
+ return " ".join(tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
 
27
  def predict_news(text):
28
+ cleaned = clean_text(text)
29
+ vectorized = vectorizer.transform([cleaned])
30
+ prediction = model.predict(vectorized)[0]
31
+ return "✅ Real News" if prediction == 1 else "🚨 Fake News"
32
 
33
+ demo = gr.Interface(
34
  fn=predict_news,
35
+ inputs=gr.Textbox(lines=10, placeholder="Paste a news article here..."),
36
  outputs="text",
37
  title="📰 Fake News Detector",
38
+ description="Paste any news content to classify it as Real or Fake."
39
  )
40
 
41
+ demo.launch()