Spaces:

ihaveaplan66
/

news-analyzer

Runtime error

App Files Community

ihaveaplan66 committed on Mar 2

Commit

bdfd7d2

verified ·

1 Parent(s): 64a6581

Update main.py

Browse files

Files changed (1) hide show

main.py +99 -95

main.py CHANGED Viewed

@@ -1,95 +1,99 @@
-import requests
-from collections import Counter
-from transformers import pipeline
-import nltk
-from nltk.tokenize import word_tokenize
-from nltk.corpus import stopwords
-import string
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-import torch
-nltk.download('punkt')
-nltk.download('stopwords')
-nltk.download('averaged_perceptron_tagger')
-nltk.download('punkt_tab')
-# 1. Function for getting news via NewsAPI
-def get_news(query, api_key, num_articles=5):
-    url = f'https://newsapi.org/v2/everything?q={query}&apiKey={api_key}&language=en&pageSize={num_articles}'
-    response = requests.get(url)
-    if response.status_code == 200:
-        return response.json()['articles']
-    return []
-# 2. Analyzing tone with Hugging Face
-tone_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", revision="714eb0f")
-def analyze_sentiment(text):
-    return tone_analyzer(text)[0]
-# 3. Define category
-category_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/tweet-topic-21-multi")
-category_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/tweet-topic-21-multi")
-labels = ['art', 'business', 'entertainment', 'environment', 'fashion', 'finance', 'food',
-          'health', 'law', 'media', 'military', 'music', 'politics', 'religion', 'sci/tech',
-          'sports', 'travel', 'weather', 'world news', 'none']
-def classify_category(text):
-    inputs = category_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
-    outputs = category_model(**inputs)
-    predicted_class = torch.argmax(outputs.logits, dim=1).item()
-    return labels[predicted_class]
-# 4. Summarization
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-def split_text(text, max_tokens=512):
-    words = text.split()
-    return [' '.join(words[i:i+max_tokens]) for i in range(0, len(words), max_tokens)]
-def summarize_text(text):
-    chunks = split_text(text)
-    summaries = [summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]['summary_text'] for chunk in chunks]
-    return ' '.join(summaries)
-# 5. Search for trending words
-def extract_trending_words(texts):
-    text = ' '.join(texts).lower()
-    words = word_tokenize(text)
-    words = [word for word in words if word not in stopwords.words('english') and word not in string.punctuation and len(word) > 1]
-    word_freq = Counter(words)
-    return word_freq.most_common(10)
-# 6. The main process of analyzing news
-def analyze_news(query, api_key, num_articles=5):
-    articles = get_news(query, api_key, num_articles)
-    if not articles:
-        return []
-    news_results = []
-    for article in articles:
-        title = article.get('title', 'No Title')
-        description = article.get('description', '') or ''
-        url = article.get('url', '#')
-        sentiment = analyze_sentiment(title + " " + description)['label']
-        category = classify_category(title + " " + description)
-        summary = summarize_text(title + " " + description)
-        news_results.append({
-            "title": title,
-            "url": url,
-            "sentiment": sentiment,
-            "category": category,
-            "summary": summary
-        })
-    return news_results

+import requests
+from collections import Counter
+from transformers import pipeline
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+import string
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import torch
+import os
+nltk.data.path.append('/app/nltk_data')
+os.environ['TRANSFORMERS_CACHE'] = '/app/transformers_cache'
+nltk.download('punkt')
+nltk.download('stopwords')
+nltk.download('averaged_perceptron_tagger')
+nltk.download('punkt_tab')
+# 1. Function for getting news via NewsAPI
+def get_news(query, api_key, num_articles=5):
+    url = f'https://newsapi.org/v2/everything?q={query}&apiKey={api_key}&language=en&pageSize={num_articles}'
+    response = requests.get(url)
+    if response.status_code == 200:
+        return response.json()['articles']
+    return []
+# 2. Analyzing tone with Hugging Face
+tone_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", revision="714eb0f")
+def analyze_sentiment(text):
+    return tone_analyzer(text)[0]
+# 3. Define category
+category_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/tweet-topic-21-multi")
+category_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/tweet-topic-21-multi")
+labels = ['art', 'business', 'entertainment', 'environment', 'fashion', 'finance', 'food',
+          'health', 'law', 'media', 'military', 'music', 'politics', 'religion', 'sci/tech',
+          'sports', 'travel', 'weather', 'world news', 'none']
+def classify_category(text):
+    inputs = category_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    outputs = category_model(**inputs)
+    predicted_class = torch.argmax(outputs.logits, dim=1).item()
+    return labels[predicted_class]
+# 4. Summarization
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+def split_text(text, max_tokens=512):
+    words = text.split()
+    return [' '.join(words[i:i+max_tokens]) for i in range(0, len(words), max_tokens)]
+def summarize_text(text):
+    chunks = split_text(text)
+    summaries = [summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]['summary_text'] for chunk in chunks]
+    return ' '.join(summaries)
+# 5. Search for trending words
+def extract_trending_words(texts):
+    text = ' '.join(texts).lower()
+    words = word_tokenize(text)
+    words = [word for word in words if word not in stopwords.words('english') and word not in string.punctuation and len(word) > 1]
+    word_freq = Counter(words)
+    return word_freq.most_common(10)
+# 6. The main process of analyzing news
+def analyze_news(query, api_key, num_articles=5):
+    articles = get_news(query, api_key, num_articles)
+    if not articles:
+        return []
+    news_results = []
+    for article in articles:
+        title = article.get('title', 'No Title')
+        description = article.get('description', '') or ''
+        url = article.get('url', '#')
+        sentiment = analyze_sentiment(title + " " + description)['label']
+        category = classify_category(title + " " + description)
+        summary = summarize_text(title + " " + description)
+        news_results.append({
+            "title": title,
+            "url": url,
+            "sentiment": sentiment,
+            "category": category,
+            "summary": summary
+        })
+    return news_results