Spaces:
Runtime error
Runtime error
Sigrid De los Santos
committed on
Commit
·
24bf2bc
1
Parent(s):
1353a1f
debugging for analysis tables
Browse files- src/news_analysis.py +58 -58
src/news_analysis.py
CHANGED
|
@@ -213,74 +213,74 @@ def tavily_search(query, days, max_results=10):
|
|
| 213 |
return response.json()
|
| 214 |
|
| 215 |
# === Smart News Search ===
|
| 216 |
-
def fetch_deep_news(topic, days):
    """Return two hard-coded mock articles instead of performing a search.

    Args:
        topic: Only used in the debug message printed to stdout.
        days: Accepted for signature compatibility; not used.

    Returns:
        A new list of two dicts, each with the keys ``title``, ``url``,
        ``summary`` and ``date``.
    """
    print(f"[DEBUG] Fetching mock articles for topic: {topic}")

    funding_article = {
        "title": "Startup raises $100M Series A funding",
        "url": "https://example.com/funding",
        "summary": "The startup raised $100M in Series A funding to expand operations.",
        "date": "2025-07-20",
    }
    lawsuit_article = {
        "title": "Company faces regulatory lawsuit",
        "url": "https://example.com/lawsuit",
        "summary": "The company is facing lawsuits due to environmental violations.",
        "date": "2025-07-19",
    }

    mock_articles = [funding_article, lawsuit_article]
    return mock_articles
|
| 232 |
-
|
| 233 |
# def fetch_deep_news(topic, days):
|
| 234 |
-
#
|
| 235 |
-
#
|
| 236 |
-
|
| 237 |
-
#
|
| 238 |
-
#
|
| 239 |
-
#
|
| 240 |
-
#
|
| 241 |
-
#
|
| 242 |
-
#
|
| 243 |
-
#
|
| 244 |
-
#
|
|
|
|
|
|
|
|
|
|
| 245 |
# ]
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
-
|
|
|
|
| 258 |
|
| 259 |
-
|
| 260 |
-
# try:
|
| 261 |
-
# print(f"🔍 Tavily query: {query}")
|
| 262 |
-
# response = tavily_search(query, days)
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
|
|
|
| 267 |
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
# if url and url not in seen_urls and len(content) > 150:
|
| 272 |
-
# all_results.append({
|
| 273 |
-
# "title": item.get("title"),
|
| 274 |
-
# "url": url,
|
| 275 |
-
# "content": content
|
| 276 |
-
# })
|
| 277 |
-
# seen_urls.add(url)
|
| 278 |
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
|
| 282 |
-
|
| 283 |
-
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
# === Generate Markdown Report ===
|
| 286 |
def generate_value_investor_report(topic, news_results, max_articles=20, max_chars_per_article=400):
|
|
|
|
| 213 |
return response.json()
|
| 214 |
|
| 215 |
# === Smart News Search ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
# def fetch_deep_news(topic, days):
|
| 217 |
+
# print(f"[DEBUG] Fetching mock articles for topic: {topic}")
|
| 218 |
+
# return [
|
| 219 |
+
# {
|
| 220 |
+
# "title": "Startup raises $100M Series A funding",
|
| 221 |
+
# "url": "https://example.com/funding",
|
| 222 |
+
# "summary": "The startup raised $100M in Series A funding to expand operations.",
|
| 223 |
+
# "date": "2025-07-20"
|
| 224 |
+
# },
|
| 225 |
+
# {
|
| 226 |
+
# "title": "Company faces regulatory lawsuit",
|
| 227 |
+
# "url": "https://example.com/lawsuit",
|
| 228 |
+
# "summary": "The company is facing lawsuits due to environmental violations.",
|
| 229 |
+
# "date": "2025-07-19"
|
| 230 |
+
# }
|
| 231 |
# ]
|
| 232 |
|
| 233 |
+
def fetch_deep_news(topic, days):
    """Collect de-duplicated articles for ``topic`` by running many search queries.

    Builds three groups of query strings (base, investor-related, and
    synonym queries derived from ``get_related_terms(topic)``), sends each
    one through ``tavily_search(query, days)``, and keeps every result whose
    URL has not been seen yet and whose text is longer than 150 characters.

    The function is best-effort: a failing query or a malformed response is
    reported on stdout and skipped; it never aborts the remaining queries.

    Args:
        topic: Search topic interpolated into every query string.
        days: Passed through unchanged to ``tavily_search``.

    Returns:
        A list of dicts with the keys ``title``, ``url`` and ``content``,
        in the order the articles were first encountered.
    """
    all_results = []
    seen_urls = set()

    base_queries = [
        topic,
        f"{topic} AND startup",
        f"{topic} AND acquisition OR merger OR funding",
        f"{topic} AND CEO OR executive OR leadership",
        f"{topic} AND venture capital OR Series A OR Series B",
        f"{topic} AND government grant OR approval OR contract",
        f"{topic} AND underrated OR small-cap OR micro-cap",
    ]

    investor_queries = [
        f"{topic} AND BlackRock OR Vanguard OR SoftBank",
        f"{topic} AND Elon Musk OR Sam Altman OR Peter Thiel",
        f"{topic} AND Berkshire Hathaway OR Warren Buffett",
        f"{topic} AND institutional investor OR hedge fund",
    ]

    related_terms = get_related_terms(topic)
    synonym_queries = [
        f"{term} AND {kw}"
        for term in related_terms
        for kw in ["startup", "funding", "merger", "acquisition"]
    ]

    all_queries = base_queries + investor_queries + synonym_queries

    for query in all_queries:
        # Only the search call itself is guarded, so a single malformed item
        # further down can no longer discard the rest of this query's results.
        try:
            print(f"🔍 Tavily query: {query}")
            response = tavily_search(query, days)
        except Exception as e:  # boundary: one failed query must not stop the run
            print(f"⚠️ Tavily request failed for query '{query}': {e}")
            continue

        if not isinstance(response, dict) or "results" not in response:
            print(f"⚠️ Tavily API response issue: {response}")
            continue

        results = response["results"]
        if not isinstance(results, list):
            # e.g. "results": null — report it as a response problem, not a
            # request failure.
            print(f"⚠️ Tavily API response issue: {response}")
            continue

        for item in results:
            if not isinstance(item, dict):
                continue

            url = item.get("url")
            # Fall back from content to summary to title; the trailing `or ""`
            # covers fields that are present but null.
            content = (
                item.get("content")
                or item.get("summary")
                or item.get("title")
                or ""
            )
            if not isinstance(url, str) or not isinstance(content, str):
                continue

            if url and url not in seen_urls and len(content) > 150:
                all_results.append({
                    "title": item.get("title"),
                    "url": url,
                    "content": content,
                })
                seen_urls.add(url)

    print(f"📰 Total articles collected: {len(all_results)}")
    return all_results
|
| 284 |
|
| 285 |
# === Generate Markdown Report ===
|
| 286 |
def generate_value_investor_report(topic, news_results, max_articles=20, max_chars_per_article=400):
|