Spaces:
Runtime error
Runtime error
| import os | |
| import pandas as pd | |
| from datetime import datetime | |
| from dotenv import load_dotenv | |
| from md_html import convert_single_md_to_html as convert_md_to_html | |
| from news_analysis import fetch_deep_news, generate_value_investor_report | |
| from fin_interpreter import analyze_article | |
# Directory two levels up from this file's path; the output folders below
# are created relative to it.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# Markdown reports are written to DATA_DIR and their HTML renderings to HTML_DIR.
DATA_DIR = os.path.join(BASE_DIR, "data")
HTML_DIR = os.path.join(BASE_DIR, "html")
# Create both output folders at import time; exist_ok avoids an error when
# they are already present.
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(HTML_DIR, exist_ok=True)
# Load environment variables via python-dotenv (presumably from a local
# .env file - confirm against the deployment setup).
load_dotenv()
| # === Priority Logic === | |
def derive_priority(sentiment, confidence):
    """Map a sentiment label and a confidence score to a priority bucket.

    Args:
        sentiment: Sentiment label. It is compared case-insensitively after
            surrounding whitespace is removed. ``None`` is accepted and is
            treated as a label that is neither positive nor negative.
        confidence: Numeric confidence score compared against fixed
            thresholds.

    Returns:
        ``"High"`` for positive sentiment with confidence above 0.7 or
        negative sentiment with confidence above 0.6; ``"Medium"`` for any
        other score above 0.5; otherwise ``"Low"``.
    """
    # A missing label must not crash the caller's whole run, so normalise
    # defensively instead of calling .lower() on whatever was passed in.
    label = "" if sentiment is None else str(sentiment).strip().lower()

    # Negative news is escalated at a lower confidence than positive news.
    high_thresholds = {"positive": 0.7, "negative": 0.6}
    if label in high_thresholds and confidence > high_thresholds[label]:
        return "High"
    if confidence > 0.5:
        return "Medium"
    return "Low"
| # === Metrics Box === | |
def build_metrics_box(topic, num_articles):
    """Return the Markdown header block that precedes a report body.

    Args:
        topic: Topic name shown in the header.
        num_articles: Number of articles shown in the header.

    Returns:
        A string that starts with a newline, contains three quoted lines
        (topic, article count, generation time formatted as
        ``YYYY-MM-DD HH:MM``), and ends with a ``---`` line plus a newline.
    """
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M")
    box_lines = (
        "",  # leading newline
        f"> **Topic:** `{topic}`",
        f"> **Articles Collected:** `{num_articles}`",
        f"> **Generated:** `{generated_at}`",
        "---",
        "",  # trailing newline
    )
    return "\n".join(box_lines)
| # === Main Analysis === | |
def run_value_investing_analysis(csv_path, progress_callback=None):
    """Fetch, score and report on news for every topic listed in a CSV file.

    For each row, ``fetch_deep_news`` is called for the topic, every returned
    article is passed to ``analyze_article``, and a Markdown report (metrics
    box followed by the output of ``generate_value_investor_report``) is
    written into ``DATA_DIR``.

    Args:
        csv_path: Path of a CSV file read with ``pandas.read_csv``. The
            ``topic`` column supplies the topic; ``timespan_days`` is
            optional and defaults to 7 when absent or empty.
        progress_callback: Optional callable that receives one message
            string. It is used to report skipped topics and errors.

    Returns:
        A tuple ``(all_articles, company_data)`` of lists of dicts.
        ``all_articles`` entries have the keys ``Title``, ``URL``,
        ``Summary`` (at most 300 characters plus an ellipsis when
        truncated), ``Priority``, ``Sentiment``, ``Confidence``, ``Signal``
        and ``Date``. ``company_data`` entries have the keys ``Company``,
        ``Sentiment``, ``Confidence``, ``Signal``, ``Summary`` (full text)
        and ``Priority``.
    """

    def notify(message):
        # Progress reporting is optional; do nothing without a callback.
        if progress_callback:
            progress_callback(message)

    topics_df = pd.read_csv(csv_path)
    all_articles = []
    company_data = []

    for _, row in topics_df.iterrows():
        topic = row.get("topic")
        # An empty CSV cell is read as NaN; such a row has nothing to search for.
        if pd.isna(topic) or not str(topic).strip():
            notify("⚠️ Skipping row with missing topic")
            continue
        topic = str(topic).strip()

        timespan = row.get("timespan_days", 7)
        # The column may exist while this particular cell is empty.
        if pd.isna(timespan):
            timespan = 7

        try:
            news = fetch_deep_news(topic, timespan)
        except Exception as e:
            notify(f"[ERROR] fetch_deep_news failed: {e}")
            continue

        if not news:
            notify(f"⚠️ No news found for topic: {topic}")
            continue

        for article in news:
            # Fall back to "" so later string operations never see None.
            summary = article.get("summary") or article.get("content") or ""
            title = article.get("title", "Untitled")
            url = article.get("url", "")
            date = article.get("date", datetime.now().strftime("%Y-%m-%d"))

            try:
                result = analyze_article(summary)
                sentiment = result.get("sentiment") or "Neutral"
                confidence = float(result.get("confidence") or 0.0)
                signal = result.get("investment_decision") or "Watch"
            except Exception as e:
                # A failed analysis degrades to neutral defaults instead of
                # dropping the article.
                notify(f"[FinBERT ERROR] {e}")
                sentiment, confidence, signal = "Neutral", 0.0, "Watch"

            priority = derive_priority(sentiment, confidence)

            # Only mark the preview with an ellipsis when text was cut off.
            preview = summary[:300] + "..." if len(summary) > 300 else summary

            all_articles.append({
                "Title": title,
                "URL": url,
                "Summary": preview,
                "Priority": priority,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Date": date,
            })
            company_data.append({
                "Company": topic,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Signal": signal,
                "Summary": summary,
                "Priority": priority,
            })

        try:
            report_body = generate_value_investor_report(topic, news)
            full_md = build_metrics_box(topic, len(news)) + report_body
            # The topic comes from user-supplied CSV data: keep only
            # characters that are safe in a file name so the report cannot
            # be written outside DATA_DIR.
            slug = "".join(
                ch if ch.isalnum() or ch in "-_." else "_"
                for ch in topic.replace(" ", "_").lower()
            )
            filename = f"{slug}_{datetime.now().strftime('%Y-%m-%d')}.md"
            filepath = os.path.join(DATA_DIR, filename)
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(full_md)
        except Exception as e:
            notify(f"[REPORT ERROR] {e}")

    return all_articles, company_data
| # === Insights Tab Data === | |
def build_company_insights(company_data):
    """Aggregate per-article records into a per-company summary table.

    Args:
        company_data: List of dicts with the keys ``Company``, ``Signal``,
            ``Confidence``, ``Priority`` and ``Summary``.

    Returns:
        A DataFrame with one row per company and the columns ``Company``,
        ``Mentions``, ``Dominant Signal``, ``Avg Confidence``,
        ``Interest % (High Priority)`` and ``Highlights``, sorted by
        ``Avg Confidence`` in descending order and limited to five rows.
        An empty DataFrame is returned when there is nothing to aggregate.
    """
    if not company_data:
        return pd.DataFrame()

    df = pd.DataFrame(company_data)
    insights = []
    for company, group in df.groupby("Company"):
        # Compute the mode once; fall back to "Watch" when no signal exists.
        signal_modes = group["Signal"].mode()
        dominant_signal = signal_modes.iloc[0] if not signal_modes.empty else "Watch"

        avg_confidence = round(group["Confidence"].mean(), 2)
        high_priority_ratio = round(
            (group["Priority"] == "High").sum() / len(group) * 100, 1
        )

        # Summaries may be missing (None/NaN) or blank; joining those would
        # raise or produce empty fragments, so keep only real text.
        usable_summaries = [
            text for text in group["Summary"]
            if isinstance(text, str) and text.strip()
        ]
        highlights = " | ".join(usable_summaries[:2])

        insights.append({
            "Company": company,
            "Mentions": len(group),
            "Dominant Signal": dominant_signal,
            "Avg Confidence": avg_confidence,
            "Interest % (High Priority)": f"{high_priority_ratio}%",
            "Highlights": highlights,
        })

    # groupby drops rows whose company is missing, so there may be no groups
    # at all; sorting an empty frame by a missing column would raise.
    if not insights:
        return pd.DataFrame()

    return (
        pd.DataFrame(insights)
        .sort_values(by="Avg Confidence", ascending=False)
        .head(5)
    )
| # === Pipeline === | |
def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
    """Run the full analysis and render every Markdown report to HTML.

    Steps: remove reports left over from earlier runs, run
    ``run_value_investing_analysis``, convert each Markdown file in
    ``DATA_DIR`` with ``convert_md_to_html``, and build the result tables.

    Args:
        csv_path: Path of the topics CSV passed on to the analysis step.
        tavily_api_key: Value stored in the ``TAVILY_API_KEY`` environment
            variable. When empty or ``None`` the variable is left as it is.
        progress_callback: Optional callable receiving progress messages;
            passed on to the analysis step.

    Returns:
        A tuple ``(html_paths, articles_df, insights_df)``: the list of HTML
        file paths, a DataFrame of all analysed articles, and the DataFrame
        produced by ``build_company_insights``.
    """
    # Assigning None to os.environ raises TypeError, and an empty string
    # would overwrite a key that is already set in the environment.
    if tavily_api_key:
        os.environ["TAVILY_API_KEY"] = tavily_api_key

    # Remove old reports (Markdown and HTML) so only this run's files remain.
    for directory, suffix in ((DATA_DIR, ".md"), (HTML_DIR, ".html")):
        for name in os.listdir(directory):
            if name.endswith(suffix):
                os.remove(os.path.join(directory, name))

    all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

    html_paths = []
    # Sorted so the returned list has a stable order across platforms.
    for md_file in sorted(os.listdir(DATA_DIR)):
        if md_file.endswith(".md"):
            convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
            # Assumes the converter names its output by swapping ".md" for
            # ".html" - confirm against md_html.
            html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html")))

    articles_df = pd.DataFrame(all_articles)
    insights_df = build_company_insights(company_data)
    return html_paths, articles_df, insights_df
| # import os | |
| # import pandas as pd | |
| # from datetime import datetime | |
| # from dotenv import load_dotenv | |
| # from md_html import convert_single_md_to_html as convert_md_to_html | |
| # from news_analysis import fetch_deep_news, generate_value_investor_report | |
| # from fin_interpreter import analyze_article | |
| # BASE_DIR = os.path.dirname(os.path.dirname(__file__)) | |
| # DATA_DIR = os.path.join(BASE_DIR, "data") | |
| # HTML_DIR = os.path.join(BASE_DIR, "html") | |
| # os.makedirs(DATA_DIR, exist_ok=True) | |
| # os.makedirs(HTML_DIR, exist_ok=True) | |
| # load_dotenv() | |
| # # === Priority Logic === | |
| # def derive_priority(sentiment, confidence): | |
| # sentiment = sentiment.lower() | |
| # if sentiment == "positive" and confidence > 0.7: | |
| # return "High" | |
| # if sentiment == "negative" and confidence > 0.6: | |
| # return "High" | |
| # if confidence > 0.5: | |
| # return "Medium" | |
| # return "Low" | |
| # # === Main Analysis === | |
| # def run_value_investing_analysis(csv_path, progress_callback=None): | |
| # current_df = pd.read_csv(csv_path) | |
| # all_articles = [] | |
| # company_data = [] | |
| # for _, row in current_df.iterrows(): | |
| # topic = row.get("topic") | |
| # timespan = row.get("timespan_days", 7) | |
| # if progress_callback: | |
| # progress_callback(f"🔍 Processing topic: {topic} ({timespan} days)") | |
| # try: | |
| # news = fetch_deep_news(topic, timespan) | |
| # if progress_callback: | |
| # progress_callback(f"[DEBUG] fetch_deep_news returned {len(news) if news else 0} articles.") | |
| # except Exception as e: | |
| # if progress_callback: | |
| # progress_callback(f"[ERROR] fetch_deep_news failed: {e}") | |
| # continue | |
| # if not news: | |
| # if progress_callback: | |
| # progress_callback(f"⚠️ No news found for topic: {topic}") | |
| # continue | |
| # for article in news: | |
| # summary = article.get("summary", "") or article.get("content", "") | |
| # title = article.get("title", "Untitled") | |
| # url = article.get("url", "") | |
| # date = article.get("date", datetime.now().strftime("%Y-%m-%d")) | |
| # try: | |
| # result = analyze_article(summary) | |
| # sentiment = result.get("sentiment", "Neutral") | |
| # confidence = float(result.get("confidence", 0.0)) | |
| # if progress_callback: | |
| # progress_callback(f"📰 [{title[:50]}...] → Sentiment: {sentiment}, Confidence: {confidence}") | |
| # except Exception as e: | |
| # if progress_callback: | |
| # progress_callback(f"[FinBERT ERROR] {e}") | |
| # sentiment, confidence = "Neutral", 0.0 | |
| # priority = derive_priority(sentiment, confidence) | |
| # all_articles.append({ | |
| # "Title": title, | |
| # "URL": url, | |
| # "Summary": summary[:300] + "..." if summary else "", | |
| # "Priority": priority, | |
| # "Date": date, | |
| # "Sentiment": sentiment, | |
| # "Confidence": confidence | |
| # }) | |
| # company_data.append({ | |
| # "Company": topic, | |
| # "Sentiment": sentiment, | |
| # "Confidence": confidence, | |
| # "Summary": summary, | |
| # }) | |
| # try: | |
| # report_body = generate_value_investor_report(topic, news) | |
| # filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md" | |
| # filepath = os.path.join(DATA_DIR, filename) | |
| # with open(filepath, "w", encoding="utf-8") as f: | |
| # f.write(report_body) | |
| # except Exception as e: | |
| # if progress_callback: | |
| # progress_callback(f"[REPORT ERROR] {e}") | |
| # return all_articles, company_data | |
| # # === Insights Tab Data === | |
| # def build_company_insights(company_data): | |
| # if not company_data: | |
| # return pd.DataFrame() | |
| # df = pd.DataFrame(company_data) | |
| # insights = [] | |
| # for company, group in df.groupby("Company"): | |
| # mentions = len(group) | |
| # dominant_sentiment = group["Sentiment"].mode()[0] if not group["Sentiment"].mode().empty else "Neutral" | |
| # avg_confidence = round(group["Confidence"].mean(), 2) | |
| # highlights = " | ".join(group["Summary"].head(2).tolist()) | |
| # insights.append({ | |
| # "Company": company, | |
| # "Mentions": mentions, | |
| # "Sentiment": dominant_sentiment, | |
| # "Confidence": avg_confidence, | |
| # "Highlights": highlights | |
| # }) | |
| # return pd.DataFrame(insights).sort_values(by="Confidence", ascending=False).head(5) | |
| # # === Pipeline === | |
| # def run_pipeline(csv_path, tavily_api_key, progress_callback=None): | |
| # os.environ["TAVILY_API_KEY"] = tavily_api_key | |
| # all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback) | |
| # html_paths = [] | |
| # for md_file in os.listdir(DATA_DIR): | |
| # if md_file.endswith(".md"): | |
| # convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR) | |
| # html_paths.append(os.path.join(HTML_DIR, md_file.replace(".md", ".html"))) | |
| # articles_df = pd.DataFrame(all_articles) | |
| # insights_df = build_company_insights(company_data) | |
| # return html_paths, articles_df, insights_df | |