Spaces:
Runtime error
Runtime error
| import os | |
| import pandas as pd | |
| from datetime import datetime | |
| from dotenv import load_dotenv | |
| from md_html import convert_single_md_to_html as convert_md_to_html | |
| from news_analysis import fetch_deep_news, generate_value_investor_report | |
| from fin_interpreter import analyze_article | |
# Project root: the parent of the directory that contains this module.
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# Markdown reports are written to DATA_DIR; HTML_DIR is handed to the
# markdown-to-HTML converter as its output directory.
DATA_DIR = os.path.join(BASE_DIR, "data")
HTML_DIR = os.path.join(BASE_DIR, "html")

# Create both output directories up front so later writes cannot fail on a
# missing folder.
for _directory in (DATA_DIR, HTML_DIR):
    os.makedirs(_directory, exist_ok=True)

# Pull settings from a .env file (if any) into the process environment.
load_dotenv()
def derive_priority(sentiment, confidence):
    """Map a sentiment label and its confidence score to a priority label.

    Args:
        sentiment: Sentiment label; only "Positive" and "Negative" can yield
            "High".
        confidence: Numeric confidence score for the sentiment.

    Returns:
        "High" for a Positive sentiment with confidence above 0.7 or a
        Negative sentiment with confidence above 0.6; otherwise "Medium" when
        confidence is above 0.5, else "Low".
    """
    strong_positive = sentiment == "Positive" and confidence > 0.7
    strong_negative = sentiment == "Negative" and confidence > 0.6
    if strong_positive or strong_negative:
        return "High"
    # Any other label (or a weaker signal) is ranked on confidence alone.
    return "Medium" if confidence > 0.5 else "Low"
def run_value_investing_analysis(csv_path, progress_callback=None):
    """Fetch, score and report on news for every topic listed in a CSV file.

    For each usable row the function fetches news with ``fetch_deep_news``,
    scores every article summary with ``analyze_article``, derives a priority
    from the score, and writes the markdown returned by
    ``generate_value_investor_report`` into ``DATA_DIR``.

    Args:
        csv_path: Path (or buffer) accepted by ``pandas.read_csv``. Each row's
            ``topic`` value is used as the search topic; ``timespan_days`` is
            optional and falls back to 7 when the column or the cell is
            missing.
        progress_callback: Optional callable that receives a status string
            before each topic is processed.

    Returns:
        A ``(all_articles, company_data)`` tuple of lists of dicts. Article
        dicts carry the keys ``Title``, ``URL``, ``Summary``, ``Priority`` and
        ``Date``; company dicts carry ``Company``, ``Sentiment``,
        ``Confidence`` and ``Summary``.
    """
    current_df = pd.read_csv(csv_path)
    # One date string per run keeps article dates and report file names
    # consistent with each other.
    today = datetime.now().strftime("%Y-%m-%d")
    all_articles = []
    company_data = []

    for _, row in current_df.iterrows():
        raw_topic = row.get("topic")
        # A missing column gives None and an empty cell gives NaN; neither can
        # be searched for or turned into a file name, so skip the row instead
        # of crashing later on ``topic.replace``.
        if pd.isna(raw_topic) or not str(raw_topic).strip():
            print("[SKIP] Row without a topic")
            continue
        topic = str(raw_topic)

        timespan = row.get("timespan_days", 7)
        # ``row.get`` only applies its default when the column is absent; an
        # empty cell still comes back as NaN.
        if pd.isna(timespan):
            timespan = 7

        if progress_callback:
            progress_callback(f"π Processing: {topic} ({timespan} days)")

        news = fetch_deep_news(topic, timespan)
        if not news:
            continue

        for article in news:
            summary = article.get("summary", "")
            title = article.get("title", "Untitled")
            url = article.get("url", "")
            date = article.get("date", today)

            # Scoring is best-effort: a failure on one article must not abort
            # the whole run, so fall back to a neutral, zero-confidence score.
            try:
                result = analyze_article(summary)
                sentiment = result.get("sentiment", "Neutral")
                confidence = float(result.get("confidence", 0.0))
            except Exception as e:
                print(f"[FinBERT ERROR] {e}")
                sentiment, confidence = "Neutral", 0.0

            priority = derive_priority(sentiment, confidence)

            # Row for the per-article table.
            all_articles.append({
                "Title": title,
                "URL": url,
                "Summary": summary,
                "Priority": priority,
                "Date": date,
            })
            # Row for the company-level aggregation; the topic stands in for
            # the company name for now.
            company_data.append({
                "Company": topic,
                "Sentiment": sentiment,
                "Confidence": confidence,
                "Summary": summary,
            })

        # Save the markdown report for this topic.
        report_body = generate_value_investor_report(topic, news)
        slug = topic.replace(" ", "_").lower()
        # A path separator inside the topic would point the report at a
        # directory that does not exist, so neutralise it.
        for separator in (os.sep, os.altsep):
            if separator:
                slug = slug.replace(separator, "_")
        filepath = os.path.join(DATA_DIR, f"{slug}_{today}.md")
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(report_body)

    return all_articles, company_data
def build_company_insights(company_data):
    """Aggregate per-article sentiment records into one row per company.

    Args:
        company_data: List of dicts with the keys ``Company``, ``Sentiment``,
            ``Confidence`` and ``Summary``.

    Returns:
        A DataFrame with the columns ``Company``, ``Mentions`` (record count),
        ``Sentiment`` (most frequent label, "Neutral" when none is available),
        ``Confidence`` (mean rounded to two decimals) and ``Highlights`` (the
        first two summaries joined by " | "). An empty DataFrame is returned
        when ``company_data`` is empty.
    """
    if not company_data:
        return pd.DataFrame()

    df = pd.DataFrame(company_data)
    insights = []
    for company, group in df.groupby("Company"):
        # Compute the mode once; it is empty when every sentiment is missing.
        sentiment_modes = group["Sentiment"].mode()
        dominant_sentiment = (
            sentiment_modes.iloc[0] if not sentiment_modes.empty else "Neutral"
        )
        # ``str.join`` rejects None/NaN, so missing summaries are rendered as
        # empty strings instead of raising TypeError.
        top_summaries = [
            "" if pd.isna(text) else str(text)
            for text in group["Summary"].head(2).tolist()
        ]
        insights.append({
            "Company": company,
            "Mentions": len(group),
            "Sentiment": dominant_sentiment,
            "Confidence": round(group["Confidence"].mean(), 2),
            "Highlights": " | ".join(top_summaries),
        })
    return pd.DataFrame(insights)
def run_pipeline(csv_path, tavily_api_key, progress_callback=None):
    """Run the news analysis and convert the markdown reports to HTML.

    Args:
        csv_path: CSV of topics, forwarded to ``run_value_investing_analysis``.
        tavily_api_key: Value exported as the ``TAVILY_API_KEY`` environment
            variable. When it is None or empty the environment is left
            untouched.
        progress_callback: Optional callable forwarded to
            ``run_value_investing_analysis``.

    Returns:
        A ``(html_paths, articles_df, insights_df)`` tuple: the expected HTML
        path for every ``.md`` file found in ``DATA_DIR``, a DataFrame of the
        analysed articles, and the DataFrame produced by
        ``build_company_insights``.
    """
    # Assigning None to os.environ raises TypeError, and an empty string
    # would overwrite a key that is already present in the environment.
    if tavily_api_key:
        os.environ["TAVILY_API_KEY"] = tavily_api_key

    all_articles, company_data = run_value_investing_analysis(csv_path, progress_callback)

    # Convert every markdown file in DATA_DIR (not only the ones written by
    # this run); sorting makes the returned order deterministic.
    md_suffix = ".md"
    html_paths = []
    for md_file in sorted(os.listdir(DATA_DIR)):
        if not md_file.endswith(md_suffix):
            continue
        convert_md_to_html(os.path.join(DATA_DIR, md_file), HTML_DIR)
        # Swap only the trailing extension; str.replace would also rewrite a
        # ".md" occurring earlier in the name.
        # NOTE(review): assumes the converter writes "<stem>.html" into
        # HTML_DIR, as the original path computation did - confirm.
        html_name = md_file[: -len(md_suffix)] + ".html"
        html_paths.append(os.path.join(HTML_DIR, html_name))

    articles_df = pd.DataFrame(all_articles)
    insights_df = build_company_insights(company_data)
    return html_paths, articles_df, insights_df
| # import os | |
| # import pandas as pd | |
| # from datetime import datetime | |
| # from dotenv import load_dotenv | |
| # import traceback | |
| # from md_html import convert_single_md_to_html as convert_md_to_html | |
| # from news_analysis import fetch_deep_news, generate_value_investor_report | |
| # from csv_utils import detect_changes | |
| # from fin_interpreter import analyze_article | |
| # BASE_DIR = os.path.dirname(os.path.dirname(__file__)) | |
| # DATA_DIR = os.path.join(BASE_DIR, "data") | |
| # HTML_DIR = os.path.join(BASE_DIR, "html") | |
| # CSV_PATH = os.path.join(BASE_DIR, "investing_topics.csv") | |
| # os.makedirs(DATA_DIR, exist_ok=True) | |
| # os.makedirs(HTML_DIR, exist_ok=True) | |
| # load_dotenv() | |
| # def build_metrics_box(topic, num_articles): | |
| # now = datetime.now().strftime("%Y-%m-%d %H:%M") | |
| # return f""" | |
| # > Topic: `{topic}` | |
| # > Articles Collected: `{num_articles}` | |
| # > Generated: `{now}` | |
| # > | |
| # """ | |
| # def run_value_investing_analysis(csv_path, progress_callback=None): | |
| # """ | |
| # Runs the analysis for all topics in the CSV. | |
| # Returns: | |
| # md_files (list of md file paths) | |
| # all_articles (list of article dicts) | |
| # """ | |
| # current_df = pd.read_csv(csv_path) | |
| # prev_path = os.path.join(BASE_DIR, "investing_topics_prev.csv") | |
| # if os.path.exists(prev_path): | |
| # previous_df = pd.read_csv(prev_path) | |
| # changed_df = detect_changes(current_df, previous_df) | |
| # if changed_df.empty: | |
| # if progress_callback: | |
| # progress_callback("β No changes detected. Skipping processing.") | |
| # return [], [] | |
| # else: | |
| # changed_df = current_df | |
| # new_md_files = [] | |
| # all_articles = [] | |
| # for _, row in changed_df.iterrows(): | |
| # topic = row.get("topic") | |
| # timespan = row.get("timespan_days", 7) | |
| # msg = f"π Processing: {topic} ({timespan} days)" | |
| # print(msg) | |
| # if progress_callback: | |
| # progress_callback(msg) | |
| # news = fetch_deep_news(topic, timespan) | |
| # if not news: | |
| # warning = f"β οΈ No news found for: {topic}" | |
| # print(warning) | |
| # if progress_callback: | |
| # progress_callback(warning) | |
| # continue | |
| # # Add articles to all_articles | |
| # for article in news: | |
| # try: | |
| # res = analyze_article(article.get("summary", "")) | |
| # if isinstance(res, dict): | |
| # sentiment = res.get("sentiment") | |
| # confidence = res.get("confidence") | |
| # signal = res.get("signal") | |
| # else: | |
| # sentiment, confidence, signal = res[0], res[1], res[2] | |
| # except Exception as e: | |
| # sentiment, confidence, signal = "Unknown", 0.0, "None" | |
| # print(f"Error analyzing article: {e}") | |
| # all_articles.append({ | |
| # "Title": article.get("title"), | |
| # "URL": article.get("url"), | |
| # "Summary": article.get("summary"), | |
| # "Priority": article.get("priority", "Low"), | |
| # "Date": article.get("date"), | |
| # "Company": article.get("company", topic), | |
| # "Sentiment": sentiment, | |
| # "Confidence": confidence, | |
| # "Signal": signal | |
| # }) | |
| # # Generate report | |
| # report_body = generate_value_investor_report(topic, news) | |
| # metrics_md = build_metrics_box(topic, len(news)) | |
| # full_md = metrics_md + report_body | |
| # filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}.md" | |
| # filepath = os.path.join(DATA_DIR, filename) | |
| # counter = 1 | |
| # while os.path.exists(filepath): | |
| # filename = f"{topic.replace(' ', '_').lower()}_{datetime.now().strftime('%Y-%m-%d')}_{counter}.md" | |
| # filepath = os.path.join(DATA_DIR, filename) | |
| # counter += 1 | |
| # with open(filepath, "w", encoding="utf-8") as f: | |
| # f.write(full_md) | |
| # new_md_files.append(filepath) | |
| # if progress_callback: | |
| # progress_callback(f"β Markdown saved to: {DATA_DIR}") | |
| # current_df.to_csv(prev_path, index=False) | |
| # return new_md_files, all_articles | |
| # def run_pipeline(csv_path, tavily_api_key, progress_callback=None): | |
| # os.environ["TAVILY_API_KEY"] = tavily_api_key | |
| # new_md_files, all_articles = run_value_investing_analysis(csv_path, progress_callback) | |
| # new_html_paths = [] | |
| # for md_path in new_md_files: | |
| # convert_md_to_html(md_path, HTML_DIR) | |
| # html_path = os.path.join(HTML_DIR, os.path.basename(md_path).replace(".md", ".html")) | |
| # new_html_paths.append(html_path) | |
| # articles_df = pd.DataFrame(all_articles) | |
| # insights_df = build_company_insights(articles_df) | |
| # return new_html_paths, articles_df, insights_df | |
| # def build_company_insights(articles_df): | |
| # if articles_df.empty: | |
| # return pd.DataFrame() | |
| # grouped = ( | |
| # articles_df.groupby("Company") | |
| # .agg({ | |
| # "Title": "count", | |
| # "Sentiment": lambda x: x.mode()[0] if not x.mode().empty else "Neutral", | |
| # "Signal": lambda x: x.mode()[0] if not x.mode().empty else "Watch" | |
| # }) | |
| # .reset_index() | |
| # .rename(columns={"Title": "Mentions"}) | |
| # ) | |
| # return grouped | |
| # if __name__ == "__main__": | |
| # md_files, _ = run_value_investing_analysis(CSV_PATH) | |
| # for md in md_files: | |
| # convert_md_to_html(md, HTML_DIR) | |
| # print(f"π All reports converted to HTML at: {HTML_DIR}") | |