# pipeline_v2.py
"""Download OHLCV price history with yfinance and store it as CSV files."""

import os
import re

import pandas as pd

try:
    import yfinance as yf
except ImportError as e:
    # Only a genuinely missing package should produce the "not installed"
    # hint; any other import-time failure is left to surface unchanged.
    raise ImportError(
        "yfinance is not installed. Add `yfinance>=0.2.40` to requirements.txt."
    ) from e


def _ensure_dir(path: str) -> None:
    """Create directory `path` (including parents) if it does not exist."""
    os.makedirs(path, exist_ok=True)


def _ticker_for_query(t: str) -> str:
    """
    Prepare ticker for yfinance:
    - strip spaces
    - uppercase
    - DO NOT alter '.' or '-' (yfinance relies on them, e.g. NESN.SW, BRK-B)
    """
    return t.strip().upper()


def _ticker_for_filename(t: str) -> str:
    """
    Prepare a safe filename:
    - replace any char not [A-Za-z0-9] with '_'
    """
    return re.sub(r"[^A-Za-z0-9]", "_", t)


def _flatten_single_ticker_columns(df: pd.DataFrame, ticker: str) -> pd.DataFrame:
    """
    Return `df` with single-level columns when possible.

    Recent yfinance releases can return two-level columns (field, ticker)
    even for a single symbol, which would give the CSV a multi-row header.
    The level that holds only `ticker` is dropped; any other column layout
    is returned untouched.
    """
    cols = df.columns
    if not isinstance(cols, pd.MultiIndex) or cols.nlevels < 2:
        return df
    for level in range(cols.nlevels):
        values = cols.get_level_values(level).unique()
        if len(values) == 1 and str(values[0]).upper() == ticker:
            return df.droplevel(level, axis=1)
    return df


def _with_date_index(df: pd.DataFrame) -> pd.DataFrame:
    """Return `df` indexed by a column/index named "Date"."""
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.reset_index()
        if "Date" not in df.columns:
            # No explicit Date column: treat the former index (now the
            # first column) as the date.
            df.columns = ["Date"] + list(df.columns[1:])
        df = df.set_index("Date")
    df.index.name = "Date"
    return df


def update_ticker_csv(
    ticker: str,
    start: str = "2015-01-01",
    interval: str = "1d",
    dst_dir: str = "/mnt/data",
) -> str:
    """
    Download OHLCV for `ticker` using yfinance and save as CSV.

    Args:
        ticker: Symbol to download; surrounding whitespace is stripped and
            the symbol is upper-cased before querying.
        start: Start date passed through to `yf.download`.
        interval: Bar interval passed through to `yf.download`; also used
            in the output file name.
        dst_dir: Directory for the CSV; created if missing.

    Returns:
        The CSV file path, `<dst_dir>/<sanitised ticker>_<interval>.csv`.

    Raises:
        ValueError: If `ticker` is blank or yfinance returns no data.
    """
    tkr_query = _ticker_for_query(ticker)
    if not tkr_query:
        # Fail before touching the filesystem or the network.
        raise ValueError("Ticker must be a non-empty string.")
    tkr_file = _ticker_for_filename(tkr_query)

    _ensure_dir(dst_dir)

    df = yf.download(
        tkr_query,
        start=start,
        interval=interval,
        auto_adjust=False,
        progress=False,
        threads=True,
    )

    if df is None or df.empty:
        raise ValueError(
            f"No data returned for ticker '{tkr_query}' (start={start}, interval={interval}). "
            "Check the symbol and exchange suffix (e.g., NESN.SW, BMW.DE, VOD.L)."
        )

    # Keep the CSV header a single row regardless of yfinance version.
    df = _flatten_single_ticker_columns(df, tkr_query)

    # Ensure a clean Date index
    df = _with_date_index(df)

    csv_path = os.path.join(dst_dir, f"{tkr_file}_{interval}.csv")
    df.to_csv(csv_path)
    return csv_path