Spaces:
Sleeping
Sleeping
| # pipeline_v2.py | |
| import os | |
| import re | |
| import pandas as pd | |
try:
    import yfinance as yf
except ImportError as e:
    # Only a genuinely missing/unimportable package is reported as
    # "not installed"; any other error raised while yfinance initialises
    # propagates unchanged so the real cause is not mislabelled.
    raise ImportError(
        "yfinance is not installed. Add `yfinance>=0.2.40` to requirements.txt."
    ) from e
def _ensure_dir(path: str) -> None:
    """Create directory `path`, including missing parents, if it is absent.

    An already existing directory is left untouched. An empty string is
    treated as the current working directory (which is how `os.path.join`
    interprets it), so nothing is created and no error is raised.
    """
    if path == "":
        # os.makedirs("") raises FileNotFoundError; the current directory
        # already exists, so there is nothing to do.
        return
    os.makedirs(path, exist_ok=True)
def _ticker_for_query(t: str) -> str:
    """
    Normalize a ticker symbol before it is handed to yfinance.

    Leading/trailing whitespace is removed and the symbol is upper-cased.
    Characters such as '.' and '-' are deliberately kept as-is because they
    are part of the symbol (e.g. NESN.SW, BRK-B).
    """
    trimmed = t.strip()
    return trimmed.upper()
def _ticker_for_filename(t: str) -> str:
    """
    Turn a ticker symbol into a string that is safe to use in a file name.

    Every character outside ASCII letters and digits is replaced with '_'.
    """
    unsafe_char = re.compile(r"[^A-Za-z0-9]")
    return unsafe_char.sub("_", t)
def update_ticker_csv(
    ticker: str,
    start: str = "2015-01-01",
    interval: str = "1d",
    dst_dir: str = "/mnt/data",
) -> str:
    """
    Download OHLCV for `ticker` using yfinance and save it as a CSV.

    Args:
        ticker: Symbol to fetch; normalized with `_ticker_for_query` before
            the request and with `_ticker_for_filename` for the file name.
        start: Passed to `yf.download` as `start`.
        interval: Passed to `yf.download` as `interval`; also becomes part
            of the file name.
        dst_dir: Directory that receives the CSV; created if missing.

    Returns:
        The CSV file path, `<dst_dir>/<sanitized ticker>_<interval>.csv`.

    Raises:
        ValueError: If yfinance returns nothing or an empty frame.
    """
    _ensure_dir(dst_dir)
    tkr_query = _ticker_for_query(ticker)
    tkr_file = _ticker_for_filename(tkr_query)

    df = yf.download(
        tkr_query,
        start=start,
        interval=interval,
        auto_adjust=False,
        progress=False,
        threads=True,
    )
    if df is None or df.empty:
        raise ValueError(
            f"No data returned for ticker '{tkr_query}' (start={start}, interval={interval}). "
            "Check the symbol and exchange suffix (e.g., NESN.SW, BMW.DE, VOD.L)."
        )

    # Newer yfinance releases can return two-level (Price, Ticker) column
    # labels even for a single symbol. Written as-is, that produces a CSV
    # with a multi-row header, so drop the ticker level and keep the plain
    # field names (Open, High, ...).
    if isinstance(df.columns, pd.MultiIndex):
        ticker_level = "Ticker" if "Ticker" in df.columns.names else -1
        df.columns = df.columns.droplevel(ticker_level)

    # Ensure a clean Date index
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.reset_index()
        if "Date" in df.columns:
            df = df.set_index("Date")
        else:
            # The former index is the first column after reset_index();
            # relabel it so the CSV always has a "Date" column.
            df.columns = ["Date"] + list(df.columns[1:])
            df = df.set_index("Date")
    # Use one index label regardless of what the downloaded frame called it.
    df.index.name = "Date"

    csv_path = os.path.join(dst_dir, f"{tkr_file}_{interval}.csv")
    df.to_csv(csv_path)
    return csv_path