volatilitypredictor / pipeline_v2.py
Gil Stetler
fix
89cf40b
# pipeline_v2.py
import os
import re
import pandas as pd
try:
import yfinance as yf
except Exception as e:
raise ImportError(
"yfinance is not installed. Add `yfinance>=0.2.40` to requirements.txt."
) from e
def _ensure_dir(path: str) -> None:
os.makedirs(path, exist_ok=True)
def _ticker_for_query(t: str) -> str:
"""
Prepare ticker for yfinance:
- strip spaces
- uppercase
- DO NOT alter '.' or '-' (yfinance relies on them, e.g. NESN.SW, BRK-B)
"""
return t.strip().upper()
def _ticker_for_filename(t: str) -> str:
"""
Prepare a safe filename:
- replace any char not [A-Za-z0-9] with '_'
"""
return re.sub(r"[^A-Za-z0-9]", "_", t)
def update_ticker_csv(
    ticker: str,
    start: str = "2015-01-01",
    interval: str = "1d",
    dst_dir: str = "/mnt/data",
) -> str:
    """
    Download OHLCV for `ticker` using yfinance and save as CSV.

    Args:
        ticker: Symbol to download, including any exchange suffix
            (e.g. NESN.SW, BMW.DE, VOD.L).
        start: First date to request; passed through to yfinance.
        interval: Bar interval; passed through to yfinance and also used in
            the output filename.
        dst_dir: Directory the CSV is written to (created if missing).

    Returns:
        The CSV file path, `<dst_dir>/<ticker>_<interval>.csv`, where the
        ticker part is upper-cased with non-alphanumerics replaced by '_'.

    Raises:
        ValueError: If `ticker` is blank or not a string, or if yfinance
            returns no data for it.
    """
    # Reject blank symbols before creating directories or hitting the network.
    if not isinstance(ticker, str) or not ticker.strip():
        raise ValueError(f"Ticker must be a non-empty string, got {ticker!r}.")

    _ensure_dir(dst_dir)
    tkr_query = _ticker_for_query(ticker)
    tkr_file = _ticker_for_filename(tkr_query)

    df = yf.download(
        tkr_query,
        start=start,
        interval=interval,
        auto_adjust=False,
        progress=False,
        threads=True,
    )
    if df is None or df.empty:
        raise ValueError(
            f"No data returned for ticker '{tkr_query}' (start={start}, interval={interval}). "
            "Check the symbol and exchange suffix (e.g., NESN.SW, BMW.DE, VOD.L)."
        )

    # Some yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol, which would make to_csv emit a multi-row header.
    # Only one ticker is requested here, so keep just the field level (level 0
    # in yfinance's default column-grouped layout).
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    # Ensure a clean Date index
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.reset_index()
        if "Date" not in df.columns:
            # No explicit Date column: treat the first column (the former
            # index) as the date column.
            df.columns = ["Date"] + list(df.columns[1:])
        df = df.set_index("Date")
    df.index.name = "Date"

    csv_path = os.path.join(dst_dir, f"{tkr_file}_{interval}.csv")
    df.to_csv(csv_path)
    return csv_path