Spaces:
Sleeping
Sleeping
| # utils_vol.py — robuste Version | |
| import yfinance as yf | |
| import numpy as np | |
| import pandas as pd | |
| def _to_1d_series(obj: pd.Series | pd.DataFrame) -> pd.Series: | |
| """ | |
| Erzwingt eine 1D-Serie: | |
| - DataFrame (n,1) -> squeeze | |
| - MultiIndex -> erste passende Spalte | |
| - alles in float konvertieren, NaNs droppen | |
| """ | |
| if isinstance(obj, pd.DataFrame): | |
| # (n,1) -> Serie | |
| if obj.shape[1] == 1: | |
| ser = obj.squeeze(axis=1) | |
| else: | |
| # Fallback: nimm die erste numerische Spalte | |
| num_cols = obj.select_dtypes(include=[np.number]).columns | |
| if len(num_cols) > 0: | |
| ser = obj[num_cols[0]] | |
| else: | |
| # nimm einfach die erste Spalte | |
| ser = obj.iloc[:, 0] | |
| else: | |
| ser = obj | |
| ser = pd.to_numeric(ser, errors="coerce") | |
| ser = ser.dropna() | |
| # Index in DatetimeIndex verwandeln, wenn möglich | |
| if not isinstance(ser.index, pd.DatetimeIndex): | |
| try: | |
| ser.index = pd.to_datetime(ser.index, errors="coerce") | |
| ser = ser[ser.index.notna()] | |
| except Exception: | |
| # notfalls RangeIndex lassen | |
| pass | |
| return ser.astype(float) | |
def fetch_close_series(ticker: str, start: str = "2015-01-01", interval: str = "1d") -> pd.Series:
    """Download price history via yfinance and return the close prices as a 1-D Series.

    ``auto_adjust=True`` is passed explicitly; per the original author's note
    this avoids yfinance's warning about the changed default and keeps
    adjusted and plain close prices consistent.

    Column selection:
      * MultiIndex columns: the ``"Close"`` field is looked up on every
        column level (level 0 first), so both field-first and ticker-first
        layouts are handled. If several columns remain, the first is used.
        Without any ``"Close"`` label, the first numeric column is used, and
        failing that the first column.
      * Flat columns: the first match among ``Close``, ``Adj Close``,
        ``close``, ``adj close``, ``Price``, ``price``; otherwise the first
        numeric column.

    Args:
        ticker: Symbol to download; surrounding whitespace is stripped.
        start: Start date forwarded to ``yf.download``.
        interval: Bar interval forwarded to ``yf.download``.

    Returns:
        Close prices as a float Series (normalized by ``_to_1d_series``).

    Raises:
        ValueError: If the download returns nothing, or a flat-column frame
            contains no numeric column.
    """
    df = yf.download(
        ticker.strip(),
        start=start,
        interval=interval,
        auto_adjust=True,  # set explicitly to avoid the default-change warning
        progress=False,
        threads=True,
    )
    if df is None or df.empty:
        raise ValueError(f"Keine Daten für {ticker} (start={start}, interval={interval}).")

    if isinstance(df.columns, pd.MultiIndex):
        # The price field may sit on any level depending on how yfinance
        # grouped the columns; checking only level 0 would fall through to
        # an arbitrary numeric column (e.g. Open) for ticker-first layouts.
        for level in range(df.columns.nlevels):
            if "Close" in df.columns.get_level_values(level):
                sub = df.xs("Close", axis=1, level=level)
                if isinstance(sub, pd.DataFrame) and sub.shape[1] > 1:
                    # Several tickers: take the first one.
                    sub = sub.iloc[:, 0]
                return _to_1d_series(sub)

        numeric = df.select_dtypes(include=[np.number])
        if numeric.shape[1] > 0:
            return _to_1d_series(numeric.iloc[:, 0])
        # Last resort: first column, whatever it holds.
        return _to_1d_series(df.iloc[:, 0])

    # Flat columns
    for name in ("Close", "Adj Close", "close", "adj close", "Price", "price"):
        if name in df.columns:
            return _to_1d_series(df[name])

    num_cols = df.select_dtypes(include=[np.number]).columns
    if len(num_cols) == 0:
        raise ValueError("Keine numerische Close-Spalte gefunden.")
    return _to_1d_series(df[num_cols[0]])
def realized_vol(
    close: pd.Series,
    window: int = 20,
    annualize: bool = True,
    *,
    periods_per_year: float = 252.0,
) -> pd.Series:
    """Compute rolling realized volatility from a close-price series.

    The result is the rolling standard deviation of log returns over
    ``window`` observations (only full windows are kept), optionally scaled
    by ``sqrt(periods_per_year)``.

    Args:
        close: Close prices; normalized with ``_to_1d_series`` first.
        window: Number of returns per rolling window (default 20).
        annualize: Whether to scale the result to an annual figure.
        periods_per_year: Number of observations per year used for
            annualization. The default 252 matches daily bars; pass e.g. 52
            for weekly data.

    Returns:
        A 1-D float Series of realized volatility without NaN values.
    """
    close = _to_1d_series(close)
    # The logarithm is undefined for non-positive prices; mask them so they
    # become NaN (and are dropped with the affected returns) instead of
    # producing -inf values and RuntimeWarnings.
    log_price = np.log(close.where(close > 0))
    returns = log_price.diff().dropna()
    rv = returns.rolling(window, min_periods=window).std()
    if annualize:
        rv = rv * np.sqrt(periods_per_year)
    # Normalize once more so the result is guaranteed to be a 1-D float Series.
    return _to_1d_series(rv.dropna())
def rv_to_autogluon_df(rv: pd.Series | pd.DataFrame) -> pd.DataFrame:
    """Format realized volatility as a long DataFrame for AutoGluon TimeSeries.

    The returned frame has the columns ``['item_id', 'timestamp', 'target']``
    with the constant item id ``"series_1"``, sorted by timestamp. Rows with
    NaN targets or missing timestamps are removed.

    If the index is not already a ``DatetimeIndex``, its labels are parsed as
    datetimes. When that is not meaningful (numeric labels, which
    ``pd.to_datetime`` would read as epoch nanoseconds) or not possible (no
    label parses), synthetic daily timestamps starting at 2000-01-01 are
    generated instead.

    Args:
        rv: Realized volatility as Series or DataFrame; normalized with
            ``_to_1d_series`` first.

    Returns:
        DataFrame with columns ``item_id``, ``timestamp`` and ``target``.
    """
    rv = _to_1d_series(rv)

    values = np.asarray(rv.values).reshape(-1)  # guaranteed 1-D
    idx = rv.index
    if not isinstance(idx, pd.DatetimeIndex):
        parsed = None
        if not pd.api.types.is_numeric_dtype(idx.dtype):
            try:
                parsed = pd.to_datetime(idx, errors="coerce")
            except (TypeError, ValueError, OverflowError):
                parsed = None
        # errors="coerce" does not raise for unparseable labels, it yields
        # NaT; treat "nothing parsed" like a failure so the fallback applies.
        if parsed is None or not np.asarray(parsed.notna()).any():
            parsed = pd.date_range(start="2000-01-01", periods=len(values), freq="D")
        idx = parsed

    # Keep only valid data points.
    mask = ~np.isnan(values)
    df = pd.DataFrame({
        "item_id": "series_1",
        "timestamp": idx[mask],
        "target": values[mask],
    })
    # Drop rows without a timestamp and order chronologically.
    df = df[df["timestamp"].notna()].sort_values("timestamp")
    return df