Spaces:

Gilette
/

volatilitypredictor

Sleeping

App Files Files Community

Gil Stetler committed 16 days ago

Commit

89cf40b

1 Parent(s): 682cd17

fix

Browse files

Files changed (2) hide show

app.py +20 -14
pipeline_v2.py +38 -23

app.py CHANGED Viewed

@@ -524,7 +524,7 @@ matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from chronos import ChronosPipeline
-# --- our data pipeline ---
 import pipeline_v2 as pipe2  # update_ticker_csv(...)
 # --------------------
@@ -568,7 +568,7 @@ def _extract_dates(df: pd.DataFrame):
     # If index is DatetimeIndex, use it
     if isinstance(df.index, pd.DatetimeIndex):
         return df.index.to_numpy()
-    # Else look for a date-like column
     mapping = {c.lower(): c for c in df.columns}
     for name in ["date", "time", "timestamp"]:
         if name in mapping:
@@ -607,23 +607,28 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
     start: YYYY-MM-DD
     interval: '1d', '1wk', '1mo'
     """
-    # Parse first ticker
     tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
     if not tick_list:
-        raise gr.Error("Please enter at least one ticker, e.g. AAPL")
-    ticker = tick_list[0]
     # 1) Fetch/update CSV via pipeline
     try:
         csv_path = pipe2.update_ticker_csv(ticker, start=start, interval=interval)
     except Exception as e:
-        raise gr.Error(f"Data fetch failed for '{ticker}': {e}")
     # 2) Load CSV and build realized vol
     try:
-        df = pd.read_csv(csv_path, index_col=0, parse_dates=[0])
     except Exception:
-        # Fallback if index parsing fails
         df = pd.read_csv(csv_path)
     dates = _extract_dates(df)
@@ -662,7 +667,6 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
     fig = plt.figure(figsize=(10, 4))
     H0 = len(rv_train)
-    # Align dates to rv length if we have real dates
     if isinstance(dates, np.ndarray) and len(dates) >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         x_hist = dates_rv[:H0]
@@ -727,13 +731,14 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
 with gr.Blocks(title="Volatility Forecast • yfinance pipeline + Chronos") as demo:
     gr.Markdown(
         "### Predict last 30 days of realized volatility for any ticker\n"
-        "- Fetches data via **yfinance** using your `pipeline_v2.update_ticker_csv`.\n"
-        "- Forecast uses **Chronos-T5-Large** (single path, no mean/median).\n"
-        "- Compares day-by-day to actual RV and reports **MAPE/MPE/RMSE**.\n"
-        "- Optional **Bias/Scale Calibration (α)** to remove systematic bias."
     )
     with gr.Row():
-        tickers_in = gr.Textbox(value="AAPL", label="Tickers (comma-separated; first is evaluated)")
     with gr.Row():
         start_in = gr.Textbox(value="2015-01-01", label="Start date (YYYY-MM-DD)")
         interval_in = gr.Dropdown(choices=["1d", "1wk", "1mo"], value="1d", label="Interval")
@@ -750,3 +755,4 @@ with gr.Blocks(title="Volatility Forecast • yfinance pipeline + Chronos") as d
 if __name__ == "__main__":
     demo.launch()

 import matplotlib.pyplot as plt
 from chronos import ChronosPipeline
+# our data pipeline
 import pipeline_v2 as pipe2  # update_ticker_csv(...)
 # --------------------
     # If index is DatetimeIndex, use it
     if isinstance(df.index, pd.DatetimeIndex):
         return df.index.to_numpy()
+    # Else try a date-like column
     mapping = {c.lower(): c for c in df.columns}
     for name in ["date", "time", "timestamp"]:
         if name in mapping:
     start: YYYY-MM-DD
     interval: '1d', '1wk', '1mo'
     """
+    # Parse first ticker (keep dots and dashes!)
     tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
     if not tick_list:
+        raise gr.Error("Please enter at least one ticker, e.g. AAPL or NESN.SW")
+    ticker = tick_list[0]  # keep original form; pipeline handles uppercasing
     # 1) Fetch/update CSV via pipeline
     try:
         csv_path = pipe2.update_ticker_csv(ticker, start=start, interval=interval)
     except Exception as e:
+        raise gr.Error(
+            f"Data fetch failed for '{ticker}'. Tip: ensure exchange suffixes (e.g., NESN.SW, BMW.DE, VOD.L).\n{e}"
+        )
     # 2) Load CSV and build realized vol
     try:
+        df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
+        if not isinstance(df.index, pd.DatetimeIndex):
+            # last fallback
+            df = pd.read_csv(csv_path)
     except Exception:
         df = pd.read_csv(csv_path)
     dates = _extract_dates(df)
     fig = plt.figure(figsize=(10, 4))
     H0 = len(rv_train)
     if isinstance(dates, np.ndarray) and len(dates) >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         x_hist = dates_rv[:H0]
 with gr.Blocks(title="Volatility Forecast • yfinance pipeline + Chronos") as demo:
     gr.Markdown(
         "### Predict last 30 days of realized volatility for any ticker\n"
+        "- Works with symbols like `AAPL`, `NESN.SW`, `BMW.DE`, `VOD.L`, `BRK-B`, `BTC-USD`.\n"
+        "- Data fetched via **yfinance** using your `pipeline_v2.update_ticker_csv`.\n"
+        "- Forecast uses **Chronos-T5-Large** (single path, deterministic seed).\n"
+        "- Day-by-day comparison with **MAPE/MPE/RMSE**.\n"
+        "- Optional **Bias/Scale Calibration (α)**."
     )
     with gr.Row():
+        tickers_in = gr.Textbox(value="AAPL", label="Ticker (you can use suffixes like NESN.SW, BMW.DE)")
     with gr.Row():
         start_in = gr.Textbox(value="2015-01-01", label="Start date (YYYY-MM-DD)")
         interval_in = gr.Dropdown(choices=["1d", "1wk", "1mo"], value="1d", label="Interval")
 if __name__ == "__main__":
     demo.launch()

pipeline_v2.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # pipeline_v2.py
 import os
-from typing import Tuple
 import pandas as pd
 try:
@@ -15,50 +15,65 @@ def _ensure_dir(path: str) -> None:
     os.makedirs(path, exist_ok=True)
-def _sanitize_ticker(t: str) -> str:
-    return t.strip().upper().replace(" ", "").replace("/", "-").replace(".", "-")
 def update_ticker_csv(
     ticker: str,
     start: str = "2015-01-01",
     interval: str = "1d",
-    dst_dir: str = "/mnt/data"  # HF Spaces writeable path
 ) -> str:
     """
     Download OHLCV for `ticker` using yfinance and save as CSV.
     Returns the CSV file path.
-    Args:
-        ticker: e.g. "AAPL"
-        start: "YYYY-MM-DD"
-        interval: "1d", "1wk", "1mo"
-        dst_dir: directory to write CSVs (default: /mnt/data for Spaces)
     """
     _ensure_dir(dst_dir)
-    tkr = _sanitize_ticker(ticker)
     df = yf.download(
-        tkr,
         start=start,
         interval=interval,
-        auto_adjust=False,   # keep explicit Adj Close; we’ll pick Close / Adj Close later
         progress=False,
         threads=True,
     )
     if df is None or df.empty:
-        raise ValueError(f"No data returned for ticker '{tkr}' with start={start}, interval={interval}.")
-    # Ensure a clean, single-index Date column
-    if isinstance(df.index, pd.DatetimeIndex):
-        df = df.copy()
-        df.index.name = "Date"
-    else:
-        df = df.reset_index().rename(columns={df.columns[0]: "Date"}).set_index("Date")
-    # Save
-    csv_path = os.path.join(dst_dir, f"{tkr}_{interval}.csv")
-    df.to_csv(csv_path)
     return csv_path

 # pipeline_v2.py
 import os
+import re
 import pandas as pd
 try:
     os.makedirs(path, exist_ok=True)
+def _ticker_for_query(t: str) -> str:
+    """
+    Prepare ticker for yfinance:
+    - strip spaces
+    - uppercase
+    - DO NOT alter '.' or '-' (yfinance relies on them, e.g. NESN.SW, BRK-B)
+    """
+    return t.strip().upper()
+def _ticker_for_filename(t: str) -> str:
+    """
+    Prepare a safe filename:
+    - replace any char not [A-Za-z0-9] with '_'
+    """
+    return re.sub(r"[^A-Za-z0-9]", "_", t)
 def update_ticker_csv(
     ticker: str,
     start: str = "2015-01-01",
     interval: str = "1d",
+    dst_dir: str = "/mnt/data"
 ) -> str:
     """
     Download OHLCV for `ticker` using yfinance and save as CSV.
     Returns the CSV file path.
     """
     _ensure_dir(dst_dir)
+    tkr_query = _ticker_for_query(ticker)
+    tkr_file = _ticker_for_filename(tkr_query)
     df = yf.download(
+        tkr_query,
         start=start,
         interval=interval,
+        auto_adjust=False,
         progress=False,
         threads=True,
     )
     if df is None or df.empty:
+        raise ValueError(
+            f"No data returned for ticker '{tkr_query}' (start={start}, interval={interval}). "
+            "Check the symbol and exchange suffix (e.g., NESN.SW, BMW.DE, VOD.L)."
+        )
+    # Ensure a clean Date index
+    if not isinstance(df.index, pd.DatetimeIndex):
+        df = df.reset_index()
+        if "Date" in df.columns:
+            df = df.set_index("Date")
+        else:
+            df.columns = ["Date"] + list(df.columns[1:])
+            df = df.set_index("Date")
+    df.index.name = "Date"
+    csv_path = os.path.join(dst_dir, f"{tkr_file}_{interval}.csv")
+    df.to_csv(csv_path)
     return csv_path