"""Streamlit dashboard for weekly hotel-booking cancellation predictions.

Generates synthetic bookings for the next seven days, sends them to a batch
prediction API, and maps the returned cancellation probabilities to suggested
actions using two user-controlled thresholds.
"""
import datetime as dt
import os

import numpy as np
import pandas as pd
import requests
import streamlit as st

# --- CONFIG ---
# Batch prediction endpoint; override with the API_URL environment variable.
API_URL = os.getenv("API_URL", "https://rjuro-hotel-cancel-api.hf.space/predict_batch")

NUMERIC_FEATURES = [
    'lead_time', 'arrival_date_week_number', 'arrival_date_day_of_month',
    'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children',
    'babies', 'is_repeated_guest', 'previous_cancellations',
    'previous_bookings_not_canceled', 'booking_changes', 'agent',
    'days_in_waiting_list', 'adr', 'required_car_parking_spaces',
    'total_of_special_requests', 'total_guests', 'total_nights', 'is_summer',
    'previous_cancellation_rate',
]
CATEGORICAL_FEATURES = [
    'hotel', 'meal', 'market_segment', 'distribution_channel',
    'reserved_room_type', 'deposit_type', 'customer_type',
]
# Columns sent to the API, in this order.
ALL_FEATURES = NUMERIC_FEATURES + CATEGORICAL_FEATURES

# Labels written to the 'action' column.
ACTION_PERK = "Perk-Upgrade (Prepay)"
ACTION_REMINDER = "Reminder"
ACTION_NONE = "None"

# st.session_state key under which the last scored bookings are kept.
RESULTS_KEY = "weekly_predictions"

st.set_page_config(page_title="Weekly Cancellation Predictions", layout="wide")


# --- SIMPLE SYNTH GENERATOR ---
def synth_week(n_per_day: int = 300, seed: int = 42) -> pd.DataFrame:
    """Generate synthetic bookings for each of the next seven days.

    Args:
        n_per_day: Number of bookings to generate per arrival day.
        seed: Seed for the NumPy random generator; the same seed on the same
            calendar day reproduces the same data.

    Returns:
        A DataFrame with ``7 * n_per_day`` rows containing an ``arrival_date``
        column followed by every column in ``ALL_FEATURES``.
    """
    rng = np.random.default_rng(seed)
    today = dt.date.today()
    n = n_per_day

    def choice(vals, probs):
        """Draw ``n`` categorical values with (re-normalised) probabilities."""
        p = np.array(probs, dtype=float)
        p = p / p.sum()
        return rng.choice(vals, p=p, size=n)

    all_rows = []
    for i in range(1, 8):
        arr = today + dt.timedelta(days=i)
        week = int(arr.isocalendar().week)
        dom = arr.day
        is_summer = int(arr.month in [6, 7, 8])

        # NOTE: the order of the draws below determines the generated values
        # for a given seed; keep it stable.
        lead_time = np.clip(rng.gamma(2.0, 60.0, n).astype(int), 1, 365)
        wkd = rng.poisson(1.0, n)
        wk = rng.poisson(3.0, n)
        adults = np.maximum(1, rng.poisson(1.5, n) + 1)
        children = rng.binomial(2, 0.15, n)
        babies = rng.binomial(1, 0.05, n)
        is_repeated_guest = rng.binomial(1, 0.12, n)
        prev_canc = rng.binomial(2, 0.05, n)
        prev_notc = rng.binomial(3, 0.15, n)
        booking_changes = rng.poisson(0.2, n)
        agent = rng.integers(0, 5, n)  # 0≈direct
        wait_list = rng.binomial(5, 0.05, n)
        adr = np.clip(rng.normal(120, 35, n), 30, 450)
        parking = rng.binomial(1, 0.12, n)
        special_req = rng.poisson(0.6, n)

        # Derived features.
        total_nights = (wkd + wk).astype(int)
        total_guests = (adults + children + babies).astype(int)
        # The epsilon keeps the denominator non-zero for guests with no history.
        prev_rate = prev_canc / np.maximum(1e-6, (prev_canc + prev_notc + 1e-6))

        hotel = choice(['City Hotel', 'Resort Hotel'], [0.7, 0.3])
        meal = choice(['BB', 'HB', 'FB', 'SC'], [0.75, 0.15, 0.03, 0.07])
        market = choice(['Online TA', 'Direct', 'Corporate', 'Offline TA/TO'],
                        [0.45, 0.30, 0.15, 0.10])
        channel = choice(['TA/TO', 'Direct', 'Corporate', 'GDS'],
                         [0.5, 0.3, 0.15, 0.05])
        roomtype = choice(list("ABCDEFG"),
                          [0.35, 0.25, 0.15, 0.1, 0.08, 0.05, 0.02])
        deposit = choice(['No Deposit', 'Non Refund', 'Refundable'],
                         [0.75, 0.15, 0.10])
        cust = choice(['Transient', 'Contract', 'Group', 'Transient-Party'],
                      [0.7, 0.15, 0.08, 0.07])

        df = pd.DataFrame({
            'lead_time': lead_time,
            'arrival_date_week_number': week,
            'arrival_date_day_of_month': dom,
            'stays_in_weekend_nights': wkd,
            'stays_in_week_nights': wk,
            'adults': adults,
            'children': children,
            'babies': babies,
            'is_repeated_guest': is_repeated_guest,
            'previous_cancellations': prev_canc,
            'previous_bookings_not_canceled': prev_notc,
            'booking_changes': booking_changes,
            'agent': agent,
            'days_in_waiting_list': wait_list,
            'adr': adr,
            'required_car_parking_spaces': parking,
            'total_of_special_requests': special_req,
            'total_guests': total_guests,
            'total_nights': total_nights,
            'is_summer': is_summer,
            'previous_cancellation_rate': prev_rate,
            'hotel': hotel,
            'meal': meal,
            'market_segment': market,
            'distribution_channel': channel,
            'reserved_room_type': roomtype,
            'deposit_type': deposit,
            'customer_type': cust,
        })
        df.insert(0, "arrival_date", pd.Timestamp(arr))
        all_rows.append(df)

    return pd.concat(all_rows, ignore_index=True)


def call_api(df: pd.DataFrame) -> np.ndarray:
    """POST the feature columns of *df* to ``API_URL`` and return probabilities.

    Args:
        df: Bookings containing every column in ``ALL_FEATURES``.

    Returns:
        A 1-D float array with one probability per row of *df*.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
        ValueError: If the response body is not JSON, lacks a
            ``"probabilities"`` field, or that field is not a flat numeric
            list with one entry per row.
    """
    payload = {"data": df[ALL_FEATURES].to_dict(orient="records")}
    r = requests.post(API_URL, json=payload, timeout=60)
    r.raise_for_status()

    body = r.json()
    if not isinstance(body, dict) or "probabilities" not in body:
        raise ValueError("API response is missing the 'probabilities' field.")

    try:
        probs = np.asarray(body["probabilities"], dtype=float)
    except (TypeError, ValueError) as err:
        raise ValueError("API returned non-numeric probabilities.") from err

    # Fail here with a clear message rather than later during column assignment.
    if probs.shape != (len(df),):
        raise ValueError(
            f"API returned probabilities with shape {probs.shape}; "
            f"expected {len(df)} values."
        )
    return probs


def assign_actions(probs: np.ndarray, t_low: float, t_high: float) -> np.ndarray:
    """Map probabilities to action labels.

    ``p >= t_high`` gives the perk action, otherwise ``p >= t_low`` gives a
    reminder, otherwise no action.
    """
    return np.where(
        probs >= t_high,
        ACTION_PERK,
        np.where(probs >= t_low, ACTION_REMINDER, ACTION_NONE),
    )


def summarize_daily(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate scored bookings per arrival date (counts, risk, actions)."""
    return (
        df.groupby(df['arrival_date'].dt.date)
        .agg(
            n_bookings=('arrival_date', 'count'),
            mean_risk=('pred_cancel_prob', 'mean'),
            p75=('pred_cancel_prob', lambda x: np.quantile(x, 0.75)),
            n_perk=('action', lambda s: (s == ACTION_PERK).sum()),
            n_reminder=('action', lambda s: (s == ACTION_REMINDER).sum()),
            n_none=('action', lambda s: (s == ACTION_NONE).sum()),
        )
        .reset_index()
        .rename(columns={'arrival_date': 'date'})
    )


# --- UI ---
st.title("Weekly Booking Predictions")
st.caption("API: " + API_URL)

with st.sidebar:
    st.header("Simulation")
    n_per_day = st.slider("Synthetic bookings per day", 50, 2000, 400, 50)
    t_low = st.slider("Reminder threshold", 0.05, 0.60, 0.30, 0.01)
    t_high = st.slider("Perk (prepay upgrade) threshold", 0.30, 0.95, 0.65, 0.01)
    seed = st.number_input("Random seed", 0, 99999, 42, 1)
    st.caption("Rules: p ≥ t_high → Perk; t_low ≤ p < t_high → Reminder; else → None.")
    # The two slider ranges overlap, so the Reminder band can be empty.
    if t_low >= t_high:
        st.warning(
            "The Reminder threshold is not below the Perk threshold, "
            "so no booking can receive a Reminder."
        )

cols = st.columns(2)
with cols[0]:
    if st.button("Generate & Predict", use_container_width=True):
        bookings = synth_week(n_per_day=n_per_day, seed=int(seed))
        try:
            bookings['pred_cancel_prob'] = call_api(bookings)
        except (requests.RequestException, ValueError) as err:
            # Drop earlier results so the error is not shown next to stale data.
            st.session_state.pop(RESULTS_KEY, None)
            st.error(f"Could not get predictions from the API: {err}")
        else:
            st.session_state[RESULTS_KEY] = bookings

    # st.button is True only on the rerun caused by its own click. Reading the
    # results from session state keeps them visible when another widget (a
    # slider, the download button) triggers a rerun.
    scored = st.session_state.get(RESULTS_KEY)
    if scored is not None:
        df = scored.copy()
        # Recomputed every rerun so threshold changes apply without a new API call.
        df['action'] = assign_actions(
            df['pred_cancel_prob'].to_numpy(), t_low, t_high
        )
        daily = summarize_daily(df)

        st.subheader("Daily Summary (Next 7 Days)")
        st.dataframe(daily, use_container_width=True, hide_index=True)

        st.subheader("Preview: First 200 Bookings with Suggested Actions")
        st.dataframe(
            df[['arrival_date', 'hotel', 'market_segment', 'deposit_type', 'lead_time',
                'total_nights', 'total_guests', 'pred_cancel_prob', 'action']].head(200),
            use_container_width=True,
            hide_index=True,
        )

        st.download_button(
            "Download Full Weekly Predictions (CSV)",
            df.to_csv(index=False).encode("utf-8"),
            file_name="weekly_predictions_with_actions.csv",
            mime="text/csv",
        )

with cols[1]:
    st.subheader("How it works")
    st.markdown(
        "- Synthetic bookings for the next 7 days\n"
        "- Calls the public FastAPI to get cancellation probabilities\n"
        "- Simple rules pick suggested actions"
    )