Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- Dockerfile +27 -0
- streamlit_app.py +735 -0
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the Streamlit dashboard (streamlit_app.py).
FROM python:3.12-slim

# PYTHONDONTWRITEBYTECODE: do not write .pyc files into the image.
# PYTHONUNBUFFERED: unbuffered stdout/stderr so logs show up immediately.
# PIP_NO_CACHE_DIR: keep pip's download cache out of the image layers.
# PORT: NOTE(review) not referenced by CMD below, which hard-codes 8501.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=8501

WORKDIR /app

# System deps
RUN apt-get update -y && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching
# NOTE(review): requirements.txt must exist in the build context, otherwise
# this COPY fails the build.
COPY requirements.txt /app/requirements.txt
RUN python -m pip install --upgrade pip && \
    pip install -r requirements.txt

# Copy application
COPY . /app

EXPOSE 8501

# Start Streamlit on all interfaces, port 8501, with usage stats disabled.
CMD ["streamlit", "run", "streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--browser.gatherUsageStats=false"]
|
| 26 |
+
|
| 27 |
+
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,735 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime, date
|
| 4 |
+
from typing import Dict, List, Optional, Tuple
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import plotly.express as px
|
| 8 |
+
import streamlit as st
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# -----------------------------
|
| 12 |
+
# App Configuration
|
| 13 |
+
# -----------------------------
|
| 14 |
+
st.set_page_config(
|
| 15 |
+
page_title="Tableau de bord des inscriptions",
|
| 16 |
+
page_icon="🧭",
|
| 17 |
+
layout="wide",
|
| 18 |
+
initial_sidebar_state="expanded",
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# -----------------------------
|
| 23 |
+
# Utilities
|
| 24 |
+
# -----------------------------
|
| 25 |
+
def try_parse_datetime(series: pd.Series) -> pd.Series:
    """Best-effort conversion of a Series to datetimes.

    Args:
        series: Column to inspect.

    Returns:
        *series* itself when it already has a datetime dtype; otherwise the
        parsed Series when at least ``max(3, 20 % of rows)`` values parse to
        a valid timestamp; otherwise the original *series* unchanged.
    """
    import warnings  # local import: only needed here

    if pd.api.types.is_datetime64_any_dtype(series):
        return series

    try:
        # pandas warns when it cannot infer a single format for free text;
        # this helper is called on arbitrary columns, so silence that noise.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            parsed = pd.to_datetime(series, errors="coerce")
    except (ValueError, TypeError, OverflowError):
        # Not convertible at all: treat the column as "not a date".
        return series

    # Accept the conversion only when enough values are real dates, so a text
    # column with a couple of date-looking cells is left alone.
    required = max(3, int(0.2 * len(parsed)))
    if parsed.notna().sum() >= required:
        return parsed
    return series
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def make_unique_columns(columns: List[str]) -> List[str]:
    """Return column names made unique by appending " (2)", " (3)", ...

    The first occurrence of a name is kept as is. Later occurrences get the
    next free numeric suffix; a suffix already used by another column (for
    example a real column literally named "a (2)") is skipped, so the result
    never contains duplicates.

    Args:
        columns: Column names, possibly with repeats; each is passed to ``str``.

    Returns:
        A list of the same length with pairwise distinct names.
    """
    counters: Dict[str, int] = {}
    taken = set()
    unique_cols: List[str] = []
    for name in columns:
        base = str(name)
        candidate = base
        # Bump the per-base counter until the candidate is actually free.
        while candidate in taken:
            counters[base] = counters.get(base, 1) + 1
            candidate = f"{base} ({counters[base]})"
        taken.add(candidate)
        unique_cols.append(candidate)
    return unique_cols
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def normalize_label(text: str) -> str:
    """Return *text* lower-cased with whitespace normalized.

    Leading/trailing whitespace is removed and every run of whitespace
    (including non-breaking spaces, tabs and newlines) becomes one space.
    """
    # str.split() with no argument splits on any Unicode whitespace, which
    # already covers NBSP, so no separate replace() pass is needed.
    return " ".join(str(text).lower().split())
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def find_column(df: pd.DataFrame, candidates: List[str]) -> Optional[str]:
    """Look up a column of *df* by normalized name.

    Candidates are tried in order; the first one whose normalized form equals
    the normalized form of a column wins, and that column's original name is
    returned. Returns ``None`` when no candidate matches.
    """
    lookup: Dict[str, str] = {}
    for column in df.columns:
        # Later columns overwrite earlier ones that normalize identically.
        lookup[normalize_label(column)] = column

    for candidate in candidates:
        key = normalize_label(candidate)
        if key in lookup:
            return lookup[key]
    return None
|
| 68 |
+
|
| 69 |
+
def infer_pandas_types(df: pd.DataFrame) -> Dict[str, str]:
    """Map each column of *df* to a logical type.

    The logical type is one of ``'categorical'``, ``'numeric'``, ``'date'``
    or ``'text'``:

    * datetime dtype -> ``'date'``
    * boolean dtype -> ``'categorical'`` (checked before numeric)
    * numeric dtype -> ``'numeric'``
    * anything else -> ``'date'`` if ``try_parse_datetime`` converts it,
      else ``'categorical'`` when the number of distinct non-missing values
      is at most ``max(50, 5 % of rows)``, else ``'text'``.
    """
    type_map: Dict[str, str] = {}
    for col in df.columns:
        s = df[col]
        if pd.api.types.is_datetime64_any_dtype(s):
            type_map[col] = "date"
        elif pd.api.types.is_bool_dtype(s):
            type_map[col] = "categorical"
        elif pd.api.types.is_numeric_dtype(s):
            type_map[col] = "numeric"
        elif pd.api.types.is_datetime64_any_dtype(try_parse_datetime(s)):
            type_map[col] = "date"
        else:
            # Drop missing values *before* casting to str; otherwise NaN
            # becomes the string "nan" and is counted as a category.
            nunique = s.dropna().astype(str).nunique()
            limit = max(50, len(s) * 0.05)
            type_map[col] = "categorical" if nunique <= limit else "text"
    return type_map
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def dynamic_filters(df: pd.DataFrame, type_map: Dict[str, str]) -> pd.DataFrame:
    """Render one sidebar filter per column and return the filtered frame.

    Args:
        df: Source data; it is copied, never modified.
        type_map: Column -> logical type ('numeric', 'date', 'categorical',
            'text'). Columns missing from the map are treated as 'text'.

    Returns:
        The rows of *df* that satisfy every filter the user has narrowed.
        A range filter left at its full default range is not applied, so rows
        with missing values in that column are kept until the user actually
        restricts the range.
    """
    filtered = df.copy()
    st.sidebar.markdown("### 🔎 Filtres dynamiques")
    for col in filtered.columns:
        logical = type_map.get(col, "text")
        if logical == "numeric" and pd.api.types.is_numeric_dtype(filtered[col]):
            series_num = pd.to_numeric(filtered[col], errors="coerce")
            valid = series_num.dropna()
            if valid.empty:
                st.sidebar.caption(f"{col}: aucune valeur numérique exploitable")
                continue
            min_v = float(valid.min())
            max_v = float(valid.max())
            if min_v == max_v:
                # A single distinct value: a range slider would be pointless.
                st.sidebar.caption(f"{col}: valeur unique {min_v}")
                continue
            vmin, vmax = st.sidebar.slider(
                f"{col} (min-max)", min_value=min_v, max_value=max_v, value=(min_v, max_v)
            )
            # Comparisons against NaN are False, so filtering on the untouched
            # full range would silently discard rows with missing values.
            if (vmin, vmax) != (min_v, max_v):
                filtered = filtered[(series_num >= vmin) & (series_num <= vmax)]
        elif logical == "date":
            parsed = try_parse_datetime(filtered[col])
            if pd.api.types.is_datetime64_any_dtype(parsed):
                dmin = parsed.min()
                dmax = parsed.max()
                default_range = (
                    dmin.date() if pd.notna(dmin) else date.today(),
                    dmax.date() if pd.notna(dmax) else date.today(),
                )
                start_end = st.sidebar.date_input(f"{col} (période)", value=default_range)
                # While the user is still picking the second date the widget
                # returns a 1-tuple; only filter on a complete range.
                if isinstance(start_end, tuple) and len(start_end) == 2:
                    start, end = start_end
                    # Same reasoning as for numbers: keep rows with missing
                    # dates unless the range was actually narrowed.
                    if (start, end) != default_range:
                        days = parsed.dt.date
                        filtered = filtered[(days >= start) & (days <= end)]
        else:
            # categorical or text -> multiselect of unique values (with limit)
            uniques = sorted(filtered[col].dropna().astype(str).unique().tolist())[:200]
            selected = st.sidebar.multiselect(f"{col}", options=uniques, default=[])
            if selected:
                filtered = filtered[filtered[col].astype(str).isin(selected)]
    return filtered
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def apply_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Return the rows of *df* where any cell contains *query*.

    The match is a case-insensitive, literal substring test on the string
    form of every cell. An empty *query* returns *df* unchanged.

    Args:
        df: Data to search.
        query: Free text typed by the user; surrounding spaces are ignored.
    """
    if not query:
        return df
    needle = query.strip().lower()
    mask = pd.Series(False, index=df.index)
    for col in df.columns:
        haystack = df[col].astype(str).str.lower()
        # regex=False: user input is plain text, so "(" must not raise and
        # "." must not match every character.
        mask |= haystack.str.contains(needle, na=False, regex=False)
    return df[mask]
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize *df* to an in-memory .xlsx workbook and return its bytes.

    The data is written without the index to a sheet named "inscriptions".
    The xlsxwriter engine is preferred; when it is not installed, pandas'
    default .xlsx writer is used instead of failing the download.
    """
    buffer = io.BytesIO()
    try:
        writer = pd.ExcelWriter(buffer, engine="xlsxwriter")
    except ImportError:
        # xlsxwriter is an optional dependency; let pandas pick its default.
        writer = pd.ExcelWriter(buffer)
    with writer:
        df.to_excel(writer, index=False, sheet_name="inscriptions")
    return buffer.getvalue()
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def kpi_card(label: str, value: str):
    """Render a KPI tile (label above value) as raw HTML.

    *label* and *value* are inserted into the markup as-is.
    """
    markup = f"""
        <div class="card kpi">
          <div class="card-label">{label}</div>
          <div class="card-value">{value}</div>
        </div>
        """
    st.markdown(markup, unsafe_allow_html=True)
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def chart_card(title: str, fig):
    """Render a titled card header, then the Plotly figure, then a closing tag.

    NOTE(review): the opening and closing ``<div>`` are emitted by separate
    ``st.markdown`` calls; presumably each call is rendered as its own
    element, in which case the card does not actually wrap the chart --
    confirm in the browser.
    """
    header = f"<div class=\"card\"><div class=\"card-title\">{title}</div>"
    footer = "</div>"
    st.markdown(header, unsafe_allow_html=True)
    st.plotly_chart(fig, use_container_width=True, theme=None)
    st.markdown(footer, unsafe_allow_html=True)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def inject_base_css():
    """Inject ``assets/styles.css`` into the page, if the file is available.

    The stylesheet is optional, like the logo: when it is missing or cannot
    be read the app keeps running with Streamlit's default styling instead
    of crashing at startup.
    """
    css_path = os.path.join("assets", "styles.css")
    try:
        with open(css_path, "r", encoding="utf-8") as f:
            css = f.read()
    except OSError:
        # Missing or unreadable stylesheet: nothing to inject.
        return
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def set_theme_variables(mode: str):
    """Emit a ``:root{...}`` style block defining the theme's CSS variables.

    *mode* selects the "light" or "dark" palette; any other value falls back
    to the light one. Plotly theming is handled separately via its template.
    """
    light = {
        "--bg": "#f7f9fc",
        "--card": "#ffffff",
        "--text": "#0f172a",
        "--muted": "#64748b",
        "--primary": "#0ea5e9",
        "--accent": "#10b981",
        "--border": "#e5e7eb",
    }
    dark = {
        "--bg": "#0b1220",
        "--card": "#111827",
        "--text": "#e5e7eb",
        "--muted": "#94a3b8",
        "--primary": "#38bdf8",
        "--accent": "#34d399",
        "--border": "#1f2937",
    }
    chosen = {"light": light, "dark": dark}.get(mode, light)
    declarations = ";".join(f"{name}:{value}" for name, value in chosen.items())
    css_rule = ":root{" + declarations + "}"
    st.markdown(f"<style>{css_rule}</style>", unsafe_allow_html=True)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def get_plotly_template(mode: str) -> str:
    """Return the Plotly template name for *mode* ("dark" or anything else)."""
    if mode == "dark":
        return "plotly_dark"
    return "plotly_white"
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
# -----------------------------
|
| 206 |
+
# Sidebar: Logo, Upload, Theme, Column mapping
|
| 207 |
+
# -----------------------------
|
| 208 |
+
def sidebar_controls() -> Tuple[Optional[pd.DataFrame], Dict[str, str], str, Dict[str, str], List[str]]:
    """Render the sidebar (theme, logo, upload, cleaning) and load the data.

    Returns:
        A 5-tuple ``(df, logical_types, theme_mode, coercions, unique_keys)``:

        * ``df``: the uploaded first sheet after cleaning, or ``None`` when
          nothing was uploaded or the file could not be read.
        * ``logical_types``: column -> 'categorical' | 'numeric' | 'date' |
          'text', as chosen by the user (pre-filled by inference).
        * ``theme_mode``: ``"light"`` or ``"dark"``.
        * ``coercions``: the subset of ``logical_types`` that is 'numeric'
          or 'date' and was applied to the data.
        * ``unique_keys``: columns selected as identifying one person.
    """
    st.sidebar.markdown("## ⚙️ Contrôles")

    # Theme
    mode = st.sidebar.radio("Thème", options=["clair", "sombre"], horizontal=True, index=0)
    theme_mode = "dark" if mode == "sombre" else "light"
    set_theme_variables(theme_mode)

    # Logo (optional)
    logo_path = os.path.join("assets", "logo.png")
    if os.path.exists(logo_path):
        st.sidebar.image(logo_path, use_column_width=True)

    uploaded = st.sidebar.file_uploader("Importer un fichier Excel (.xlsx)", type=["xlsx"])

    df: Optional[pd.DataFrame] = None
    if uploaded is not None:
        try:
            # Read first sheet by default
            df = pd.read_excel(uploaded, sheet_name=0)
            # Strip column names
            df.columns = [str(c).strip() for c in df.columns]
            # Ensure unique column names
            if pd.Index(df.columns).has_duplicates:
                df.columns = make_unique_columns(list(df.columns))
        except Exception as e:
            # Boundary with user-supplied files: report and carry on with no
            # data rather than a half-prepared frame.
            df = None
            st.sidebar.error(f"Erreur de lecture du fichier: {e}")

    logical_types: Dict[str, str] = {}
    coercions: Dict[str, str] = {}
    unique_keys: List[str] = []
    if df is not None and not df.empty:
        st.sidebar.markdown("---")
        st.sidebar.markdown("### 🧹 Nettoyage & types")
        # Global cleaning options
        trim_spaces = st.sidebar.checkbox("Supprimer les espaces autour du texte", value=True)
        lower_case = st.sidebar.checkbox("Mettre le texte en minuscules", value=False)
        drop_dupes = st.sidebar.checkbox("Supprimer les doublons", value=False)
        dedup_subset_cols: List[str] = []
        dedup_keep_choice = "first"
        if drop_dupes:
            dedup_subset_cols = st.sidebar.multiselect(
                "Colonnes à considérer (vide = toutes)",
                options=list(df.columns),
                help="Sélectionnez les colonnes sur lesquelles détecter les doublons.",
            )
            dedup_keep_choice = st.sidebar.selectbox(
                "Conserver",
                options=["first", "last", "none"],
                index=0,
                help="Quelle occurrence conserver pour chaque doublon détecté",
            )
        fillna_blank = st.sidebar.checkbox("Remplacer NaN texte par vide", value=True)

        # Remove selected columns
        drop_columns = st.sidebar.multiselect(
            "Enlever des colonnes",
            options=list(df.columns),
            default=[],
            help="Supprimer des champs du jeu de données avant l'analyse",
            key="clean_drop_cols",
        )
        if drop_columns:
            df.drop(columns=drop_columns, inplace=True, errors="ignore")

        # Infer and allow override per column
        type_options = ["categorical", "numeric", "date", "text"]
        inferred = infer_pandas_types(df)
        for col in df.columns:
            logical_types[col] = st.sidebar.selectbox(
                f"Type pour {col}",
                options=type_options,
                index=type_options.index(inferred.get(col, "text")),
            )
            # Optional coercion
            if logical_types[col] in ("numeric", "date"):
                coercions[col] = logical_types[col]

        # Apply cleaning
        for col in df.columns:
            if df[col].dtype == object:
                # astype(str) would turn missing values into the literal text
                # "nan"; .where(...) puts the original missing values back so
                # they stay missing until the blank-fill option handles them.
                if trim_spaces:
                    present = df[col].notna()
                    df[col] = df[col].astype(str).str.strip().where(present, df[col])
                if lower_case:
                    present = df[col].notna()
                    df[col] = df[col].astype(str).str.lower().where(present, df[col])
                if fillna_blank:
                    df[col] = df[col].where(df[col].notna(), "")
            # Coerce types
            if coercions.get(col) == "numeric":
                df[col] = pd.to_numeric(df[col], errors="coerce")
            elif coercions.get(col) == "date":
                df[col] = try_parse_datetime(df[col])

        if drop_dupes:
            # pandas expects keep=False (not None) to drop every occurrence.
            keep_arg = False if dedup_keep_choice == "none" else dedup_keep_choice
            # A column picked for de-duplication may have been removed above.
            subset = [c for c in dedup_subset_cols if c in df.columns]
            df.drop_duplicates(subset=(subset if subset else None), keep=keep_arg, inplace=True)

        # Unique person keys
        st.sidebar.markdown("---")
        st.sidebar.markdown("### 👤 Personne unique")
        # Heuristic suggestions
        hints = ["email", "e-mail", "mail", "id", "identifiant", "cin", "passport", "matricule", "phone", "téléphone", "telephone", "tel"]
        suggested = [c for c in df.columns if any(h in c.lower() for h in hints)]
        unique_keys = st.sidebar.multiselect(
            "Champs d'unicité (sélection multiple)",
            options=list(df.columns),
            default=suggested,
            help="Sélectionnez les champs qui identifient de façon unique une personne.",
        )

    return df, logical_types, theme_mode, coercions, unique_keys
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
# -----------------------------
|
| 314 |
+
# Main App
|
| 315 |
+
# -----------------------------
|
| 316 |
+
def main():
|
| 317 |
+
inject_base_css()
|
| 318 |
+
|
| 319 |
+
# Header
|
| 320 |
+
col_logo, col_title, col_right = st.columns([1, 3, 1])
|
| 321 |
+
with col_logo:
|
| 322 |
+
logo_path = os.path.join("assets", "logo.png")
|
| 323 |
+
if os.path.exists(logo_path):
|
| 324 |
+
st.image(logo_path, width=72)
|
| 325 |
+
with col_title:
|
| 326 |
+
st.markdown("<h1 style='text-align:center; margin-top: 0;'>Tableau de bord des inscriptions</h1>", unsafe_allow_html=True)
|
| 327 |
+
with col_right:
|
| 328 |
+
st.write("")
|
| 329 |
+
|
| 330 |
+
df, type_map, theme_mode, _, unique_keys = sidebar_controls()
|
| 331 |
+
plotly_template = get_plotly_template(theme_mode)
|
| 332 |
+
|
| 333 |
+
if df is None or df.empty:
|
| 334 |
+
st.markdown(
|
| 335 |
+
"""
|
| 336 |
+
<div class="card">
|
| 337 |
+
<div class="card-title">Bienvenue 👋</div>
|
| 338 |
+
<p>Importez un fichier <b>.xlsx</b> contenant vos inscriptions pour commencer l'analyse.</p>
|
| 339 |
+
<ul>
|
| 340 |
+
<li>Assurez-vous que les colonnes principales (pays, formation, statut, date) sont présentes.</li>
|
| 341 |
+
<li>Vous pourrez mapper les colonnes dans la barre latérale.</li>
|
| 342 |
+
</ul>
|
| 343 |
+
</div>
|
| 344 |
+
""",
|
| 345 |
+
unsafe_allow_html=True,
|
| 346 |
+
)
|
| 347 |
+
return
|
| 348 |
+
|
| 349 |
+
# Filters (dynamic for all columns)
|
| 350 |
+
st.sidebar.markdown("---")
|
| 351 |
+
filtered_df = dynamic_filters(df, type_map)
|
| 352 |
+
|
| 353 |
+
# Optional unique-person filtering using selected keys
|
| 354 |
+
st.sidebar.markdown("### 👤 Filtrer par personne unique")
|
| 355 |
+
if 'unique_keys' not in locals():
|
| 356 |
+
unique_keys = []
|
| 357 |
+
if unique_keys:
|
| 358 |
+
person_filter = st.sidebar.checkbox("Activer le filtre d'unicité (drop_duplicates)", value=False, key="unique_filter_toggle")
|
| 359 |
+
keep_strategy = st.sidebar.selectbox("Conserver", options=["first", "last"], index=0, key="unique_filter_keep")
|
| 360 |
+
if person_filter:
|
| 361 |
+
try:
|
| 362 |
+
filtered_df = filtered_df.drop_duplicates(subset=unique_keys, keep=keep_strategy)
|
| 363 |
+
except Exception:
|
| 364 |
+
st.sidebar.warning("Impossible d'appliquer le filtre d'unicité. Vérifiez les champs choisis.")
|
| 365 |
+
|
| 366 |
+
# KPIs
|
| 367 |
+
total_count = len(filtered_df)
|
| 368 |
+
total_columns = filtered_df.shape[1]
|
| 369 |
+
total_missing = int(filtered_df.isna().sum().sum())
|
| 370 |
+
approx_dupes = int(filtered_df.duplicated().sum())
|
| 371 |
+
|
| 372 |
+
c1, c2, c3, c4 = st.columns(4)
|
| 373 |
+
with c1:
|
| 374 |
+
kpi_card("Lignes", f"{total_count:,}")
|
| 375 |
+
with c2:
|
| 376 |
+
kpi_card("Colonnes", f"{total_columns:,}")
|
| 377 |
+
with c3:
|
| 378 |
+
kpi_card("Valeurs manquantes", f"{total_missing:,}")
|
| 379 |
+
with c4:
|
| 380 |
+
kpi_card("Doublons (approx)", f"{approx_dupes:,}")
|
| 381 |
+
|
| 382 |
+
# Unique persons KPI (based on selected keys)
|
| 383 |
+
if unique_keys:
|
| 384 |
+
try:
|
| 385 |
+
uniq = (
|
| 386 |
+
filtered_df.dropna(subset=unique_keys)[unique_keys]
|
| 387 |
+
.astype(str)
|
| 388 |
+
.drop_duplicates()
|
| 389 |
+
.shape[0]
|
| 390 |
+
)
|
| 391 |
+
except Exception:
|
| 392 |
+
uniq = 0
|
| 393 |
+
c5, _ = st.columns([1, 3])
|
| 394 |
+
with c5:
|
| 395 |
+
kpi_card("Personnes uniques", f"{uniq:,}")
|
| 396 |
+
|
| 397 |
+
# Charts row 1: Program distribution, Country distribution
|
| 398 |
+
st.markdown("<div class=\"card\"><div class=\"card-title\">Répartitions clés</div>", unsafe_allow_html=True)
|
| 399 |
+
ctrl1, ctrl2, ctrl3 = st.columns([1,1,2])
|
| 400 |
+
with ctrl1:
|
| 401 |
+
topn = st.slider("Top N", min_value=3, max_value=50, value=10, step=1)
|
| 402 |
+
with ctrl2:
|
| 403 |
+
sort_dir = st.selectbox("Tri", options=["desc", "asc"], index=0)
|
| 404 |
+
with ctrl3:
|
| 405 |
+
st.caption("Appliqué aux graphiques de répartition ci-dessous")
|
| 406 |
+
charts_row_1 = st.columns(2)
|
| 407 |
+
# Choose any categorical column for distribution 1
|
| 408 |
+
cat_cols_all = [c for c in filtered_df.columns if type_map.get(c) in ("categorical", "text")]
|
| 409 |
+
if cat_cols_all and not filtered_df.empty:
|
| 410 |
+
dim1 = st.selectbox("Dimension 1 (répartition)", options=cat_cols_all, key="rep_dim1")
|
| 411 |
+
program_counts = (
|
| 412 |
+
filtered_df.groupby(dim1).size().reset_index(name="count").sort_values("count", ascending=(sort_dir=="asc"))
|
| 413 |
+
.head(topn)
|
| 414 |
+
)
|
| 415 |
+
fig_prog = px.bar(
|
| 416 |
+
program_counts,
|
| 417 |
+
x=dim1,
|
| 418 |
+
y="count",
|
| 419 |
+
template=plotly_template,
|
| 420 |
+
color_continuous_scale="Blues",
|
| 421 |
+
)
|
| 422 |
+
fig_prog.update_layout(margin=dict(l=10, r=10, t=10, b=10))
|
| 423 |
+
with charts_row_1[0]:
|
| 424 |
+
chart_card("Répartition (dimension 1)", fig_prog)
|
| 425 |
+
|
| 426 |
+
if cat_cols_all and not filtered_df.empty:
|
| 427 |
+
dim2 = st.selectbox("Dimension 2 (répartition)", options=[c for c in cat_cols_all], index=min(1, len(cat_cols_all)-1), key="rep_dim2")
|
| 428 |
+
country_counts = (
|
| 429 |
+
filtered_df.groupby(dim2).size().reset_index(name="count").sort_values("count", ascending=(sort_dir=="asc"))
|
| 430 |
+
.head(topn)
|
| 431 |
+
)
|
| 432 |
+
fig_country = px.pie(
|
| 433 |
+
country_counts,
|
| 434 |
+
names=dim2,
|
| 435 |
+
values="count",
|
| 436 |
+
template=plotly_template,
|
| 437 |
+
hole=0.35,
|
| 438 |
+
)
|
| 439 |
+
fig_country.update_layout(margin=dict(l=10, r=10, t=10, b=10))
|
| 440 |
+
with charts_row_1[1]:
|
| 441 |
+
chart_card("Répartition (dimension 2)", fig_country)
|
| 442 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 443 |
+
|
| 444 |
+
# Charts row 2: Status distribution, Time series
|
| 445 |
+
charts_row_2 = st.columns(2)
|
| 446 |
+
if cat_cols_all and not filtered_df.empty:
|
| 447 |
+
dim3 = st.selectbox("Dimension 3", options=cat_cols_all, key="rep_dim3")
|
| 448 |
+
status_counts = (
|
| 449 |
+
filtered_df.groupby(dim3).size().reset_index(name="count").sort_values("count", ascending=False)
|
| 450 |
+
)
|
| 451 |
+
fig_status = px.bar(
|
| 452 |
+
status_counts,
|
| 453 |
+
x=dim3,
|
| 454 |
+
y="count",
|
| 455 |
+
template=plotly_template,
|
| 456 |
+
color=dim3,
|
| 457 |
+
)
|
| 458 |
+
fig_status.update_layout(showlegend=False, margin=dict(l=10, r=10, t=10, b=10))
|
| 459 |
+
with charts_row_2[0]:
|
| 460 |
+
chart_card("Répartition (dimension 3)", fig_status)
|
| 461 |
+
|
| 462 |
+
# date_cols = [c for c in filtered_df.columns if type_map.get(c) == "date"]
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
# Charts row 3: Numeric histogram (user-selectable)
|
| 466 |
+
# numeric_cols = [c for c in filtered_df.columns if pd.api.types.is_numeric_dtype(filtered_df[c])]
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
# Ad-hoc analysis builder
|
| 470 |
+
st.markdown("<div class=\"card\"><div class=\"card-title\">Zone d’analyse</div>", unsafe_allow_html=True)
|
| 471 |
+
cat_cols = [c for c in filtered_df.columns if type_map.get(c) in ("categorical", "text")]
|
| 472 |
+
if cat_cols:
|
| 473 |
+
ac1, ac2, ac3 = st.columns([2,1,1])
|
| 474 |
+
with ac1:
|
| 475 |
+
dim_col = st.selectbox("Dimension", options=cat_cols)
|
| 476 |
+
with ac2:
|
| 477 |
+
chart_type = st.selectbox("Type de graphique", options=["Barres", "Camembert"], index=0)
|
| 478 |
+
with ac3:
|
| 479 |
+
topn_dim = st.slider("Top N (dimension)", 3, 50, 10)
|
| 480 |
+
|
| 481 |
+
agg = filtered_df.groupby(dim_col).size().reset_index(name="count").sort_values("count", ascending=False).head(topn_dim)
|
| 482 |
+
if chart_type == "Barres":
|
| 483 |
+
fig = px.bar(agg, x=dim_col, y="count", template=plotly_template)
|
| 484 |
+
else:
|
| 485 |
+
fig = px.pie(agg, names=dim_col, values="count", template=plotly_template, hole=0.35)
|
| 486 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 487 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
# Drilldown option (simple): filtrer sur une dimension/valeur
|
| 491 |
+
dd_cols = cat_cols
|
| 492 |
+
dd1, dd2 = st.columns([1,2])
|
| 493 |
+
with dd1:
|
| 494 |
+
dd_dim = st.selectbox("Drilldown - dimension", options=[None] + dd_cols)
|
| 495 |
+
if dd_dim:
|
| 496 |
+
values = [x for x in filtered_df[dd_dim].dropna().astype(str).unique()]
|
| 497 |
+
with dd2:
|
| 498 |
+
dd_val = st.selectbox("Valeur", options=[None] + values)
|
| 499 |
+
if dd_val:
|
| 500 |
+
filtered_df = filtered_df[filtered_df[dd_dim].astype(str) == dd_val]
|
| 501 |
+
|
| 502 |
+
search_query = st.text_input("Recherche globale")
|
| 503 |
+
df_searched = apply_search(filtered_df, search_query)
|
| 504 |
+
st.dataframe(df_searched, use_container_width=True, hide_index=True)
|
| 505 |
+
|
| 506 |
+
# Downloads
|
| 507 |
+
csv_bytes = df_searched.to_csv(index=False).encode("utf-8-sig")
|
| 508 |
+
xlsx_bytes = to_excel_bytes(df_searched)
|
| 509 |
+
dc1, dc2 = st.columns(2)
|
| 510 |
+
with dc1:
|
| 511 |
+
st.download_button(
|
| 512 |
+
"Télécharger CSV",
|
| 513 |
+
data=csv_bytes,
|
| 514 |
+
file_name="inscriptions_filtrees.csv",
|
| 515 |
+
mime="text/csv",
|
| 516 |
+
use_container_width=True,
|
| 517 |
+
)
|
| 518 |
+
with dc2:
|
| 519 |
+
st.download_button(
|
| 520 |
+
"Télécharger Excel",
|
| 521 |
+
data=xlsx_bytes,
|
| 522 |
+
file_name="inscriptions_filtrees.xlsx",
|
| 523 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 524 |
+
use_container_width=True,
|
| 525 |
+
)
|
| 526 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 527 |
+
|
| 528 |
+
# Universal Chart Builder
|
| 529 |
+
st.markdown("<div class=\"card\"><div class=\"card-title\">Constructeur de graphiques</div>", unsafe_allow_html=True)
|
| 530 |
+
chart_types = [
|
| 531 |
+
"Barres",
|
| 532 |
+
"Barres empilées",
|
| 533 |
+
"Lignes",
|
| 534 |
+
"Aires",
|
| 535 |
+
"Camembert",
|
| 536 |
+
"Histogramme",
|
| 537 |
+
"Nuage de points",
|
| 538 |
+
"Boîte (Box)",
|
| 539 |
+
"Violon",
|
| 540 |
+
]
|
| 541 |
+
cA, cB, cC = st.columns([1.2, 1, 1])
|
| 542 |
+
with cA:
|
| 543 |
+
chosen_chart = st.selectbox("Type de graphique", options=chart_types, key="ub_chart_type")
|
| 544 |
+
with cB:
|
| 545 |
+
agg_choice = st.selectbox("Agrégat", options=["count", "sum", "mean", "median", "min", "max"], index=0, key="ub_agg")
|
| 546 |
+
with cC:
|
| 547 |
+
topn_builder = st.number_input("Top N (optionnel)", min_value=0, value=0, step=1, help="0 pour désactiver")
|
| 548 |
+
|
| 549 |
+
all_cols = list(filtered_df.columns)
|
| 550 |
+
num_cols = [c for c in all_cols if pd.api.types.is_numeric_dtype(filtered_df[c])]
|
| 551 |
+
date_cols_any = [c for c in all_cols if pd.api.types.is_datetime64_any_dtype(try_parse_datetime(filtered_df[c]))]
|
| 552 |
+
cat_cols_any = [c for c in all_cols if c not in num_cols]
|
| 553 |
+
|
| 554 |
+
def aggregate_df(df_src: pd.DataFrame, x_col: Optional[str], y_col: Optional[str], color_col: Optional[str]) -> pd.DataFrame:
    """Aggregate ``df_src`` into a frame whose last column is ``value``.

    The aggregate is taken from ``agg_choice`` and the list of numeric
    columns from ``num_cols``; both are read from the enclosing scope.

    Args:
        df_src: Frame to aggregate.
        x_col: Column used for the X axis, or ``None``.
        y_col: Column used as measure / second grouping key, or ``None``.
        color_col: Optional column used to split the series by colour.

    Returns:
        A frame with one column per grouping key followed by ``value``.
        When there is no grouping key at all, a single-row frame holding
        only ``value`` is returned.
    """

    def _unique(cols):
        # Keep first occurrences only: the same column may be picked for X
        # and for colour, and duplicated group keys make reset_index() fail.
        return list(dict.fromkeys(cols))

    def _count_by(keys):
        # Row count per group, or the total row count without any key.
        if not keys:
            return pd.DataFrame({"value": [len(df_src)]})
        return df_src.groupby(keys).size().reset_index(name="value")

    if agg_choice == "count":
        keys = [c for c in (x_col, y_col) if c is not None]
        if keys and color_col:
            # Always carry the colour column so the caller can plot with
            # color=color_col (it used to be dropped when x and y were both set).
            keys.append(color_col)
        return _count_by(_unique(keys))

    # Pick the numeric column to aggregate: y first, then x, then the first
    # numeric column available.
    if y_col in num_cols:
        measure = y_col
    elif x_col in num_cols:
        measure = x_col
    elif num_cols:
        measure = num_cols[0]
    else:
        measure = None

    if measure is None:
        # Nothing numeric to aggregate: fall back to counting rows.
        if not x_col:
            return _count_by([])
        return _count_by(_unique([x_col, color_col] if color_col else [x_col]))

    group_keys = _unique([k for k in (x_col, color_col) if k])
    if not group_keys:
        # groupby([]) raises; aggregate the whole column instead.
        return pd.DataFrame({"value": [df_src[measure].agg(agg_choice)]})
    return df_src.groupby(group_keys, dropna=False)[measure].agg(agg_choice).reset_index(name="value")
|
| 572 |
+
|
| 573 |
+
if chosen_chart in ("Barres", "Barres empilées"):
|
| 574 |
+
x = st.selectbox("Axe X (cat/date)", options=cat_cols_any, key="ub_bar_x")
|
| 575 |
+
color = st.selectbox("Couleur (optionnel)", options=[None] + cat_cols_any, key="ub_bar_color")
|
| 576 |
+
measure = st.selectbox("Mesure (numérique ou count)", options=["(count)"] + num_cols, key="ub_bar_measure")
|
| 577 |
+
data = aggregate_df(filtered_df, x, None if measure == "(count)" else measure, color)
|
| 578 |
+
if topn_builder and topn_builder > 0 and x in data.columns:
|
| 579 |
+
data = data.sort_values("value", ascending=False).groupby(x).head(1).head(int(topn_builder))
|
| 580 |
+
if chosen_chart == "Barres":
|
| 581 |
+
fig = px.bar(data, x=x, y="value", color=color, template=plotly_template, barmode="group")
|
| 582 |
+
else:
|
| 583 |
+
fig = px.bar(data, x=x, y="value", color=color, template=plotly_template, barmode="relative")
|
| 584 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 585 |
+
elif chosen_chart in ("Lignes", "Aires"):
|
| 586 |
+
x = st.selectbox("Axe X (date recommandé)", options=date_cols_any or cat_cols_any, key="ub_line_x")
|
| 587 |
+
color = st.selectbox("Couleur (optionnel)", options=[None] + cat_cols_any, key="ub_line_color")
|
| 588 |
+
measure = st.selectbox("Mesure (numérique ou count)", options=["(count)"] + num_cols, key="ub_line_measure")
|
| 589 |
+
data = aggregate_df(filtered_df, x, None if measure == "(count)" else measure, color)
|
| 590 |
+
if chosen_chart == "Lignes":
|
| 591 |
+
fig = px.line(data, x=x, y="value", color=color, template=plotly_template)
|
| 592 |
+
else:
|
| 593 |
+
fig = px.area(data, x=x, y="value", color=color, template=plotly_template)
|
| 594 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 595 |
+
elif chosen_chart == "Camembert":
|
| 596 |
+
names = st.selectbox("Noms (catégorie)", options=cat_cols_any, key="ub_pie_names")
|
| 597 |
+
measure = st.selectbox("Mesure (numérique ou count)", options=["(count)"] + num_cols, key="ub_pie_measure")
|
| 598 |
+
if measure == "(count)":
|
| 599 |
+
data = filtered_df.groupby(names).size().reset_index(name="value")
|
| 600 |
+
else:
|
| 601 |
+
data = filtered_df.groupby(names)[measure].sum().reset_index(name="value")
|
| 602 |
+
fig = px.pie(data, names=names, values="value", template=plotly_template, hole=0.35)
|
| 603 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 604 |
+
elif chosen_chart == "Histogramme":
|
| 605 |
+
x = st.selectbox("Colonne numérique", options=num_cols, key="ub_hist_x")
|
| 606 |
+
bins = st.slider("Nb de bacs (bins)", 5, 100, 30)
|
| 607 |
+
fig = px.histogram(filtered_df, x=x, nbins=bins, template=plotly_template)
|
| 608 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 609 |
+
elif chosen_chart == "Nuage de points":
|
| 610 |
+
x = st.selectbox("X (numérique)", options=num_cols, key="ub_scatter_x")
|
| 611 |
+
y = st.selectbox("Y (numérique)", options=[c for c in num_cols if c != x], key="ub_scatter_y")
|
| 612 |
+
color = st.selectbox("Couleur (optionnel)", options=[None] + cat_cols_any, key="ub_scatter_color")
|
| 613 |
+
fig = px.scatter(filtered_df, x=x, y=y, color=color, template=plotly_template)
|
| 614 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 615 |
+
elif chosen_chart == "Boîte (Box)":
|
| 616 |
+
y = st.selectbox("Y (numérique)", options=num_cols, key="ub_box_y")
|
| 617 |
+
x = st.selectbox("X (catégorie optionnel)", options=[None] + cat_cols_any, key="ub_box_x")
|
| 618 |
+
fig = px.box(filtered_df, x=x, y=y, template=plotly_template)
|
| 619 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 620 |
+
elif chosen_chart == "Violon":
|
| 621 |
+
y = st.selectbox("Y (numérique)", options=num_cols, key="ub_violin_y")
|
| 622 |
+
x = st.selectbox("X (catégorie optionnel)", options=[None] + cat_cols_any, key="ub_violin_x")
|
| 623 |
+
fig = px.violin(filtered_df, x=x, y=y, template=plotly_template, box=True, points="outliers")
|
| 624 |
+
st.plotly_chart(fig, use_container_width=True, theme=None)
|
| 625 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 626 |
+
|
| 627 |
+
# Decision Maker View (field-aware, optional)
|
| 628 |
+
st.markdown("<div class=\"card\"><div class=\"card-title\">Vue Décideur (si champs disponibles)</div>", unsafe_allow_html=True)
|
| 629 |
+
# Candidate fields based on provided list
|
| 630 |
+
col_email = find_column(filtered_df, ["Email"]) or find_column(filtered_df, ["E-mail"])
|
| 631 |
+
col_gender = find_column(filtered_df, ["Genre", "Autre genre (Veuillez préciser) : "])
|
| 632 |
+
col_nat = find_column(filtered_df, ["Nationalité"])
|
| 633 |
+
col_country = find_column(filtered_df, ["Pays de résidence"]) or find_column(filtered_df, ["D’où préférez-vous participer à l'événement ?"])
|
| 634 |
+
col_role = find_column(filtered_df, ["Votre profession / statut", "Autre profession (veuillez préciser)"])
|
| 635 |
+
col_aff = find_column(filtered_df, ["Affiliation", "Autre affiliation (Veuillez préciser) : "])
|
| 636 |
+
col_particip = find_column(filtered_df, ["Avez-vous déjà participé à un événement Indaba X Togo ?"])
|
| 637 |
+
col_mode_formation = find_column(filtered_df, ["Comment voulez-vous participer aux formations ?"])
|
| 638 |
+
col_what_do = find_column(filtered_df, ["Que voulez-vous faire ?"])
|
| 639 |
+
col_skills = {
|
| 640 |
+
"Python": find_column(filtered_df, ["Quel est votre niveau en [Python]", "Quel est votre niveau en [Python]"]),
|
| 641 |
+
"Numpy": find_column(filtered_df, ["Quel est votre niveau en [Numpy]", "Quel est votre niveau en [Numpy]"]),
|
| 642 |
+
"Pandas": find_column(filtered_df, ["Quel est votre niveau en [Pandas]", "Quel est votre niveau en [Pandas]"]),
|
| 643 |
+
"Scikit Learn": find_column(filtered_df, ["Quel est votre niveau en [Scikit Learn]", "Quel est votre niveau en [Scikit Learn]"]),
|
| 644 |
+
"Pytorch": find_column(filtered_df, ["Quel est votre niveau en [Pytorch]", "Quel est votre niveau en [Pytorch]"]),
|
| 645 |
+
"Deep Learning": find_column(filtered_df, ["Quel est votre niveau en [Deep Learning]", "Quel est votre niveau en [Deep Learning]"]),
|
| 646 |
+
}
|
| 647 |
+
|
| 648 |
+
# KPIs for decision maker
|
| 649 |
+
kcols = st.columns(4)
|
| 650 |
+
with kcols[0]:
|
| 651 |
+
kpi_card("Inscriptions", f"{len(filtered_df):,}")
|
| 652 |
+
with kcols[1]:
|
| 653 |
+
if col_email:
|
| 654 |
+
uniq_people = filtered_df[col_email].astype(str).str.strip().str.lower().dropna().nunique()
|
| 655 |
+
kpi_card("Personnes uniques (email)", f"{uniq_people:,}")
|
| 656 |
+
else:
|
| 657 |
+
kpi_card("Personnes uniques", "-")
|
| 658 |
+
with kcols[2]:
|
| 659 |
+
if col_country and col_country in filtered_df.columns:
|
| 660 |
+
kpi_card("Pays (distincts)", f"{filtered_df[col_country].astype(str).nunique():,}")
|
| 661 |
+
else:
|
| 662 |
+
kpi_card("Pays (distincts)", "-")
|
| 663 |
+
with kcols[3]:
|
| 664 |
+
if col_role and col_role in filtered_df.columns:
|
| 665 |
+
kpi_card("Profils (distincts)", f"{filtered_df[col_role].astype(str).nunique():,}")
|
| 666 |
+
else:
|
| 667 |
+
kpi_card("Profils (distincts)", "-")
|
| 668 |
+
|
| 669 |
+
# Row 1 charts: Gender, Country
|
| 670 |
+
dm1 = st.columns(2)
|
| 671 |
+
if col_gender and col_gender in filtered_df.columns and not filtered_df.empty:
|
| 672 |
+
gcounts = filtered_df.groupby(col_gender).size().reset_index(name="count").sort_values("count", ascending=False)
|
| 673 |
+
fig_g = px.pie(gcounts, names=col_gender, values="count", template=get_plotly_template(theme_mode), hole=0.35)
|
| 674 |
+
with dm1[0]:
|
| 675 |
+
chart_card("Répartition par genre", fig_g)
|
| 676 |
+
if col_country and col_country in filtered_df.columns and not filtered_df.empty:
|
| 677 |
+
ccounts = filtered_df.groupby(col_country).size().reset_index(name="count").sort_values("count", ascending=False).head(15)
|
| 678 |
+
fig_c = px.bar(ccounts, x=col_country, y="count", template=get_plotly_template(theme_mode))
|
| 679 |
+
with dm1[1]:
|
| 680 |
+
chart_card("Top 15 pays de résidence", fig_c)
|
| 681 |
+
|
| 682 |
+
# Row 2: Participation history and roles
|
| 683 |
+
dm2 = st.columns(2)
|
| 684 |
+
if col_particip and col_particip in filtered_df.columns and not filtered_df.empty:
|
| 685 |
+
pcounts = filtered_df.groupby(col_particip).size().reset_index(name="count")
|
| 686 |
+
fig_p = px.bar(pcounts, x=col_particip, y="count", template=get_plotly_template(theme_mode))
|
| 687 |
+
with dm2[0]:
|
| 688 |
+
chart_card("A déjà participé ?", fig_p)
|
| 689 |
+
if col_role and col_role in filtered_df.columns and not filtered_df.empty:
|
| 690 |
+
rcounts = filtered_df.groupby(col_role).size().reset_index(name="count").sort_values("count", ascending=False).head(15)
|
| 691 |
+
fig_r = px.bar(rcounts, x=col_role, y="count", template=get_plotly_template(theme_mode))
|
| 692 |
+
with dm2[1]:
|
| 693 |
+
chart_card("Professions / Statuts (Top 15)", fig_r)
|
| 694 |
+
|
| 695 |
+
# Row 2b: Formations participation mode and intentions
|
| 696 |
+
dm2b = st.columns(2)
|
| 697 |
+
if col_mode_formation and col_mode_formation in filtered_df.columns and not filtered_df.empty:
|
| 698 |
+
mcounts = (
|
| 699 |
+
filtered_df.groupby(col_mode_formation).size().reset_index(name="count").sort_values("count", ascending=False)
|
| 700 |
+
)
|
| 701 |
+
fig_m = px.bar(mcounts, x=col_mode_formation, y="count", template=get_plotly_template(theme_mode))
|
| 702 |
+
with dm2b[0]:
|
| 703 |
+
chart_card("Mode de participation aux formations", fig_m)
|
| 704 |
+
if col_what_do and col_what_do in filtered_df.columns and not filtered_df.empty:
|
| 705 |
+
wcounts = (
|
| 706 |
+
filtered_df.groupby(col_what_do).size().reset_index(name="count").sort_values("count", ascending=False).head(15)
|
| 707 |
+
)
|
| 708 |
+
fig_w = px.bar(wcounts, x=col_what_do, y="count", template=get_plotly_template(theme_mode))
|
| 709 |
+
with dm2b[1]:
|
| 710 |
+
chart_card("Intentions: Que voulez-vous faire ? (Top 15)", fig_w)
|
| 711 |
+
|
| 712 |
+
# Row 3: Skills radar-like bars
|
| 713 |
+
skill_pairs = [(name, col) for name, col in col_skills.items() if col]
|
| 714 |
+
if skill_pairs:
|
| 715 |
+
sm = []
|
| 716 |
+
for name, col in skill_pairs:
|
| 717 |
+
# Map text levels to ordered scale if needed
|
| 718 |
+
s = filtered_df[col].astype(str).str.strip().str.lower()
|
| 719 |
+
order = ["débutant", "intermédiaire", "avancé", "expert"]
|
| 720 |
+
s = s.where(s.isin(order), s)
|
| 721 |
+
d = s.value_counts().reindex(order).fillna(0).rename_axis("niveau").reset_index(name="count")
|
| 722 |
+
d["skill"] = name
|
| 723 |
+
sm.append(d)
|
| 724 |
+
if sm:
|
| 725 |
+
skill_df = pd.concat(sm, ignore_index=True)
|
| 726 |
+
fig_skill = px.bar(skill_df, x="skill", y="count", color="niveau", barmode="group", template=get_plotly_template(theme_mode))
|
| 727 |
+
chart_card("Niveaux par compétence", fig_skill)
|
| 728 |
+
|
| 729 |
+
st.markdown("</div>", unsafe_allow_html=True)
|
| 730 |
+
|
| 731 |
+
|
| 732 |
+
# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|
| 734 |
+
|
| 735 |
+
|