analist committed on
Commit
8d624f4
·
verified ·
1 Parent(s): b0d7032

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +27 -0
  2. streamlit_app.py +735 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the Streamlit registration dashboard (streamlit_app.py).
FROM python:3.12-slim

# PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED: no .pyc files, unbuffered stdout/stderr.
# PIP_NO_CACHE_DIR: keep pip's download cache out of the image layers.
# PORT: port Streamlit listens on; can be overridden at run time (-e PORT=...).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=8501

WORKDIR /app

# System deps (presumably for pip packages that compile native extensions)
RUN apt-get update -y && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching
COPY requirements.txt /app/requirements.txt
RUN python -m pip install --upgrade pip && \
    pip install -r requirements.txt

# Copy application
COPY . /app

# Documents the default port only; the effective port follows ${PORT}.
EXPOSE 8501

# The exec-form array does not expand variables, so run through `sh -c` to honour
# ${PORT} (previously declared but ignored). `exec` replaces the shell so that
# Streamlit runs as PID 1 and receives container stop signals directly.
CMD ["sh", "-c", "exec streamlit run streamlit_app.py --server.port=${PORT:-8501} --server.address=0.0.0.0 --browser.gatherUsageStats=false"]
26
+
27
+
streamlit_app.py ADDED
@@ -0,0 +1,735 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ from datetime import datetime, date
4
+ from typing import Dict, List, Optional, Tuple
5
+
6
+ import pandas as pd
7
+ import plotly.express as px
8
+ import streamlit as st
9
+
10
+
11
+ # -----------------------------
12
+ # App Configuration
13
+ # -----------------------------
14
+ st.set_page_config(
15
+ page_title="Tableau de bord des inscriptions",
16
+ page_icon="🧭",
17
+ layout="wide",
18
+ initial_sidebar_state="expanded",
19
+ )
20
+
21
+
22
+ # -----------------------------
23
+ # Utilities
24
+ # -----------------------------
25
def try_parse_datetime(series: pd.Series) -> pd.Series:
    """Attempt to parse a pandas Series as datetimes, returning original on failure.

    The parsed result is returned only when at least 3 values, and at least
    20% of the non-missing values, were recognised as dates. Otherwise the
    input Series is returned unchanged.

    Args:
        series: Column to probe.

    Returns:
        A datetime64 Series when the heuristic succeeds, else ``series`` itself.
    """
    import warnings

    if pd.api.types.is_datetime64_any_dtype(series):
        return series
    # Numbers and booleans would be interpreted as epoch offsets, so every
    # value "parses" and the heuristic below could never reject them.
    if pd.api.types.is_bool_dtype(series) or pd.api.types.is_numeric_dtype(series):
        return series
    try:
        with warnings.catch_warnings():
            # This is a speculative probe: format-inference warnings are noise here.
            warnings.simplefilter("ignore")
            parsed = pd.to_datetime(series, errors="coerce")
    except (ValueError, TypeError, OverflowError):
        return series
    # Some inputs (e.g. mixed time zones) can come back as object dtype; callers
    # rely on the dtype to know whether parsing worked.
    if not pd.api.types.is_datetime64_any_dtype(parsed):
        return series
    # Measure the success rate against the values that are actually present so
    # that sparsely filled date columns are still detected.
    present = int(series.notna().sum())
    if int(parsed.notna().sum()) >= max(3, int(0.2 * present)):
        return parsed
    return series
36
+
37
+
38
def make_unique_columns(columns: List[str]) -> List[str]:
    """Ensure column names are unique by appending suffixes (2), (3), ...

    The first occurrence of a label is kept as is. Later occurrences receive
    the smallest free ``" (n)"`` suffix, skipping any name that already exists
    in the input so a generated name never collides with a real column.

    Args:
        columns: Column labels, possibly containing duplicates or non-strings.

    Returns:
        A list of the same length holding distinct string labels.
    """
    labels = [str(name) for name in columns]
    reserved = set(labels)  # generated names must not shadow a real label
    emitted = set()
    last_suffix: Dict[str, int] = {}
    unique_cols: List[str] = []
    for base in labels:
        candidate = base
        if base in emitted:
            n = last_suffix.get(base, 1)
            while True:
                n += 1
                candidate = f"{base} ({n})"
                if candidate not in reserved and candidate not in emitted:
                    break
            last_suffix[base] = n
        emitted.add(candidate)
        unique_cols.append(candidate)
    return unique_cols
51
+
52
+
53
def normalize_label(text: str) -> str:
    """Return a canonical form of a column label for tolerant comparisons.

    The label is Unicode-normalised (NFKC, which also maps non-breaking spaces
    to plain spaces and composes accents), typographic apostrophes are mapped
    to ``'``, the text is lower-cased and runs of whitespace are collapsed.
    """
    import unicodedata

    label = unicodedata.normalize("NFKC", str(text))
    label = label.replace("\u2019", "'").replace("\u2018", "'")
    # str.split() with no argument splits on any Unicode whitespace and drops
    # leading/trailing blanks, so this both trims and collapses.
    return " ".join(label.lower().split())


def find_column(df: pd.DataFrame, candidates: List[str]) -> Optional[str]:
    """Return the first matching column by normalized name from candidates.

    Candidates are tried in order. When several columns normalise to the same
    label, the left-most column is returned.

    Args:
        df: Frame whose column labels are searched.
        candidates: Accepted labels, most preferred first.

    Returns:
        The original column label, or ``None`` when nothing matches.
    """
    norm_to_col = {}
    for column in df.columns:
        # setdefault keeps the first column instead of letting later ones overwrite it
        norm_to_col.setdefault(normalize_label(column), column)
    for cand in candidates:
        n = normalize_label(cand)
        if n in norm_to_col:
            return norm_to_col[n]
    return None
68
+
69
def infer_pandas_types(df: pd.DataFrame) -> Dict[str, str]:
    """Return a mapping of column -> inferred logical type: 'categorical' | 'numeric' | 'date' | 'text'.

    Rules, in order: datetime dtype -> 'date'; boolean dtype -> 'categorical';
    other numeric dtype -> 'numeric'; values accepted by ``try_parse_datetime``
    -> 'date'; otherwise 'categorical' when the number of distinct non-missing
    values is at most ``max(50, 5% of rows)``, else 'text'.
    """
    type_map: Dict[str, str] = {}
    cardinality_limit = max(50, len(df) * 0.05)
    for col in df.columns:
        s = df[col]
        if pd.api.types.is_datetime64_any_dtype(s):
            type_map[col] = "date"
        elif pd.api.types.is_bool_dtype(s):
            type_map[col] = "categorical"
        elif pd.api.types.is_numeric_dtype(s):
            type_map[col] = "numeric"
        elif pd.api.types.is_datetime64_any_dtype(try_parse_datetime(s)):
            type_map[col] = "date"
        else:
            # Drop missing values before stringifying, otherwise they are
            # counted as an extra "nan" category.
            nunique = s.dropna().astype(str).nunique()
            type_map[col] = "categorical" if nunique <= cardinality_limit else "text"
    return type_map
90
+
91
+
92
def dynamic_filters(df: pd.DataFrame, type_map: Dict[str, str]) -> pd.DataFrame:
    """Render dynamic filters for all columns and return the filtered DataFrame.

    One sidebar widget is created per column: a range slider for numeric
    columns, a date-range picker for date columns and a multiselect for
    everything else. Filters cascade: each widget is built from the rows that
    survived the previous columns' filters.

    A widget left at its default (full range / no selection) does not filter
    at all, so rows with missing values are only removed once the user
    actually narrows that column.

    Args:
        df: Source data; it is copied, never modified.
        type_map: Column -> logical type ('numeric', 'date', 'categorical', 'text').

    Returns:
        The rows matching every active filter.
    """
    filtered = df.copy()
    st.sidebar.markdown("### 🔎 Filtres dynamiques")
    for col in filtered.columns:
        logical = type_map.get(col, "text")
        if logical == "numeric" and pd.api.types.is_numeric_dtype(filtered[col]):
            series_num = pd.to_numeric(filtered[col], errors="coerce")
            valid = series_num.dropna()
            if valid.empty:
                st.sidebar.caption(f"{col}: aucune valeur numérique exploitable")
                continue
            min_v = float(valid.min())
            max_v = float(valid.max())
            if min_v == max_v:
                # A slider needs two distinct bounds; nothing to filter anyway.
                st.sidebar.caption(f"{col}: valeur unique {min_v}")
                continue
            vmin, vmax = st.sidebar.slider(f"{col} (min-max)", min_value=min_v, max_value=max_v, value=(min_v, max_v))
            # Only filter when the range was narrowed: comparisons are False
            # for NaN, so an unconditional mask would drop missing values.
            if vmin > min_v or vmax < max_v:
                filtered = filtered[(series_num >= vmin) & (series_num <= vmax)]
        elif logical == "date":
            parsed = try_parse_datetime(filtered[col])
            if not pd.api.types.is_datetime64_any_dtype(parsed):
                continue
            dmin = parsed.min()
            dmax = parsed.max()
            default_start = dmin.date() if pd.notna(dmin) else date.today()
            default_end = dmax.date() if pd.notna(dmax) else date.today()
            start_end = st.sidebar.date_input(f"{col} (période)", value=(default_start, default_end))
            # While the user is still picking the second date the widget
            # returns an incomplete selection; wait for both ends.
            if not (isinstance(start_end, tuple) and len(start_end) == 2):
                continue
            start, end = start_end
            if (start, end) == (default_start, default_end):
                continue  # untouched: keep every row, including missing dates
            stamps = parsed
            if getattr(stamps.dt, "tz", None) is not None:
                # Compare on local wall-clock time, like the calendar dates shown.
                stamps = stamps.dt.tz_localize(None)
            lower = pd.Timestamp(start)
            upper = pd.Timestamp(end) + pd.Timedelta(days=1)  # end day is inclusive
            filtered = filtered[(stamps >= lower) & (stamps < upper)]
        else:
            # categorical or text -> multiselect of unique values (first 200, sorted)
            uniques = filtered[col].dropna().astype(str).unique().tolist()
            uniques = sorted(uniques)[:200]
            selected = st.sidebar.multiselect(f"{col}", options=uniques, default=[])
            if selected:
                filtered = filtered[filtered[col].astype(str).isin(selected)]
    return filtered
130
+
131
+
132
def apply_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Return the rows in which any cell contains ``query`` (case-insensitive).

    The query is matched as plain text, not as a regular expression, so
    characters such as ``(`` or ``+`` are safe. Missing cells never match.
    An empty or blank query returns ``df`` unchanged.

    Args:
        df: Data to search.
        query: Text typed by the user; may be empty or ``None``.

    Returns:
        The matching rows, in their original order.
    """
    needle = query.strip().lower() if query else ""
    if not needle:
        return df
    mask = pd.Series(False, index=df.index)
    for col in df.columns:
        column = df[col]
        haystack = column.astype(str).str.lower()
        # notna() keeps stringified missing values ("nan", "none") from matching.
        mask = mask | (haystack.str.contains(needle, regex=False, na=False) & column.notna())
    return df[mask]
141
+
142
+
143
def to_excel_bytes(df: pd.DataFrame, sheet_name: str = "inscriptions") -> bytes:
    """Serialise ``df`` to an in-memory .xlsx workbook and return its bytes.

    Args:
        df: Data to export; the index is not written.
        sheet_name: Name of the single worksheet.

    Returns:
        The content of the workbook, ready for a download button.
    """
    buffer = io.BytesIO()
    try:
        writer = pd.ExcelWriter(buffer, engine="xlsxwriter")
    except ImportError:
        # xlsxwriter is an optional dependency; openpyxl is the engine pandas
        # uses to read .xlsx files, so it is the natural fallback.
        writer = pd.ExcelWriter(buffer, engine="openpyxl")
    with writer:
        df.to_excel(writer, index=False, sheet_name=sheet_name)
    # The workbook is only complete once the writer has been closed.
    return buffer.getvalue()
148
+
149
+
150
def kpi_card(label: str, value: str) -> None:
    """Render a KPI tile (label above value) using the ``card kpi`` CSS classes.

    Both texts are HTML-escaped because they are injected into raw markup.
    """
    import html

    safe_label = html.escape(str(label))
    safe_value = html.escape(str(value))
    st.markdown(
        '<div class="card kpi">'
        f'<div class="card-label">{safe_label}</div>'
        f'<div class="card-value">{safe_value}</div>'
        "</div>",
        unsafe_allow_html=True,
    )
160
+
161
+
162
def chart_card(title: str, fig) -> None:
    """Render a card-styled title followed by a Plotly figure.

    Args:
        title: Heading shown above the chart (HTML-escaped).
        fig: Plotly figure to display at container width.
    """
    import html

    # Every st.markdown call is rendered as its own element, so a <div> opened
    # in one call cannot enclose the chart and be closed in a later call.
    # Emit balanced markup for the header instead of a dangling open/close pair.
    st.markdown(
        f'<div class="card"><div class="card-title">{html.escape(str(title))}</div></div>',
        unsafe_allow_html=True,
    )
    st.plotly_chart(fig, use_container_width=True, theme=None)
166
+
167
+
168
def inject_base_css() -> None:
    """Inject ``assets/styles.css`` into the page when the file is available.

    The stylesheet is optional, like the logo: when it cannot be read the app
    keeps running with Streamlit's default styling and a warning is logged.
    """
    import logging

    css_path = os.path.join("assets", "styles.css")
    try:
        with open(css_path, "r", encoding="utf-8") as f:
            css = f.read()
    except OSError as exc:
        logging.getLogger(__name__).warning(
            "Stylesheet %s could not be read (%s); using default styling.", css_path, exc
        )
        return
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
172
+
173
+
174
def set_theme_variables(mode: str):
    """Publish the CSS custom properties of the selected theme on ``:root``.

    ``mode`` is "dark" or "light"; any other value falls back to the light
    palette. Only the page styling is affected here, Plotly figures get their
    colours from the template chosen elsewhere.
    """
    light_palette = {
        "--bg": "#f7f9fc",
        "--card": "#ffffff",
        "--text": "#0f172a",
        "--muted": "#64748b",
        "--primary": "#0ea5e9",
        "--accent": "#10b981",
        "--border": "#e5e7eb",
    }
    dark_palette = {
        "--bg": "#0b1220",
        "--card": "#111827",
        "--text": "#e5e7eb",
        "--muted": "#94a3b8",
        "--primary": "#38bdf8",
        "--accent": "#34d399",
        "--border": "#1f2937",
    }
    chosen = dark_palette if mode == "dark" else light_palette
    declarations = ";".join(f"{name}:{value}" for name, value in chosen.items())
    st.markdown("<style>:root{" + declarations + "}</style>", unsafe_allow_html=True)
199
+
200
+
201
def get_plotly_template(mode: str) -> str:
    """Return the Plotly template name matching the theme mode.

    "dark" selects ``plotly_dark``; every other value selects ``plotly_white``.
    """
    if mode == "dark":
        return "plotly_dark"
    return "plotly_white"
203
+
204
+
205
+ # -----------------------------
206
+ # Sidebar: Logo, Upload, Theme, Column mapping
207
+ # -----------------------------
208
def sidebar_controls() -> Tuple[Optional[pd.DataFrame], Dict[str, str], str, Dict[str, str], List[str]]:
    """Render the sidebar (theme, logo, upload, cleaning, typing, person keys).

    Returns:
        A tuple ``(df, logical_types, theme_mode, coercions, unique_keys)``:

        * ``df``: cleaned data of the first sheet of the uploaded workbook, or
          ``None`` when nothing was uploaded or the file could not be read;
        * ``logical_types``: column -> 'categorical' | 'numeric' | 'date' | 'text'
          as chosen in the sidebar (inferred types are the defaults);
        * ``theme_mode``: "light" or "dark";
        * ``coercions``: the subset of columns typed 'numeric' or 'date';
        * ``unique_keys``: columns selected as identifying a unique person.
    """
    import re

    st.sidebar.markdown("## ⚙️ Contrôles")

    # Theme
    mode = st.sidebar.radio("Thème", options=["clair", "sombre"], horizontal=True, index=0)
    theme_mode = "dark" if mode == "sombre" else "light"
    set_theme_variables(theme_mode)

    # Logo (optional)
    logo_path = os.path.join("assets", "logo.png")
    if os.path.exists(logo_path):
        st.sidebar.image(logo_path, use_column_width=True)

    uploaded = st.sidebar.file_uploader("Importer un fichier Excel (.xlsx)", type=["xlsx"])

    df: Optional[pd.DataFrame] = None
    if uploaded is not None:
        try:
            # Read first sheet by default
            df = pd.read_excel(uploaded, sheet_name=0)
        except Exception as e:  # UI boundary: any reader failure is shown to the user
            st.sidebar.error(f"Erreur de lecture du fichier: {e}")
        else:
            # Strip column names, then make sure they are unique
            df.columns = make_unique_columns([str(c).strip() for c in df.columns])

    logical_types: Dict[str, str] = {}
    coercions: Dict[str, str] = {}
    unique_keys: List[str] = []
    if df is None or df.empty:
        return df, logical_types, theme_mode, coercions, unique_keys

    st.sidebar.markdown("---")
    st.sidebar.markdown("### 🧹 Nettoyage & types")
    # Global cleaning options
    trim_spaces = st.sidebar.checkbox("Supprimer les espaces autour du texte", value=True)
    lower_case = st.sidebar.checkbox("Mettre le texte en minuscules", value=False)
    drop_dupes = st.sidebar.checkbox("Supprimer les doublons", value=False)
    dedup_subset_cols: List[str] = []
    dedup_keep_choice = "first"
    if drop_dupes:
        dedup_subset_cols = st.sidebar.multiselect(
            "Colonnes à considérer (vide = toutes)", options=list(df.columns), help="Sélectionnez les colonnes sur lesquelles détecter les doublons."
        )
        dedup_keep_choice = st.sidebar.selectbox(
            "Conserver",
            options=["first", "last", "none"],
            index=0,
            help="Quelle occurrence conserver pour chaque doublon détecté",
        )
    fillna_blank = st.sidebar.checkbox("Remplacer NaN texte par vide", value=True)

    # Remove selected columns
    drop_columns = st.sidebar.multiselect(
        "Enlever des colonnes",
        options=list(df.columns),
        default=[],
        help="Supprimer des champs du jeu de données avant l'analyse",
        key="clean_drop_cols",
    )
    if drop_columns:
        df = df.drop(columns=drop_columns, errors="ignore")

    # Infer and allow override per column
    type_options = ["categorical", "numeric", "date", "text"]
    inferred = infer_pandas_types(df)
    for col in df.columns:
        logical_types[col] = st.sidebar.selectbox(
            f"Type pour {col}", options=type_options, index=type_options.index(inferred.get(col, "text"))
        )
        # Only these two logical types require a dtype conversion
        if logical_types[col] in ("numeric", "date"):
            coercions[col] = logical_types[col]

    # Apply cleaning
    for col in df.columns:
        column = df[col]
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            if trim_spaces or lower_case:
                text = column.astype(str)
                if trim_spaces:
                    text = text.str.strip()
                if lower_case:
                    text = text.str.lower()
                # Keep missing cells missing: astype(str) would otherwise turn
                # them into the literal text "nan".
                column = text.where(column.notna(), column)
            if fillna_blank and col not in coercions:
                column = column.fillna("")
            df[col] = column
        # Coerce types
        if coercions.get(col) == "numeric":
            df[col] = pd.to_numeric(df[col], errors="coerce")
        elif coercions.get(col) == "date":
            df[col] = try_parse_datetime(df[col])

    if drop_dupes:
        # "none" means: drop every member of a duplicate group (pandas keep=False).
        keep_arg = False if dedup_keep_choice == "none" else dedup_keep_choice
        # Columns removed above can no longer be part of the subset.
        subset = [c for c in dedup_subset_cols if c in df.columns]
        df = df.drop_duplicates(subset=subset or None, keep=keep_arg)

    # Unique person keys
    st.sidebar.markdown("---")
    st.sidebar.markdown("### 👤 Personne unique")
    # Heuristic suggestions: match whole words so that "id" does not select
    # "résidence" and "tel" does not select unrelated labels.
    hints = {"email", "e-mail", "mail", "id", "identifiant", "cin", "passport", "matricule", "phone", "téléphone", "telephone", "tel"}

    def looks_like_identifier(column_name: str) -> bool:
        lowered = str(column_name).lower()
        words = set(re.split(r"[\W_]+", lowered))
        return lowered in hints or not hints.isdisjoint(words)

    suggested = [c for c in df.columns if looks_like_identifier(c)]
    unique_keys = st.sidebar.multiselect(
        "Champs d'unicité (sélection multiple)", options=list(df.columns), default=suggested, help="Sélectionnez les champs qui identifient de façon unique une personne."
    )

    return df, logical_types, theme_mode, coercions, unique_keys
311
+
312
+
313
+ # -----------------------------
314
+ # Main App
315
+ # -----------------------------
316
def _section_title(title: str) -> None:
    """Render a card-styled section heading (balanced, self-contained markup)."""
    # A <div> opened in one st.markdown call cannot wrap widgets created by
    # later calls, so sections are introduced by a standalone header.
    st.markdown(
        f'<div class="card"><div class="card-title">{title}</div></div>',
        unsafe_allow_html=True,
    )


def _count_values(frame: pd.DataFrame, column: str, ascending: bool = False, limit: Optional[int] = None) -> pd.DataFrame:
    """Return the occurrences of each non-missing value as columns [column, 'count'].

    Rows are ordered by count (descending unless ``ascending``) and optionally
    truncated to the first ``limit`` rows.
    """
    counts = frame[column].value_counts(ascending=ascending)
    if limit is not None:
        counts = counts.head(limit)
    return counts.rename_axis(column).reset_index(name="count")


def _distinct_count(series: pd.Series, case_insensitive: bool = False) -> int:
    """Count distinct values, ignoring missing and blank cells."""
    values = series.dropna().astype(str).str.strip()
    if case_insensitive:
        values = values.str.lower()
    return int(values[values != ""].nunique())


def _render_overview_kpis(frame: pd.DataFrame, unique_keys: List[str]) -> None:
    """Show row/column/missing/duplicate counters and the unique-person KPI."""
    metrics = [
        ("Lignes", len(frame)),
        ("Colonnes", frame.shape[1]),
        ("Valeurs manquantes", int(frame.isna().sum().sum())),
        ("Doublons (approx)", int(frame.duplicated().sum())),
    ]
    for slot, (label, number) in zip(st.columns(4), metrics):
        with slot:
            kpi_card(label, f"{number:,}")

    # Unique persons KPI (based on selected keys)
    if unique_keys:
        try:
            uniq = (
                frame.dropna(subset=unique_keys)[unique_keys]
                .astype(str)
                .drop_duplicates()
                .shape[0]
            )
        except (KeyError, TypeError, ValueError):
            uniq = 0
        slot, _ = st.columns([1, 3])
        with slot:
            kpi_card("Personnes uniques", f"{uniq:,}")


def _render_distributions(frame: pd.DataFrame, cat_cols: List[str], template: str) -> None:
    """Show three value-distribution charts on user-chosen categorical columns."""
    _section_title("Répartitions clés")
    ctrl1, ctrl2, ctrl3 = st.columns([1, 1, 2])
    with ctrl1:
        topn = st.slider("Top N", min_value=3, max_value=50, value=10, step=1)
    with ctrl2:
        sort_dir = st.selectbox("Tri", options=["desc", "asc"], index=0)
    with ctrl3:
        st.caption("Appliqué aux graphiques de répartition ci-dessous")
    ascending = sort_dir == "asc"
    chart_margin = dict(l=10, r=10, t=10, b=10)
    drawable = bool(cat_cols) and not frame.empty

    # Each selector lives in the same column as its chart, directly above it.
    row1 = st.columns(2)
    if drawable:
        with row1[0]:
            dim1 = st.selectbox("Dimension 1 (répartition)", options=cat_cols, key="rep_dim1")
            fig_bar = px.bar(_count_values(frame, dim1, ascending, topn), x=dim1, y="count", template=template)
            fig_bar.update_layout(margin=chart_margin)
            chart_card("Répartition (dimension 1)", fig_bar)
        with row1[1]:
            # Default to the second column when there is one, to avoid two identical charts.
            dim2 = st.selectbox("Dimension 2 (répartition)", options=list(cat_cols), index=min(1, len(cat_cols) - 1), key="rep_dim2")
            fig_pie = px.pie(_count_values(frame, dim2, ascending, topn), names=dim2, values="count", template=template, hole=0.35)
            fig_pie.update_layout(margin=chart_margin)
            chart_card("Répartition (dimension 2)", fig_pie)

    row2 = st.columns(2)
    if drawable:
        with row2[0]:
            dim3 = st.selectbox("Dimension 3", options=cat_cols, key="rep_dim3")
            fig_status = px.bar(_count_values(frame, dim3), x=dim3, y="count", template=template, color=dim3)
            fig_status.update_layout(showlegend=False, margin=chart_margin)
            chart_card("Répartition (dimension 3)", fig_status)


def _render_analysis_zone(frame: pd.DataFrame, cat_cols: List[str], template: str) -> None:
    """Show a single count chart (bar or pie) for one chosen dimension."""
    _section_title("Zone d’analyse")
    if not cat_cols:
        return
    ac1, ac2, ac3 = st.columns([2, 1, 1])
    with ac1:
        dim_col = st.selectbox("Dimension", options=cat_cols)
    with ac2:
        chart_type = st.selectbox("Type de graphique", options=["Barres", "Camembert"], index=0)
    with ac3:
        topn_dim = st.slider("Top N (dimension)", 3, 50, 10)

    agg = _count_values(frame, dim_col, limit=topn_dim)
    if chart_type == "Barres":
        fig = px.bar(agg, x=dim_col, y="count", template=template)
    else:
        fig = px.pie(agg, names=dim_col, values="count", template=template, hole=0.35)
    st.plotly_chart(fig, use_container_width=True, theme=None)


def _render_drilldown(frame: pd.DataFrame, cat_cols: List[str]) -> pd.DataFrame:
    """Let the user keep only the rows having one value of one dimension."""
    dd1, dd2 = st.columns([1, 2])
    with dd1:
        dd_dim = st.selectbox("Drilldown - dimension", options=[None] + list(cat_cols))
    if dd_dim is None:
        return frame
    values = list(frame[dd_dim].dropna().astype(str).unique())
    with dd2:
        dd_val = st.selectbox("Valeur", options=[None] + values)
    # Compare with None explicitly: an empty string is a legitimate value.
    if dd_val is None:
        return frame
    return frame[frame[dd_dim].astype(str) == dd_val]


def _render_table_and_downloads(frame: pd.DataFrame) -> None:
    """Show the searchable table and the CSV / Excel download buttons."""
    search_query = st.text_input("Recherche globale")
    df_searched = apply_search(frame, search_query)
    st.dataframe(df_searched, use_container_width=True, hide_index=True)

    # Downloads ("utf-8-sig" adds a BOM to the CSV)
    csv_bytes = df_searched.to_csv(index=False).encode("utf-8-sig")
    xlsx_bytes = to_excel_bytes(df_searched)
    dc1, dc2 = st.columns(2)
    with dc1:
        st.download_button(
            "Télécharger CSV",
            data=csv_bytes,
            file_name="inscriptions_filtrees.csv",
            mime="text/csv",
            use_container_width=True,
        )
    with dc2:
        st.download_button(
            "Télécharger Excel",
            data=xlsx_bytes,
            file_name="inscriptions_filtrees.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            use_container_width=True,
        )


def _aggregate_for_chart(
    frame: pd.DataFrame,
    x_col: Optional[str],
    measure: Optional[str],
    color_col: Optional[str],
    agg_name: str,
) -> pd.DataFrame:
    """Aggregate ``frame`` per (x, colour) group into a 'value' column.

    With no measure the value is the number of rows in the group; otherwise it
    is ``agg_name`` applied to the measure ('count' counts non-missing values).
    The colour column is always part of the result so charts can use it.
    """
    # dict.fromkeys removes the duplicate when x and colour are the same column
    keys = list(dict.fromkeys(k for k in (x_col, color_col) if k is not None))
    if not keys:
        return pd.DataFrame({"value": [len(frame)]})
    if measure is None:
        return frame.groupby(keys).size().reset_index(name="value")
    return frame.groupby(keys, dropna=False)[measure].agg(agg_name).reset_index(name="value")


def _render_chart_builder(frame: pd.DataFrame, template: str) -> None:
    """Show the generic chart builder (type, aggregate, axes chosen by the user)."""
    _section_title("Constructeur de graphiques")
    chart_types = [
        "Barres",
        "Barres empilées",
        "Lignes",
        "Aires",
        "Camembert",
        "Histogramme",
        "Nuage de points",
        "Boîte (Box)",
        "Violon",
    ]
    cA, cB, cC = st.columns([1.2, 1, 1])
    with cA:
        chosen_chart = st.selectbox("Type de graphique", options=chart_types, key="ub_chart_type")
    with cB:
        agg_choice = st.selectbox("Agrégat", options=["count", "sum", "mean", "median", "min", "max"], index=0, key="ub_agg")
    with cC:
        topn_builder = st.number_input("Top N (optionnel)", min_value=0, value=0, step=1, help="0 pour désactiver")

    all_cols = list(frame.columns)
    num_cols = [c for c in all_cols if pd.api.types.is_numeric_dtype(frame[c])]
    # Rely on the dtype produced by the sidebar coercion instead of re-parsing
    # every column on each rerun.
    date_cols = [c for c in all_cols if pd.api.types.is_datetime64_any_dtype(frame[c])]
    cat_cols = [c for c in all_cols if c not in num_cols]
    measure_options = ["(count)"] + num_cols
    color_options = [None] + cat_cols

    needs_numeric = chosen_chart in ("Histogramme", "Nuage de points", "Boîte (Box)", "Violon")
    if needs_numeric and not num_cols:
        st.info("Aucune colonne numérique disponible pour ce type de graphique.")
        return
    if not needs_numeric and not cat_cols:
        st.info("Aucune colonne catégorielle ou date disponible pour ce type de graphique.")
        return

    if chosen_chart in ("Barres", "Barres empilées"):
        x = st.selectbox("Axe X (cat/date)", options=cat_cols, key="ub_bar_x")
        color = st.selectbox("Couleur (optionnel)", options=color_options, key="ub_bar_color")
        measure = st.selectbox("Mesure (numérique ou count)", options=measure_options, key="ub_bar_measure")
        data = _aggregate_for_chart(frame, x, None if measure == "(count)" else measure, color, agg_choice)
        top_n = int(topn_builder)
        if top_n > 0 and x in data.columns:
            # Rank the X categories on their total and keep all colour segments
            # of the retained categories.
            totals = data.groupby(x, dropna=False)["value"].sum()
            data = data[data[x].isin(totals.nlargest(top_n).index)]
        barmode = "group" if chosen_chart == "Barres" else "relative"
        fig = px.bar(data, x=x, y="value", color=color, template=template, barmode=barmode)
    elif chosen_chart in ("Lignes", "Aires"):
        x = st.selectbox("Axe X (date recommandé)", options=date_cols or cat_cols, key="ub_line_x")
        color = st.selectbox("Couleur (optionnel)", options=color_options, key="ub_line_color")
        measure = st.selectbox("Mesure (numérique ou count)", options=measure_options, key="ub_line_measure")
        data = _aggregate_for_chart(frame, x, None if measure == "(count)" else measure, color, agg_choice)
        draw = px.line if chosen_chart == "Lignes" else px.area
        fig = draw(data, x=x, y="value", color=color, template=template)
    elif chosen_chart == "Camembert":
        names = st.selectbox("Noms (catégorie)", options=cat_cols, key="ub_pie_names")
        measure = st.selectbox("Mesure (numérique ou count)", options=measure_options, key="ub_pie_measure")
        if measure == "(count)":
            data = frame.groupby(names).size().reset_index(name="value")
        else:
            # Slices are shares of a whole, hence always a sum
            data = frame.groupby(names)[measure].sum().reset_index(name="value")
        fig = px.pie(data, names=names, values="value", template=template, hole=0.35)
    elif chosen_chart == "Histogramme":
        x = st.selectbox("Colonne numérique", options=num_cols, key="ub_hist_x")
        bins = st.slider("Nb de bacs (bins)", 5, 100, 30)
        fig = px.histogram(frame, x=x, nbins=bins, template=template)
    elif chosen_chart == "Nuage de points":
        x = st.selectbox("X (numérique)", options=num_cols, key="ub_scatter_x")
        y = st.selectbox("Y (numérique)", options=[c for c in num_cols if c != x], key="ub_scatter_y")
        color = st.selectbox("Couleur (optionnel)", options=color_options, key="ub_scatter_color")
        fig = px.scatter(frame, x=x, y=y, color=color, template=template)
    elif chosen_chart == "Boîte (Box)":
        y = st.selectbox("Y (numérique)", options=num_cols, key="ub_box_y")
        x = st.selectbox("X (catégorie optionnel)", options=color_options, key="ub_box_x")
        fig = px.box(frame, x=x, y=y, template=template)
    elif chosen_chart == "Violon":
        y = st.selectbox("Y (numérique)", options=num_cols, key="ub_violin_y")
        x = st.selectbox("X (catégorie optionnel)", options=color_options, key="ub_violin_x")
        fig = px.violin(frame, x=x, y=y, template=template, box=True, points="outliers")
    else:
        return
    st.plotly_chart(fig, use_container_width=True, theme=None)


def _render_decision_view(frame: pd.DataFrame, template: str) -> None:
    """Show the decision-maker view; each widget appears only if its column exists."""
    _section_title("Vue Décideur (si champs disponibles)")
    # Candidate fields, most preferred label first
    col_email = find_column(frame, ["Email", "E-mail"])
    col_gender = find_column(frame, ["Genre", "Autre genre (Veuillez préciser) : "])
    col_country = find_column(frame, ["Pays de résidence", "D’où préférez-vous participer à l'événement ?"])
    col_role = find_column(frame, ["Votre profession / statut", "Autre profession (veuillez préciser)"])
    col_particip = find_column(frame, ["Avez-vous déjà participé à un événement Indaba X Togo ?"])
    col_mode_formation = find_column(frame, ["Comment voulez-vous participer aux formations ?"])
    col_what_do = find_column(frame, ["Que voulez-vous faire ?"])
    skill_names = ["Python", "Numpy", "Pandas", "Scikit Learn", "Pytorch", "Deep Learning"]
    col_skills = {name: find_column(frame, [f"Quel est votre niveau en [{name}]"]) for name in skill_names}

    # KPIs for decision maker
    kcols = st.columns(4)
    with kcols[0]:
        kpi_card("Inscriptions", f"{len(frame):,}")
    with kcols[1]:
        if col_email:
            kpi_card("Personnes uniques (email)", f"{_distinct_count(frame[col_email], case_insensitive=True):,}")
        else:
            kpi_card("Personnes uniques", "-")
    with kcols[2]:
        kpi_card("Pays (distincts)", f"{_distinct_count(frame[col_country]):,}" if col_country else "-")
    with kcols[3]:
        kpi_card("Profils (distincts)", f"{_distinct_count(frame[col_role]):,}" if col_role else "-")

    has_rows = not frame.empty

    def bar_chart(slot, column: Optional[str], title: str, limit: Optional[int] = None) -> None:
        """Draw a count bar chart for ``column`` in ``slot`` when possible."""
        if column and has_rows:
            fig = px.bar(_count_values(frame, column, limit=limit), x=column, y="count", template=template)
            with slot:
                chart_card(title, fig)

    # Row 1 charts: Gender, Country
    dm1 = st.columns(2)
    if col_gender and has_rows:
        fig_g = px.pie(_count_values(frame, col_gender), names=col_gender, values="count", template=template, hole=0.35)
        with dm1[0]:
            chart_card("Répartition par genre", fig_g)
    bar_chart(dm1[1], col_country, "Top 15 pays de résidence", limit=15)

    # Row 2: Participation history and roles
    dm2 = st.columns(2)
    if col_particip and has_rows:
        # Kept in answer order (not by frequency)
        pcounts = frame.groupby(col_particip).size().reset_index(name="count")
        fig_p = px.bar(pcounts, x=col_particip, y="count", template=template)
        with dm2[0]:
            chart_card("A déjà participé ?", fig_p)
    bar_chart(dm2[1], col_role, "Professions / Statuts (Top 15)", limit=15)

    # Row 2b: Formations participation mode and intentions
    dm2b = st.columns(2)
    bar_chart(dm2b[0], col_mode_formation, "Mode de participation aux formations")
    bar_chart(dm2b[1], col_what_do, "Intentions: Que voulez-vous faire ? (Top 15)", limit=15)

    # Row 3: Skills, one group of bars per skill and one bar per level
    order = ["débutant", "intermédiaire", "avancé", "expert"]
    parts = []
    for name, col in col_skills.items():
        if not col:
            continue
        levels = frame[col].dropna().astype(str).str.strip().str.lower()
        # reindex keeps the four known levels, in order; other answers are ignored
        counts = levels.value_counts().reindex(order, fill_value=0)
        part = counts.rename_axis("niveau").reset_index(name="count")
        part["skill"] = name
        parts.append(part)
    if parts:
        skill_df = pd.concat(parts, ignore_index=True)
        fig_skill = px.bar(skill_df, x="skill", y="count", color="niveau", barmode="group", template=template)
        chart_card("Niveaux par compétence", fig_skill)


def main():
    """Build the dashboard page: header, sidebar, filters, KPIs, charts, table."""
    inject_base_css()

    # Header
    col_logo, col_title, col_right = st.columns([1, 3, 1])
    with col_logo:
        logo_path = os.path.join("assets", "logo.png")
        if os.path.exists(logo_path):
            st.image(logo_path, width=72)
    with col_title:
        st.markdown("<h1 style='text-align:center; margin-top: 0;'>Tableau de bord des inscriptions</h1>", unsafe_allow_html=True)
    with col_right:
        st.write("")

    df, type_map, theme_mode, _, unique_keys = sidebar_controls()
    plotly_template = get_plotly_template(theme_mode)

    if df is None or df.empty:
        st.markdown(
            '<div class="card">'
            '<div class="card-title">Bienvenue 👋</div>'
            "<p>Importez un fichier <b>.xlsx</b> contenant vos inscriptions pour commencer l'analyse.</p>"
            "<ul>"
            "<li>Assurez-vous que les colonnes principales (pays, formation, statut, date) sont présentes.</li>"
            "<li>Vous pourrez mapper les colonnes dans la barre latérale.</li>"
            "</ul>"
            "</div>",
            unsafe_allow_html=True,
        )
        return

    # Filters (dynamic for all columns)
    st.sidebar.markdown("---")
    filtered_df = dynamic_filters(df, type_map)

    # Optional unique-person filtering using selected keys
    st.sidebar.markdown("### 👤 Filtrer par personne unique")
    if unique_keys:
        person_filter = st.sidebar.checkbox("Activer le filtre d'unicité (drop_duplicates)", value=False, key="unique_filter_toggle")
        keep_strategy = st.sidebar.selectbox("Conserver", options=["first", "last"], index=0, key="unique_filter_keep")
        if person_filter:
            try:
                filtered_df = filtered_df.drop_duplicates(subset=unique_keys, keep=keep_strategy)
            except (KeyError, TypeError, ValueError):
                st.sidebar.warning("Impossible d'appliquer le filtre d'unicité. Vérifiez les champs choisis.")

    _render_overview_kpis(filtered_df, unique_keys)

    cat_cols = [c for c in filtered_df.columns if type_map.get(c) in ("categorical", "text")]
    _render_distributions(filtered_df, cat_cols, plotly_template)
    _render_analysis_zone(filtered_df, cat_cols, plotly_template)

    # The drilldown narrows the data used by everything rendered below it.
    filtered_df = _render_drilldown(filtered_df, cat_cols)
    _render_table_and_downloads(filtered_df)
    _render_chart_builder(filtered_df, plotly_template)
    _render_decision_view(filtered_df, plotly_template)


if __name__ == "__main__":
    main()
734
+
735
+