Upload 2 files
Browse files- dataviz_avid.py +180 -0
- trace_agent_collaboratif.py +104 -0
dataviz_avid.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import plotly.express as px
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import glob
|
| 4 |
+
import json
|
| 5 |
+
import plotly.graph_objects as go
|
| 6 |
+
import numpy as np
|
| 7 |
+
import textwrap
|
| 8 |
+
import re
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
def readExcel(data):
    """Load an Excel workbook into a DataFrame.

    Thin wrapper around ``pd.read_excel`` so every loader in this module
    goes through a single entry point.

    Parameters
    ----------
    data : str | path-like | file-like
        Anything ``pd.read_excel`` accepts.

    Returns
    -------
    pd.DataFrame
    """
    frame = pd.read_excel(data)
    return frame
|
| 13 |
+
|
| 14 |
+
def matrixcorrelation(matrix, df, list_labo):
    """Render a dark-themed heatmap of laboratories (rows) vs. "Ville Durable" themes.

    Parameters
    ----------
    matrix : pd.DataFrame
        0/1 incidence matrix; index = laboratory acronyms, columns = themes.
    df : pd.DataFrame
        Raw sheet; supplies the per-cell hover details.
    list_labo : list
        Laboratory names (kept in the signature for backward compatibility;
        no longer read since the figure height is fixed).

    Returns
    -------
    go.Figure
        Heatmap with one hover card per (laboratory, theme) match.
    """
    fig = go.Figure(data=go.Heatmap(
        z=matrix.values,
        x=matrix.columns,
        y=matrix.index,
        # Discrete color bands (positions are on the normalized color scale):
        colorscale=[
            [0, 'rgba(6,6,33,1)'],
            [0.2, 'rgba(6,6,33,1)'],
            [0.2, '#FF69B4'],   # pink: technical
            [0.4, '#FF69B4'],
            [0.4, '#4169E1'],   # blue: management
            [0.6, '#4169E1'],
            [0.6, '#32CD32'],   # green: environment
            [0.8, '#32CD32'],
            [0.8, '#FFD700'],   # gold: economy
            # FIX: the final stop was '#32CD32', which turned the top band
            # into a green/gold gradient instead of a solid gold band.
            [1.0, '#FFD700'],
        ],
        showscale=False,
    ))

    # Thin gaps between cells act as borders.
    fig.update_traces(
        xgap=1,
        ygap=1,
    )

    # Global layout: dark theme, theme labels on top, y reversed so the
    # first laboratory appears at the top of the chart.
    fig.update_layout(
        xaxis=dict(
            side='top',
            tickangle=45,
            tickfont=dict(size=10),
        ),
        yaxis=dict(
            autorange='reversed',
            tickfont=dict(size=10),
        ),
        width=800,
        height=1200,
        template='plotly_dark',
        paper_bgcolor='rgba(6,6,33,1)',
        plot_bgcolor='rgba(6,6,33,1)',
        margin=dict(
            t=10,
            l=50,
            r=10,
            b=50,
        ),
        hovermode="x unified",
        hoverlabel=dict(
            bgcolor='rgba(8,8,74,1)',
            font_size=10,
        ),
        clickmode='event+select',
    )

    # Light grid lines on both axes.
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgrey',
    )
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgrey',
    )

    # Build one hover card per cell. df is exploded on the '; '-separated
    # theme column and re-indexed by theme so each lookup returns the rows
    # describing that theme.
    hover_text = []
    df_info = df[["Laboratoires", "Thématiques Ville Durable", "Ville",
                  "Nom du Laboratoire", "Thématiques Transversales",
                  "Thématiques Spécifiques", "Productions Pédagogiques",
                  "Équipements"]].copy()
    df_info['Thématiques Ville Durable'] = df_info['Thématiques Ville Durable'].str.split('; ')
    df_info = df_info.explode('Thématiques Ville Durable')
    df_info.set_index("Thématiques Ville Durable", inplace=True)

    for idx in matrix.index:
        row = []
        for col in matrix.columns:
            if matrix.loc[idx, col] == 1:
                # NOTE(review): .loc[col] returns a Series (not a DataFrame)
                # when a theme occurs exactly once in the sheet, which would
                # break the boolean filter below — assumed not to happen here;
                # confirm against the source workbook.
                df_extract = df_info.loc[col]
                label_y = idx
                df_test = df_extract[df_extract["Laboratoires"] == idx].copy()
                if len(df_test) > 0:
                    nom_labo = "<br>".join(df_test["Nom du Laboratoire"].values.tolist())
                    transversales = "<br>".join(df_test["Thématiques Transversales"].values.tolist())
                    specifiques = "<br>".join(df_test["Thématiques Spécifiques"].values.tolist())
                    pedagogiques = "<br>".join(df_test["Productions Pédagogiques"].values.tolist())
                    equipements = "<br>".join(df_test["Équipements"].values.tolist())
                    row.append(
                        f'<b>🔬 Laboratoire: {label_y}</b> : {nom_labo}<br>' +
                        f'<b>🏙️ Thématiques Ville Durable: {col.capitalize()}</b><br><br>' +
                        f'📣 Thématiques Transversales :<br>{transversales}<br><br>' +
                        f'📣 Thématiques Spécifiques :<br>{specifiques}<br><br>' +
                        f'🧑 Productions Pédagogiques:<br>{pedagogiques}<br><br>' +
                        f'🎓 Équipements: {equipements}'
                    )
                else:
                    row.append('')
            else:
                # FIX: the original appended '' in only one of the two
                # non-match branches, so hover rows could end up shorter
                # than the number of columns and customdata shifted to the
                # wrong cells. Every cell now gets exactly one entry.
                row.append('')
        hover_text.append(row)

    # Attach the hover cards; suppress plotly's default trace label.
    fig.update_traces(
        hovertemplate="%{customdata}<extra></extra>",
        customdata=hover_text,
    )
    return fig
|
| 131 |
+
|
| 132 |
+
async def display_matrixcorrelation():
    """Load the AVID lab/theme sheet and return the correlation heatmap figure.

    Returns
    -------
    go.Figure
        Output of ``matrixcorrelation`` for the full sheet.
    """
    df = readExcel("public/Fiches-laboratoires-Thematiques-AVID.xlsx")

    # Unique laboratory names, in sheet order.
    list_labo = df["Laboratoires"].drop_duplicates().values.tolist()

    # The theme column stores '; '-separated lists: split and explode so
    # each (laboratory, theme) pair becomes one row.
    df_pairs = df[['Laboratoires', 'Thématiques Ville Durable']].copy()
    df_pairs['Thématiques Ville Durable'] = df_pairs['Thématiques Ville Durable'].str.split('; ')
    df_pairs = df_pairs.explode('Thématiques Ville Durable')
    list_thematique = df_pairs['Thématiques Ville Durable'].drop_duplicates().values.tolist()

    # 0/1 incidence matrix: 1 when a laboratory is associated with a theme.
    # FIX: the original compared the raw (unsplit) column to a single theme,
    # so rows listing several themes ('a; b') never matched any of them, and
    # `labo in df_test.values` scanned every column (false-positive risk).
    matrix = pd.DataFrame(0, index=list_labo, columns=list_thematique)
    for _, pair in df_pairs.iterrows():
        matrix.loc[pair['Laboratoires'], pair['Thématiques Ville Durable']] = 1

    return matrixcorrelation(matrix, df, list_labo)
|
| 153 |
+
|
| 154 |
+
async def display_barplotcorrelation():
    """Horizontal bar chart: laboratory counts per "Ville Durable" theme.

    Returns
    -------
    plotly Figure (dark theme, one colored segment per laboratory).
    """
    frame = readExcel("public/Fiches-laboratoires-Thematiques-AVID.xlsx")
    # One row per (laboratory, theme): the column is '; '-separated.
    frame['Thématiques Ville Durable'] = frame['Thématiques Ville Durable'].str.split('; ')
    frame = frame.explode('Thématiques Ville Durable')
    counts = frame.groupby(['Thématiques Ville Durable', 'Laboratoires']).size().reset_index(name='count')

    figure = px.bar(
        counts,
        x='count',
        y='Thématiques Ville Durable',
        color='Laboratoires',
        height=1200,
        width=1200,
        orientation='h',
        color_discrete_sequence=px.colors.qualitative.Safe,
        text_auto=True,
        template='plotly_dark',
    )
    figure.update_layout(
        font=dict(size=10, color='white'),
        autosize=True,
        paper_bgcolor='rgba(6,6,33,1)',
        plot_bgcolor='rgba(6,6,33,1)',
    )
    figure.update_traces(showlegend=True)
    return figure
|
| 161 |
+
|
| 162 |
+
async def display_barplotpublication():
    """Horizontal bar chart of 2020-2025 publications per theme, split by laboratory.

    Returns
    -------
    plotly Figure (dark theme, one colored segment per laboratory).
    """
    frame = readExcel("public/all-LABOUGE-publications_2020-2025_Thematiques_AVID.xlsx")
    frame.dropna(subset=['Thématiques Ville Durable'], inplace=True)
    # One row per (publication, theme): the column is '; '-separated.
    frame['Thématiques Ville Durable'] = frame['Thématiques Ville Durable'].str.split('; ')
    frame = frame.explode('Thématiques Ville Durable')
    # Keep only the text before any '(' — drops parenthesised suffixes.
    frame['Thématiques Ville Durable'] = frame['Thématiques Ville Durable'].map(
        lambda value: str(value).split('(')[0]
    )
    counts = frame.groupby(
        ['Thématiques Ville Durable', 'Laboratoire Université Gustave Eiffel']
    ).size().reset_index(name='count')

    figure = px.bar(
        counts,
        x='count',
        y='Thématiques Ville Durable',
        color='Laboratoire Université Gustave Eiffel',
        height=1200,
        width=1200,
        orientation='h',
        color_discrete_sequence=px.colors.qualitative.Safe,
        text_auto=True,
        template='plotly_dark',
    )
    figure.update_layout(
        font=dict(size=10, color='white'),
        autosize=True,
        paper_bgcolor='rgba(6,6,33,1)',
        plot_bgcolor='rgba(6,6,33,1)',
    )
    figure.update_traces(showlegend=True)
    return figure
|
| 171 |
+
|
| 172 |
+
async def display_barplotformation():
    """Horizontal bar chart of training programs per theme, split by degree level.

    Returns
    -------
    plotly Figure (dark theme; legend hidden).
    """
    frame = readExcel("public/Formations-correlation-thematiquesVD_AVID.xlsx")
    frame.dropna(subset=['Thématiques Ville Durable'], inplace=True)
    # One row per (program, theme): the column is '; '-separated.
    frame['Thématiques Ville Durable'] = frame['Thématiques Ville Durable'].str.split('; ')
    frame = frame.explode('Thématiques Ville Durable')
    # Keep only the text before any '(' — drops parenthesised suffixes.
    frame['Thématiques Ville Durable'] = frame['Thématiques Ville Durable'].map(
        lambda value: str(value).split('(')[0]
    )
    counts = frame.groupby(['Thématiques Ville Durable', 'Niveau']).size().reset_index(name='count')

    figure = px.bar(
        counts,
        x='count',
        y='Thématiques Ville Durable',
        color='Niveau',
        height=1200,
        width=1200,
        orientation='h',
        color_discrete_sequence=px.colors.qualitative.Safe,
        text_auto=True,
        template='plotly_dark',
    )
    figure.update_layout(
        font=dict(size=10, color='white'),
        autosize=True,
        paper_bgcolor='rgba(6,6,33,1)',
        plot_bgcolor='rgba(6,6,33,1)',
    )
    figure.update_traces(showlegend=False)
    return figure
|
trace_agent_collaboratif.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import importlib
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
|
| 6 |
+
from concurrent.futures import Future, ThreadPoolExecutor
|
| 7 |
+
|
| 8 |
+
from datetime import datetime, timedelta
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from langsmith import Client
|
| 11 |
+
from tqdm.auto import tqdm
|
| 12 |
+
|
| 13 |
+
import chainlit as cl
|
| 14 |
+
|
| 15 |
+
async def get_trace(apiKey, task_list):
    """Fetch the last 30 days of root runs for the AVID collaborative agent
    from LangSmith and return a human-readable text summary.

    Progress is reported to the Chainlit UI via ``task_list``.

    Parameters
    ----------
    apiKey : str
        LangSmith API key.
    task_list : cl.TaskList
        Chainlit task list that receives one task per processing stage.

    Returns
    -------
    str
        One bullet per query with its date and duration (20 most recent),
        or a fallback message when LangSmith cannot be reached.
    """
    try:
        client = Client(api_key=apiKey)
        project_name = "agent-collaboratif-avid"
        num_days = 30

        # List all root runs over the window. Only inputs/trace_id are
        # selected to keep the query fast.
        tool_runs = client.list_runs(
            project_name=project_name,
            start_time=datetime.now() - timedelta(days=num_days),
            is_root=True,
            select=["inputs", "trace_id"],
        )

        data = []
        futures: list[Future] = []
        trace_cursor = 0
        trace_batch_size = 20

        tool_runs_by_parent = defaultdict(lambda: defaultdict(set))
        # max_workers=2 keeps requests under the LangSmith rate limit.
        with ThreadPoolExecutor(max_workers=2) as executor:
            # Group tool runs by parent (trace) run ID.
            task2 = cl.Task(title="Grouper les outils invoqués dans une trace et les organiser par parent run ID")
            await task_list.add_task(task2)
            for run in tqdm(tool_runs):
                # Collect all tools invoked within a given trace.
                tool_runs_by_parent[run.trace_id]["tools_involved"].add(run.name)
                # Periodically send a batch of parent run IDs to the server
                # so root runs are fetched while we keep processing.
                # NOTE(review): the modulo test only fires when the dict size
                # lands exactly on a multiple of the batch size — confirm this
                # matches the upstream LangSmith cookbook pattern.
                if len(tool_runs_by_parent) % trace_batch_size == 0:
                    if this_batch := list(tool_runs_by_parent.keys())[
                        trace_cursor : trace_cursor + trace_batch_size
                    ]:
                        trace_cursor += trace_batch_size
                        futures.append(
                            executor.submit(
                                client.list_runs,
                                project_name=project_name,
                                run_ids=this_batch,
                                select=["inputs", "trace_id"],
                            )
                        )
            await task_list.send()
            # Flush the remaining parent run IDs.
            if this_batch := list(tool_runs_by_parent.keys())[trace_cursor:]:
                futures.append(
                    executor.submit(
                        client.list_runs,
                        project_name=project_name,
                        run_ids=this_batch,
                        select=["inputs", "trace_id"],
                    )
                )
        task2.status = cl.TaskStatus.DONE
        await task_list.send()

        # FIX: "collabroatif" typo corrected in the two task titles below.
        task3 = cl.Task(title="Rechercher les données d'actions des utilisateurs de l'agent collaboratif AVID et les organiser par parent run ID dans un DataFrame")
        await task_list.add_task(task3)
        for future in tqdm(futures):
            root_runs = future.result()
            for root_run in root_runs:
                # FIX: dropped the unused `root_data` lookup, whose only
                # effect was silently growing tool_runs_by_parent through
                # the defaultdict.
                data.append(
                    {
                        "inputs": root_run.inputs,
                        "start_time": root_run.start_time,
                        "end_time": root_run.end_time,
                    }
                )

        task3.status = cl.TaskStatus.DONE
        await task_list.send()

        # Shape the results: query text, latency in seconds, start time.
        df_inputs = pd.DataFrame(data)
        df_inputs['query'] = df_inputs.apply(lambda x: x.get('inputs', {}).get('query'), axis=1)
        df_inputs['latency'] = df_inputs['end_time'] - df_inputs['start_time']
        df_inputs['latency'] = df_inputs['latency'].apply(lambda x: x.total_seconds())
        df_inputs = df_inputs[["query", "latency", "start_time"]].copy()

        task4 = cl.Task(title="Conversion des données d'actions des utilisateurs de l'agent collaboratif AVID et les afficher au format texte")
        await task_list.add_task(task4)

        # Render the 20 most recent entries as plain text bullets.
        list_inputs = df_inputs.head(20).values.tolist()
        str_inputs = "".join(
            '* Requête : ' + str(item[0]) + '\nDate : ' + str(item[2]) +
            '\nDurée de la requête : ' + str(item[1]) + '\n\n'
            for item in list_inputs
        )
        task4.status = cl.TaskStatus.DONE
        await task_list.send()
        return str_inputs
    except Exception:
        # Broad catch is deliberate: any failure (bad key, network, schema)
        # degrades to a user-facing message instead of crashing the UI.
        # FIX: dropped the pointless f-prefix on this placeholder-free string.
        return "Aucune connexion à LangSmith"
|