Spaces:

datacipen
/

datavid

Running

App Files Files Community

datacipen committed 18 days ago

Commit

a284a70

verified ·

1 Parent(s): 16c5cf6

Upload 2 files

Browse files

Files changed (2) hide show

dataviz_avid.py +180 -0
trace_agent_collaboratif.py +104 -0

dataviz_avid.py ADDED Viewed

	@@ -0,0 +1,180 @@

+import plotly.express as px
+import pandas as pd
+import glob
+import json
+import plotly.graph_objects as go
+import numpy as np
+import textwrap
+import re
+import os
+def readExcel(data):
+    return pd.read_excel(data)
+def matrixcorrelation(matrix,df,list_labo):
+    fig = go.Figure(data=go.Heatmap(
+    z=matrix.values,
+    x=matrix.columns,
+    y=matrix.index,
+    colorscale=[
+        [0, 'rgba(6,6,33,1)'],
+        [0.2, 'rgba(6,6,33,1)'],
+        [0.2, '#FF69B4'],  # Rose pour technique
+        [0.4, '#FF69B4'],
+        [0.4, '#4169E1'],  # Bleu pour management
+        [0.6, '#4169E1'],
+        [0.6, '#32CD32'],  # Vert pour environnement
+        [0.8, '#32CD32'],
+        [0.8, '#FFD700'],  # Jaune pour économie
+        [1.0, '#32CD32']
+        ],
+        showscale=False,
+    ))
+    # Ajout des bordures aux cellules
+    fig.update_traces(
+        xgap=1,
+        ygap=1,
+    )
+    # Mise en forme
+    fig.update_layout(
+        #title='Matrice des emplois types<br>par famille de compétences<br>professionnelles',
+        xaxis=dict(
+            side='top',
+            tickangle=45,
+            tickfont=dict(size=10),
+        ),
+        yaxis=dict(
+            autorange='reversed',
+            tickfont=dict(size=10),
+        ),
+        width=800,
+        #height=300,
+        #height=len(list_labo) * 20,
+        height=1200,
+        template='plotly_dark',
+        paper_bgcolor = 'rgba(6,6,33,1)',
+        plot_bgcolor='rgba(6,6,33,1)',
+        margin=dict(
+            t=10,
+            l=50,
+            r=10,
+            b=50
+        ),
+        #annotations=annotations,
+        hovermode="x unified",hoverlabel=dict(
+            bgcolor='rgba(8,8,74,1)',
+            font_size=10,
+        ), clickmode='event+select',
+    )
+    # Personnalisation du style des axes
+    fig.update_xaxes(
+        #showspikes=True,
+        showgrid=True,
+        gridwidth=1,
+        gridcolor='lightgrey',
+    )
+    fig.update_yaxes(
+        #showspikes=True,
+        showgrid=True,
+        gridwidth=1,
+        gridcolor='lightgrey',
+    )
+    # Ajout d'un hover template personnalisé
+    hover_text = []
+    wrapper = textwrap.TextWrapper(width=10)
+    df_info = df[["Laboratoires", "Thématiques Ville Durable", "Ville","Nom du Laboratoire","Thématiques Transversales","Thématiques Spécifiques", "Productions Pédagogiques", "Équipements"]].copy()
+    df_info['Thématiques Ville Durable'] = df_info['Thématiques Ville Durable'].str.split('; ')
+    df_info = df_info.explode('Thématiques Ville Durable')
+    #df_info = df_info.drop_duplicates(subset=['Thématiques ODD11'])
+    df_info.set_index("Thématiques Ville Durable", inplace=True)
+    for idx in matrix.index:
+        row = []
+        for col in matrix.columns:
+            if matrix.loc[idx,col] == 1:
+                #df_psycho = df_score[(df_score['Thématiques Pedago'].str.contains(row['Thématiques Pedago'])) & (df_score['labStructName_s'] == row['labStructName_s'])]
+                df_extract = df_info.loc[col]
+                label_y = idx
+                df_test = df_extract[df_extract["Laboratoires"] == idx].copy()
+                if len(df_test) > 0:
+                    nom_labo = "<br>".join(df_test["Nom du Laboratoire"].values.tolist())
+                    transversales = "<br>".join(df_test["Thématiques Transversales"].values.tolist())
+                    specifiques = "<br>".join(df_test["Thématiques Spécifiques"].values.tolist())
+                    pedagogiques = "<br>".join(df_test["Productions Pédagogiques"].values.tolist())
+                    equipements = "<br>".join(df_test["Équipements"].values.tolist())
+                    row.append(
+                    f'<b>🔬 Laboratoire: {label_y}</b> : {nom_labo}<br>' +
+                    f'<b>🏙️ Thématiques Ville Durable: {col.capitalize()}</b><br><br>' +
+                    f'📣 Thématiques Transversales :<br>{transversales}<br><br>' +
+                    f'📣 Thématiques Spécifiques :<br>{specifiques}<br><br>' +
+                    f'🧑‍ Productions Pédagogiques:<br>{pedagogiques}<br><br>' +
+                    f'🎓 Équipements: {equipements}'
+                    )
+            else:
+                row.append('')
+        hover_text.append(row)
+    fig.update_traces(
+        hovertemplate="%{customdata}<extra></extra>",
+        customdata=hover_text,
+        #y=[y[0:-10].replace('(','') if y.find('(essential)')!=-1 or y.find('(optional)')!=-1 else y for y in color_values.index]
+    )
+    return fig
+async def display_matrixcorrelation():
+    ######## Matrice de corrélation ########
+    df = readExcel("public/Fiches-laboratoires-Thematiques-AVID.xlsx")
+    df_labo = df[["Laboratoires"]].copy()
+    df_labo = df_labo.drop_duplicates(subset=["Laboratoires"])
+    list_labo = df_labo["Laboratoires"].values.tolist()
+    df_thematique = df[['Thématiques Ville Durable']].copy()
+    df_thematique['Thématiques Ville Durable'] = df_thematique['Thématiques Ville Durable'].str.split('; ')
+    df_thematique = df_thematique.explode('Thématiques Ville Durable')
+    df_thematique = df_thematique.drop_duplicates(subset=['Thématiques Ville Durable'])
+    list_thematique = df_thematique['Thématiques Ville Durable'].values.tolist()
+    matrix = pd.DataFrame(0, index=list_labo, columns=list_thematique)
+    for labo in list_labo:
+        for thematique in list_thematique:
+            df_test = df[df['Thématiques Ville Durable'] == thematique]
+            if labo in df_test.values :
+                matrix.loc[labo, thematique] = 1 # Replace with actual condition logic
+    return matrixcorrelation(matrix,df,list_labo)
+async def display_barplotcorrelation():
+    df = readExcel("public/Fiches-laboratoires-Thematiques-AVID.xlsx")
+    df['Thématiques Ville Durable'] = df['Thématiques Ville Durable'].str.split('; ')
+    df = df.explode('Thématiques Ville Durable')
+    df = df.groupby(['Thématiques Ville Durable','Laboratoires']).size().reset_index(name='count')
+    fig = px.bar(df, x='count', y='Thématiques Ville Durable', color='Laboratoires', height=1200, width=1200, orientation='h',color_discrete_sequence=px.colors.qualitative.Safe, text_auto=True, template='plotly_dark').update_layout(font=dict(size=10, color='white'),autosize=True, paper_bgcolor='rgba(6,6,33,1)', plot_bgcolor='rgba(6,6,33,1)').update_traces(showlegend=True)
+    return fig
+async def display_barplotpublication():
+    df = readExcel("public/all-LABOUGE-publications_2020-2025_Thematiques_AVID.xlsx")
+    df.dropna(subset=['Thématiques Ville Durable'], inplace=True)
+    df['Thématiques Ville Durable'] = df['Thématiques Ville Durable'].str.split('; ')
+    df = df.explode('Thématiques Ville Durable')
+    df['Thématiques Ville Durable'] = df['Thématiques Ville Durable'].apply(lambda x: str(x).split('(')).apply(lambda x: x[0])
+    df = df.groupby(['Thématiques Ville Durable','Laboratoire Université Gustave Eiffel']).size().reset_index(name='count')
+    fig = px.bar(df, x='count', y='Thématiques Ville Durable', color='Laboratoire Université Gustave Eiffel', height=1200, width=1200, orientation='h',color_discrete_sequence=px.colors.qualitative.Safe, text_auto=True, template='plotly_dark').update_layout(font=dict(size=10, color='white'),autosize=True, paper_bgcolor='rgba(6,6,33,1)', plot_bgcolor='rgba(6,6,33,1)').update_traces(showlegend=True)
+    return fig
+async def display_barplotformation():
+    df = readExcel("public/Formations-correlation-thematiquesVD_AVID.xlsx")
+    df.dropna(subset=['Thématiques Ville Durable'], inplace=True)
+    df['Thématiques Ville Durable'] = df['Thématiques Ville Durable'].str.split('; ')
+    df = df.explode('Thématiques Ville Durable')
+    df['Thématiques Ville Durable'] = df['Thématiques Ville Durable'].apply(lambda x: str(x).split('(')).apply(lambda x: x[0])
+    df = df.groupby(['Thématiques Ville Durable','Niveau']).size().reset_index(name='count')
+    fig = px.bar(df, x='count', y='Thématiques Ville Durable', color='Niveau', height=1200, width=1200, orientation='h',color_discrete_sequence=px.colors.qualitative.Safe, text_auto=True, template='plotly_dark').update_layout(font=dict(size=10, color='white'),autosize=True, paper_bgcolor='rgba(6,6,33,1)', plot_bgcolor='rgba(6,6,33,1)').update_traces(showlegend=False)
+    return fig

trace_agent_collaboratif.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import os
+import sys
+import importlib
+from collections import defaultdict
+from concurrent.futures import Future, ThreadPoolExecutor
+from datetime import datetime, timedelta
+import pandas as pd
+from langsmith import Client
+from tqdm.auto import tqdm
+import chainlit as cl
+async def get_trace(apiKey,task_list):
+    try:
+        client = Client(api_key=apiKey)
+        project_name = "agent-collaboratif-avid"
+        num_days = 30
+        # List all tool runs
+        tool_runs = client.list_runs(
+            project_name=project_name,
+            start_time=datetime.now() - timedelta(days=num_days),
+            is_root=True,
+            # We don't need to fetch inputs, outputs, and other values that # may increase the query time
+            select=["inputs","trace_id"],
+        )
+        data = []
+        futures: list[Future] = []
+        trace_cursor = 0
+        trace_batch_size = 20
+        tool_runs_by_parent = defaultdict(lambda: defaultdict(set))
+        # Do not exceed rate limit
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            # Group tool runs by parent run ID
+            task2 = cl.Task(title="Grouper les outils invoqués dans une trace et les organiser par parent run ID")
+            await task_list.add_task(task2)
+            for run in tqdm(tool_runs):
+                # Collect all tools invoked within a given trace
+                tool_runs_by_parent[run.trace_id]["tools_involved"].add(run.name)
+                # maybe send a batch of parent run IDs to the server
+                # this lets us query for the root runs in batches
+                # while still processing the tool runs
+                if len(tool_runs_by_parent) % trace_batch_size == 0:
+                    if this_batch := list(tool_runs_by_parent.keys())[
+                        trace_cursor : trace_cursor + trace_batch_size
+                    ]:
+                        trace_cursor += trace_batch_size
+                        futures.append(
+                            executor.submit(
+                                client.list_runs,
+                                project_name=project_name,
+                                run_ids=this_batch,
+                                select=["inputs","trace_id"],
+                            )
+                        )
+            await task_list.send()
+            if this_batch := list(tool_runs_by_parent.keys())[trace_cursor:]:
+                futures.append(
+                    executor.submit(
+                        client.list_runs,
+                        project_name=project_name,
+                        run_ids=this_batch,
+                        select=["inputs","trace_id"],
+                    )
+                )
+        task2.status = cl.TaskStatus.DONE
+        await task_list.send()
+        task3 = cl.Task(title="Rechercher les données d'actions des utilisateurs de l'agent collabroatif AVID et les organiser par parent run ID dans un DataFrame")
+        await task_list.add_task(task3)
+        for future in tqdm(futures):
+            root_runs = future.result()
+            for root_run in root_runs:
+                root_data = tool_runs_by_parent[root_run.id]
+                data.append(
+                    {
+                        "inputs": root_run.inputs,
+                        "start_time": root_run.start_time,
+                        "end_time": root_run.end_time,
+                    }
+                )
+        # (Optional): Convert to a pandas DataFrame
+        task3.status = cl.TaskStatus.DONE
+        await task_list.send()
+        df_inputs = pd.DataFrame(data)
+        df_inputs['query'] = df_inputs.apply(lambda x: x.get('inputs', {}).get('query'), axis=1)
+        df_inputs['latency'] = df_inputs['end_time'] - df_inputs['start_time']
+        df_inputs['latency'] = df_inputs['latency'].apply(lambda x: x.total_seconds())
+        df_inputs=df_inputs[["query","latency","start_time"]].copy()
+        task4 = cl.Task(title="Conversion des données d'actions des utilisateurs de l'agent collabroatif AVID et les afficher au format texte")
+        await task_list.add_task(task4)
+        list_inputs = df_inputs.head(20).values.tolist()
+        str_inputs="".join(['* Requête : ' + str(item[0]) + '\nDate : ' + str(item[2]) + '\nDurée de la requête : ' + str(item[1]) + '\n\n' for item in list_inputs])
+        task4.status = cl.TaskStatus.DONE
+        await task_list.send()
+        return str_inputs
+    except Exception as e:
+        return f"Aucune connexion à LangSmith"