File size: 4,640 Bytes
664c85a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa94331
664c85a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa94331
664c85a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa94331
664c85a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5eaaa30
 
 
 
 
664c85a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import json
from pathlib import Path
import pandas as pd
from typing import Dict, List
import streamlit as st
from st_aggrid import AgGrid, GridOptionsBuilder

def create_config_dataframe(flattened_configs: List[Dict], ids: List[str]) -> pd.DataFrame:
    """Build a table with one row per record and a leading ``id`` column.

    Args:
        flattened_configs: One mapping per row; the mapping keys become the
            column labels.
        ids: Row identifiers, stored in a new first column named ``id``.

    Returns:
        The assembled DataFrame. Every column label is converted to ``str``
        and stripped of surrounding whitespace.

    Raises:
        ValueError: Raised by ``DataFrame.insert`` when the records already
            provide an ``id`` column or ``ids`` does not match the row count.
    """
    def _clean(label):
        # Labels may be non-string or padded with whitespace; normalise both.
        return str(label).strip()

    table = pd.DataFrame(flattened_configs)
    table.columns = list(map(_clean, table.columns))
    table.insert(loc=0, column='id', value=ids)
    return table

def flatten_dict(d, parent_key='', sep='.'):
    """Collapse a nested dict into a single-level dict with joined keys.

    Args:
        d: Mapping to flatten; values that are dicts are descended into.
        parent_key: Prefix prepended (followed by ``sep``) to every key of
            ``d``. An empty prefix leaves top-level keys unchanged.
        sep: Separator placed between the levels of a joined key.

    Returns:
        A new dict mapping each joined key path to its non-dict leaf value.
        Empty nested dicts contribute no entries.
    """
    flat = {}
    for key, value in d.items():
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse with the current path as the prefix for the nested keys.
        flat.update(flatten_dict(value, path, sep=sep))
    return flat

@st.cache_data
def load_config_data() -> pd.DataFrame:
    """Collect every run's ``config.json`` into a single DataFrame.

    Each immediate sub-directory of the ``logs`` folder located next to this
    file is treated as one run. Sub-directories without a ``config.json`` are
    skipped. Nested config sections are flattened with ``flatten_dict``, so
    nested keys become dotted column names.

    Returns:
        One row per run, as built by ``create_config_dataframe``: the ``id``
        column holds the run directory name and the remaining columns are the
        flattened config keys.
    """
    log_dir = Path(__file__).parent / './logs'
    configs = []
    dir_ids = []

    # Sort so the row order does not depend on filesystem enumeration order.
    for run_dir in sorted(log_dir.glob('*')):
        if not run_dir.is_dir():
            continue

        config_path = run_dir / 'config.json'
        if not config_path.exists():
            continue

        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)

        configs.append(flatten_dict(config))
        dir_ids.append(run_dir.name)
    return create_config_dataframe(configs, dir_ids)

@st.cache_data
def load_eval_data() -> pd.DataFrame:
    """Collect the ``test/mean_score`` of every run into a single DataFrame.

    Each immediate sub-directory of the ``logs`` folder located next to this
    file is treated as one run. Sub-directories without an ``eval_log.json``
    are skipped. Only the ``test/mean_score`` entry of the log is kept; it is
    ``None`` when the log does not contain that key.

    Returns:
        One row per run, as built by ``create_config_dataframe``: the ``id``
        column holds the run directory name, followed by ``test/mean_score``.
    """
    log_dir = Path(__file__).parent / './logs'
    scores = []
    dir_ids = []

    # Sort so the row order does not depend on filesystem enumeration order.
    for run_dir in sorted(log_dir.glob('*')):
        if not run_dir.is_dir():
            continue

        eval_path = run_dir / 'eval_log.json'
        if not eval_path.exists():
            continue

        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(eval_path, 'r', encoding='utf-8') as f:
            eval_data = json.load(f)

        scores.append({'test/mean_score': eval_data.get('test/mean_score')})
        dir_ids.append(run_dir.name)

    return create_config_dataframe(scores, dir_ids)

@st.cache_data
def load_meta_data() -> pd.DataFrame:
    """Collect every run's ``meta.json`` into a single DataFrame.

    Each immediate sub-directory of the ``logs`` folder located next to this
    file is treated as one run. Sub-directories without a ``meta.json`` are
    skipped. Unlike the config loader, the metadata is used as-is and is not
    flattened.

    Returns:
        One row per run, as built by ``create_config_dataframe``: the ``id``
        column holds the run directory name and the remaining columns are the
        top-level keys of the metadata files.
    """
    log_dir = Path(__file__).parent / './logs'
    metas = []
    dir_ids = []

    # Sort so the row order does not depend on filesystem enumeration order.
    for run_dir in sorted(log_dir.glob('*')):
        if not run_dir.is_dir():
            continue

        meta_path = run_dir / 'meta.json'
        if not meta_path.exists():
            continue

        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(meta_path, 'r', encoding='utf-8') as f:
            meta = json.load(f)

        metas.append(meta)
        dir_ids.append(run_dir.name)
    return create_config_dataframe(metas, dir_ids)

def configure_grid(df):
    """Build the AgGrid options used to display ``df``.

    Starts a ``GridOptionsBuilder`` from the frame, then configures
    pagination with automatic page size, the side bar, and the default
    column properties listed below.

    Args:
        df: DataFrame whose columns seed the grid options.

    Returns:
        The result of ``GridOptionsBuilder.build()``.
    """
    default_column_settings = {
        'groupable': True,
        'value': True,
        'enableRowGroup': True,
        'aggFunc': 'sum',
        'editable': False,
    }

    builder = GridOptionsBuilder.from_dataframe(df)
    builder.configure_pagination(paginationAutoPageSize=True)
    builder.configure_side_bar()
    builder.configure_default_column(**default_column_settings)
    return builder.build()


# Streamlit app
st.set_page_config(layout="wide")
st.title("Experiment Results Dashboard")

# Load data
config_df = load_config_data()
score_df = load_eval_data()
meta_df = load_meta_data()

# Inner join: keep only runs that have both a config and an evaluation score.
experiments_df = pd.merge(
    config_df,
    score_df,
    on='id',
    how='inner'
)

# Preprocess data
columns_to_keep = ['id', 'Filter.name', 'checkpoint', 'model', 'task', 'test/mean_score',
                   'tags', 'start_time', 'Filter.threshold', 'Filter.seed', 'dataset']
# A column that no run defines (e.g. no run sets a filter threshold) is absent
# from the merged frame. Plain indexing would raise KeyError; reindex adds the
# column as all-NaN instead, and the warning keeps the gap visible.
missing_columns = [col for col in columns_to_keep if col not in experiments_df.columns]
if missing_columns:
    st.warning(f"Columns missing from the loaded experiments: {missing_columns}")
filtered_df = experiments_df.reindex(columns=columns_to_keep).copy()
# groupby drops rows whose key is NaN by default, so a missing threshold is
# replaced with the literal string 'None' before grouping on it below.
filtered_df['Filter.threshold'] = filtered_df['Filter.threshold'].fillna('None')
filtered_df['Filter.seed'] = filtered_df['Filter.seed'].fillna('None')
filtered_df['start_time'] = pd.to_datetime(filtered_df['start_time'], format='%Y%m%d_%H%M%S')

# Grouped view
# Named aggregation yields flat string column names. The dict-of-lists form
# produces MultiIndex (tuple) columns with '<lambda>' labels, which are not
# usable as grid field names.
# NOTE(review): assumes the values in 'tags' are hashable (e.g. strings) - confirm.
group_keys = ['model', 'Filter.name', 'tags', 'task', 'Filter.threshold']
grouped_df = filtered_df.groupby(group_keys).agg(**{
    'test/mean_score_mean': ('test/mean_score', 'mean'),
    'test/mean_score_list': ('test/mean_score', list),
    'checkpoint_count': ('checkpoint', 'count'),
    'checkpoint_list': ('checkpoint', list),
    'start_time_max': ('start_time', 'max'),
    'start_time_list': ('start_time', lambda x: sorted(x, reverse=True)),
    'Filter.seed_count': ('Filter.seed', 'count'),
    'Filter.seed_list': ('Filter.seed', list),
}).reset_index()


tab1, tab2, tab3 = st.tabs(["Meta Data", "Experiment Results", "Grouped Analysis"])

with tab1:
    st.header("Experiment Metadata")
    # Newest runs first.
    AgGrid(meta_df.sort_values(['start_time'], ascending=False),
           gridOptions=configure_grid(meta_df),
           height=400,
           fit_columns_on_grid_load=True)

with tab2:
    st.header("Filtered Experiment Results")
    AgGrid(filtered_df.sort_values(['start_time'], ascending=False),
           gridOptions=configure_grid(filtered_df),
           height=600,
           fit_columns_on_grid_load=True)

with tab3:
    st.header("Grouped Performance Analysis")
    # Order groups by their most recent run.
    AgGrid(grouped_df.sort_values(['start_time_max'], ascending=False),
           gridOptions=configure_grid(grouped_df),
           height=600,
           fit_columns_on_grid_load=True)