Spaces:

EDS-lab
/

Transparency_Plus

Sleeping

App Files Files Community

mmmapms committed on Aug 22, 2024

Commit

2dae4e3

verified ·

1 Parent(s): 3d3f015

Update app.py

Browse files

Files changed (1) hide show

app.py +291 -22

app.py CHANGED Viewed

@@ -47,6 +47,34 @@ def convert_European_time(data, time_zone):
     data.index = data.index.tz_localize(None)
     return data
 github_token = st.secrets["GitHub_Token_KUL_Margarida"]
 if github_token:
@@ -92,8 +120,26 @@ def conformal_predictions(data, target, my_forecast):
     #data.reset_index(inplace=True)
     return data
-st.title("Transparency++")
 countries = {
     'Belgium': 'BE',
@@ -105,9 +151,15 @@ countries = {
 st.sidebar.header('Filters')
 selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
 st.write()
 date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
                                    value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
@@ -120,9 +172,12 @@ else:
     st.error("Please select a valid date range.")
     st.stop()
-# Sidebar with radio buttons for different sections
-section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'])
 country_code = countries[selected_country]
 if country_code == 'BE':
@@ -161,7 +216,7 @@ def add_feature(df2, df_main):
     #df_combined.reset_index(inplace=True)
     return df_combined
 #data.index = data.index.tz_localize('UTC')
-data = data.loc[start_date:end_date]
 forecast_columns = [
     'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
@@ -175,8 +230,7 @@ if section == 'Data':
     st.header('Data Quality')
-    output_text = f"The below percentages are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
-    st.write(output_text)
     # Report % of missing values
     missing_values = data[forecast_columns].isna().mean() * 100
@@ -403,21 +457,225 @@ elif section == 'Forecasts':
             )
         )
         return fig
-    if country_code == "BE":
-        st.header('EDS Forecasts by Hour')
-        solar_fig = plot_category(forecast_dict, 'Solar', 'Solar Predictions')
-        st.plotly_chart(solar_fig)
-        wind_offshore_fig = plot_category(forecast_dict, 'Wind_offshore', 'Wind Offshore Predictions')
-        st.plotly_chart(wind_offshore_fig)
-        wind_onshore_fig = plot_category(forecast_dict, 'Wind_onshore', 'Wind Onshore Predictions')
-        st.plotly_chart(wind_onshore_fig)
-        load_fig = plot_category(forecast_dict, 'Load', 'Load Predictions')
-        st.plotly_chart(load_fig)
     # Scatter plots for error distribution
     st.subheader('Error Distribution')
@@ -426,18 +684,22 @@ elif section == 'Forecasts':
         actual_col = forecast_columns[i]
         forecast_col = forecast_columns[i + 1]
         if forecast_col in data.columns:
-            obs = last_week[actual_col]
-            pred = last_week[forecast_col]
             error = pred - obs
             fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
             fig.update_layout(title=f'Error Distribution for {forecast_col}')
             st.plotly_chart(fig)
     st.subheader('Accuracy Metrics (Sorted by rMAE):')
     if country_code == "BE":
         # Combine the two DataFrames on their index
@@ -531,26 +793,32 @@ elif section == 'Forecasts':
         # Convert the dictionaries to DataFrames and sort by rMAE
         df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
         df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
         df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
         df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
         st.write("##### Wind Onshore:")
         st.dataframe(df_wind_onshore)
         st.write("##### Wind Offshore:")
         st.dataframe(df_wind_offshore)
         st.write("##### Load:")
         st.dataframe(df_load)
         st.write("##### Solar:")
         st.dataframe(df_solar)
     else:
         accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
         for i in range(0, len(forecast_columns), 2):
@@ -607,7 +875,7 @@ elif section == 'Forecasts':
     st.subheader('ACF plots of Errors')
-    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three fields: Solar, Wind and Load.')
     for i in range(0, len(forecast_columns), 2):
         actual_col = forecast_columns[i]
@@ -634,7 +902,7 @@ elif section == 'Insights':
     # Scatter plots for correlation between wind, solar, and load
     st.subheader('Correlation between Wind, Solar, and Load')
-    st.write('The below scatter plots for correlation between all three fields: Solar, Wind and Load.')
     combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
@@ -659,12 +927,13 @@ elif section == 'Insights':
     st.subheader('Weather vs. Generation/Demand')
-    st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and generation/demand.')
     for weather_col in weather_columns:
         for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
             if weather_col in data.columns and actual_col in data.columns:
                 clean_label = actual_col.replace('_entsoe', '')
                 if weather_col == 'Temperature':
                     fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
                 else:

     data.index = data.index.tz_localize(None)
     return data
+# Ordered (regex, replacement) pairs shared by both helpers below.
+# Order matters: the longer "...Temp.Forecast_elia" pattern has to run before
+# its "...Temp" prefix, otherwise the shorter pattern would consume the common
+# part and leave a dangling ".Forecast_elia" suffix.
+_MODEL_NAME_REPLACEMENTS = (
+    (r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia', '.LightGBM_with_Forecast_elia'),
+    (r'\.LightGBMModel\.\dD\.TimeCov\.Temp', '.LightGBM'),
+    (r'\.Naive\.\dD', '.Naive'),
+)
+def _simplify_labels(labels):
+    """Return the string Index `labels` with verbose model names shortened."""
+    # An empty Index is not guaranteed to expose the .str accessor.
+    if len(labels) == 0:
+        return labels
+    for pattern, simplified in _MODEL_NAME_REPLACEMENTS:
+        labels = labels.str.replace(pattern, simplified, regex=True)
+    return labels
+def simplify_model_names(df):
+    """Shorten the model names embedded in the column labels of `df`.
+
+    The columns are reassigned on `df` itself (the caller's frame is modified)
+    and the same object is returned for convenience.
+    """
+    df.columns = _simplify_labels(df.columns)
+    return df
+def simplify_model_names_in_index(df):
+    """Shorten the model names embedded in the index labels of `df`.
+
+    The index is reassigned on `df` itself (the caller's frame is modified)
+    and the same object is returned for convenience.
+    """
+    df.index = _simplify_labels(df.index)
+    return df
 github_token = st.secrets["GitHub_Token_KUL_Margarida"]
 if github_token:
     #data.reset_index(inplace=True)
     return data
+# Main layout of the app: title in the wide left column, two logos side by
+# side in the narrow right column.
+col1, col2 = st.columns([5, 2])  # Adjust the ratio to better fit your layout needs
+with col1:
+    st.title("Transparency++")
+with col2:
+    # Single placeholder created before the logos, so it sits above them; it is
+    # filled with non-breaking spaces at the end of this section (presumably to
+    # push the logos down level with the title -- TODO confirm). The previous
+    # code created it twice, leaving the first placeholder permanently empty.
+    upper_space = col2.empty()
+    col2_1, col2_2 = st.columns(2)  # Create two columns within the right column for side-by-side images
+    with col2_1:
+        st.image("KU_Leuven_logo.png", width=100)   # Adjust the path and width as needed
+    with col2_2:
+        st.image("energyville_logo.png", width=100)
+upper_space.markdown("""
+&nbsp;
+&nbsp;
+""", unsafe_allow_html=True)
 countries = {
     'Belgium': 'BE',
 st.sidebar.header('Filters')
+st.sidebar.subheader("Select Country")
+st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
 selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
+st.sidebar.subheader("Select Date Range ")
+st.sidebar.caption("Define the time period over which the accuracy metrics will be calculated.")
 st.write()
 date_range = st.sidebar.date_input("Select Date Range for Metrics Calculation:",
                                    value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))))
     st.error("Please select a valid date range.")
     st.stop()
+st.sidebar.subheader("Section")
+st.sidebar.caption("Select the type of information you want to explore.")
+# Sidebar with radio buttons for different sections. The subheader above already
+# acts as the visible heading, so the widget keeps a real label (Streamlit
+# discourages empty labels for accessibility reasons) but hides it.
+# 'Forecasts' (index=1) is the section shown by default.
+section = st.sidebar.radio('Section', ['Data', 'Forecasts', 'Insights'], index=1, label_visibility='collapsed')
 country_code = countries[selected_country]
 if country_code == 'BE':
     #df_combined.reset_index(inplace=True)
     return df_combined
 #data.index = data.index.tz_localize('UTC')
 forecast_columns = [
     'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
     st.header('Data Quality')
+    st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
     # Report % of missing values
     missing_values = data[forecast_columns].isna().mean() * 100
             )
         )
         return fig
+    def calculate_mae(y_true, y_pred):
+        """Return the mean of the element-wise absolute difference between `y_true` and `y_pred`."""
+        absolute_errors = np.abs(y_true - y_pred)
+        return np.mean(absolute_errors)
+    def plot_mae_comparison(df_dict, category_prefix, title, real_values_df):
+        """Build a line chart of each model's MAE relative to the ENTSO-E forecast MAE, per forecast hour.
+
+        Args:
+            df_dict: Mapping from file name ('Predictions_{hour}h.csv', hour in 0..23)
+                to a DataFrame of model predictions. Hours without an entry are skipped.
+            category_prefix: Prefix such as 'Load' or 'Solar'. It selects the
+                '{prefix}_entsoe...' prediction columns and the '{prefix}_entsoe' /
+                '{prefix}_forecast_entsoe' columns of `real_values_df`.
+            title: Unused; kept so existing callers keep working. The figure title
+                is derived from `category_prefix`.
+            real_values_df: DataFrame holding the observed values and the ENTSO-E forecast.
+
+        Returns:
+            A plotly Figure with one trace per model that has at least one usable hour.
+        """
+        hours = list(range(24))
+        # Load model columns carry a 7D tag, every other category a 1D tag.
+        horizon = '7D' if category_prefix == 'Load' else '1D'
+        model_colors = {
+            f'LightGBMModel.{horizon}.TimeCov.Temp.Forecast_elia': '#1F77B4',  # Blue
+            f'LightGBMModel.{horizon}.TimeCov.Temp': '#2CA02C',  # Green
+            'Naive': '#FF7F0E'  # Orange
+        }
+        # Loop-invariant inputs: observed values (gaps removed) and the ENTSO-E forecast.
+        actual_values = real_values_df[f'{category_prefix}_entsoe'].dropna()
+        entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe']
+        column_prefix = f"{category_prefix}_entsoe"
+        fig = go.Figure()
+        for model_key, base_color in model_colors.items():
+            hours_with_data = []
+            mae_ratios = []
+            for hour in hours:
+                df = df_dict.get(f'Predictions_{hour}h.csv')
+                if df is None:
+                    continue
+                if isinstance(df.index, pd.DatetimeIndex):
+                    # Drop the first and last calendar day present in the file
+                    # (presumably only partially covered -- TODO confirm).
+                    days = df.index.normalize()
+                    df = df[(days != days.min()) & (days != days.max())]
+                candidates = [col for col in df.columns if col.startswith(column_prefix) and model_key in col]
+                if not candidates:
+                    continue
+                # The plain "...Temp" key is a substring of the "...Temp.Forecast_elia"
+                # column name, so both can match; the shortest match is the column
+                # that carries no extra suffix after the key.
+                model_predictions = df[min(candidates, key=len)]
+                # Align both series by their common indices
+                common_indices = model_predictions.index.intersection(actual_values.index)
+                if common_indices.empty:
+                    continue
+                aligned_actual_values = actual_values.loc[common_indices]
+                model_mae = calculate_mae(aligned_actual_values, model_predictions.loc[common_indices])
+                entsoe_mae = calculate_mae(aligned_actual_values, entsoe_forecast.loc[common_indices])
+                # Skip hours whose ratio would be NaN or infinite instead of plotting them.
+                if pd.isna(model_mae) or pd.isna(entsoe_mae) or entsoe_mae == 0:
+                    continue
+                mae_ratios.append(model_mae / entsoe_mae)
+                hours_with_data.append(hour)
+            # Plot the MAE ratio for this model as points
+            if mae_ratios:  # Only plot if there's data
+                fig.add_trace(go.Scatter(
+                    x=hours_with_data,  # The hours where we have data
+                    y=mae_ratios,
+                    mode='markers+lines',  # Plot as points connected by lines
+                    name=model_key,
+                    line=dict(color=base_color),
+                    marker=dict(color=base_color, size=8)  # Customize marker size
+                ))
+        # Update layout
+        fig.update_layout(
+            title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by hour of Forecasting.',
+            xaxis_title='Hour of Forecast',
+            yaxis_title='MAE Ratio (Model / entsoe)',
+            legend=dict(
+                orientation="h",
+                yanchor="bottom",
+                y=1.02,
+                xanchor="center",
+                x=0.5
+            )
+        )
+        return fig
+    def plot_mae_comparison_clock(df_dict, category_prefix, title, real_values_df):
+        """Build a polar "clock" chart of each model's MAE relative to the ENTSO-E forecast MAE.
+
+        Each forecast hour h (0..23) is drawn at angle h * 15 degrees, so a full
+        day covers the circle.
+
+        Args:
+            df_dict: Mapping from file name ('Predictions_{hour}h.csv') to a DataFrame
+                of model predictions whose column names were already shortened
+                (the model keys looked up here are 'LightGBM_with_Forecast_elia',
+                'LightGBM' and 'Naive'). Hours without an entry are skipped.
+            category_prefix: Prefix such as 'Load' or 'Solar'. It selects the
+                '{prefix}_entsoe...' prediction columns and the '{prefix}_entsoe' /
+                '{prefix}_forecast_entsoe' columns of `real_values_df`.
+            title: Unused; kept so existing callers keep working. The figure title
+                is derived from `category_prefix`.
+            real_values_df: DataFrame holding the observed values and the ENTSO-E forecast.
+
+        Returns:
+            A plotly Figure with one closed polar trace per model that has data.
+        """
+        hours = list(range(24))
+        # The same three simplified model names apply to every category.
+        model_colors = {
+            'LightGBM_with_Forecast_elia': '#1F77B4',  # Blue
+            'LightGBM': '#2CA02C',  # Green
+            'Naive': '#FF7F0E'  # Orange
+        }
+        # Loop-invariant inputs: observed values (gaps removed) and the ENTSO-E forecast.
+        actual_values = real_values_df[f'{category_prefix}_entsoe'].dropna()
+        entsoe_forecast = real_values_df[f'{category_prefix}_forecast_entsoe']
+        column_prefix = f"{category_prefix}_entsoe"
+        fig = go.Figure()
+        # Ratios of every plotted model, used to size the radial axis.
+        all_ratios = []
+        for model_key, base_color in model_colors.items():
+            hours_with_data = []
+            mae_ratios = []
+            for hour in hours:
+                df = df_dict.get(f'Predictions_{hour}h.csv')
+                if df is None:
+                    continue
+                if isinstance(df.index, pd.DatetimeIndex):
+                    # Drop the first and last calendar day present in the file
+                    # (presumably only partially covered -- TODO confirm).
+                    days = df.index.normalize()
+                    df = df[(days != days.min()) & (days != days.max())]
+                candidates = [col for col in df.columns if col.startswith(column_prefix) and model_key in col]
+                if not candidates:
+                    continue
+                # 'LightGBM' is a substring of 'LightGBM_with_Forecast_elia', so both
+                # columns can match; the shortest match is the column that carries
+                # no extra suffix after the key.
+                model_predictions = df[min(candidates, key=len)]
+                common_indices = model_predictions.index.intersection(actual_values.index)
+                if common_indices.empty:
+                    continue
+                aligned_actual_values = actual_values.loc[common_indices]
+                model_mae = calculate_mae(aligned_actual_values, model_predictions.loc[common_indices])
+                entsoe_mae = calculate_mae(aligned_actual_values, entsoe_forecast.loc[common_indices])
+                # Skip hours whose ratio would be NaN or infinite instead of plotting them.
+                if pd.isna(model_mae) or pd.isna(entsoe_mae) or entsoe_mae == 0:
+                    continue
+                mae_ratios.append(model_mae / entsoe_mae)
+                hours_with_data.append(hour)
+            if mae_ratios:
+                thetas = [h * 15 for h in hours_with_data]
+                fig.add_trace(go.Scatterpolar(
+                    # Repeat the first point to close the polygon. Both r and theta
+                    # must come from the same first available hour, which is not
+                    # necessarily hour 0.
+                    r=mae_ratios + mae_ratios[:1],
+                    theta=thetas + thetas[:1],
+                    mode='lines+markers',
+                    name=model_key,
+                    line=dict(color=base_color),
+                    marker=dict(color=base_color, size=8)
+                ))
+                all_ratios.extend(mae_ratios)
+        # Size the radial axis from every plotted model, with 10% headroom and a
+        # minimum upper bound so the ratio-1.0 circle always stays visible.
+        radial_range = [0, max(max(all_ratios), 1.0) * 1.1] if all_ratios else [0, 1.0]
+        fig.update_layout(
+            polar=dict(
+                radialaxis=dict(visible=True, range=radial_range),
+                angularaxis=dict(tickmode='array', tickvals=[h * 15 for h in hours], ticktext=hours)
+            ),
+            title=f'{category_prefix}: rMAE<span style="font-size:11px;">ENTSO-E</span> by Hour of Forecasting',
+            showlegend=True
+        )
+        return fig
+    if country_code == "BE":
+        # The earlier per-hour charts (plot_category / plot_mae_comparison line
+        # plots) were replaced by the polar "clock" plots rendered below.
+        st.header('MAE Ratio Comparison by Forecast Hour')
+        st.write("These clock-plots shows the relative Mean Absolute Error (rMAE) of different forecasting models compared to the ENTSO-E forecast, by the hour at which the forecast was made. "
+                "The rMAE is calculated as the ratio of the model's MAE to the ENTSO-E forecast's MAE.")
+        # simplify_model_names renames columns on the frame it receives, so each
+        # frame is copied first; the frames held in forecast_dict keep their
+        # original model names.
+        forecast_dict2 = {k: simplify_model_names(v.copy()) for k, v in forecast_dict.items()}
+        # (category prefix, title argument) for each chart, in display order.
+        clock_plots = (
+            ('Solar', 'rMAE Ratio Comparison for Solar'),
+            ('Wind_onshore', 'MAE Ratio Comparison for Wind Onshore'),
+            ('Wind_offshore', 'MAE Ratio Comparison for Wind Offshore'),
+            ('Load', 'MAE Ratio Comparison for Load'),
+        )
+        for category, plot_title in clock_plots:
+            st.plotly_chart(plot_mae_comparison_clock(forecast_dict2, category, plot_title, real_values_df=Data_BE))
     # Scatter plots for error distribution
     st.subheader('Error Distribution')
         actual_col = forecast_columns[i]
         forecast_col = forecast_columns[i + 1]
         if forecast_col in data.columns:
+            obs = data[actual_col]
+            pred = data[forecast_col]
             error = pred - obs
             fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
             fig.update_layout(title=f'Error Distribution for {forecast_col}')
             st.plotly_chart(fig)
     st.subheader('Accuracy Metrics (Sorted by rMAE):')
+    output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. This interval can be adjusted from the sidebar."
+    st.write(output_text)
     if country_code == "BE":
         # Combine the two DataFrames on their index
         # Convert the dictionaries to DataFrames and sort by rMAE
         df_wind_onshore = pd.DataFrame.from_dict(results_wind_onshore, orient='index').sort_values(by='rMAE')
         df_wind_offshore = pd.DataFrame.from_dict(results_wind_offshore, orient='index').sort_values(by='rMAE')
         df_load = pd.DataFrame.from_dict(results_load, orient='index').sort_values(by='rMAE')
         df_solar = pd.DataFrame.from_dict(results_solar, orient='index').sort_values(by='rMAE')
         st.write("##### Wind Onshore:")
+        # Shorten the model names in the row labels before each table is displayed.
+        df_wind_onshore = simplify_model_names_in_index(df_wind_onshore)
         st.dataframe(df_wind_onshore)
         st.write("##### Wind Offshore:")
+        # Bind the result to the variable that is displayed, like the other three tables.
+        df_wind_offshore = simplify_model_names_in_index(df_wind_offshore)
         st.dataframe(df_wind_offshore)
         st.write("##### Load:")
+        df_load = simplify_model_names_in_index(df_load)
         st.dataframe(df_load)
         st.write("##### Solar:")
+        df_solar = simplify_model_names_in_index(df_solar)
         st.dataframe(df_solar)
     else:
+        data = data.loc[start_date:end_date]
         accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
         for i in range(0, len(forecast_columns), 2):
     st.subheader('ACF plots of Errors')
+    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
     for i in range(0, len(forecast_columns), 2):
         actual_col = forecast_columns[i]
     # Scatter plots for correlation between wind, solar, and load
     st.subheader('Correlation between Wind, Solar, and Load')
+    st.write('The below scatter plots are made for checking whether there exists a correlation between all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
     combinations = [('Solar_entsoe', 'Load_entsoe'), ('Wind_onshore_entsoe', 'Load_entsoe'), ('Wind_offshore_entsoe', 'Load_entsoe'), ('Solar_entsoe', 'Wind_onshore_entsoe'), ('Solar_entsoe', 'Wind_offshore_entsoe')]
     st.subheader('Weather vs. Generation/Demand')
+    st.write('The below scatter plots show the relation between weather parameters (i.e., Temperature, Wind Speed) and the generation/demand data from ENTSO-E.')
     for weather_col in weather_columns:
         for actual_col in ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe', 'Wind_offshore_entsoe']:
             if weather_col in data.columns and actual_col in data.columns:
                 clean_label = actual_col.replace('_entsoe', '')
                 if weather_col == 'Temperature':
                     fig = px.scatter(x=data[weather_col], y=data[actual_col], labels={'x': f'{weather_col} (°C)', 'y': f'{clean_label} Generation [MW]'}, color_discrete_sequence=['orange'])
                 else: