Spaces:
Runtime error
Runtime error
| import requests | |
| import pandas as pd | |
| from io import StringIO | |
| import streamlit as st | |
| import os | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import plotly.colors as pc | |
| import numpy as np | |
| from sklearn.metrics import mean_squared_error | |
| from statsmodels.tsa.stattools import acf | |
| from statsmodels.graphics.tsaplots import plot_acf | |
| import matplotlib.pyplot as plt | |
| from datetime import datetime | |
| import folium | |
| import seaborn as sns | |
| from streamlit_folium import st_folium | |
| from datetime import datetime, timedelta | |
| from entsoe.geo import load_zones | |
| from branca.colormap import LinearColormap | |
| import branca | |
| def get_current_time(): | |
| now = datetime.now() | |
| current_hour = now.hour | |
| current_minute = now.minute | |
| # Return the hour and a boolean indicating if it is after the 10th minute | |
| return current_hour, current_minute >= 10 | |
| ##GET ALL FILES FROM GITHUB | |
| def load_GitHub(github_token, file_name, hour, after_10_min): | |
| url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}' | |
| headers = {'Authorization': f'token {github_token}'} | |
| response = requests.get(url, headers=headers) | |
| if response.status_code == 200: | |
| csv_content = StringIO(response.text) | |
| df = pd.read_csv(csv_content) | |
| if 'Date' in df.columns: | |
| df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime | |
| df.set_index('Date', inplace=True) # Set 'Date' column as the index | |
| #df.to_csv(file_name) | |
| return df | |
| else: | |
| print(f"Failed to download {file_name}. Status code: {response.status_code}") | |
| return None | |
| def load_forecast(github_token, hour, after_10_min): | |
| predictions_dict = {} | |
| for hour in range(24): | |
| file_name = f'Predictions_{hour}h.csv' | |
| df = load_GitHub(github_token, file_name, hour, after_10_min) | |
| if df is not None: | |
| predictions_dict[file_name] = df | |
| return predictions_dict | |
| def convert_European_time(data, time_zone): | |
| data.index = pd.to_datetime(data.index, utc=True) | |
| data.index = data.index.tz_convert(time_zone) | |
| data.index = data.index.tz_localize(None) | |
| return data | |
| def simplify_model_names(df): | |
| # Define the mapping of complex names to simpler ones | |
| replacements = { | |
| r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia': '.LightGBM_with_Forecast_elia', | |
| r'\.LightGBMModel\.\dD\.TimeCov\.Temp': '.LightGBM', | |
| r'\.Naive\.\dD': '.Naive', | |
| } | |
| # Apply the replacements | |
| for original, simplified in replacements.items(): | |
| df.columns = df.columns.str.replace(original, simplified, regex=True) | |
| return df | |
| def simplify_model_names_in_index(df): | |
| # Define the mapping of complex names to simpler ones | |
| replacements = { | |
| r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia': '.LightGBM_with_Forecast_elia', | |
| r'\.LightGBMModel\.\dD\.TimeCov\.Temp': '.LightGBM', | |
| r'\.Naive\.\dD': '.Naive', | |
| } | |
| # Apply the replacements to the DataFrame index | |
| for original, simplified in replacements.items(): | |
| df.index = df.index.str.replace(original, simplified, regex=True) | |
| return df | |
| github_token = st.secrets["GitHub_Token_KUL_Margarida"] | |
| if github_token: | |
| hour, after_10_min=get_current_time() | |
| forecast_dict = load_forecast(github_token, hour, after_10_min) | |
| historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv', hour, after_10_min) | |
| Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min) | |
| Data_BE=convert_European_time(Data_BE, 'Europe/Brussels') | |
| Data_FR=convert_European_time(Data_FR, 'Europe/Paris') | |
| Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam') | |
| Data_DE=convert_European_time(Data_DE, 'Europe/Berlin') | |
| Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon') | |
| Data_ES=convert_European_time(Data_ES, 'Europe/Madrid') | |
| Data_AT=convert_European_time(Data_AT, 'Europe/Vienna') | |
| Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome') | |
| Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome') | |
| Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome') | |
| Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome') | |
| Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome') | |
| Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome') | |
| Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen') | |
| Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen') | |
| else: | |
| print("Please enter your GitHub Personal Access Token to proceed.") | |
| col1, col2 = st.columns([5, 2]) # Adjust the ratio to better fit your layout needs | |
| with col1: | |
| st.title("Transparency++") | |
| with col2: | |
| upper_space = col2.empty() | |
| upper_space = col2.empty() | |
| col2_1, col2_2 = st.columns(2) # Create two columns within the right column for side-by-side images | |
| with col2_1: | |
| st.image("KU_Leuven_logo.png", width=100) # Adjust the path and width as needed | |
| with col2_2: | |
| st.image("energyville_logo.png", width=100) | |
| st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**") | |
| upper_space.markdown(""" | |
| | |
| | |
| """, unsafe_allow_html=True) | |
| countries = { | |
| 'Overall': 'Overall', | |
| 'Austria': 'AT', | |
| 'Belgium': 'BE', | |
| 'Denmark 1': 'DK_1', | |
| 'Denmark 2': 'DK_2', | |
| 'France': 'FR', | |
| 'Germany-Luxembourg': 'DE_LU', | |
| 'Italy Calabria': 'IT_CALA', | |
| 'Italy Central North': 'IT_CNOR', | |
| 'Italy Central South': 'IT_CSUD', | |
| 'Italy North': 'IT_NORD', | |
| 'Italy Sicily': 'IT_SICI', | |
| 'Italy South': 'IT_SUD', | |
| 'Netherlands': 'NL', | |
| 'Portugal': 'PT', | |
| 'Spain': 'ES', | |
| } | |
| data_dict = { | |
| 'BE': Data_BE, | |
| 'FR': Data_FR, | |
| 'DE_LU': Data_DE, | |
| 'NL': Data_NL, | |
| 'PT': Data_PT, | |
| 'AT': Data_AT, | |
| 'ES': Data_ES, | |
| 'IT_CALA': Data_IT_CALA, | |
| 'IT_CNOR': Data_IT_CNOR, | |
| 'IT_CSUD': Data_IT_CSUD, | |
| 'IT_NORD': Data_IT_NORD, | |
| 'IT_SICI': Data_IT_SICI, | |
| 'IT_SUD': Data_IT_SUD, | |
| 'DK_1': Data_DK_1, | |
| 'DK_2': Data_DK_2, | |
| } | |
| countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2'] | |
| countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',] | |
| installed_capacities = { | |
| 'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134}, | |
| 'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915}, | |
| 'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053}, | |
| 'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190}, | |
| 'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333}, | |
| 'ES': { 'Solar': 23867, 'Wind Onshore': 30159}, | |
| 'AT': { 'Solar': 7294, 'Wind Onshore': 4021 }, | |
| 'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112}, | |
| 'DK_2': { 'Solar': 992, 'Wind Offshore': 1045, 'Wind Onshore': 748}, | |
| } | |
| forecast_columns_all_RES = [ | |
| 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe'] | |
| forecast_columns_no_wind_offshore = [ | |
| 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe'] | |
| st.sidebar.header('Filters') | |
| st.sidebar.subheader("Select Country") | |
| st.sidebar.caption("Choose the country for which you want to display data or forecasts.") | |
| selected_country = st.sidebar.selectbox('Select Country', list(countries.keys())) | |
| # Sidebar with radio buttons for different sections | |
| if selected_country != 'Overall': | |
| st.sidebar.subheader("Section") | |
| st.sidebar.caption("Select the type of information you want to explore.") | |
| section = st.sidebar.radio('Section', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1, label_visibility='collapsed') | |
| else: | |
| section = None # No section is shown when "Overall" is selected | |
| if selected_country == 'Overall': | |
| data = None # You can set data to None or a specific dataset based on your logic | |
| section = None # No section selected when "Overall" is chosen | |
| else: | |
| country_code = countries[selected_country] | |
| data = data_dict.get(country_code) | |
| if country_code in countries_all_RES: | |
| forecast_columns = forecast_columns_all_RES | |
| elif country_code in countries_no_offshore: | |
| forecast_columns = forecast_columns_no_wind_offshore | |
| if country_code == 'BE': | |
| weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore'] | |
| data['Temperature'] = data['temperature_2m_8'] | |
| data['Wind Speed Onshore'] = data['wind_speed_100m_8'] | |
| data['Wind Speed Offshore'] = data['wind_speed_100m_4'] | |
| else: | |
| weather_columns = ['Temperature', 'Wind Speed'] | |
| data['Temperature'] = data['temperature_2m'] | |
| data['Wind Speed'] = data['wind_speed_100m'] | |
| if section == 'Data Quality': | |
| st.header('Data Quality') | |
| st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.') | |
| yesterday_midnight = pd.Timestamp(datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59) | |
| # Filter data until the end of yesterday (midnight) | |
| data_quality = data[data.index <= yesterday_midnight] | |
| # Report % of missing values | |
| missing_values = data_quality[forecast_columns].isna().mean() * 100 | |
| missing_values = missing_values.round(2) | |
| if country_code not in installed_capacities: | |
| st.markdown(f"⚠️ **Installed capacities not available on ENTSO-E Transparency Platform for country code '{country_code}'. Therefore, cannot calculate Extreme/Nonsensical values.**") | |
| # If capacities are not available, assign NaN to extreme_values and skip extreme value checking | |
| extreme_values = {col: np.nan for col in forecast_columns} | |
| else: | |
| capacities = installed_capacities[country_code] | |
| extreme_values = {} | |
| for col in forecast_columns: | |
| if 'Solar_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100 | |
| elif 'Solar_forecast_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100 | |
| elif 'Wind_onshore_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100 | |
| elif 'Wind_onshore_forecast_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100 | |
| elif 'Wind_offshore_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100 | |
| elif 'Wind_offshore_forecast_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100 | |
| elif 'Load_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0)).mean() * 100 | |
| elif 'Load_forecast_entsoe' in col: | |
| extreme_values[col] = ((data_quality[col] < 0)).mean() * 100 | |
| extreme_values = pd.Series(extreme_values).round(2) | |
| # Combine all metrics into one DataFrame | |
| metrics_df = pd.DataFrame({ | |
| 'Missing Values (%)': missing_values, | |
| 'Extreme/Nonsensical Values (%)': extreme_values, | |
| }) | |
| st.markdown( | |
| """ | |
| <style> | |
| .dataframe {font-size: 45px !important;} | |
| </style> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| st.dataframe(metrics_df) | |
| st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True) | |
| st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True) | |
| elif section == 'Forecasts Quality': | |
| st.header('Forecast Quality') | |
| # Time series for last 1 week | |
| last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))] | |
| st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.') | |
| # Options for selecting the data to display | |
| if country_code in countries_all_RES: | |
| variable_options = { | |
| "Load": ("Load_entsoe", "Load_forecast_entsoe"), | |
| "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"), | |
| "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"), | |
| "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe") | |
| } | |
| elif country_code in countries_no_offshore: | |
| variable_options = { | |
| "Load": ("Load_entsoe", "Load_forecast_entsoe"), | |
| "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"), | |
| "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"), | |
| } | |
| else: | |
| print('Country code doesnt correspond.') | |
| # Dropdown to select the variable | |
| selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys())) | |
| # Get the corresponding columns for the selected variable | |
| actual_col, forecast_col = variable_options[selected_variable] | |
| # Plot only the selected variable's data | |
| fig = go.Figure() | |
| fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual')) | |
| fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E')) | |
| fig.update_layout(title=f'Forecasts vs Actual for {selected_variable}', xaxis_title='Date', yaxis_title='Value [MW]') | |
| st.plotly_chart(fig) | |
| # Scatter plots for error distribution | |
| st.subheader('Error Distribution') | |
| st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.') | |
| selected_variable = st.selectbox("Select Variable for Error Distribution", list(variable_options.keys())) | |
| # Get the corresponding columns for the selected variable | |
| actual_col, forecast_col = variable_options[selected_variable] | |
| # Filter data for the selected year and check if columns are available | |
| data_2024 = data[data.index.year > 2023] | |
| if forecast_col in data_2024.columns: | |
| obs = data_2024[actual_col] | |
| pred = data_2024[forecast_col] | |
| # Calculate error and plot | |
| error = pred - obs | |
| fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Forecast ENTSO-E [MW]'}) | |
| fig.update_layout(title=f'Error Distribution for {selected_variable}') | |
| st.plotly_chart(fig) | |
| st.subheader('Accuracy Metrics (Sorted by rMAE):') | |
| date_range = st.date_input( | |
| "Select Date Range for Metrics Calculation:", | |
| value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today'))) | |
| ) | |
| if len(date_range) == 2: | |
| start_date = pd.Timestamp(date_range[0]) | |
| end_date = pd.Timestamp(date_range[1]) | |
| else: | |
| st.error("Please select a valid date range.") | |
| st.stop() | |
| output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. " | |
| st.write(output_text) | |
| data = data.loc[start_date:end_date] | |
| if country_code in countries_all_RES: | |
| accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore']) | |
| elif country_code in countries_no_offshore: | |
| accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore']) | |
| else: | |
| print('Country code doesnt correspond.') | |
| for i in range(0, len(forecast_columns), 2): | |
| actual_col = forecast_columns[i] | |
| forecast_col = forecast_columns[i + 1] | |
| if forecast_col in data.columns: | |
| obs = data[actual_col] | |
| pred = data[forecast_col] | |
| error = pred - obs | |
| mae = round(np.mean(np.abs(error)),2) | |
| if 'Load' in actual_col: | |
| persistence = obs.shift(168) # Weekly persistence | |
| else: | |
| persistence = obs.shift(24) # Daily persistence | |
| # Using the whole year's data for rMAE calculations | |
| rmae = round(mae / np.mean(np.abs(obs - persistence)),2) | |
| row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore' | |
| accuracy_metrics.loc[row_label] = [mae, rmae] | |
| accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column) | |
| accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True) | |
| accuracy_metrics = accuracy_metrics.round(4) | |
| col1, col2 = st.columns([1, 2]) | |
| with col1: | |
| st.markdown( | |
| """ | |
| <style> | |
| .small-chart { | |
| margin-top: 30px; /* Adjust this value as needed */ | |
| } | |
| </style> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| st.dataframe(accuracy_metrics) | |
| st.markdown( | |
| """ | |
| <style> | |
| .small-chart { | |
| margin-top: -30px; /* Adjust this value as needed */ | |
| } | |
| </style> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
| with col2: | |
| # Prepare data for the radar chart | |
| rmae_values = accuracy_metrics['rMAE'].tolist() | |
| categories = accuracy_metrics.index.tolist() | |
| fig = go.Figure() | |
| fig.add_trace(go.Scatterpolar( | |
| r=rmae_values, | |
| theta=categories, | |
| fill='toself', | |
| name='rMAE' | |
| )) | |
| # Configuring radar chart layout to be smaller | |
| fig.update_layout( | |
| width=250, # Adjust width | |
| height=250, # Adjust height | |
| margin=dict(t=20, b=20, l=0, r=0), # Remove all margins | |
| polar=dict( | |
| radialaxis=dict( | |
| visible=True, | |
| range=[0, max(rmae_values) * 1.2] # Adjust range dynamically | |
| )), | |
| showlegend=False | |
| ) | |
| # Apply CSS class to remove extra space above chart | |
| st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False}, className="small-chart") | |
| st.subheader('ACF plots of Errors') | |
| st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.') | |
| # Dropdown to select the variable | |
| selected_variable = st.selectbox("Select Variable for ACF of Errors", list(variable_options.keys())) | |
| # Get the corresponding columns for the selected variable | |
| actual_col, forecast_col = variable_options[selected_variable] | |
| # Calculate the error and plot ACF if columns are available | |
| if forecast_col in data.columns: | |
| obs = data[actual_col] | |
| pred = data[forecast_col] | |
| error = pred - obs | |
| st.write(f"**ACF of Errors for {selected_variable}**") | |
| fig, ax = plt.subplots(figsize=(10, 5)) | |
| plot_acf(error.dropna(), ax=ax) | |
| st.pyplot(fig) | |
| # Optionally calculate and store ACF values for further analysis if needed | |
| acf_values = acf(error.dropna(), nlags=240) | |
| elif section == 'Insights': | |
| st.header("Insights") | |
| st.write('The scatter plots below are created to explore possible correlations between the data fields: Solar, Wind Onshore, Wind Offshore (if any), Load, and Weather Features.') | |
| # Add a selection box for the data resolution (weekly, daily, hourly) | |
| data_2024 = data[data.index.year == 2024] | |
| resolution = st.selectbox('Select data resolution:', ['Daily', 'Hourly']) | |
| # Resample data based on the selected resolution | |
| if resolution == 'Hourly': | |
| resampled_data = data_2024 | |
| elif resolution == 'Daily': | |
| resampled_data = data_2024.resample('D').mean() # Resample to daily mean | |
| # Select the necessary columns for the scatter plot | |
| if country_code in countries_all_RES: | |
| selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns | |
| elif country_code in countries_no_offshore: | |
| selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns | |
| else: | |
| print('Country code doesnt correspond.') | |
| selected_df = resampled_data[selected_columns] | |
| selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns] | |
| # Drop missing values | |
| selected_df = selected_df.dropna() | |
| # Create the scatter plots using seaborn's pairplot | |
| sns.set_theme(style="ticks") | |
| pairplot_fig = sns.pairplot(selected_df) | |
| # Display the pairplot in Streamlit | |
| st.pyplot(pairplot_fig) | |
| elif selected_country == 'Overall': | |
| def get_forecast_columns(country_code): | |
| if country_code in countries_all_RES: | |
| return forecast_columns_all_RES | |
| elif country_code in countries_no_offshore: | |
| return forecast_columns_no_wind_offshore | |
| else: | |
| print('Country code doesnt correspond.') | |
| def calculate_net_load_error(df, country_code): | |
| forecast_columns = get_forecast_columns(country_code) | |
| filter_df = df[forecast_columns].dropna() | |
| # Initialize net_load and net_load_forecast with Load and other available data | |
| net_load = filter_df['Load_entsoe'] - filter_df['Wind_onshore_entsoe'] - filter_df['Solar_entsoe'] | |
| net_load_forecast = filter_df['Load_forecast_entsoe'] - filter_df['Wind_onshore_forecast_entsoe'] - filter_df['Solar_forecast_entsoe'] | |
| # Subtract Wind_offshore_entsoe if the column exists | |
| if 'Wind_offshore_entsoe' in filter_df.columns: | |
| net_load -= filter_df['Wind_offshore_entsoe'] | |
| # Subtract Wind_offshore_forecast_entsoe if the column exists | |
| if 'Wind_offshore_forecast_entsoe' in filter_df.columns: | |
| net_load_forecast -= filter_df['Wind_offshore_forecast_entsoe'] | |
| # Calculate the error based on the latest values | |
| error = (net_load_forecast - net_load).iloc[-1] | |
| date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format | |
| return error, date | |
| def plot_net_load_error_map(data_dict): | |
| # Calculate net load errors and dates for each country | |
| net_load_errors = {country_code: calculate_net_load_error(data, country_code) for country_code, data in data_dict.items()} | |
| # Use country codes directly | |
| selected_country_codes = list(data_dict.keys()) | |
| df_net_load_error = pd.DataFrame({ | |
| 'zoneName': selected_country_codes, | |
| 'net_load_error': [v[0] for v in net_load_errors.values()], | |
| 'date': [v[1] for v in net_load_errors.values()] | |
| }) | |
| # Load the GeoJSON data using the entsoe library | |
| date = pd.Timestamp.now() | |
| geo_data = load_zones(selected_country_codes, date) | |
| # Reset index to include 'zoneName' as a column | |
| geo_data = geo_data.reset_index() | |
| # Map country codes to country names | |
| countries_code_to_name = {v: k for k, v in countries.items()} | |
| geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name) | |
| # Merge net_load_error and date into geo_data | |
| geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left') | |
| # Initialize the Folium map | |
| m = folium.Map(location=[46.6034, 1.8883], zoom_start=4, tiles="cartodb positron") | |
| # Calculate the maximum absolute net load error for normalization | |
| max_value = df_net_load_error['net_load_error'].abs().max() | |
| # Create a colormap with lighter shades | |
| colormap = branca.colormap.LinearColormap( | |
| colors=['#0D92F4', 'white', '#C62E2E'], # Light blue to white to light coral | |
| vmin=-max_value, | |
| vmax=max_value, | |
| caption='Net Load Error [MW]' | |
| ) | |
| # Define the style function | |
| def style_function(feature): | |
| net_load_error = feature['properties']['net_load_error'] | |
| if net_load_error is None: | |
| return {'fillOpacity': 0.5, 'color': 'grey', 'weight': 0.5} | |
| else: | |
| fill_color = colormap(net_load_error) | |
| return { | |
| 'fillColor': fill_color, | |
| 'fillOpacity': 0.8, # Set a constant opacity | |
| 'color': 'black', | |
| 'weight': 0.5 | |
| } | |
| # Add the GeoJson layer with the custom style_function | |
| folium.GeoJson( | |
| geo_data, | |
| style_function=style_function, | |
| tooltip=folium.GeoJsonTooltip( | |
| fields=["name", "net_load_error", "date"], | |
| aliases=["Country:", "Net Load Error [MW]:", "Date:"], | |
| localize=True | |
| ) | |
| ).add_to(m) | |
| # Add the colormap to the map | |
| colormap.add_to(m) | |
| # Display the map | |
| _ = st_folium(m, width=700, height=600) | |
| def calculate_mae(actual, forecast): | |
| return np.mean(np.abs(actual - forecast)) | |
| def calculate_persistence_mae(data, shift_hours): | |
| return np.mean(np.abs(data - data.shift(shift_hours))) | |
| def calculate_rmae_for_country(df): | |
| rmae = {} | |
| rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168) | |
| rmae['Wind_onshore'] = calculate_mae(df['Wind_onshore_entsoe'], df['Wind_onshore_forecast_entsoe']) / calculate_persistence_mae(df['Wind_onshore_entsoe'], 24) | |
| # Only calculate Wind_offshore rMAE if the columns exist | |
| if 'Wind_offshore_entsoe' in df.columns and 'Wind_offshore_forecast_entsoe' in df.columns: | |
| rmae['Wind_offshore'] = calculate_mae(df['Wind_offshore_entsoe'], df['Wind_offshore_forecast_entsoe']) / calculate_persistence_mae(df['Wind_offshore_entsoe'], 24) | |
| else: | |
| rmae['Wind_offshore'] = None # Mark as None if not applicable | |
| rmae['Solar'] = calculate_mae(df['Solar_entsoe'], df['Solar_forecast_entsoe']) / calculate_persistence_mae(df['Solar_entsoe'], 24) | |
| return rmae | |
| def create_rmae_dataframe(data_dict): | |
| rmae_values = {'Country': [], 'Load': [], 'Wind_onshore': [], 'Wind_offshore': [], 'Solar': []} | |
| for country_name, df in data_dict.items(): | |
| forecast_columns=get_forecast_columns(country_name) | |
| df_filtered = df[forecast_columns].dropna() | |
| rmae = calculate_rmae_for_country(df_filtered) | |
| rmae_values['Country'].append(country_name) | |
| rmae_values['Load'].append(rmae['Load']) | |
| rmae_values['Wind_onshore'].append(rmae['Wind_onshore']) | |
| rmae_values['Solar'].append(rmae['Solar']) | |
| # Append Wind_offshore rMAE only if it's not None (i.e., the country has offshore wind data) | |
| if rmae['Wind_offshore'] is not None: | |
| rmae_values['Wind_offshore'].append(rmae['Wind_offshore']) | |
| else: | |
| rmae_values['Wind_offshore'].append(np.nan) # Insert NaN for countries without offshore wind | |
| return pd.DataFrame(rmae_values) | |
| def plot_rmae_radar_chart(rmae_df): | |
| fig = go.Figure() | |
| # Dynamically adjust angles to exclude Wind_offshore if all values are NaN | |
| angles = ['Load', 'Wind_onshore', 'Solar'] | |
| if not rmae_df['Wind_offshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries | |
| angles.append('Wind_offshore') | |
| for _, row in rmae_df.iterrows(): | |
| fig.add_trace(go.Scatterpolar( | |
| r=[row[angle] for angle in angles], | |
| theta=angles, | |
| fill='toself', | |
| name=row['Country'] | |
| )) | |
| fig.update_layout( | |
| polar=dict( | |
| radialaxis=dict(visible=True, range=[0, 1.2]) | |
| ), | |
| showlegend=True, | |
| title="rMAE Radar Chart by Country" | |
| ) | |
| st.plotly_chart(fig) | |
| st.subheader("Net Load Error Map") | |
| st.write(""" | |
| The net load error map highlights the error in the forecasted versus actual net load for each country. | |
| Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data. | |
| """) | |
| plot_net_load_error_map(data_dict) | |
| st.subheader("rMAE of Forecasts published on ENTSO-E TP") | |
| st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""") | |
| rmae_df = create_rmae_dataframe(data_dict) | |
| # Add multiselect for country selection | |
| selected_countries = st.multiselect("Select Countries for Radar Plot", options=rmae_df['Country'].unique(), default=['BE', 'DE_LU', 'FR']) | |
| # Filter the dataframe based on the selected countries | |
| filtered_rmae_df = rmae_df[rmae_df['Country'].isin(selected_countries)] | |
| # Plot radar chart for the selected countries | |
| plot_rmae_radar_chart(filtered_rmae_df) | |