Spaces:
Runtime error
Runtime error
| import requests | |
| import pandas as pd | |
| from io import StringIO | |
| import streamlit as st | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import numpy as np | |
| from statsmodels.tsa.stattools import acf | |
| from statsmodels.graphics.tsaplots import plot_acf | |
| import matplotlib.pyplot as plt | |
| import folium | |
| from streamlit_folium import st_folium | |
| import seaborn as sns | |
| from datetime import datetime, timedelta | |
| from entsoe.geo import load_zones | |
| import branca | |
| import pytz | |
| import time | |
| from entsoe import EntsoePandasClient | |
| import geopandas as gpd | |
# All wall-clock ("now") computations in this app use Brussels time (CET/CEST).
tz = pytz.timezone('Europe/Brussels')
def load_capacity_csv(path: str) -> dict:
    """Read an installed-capacity CSV into a nested dict: Country -> {tech: MW}.

    The file must carry a 'Country' column used as the index. The literal
    string "NaN" is normalized to a real NaN and every value is coerced to
    float so downstream arithmetic never sees strings.
    """
    capacities = pd.read_csv(path, index_col='Country')
    # Normalize textual "NaN" placeholders, then force a numeric dtype.
    capacities = capacities.replace({"NaN": np.nan}).astype(float)
    return capacities.to_dict(orient='index')
# Load installed capacities from CSV files (one snapshot per reference year).
installed_capacities_2024 = load_capacity_csv('installed_capacities_2024.csv')
installed_capacities_2025 = load_capacity_csv('installed_capacities_2025.csv')
# Generation technologies that have a per-zone installed capacity entry.
TECHS = ['Solar', 'Wind Offshore', 'Wind Onshore']
# Full bidding-zone list kept for reference; the app currently runs on the
# reduced subset below.
#countries = [ 'AT', 'BE', 'NL', 'BG', 'HR', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
#'EE', 'FI', 'FR', 'GR', 'HU', 'IT_CALA', 'IT_CNOR',
#'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD', 'LV', 'LT',
#'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5', 'PL', 'PT', 'RO',
#'SE_1', 'SE_2', 'SE_3', 'SE_4', 'RS', 'SK', 'SI', 'ES', 'CH', 'ME','IE_SEM','MK','CY','BA','AL','XK']
countries = ['AT', 'BE', 'DE_LU', 'DK_1', 'DK_2', 'FR', 'IT_CALA', 'IT_CNOR',
             'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
             'NL', 'ES']
def get_time_zone(country_code):
    """Map an ENTSO-E bidding-zone code to its IANA time-zone name.

    Raises:
        ValueError: when the code has no configured time zone.
    """
    tz_map = {
        'AL': 'Europe/Tirane',
        'AT': 'Europe/Vienna',
        'BE': 'Europe/Brussels',
        'BA': 'Europe/Sarajevo',
        'BG': 'Europe/Sofia',
        'HR': 'Europe/Zagreb',
        'CY': 'Asia/Nicosia',
        'CZ': 'Europe/Prague',
        'DE_LU': 'Europe/Berlin',
        'DK_1': 'Europe/Copenhagen',
        'DK_2': 'Europe/Copenhagen',
        'EE': 'Europe/Tallinn',
        'FI': 'Europe/Helsinki',
        'MK': 'Europe/Skopje',
        'FR': 'Europe/Paris',
        'GR': 'Europe/Athens',
        'HU': 'Europe/Budapest',
        'IS': 'Atlantic/Reykjavik',
        'IE_SEM': 'Europe/Dublin',
        'IT_CALA': 'Europe/Rome',
        'IT_CNOR': 'Europe/Rome',
        'IT_CSUD': 'Europe/Rome',
        'IT_NORD': 'Europe/Rome',
        'IT_SARD': 'Europe/Rome',
        'IT_SICI': 'Europe/Rome',
        'IT_SUD': 'Europe/Rome',
        'LV': 'Europe/Riga',
        'LT': 'Europe/Vilnius',
        'ME': 'Europe/Podgorica',
        'NL': 'Europe/Amsterdam',
        'NO_1': 'Europe/Oslo',
        'NO_2': 'Europe/Oslo',
        'NO_3': 'Europe/Oslo',
        'NO_4': 'Europe/Oslo',
        'NO_5': 'Europe/Oslo',
        'PL': 'Europe/Warsaw',
        'PT': 'Europe/Lisbon',
        'MD': 'Europe/Chisinau',
        'RO': 'Europe/Bucharest',
        'SE_1': 'Europe/Stockholm',
        'SE_2': 'Europe/Stockholm',
        'SE_3': 'Europe/Stockholm',
        'SE_4': 'Europe/Stockholm',
        'RS': 'Europe/Belgrade',
        'SK': 'Europe/Bratislava',
        'SI': 'Europe/Ljubljana',
        'ES': 'Europe/Madrid',
        'CH': 'Europe/Zurich',
        # NOTE(review): XK (Kosovo) deliberately reuses Europe/Rome here (CET);
        # pytz has no dedicated Kosovo zone — confirm this is intended.
        'XK': 'Europe/Rome'
    }
    zone = tz_map.get(country_code)
    if zone is None:
        raise ValueError(f"Time zone for country code {country_code} is not defined.")
    return zone
def convert_European_time(data, bdz):
    """Shift a UTC-indexed frame to the bidding zone's local wall-clock time.

    The index is parsed as UTC, converted to the zone's time zone, then
    stripped of tz info so it reads as naive local timestamps. The frame is
    mutated in place and returned for convenience.
    """
    local_zone = get_time_zone(bdz)
    utc_index = pd.to_datetime(data.index, utc=True)
    data.index = utc_index.tz_convert(local_zone).tz_localize(None)
    return data
def filter_dataframe(df):
    """Keep only the whitelisted ENTSO-E actual/forecast columns (original order)."""
    allowed_columns = {"Load_entsoe", "Load_forecast_entsoe", "Solar_entsoe", "Solar_forecast_entsoe", "Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe", "Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"}
    return df.loc[:, df.columns.isin(allowed_columns)]
def load_GitHub(github_token, bdz, start='2024-01-01', timeout=30):
    """Download one bidding zone's ENTSO-E CSV from the Transparency_Data repo.

    Parameters
    ----------
    github_token : str
        Personal access token sent on the raw-content request.
    bdz : str
        Bidding-zone code; selects the file '{bdz}_Entsoe_UTC.csv'.
    start : str, optional
        Keep only rows at or after this timestamp (default '2024-01-01',
        matching the previous hard-coded cutoff).
    timeout : float, optional
        Request timeout in seconds. Previously no timeout was set, so a
        stalled connection could hang the Streamlit app forever; a timeout
        now raises requests.exceptions.Timeout instead.

    Returns
    -------
    pandas.DataFrame or None
        The filtered, locally tz-converted frame, or None on HTTP failure.
    """
    file_name = f'{bdz}_Entsoe_UTC.csv'
    url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
    headers = {'Authorization': f'token {github_token}'}
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download {file_name}. Status code: {response.status_code}")
        return None
    df = pd.read_csv(StringIO(response.text))
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
        df.set_index('Date', inplace=True)       # Set 'Date' column as the index
    df = filter_dataframe(df)
    df = convert_European_time(df, bdz)
    return df[df.index >= pd.Timestamp(start)]
def filter_variable_options(df):
    """Pair up (actual, forecast) columns that both contain usable data.

    Returns
    -------
    (variable_options, flagged_columns)
        variable_options maps a display name to its (actual, forecast)
        column pair when both columns are present and not entirely NaN.
        flagged_columns lists every column that is missing or all-NaN.
    """
    all_options = {
        "Load": ("Load_entsoe", "Load_forecast_entsoe"),
        "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
        "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
        "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"),
    }

    def has_data(column):
        # Usable means present in the frame and not entirely NaN.
        return column in df.columns and not df[column].isna().all()

    variable_options = {}
    flagged_columns = []
    for label, (actual, forecast) in all_options.items():
        actual_ok = has_data(actual)
        forecast_ok = has_data(forecast)
        if actual_ok and forecast_ok:
            variable_options[label] = (actual, forecast)
            continue
        if not actual_ok:
            flagged_columns.append(actual)
        if not forecast_ok:
            flagged_columns.append(forecast)
    return variable_options, flagged_columns
# GitHub PAT pulled from Streamlit secrets; required to fetch the raw CSVs.
github_token = st.secrets["GitHub_Token_KUL_Margarida"]
#countries = ['IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_SARD', 'PT', 'FR']
def load_all_from_github(github_token, countries, period_key):
    """Fetch every zone's CSV from GitHub; zones whose download failed are skipped.

    NOTE(review): *period_key* is accepted but unused here — presumably
    intended as a cache key for callers; confirm before removing.
    """
    frames = {}
    for zone in countries:
        frame = load_GitHub(github_token, zone)
        if frame is None:
            continue
        frames[zone] = frame
    return frames
# in your main script, _before_ you do the for-loop
now = datetime.now(tz)
# Snap "now" to an hourly block: from minute 15 onward the current hour
# counts; earlier minutes fall back to the previous hour.
if now.minute >= 15:
    block_start = now.replace(minute=0, second=0, microsecond=0)
else:
    block_start = (now - timedelta(hours=1)).replace(minute=0, second=0, microsecond=0)
# turn it into a simple string key
period_key = block_start.strftime("%Y-%m-%d-%H")
# Without a token the app cannot fetch any data, so stop rendering early.
if github_token:
    data_dict = load_all_from_github(github_token, countries, period_key)
else:
    st.error("Please enter your GitHub Personal Access Token to proceed.")
    st.stop()
# --- Page header: title on the left, partner logos on the right. ---
col1, col2 = st.columns([5, 2])
with col1:
    st.title("Transparency++")
with col2:
    upper_space = col2.empty()
    upper_space = col2.empty()
    col2_1, col2_2 = st.columns(2)  # Create two columns within the right column for side-by-side images
    with col2_1:
        st.image("KU_Leuven_logo.png", width=100)  # Adjust the path and width as needed
    with col2_2:
        st.image("energyville_logo.png", width=100)
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for ENTSO-E member countries.**")
# --- Sidebar: country selector plus (for single countries) a section picker. ---
st.sidebar.header('Filters')
st.sidebar.subheader("Select Country")
st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
selection = ['Overall'] + list(countries)
selected_country = st.sidebar.selectbox('Select Country', selection)
if selected_country != 'Overall':
    st.sidebar.subheader("Section")
    st.sidebar.caption("Select the type of information you want to explore.")
    section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
else:
    section = None  # No section is shown when "Overall" is selected
# Resolve the working dataset for the chosen country (None for 'Overall').
if selected_country == 'Overall':
    data = None  # You can set data to None or a specific dataset based on your logic
    section = None  # No section selected when "Overall" is chosen
else:
    country_code = selected_country
    data = data_dict.get(selected_country)
# =====================================================================
# Data Quality section: missing-value %, extreme-value %, and a flag for
# zones reporting generation despite zero/NaN installed capacity.
# =====================================================================
if section == 'Data Quality':
    st.header('Data Quality')
    # Installed capacities for the selected zone, one dict per reference year.
    caps4 = installed_capacities_2024.get(country_code)
    caps5 = installed_capacities_2025.get(country_code)
    st.write(
        "The table below presents the data quality metrics focusing on the percentage "
        "of missing values and the occurrence of extreme or nonsensical values for "
        "the selected country. Additionally, it flags any mismatch between installed "
        "capacity (NaN or 0) and actual data in the dataset."
    )
    # Determine end of data slice (yesterday 23:59:59)
    yesterday = datetime.now(tz).date() - timedelta(days=1)
    end_time = pd.Timestamp(yesterday).replace(hour=23, minute=59, second=59)
    # Filter data
    data_quality = data[data.index <= end_time]
    tech_cols = {
        'Load': ('Load_entsoe', 'Load_forecast_entsoe'),
        'Wind Onshore': ('Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe'),
        'Wind Offshore': ('Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe'),
        'Solar': ('Solar_entsoe', 'Solar_forecast_entsoe'),
    }
    # Columns to hide entirely: technologies with no installed capacity in
    # either year AND no non-zero data at all (nothing worth reporting).
    skip_cols = []
    for tech_key, (act_col, fct_col) in tech_cols.items():
        # only proceed if the columns are in the DataFrame
        if act_col in data_quality.columns and fct_col in data_quality.columns:
            # get installed capacities for 2024 & 2025
            cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
            cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
            # if both years are missing or zero capacity
            if (pd.isna(cap4) or cap4 == 0) and (pd.isna(cap5) or cap5 == 0):
                act = data_quality[act_col]
                fct = data_quality[fct_col]
                # check if actual AND forecast are entirely zero or NaN
                only_zero_or_na = (act.fillna(0) == 0).all() and (fct.fillna(0) == 0).all()
                if only_zero_or_na:
                    skip_cols += [act_col, fct_col]
    # drop any columns flagged for skipping (ignore errors if somehow missing)
    if skip_cols:
        data_quality = data_quality.drop(columns=skip_cols, errors='ignore')
    # Compute missing-value percentage per remaining column.
    missing_values = data_quality.isna().mean() * 100
    missing_values = missing_values.round(2)
    extreme_values = {}
    capacity_mismatch = {}
    # NOTE(review): neg_counts / over_counts are initialized but never filled
    # or read below — confirm whether they can be removed.
    neg_counts = {}
    over_counts = {}
    # Capacity reference switches from the 2024 to the 2025 snapshot here.
    cutoff = pd.Timestamp('2025-01-01')
    # Iterate over columns
    for col in data_quality.columns:
        # Identify technology from the column name.
        if 'Solar' in col:
            tech_key = 'Solar'
        elif 'Wind_onshore' in col:
            tech_key = 'Wind Onshore'
        elif 'Wind_offshore' in col:
            tech_key = 'Wind Offshore'
        elif 'Load' in col:
            tech_key = 'Load'
        else:
            # Unknown column: record NaN metrics and move on.
            extreme_values[col] = np.nan
            capacity_mismatch[col] = np.nan
            continue
        series = data_quality[col]
        # Year masks: True for timestamps that fall under the 2024 capacity.
        mask_2024 = series.index < cutoff
        # Fetch capacity values
        cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
        cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
        # NOTE(review): debug prints left in — they go to the server console.
        print('var:',col)
        print('cap4:',cap4)
        if tech_key == 'Load':
            # Load has no capacity bound; only negative values are nonsensical.
            extreme_pct = round((series < 0).mean() * 100, 2)
            mismatch = np.nan
        else:
            # Per-timestamp capacity: 2024 value before cutoff, 2025 after.
            cap_series = pd.Series(
                np.where(mask_2024, cap4, cap5),
                index=series.index
            )
            # Flags: negative generation, or generation above installed capacity.
            neg = series < 0
            over = (series > cap_series) & cap_series.notna()
            nonsense = neg | over
            extreme_pct = round(nonsense.mean() * 100, 2)
            # Mismatch: non-zero gen when cap missing or zero
            # cap4, cap5 are floats or NaN
            no_cap_2024 = pd.isna(cap4) or (cap4 == 0)
            no_cap_2025 = pd.isna(cap5) or (cap5 == 0)
            # check if there's at least one actual non-zero (treat NaN as 0)
            has_nonzero = (series.fillna(0) != 0).any()
            if no_cap_2024 and no_cap_2025 and has_nonzero:
                mismatch = 100.0
            else:
                mismatch = 0.0
        extreme_values[col] = extreme_pct
        capacity_mismatch[col] = mismatch
    # Format for display: blank where the metric does not apply.
    display_extreme = {col: f"{val:.2f}" if not pd.isna(val) else ''
                       for col, val in extreme_values.items()}
    display_mismatch = {}
    for col, val in capacity_mismatch.items():
        if 'Load' in col:
            display_mismatch[col] = '-'
        else:
            display_mismatch[col] = '🚩' if val == 100.0 else ''
    # Build and render DataFrame
    metrics_df = pd.DataFrame({
        'Missing Values (%)': missing_values,
        'Extreme/Nonsensical Values (%)': pd.Series(display_extreme),
        'Capacity Mismatch Flag': pd.Series(display_mismatch)
    })
    st.dataframe(metrics_df.style.format({
        'Missing Values (%)': '{:.2f}',
        'Extreme/Nonsensical Values (%)': '{}'
    }))
    # Legend for the three metric columns.
    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset',unsafe_allow_html=True)
    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: For Load, this is % of values below 0. For generation, it is negative or out-of-bound (> capacity).',unsafe_allow_html=True)
    st.write('<b><u>Capacity Mismatch Flag</u></b>: Shows "🚩" if installed capacity is `NaN` or `0` but the dataset has non-zero generation. Blank otherwise. For Load columns, it is "-".',unsafe_allow_html=True)
# =====================================================================
# Forecasts Quality section: last-week time series, error scatter,
# accuracy metrics table + radar, and ACF of forecast errors.
# =====================================================================
elif section == 'Forecasts Quality':
    st.header('Forecast Quality')
    # Time series for last 1 week
    last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
    st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
    variable_options, flagged_columns = filter_variable_options(last_week)
    # Dropdown to select the variable
    selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
    actual_col, forecast_col = variable_options[selected_variable]
    # Plotly needs plain Python datetimes/lists, not pandas objects.
    x_vals = last_week.index.to_pydatetime().tolist()
    y_actual = last_week[actual_col].tolist()
    y_forecast = last_week[forecast_col].tolist()
    # then plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=x_vals,y=y_actual,mode="lines",name="Actual"))
    fig.add_trace(go.Scatter(x=x_vals,y=y_forecast,mode="lines",name="Forecast ENTSO-E"))
    fig.update_layout(title=f"Forecasts vs Actual for {selected_variable}",xaxis_title="Date",yaxis_title="Value [MW]")
    st.plotly_chart(fig)
    # Scatter plots for error distribution
    st.subheader('Error Distribution')
    st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
    selected_variable = st.selectbox("Select Variable for Error Distribution", list(variable_options.keys()))
    # Get the corresponding columns for the selected variable
    actual_col, forecast_col = variable_options[selected_variable]
    if forecast_col in data.columns:
        # grab the two series, drop any NaNs, and align on their common timestamps
        obs = data[actual_col].dropna()
        pred = data[forecast_col].dropna()
        idx = obs.index.intersection(pred.index)
        obs = obs.loc[idx]
        pred = pred.loc[idx]
        # convert to pure Python lists
        x_vals = obs.tolist()
        y_vals = pred.tolist()
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=x_vals,y=y_vals,mode='markers',name=f'{selected_variable}'))
        fig.update_layout(title=f'Error Distribution for {selected_variable}',xaxis_title='Observed [MW]',yaxis_title='Forecast ENTSO-E [MW]')
        st.plotly_chart(fig)
    st.subheader('Accuracy Metrics (Sorted by rMAE):')
    date_range = st.date_input(
        "Select Date Range for Metrics Calculation:",
        value=(pd.to_datetime("2024-01-01"), pd.to_datetime(pd.Timestamp('today')))
    )
    # st.date_input returns a 1-tuple mid-selection; require both endpoints.
    if len(date_range) == 2:
        start_date = pd.Timestamp(date_range[0])
        end_date = pd.Timestamp(date_range[1])
    else:
        st.error("Please select a valid date range.")
        st.stop()
    output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. On the right is a radar plot with the rMAE."
    st.write(output_text)
    data_metrics = data.loc[start_date:end_date]
    accuracy_metrics = pd.DataFrame(columns=['MAE', 'RMSE' ,'rMAE'], index=list(variable_options.keys()))
    # Compute MAE / RMSE / rMAE per variable; rMAE is relative to a naive
    # persistence forecast (weekly for Load, daily for generation).
    for variable in variable_options.keys():
        actual_col, forecast_col = variable_options[variable]
        obs = data_metrics[actual_col]
        pred = data_metrics[forecast_col]
        error = pred - obs
        mae = round(np.mean(np.abs(error)),2)
        if 'Load' in actual_col:
            persistence = obs.shift(168)  # Weekly persistence
        else:
            persistence = obs.shift(24)  # Daily persistence
        # Using the whole year's data for rMAE calculations
        rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
        rmse = round(np.sqrt(np.mean((error)**2)), 2)
        row_label = variable #'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
        accuracy_metrics.loc[row_label] = [mae, rmse, rmae]
    accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
    accuracy_metrics.sort_values(by=accuracy_metrics.columns[-1], ascending=True, inplace=True)
    accuracy_metrics = accuracy_metrics.round(4)
    # Side-by-side layout: metrics table (left) and rMAE radar plot (right).
    col1, col2 = st.columns([1, 1])
    with col1:
        # (optional) some top-margin before the table
        st.markdown(
            """
            <style>
            .small-chart-container {
                margin-top: 0px;
            }
            </style>
            """,
            unsafe_allow_html=True
        )
        st.dataframe(accuracy_metrics)
    with col2:
        # prepare the data
        rmae_values = accuracy_metrics['rMAE'].tolist()
        categories = accuracy_metrics.index.tolist()
        # build the radar
        fig = go.Figure(
            go.Scatterpolar(
                r=rmae_values,
                theta=categories,
                fill='toself',
                name='rMAE'
            )
        )
        # 👉 shrink the total size, and give extra left/right margin for your labels
        fig.update_layout(
            width=300,  # make the whole plot a bit smaller
            height=300,
            margin=dict(
                l=50,  # more space on the left for long category names
                r=60,  # and on the right, if needed
                t=20,
                b=20
            ),
            polar=dict(
                angularaxis=dict(
                    tickfont=dict(size=11)  # if you want slightly smaller ticks
                ),
                radialaxis=dict(
                    visible=True,
                    range=[0, max(rmae_values)*1.2]
                )
            ),
            showlegend=False
        )
        # wrap in a div so you can still control vertical spacing via CSS
        st.markdown('<div class="small-chart-container">', unsafe_allow_html=True)
        st.plotly_chart(fig, use_container_width=False)
        st.markdown('</div>', unsafe_allow_html=True)
    st.subheader('ACF plots of Errors')
    st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
    # Dropdown to select the variable
    selected_variable = st.selectbox("Select Variable for ACF of Errors", list(variable_options.keys()))
    # Get the corresponding columns for the selected variable
    actual_col, forecast_col = variable_options[selected_variable]
    # Calculate the error and plot ACF if columns are available
    if forecast_col in data.columns:
        obs = data[actual_col]
        pred = data[forecast_col]
        error = pred - obs
        st.write(f"**ACF of Errors for {selected_variable}**")
        fig, ax = plt.subplots(figsize=(10, 5))
        plot_acf(error.dropna(), ax=ax)
        st.pyplot(fig)
        # Optionally calculate and store ACF values for further analysis if needed
        acf_values = acf(error.dropna(), nlags=240)
# =====================================================================
# Insights section: pairwise scatter plots across the available fields.
# =====================================================================
elif section == 'Insights':
    st.header("Insights")
    st.write('The scatter plots below are created to explore possible correlations between the data fields: Solar, Wind Onshore, Wind Offshore (if any), Load, and Weather Features.')
    # Add a selection box for the data resolution (weekly, daily, hourly)
    # NOTE(review): data_2024 is computed but never used below — confirm intent.
    data_2024 = data[data.index.year == 2024]
    resolution = st.selectbox('Select data resolution:', ['Daily', 'Hourly'])
    # Resample data based on the selected resolution
    if resolution == 'Hourly':
        # NOTE(review): on this path resampled_data aliases `data`, so the
        # column rename below mutates the shared frame — confirm this is OK.
        resampled_data = data
    elif resolution == 'Daily':
        resampled_data = data.resample('D').mean()  # Resample to daily mean
    # Human-friendly column labels for the pairplot axes.
    resampled_data.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in resampled_data.columns]
    # Drop missing values
    selected_df = resampled_data.dropna()
    # Create the scatter plots using seaborn's pairplot
    sns.set_theme(style="ticks")
    pairplot_fig = sns.pairplot(selected_df)
    # Display the pairplot in Streamlit
    st.pyplot(pairplot_fig)
# =====================================================================
# Overall view: Europe-wide net-load-error map and cross-country rMAE radar.
# =====================================================================
elif selected_country == 'Overall':
    def calculate_net_load_error(df, country_code):
        """Return (latest net-load forecast error in MW, its timestamp string).

        Net load = Load minus every available renewable generation column;
        the error is forecast net load minus actual net load at the last
        fully populated timestamp. Returns (None, None) when no complete
        rows remain after dropping NaNs.
        """
        #filter_df = df.dropna()
        # Drop all-NaN columns first so a missing technology (e.g. no
        # offshore wind) does not wipe out every row in the second dropna.
        filter_df = df.dropna(axis=1, how='all')
        filter_df = filter_df.dropna()
        if filter_df.empty:
            # Return something (e.g., None) if there's no data left
            print(country_code)
            return None, None
        net_load = filter_df['Load_entsoe'].copy()
        for col in ['Wind_onshore_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe']:
            if col in filter_df.columns:
                net_load -= filter_df[col]
        net_load_forecast = filter_df['Load_forecast_entsoe'].copy()
        for col in ['Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe', 'Wind_offshore_forecast_entsoe']:
            if col in filter_df.columns:
                net_load_forecast -= filter_df[col]
        # Calculate the error based on the latest values
        error = (net_load_forecast - net_load).iloc[-1]
        date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")  # Get the latest date in string format
        return error, date
    def plot_net_load_error_map(data_dict):
        """Render a Folium choropleth of each zone's latest net-load forecast error.

        Zone geometries come from entsoe.geo for standard zones and from local
        geojson files for zones missing there; zones without a computed error
        are drawn grey.
        """
        # 1) compute your errors as before
        missing_zones={'ME','IE_SEM','MK','CY','BA','AL','XK'}
        net_load_errors = {
            country_code: calculate_net_load_error(data, country_code)
            for country_code, data in data_dict.items()
        }
        df_net_load_error = pd.DataFrame({
            "zoneName": list(net_load_errors),
            "net_load_error": [v[0] for v in net_load_errors.values()],
            "date": [v[1] for v in net_load_errors.values()],
        })
        # 2) split your zones into standard vs. fallback
        selected = list(data_dict.keys())
        standard_zones = [z for z in selected if z not in missing_zones]
        fallback_zones = [z for z in selected if z in missing_zones]
        # 3a) load the standard ones with entsoe.load_zones
        date = pd.Timestamp.now()
        geo_std = load_zones(standard_zones, date).reset_index()
        # 3b) manually load the fallback ones
        gdfs = []
        for z in fallback_zones:
            fn = f"{z}.geojson"
            path = f'./geojson_missing/{fn}'
            g = gpd.read_file(path)
            g['zoneName'] = z
            gdfs.append(g)
        geo_fb = pd.concat(gdfs, ignore_index=True) if gdfs else gpd.GeoDataFrame()
        # 4) combine
        geo_data = pd.concat([geo_std, geo_fb], ignore_index=True)
        # Merge net_load_error and date into geo_data
        geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
        # Initialize the Folium map (centered roughly on France)
        m = folium.Map(location=[46.6034, 1.8883], zoom_start=4, tiles="cartodb positron")
        # Calculate the maximum absolute net load error for normalization
        max_value = df_net_load_error['net_load_error'].abs().max()
        # Diverging colormap: blue = over-generation error, red = under.
        colormap = branca.colormap.LinearColormap(
            colors=['#0D92F4', 'white', '#C62E2E'],  # Light blue to white to light coral
            vmin=-max_value,
            vmax=max_value,
            caption='Net Load Error [MW]'
        )
        # Define the style function
        def style_function(feature):
            # Grey out zones without a computed error; otherwise color by scale.
            net_load_error = feature['properties']['net_load_error']
            if net_load_error is None:
                return {'fillOpacity': 0.5, 'color': 'grey', 'weight': 0.5}
            else:
                fill_color = colormap(net_load_error)
                return {
                    'fillColor': fill_color,
                    'fillOpacity': 0.8,  # Set a constant opacity
                    'color': 'black',
                    'weight': 0.5
                }
        # Add the GeoJson layer with the custom style_function
        folium.GeoJson(
            geo_data,
            style_function=style_function,
            tooltip=folium.GeoJsonTooltip(
                fields=["zoneName", "net_load_error", "date"],
                aliases=["Country:", "Net Load Error [MW]:", "Date:"],
                localize=True
            )
        ).add_to(m)
        # Add the colormap to the map
        colormap.add_to(m)
        # Display the map (return value intentionally discarded)
        _=st_folium(m, width=700, height=600)
    def calculate_mae(actual, forecast):
        """Mean absolute error between observed and forecast series."""
        return np.mean(np.abs(actual - forecast))
    def calculate_persistence_mae(data, shift_hours):
        """MAE of a naive persistence forecast that repeats the value from *shift_hours* ago."""
        return np.mean(np.abs(data - data.shift(shift_hours)))
| def calculate_rmae_for_country(df, variable_options): | |
| rmae = {} | |
| rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168) | |
| for variable in variable_options.keys(): | |
| actual_col, forecast_col = variable_options[variable] | |
| rmae[variable] = calculate_mae(df[actual_col], df[forecast_col]) / calculate_persistence_mae(df[actual_col], 24) | |
| all_opt = ["Load", "Solar", "Wind Onshore", "Wind Offshore"] | |
| not_in_list2 = [elem for elem in all_opt if elem not in variable_options.keys()] | |
| for ele in not_in_list2: | |
| rmae[ele] = None | |
| return rmae | |
    def create_rmae_dataframe(data_dict):
        """Build a per-country rMAE table (columns: Country, Load, Wind Onshore,
        Wind Offshore, Solar) from each zone's DataFrame."""
        rmae_values = {'Country': [], 'Load': [], 'Wind Onshore': [], 'Wind Offshore': [], 'Solar': []}
        for country_name, df in data_dict.items():
            # Persistence math needs complete rows, so drop NaNs up front.
            df_filtered = df.dropna()
            print(country_name)
            variable_options, flagged_columns = filter_variable_options(df_filtered)
            rmae = calculate_rmae_for_country(df_filtered, variable_options)
            rmae_values['Country'].append(country_name)
            for var, met in rmae.items():
                rmae_values[var].append(met)
        return pd.DataFrame(rmae_values)
    def plot_rmae_radar_chart(rmae_df):
        """Render one radar trace per country over the rMAE axes present in *rmae_df*."""
        fig = go.Figure()
        # Dynamically build the axis list, skipping any variable that is NaN
        # for every country (e.g. offshore wind in landlocked zones).
        angles = ['Load']
        if not rmae_df['Wind Offshore'].isna().all():  # include only if some country has offshore data
            angles.append('Wind Offshore')
        if not rmae_df['Wind Onshore'].isna().all():  # include only if some country has onshore data
            angles.append('Wind Onshore')
        if not rmae_df['Solar'].isna().all():  # include only if some country has solar data
            angles.append('Solar')
        for _, row in rmae_df.iterrows():
            fig.add_trace(go.Scatterpolar(
                r=[row[angle] for angle in angles],
                theta=angles,
                fill='toself',
                name=row['Country']
            ))
        fig.update_layout(
            polar=dict(
                radialaxis=dict(visible=True, range=[0, 1.2])
            ),
            showlegend=True,
            title="rMAE Radar Chart by Country"
        )
        st.plotly_chart(fig)
    # --- Overall page body: map first, then the cross-country radar. ---
    st.subheader("Net Load Error Map")
    st.write("""
    The net load error map highlights the error in the forecasted versus actual net load for each country.
    Hover over each country to see details on the latest net load error and the timestamp (with the time zone of the corresponding country) of the last recorded data.
    """)
    plot_net_load_error_map(data_dict)
    st.subheader("rMAE of Forecasts published on ENTSO-E TP")
    st.write("""The rMAE of Forecasts chart compares the forecast accuracy of the predictions published by ENTSO-E Transparency Platform for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark. It shows the rMAE for onshore wind, offshore wind (if any), solar, and load demand, highlighting how well forecasts perform relative to a basic persistence model across these countries and energy sectors.""")
    rmae_df = create_rmae_dataframe(data_dict)
    # Add multiselect for country selection
    selected_countries = st.multiselect("Select Countries for Radar Plot", options=rmae_df['Country'].unique(), default=['BE', 'DE_LU'])
    # Filter the dataframe based on the selected countries
    filtered_rmae_df = rmae_df[rmae_df['Country'].isin(selected_countries)]
    # Plot radar chart for the selected countries
    plot_rmae_radar_chart(filtered_rmae_df)