Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,35 +2,120 @@ import requests
|
|
| 2 |
import pandas as pd
|
| 3 |
from io import StringIO
|
| 4 |
import streamlit as st
|
| 5 |
-
import os
|
| 6 |
import plotly.express as px
|
| 7 |
import plotly.graph_objects as go
|
| 8 |
-
import plotly.colors as pc
|
| 9 |
import numpy as np
|
| 10 |
-
from sklearn.metrics import mean_squared_error
|
| 11 |
from statsmodels.tsa.stattools import acf
|
| 12 |
from statsmodels.graphics.tsaplots import plot_acf
|
| 13 |
import matplotlib.pyplot as plt
|
| 14 |
-
from datetime import datetime
|
| 15 |
import folium
|
| 16 |
-
import seaborn as sns
|
| 17 |
from streamlit_folium import st_folium
|
| 18 |
-
|
|
|
|
| 19 |
from entsoe.geo import load_zones
|
| 20 |
-
from branca.colormap import LinearColormap
|
| 21 |
import branca
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
def
|
| 25 |
-
now = datetime.now()
|
| 26 |
-
current_hour = now.hour
|
| 27 |
-
current_minute = now.minute
|
| 28 |
-
# Return the hour and a boolean indicating if it is after the 10th minute
|
| 29 |
-
return current_hour, current_minute >= 10
|
| 30 |
|
| 31 |
-
|
| 32 |
-
@st.cache_data(show_spinner=False)
|
| 33 |
-
def load_GitHub(github_token, file_name, hour, after_10_min):
|
| 34 |
url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
|
| 35 |
headers = {'Authorization': f'token {github_token}'}
|
| 36 |
|
|
@@ -42,102 +127,52 @@ def load_GitHub(github_token, file_name, hour, after_10_min):
|
|
| 42 |
if 'Date' in df.columns:
|
| 43 |
df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
|
| 44 |
df.set_index('Date', inplace=True) # Set 'Date' column as the index
|
| 45 |
-
|
| 46 |
-
|
|
|
|
| 47 |
else:
|
| 48 |
print(f"Failed to download {file_name}. Status code: {response.status_code}")
|
| 49 |
return None
|
| 50 |
-
|
| 51 |
-
@st.cache_data(show_spinner=False)
|
| 52 |
-
def load_forecast(github_token, hour, after_10_min):
|
| 53 |
-
predictions_dict = {}
|
| 54 |
-
for hour in range(24):
|
| 55 |
-
file_name = f'Predictions_{hour}h.csv'
|
| 56 |
-
df = load_GitHub(github_token, file_name, hour, after_10_min)
|
| 57 |
-
if df is not None:
|
| 58 |
-
predictions_dict[file_name] = df
|
| 59 |
-
return predictions_dict
|
| 60 |
-
|
| 61 |
-
def convert_European_time(data, time_zone):
|
| 62 |
-
data.index = pd.to_datetime(data.index, utc=True)
|
| 63 |
-
data.index = data.index.tz_convert(time_zone)
|
| 64 |
-
data.index = data.index.tz_localize(None)
|
| 65 |
-
return data
|
| 66 |
|
| 67 |
-
def
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
}
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
df.columns = df.columns.str.replace(original, simplified, regex=True)
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
return df
|
| 94 |
|
| 95 |
github_token = st.secrets["GitHub_Token_KUL_Margarida"]
|
|
|
|
| 96 |
|
| 97 |
if github_token:
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', hour, after_10_min)
|
| 104 |
-
Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv', hour, after_10_min)
|
| 105 |
-
Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv', hour, after_10_min)
|
| 106 |
-
Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv', hour, after_10_min)
|
| 107 |
-
Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
|
| 108 |
-
Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
|
| 109 |
-
Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
|
| 110 |
-
Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min)
|
| 111 |
-
Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min)
|
| 112 |
-
Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min)
|
| 113 |
-
Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min)
|
| 114 |
-
Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min)
|
| 115 |
-
Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min)
|
| 116 |
-
Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min)
|
| 117 |
-
Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min)
|
| 118 |
-
|
| 119 |
-
Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
|
| 120 |
-
Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
|
| 121 |
-
Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
|
| 122 |
-
Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
|
| 123 |
-
Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
|
| 124 |
-
Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
|
| 125 |
-
Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
|
| 126 |
-
Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome')
|
| 127 |
-
Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome')
|
| 128 |
-
Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome')
|
| 129 |
-
Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome')
|
| 130 |
-
Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome')
|
| 131 |
-
Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome')
|
| 132 |
-
Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen')
|
| 133 |
-
Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen')
|
| 134 |
-
|
| 135 |
|
| 136 |
else:
|
| 137 |
print("Please enter your GitHub Personal Access Token to proceed.")
|
| 138 |
|
| 139 |
-
|
| 140 |
-
col1, col2 = st.columns([5, 2]) # Adjust the ratio to better fit your layout needs
|
| 141 |
with col1:
|
| 142 |
st.title("Transparency++")
|
| 143 |
|
|
@@ -150,85 +185,19 @@ with col2:
|
|
| 150 |
with col2_2:
|
| 151 |
st.image("energyville_logo.png", width=100)
|
| 152 |
|
| 153 |
-
|
| 154 |
-
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**")
|
| 155 |
-
|
| 156 |
-
upper_space.markdown("""
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
""", unsafe_allow_html=True)
|
| 160 |
-
|
| 161 |
-
countries = {
|
| 162 |
-
'Overall': 'Overall',
|
| 163 |
-
'Austria': 'AT',
|
| 164 |
-
'Belgium': 'BE',
|
| 165 |
-
'Denmark 1': 'DK_1',
|
| 166 |
-
'Denmark 2': 'DK_2',
|
| 167 |
-
'France': 'FR',
|
| 168 |
-
'Germany-Luxembourg': 'DE_LU',
|
| 169 |
-
'Italy Calabria': 'IT_CALA',
|
| 170 |
-
'Italy Central North': 'IT_CNOR',
|
| 171 |
-
'Italy Central South': 'IT_CSUD',
|
| 172 |
-
'Italy North': 'IT_NORD',
|
| 173 |
-
'Italy Sicily': 'IT_SICI',
|
| 174 |
-
'Italy South': 'IT_SUD',
|
| 175 |
-
'Netherlands': 'NL',
|
| 176 |
-
'Portugal': 'PT',
|
| 177 |
-
'Spain': 'ES',
|
| 178 |
-
}
|
| 179 |
-
|
| 180 |
-
data_dict = {
|
| 181 |
-
'BE': Data_BE,
|
| 182 |
-
'FR': Data_FR,
|
| 183 |
-
'DE_LU': Data_DE,
|
| 184 |
-
'NL': Data_NL,
|
| 185 |
-
'PT': Data_PT,
|
| 186 |
-
'AT': Data_AT,
|
| 187 |
-
'ES': Data_ES,
|
| 188 |
-
'IT_CALA': Data_IT_CALA,
|
| 189 |
-
'IT_CNOR': Data_IT_CNOR,
|
| 190 |
-
'IT_CSUD': Data_IT_CSUD,
|
| 191 |
-
'IT_NORD': Data_IT_NORD,
|
| 192 |
-
'IT_SICI': Data_IT_SICI,
|
| 193 |
-
'IT_SUD': Data_IT_SUD,
|
| 194 |
-
'DK_1': Data_DK_1,
|
| 195 |
-
'DK_2': Data_DK_2,
|
| 196 |
-
}
|
| 197 |
-
|
| 198 |
-
countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2']
|
| 199 |
-
countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',]
|
| 200 |
-
|
| 201 |
-
installed_capacities = {
|
| 202 |
-
'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
|
| 203 |
-
'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
|
| 204 |
-
'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
|
| 205 |
-
'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
|
| 206 |
-
'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
|
| 207 |
-
'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
|
| 208 |
-
'AT': { 'Solar': 7294, 'Wind Onshore': 4021 },
|
| 209 |
-
'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112},
|
| 210 |
-
'DK_2': { 'Solar': 992, 'Wind Offshore': 1045, 'Wind Onshore': 748},
|
| 211 |
-
}
|
| 212 |
-
|
| 213 |
-
forecast_columns_all_RES = [
|
| 214 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 215 |
-
|
| 216 |
-
forecast_columns_no_wind_offshore = [
|
| 217 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 218 |
-
|
| 219 |
|
| 220 |
st.sidebar.header('Filters')
|
| 221 |
|
| 222 |
st.sidebar.subheader("Select Country")
|
| 223 |
st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
| 226 |
-
|
| 227 |
-
# Sidebar with radio buttons for different sections
|
| 228 |
if selected_country != 'Overall':
|
| 229 |
st.sidebar.subheader("Section")
|
| 230 |
st.sidebar.caption("Select the type of information you want to explore.")
|
| 231 |
-
section = st.sidebar.radio('
|
| 232 |
else:
|
| 233 |
section = None # No section is shown when "Overall" is selected
|
| 234 |
|
|
@@ -236,124 +205,172 @@ if selected_country == 'Overall':
|
|
| 236 |
data = None # You can set data to None or a specific dataset based on your logic
|
| 237 |
section = None # No section selected when "Overall" is chosen
|
| 238 |
else:
|
| 239 |
-
country_code =
|
| 240 |
-
data = data_dict.get(
|
| 241 |
-
if country_code in countries_all_RES:
|
| 242 |
-
forecast_columns = forecast_columns_all_RES
|
| 243 |
-
elif country_code in countries_no_offshore:
|
| 244 |
-
forecast_columns = forecast_columns_no_wind_offshore
|
| 245 |
-
if country_code == 'BE':
|
| 246 |
-
weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
|
| 247 |
-
data['Temperature'] = data['temperature_2m_8']
|
| 248 |
-
data['Wind Speed Onshore'] = data['wind_speed_100m_8']
|
| 249 |
-
data['Wind Speed Offshore'] = data['wind_speed_100m_4']
|
| 250 |
-
else:
|
| 251 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
| 252 |
-
data['Temperature'] = data['temperature_2m']
|
| 253 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
| 254 |
-
|
| 255 |
|
| 256 |
if section == 'Data Quality':
|
| 257 |
-
|
| 258 |
st.header('Data Quality')
|
| 259 |
-
|
| 260 |
-
st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
|
| 261 |
|
| 262 |
-
|
|
|
|
|
|
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
missing_values = missing_values.round(2)
|
| 270 |
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
-
st.
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
elif section == 'Forecasts Quality':
|
| 319 |
-
|
| 320 |
st.header('Forecast Quality')
|
| 321 |
|
| 322 |
# Time series for last 1 week
|
| 323 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
| 324 |
st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
| 325 |
-
|
| 326 |
-
# Options for selecting the data to display
|
| 327 |
-
if country_code in countries_all_RES:
|
| 328 |
-
variable_options = {
|
| 329 |
-
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
| 330 |
-
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
| 331 |
-
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
| 332 |
-
"Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
|
| 333 |
-
}
|
| 334 |
-
elif country_code in countries_no_offshore:
|
| 335 |
-
variable_options = {
|
| 336 |
-
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
| 337 |
-
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
| 338 |
-
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
| 339 |
-
}
|
| 340 |
-
else:
|
| 341 |
-
print('Country code doesnt correspond.')
|
| 342 |
-
|
| 343 |
# Dropdown to select the variable
|
| 344 |
selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
|
| 345 |
-
|
| 346 |
-
# Get the corresponding columns for the selected variable
|
| 347 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 348 |
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
fig = go.Figure()
|
| 351 |
-
fig.add_trace(go.Scatter(x=
|
| 352 |
-
fig.add_trace(go.Scatter(x=
|
| 353 |
-
fig.update_layout(title=f
|
| 354 |
-
|
| 355 |
st.plotly_chart(fig)
|
| 356 |
|
|
|
|
| 357 |
# Scatter plots for error distribution
|
| 358 |
st.subheader('Error Distribution')
|
| 359 |
st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
|
|
@@ -362,19 +379,24 @@ elif section == 'Forecasts Quality':
|
|
| 362 |
# Get the corresponding columns for the selected variable
|
| 363 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 364 |
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
st.plotly_chart(fig)
|
| 377 |
-
|
| 378 |
st.subheader('Accuracy Metrics (Sorted by rMAE):')
|
| 379 |
|
| 380 |
date_range = st.date_input(
|
|
@@ -388,99 +410,92 @@ elif section == 'Forecasts Quality':
|
|
| 388 |
else:
|
| 389 |
st.error("Please select a valid date range.")
|
| 390 |
st.stop()
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. "
|
| 394 |
st.write(output_text)
|
| 395 |
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
if country_code in countries_all_RES:
|
| 399 |
-
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
|
| 400 |
-
elif country_code in countries_no_offshore:
|
| 401 |
-
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
|
| 402 |
-
else:
|
| 403 |
-
print('Country code doesnt correspond.')
|
| 404 |
|
|
|
|
| 405 |
|
| 406 |
-
for
|
| 407 |
-
actual_col =
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
|
| 424 |
-
accuracy_metrics.loc[row_label] = [mae, rmae]
|
| 425 |
|
| 426 |
accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
|
| 427 |
-
accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
|
| 428 |
accuracy_metrics = accuracy_metrics.round(4)
|
| 429 |
|
| 430 |
-
col1, col2 = st.columns([1,
|
| 431 |
|
| 432 |
with col1:
|
|
|
|
| 433 |
st.markdown(
|
| 434 |
"""
|
| 435 |
<style>
|
| 436 |
-
.small-chart {
|
| 437 |
-
margin-top:
|
| 438 |
}
|
| 439 |
</style>
|
| 440 |
""",
|
| 441 |
unsafe_allow_html=True
|
| 442 |
)
|
| 443 |
st.dataframe(accuracy_metrics)
|
| 444 |
-
st.markdown(
|
| 445 |
-
"""
|
| 446 |
-
<style>
|
| 447 |
-
.small-chart {
|
| 448 |
-
margin-top: -30px; /* Adjust this value as needed */
|
| 449 |
-
}
|
| 450 |
-
</style>
|
| 451 |
-
""",
|
| 452 |
-
unsafe_allow_html=True
|
| 453 |
-
)
|
| 454 |
|
| 455 |
with col2:
|
| 456 |
-
|
| 457 |
-
# Prepare data for the radar chart
|
| 458 |
rmae_values = accuracy_metrics['rMAE'].tolist()
|
| 459 |
-
categories
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
fig
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
| 470 |
fig.update_layout(
|
| 471 |
-
width=
|
| 472 |
-
height=
|
| 473 |
-
margin=dict(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
polar=dict(
|
|
|
|
|
|
|
|
|
|
| 475 |
radialaxis=dict(
|
| 476 |
visible=True,
|
| 477 |
-
range=[0, max(rmae_values)
|
| 478 |
-
)
|
|
|
|
| 479 |
showlegend=False
|
| 480 |
)
|
| 481 |
-
|
| 482 |
-
#
|
| 483 |
-
st.
|
|
|
|
|
|
|
| 484 |
|
| 485 |
st.subheader('ACF plots of Errors')
|
| 486 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
|
@@ -504,7 +519,7 @@ elif section == 'Forecasts Quality':
|
|
| 504 |
|
| 505 |
# Optionally calculate and store ACF values for further analysis if needed
|
| 506 |
acf_values = acf(error.dropna(), nlags=240)
|
| 507 |
-
|
| 508 |
elif section == 'Insights':
|
| 509 |
st.header("Insights")
|
| 510 |
|
|
@@ -516,23 +531,15 @@ elif section == 'Insights':
|
|
| 516 |
|
| 517 |
# Resample data based on the selected resolution
|
| 518 |
if resolution == 'Hourly':
|
| 519 |
-
resampled_data =
|
| 520 |
elif resolution == 'Daily':
|
| 521 |
-
resampled_data =
|
| 522 |
|
| 523 |
-
# Select the necessary columns for the scatter plot
|
| 524 |
-
if country_code in countries_all_RES:
|
| 525 |
-
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
| 526 |
-
elif country_code in countries_no_offshore:
|
| 527 |
-
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
| 528 |
-
else:
|
| 529 |
-
print('Country code doesnt correspond.')
|
| 530 |
|
| 531 |
-
|
| 532 |
-
selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
|
| 533 |
|
| 534 |
# Drop missing values
|
| 535 |
-
selected_df =
|
| 536 |
|
| 537 |
# Create the scatter plots using seaborn's pairplot
|
| 538 |
sns.set_theme(style="ticks")
|
|
@@ -543,30 +550,24 @@ elif section == 'Insights':
|
|
| 543 |
|
| 544 |
elif selected_country == 'Overall':
|
| 545 |
|
| 546 |
-
def get_forecast_columns(country_code):
|
| 547 |
-
if country_code in countries_all_RES:
|
| 548 |
-
return forecast_columns_all_RES
|
| 549 |
-
elif country_code in countries_no_offshore:
|
| 550 |
-
return forecast_columns_no_wind_offshore
|
| 551 |
-
else:
|
| 552 |
-
print('Country code doesnt correspond.')
|
| 553 |
-
|
| 554 |
def calculate_net_load_error(df, country_code):
|
| 555 |
-
|
| 556 |
-
filter_df = df
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
|
|
|
|
|
|
| 570 |
# Calculate the error based on the latest values
|
| 571 |
error = (net_load_forecast - net_load).iloc[-1]
|
| 572 |
date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format
|
|
@@ -574,29 +575,41 @@ elif selected_country == 'Overall':
|
|
| 574 |
return error, date
|
| 575 |
|
| 576 |
def plot_net_load_error_map(data_dict):
|
| 577 |
-
#
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
df_net_load_error = pd.DataFrame({
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
})
|
| 588 |
|
| 589 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
date = pd.Timestamp.now()
|
| 591 |
-
|
| 592 |
|
| 593 |
-
#
|
| 594 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
-
# Map country codes to country names
|
| 597 |
-
countries_code_to_name = {v: k for k, v in countries.items()}
|
| 598 |
-
geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name)
|
| 599 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
# Merge net_load_error and date into geo_data
|
| 601 |
geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
|
| 602 |
|
|
@@ -633,7 +646,7 @@ elif selected_country == 'Overall':
|
|
| 633 |
geo_data,
|
| 634 |
style_function=style_function,
|
| 635 |
tooltip=folium.GeoJsonTooltip(
|
| 636 |
-
fields=["
|
| 637 |
aliases=["Country:", "Net Load Error [MW]:", "Date:"],
|
| 638 |
localize=True
|
| 639 |
)
|
|
@@ -643,7 +656,7 @@ elif selected_country == 'Overall':
|
|
| 643 |
colormap.add_to(m)
|
| 644 |
|
| 645 |
# Display the map
|
| 646 |
-
_
|
| 647 |
|
| 648 |
def calculate_mae(actual, forecast):
|
| 649 |
return np.mean(np.abs(actual - forecast))
|
|
@@ -651,40 +664,36 @@ elif selected_country == 'Overall':
|
|
| 651 |
def calculate_persistence_mae(data, shift_hours):
|
| 652 |
return np.mean(np.abs(data - data.shift(shift_hours)))
|
| 653 |
|
| 654 |
-
def calculate_rmae_for_country(df):
|
| 655 |
rmae = {}
|
| 656 |
rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
rmae['Wind_offshore'] = calculate_mae(df['Wind_offshore_entsoe'], df['Wind_offshore_forecast_entsoe']) / calculate_persistence_mae(df['Wind_offshore_entsoe'], 24)
|
| 662 |
-
else:
|
| 663 |
-
rmae['Wind_offshore'] = None # Mark as None if not applicable
|
| 664 |
-
|
| 665 |
-
rmae['Solar'] = calculate_mae(df['Solar_entsoe'], df['Solar_forecast_entsoe']) / calculate_persistence_mae(df['Solar_entsoe'], 24)
|
| 666 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
return rmae
|
| 668 |
|
| 669 |
def create_rmae_dataframe(data_dict):
|
| 670 |
|
| 671 |
-
rmae_values = {'Country': [], 'Load': [], '
|
| 672 |
|
| 673 |
for country_name, df in data_dict.items():
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
|
|
|
| 677 |
|
| 678 |
rmae_values['Country'].append(country_name)
|
| 679 |
-
rmae_values['Load'].append(rmae['Load'])
|
| 680 |
-
rmae_values['Wind_onshore'].append(rmae['Wind_onshore'])
|
| 681 |
-
rmae_values['Solar'].append(rmae['Solar'])
|
| 682 |
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
rmae_values['Wind_offshore'].append(rmae['Wind_offshore'])
|
| 686 |
-
else:
|
| 687 |
-
rmae_values['Wind_offshore'].append(np.nan) # Insert NaN for countries without offshore wind
|
| 688 |
|
| 689 |
return pd.DataFrame(rmae_values)
|
| 690 |
|
|
@@ -692,10 +701,14 @@ elif selected_country == 'Overall':
|
|
| 692 |
fig = go.Figure()
|
| 693 |
|
| 694 |
# Dynamically adjust angles to exclude Wind_offshore if all values are NaN
|
| 695 |
-
angles = ['Load'
|
| 696 |
-
if not rmae_df['
|
| 697 |
-
angles.append('
|
| 698 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
for _, row in rmae_df.iterrows():
|
| 700 |
fig.add_trace(go.Scatterpolar(
|
| 701 |
r=[row[angle] for angle in angles],
|
|
@@ -735,7 +748,3 @@ elif selected_country == 'Overall':
|
|
| 735 |
|
| 736 |
# Plot radar chart for the selected countries
|
| 737 |
plot_rmae_radar_chart(filtered_rmae_df)
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from io import StringIO
|
| 4 |
import streamlit as st
|
|
|
|
| 5 |
import plotly.express as px
|
| 6 |
import plotly.graph_objects as go
|
|
|
|
| 7 |
import numpy as np
|
|
|
|
| 8 |
from statsmodels.tsa.stattools import acf
|
| 9 |
from statsmodels.graphics.tsaplots import plot_acf
|
| 10 |
import matplotlib.pyplot as plt
|
|
|
|
| 11 |
import folium
|
|
|
|
| 12 |
from streamlit_folium import st_folium
|
| 13 |
+
import seaborn as sns
|
| 14 |
+
import datetime
|
| 15 |
from entsoe.geo import load_zones
|
|
|
|
| 16 |
import branca
|
| 17 |
+
import pytz
|
| 18 |
+
import time
|
| 19 |
+
from entsoe import EntsoePandasClient
|
| 20 |
+
import geopandas as gpd
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
tz = pytz.timezone('Europe/Brussels')
|
| 24 |
+
|
| 25 |
+
def load_capacity_csv(path: str) -> dict:
|
| 26 |
+
"""Load installed capacities CSV into a dict: Country -> {tech: value} """
|
| 27 |
+
df = pd.read_csv(path, index_col='Country')
|
| 28 |
+
# Ensure numeric and handle missing
|
| 29 |
+
df = df.replace({"NaN": np.nan}).astype(float)
|
| 30 |
+
return df.to_dict(orient='index')
|
| 31 |
+
|
| 32 |
+
# Load installed capacities from CSV files
|
| 33 |
+
installed_capacities_2024 = load_capacity_csv('installed_capacities_2024.csv')
|
| 34 |
+
installed_capacities_2025 = load_capacity_csv('installed_capacities_2025.csv')
|
| 35 |
+
|
| 36 |
+
TECHS = ['Solar', 'Wind Offshore', 'Wind Onshore']
|
| 37 |
+
#countries = [ 'AT', 'BE', 'NL', 'BG', 'HR', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
|
| 38 |
+
#'EE', 'FI', 'FR', 'GR', 'HU', 'IT_CALA', 'IT_CNOR',
|
| 39 |
+
#'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD', 'LV', 'LT',
|
| 40 |
+
#'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5', 'PL', 'PT', 'RO',
|
| 41 |
+
#'SE_1', 'SE_2', 'SE_3', 'SE_4', 'RS', 'SK', 'SI', 'ES', 'CH', 'ME','IE_SEM','MK','CY','BA','AL','XK']
|
| 42 |
+
|
| 43 |
+
countries = ['AT', 'BE', 'DE_LU', 'DK_1', 'DK_2', 'FR', 'IT_CALA', 'IT_CNOR',
|
| 44 |
+
'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
|
| 45 |
+
'NL', 'PT', 'ES']
|
| 46 |
+
|
| 47 |
+
def get_time_zone(country_code):
|
| 48 |
+
|
| 49 |
+
tz_map = {
|
| 50 |
+
'AL': 'Europe/Tirane',
|
| 51 |
+
'AT': 'Europe/Vienna',
|
| 52 |
+
'BE': 'Europe/Brussels',
|
| 53 |
+
'BA': 'Europe/Sarajevo',
|
| 54 |
+
'BG': 'Europe/Sofia',
|
| 55 |
+
'HR': 'Europe/Zagreb',
|
| 56 |
+
'CY': 'Asia/Nicosia',
|
| 57 |
+
'CZ': 'Europe/Prague',
|
| 58 |
+
'DE_LU': 'Europe/Berlin',
|
| 59 |
+
'DK_1': 'Europe/Copenhagen',
|
| 60 |
+
'DK_2': 'Europe/Copenhagen',
|
| 61 |
+
'EE': 'Europe/Tallinn',
|
| 62 |
+
'FI': 'Europe/Helsinki',
|
| 63 |
+
'MK': 'Europe/Skopje',
|
| 64 |
+
'FR': 'Europe/Paris',
|
| 65 |
+
'GR': 'Europe/Athens',
|
| 66 |
+
'HU': 'Europe/Budapest',
|
| 67 |
+
'IS': 'Atlantic/Reykjavik',
|
| 68 |
+
'IE_SEM': 'Europe/Dublin',
|
| 69 |
+
'IT_CALA': 'Europe/Rome',
|
| 70 |
+
'IT_CNOR': 'Europe/Rome',
|
| 71 |
+
'IT_CSUD': 'Europe/Rome',
|
| 72 |
+
'IT_NORD': 'Europe/Rome',
|
| 73 |
+
'IT_SARD': 'Europe/Rome',
|
| 74 |
+
'IT_SICI': 'Europe/Rome',
|
| 75 |
+
'IT_SUD': 'Europe/Rome',
|
| 76 |
+
'LV': 'Europe/Riga',
|
| 77 |
+
'LT': 'Europe/Vilnius',
|
| 78 |
+
'ME': 'Europe/Podgorica',
|
| 79 |
+
'NL': 'Europe/Amsterdam',
|
| 80 |
+
'NO_1': 'Europe/Oslo',
|
| 81 |
+
'NO_2': 'Europe/Oslo',
|
| 82 |
+
'NO_3': 'Europe/Oslo',
|
| 83 |
+
'NO_4': 'Europe/Oslo',
|
| 84 |
+
'NO_5': 'Europe/Oslo',
|
| 85 |
+
'PL': 'Europe/Warsaw',
|
| 86 |
+
'PT': 'Europe/Lisbon',
|
| 87 |
+
'MD': 'Europe/Chisinau',
|
| 88 |
+
'RO': 'Europe/Bucharest',
|
| 89 |
+
'SE_1': 'Europe/Stockholm',
|
| 90 |
+
'SE_2': 'Europe/Stockholm',
|
| 91 |
+
'SE_3': 'Europe/Stockholm',
|
| 92 |
+
'SE_4': 'Europe/Stockholm',
|
| 93 |
+
'RS': 'Europe/Belgrade',
|
| 94 |
+
'SK': 'Europe/Bratislava',
|
| 95 |
+
'SI': 'Europe/Ljubljana',
|
| 96 |
+
'ES': 'Europe/Madrid',
|
| 97 |
+
'CH': 'Europe/Zurich',
|
| 98 |
+
'XK': 'Europe/Rome'
|
| 99 |
+
}
|
| 100 |
+
if country_code in tz_map:
|
| 101 |
+
return tz_map[country_code]
|
| 102 |
+
else:
|
| 103 |
+
raise ValueError(f"Time zone for country code {country_code} is not defined.")
|
| 104 |
+
|
| 105 |
+
def convert_European_time(data, bdz):
|
| 106 |
+
time_zone = get_time_zone(bdz)
|
| 107 |
+
data.index = pd.to_datetime(data.index, utc=True)
|
| 108 |
+
data.index = data.index.tz_convert(time_zone)
|
| 109 |
+
data.index = data.index.tz_localize(None)
|
| 110 |
+
return data
|
| 111 |
|
| 112 |
+
def filter_dataframe(df):
|
| 113 |
+
allowed_columns = {"Load_entsoe", "Load_forecast_entsoe", "Solar_entsoe", "Solar_forecast_entsoe", "Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe", "Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"}
|
| 114 |
+
return df[[col for col in df.columns if col in allowed_columns]]
|
| 115 |
|
| 116 |
+
def load_GitHub(github_token, bdz):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
+
file_name=f'{bdz}_Entsoe_UTC.csv'
|
|
|
|
|
|
|
| 119 |
url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
|
| 120 |
headers = {'Authorization': f'token {github_token}'}
|
| 121 |
|
|
|
|
| 127 |
if 'Date' in df.columns:
|
| 128 |
df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
|
| 129 |
df.set_index('Date', inplace=True) # Set 'Date' column as the index
|
| 130 |
+
df=filter_dataframe(df)
|
| 131 |
+
df=convert_European_time(df, bdz)
|
| 132 |
+
return df[df.index >= pd.Timestamp('2024-01-01')]
|
| 133 |
else:
|
| 134 |
print(f"Failed to download {file_name}. Status code: {response.status_code}")
|
| 135 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
+
def filter_variable_options(df):
|
| 138 |
+
all_options = {
|
| 139 |
+
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
| 140 |
+
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
| 141 |
+
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
| 142 |
+
"Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"),
|
| 143 |
}
|
| 144 |
|
| 145 |
+
variable_options = {}
|
| 146 |
+
flagged_columns = []
|
|
|
|
| 147 |
|
| 148 |
+
for key, (col1, col2) in all_options.items():
|
| 149 |
+
col1_exists = col1 in df.columns and not df[col1].isna().all()
|
| 150 |
+
col2_exists = col2 in df.columns and not df[col2].isna().all()
|
| 151 |
+
if col1_exists and col2_exists:
|
| 152 |
+
variable_options[key] = (col1, col2)
|
| 153 |
+
elif not col1_exists and col2_exists:
|
| 154 |
+
flagged_columns.append(col1)
|
| 155 |
+
elif col1_exists and not col2_exists:
|
| 156 |
+
flagged_columns.append(col2)
|
| 157 |
+
elif not col1_exists and not col2_exists:
|
| 158 |
+
flagged_columns.append(col1)
|
| 159 |
+
flagged_columns.append(col2)
|
| 160 |
+
return variable_options, flagged_columns
|
|
|
|
|
|
|
| 161 |
|
| 162 |
github_token = st.secrets["GitHub_Token_KUL_Margarida"]
|
| 163 |
+
#countries = ['IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_SARD', 'PT', 'FR']
|
| 164 |
|
| 165 |
if github_token:
|
| 166 |
+
data_dict = {}
|
| 167 |
+
for bdz in countries:
|
| 168 |
+
df = load_GitHub(github_token, bdz)
|
| 169 |
+
if df is not None:
|
| 170 |
+
data_dict[bdz] = df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
else:
|
| 173 |
print("Please enter your GitHub Personal Access Token to proceed.")
|
| 174 |
|
| 175 |
+
col1, col2 = st.columns([5, 2])
|
|
|
|
| 176 |
with col1:
|
| 177 |
st.title("Transparency++")
|
| 178 |
|
|
|
|
| 185 |
with col2_2:
|
| 186 |
st.image("energyville_logo.png", width=100)
|
| 187 |
|
| 188 |
+
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for ENTSO-E member countries.**")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
st.sidebar.header('Filters')
|
| 191 |
|
| 192 |
st.sidebar.subheader("Select Country")
|
| 193 |
st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
|
| 194 |
+
selection = ['Overall'] + list(countries)
|
| 195 |
+
selected_country = st.sidebar.selectbox('Select Country', selection)
|
| 196 |
|
|
|
|
|
|
|
|
|
|
| 197 |
if selected_country != 'Overall':
|
| 198 |
st.sidebar.subheader("Section")
|
| 199 |
st.sidebar.caption("Select the type of information you want to explore.")
|
| 200 |
+
section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
|
| 201 |
else:
|
| 202 |
section = None # No section is shown when "Overall" is selected
|
| 203 |
|
|
|
|
| 205 |
data = None # You can set data to None or a specific dataset based on your logic
|
| 206 |
section = None # No section selected when "Overall" is chosen
|
| 207 |
else:
|
| 208 |
+
country_code = selected_country
|
| 209 |
+
data = data_dict.get(selected_country)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
if section == 'Data Quality':
|
|
|
|
| 212 |
st.header('Data Quality')
|
|
|
|
|
|
|
| 213 |
|
| 214 |
+
# Determine if capacities missing per year
|
| 215 |
+
caps4 = installed_capacities_2024.get(country_code)
|
| 216 |
+
caps5 = installed_capacities_2025.get(country_code)
|
| 217 |
|
| 218 |
+
st.write(
|
| 219 |
+
"The table below presents the data quality metrics focusing on the percentage "
|
| 220 |
+
"of missing values and the occurrence of extreme or nonsensical values for "
|
| 221 |
+
"the selected country. Additionally, it flags any mismatch between installed "
|
| 222 |
+
"capacity (NaN or 0) and actual data in the dataset."
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
# Determine end of data slice (yesterday 23:59:59)
|
| 226 |
+
yesterday = datetime.datetime.now(tz).date() - datetime.timedelta(days=1)
|
| 227 |
+
end_time = pd.Timestamp(yesterday).replace(hour=23, minute=59, second=59)
|
| 228 |
+
# Filter data
|
| 229 |
+
data_quality = data[data.index <= end_time]
|
| 230 |
+
|
| 231 |
+
tech_cols = {
|
| 232 |
+
'Load': ('Load_entsoe', 'Load_forecast_entsoe'),
|
| 233 |
+
'Wind Onshore': ('Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe'),
|
| 234 |
+
'Wind Offshore': ('Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe'),
|
| 235 |
+
'Solar': ('Solar_entsoe', 'Solar_forecast_entsoe'),
|
| 236 |
+
}
|
| 237 |
|
| 238 |
+
skip_cols = []
|
| 239 |
+
|
| 240 |
+
for tech_key, (act_col, fct_col) in tech_cols.items():
|
| 241 |
+
# only proceed if the columns are in the DataFrame
|
| 242 |
+
if act_col in data_quality.columns and fct_col in data_quality.columns:
|
| 243 |
+
# get installed capacities for 2024 & 2025
|
| 244 |
+
cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
|
| 245 |
+
cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
|
| 246 |
+
|
| 247 |
+
# if both years are missing or zero capacity
|
| 248 |
+
if (pd.isna(cap4) or cap4 == 0) and (pd.isna(cap5) or cap5 == 0):
|
| 249 |
+
act = data_quality[act_col]
|
| 250 |
+
fct = data_quality[fct_col]
|
| 251 |
+
# check if actual AND forecast are entirely zero or NaN
|
| 252 |
+
only_zero_or_na = (act.fillna(0) == 0).all() and (fct.fillna(0) == 0).all()
|
| 253 |
+
if only_zero_or_na:
|
| 254 |
+
skip_cols += [act_col, fct_col]
|
| 255 |
+
|
| 256 |
+
# drop any columns flagged for skipping (ignore errors if somehow missing)
|
| 257 |
+
if skip_cols:
|
| 258 |
+
data_quality = data_quality.drop(columns=skip_cols, errors='ignore')
|
| 259 |
+
|
| 260 |
+
# Compute missing
|
| 261 |
+
missing_values = data_quality.isna().mean() * 100
|
| 262 |
missing_values = missing_values.round(2)
|
| 263 |
|
| 264 |
+
extreme_values = {}
|
| 265 |
+
capacity_mismatch = {}
|
| 266 |
+
neg_counts = {}
|
| 267 |
+
over_counts = {}
|
| 268 |
+
cutoff = pd.Timestamp('2025-01-01')
|
| 269 |
+
|
| 270 |
+
# Iterate over columns
|
| 271 |
+
for col in data_quality.columns:
|
| 272 |
+
# Identify technology
|
| 273 |
+
if 'Solar' in col:
|
| 274 |
+
tech_key = 'Solar'
|
| 275 |
+
elif 'Wind_onshore' in col:
|
| 276 |
+
tech_key = 'Wind Onshore'
|
| 277 |
+
elif 'Wind_offshore' in col:
|
| 278 |
+
tech_key = 'Wind Offshore'
|
| 279 |
+
elif 'Load' in col:
|
| 280 |
+
tech_key = 'Load'
|
| 281 |
+
else:
|
| 282 |
+
extreme_values[col] = np.nan
|
| 283 |
+
capacity_mismatch[col] = np.nan
|
| 284 |
+
continue
|
| 285 |
+
|
| 286 |
+
series = data_quality[col]
|
| 287 |
+
# Year masks
|
| 288 |
+
mask_2024 = series.index < cutoff
|
| 289 |
+
# Fetch capacity values
|
| 290 |
+
cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
|
| 291 |
+
cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
|
| 292 |
+
print('var:',col)
|
| 293 |
+
print('cap4:',cap4)
|
| 294 |
+
if tech_key == 'Load':
|
| 295 |
+
# Negative load
|
| 296 |
+
extreme_pct = round((series < 0).mean() * 100, 2)
|
| 297 |
+
mismatch = np.nan
|
| 298 |
+
else:
|
| 299 |
+
# Create per-timestamp capacity
|
| 300 |
+
cap_series = pd.Series(
|
| 301 |
+
np.where(mask_2024, cap4, cap5),
|
| 302 |
+
index=series.index
|
| 303 |
+
)
|
| 304 |
+
# Flags
|
| 305 |
+
neg = series < 0
|
| 306 |
+
over = (series > cap_series) & cap_series.notna()
|
| 307 |
+
nonsense = neg | over
|
| 308 |
+
extreme_pct = round(nonsense.mean() * 100, 2)
|
| 309 |
+
# Mismatch: non-zero gen when cap missing or zero
|
| 310 |
+
# cap4, cap5 are floats or NaN
|
| 311 |
+
no_cap_2024 = pd.isna(cap4) or (cap4 == 0)
|
| 312 |
+
no_cap_2025 = pd.isna(cap5) or (cap5 == 0)
|
| 313 |
+
|
| 314 |
+
# check if there's at least one actual non-zero (treat NaN as 0)
|
| 315 |
+
has_nonzero = (series.fillna(0) != 0).any()
|
| 316 |
+
|
| 317 |
+
if no_cap_2024 and no_cap_2025 and has_nonzero:
|
| 318 |
+
mismatch = 100.0
|
| 319 |
+
else:
|
| 320 |
+
mismatch = 0.0
|
| 321 |
|
| 322 |
+
extreme_values[col] = extreme_pct
|
| 323 |
+
capacity_mismatch[col] = mismatch
|
| 324 |
+
|
| 325 |
+
display_extreme = {col: f"{val:.2f}" if not pd.isna(val) else ''
|
| 326 |
+
for col, val in extreme_values.items()}
|
| 327 |
+
display_mismatch = {}
|
| 328 |
+
for col, val in capacity_mismatch.items():
|
| 329 |
+
if 'Load' in col:
|
| 330 |
+
display_mismatch[col] = '-'
|
| 331 |
+
else:
|
| 332 |
+
display_mismatch[col] = '🚩' if val == 100.0 else ''
|
| 333 |
|
| 334 |
+
# Build and render DataFrame
|
| 335 |
+
metrics_df = pd.DataFrame({
|
| 336 |
+
'Missing Values (%)': missing_values,
|
| 337 |
+
'Extreme/Nonsensical Values (%)': pd.Series(display_extreme),
|
| 338 |
+
'Capacity Mismatch Flag': pd.Series(display_mismatch)
|
| 339 |
+
})
|
| 340 |
|
| 341 |
+
st.dataframe(metrics_df.style.format({
|
| 342 |
+
'Missing Values (%)': '{:.2f}',
|
| 343 |
+
'Extreme/Nonsensical Values (%)': '{}'
|
| 344 |
+
}))
|
| 345 |
+
|
| 346 |
+
st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset',unsafe_allow_html=True)
|
| 347 |
+
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: For Load, this is % of values below 0. For generation, it is negative or out-of-bound (> capacity).',unsafe_allow_html=True)
|
| 348 |
+
st.write('<b><u>Capacity Mismatch Flag</u></b>: Shows "🚩" if installed capacity is `NaN` or `0` but the dataset has non-zero generation. Blank otherwise. For Load columns, it is "-".',unsafe_allow_html=True)
|
| 349 |
|
| 350 |
elif section == 'Forecasts Quality':
|
| 351 |
+
|
| 352 |
st.header('Forecast Quality')
|
| 353 |
|
| 354 |
# Time series for last 1 week
|
| 355 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
| 356 |
st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
| 357 |
+
variable_options, flagged_columns = filter_variable_options(last_week)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
# Dropdown to select the variable
|
| 359 |
selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
|
|
|
|
|
|
|
| 360 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 361 |
|
| 362 |
+
x_vals = last_week.index.to_pydatetime().tolist()
|
| 363 |
+
y_actual = last_week[actual_col].tolist()
|
| 364 |
+
y_forecast = last_week[forecast_col].tolist()
|
| 365 |
+
|
| 366 |
+
# then plot
|
| 367 |
fig = go.Figure()
|
| 368 |
+
fig.add_trace(go.Scatter(x=x_vals,y=y_actual,mode="lines",name="Actual"))
|
| 369 |
+
fig.add_trace(go.Scatter(x=x_vals,y=y_forecast,mode="lines",name="Forecast ENTSO-E"))
|
| 370 |
+
fig.update_layout(title=f"Forecasts vs Actual for {selected_variable}",xaxis_title="Date",yaxis_title="Value [MW]")
|
|
|
|
| 371 |
st.plotly_chart(fig)
|
| 372 |
|
| 373 |
+
|
| 374 |
# Scatter plots for error distribution
|
| 375 |
st.subheader('Error Distribution')
|
| 376 |
st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
|
|
|
|
| 379 |
# Get the corresponding columns for the selected variable
|
| 380 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 381 |
|
| 382 |
+
if forecast_col in data.columns:
|
| 383 |
+
# grab the two series, drop any NaNs, and align on their common timestamps
|
| 384 |
+
obs = data[actual_col].dropna()
|
| 385 |
+
pred = data[forecast_col].dropna()
|
| 386 |
+
idx = obs.index.intersection(pred.index)
|
| 387 |
+
obs = obs.loc[idx]
|
| 388 |
+
pred = pred.loc[idx]
|
| 389 |
+
|
| 390 |
+
# convert to pure Python lists
|
| 391 |
+
x_vals = obs.tolist()
|
| 392 |
+
y_vals = pred.tolist()
|
| 393 |
+
|
| 394 |
+
fig = go.Figure()
|
| 395 |
+
fig.add_trace(go.Scatter(x=x_vals,y=y_vals,mode='markers',name=f'{selected_variable}'))
|
| 396 |
+
fig.update_layout(title=f'Error Distribution for {selected_variable}',xaxis_title='Observed [MW]',yaxis_title='Forecast ENTSO-E [MW]')
|
| 397 |
+
|
| 398 |
st.plotly_chart(fig)
|
| 399 |
+
|
| 400 |
st.subheader('Accuracy Metrics (Sorted by rMAE):')
|
| 401 |
|
| 402 |
date_range = st.date_input(
|
|
|
|
| 410 |
else:
|
| 411 |
st.error("Please select a valid date range.")
|
| 412 |
st.stop()
|
| 413 |
+
output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. On the right is a radar plot with the rMAE."
|
|
|
|
|
|
|
| 414 |
st.write(output_text)
|
| 415 |
|
| 416 |
+
data_metrics = data.loc[start_date:end_date]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
+
accuracy_metrics = pd.DataFrame(columns=['MAE', 'RMSE' ,'rMAE'], index=list(variable_options.keys()))
|
| 419 |
|
| 420 |
+
for variable in variable_options.keys():
|
| 421 |
+
actual_col, forecast_col = variable_options[variable]
|
| 422 |
+
obs = data_metrics[actual_col]
|
| 423 |
+
pred = data_metrics[forecast_col]
|
| 424 |
+
error = pred - obs
|
| 425 |
+
|
| 426 |
+
mae = round(np.mean(np.abs(error)),2)
|
| 427 |
+
if 'Load' in actual_col:
|
| 428 |
+
persistence = obs.shift(168) # Weekly persistence
|
| 429 |
+
else:
|
| 430 |
+
persistence = obs.shift(24) # Daily persistence
|
| 431 |
+
|
| 432 |
+
# Using the whole year's data for rMAE calculations
|
| 433 |
+
rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
|
| 434 |
+
rmse = round(np.sqrt(np.mean((error)**2)), 2)
|
| 435 |
+
row_label = variable #'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
|
| 436 |
+
accuracy_metrics.loc[row_label] = [mae, rmse, rmae]
|
|
|
|
|
|
|
| 437 |
|
| 438 |
accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
|
| 439 |
+
accuracy_metrics.sort_values(by=accuracy_metrics.columns[-1], ascending=True, inplace=True)
|
| 440 |
accuracy_metrics = accuracy_metrics.round(4)
|
| 441 |
|
| 442 |
+
col1, col2 = st.columns([1, 1])
|
| 443 |
|
| 444 |
with col1:
|
| 445 |
+
# (optional) some top-margin before the table
|
| 446 |
st.markdown(
|
| 447 |
"""
|
| 448 |
<style>
|
| 449 |
+
.small-chart-container {
|
| 450 |
+
margin-top: 0px;
|
| 451 |
}
|
| 452 |
</style>
|
| 453 |
""",
|
| 454 |
unsafe_allow_html=True
|
| 455 |
)
|
| 456 |
st.dataframe(accuracy_metrics)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
|
| 458 |
with col2:
|
| 459 |
+
# prepare the data
|
|
|
|
| 460 |
rmae_values = accuracy_metrics['rMAE'].tolist()
|
| 461 |
+
categories = accuracy_metrics.index.tolist()
|
| 462 |
+
|
| 463 |
+
# build the radar
|
| 464 |
+
fig = go.Figure(
|
| 465 |
+
go.Scatterpolar(
|
| 466 |
+
r=rmae_values,
|
| 467 |
+
theta=categories,
|
| 468 |
+
fill='toself',
|
| 469 |
+
name='rMAE'
|
| 470 |
+
)
|
| 471 |
+
)
|
| 472 |
+
|
| 473 |
+
# 👉 shrink the total size, and give extra left/right margin for your labels
|
| 474 |
fig.update_layout(
|
| 475 |
+
width=300, # make the whole plot a bit smaller
|
| 476 |
+
height=300,
|
| 477 |
+
margin=dict(
|
| 478 |
+
l=50, # more space on the left for long category names
|
| 479 |
+
r=60, # and on the right, if needed
|
| 480 |
+
t=20,
|
| 481 |
+
b=20
|
| 482 |
+
),
|
| 483 |
polar=dict(
|
| 484 |
+
angularaxis=dict(
|
| 485 |
+
tickfont=dict(size=11) # if you want slightly smaller ticks
|
| 486 |
+
),
|
| 487 |
radialaxis=dict(
|
| 488 |
visible=True,
|
| 489 |
+
range=[0, max(rmae_values)*1.2]
|
| 490 |
+
)
|
| 491 |
+
),
|
| 492 |
showlegend=False
|
| 493 |
)
|
| 494 |
+
|
| 495 |
+
# wrap in a div so you can still control vertical spacing via CSS
|
| 496 |
+
st.markdown('<div class="small-chart-container">', unsafe_allow_html=True)
|
| 497 |
+
st.plotly_chart(fig, use_container_width=False)
|
| 498 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 499 |
|
| 500 |
st.subheader('ACF plots of Errors')
|
| 501 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
|
|
|
| 519 |
|
| 520 |
# Optionally calculate and store ACF values for further analysis if needed
|
| 521 |
acf_values = acf(error.dropna(), nlags=240)
|
| 522 |
+
|
| 523 |
elif section == 'Insights':
|
| 524 |
st.header("Insights")
|
| 525 |
|
|
|
|
| 531 |
|
| 532 |
# Resample data based on the selected resolution
|
| 533 |
if resolution == 'Hourly':
|
| 534 |
+
resampled_data = data
|
| 535 |
elif resolution == 'Daily':
|
| 536 |
+
resampled_data = data.resample('D').mean() # Resample to daily mean
|
| 537 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
|
| 539 |
+
resampled_data.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in resampled_data.columns]
|
|
|
|
| 540 |
|
| 541 |
# Drop missing values
|
| 542 |
+
selected_df = resampled_data.dropna()
|
| 543 |
|
| 544 |
# Create the scatter plots using seaborn's pairplot
|
| 545 |
sns.set_theme(style="ticks")
|
|
|
|
| 550 |
|
| 551 |
elif selected_country == 'Overall':
|
| 552 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
def calculate_net_load_error(df, country_code):
    """Compute the latest net-load forecast error for one bidding zone.

    Net load is Load minus renewable generation (wind onshore/offshore,
    solar); the error is forecast net load minus observed net load at the
    most recent timestamp with complete data.

    Parameters
    ----------
    df : pd.DataFrame
        Time-indexed zone data with 'Load_entsoe'/'Load_forecast_entsoe'
        and optional renewable actual/forecast columns.
    country_code : str
        Zone identifier; used only for diagnostics when no data is usable.

    Returns
    -------
    tuple
        (error_in_MW, date_string) or (None, None) when the data is
        insufficient.
    """
    # Drop entirely-NaN columns first so one fully-missing series does not
    # wipe out every row in the row-wise dropna that follows.
    filter_df = df.dropna(axis=1, how='all')
    filter_df = filter_df.dropna()

    if filter_df.empty:
        # No complete rows left for this zone.
        print(country_code)
        return None, None

    # Guard: without both load columns the net-load error is undefined
    # (e.g. 'Load_entsoe' may have been dropped above for being all NaN);
    # the original code raised KeyError here instead of returning None.
    if ('Load_entsoe' not in filter_df.columns
            or 'Load_forecast_entsoe' not in filter_df.columns):
        print(country_code)
        return None, None

    net_load = filter_df['Load_entsoe'].copy()
    for col in ['Wind_onshore_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe']:
        if col in filter_df.columns:
            net_load -= filter_df[col]

    net_load_forecast = filter_df['Load_forecast_entsoe'].copy()
    for col in ['Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe', 'Wind_offshore_forecast_entsoe']:
        if col in filter_df.columns:
            net_load_forecast -= filter_df[col]

    # Calculate the error based on the latest values
    error = (net_load_forecast - net_load).iloc[-1]
    date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")  # Get the latest date in string format

    return error, date
|
| 576 |
|
| 577 |
def plot_net_load_error_map(data_dict):
|
| 578 |
+
# 1) compute your errors as before
|
| 579 |
+
missing_zones={'ME','IE_SEM','MK','CY','BA','AL','XK'}
|
| 580 |
+
net_load_errors = {
|
| 581 |
+
country_code: calculate_net_load_error(data, country_code)
|
| 582 |
+
for country_code, data in data_dict.items()
|
| 583 |
+
}
|
| 584 |
df_net_load_error = pd.DataFrame({
|
| 585 |
+
"zoneName": list(net_load_errors),
|
| 586 |
+
"net_load_error": [v[0] for v in net_load_errors.values()],
|
| 587 |
+
"date": [v[1] for v in net_load_errors.values()],
|
| 588 |
})
|
| 589 |
|
| 590 |
+
# 2) split your zones into standard vs. fallback
|
| 591 |
+
selected = list(data_dict.keys())
|
| 592 |
+
standard_zones = [z for z in selected if z not in missing_zones]
|
| 593 |
+
fallback_zones = [z for z in selected if z in missing_zones]
|
| 594 |
+
|
| 595 |
+
# 3a) load the standard ones with entsoe.load_zones
|
| 596 |
date = pd.Timestamp.now()
|
| 597 |
+
geo_std = load_zones(standard_zones, date).reset_index()
|
| 598 |
|
| 599 |
+
# 3b) manually load the fallback ones
|
| 600 |
+
gdfs = []
|
| 601 |
+
for z in fallback_zones:
|
| 602 |
+
fn = f"{z}.geojson"
|
| 603 |
+
path = f'./geojson_missing/{fn}'
|
| 604 |
+
g = gpd.read_file(path)
|
| 605 |
+
g['zoneName'] = z
|
| 606 |
+
gdfs.append(g)
|
| 607 |
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
+
geo_fb = pd.concat(gdfs, ignore_index=True) if gdfs else gpd.GeoDataFrame()
|
| 610 |
+
|
| 611 |
+
# 4) combine
|
| 612 |
+
geo_data = pd.concat([geo_std, geo_fb], ignore_index=True)
|
| 613 |
# Merge net_load_error and date into geo_data
|
| 614 |
geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
|
| 615 |
|
|
|
|
| 646 |
geo_data,
|
| 647 |
style_function=style_function,
|
| 648 |
tooltip=folium.GeoJsonTooltip(
|
| 649 |
+
fields=["zoneName", "net_load_error", "date"],
|
| 650 |
aliases=["Country:", "Net Load Error [MW]:", "Date:"],
|
| 651 |
localize=True
|
| 652 |
)
|
|
|
|
| 656 |
colormap.add_to(m)
|
| 657 |
|
| 658 |
# Display the map
|
| 659 |
+
_=st_folium(m, width=700, height=600)
|
| 660 |
|
| 661 |
def calculate_mae(actual, forecast):
    """Return the mean absolute error between observed and forecast values."""
    deviation = np.abs(actual - forecast)
    return np.mean(deviation)
|
|
|
|
| 664 |
def calculate_persistence_mae(data, shift_hours):
    """MAE of the naive persistence baseline (series shifted by `shift_hours`).

    NaNs introduced at the start by the shift are skipped by the mean.
    """
    baseline = data.shift(shift_hours)
    return np.mean(np.abs(data - baseline))
|
| 666 |
|
| 667 |
+
def calculate_rmae_for_country(df, variable_options):
    """Compute the relative MAE (rMAE) per variable for one zone.

    rMAE = MAE of the ENTSO-E forecast divided by the MAE of a naive
    persistence baseline. Load uses a weekly (168 h) baseline; generation
    variables use a daily (24 h) baseline — the same convention used in the
    accuracy-metrics section of this app.

    Parameters
    ----------
    df : pd.DataFrame
        Zone data (already NaN-filtered by the caller).
    variable_options : dict
        Label -> (actual_col, forecast_col), as produced by
        filter_variable_options.

    Returns
    -------
    dict
        Label -> rMAE for every available variable; variables absent from
        *variable_options* are set to None so radar plots stay aligned.
    """
    rmae = {}
    for variable, (actual_col, forecast_col) in variable_options.items():
        # BUGFIX: the previous version first computed Load with the 168 h
        # baseline and then overwrote it with a 24 h baseline inside this
        # loop; the weekly baseline for Load is now applied exactly once.
        shift_hours = 168 if variable == 'Load' else 24
        rmae[variable] = (
            calculate_mae(df[actual_col], df[forecast_col])
            / calculate_persistence_mae(df[actual_col], shift_hours)
        )

    # Variables with no usable data in this zone get None instead of
    # raising KeyError downstream.
    all_opt = ["Load", "Solar", "Wind Onshore", "Wind Offshore"]
    for missing in all_opt:
        if missing not in rmae:
            rmae[missing] = None

    return rmae
|
| 682 |
|
| 683 |
def create_rmae_dataframe(data_dict):
    """Assemble a per-country rMAE table across all four variables.

    Parameters
    ----------
    data_dict : dict
        Country code -> zone DataFrame.

    Returns
    -------
    pd.DataFrame
        One row per country with columns Country, Load, Wind Onshore,
        Wind Offshore and Solar (None where a variable is unavailable).
    """
    columns = {'Country': [], 'Load': [], 'Wind Onshore': [], 'Wind Offshore': [], 'Solar': []}

    for country_name, df in data_dict.items():
        clean = df.dropna()
        print(country_name)
        # Only variables with usable actual+forecast data enter the rMAE calc.
        options, _flagged = filter_variable_options(clean)
        country_rmae = calculate_rmae_for_country(clean, options)

        columns['Country'].append(country_name)
        for variable, value in country_rmae.items():
            columns[variable].append(value)

    return pd.DataFrame(columns)
|
| 699 |
|
|
|
|
| 701 |
fig = go.Figure()
|
| 702 |
|
| 703 |
# Dynamically adjust angles to exclude Wind_offshore if all values are NaN
|
| 704 |
+
angles = ['Load']
|
| 705 |
+
if not rmae_df['Wind Offshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
|
| 706 |
+
angles.append('Wind Offshore')
|
| 707 |
+
if not rmae_df['Wind Onshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
|
| 708 |
+
angles.append('Wind Onshore')
|
| 709 |
+
if not rmae_df['Solar'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
|
| 710 |
+
angles.append('Solar')
|
| 711 |
+
|
| 712 |
for _, row in rmae_df.iterrows():
|
| 713 |
fig.add_trace(go.Scatterpolar(
|
| 714 |
r=[row[angle] for angle in angles],
|
|
|
|
| 748 |
|
| 749 |
# Plot radar chart for the selected countries
|
| 750 |
plot_rmae_radar_chart(filtered_rmae_df)
|
|
|
|
|
|
|
|
|
|
|
|