Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,8 +13,16 @@ from statsmodels.graphics.tsaplots import plot_acf
|
|
| 13 |
import matplotlib.pyplot as plt
|
| 14 |
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
##GET ALL FILES FROM GITHUB
|
| 17 |
-
|
|
|
|
| 18 |
url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
|
| 19 |
headers = {'Authorization': f'token {github_token}'}
|
| 20 |
|
|
@@ -31,12 +39,13 @@ def load_GitHub(github_token, file_name):
|
|
| 31 |
else:
|
| 32 |
print(f"Failed to download {file_name}. Status code: {response.status_code}")
|
| 33 |
return None
|
| 34 |
-
|
| 35 |
-
|
|
|
|
| 36 |
predictions_dict = {}
|
| 37 |
for hour in range(24):
|
| 38 |
file_name = f'Predictions_{hour}h.csv'
|
| 39 |
-
df = load_GitHub(github_token, file_name)
|
| 40 |
if df is not None:
|
| 41 |
predictions_dict[file_name] = df
|
| 42 |
return predictions_dict
|
|
@@ -75,10 +84,12 @@ def simplify_model_names_in_index(df):
|
|
| 75 |
|
| 76 |
return df
|
| 77 |
|
|
|
|
|
|
|
| 78 |
github_token = st.secrets["GitHub_Token_KUL_Margarida"]
|
| 79 |
|
| 80 |
if github_token:
|
| 81 |
-
forecast_dict = load_forecast(github_token)
|
| 82 |
|
| 83 |
historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv')
|
| 84 |
|
|
@@ -140,12 +151,11 @@ upper_space.markdown("""
|
|
| 140 |
""", unsafe_allow_html=True)
|
| 141 |
|
| 142 |
|
| 143 |
-
|
| 144 |
countries = {
|
| 145 |
-
'Belgium': 'BE',
|
| 146 |
'Netherlands': 'NL',
|
| 147 |
'Germany': 'DE',
|
| 148 |
'France': 'FR',
|
|
|
|
| 149 |
}
|
| 150 |
|
| 151 |
|
|
@@ -231,9 +241,12 @@ if section == 'Data':
|
|
| 231 |
st.header('Data Quality')
|
| 232 |
|
| 233 |
st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
| 235 |
# Report % of missing values
|
| 236 |
-
missing_values =
|
| 237 |
missing_values = missing_values.round(2)
|
| 238 |
|
| 239 |
installed_capacities = {
|
|
@@ -254,21 +267,21 @@ if section == 'Data':
|
|
| 254 |
|
| 255 |
for col in forecast_columns:
|
| 256 |
if 'Solar_entsoe' in col:
|
| 257 |
-
extreme_values[col] = ((
|
| 258 |
elif 'Solar_forecast_entsoe' in col:
|
| 259 |
-
extreme_values[col] = ((
|
| 260 |
elif 'Wind_onshore_entsoe' in col:
|
| 261 |
-
extreme_values[col] = ((
|
| 262 |
elif 'Wind_onshore_forecast_entsoe' in col:
|
| 263 |
-
extreme_values[col] = ((
|
| 264 |
elif 'Wind_offshore_entsoe' in col:
|
| 265 |
-
extreme_values[col] = ((
|
| 266 |
elif 'Wind_offshore_forecast_entsoe' in col:
|
| 267 |
-
extreme_values[col] = ((
|
| 268 |
elif 'Load_entsoe' in col:
|
| 269 |
-
extreme_values[col] = ((
|
| 270 |
elif 'Load_forecast_entsoe' in col:
|
| 271 |
-
extreme_values[col] = ((
|
| 272 |
|
| 273 |
|
| 274 |
extreme_values = pd.Series(extreme_values).round(2)
|
|
@@ -300,29 +313,34 @@ elif section == 'Forecasts':
|
|
| 300 |
|
| 301 |
# Time series for last 1 week
|
| 302 |
st.subheader('Time Series: Last 1 Week')
|
| 303 |
-
last_week =
|
| 304 |
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
|
| 305 |
-
|
| 306 |
-
'Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
|
| 307 |
forecast_columns = [
|
| 308 |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
|
|
|
| 309 |
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
|
|
|
| 316 |
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
-
for i in range(0, len(
|
| 323 |
-
actual_col =
|
| 324 |
-
forecast_col =
|
| 325 |
-
|
|
|
|
| 326 |
|
| 327 |
|
| 328 |
if forecast_col in data.columns:
|
|
@@ -332,7 +350,7 @@ elif section == 'Forecasts':
|
|
| 332 |
|
| 333 |
if country_code=='BE':
|
| 334 |
conformal=conformal_predictions(df_combined, actual_col, my_forecast)
|
| 335 |
-
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=
|
| 336 |
if actual_col =='Load_entsoe':
|
| 337 |
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
|
| 338 |
fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
|
|
@@ -645,12 +663,13 @@ elif section == 'Forecasts':
|
|
| 645 |
# Scatter plots for error distribution
|
| 646 |
st.subheader('Error Distribution')
|
| 647 |
st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
|
|
|
|
| 648 |
for i in range(0, len(forecast_columns), 2):
|
| 649 |
actual_col = forecast_columns[i]
|
| 650 |
forecast_col = forecast_columns[i + 1]
|
| 651 |
-
if forecast_col in
|
| 652 |
-
obs =
|
| 653 |
-
pred =
|
| 654 |
error = pred - obs
|
| 655 |
|
| 656 |
fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
|
|
@@ -905,5 +924,4 @@ elif section == 'Insights':
|
|
| 905 |
fig.update_layout(title=f'{weather_col} vs {actual_col}')
|
| 906 |
st.plotly_chart(fig)
|
| 907 |
|
| 908 |
-
|
| 909 |
|
|
|
|
| 13 |
import matplotlib.pyplot as plt
|
| 14 |
|
| 15 |
|
| 16 |
+
def get_current_time():
    """Return the current hour and whether the 10th minute has been reached.

    Returns:
        A ``(hour, past_tenth_minute)`` tuple: ``hour`` is the ``.hour`` of
        ``datetime.now()`` and ``past_tenth_minute`` is ``True`` once the
        ``.minute`` is 10 or more.
    """
    # NOTE(review): assumes `datetime` is the class imported via
    # `from datetime import datetime`; no tz is passed, so this is naive time.
    timestamp = datetime.now()
    past_tenth_minute = timestamp.minute >= 10
    return timestamp.hour, past_tenth_minute
|
| 22 |
+
|
| 23 |
##GET ALL FILES FROM GITHUB
|
| 24 |
+
@st.cache_data(show_spinner=False)
|
| 25 |
+
def load_GitHub(github_token, file_name, hour, after_10_min):
|
| 26 |
url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
|
| 27 |
headers = {'Authorization': f'token {github_token}'}
|
| 28 |
|
|
|
|
| 39 |
else:
|
| 40 |
print(f"Failed to download {file_name}. Status code: {response.status_code}")
|
| 41 |
return None
|
| 42 |
+
|
| 43 |
+
@st.cache_data(show_spinner=False)
def load_forecast(github_token, hour, after_10_min):
    """Download the 24 hourly prediction files and return them keyed by name.

    Args:
        github_token: Token forwarded to ``load_GitHub`` for authorization.
        hour: Current hour; part of the cache key of this function and
            forwarded to ``load_GitHub`` so its cache entries roll over
            each hour as well.
        after_10_min: Whether the current minute is >= 10; used as a cache
            key in the same way as ``hour``.

    Returns:
        dict mapping ``'Predictions_{h}h.csv'`` (h = 0..23) to whatever
        ``load_GitHub`` returned for that file. Files for which
        ``load_GitHub`` returned ``None`` are omitted.
    """
    predictions_dict = {}
    # Use a distinct loop name: the original reused `hour`, which shadowed the
    # parameter and passed the file index (0..23) to load_GitHub as its cache
    # key, so the per-file cached downloads never refreshed on the hour.
    for file_hour in range(24):
        file_name = f'Predictions_{file_hour}h.csv'
        df = load_GitHub(github_token, file_name, hour, after_10_min)
        if df is not None:
            predictions_dict[file_name] = df
    return predictions_dict
|
|
|
|
| 84 |
|
| 85 |
return df
|
| 86 |
|
| 87 |
+
current_hour, after_10_min = get_current_time()
|
| 88 |
+
|
| 89 |
github_token = st.secrets["GitHub_Token_KUL_Margarida"]
|
| 90 |
|
| 91 |
if github_token:
|
| 92 |
+
forecast_dict = load_forecast(github_token, current_hour, after_10_min)
|
| 93 |
|
| 94 |
# load_GitHub now requires the (hour, after_10_min) cache-key arguments;
# calling it with only two arguments raises TypeError at startup.
historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv', current_hour, after_10_min)
|
| 95 |
|
|
|
|
| 151 |
""", unsafe_allow_html=True)
|
| 152 |
|
| 153 |
|
|
|
|
| 154 |
countries = {
|
|
|
|
| 155 |
'Netherlands': 'NL',
|
| 156 |
'Germany': 'DE',
|
| 157 |
'France': 'FR',
|
| 158 |
+
'Belgium': 'BE',
|
| 159 |
}
|
| 160 |
|
| 161 |
|
|
|
|
| 241 |
st.header('Data Quality')
|
| 242 |
|
| 243 |
st.write('The table below presents the data quality metrics for various energy-related datasets, focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
|
| 244 |
+
data_quality=data.iloc[:-28]
|
| 245 |
+
if country_code=='BE':
|
| 246 |
+
data_quality=data.iloc[:-5*24]
|
| 247 |
+
print(data_quality.tail(48))
|
| 248 |
# Report % of missing values
|
| 249 |
+
missing_values = data_quality[forecast_columns].isna().mean() * 100
|
| 250 |
missing_values = missing_values.round(2)
|
| 251 |
|
| 252 |
installed_capacities = {
|
|
|
|
| 267 |
|
| 268 |
for col in forecast_columns:
|
| 269 |
if 'Solar_entsoe' in col:
|
| 270 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
|
| 271 |
elif 'Solar_forecast_entsoe' in col:
|
| 272 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
|
| 273 |
elif 'Wind_onshore_entsoe' in col:
|
| 274 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
|
| 275 |
elif 'Wind_onshore_forecast_entsoe' in col:
|
| 276 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
|
| 277 |
elif 'Wind_offshore_entsoe' in col:
|
| 278 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
|
| 279 |
elif 'Wind_offshore_forecast_entsoe' in col:
|
| 280 |
+
extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
|
| 281 |
elif 'Load_entsoe' in col:
|
| 282 |
+
extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
|
| 283 |
elif 'Load_forecast_entsoe' in col:
|
| 284 |
+
extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
|
| 285 |
|
| 286 |
|
| 287 |
extreme_values = pd.Series(extreme_values).round(2)
|
|
|
|
| 313 |
|
| 314 |
# Time series for last 1 week
|
| 315 |
st.subheader('Time Series: Last 1 Week')
|
| 316 |
+
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
| 317 |
st.write('The below plots show the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform between the selected data range.')
|
| 318 |
+
|
|
|
|
| 319 |
forecast_columns = [
|
| 320 |
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 321 |
+
num_per_var=2
|
| 322 |
|
| 323 |
+
if country_code=='BE':
|
| 324 |
+
operation_forecast_load=forecast_dict['Predictions_10h.csv'].filter(like='Load_', axis=1)
|
| 325 |
+
operation_forecast_res=forecast_dict['Predictions_17h.csv'].filter(regex='^(?!Load_)')
|
| 326 |
+
operation_forecast_load.columns = [col.replace('_entsoe.', '_').replace('Naive.7D', 'WeeklyNaiveSeasonal') for col in operation_forecast_load.columns]
|
| 327 |
+
operation_forecast_res.columns = [col.replace('_entsoe.', '_').replace('Naive.1D', 'DailyNaiveSeasonal') for col in operation_forecast_res.columns]
|
| 328 |
+
Historical_and_Load=add_feature(operation_forecast_load, historical_forecast)
|
| 329 |
+
Historical_and_operational=add_feature(operation_forecast_res, Historical_and_Load)
|
| 330 |
|
| 331 |
+
best_forecast = Historical_and_operational.filter(like='Forecast_elia', axis=1)
|
| 332 |
+
df_combined = Historical_and_operational.join(Data_BE, how='inner')
|
| 333 |
+
last_week_best_forecast = best_forecast.loc[best_forecast.index >= (best_forecast.index[-24] - pd.Timedelta(days=7))]
|
| 334 |
+
num_per_var=3
|
| 335 |
+
forecast_columns_line=['Load_entsoe','Load_forecast_entsoe', 'Load_LightGBMModel.7D.TimeCov.Temp.Forecast_elia', 'Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_onshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Wind_offshore_LightGBMModel.1D.TimeCov.Temp.Forecast_elia','Solar_entsoe','Solar_forecast_entsoe', 'Solar_LightGBMModel.1D.TimeCov.Temp.Forecast_elia']
|
| 336 |
+
else:
|
| 337 |
+
forecast_columns_line=forecast_columns
|
| 338 |
|
| 339 |
+
for i in range(0, len(forecast_columns_line), num_per_var):
|
| 340 |
+
actual_col = forecast_columns_line[i]
|
| 341 |
+
forecast_col = forecast_columns_line[i + 1]
|
| 342 |
+
if country_code=='BE':
|
| 343 |
+
my_forecast = forecast_columns_line[i + 2]
|
| 344 |
|
| 345 |
|
| 346 |
if forecast_col in data.columns:
|
|
|
|
| 350 |
|
| 351 |
if country_code=='BE':
|
| 352 |
conformal=conformal_predictions(df_combined, actual_col, my_forecast)
|
| 353 |
+
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=7))]
|
| 354 |
if actual_col =='Load_entsoe':
|
| 355 |
last_week_conformal = conformal.loc[conformal.index >= (conformal.index[-24] - pd.Timedelta(days=5))]
|
| 356 |
fig.add_trace(go.Scatter(x=last_week_best_forecast.index, y=last_week_best_forecast[my_forecast], mode='lines', name='Forecast EDS'))
|
|
|
|
| 663 |
# Scatter plots for error distribution
|
| 664 |
st.subheader('Error Distribution')
|
| 665 |
st.write('The below scatter plots show the error distribution of all three fields: Solar, Wind and Load between the selected date range')
|
| 666 |
+
data_2024 = data[data.index.year > 2023]
|
| 667 |
for i in range(0, len(forecast_columns), 2):
|
| 668 |
actual_col = forecast_columns[i]
|
| 669 |
forecast_col = forecast_columns[i + 1]
|
| 670 |
+
if forecast_col in data_2024.columns:
|
| 671 |
+
obs = data_2024[actual_col]
|
| 672 |
+
pred = data_2024[forecast_col]
|
| 673 |
error = pred - obs
|
| 674 |
|
| 675 |
fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Predicted by ENTSO-E [MW]'})
|
|
|
|
| 924 |
fig.update_layout(title=f'{weather_col} vs {actual_col}')
|
| 925 |
st.plotly_chart(fig)
|
| 926 |
|
|
|
|
| 927 |
|