Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,35 +2,120 @@ import requests
|
|
| 2 |
import pandas as pd
|
| 3 |
from io import StringIO
|
| 4 |
import streamlit as st
|
| 5 |
-
import os
|
| 6 |
import plotly.express as px
|
| 7 |
import plotly.graph_objects as go
|
| 8 |
-
import plotly.colors as pc
|
| 9 |
import numpy as np
|
| 10 |
-
from sklearn.metrics import mean_squared_error
|
| 11 |
from statsmodels.tsa.stattools import acf
|
| 12 |
from statsmodels.graphics.tsaplots import plot_acf
|
| 13 |
import matplotlib.pyplot as plt
|
| 14 |
-
from datetime import datetime
|
| 15 |
import folium
|
| 16 |
-
import seaborn as sns
|
| 17 |
from streamlit_folium import st_folium
|
| 18 |
-
|
|
|
|
| 19 |
from entsoe.geo import load_zones
|
| 20 |
-
from branca.colormap import LinearColormap
|
| 21 |
import branca
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
def
|
| 25 |
-
now = datetime.now()
|
| 26 |
-
current_hour = now.hour
|
| 27 |
-
current_minute = now.minute
|
| 28 |
-
# Return the hour and a boolean indicating if it is after the 10th minute
|
| 29 |
-
return current_hour, current_minute >= 10
|
| 30 |
|
| 31 |
-
|
| 32 |
-
@st.cache_data(show_spinner=False)
|
| 33 |
-
def load_GitHub(github_token, file_name, hour, after_10_min):
|
| 34 |
url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
|
| 35 |
headers = {'Authorization': f'token {github_token}'}
|
| 36 |
|
|
@@ -42,102 +127,52 @@ def load_GitHub(github_token, file_name, hour, after_10_min):
|
|
| 42 |
if 'Date' in df.columns:
|
| 43 |
df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
|
| 44 |
df.set_index('Date', inplace=True) # Set 'Date' column as the index
|
| 45 |
-
|
| 46 |
-
|
|
|
|
| 47 |
else:
|
| 48 |
print(f"Failed to download {file_name}. Status code: {response.status_code}")
|
| 49 |
return None
|
| 50 |
-
|
| 51 |
-
@st.cache_data(show_spinner=False)
|
| 52 |
-
def load_forecast(github_token, hour, after_10_min):
|
| 53 |
-
predictions_dict = {}
|
| 54 |
-
for hour in range(24):
|
| 55 |
-
file_name = f'Predictions_{hour}h.csv'
|
| 56 |
-
df = load_GitHub(github_token, file_name, hour, after_10_min)
|
| 57 |
-
if df is not None:
|
| 58 |
-
predictions_dict[file_name] = df
|
| 59 |
-
return predictions_dict
|
| 60 |
-
|
| 61 |
-
def convert_European_time(data, time_zone):
|
| 62 |
-
data.index = pd.to_datetime(data.index, utc=True)
|
| 63 |
-
data.index = data.index.tz_convert(time_zone)
|
| 64 |
-
data.index = data.index.tz_localize(None)
|
| 65 |
-
return data
|
| 66 |
|
| 67 |
-
def
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
}
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
df.columns = df.columns.str.replace(original, simplified, regex=True)
|
| 78 |
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
return df
|
| 94 |
|
| 95 |
github_token = st.secrets["GitHub_Token_KUL_Margarida"]
|
|
|
|
| 96 |
|
| 97 |
if github_token:
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', hour, after_10_min)
|
| 104 |
-
Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv', hour, after_10_min)
|
| 105 |
-
Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv', hour, after_10_min)
|
| 106 |
-
Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv', hour, after_10_min)
|
| 107 |
-
Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
|
| 108 |
-
Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
|
| 109 |
-
Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
|
| 110 |
-
Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min)
|
| 111 |
-
Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min)
|
| 112 |
-
Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min)
|
| 113 |
-
Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min)
|
| 114 |
-
Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min)
|
| 115 |
-
Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min)
|
| 116 |
-
Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min)
|
| 117 |
-
Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min)
|
| 118 |
-
|
| 119 |
-
Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
|
| 120 |
-
Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
|
| 121 |
-
Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
|
| 122 |
-
Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
|
| 123 |
-
Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
|
| 124 |
-
Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
|
| 125 |
-
Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
|
| 126 |
-
Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome')
|
| 127 |
-
Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome')
|
| 128 |
-
Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome')
|
| 129 |
-
Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome')
|
| 130 |
-
Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome')
|
| 131 |
-
Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome')
|
| 132 |
-
Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen')
|
| 133 |
-
Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen')
|
| 134 |
-
|
| 135 |
|
| 136 |
else:
|
| 137 |
print("Please enter your GitHub Personal Access Token to proceed.")
|
| 138 |
|
| 139 |
-
|
| 140 |
-
col1, col2 = st.columns([5, 2]) # Adjust the ratio to better fit your layout needs
|
| 141 |
with col1:
|
| 142 |
st.title("Transparency++")
|
| 143 |
|
|
@@ -150,85 +185,19 @@ with col2:
|
|
| 150 |
with col2_2:
|
| 151 |
st.image("energyville_logo.png", width=100)
|
| 152 |
|
| 153 |
-
|
| 154 |
-
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**")
|
| 155 |
-
|
| 156 |
-
upper_space.markdown("""
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
""", unsafe_allow_html=True)
|
| 160 |
-
|
| 161 |
-
countries = {
|
| 162 |
-
'Overall': 'Overall',
|
| 163 |
-
'Austria': 'AT',
|
| 164 |
-
'Belgium': 'BE',
|
| 165 |
-
'Denmark 1': 'DK_1',
|
| 166 |
-
'Denmark 2': 'DK_2',
|
| 167 |
-
'France': 'FR',
|
| 168 |
-
'Germany-Luxembourg': 'DE_LU',
|
| 169 |
-
'Italy Calabria': 'IT_CALA',
|
| 170 |
-
'Italy Central North': 'IT_CNOR',
|
| 171 |
-
'Italy Central South': 'IT_CSUD',
|
| 172 |
-
'Italy North': 'IT_NORD',
|
| 173 |
-
'Italy Sicily': 'IT_SICI',
|
| 174 |
-
'Italy South': 'IT_SUD',
|
| 175 |
-
'Netherlands': 'NL',
|
| 176 |
-
'Portugal': 'PT',
|
| 177 |
-
'Spain': 'ES',
|
| 178 |
-
}
|
| 179 |
-
|
| 180 |
-
data_dict = {
|
| 181 |
-
'BE': Data_BE,
|
| 182 |
-
'FR': Data_FR,
|
| 183 |
-
'DE_LU': Data_DE,
|
| 184 |
-
'NL': Data_NL,
|
| 185 |
-
'PT': Data_PT,
|
| 186 |
-
'AT': Data_AT,
|
| 187 |
-
'ES': Data_ES,
|
| 188 |
-
'IT_CALA': Data_IT_CALA,
|
| 189 |
-
'IT_CNOR': Data_IT_CNOR,
|
| 190 |
-
'IT_CSUD': Data_IT_CSUD,
|
| 191 |
-
'IT_NORD': Data_IT_NORD,
|
| 192 |
-
'IT_SICI': Data_IT_SICI,
|
| 193 |
-
'IT_SUD': Data_IT_SUD,
|
| 194 |
-
'DK_1': Data_DK_1,
|
| 195 |
-
'DK_2': Data_DK_2,
|
| 196 |
-
}
|
| 197 |
-
|
| 198 |
-
countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2']
|
| 199 |
-
countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',]
|
| 200 |
-
|
| 201 |
-
installed_capacities = {
|
| 202 |
-
'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
|
| 203 |
-
'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
|
| 204 |
-
'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
|
| 205 |
-
'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
|
| 206 |
-
'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
|
| 207 |
-
'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
|
| 208 |
-
'AT': { 'Solar': 7294, 'Wind Onshore': 4021 },
|
| 209 |
-
'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112},
|
| 210 |
-
'DK_2': { 'Solar': 992, 'Wind Offshore': 1045, 'Wind Onshore': 748},
|
| 211 |
-
}
|
| 212 |
-
|
| 213 |
-
forecast_columns_all_RES = [
|
| 214 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 215 |
-
|
| 216 |
-
forecast_columns_no_wind_offshore = [
|
| 217 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
| 218 |
-
|
| 219 |
|
| 220 |
st.sidebar.header('Filters')
|
| 221 |
|
| 222 |
st.sidebar.subheader("Select Country")
|
| 223 |
st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
| 226 |
-
|
| 227 |
-
# Sidebar with radio buttons for different sections
|
| 228 |
if selected_country != 'Overall':
|
| 229 |
st.sidebar.subheader("Section")
|
| 230 |
st.sidebar.caption("Select the type of information you want to explore.")
|
| 231 |
-
section = st.sidebar.radio('
|
| 232 |
else:
|
| 233 |
section = None # No section is shown when "Overall" is selected
|
| 234 |
|
|
@@ -236,124 +205,172 @@ if selected_country == 'Overall':
|
|
| 236 |
data = None # You can set data to None or a specific dataset based on your logic
|
| 237 |
section = None # No section selected when "Overall" is chosen
|
| 238 |
else:
|
| 239 |
-
country_code =
|
| 240 |
-
data = data_dict.get(
|
| 241 |
-
if country_code in countries_all_RES:
|
| 242 |
-
forecast_columns = forecast_columns_all_RES
|
| 243 |
-
elif country_code in countries_no_offshore:
|
| 244 |
-
forecast_columns = forecast_columns_no_wind_offshore
|
| 245 |
-
if country_code == 'BE':
|
| 246 |
-
weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
|
| 247 |
-
data['Temperature'] = data['temperature_2m_8']
|
| 248 |
-
data['Wind Speed Onshore'] = data['wind_speed_100m_8']
|
| 249 |
-
data['Wind Speed Offshore'] = data['wind_speed_100m_4']
|
| 250 |
-
else:
|
| 251 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
| 252 |
-
data['Temperature'] = data['temperature_2m']
|
| 253 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
| 254 |
-
|
| 255 |
|
| 256 |
if section == 'Data Quality':
|
| 257 |
-
|
| 258 |
st.header('Data Quality')
|
| 259 |
-
|
| 260 |
-
st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
|
| 261 |
|
| 262 |
-
|
|
|
|
|
|
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
missing_values = missing_values.round(2)
|
| 270 |
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
-
st.
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
|
| 318 |
elif section == 'Forecasts Quality':
|
| 319 |
-
|
| 320 |
st.header('Forecast Quality')
|
| 321 |
|
| 322 |
# Time series for last 1 week
|
| 323 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
| 324 |
st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
| 325 |
-
|
| 326 |
-
# Options for selecting the data to display
|
| 327 |
-
if country_code in countries_all_RES:
|
| 328 |
-
variable_options = {
|
| 329 |
-
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
| 330 |
-
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
| 331 |
-
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
| 332 |
-
"Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
|
| 333 |
-
}
|
| 334 |
-
elif country_code in countries_no_offshore:
|
| 335 |
-
variable_options = {
|
| 336 |
-
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
| 337 |
-
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
| 338 |
-
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
| 339 |
-
}
|
| 340 |
-
else:
|
| 341 |
-
print('Country code doesnt correspond.')
|
| 342 |
-
|
| 343 |
# Dropdown to select the variable
|
| 344 |
selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
|
| 345 |
-
|
| 346 |
-
# Get the corresponding columns for the selected variable
|
| 347 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 348 |
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
fig = go.Figure()
|
| 351 |
-
fig.add_trace(go.Scatter(x=
|
| 352 |
-
fig.add_trace(go.Scatter(x=
|
| 353 |
-
fig.update_layout(title=f
|
| 354 |
-
|
| 355 |
st.plotly_chart(fig)
|
| 356 |
|
|
|
|
| 357 |
# Scatter plots for error distribution
|
| 358 |
st.subheader('Error Distribution')
|
| 359 |
st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
|
|
@@ -362,19 +379,24 @@ elif section == 'Forecasts Quality':
|
|
| 362 |
# Get the corresponding columns for the selected variable
|
| 363 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 364 |
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
st.plotly_chart(fig)
|
| 377 |
-
|
| 378 |
st.subheader('Accuracy Metrics (Sorted by rMAE):')
|
| 379 |
|
| 380 |
date_range = st.date_input(
|
|
@@ -388,99 +410,92 @@ elif section == 'Forecasts Quality':
|
|
| 388 |
else:
|
| 389 |
st.error("Please select a valid date range.")
|
| 390 |
st.stop()
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. "
|
| 394 |
st.write(output_text)
|
| 395 |
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
if country_code in countries_all_RES:
|
| 399 |
-
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
|
| 400 |
-
elif country_code in countries_no_offshore:
|
| 401 |
-
accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
|
| 402 |
-
else:
|
| 403 |
-
print('Country code doesnt correspond.')
|
| 404 |
|
|
|
|
| 405 |
|
| 406 |
-
for
|
| 407 |
-
actual_col =
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
|
| 424 |
-
accuracy_metrics.loc[row_label] = [mae, rmae]
|
| 425 |
|
| 426 |
accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
|
| 427 |
-
accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
|
| 428 |
accuracy_metrics = accuracy_metrics.round(4)
|
| 429 |
|
| 430 |
-
col1, col2 = st.columns([1,
|
| 431 |
|
| 432 |
with col1:
|
|
|
|
| 433 |
st.markdown(
|
| 434 |
"""
|
| 435 |
<style>
|
| 436 |
-
.small-chart {
|
| 437 |
-
margin-top:
|
| 438 |
}
|
| 439 |
</style>
|
| 440 |
""",
|
| 441 |
unsafe_allow_html=True
|
| 442 |
)
|
| 443 |
st.dataframe(accuracy_metrics)
|
| 444 |
-
st.markdown(
|
| 445 |
-
"""
|
| 446 |
-
<style>
|
| 447 |
-
.small-chart {
|
| 448 |
-
margin-top: -30px; /* Adjust this value as needed */
|
| 449 |
-
}
|
| 450 |
-
</style>
|
| 451 |
-
""",
|
| 452 |
-
unsafe_allow_html=True
|
| 453 |
-
)
|
| 454 |
|
| 455 |
with col2:
|
| 456 |
-
|
| 457 |
-
# Prepare data for the radar chart
|
| 458 |
rmae_values = accuracy_metrics['rMAE'].tolist()
|
| 459 |
-
categories
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
fig
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
| 470 |
fig.update_layout(
|
| 471 |
-
width=
|
| 472 |
-
height=
|
| 473 |
-
margin=dict(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
polar=dict(
|
|
|
|
|
|
|
|
|
|
| 475 |
radialaxis=dict(
|
| 476 |
visible=True,
|
| 477 |
-
range=[0, max(rmae_values)
|
| 478 |
-
)
|
|
|
|
| 479 |
showlegend=False
|
| 480 |
)
|
| 481 |
-
|
| 482 |
-
#
|
| 483 |
-
st.
|
|
|
|
|
|
|
| 484 |
|
| 485 |
st.subheader('ACF plots of Errors')
|
| 486 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
|
@@ -504,7 +519,7 @@ elif section == 'Forecasts Quality':
|
|
| 504 |
|
| 505 |
# Optionally calculate and store ACF values for further analysis if needed
|
| 506 |
acf_values = acf(error.dropna(), nlags=240)
|
| 507 |
-
|
| 508 |
elif section == 'Insights':
|
| 509 |
st.header("Insights")
|
| 510 |
|
|
@@ -516,23 +531,15 @@ elif section == 'Insights':
|
|
| 516 |
|
| 517 |
# Resample data based on the selected resolution
|
| 518 |
if resolution == 'Hourly':
|
| 519 |
-
resampled_data =
|
| 520 |
elif resolution == 'Daily':
|
| 521 |
-
resampled_data =
|
| 522 |
|
| 523 |
-
# Select the necessary columns for the scatter plot
|
| 524 |
-
if country_code in countries_all_RES:
|
| 525 |
-
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
| 526 |
-
elif country_code in countries_no_offshore:
|
| 527 |
-
selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
|
| 528 |
-
else:
|
| 529 |
-
print('Country code doesnt correspond.')
|
| 530 |
|
| 531 |
-
|
| 532 |
-
selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
|
| 533 |
|
| 534 |
# Drop missing values
|
| 535 |
-
selected_df =
|
| 536 |
|
| 537 |
# Create the scatter plots using seaborn's pairplot
|
| 538 |
sns.set_theme(style="ticks")
|
|
@@ -543,30 +550,24 @@ elif section == 'Insights':
|
|
| 543 |
|
| 544 |
elif selected_country == 'Overall':
|
| 545 |
|
| 546 |
-
def get_forecast_columns(country_code):
|
| 547 |
-
if country_code in countries_all_RES:
|
| 548 |
-
return forecast_columns_all_RES
|
| 549 |
-
elif country_code in countries_no_offshore:
|
| 550 |
-
return forecast_columns_no_wind_offshore
|
| 551 |
-
else:
|
| 552 |
-
print('Country code doesnt correspond.')
|
| 553 |
-
|
| 554 |
def calculate_net_load_error(df, country_code):
|
| 555 |
-
|
| 556 |
-
filter_df = df
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
|
|
|
|
|
|
| 570 |
# Calculate the error based on the latest values
|
| 571 |
error = (net_load_forecast - net_load).iloc[-1]
|
| 572 |
date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format
|
|
@@ -574,29 +575,41 @@ elif selected_country == 'Overall':
|
|
| 574 |
return error, date
|
| 575 |
|
| 576 |
def plot_net_load_error_map(data_dict):
|
| 577 |
-
#
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
df_net_load_error = pd.DataFrame({
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
})
|
| 588 |
|
| 589 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
date = pd.Timestamp.now()
|
| 591 |
-
|
| 592 |
|
| 593 |
-
#
|
| 594 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
-
# Map country codes to country names
|
| 597 |
-
countries_code_to_name = {v: k for k, v in countries.items()}
|
| 598 |
-
geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name)
|
| 599 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
# Merge net_load_error and date into geo_data
|
| 601 |
geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
|
| 602 |
|
|
@@ -633,7 +646,7 @@ elif selected_country == 'Overall':
|
|
| 633 |
geo_data,
|
| 634 |
style_function=style_function,
|
| 635 |
tooltip=folium.GeoJsonTooltip(
|
| 636 |
-
fields=["
|
| 637 |
aliases=["Country:", "Net Load Error [MW]:", "Date:"],
|
| 638 |
localize=True
|
| 639 |
)
|
|
@@ -643,7 +656,7 @@ elif selected_country == 'Overall':
|
|
| 643 |
colormap.add_to(m)
|
| 644 |
|
| 645 |
# Display the map
|
| 646 |
-
_
|
| 647 |
|
| 648 |
def calculate_mae(actual, forecast):
|
| 649 |
return np.mean(np.abs(actual - forecast))
|
|
@@ -651,40 +664,36 @@ elif selected_country == 'Overall':
|
|
| 651 |
def calculate_persistence_mae(data, shift_hours):
|
| 652 |
return np.mean(np.abs(data - data.shift(shift_hours)))
|
| 653 |
|
| 654 |
-
def calculate_rmae_for_country(df):
|
| 655 |
rmae = {}
|
| 656 |
rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
rmae['Wind_offshore'] = calculate_mae(df['Wind_offshore_entsoe'], df['Wind_offshore_forecast_entsoe']) / calculate_persistence_mae(df['Wind_offshore_entsoe'], 24)
|
| 662 |
-
else:
|
| 663 |
-
rmae['Wind_offshore'] = None # Mark as None if not applicable
|
| 664 |
-
|
| 665 |
-
rmae['Solar'] = calculate_mae(df['Solar_entsoe'], df['Solar_forecast_entsoe']) / calculate_persistence_mae(df['Solar_entsoe'], 24)
|
| 666 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
return rmae
|
| 668 |
|
| 669 |
def create_rmae_dataframe(data_dict):
|
| 670 |
|
| 671 |
-
rmae_values = {'Country': [], 'Load': [], '
|
| 672 |
|
| 673 |
for country_name, df in data_dict.items():
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
|
|
|
| 677 |
|
| 678 |
rmae_values['Country'].append(country_name)
|
| 679 |
-
rmae_values['Load'].append(rmae['Load'])
|
| 680 |
-
rmae_values['Wind_onshore'].append(rmae['Wind_onshore'])
|
| 681 |
-
rmae_values['Solar'].append(rmae['Solar'])
|
| 682 |
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
rmae_values['Wind_offshore'].append(rmae['Wind_offshore'])
|
| 686 |
-
else:
|
| 687 |
-
rmae_values['Wind_offshore'].append(np.nan) # Insert NaN for countries without offshore wind
|
| 688 |
|
| 689 |
return pd.DataFrame(rmae_values)
|
| 690 |
|
|
@@ -692,10 +701,14 @@ elif selected_country == 'Overall':
|
|
| 692 |
fig = go.Figure()
|
| 693 |
|
| 694 |
# Dynamically adjust angles to exclude Wind_offshore if all values are NaN
|
| 695 |
-
angles = ['Load'
|
| 696 |
-
if not rmae_df['
|
| 697 |
-
angles.append('
|
| 698 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
for _, row in rmae_df.iterrows():
|
| 700 |
fig.add_trace(go.Scatterpolar(
|
| 701 |
r=[row[angle] for angle in angles],
|
|
@@ -735,7 +748,3 @@ elif selected_country == 'Overall':
|
|
| 735 |
|
| 736 |
# Plot radar chart for the selected countries
|
| 737 |
plot_rmae_radar_chart(filtered_rmae_df)
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
from io import StringIO
|
| 4 |
import streamlit as st
|
|
|
|
| 5 |
import plotly.express as px
|
| 6 |
import plotly.graph_objects as go
|
|
|
|
| 7 |
import numpy as np
|
|
|
|
| 8 |
from statsmodels.tsa.stattools import acf
|
| 9 |
from statsmodels.graphics.tsaplots import plot_acf
|
| 10 |
import matplotlib.pyplot as plt
|
|
|
|
| 11 |
import folium
|
|
|
|
| 12 |
from streamlit_folium import st_folium
|
| 13 |
+
import seaborn as sns
|
| 14 |
+
import datetime
|
| 15 |
from entsoe.geo import load_zones
|
|
|
|
| 16 |
import branca
|
| 17 |
+
import pytz
|
| 18 |
+
import time
|
| 19 |
+
from entsoe import EntsoePandasClient
|
| 20 |
+
import geopandas as gpd
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
tz = pytz.timezone('Europe/Brussels')
|
| 24 |
+
|
| 25 |
+
def load_capacity_csv(path: str) -> dict:
|
| 26 |
+
"""Load installed capacities CSV into a dict: Country -> {tech: value} """
|
| 27 |
+
df = pd.read_csv(path, index_col='Country')
|
| 28 |
+
# Ensure numeric and handle missing
|
| 29 |
+
df = df.replace({"NaN": np.nan}).astype(float)
|
| 30 |
+
return df.to_dict(orient='index')
|
| 31 |
+
|
| 32 |
+
# Load installed capacities from CSV files
|
| 33 |
+
installed_capacities_2024 = load_capacity_csv('installed_capacities_2024.csv')
|
| 34 |
+
installed_capacities_2025 = load_capacity_csv('installed_capacities_2025.csv')
|
| 35 |
+
|
| 36 |
+
TECHS = ['Solar', 'Wind Offshore', 'Wind Onshore']
|
| 37 |
+
#countries = [ 'AT', 'BE', 'NL', 'BG', 'HR', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
|
| 38 |
+
#'EE', 'FI', 'FR', 'GR', 'HU', 'IT_CALA', 'IT_CNOR',
|
| 39 |
+
#'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD', 'LV', 'LT',
|
| 40 |
+
#'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5', 'PL', 'PT', 'RO',
|
| 41 |
+
#'SE_1', 'SE_2', 'SE_3', 'SE_4', 'RS', 'SK', 'SI', 'ES', 'CH', 'ME','IE_SEM','MK','CY','BA','AL','XK']
|
| 42 |
+
|
| 43 |
+
countries = ['AT', 'BE', 'DE_LU', 'DK_1', 'DK_2', 'FR', 'IT_CALA', 'IT_CNOR',
|
| 44 |
+
'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
|
| 45 |
+
'NL', 'PT', 'ES']
|
| 46 |
+
|
| 47 |
+
def get_time_zone(country_code):
|
| 48 |
+
|
| 49 |
+
tz_map = {
|
| 50 |
+
'AL': 'Europe/Tirane',
|
| 51 |
+
'AT': 'Europe/Vienna',
|
| 52 |
+
'BE': 'Europe/Brussels',
|
| 53 |
+
'BA': 'Europe/Sarajevo',
|
| 54 |
+
'BG': 'Europe/Sofia',
|
| 55 |
+
'HR': 'Europe/Zagreb',
|
| 56 |
+
'CY': 'Asia/Nicosia',
|
| 57 |
+
'CZ': 'Europe/Prague',
|
| 58 |
+
'DE_LU': 'Europe/Berlin',
|
| 59 |
+
'DK_1': 'Europe/Copenhagen',
|
| 60 |
+
'DK_2': 'Europe/Copenhagen',
|
| 61 |
+
'EE': 'Europe/Tallinn',
|
| 62 |
+
'FI': 'Europe/Helsinki',
|
| 63 |
+
'MK': 'Europe/Skopje',
|
| 64 |
+
'FR': 'Europe/Paris',
|
| 65 |
+
'GR': 'Europe/Athens',
|
| 66 |
+
'HU': 'Europe/Budapest',
|
| 67 |
+
'IS': 'Atlantic/Reykjavik',
|
| 68 |
+
'IE_SEM': 'Europe/Dublin',
|
| 69 |
+
'IT_CALA': 'Europe/Rome',
|
| 70 |
+
'IT_CNOR': 'Europe/Rome',
|
| 71 |
+
'IT_CSUD': 'Europe/Rome',
|
| 72 |
+
'IT_NORD': 'Europe/Rome',
|
| 73 |
+
'IT_SARD': 'Europe/Rome',
|
| 74 |
+
'IT_SICI': 'Europe/Rome',
|
| 75 |
+
'IT_SUD': 'Europe/Rome',
|
| 76 |
+
'LV': 'Europe/Riga',
|
| 77 |
+
'LT': 'Europe/Vilnius',
|
| 78 |
+
'ME': 'Europe/Podgorica',
|
| 79 |
+
'NL': 'Europe/Amsterdam',
|
| 80 |
+
'NO_1': 'Europe/Oslo',
|
| 81 |
+
'NO_2': 'Europe/Oslo',
|
| 82 |
+
'NO_3': 'Europe/Oslo',
|
| 83 |
+
'NO_4': 'Europe/Oslo',
|
| 84 |
+
'NO_5': 'Europe/Oslo',
|
| 85 |
+
'PL': 'Europe/Warsaw',
|
| 86 |
+
'PT': 'Europe/Lisbon',
|
| 87 |
+
'MD': 'Europe/Chisinau',
|
| 88 |
+
'RO': 'Europe/Bucharest',
|
| 89 |
+
'SE_1': 'Europe/Stockholm',
|
| 90 |
+
'SE_2': 'Europe/Stockholm',
|
| 91 |
+
'SE_3': 'Europe/Stockholm',
|
| 92 |
+
'SE_4': 'Europe/Stockholm',
|
| 93 |
+
'RS': 'Europe/Belgrade',
|
| 94 |
+
'SK': 'Europe/Bratislava',
|
| 95 |
+
'SI': 'Europe/Ljubljana',
|
| 96 |
+
'ES': 'Europe/Madrid',
|
| 97 |
+
'CH': 'Europe/Zurich',
|
| 98 |
+
'XK': 'Europe/Rome'
|
| 99 |
+
}
|
| 100 |
+
if country_code in tz_map:
|
| 101 |
+
return tz_map[country_code]
|
| 102 |
+
else:
|
| 103 |
+
raise ValueError(f"Time zone for country code {country_code} is not defined.")
|
| 104 |
+
|
| 105 |
+
def convert_European_time(data, bdz):
|
| 106 |
+
time_zone = get_time_zone(bdz)
|
| 107 |
+
data.index = pd.to_datetime(data.index, utc=True)
|
| 108 |
+
data.index = data.index.tz_convert(time_zone)
|
| 109 |
+
data.index = data.index.tz_localize(None)
|
| 110 |
+
return data
|
| 111 |
|
| 112 |
+
def filter_dataframe(df):
|
| 113 |
+
allowed_columns = {"Load_entsoe", "Load_forecast_entsoe", "Solar_entsoe", "Solar_forecast_entsoe", "Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe", "Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"}
|
| 114 |
+
return df[[col for col in df.columns if col in allowed_columns]]
|
| 115 |
|
| 116 |
+
def load_GitHub(github_token, bdz):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
+
file_name=f'{bdz}_Entsoe_UTC.csv'
|
|
|
|
|
|
|
| 119 |
url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
|
| 120 |
headers = {'Authorization': f'token {github_token}'}
|
| 121 |
|
|
|
|
| 127 |
if 'Date' in df.columns:
|
| 128 |
df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
|
| 129 |
df.set_index('Date', inplace=True) # Set 'Date' column as the index
|
| 130 |
+
df=filter_dataframe(df)
|
| 131 |
+
df=convert_European_time(df, bdz)
|
| 132 |
+
return df[df.index >= pd.Timestamp('2024-01-01')]
|
| 133 |
else:
|
| 134 |
print(f"Failed to download {file_name}. Status code: {response.status_code}")
|
| 135 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
+
def filter_variable_options(df):
|
| 138 |
+
all_options = {
|
| 139 |
+
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
| 140 |
+
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
| 141 |
+
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
| 142 |
+
"Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"),
|
| 143 |
}
|
| 144 |
|
| 145 |
+
variable_options = {}
|
| 146 |
+
flagged_columns = []
|
|
|
|
| 147 |
|
| 148 |
+
for key, (col1, col2) in all_options.items():
|
| 149 |
+
col1_exists = col1 in df.columns and not df[col1].isna().all()
|
| 150 |
+
col2_exists = col2 in df.columns and not df[col2].isna().all()
|
| 151 |
+
if col1_exists and col2_exists:
|
| 152 |
+
variable_options[key] = (col1, col2)
|
| 153 |
+
elif not col1_exists and col2_exists:
|
| 154 |
+
flagged_columns.append(col1)
|
| 155 |
+
elif col1_exists and not col2_exists:
|
| 156 |
+
flagged_columns.append(col2)
|
| 157 |
+
elif not col1_exists and not col2_exists:
|
| 158 |
+
flagged_columns.append(col1)
|
| 159 |
+
flagged_columns.append(col2)
|
| 160 |
+
return variable_options, flagged_columns
|
|
|
|
|
|
|
| 161 |
|
| 162 |
github_token = st.secrets["GitHub_Token_KUL_Margarida"]
|
| 163 |
+
#countries = ['IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_SARD', 'PT', 'FR']
|
| 164 |
|
| 165 |
if github_token:
|
| 166 |
+
data_dict = {}
|
| 167 |
+
for bdz in countries:
|
| 168 |
+
df = load_GitHub(github_token, bdz)
|
| 169 |
+
if df is not None:
|
| 170 |
+
data_dict[bdz] = df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
else:
|
| 173 |
print("Please enter your GitHub Personal Access Token to proceed.")
|
| 174 |
|
| 175 |
+
col1, col2 = st.columns([5, 2])
|
|
|
|
| 176 |
with col1:
|
| 177 |
st.title("Transparency++")
|
| 178 |
|
|
|
|
| 185 |
with col2_2:
|
| 186 |
st.image("energyville_logo.png", width=100)
|
| 187 |
|
| 188 |
+
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for ENTSO-E member countries.**")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
st.sidebar.header('Filters')
|
| 191 |
|
| 192 |
st.sidebar.subheader("Select Country")
|
| 193 |
st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
|
| 194 |
+
selection = ['Overall'] + list(countries)
|
| 195 |
+
selected_country = st.sidebar.selectbox('Select Country', selection)
|
| 196 |
|
|
|
|
|
|
|
|
|
|
| 197 |
if selected_country != 'Overall':
|
| 198 |
st.sidebar.subheader("Section")
|
| 199 |
st.sidebar.caption("Select the type of information you want to explore.")
|
| 200 |
+
section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
|
| 201 |
else:
|
| 202 |
section = None # No section is shown when "Overall" is selected
|
| 203 |
|
|
|
|
| 205 |
data = None # You can set data to None or a specific dataset based on your logic
|
| 206 |
section = None # No section selected when "Overall" is chosen
|
| 207 |
else:
|
| 208 |
+
country_code = selected_country
|
| 209 |
+
data = data_dict.get(selected_country)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
if section == 'Data Quality':
|
|
|
|
| 212 |
st.header('Data Quality')
|
|
|
|
|
|
|
| 213 |
|
| 214 |
+
# Determine if capacities missing per year
|
| 215 |
+
caps4 = installed_capacities_2024.get(country_code)
|
| 216 |
+
caps5 = installed_capacities_2025.get(country_code)
|
| 217 |
|
| 218 |
+
st.write(
|
| 219 |
+
"The table below presents the data quality metrics focusing on the percentage "
|
| 220 |
+
"of missing values and the occurrence of extreme or nonsensical values for "
|
| 221 |
+
"the selected country. Additionally, it flags any mismatch between installed "
|
| 222 |
+
"capacity (NaN or 0) and actual data in the dataset."
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
# Determine end of data slice (yesterday 23:59:59)
|
| 226 |
+
yesterday = datetime.datetime.now(tz).date() - datetime.timedelta(days=1)
|
| 227 |
+
end_time = pd.Timestamp(yesterday).replace(hour=23, minute=59, second=59)
|
| 228 |
+
# Filter data
|
| 229 |
+
data_quality = data[data.index <= end_time]
|
| 230 |
+
|
| 231 |
+
tech_cols = {
|
| 232 |
+
'Load': ('Load_entsoe', 'Load_forecast_entsoe'),
|
| 233 |
+
'Wind Onshore': ('Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe'),
|
| 234 |
+
'Wind Offshore': ('Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe'),
|
| 235 |
+
'Solar': ('Solar_entsoe', 'Solar_forecast_entsoe'),
|
| 236 |
+
}
|
| 237 |
|
| 238 |
+
skip_cols = []
|
| 239 |
+
|
| 240 |
+
for tech_key, (act_col, fct_col) in tech_cols.items():
|
| 241 |
+
# only proceed if the columns are in the DataFrame
|
| 242 |
+
if act_col in data_quality.columns and fct_col in data_quality.columns:
|
| 243 |
+
# get installed capacities for 2024 & 2025
|
| 244 |
+
cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
|
| 245 |
+
cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
|
| 246 |
+
|
| 247 |
+
# if both years are missing or zero capacity
|
| 248 |
+
if (pd.isna(cap4) or cap4 == 0) and (pd.isna(cap5) or cap5 == 0):
|
| 249 |
+
act = data_quality[act_col]
|
| 250 |
+
fct = data_quality[fct_col]
|
| 251 |
+
# check if actual AND forecast are entirely zero or NaN
|
| 252 |
+
only_zero_or_na = (act.fillna(0) == 0).all() and (fct.fillna(0) == 0).all()
|
| 253 |
+
if only_zero_or_na:
|
| 254 |
+
skip_cols += [act_col, fct_col]
|
| 255 |
+
|
| 256 |
+
# drop any columns flagged for skipping (ignore errors if somehow missing)
|
| 257 |
+
if skip_cols:
|
| 258 |
+
data_quality = data_quality.drop(columns=skip_cols, errors='ignore')
|
| 259 |
+
|
| 260 |
+
# Compute missing
|
| 261 |
+
missing_values = data_quality.isna().mean() * 100
|
| 262 |
missing_values = missing_values.round(2)
|
| 263 |
|
| 264 |
+
extreme_values = {}
|
| 265 |
+
capacity_mismatch = {}
|
| 266 |
+
neg_counts = {}
|
| 267 |
+
over_counts = {}
|
| 268 |
+
cutoff = pd.Timestamp('2025-01-01')
|
| 269 |
+
|
| 270 |
+
# Iterate over columns
|
| 271 |
+
for col in data_quality.columns:
|
| 272 |
+
# Identify technology
|
| 273 |
+
if 'Solar' in col:
|
| 274 |
+
tech_key = 'Solar'
|
| 275 |
+
elif 'Wind_onshore' in col:
|
| 276 |
+
tech_key = 'Wind Onshore'
|
| 277 |
+
elif 'Wind_offshore' in col:
|
| 278 |
+
tech_key = 'Wind Offshore'
|
| 279 |
+
elif 'Load' in col:
|
| 280 |
+
tech_key = 'Load'
|
| 281 |
+
else:
|
| 282 |
+
extreme_values[col] = np.nan
|
| 283 |
+
capacity_mismatch[col] = np.nan
|
| 284 |
+
continue
|
| 285 |
+
|
| 286 |
+
series = data_quality[col]
|
| 287 |
+
# Year masks
|
| 288 |
+
mask_2024 = series.index < cutoff
|
| 289 |
+
# Fetch capacity values
|
| 290 |
+
cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
|
| 291 |
+
cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
|
| 292 |
+
print('var:',col)
|
| 293 |
+
print('cap4:',cap4)
|
| 294 |
+
if tech_key == 'Load':
|
| 295 |
+
# Negative load
|
| 296 |
+
extreme_pct = round((series < 0).mean() * 100, 2)
|
| 297 |
+
mismatch = np.nan
|
| 298 |
+
else:
|
| 299 |
+
# Create per-timestamp capacity
|
| 300 |
+
cap_series = pd.Series(
|
| 301 |
+
np.where(mask_2024, cap4, cap5),
|
| 302 |
+
index=series.index
|
| 303 |
+
)
|
| 304 |
+
# Flags
|
| 305 |
+
neg = series < 0
|
| 306 |
+
over = (series > cap_series) & cap_series.notna()
|
| 307 |
+
nonsense = neg | over
|
| 308 |
+
extreme_pct = round(nonsense.mean() * 100, 2)
|
| 309 |
+
# Mismatch: non-zero gen when cap missing or zero
|
| 310 |
+
# cap4, cap5 are floats or NaN
|
| 311 |
+
no_cap_2024 = pd.isna(cap4) or (cap4 == 0)
|
| 312 |
+
no_cap_2025 = pd.isna(cap5) or (cap5 == 0)
|
| 313 |
+
|
| 314 |
+
# check if there's at least one actual non-zero (treat NaN as 0)
|
| 315 |
+
has_nonzero = (series.fillna(0) != 0).any()
|
| 316 |
+
|
| 317 |
+
if no_cap_2024 and no_cap_2025 and has_nonzero:
|
| 318 |
+
mismatch = 100.0
|
| 319 |
+
else:
|
| 320 |
+
mismatch = 0.0
|
| 321 |
|
| 322 |
+
extreme_values[col] = extreme_pct
|
| 323 |
+
capacity_mismatch[col] = mismatch
|
| 324 |
+
|
| 325 |
+
display_extreme = {col: f"{val:.2f}" if not pd.isna(val) else ''
|
| 326 |
+
for col, val in extreme_values.items()}
|
| 327 |
+
display_mismatch = {}
|
| 328 |
+
for col, val in capacity_mismatch.items():
|
| 329 |
+
if 'Load' in col:
|
| 330 |
+
display_mismatch[col] = '-'
|
| 331 |
+
else:
|
| 332 |
+
display_mismatch[col] = '🚩' if val == 100.0 else ''
|
| 333 |
|
| 334 |
+
# Build and render DataFrame
|
| 335 |
+
metrics_df = pd.DataFrame({
|
| 336 |
+
'Missing Values (%)': missing_values,
|
| 337 |
+
'Extreme/Nonsensical Values (%)': pd.Series(display_extreme),
|
| 338 |
+
'Capacity Mismatch Flag': pd.Series(display_mismatch)
|
| 339 |
+
})
|
| 340 |
|
| 341 |
+
st.dataframe(metrics_df.style.format({
|
| 342 |
+
'Missing Values (%)': '{:.2f}',
|
| 343 |
+
'Extreme/Nonsensical Values (%)': '{}'
|
| 344 |
+
}))
|
| 345 |
+
|
| 346 |
+
st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset',unsafe_allow_html=True)
|
| 347 |
+
st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: For Load, this is % of values below 0. For generation, it is negative or out-of-bound (> capacity).',unsafe_allow_html=True)
|
| 348 |
+
st.write('<b><u>Capacity Mismatch Flag</u></b>: Shows "🚩" if installed capacity is `NaN` or `0` but the dataset has non-zero generation. Blank otherwise. For Load columns, it is "-".',unsafe_allow_html=True)
|
| 349 |
|
| 350 |
elif section == 'Forecasts Quality':
|
| 351 |
+
|
| 352 |
st.header('Forecast Quality')
|
| 353 |
|
| 354 |
# Time series for last 1 week
|
| 355 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
| 356 |
st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
| 357 |
+
variable_options, flagged_columns = filter_variable_options(last_week)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
# Dropdown to select the variable
|
| 359 |
selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
|
|
|
|
|
|
|
| 360 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 361 |
|
| 362 |
+
x_vals = last_week.index.to_pydatetime().tolist()
|
| 363 |
+
y_actual = last_week[actual_col].tolist()
|
| 364 |
+
y_forecast = last_week[forecast_col].tolist()
|
| 365 |
+
|
| 366 |
+
# then plot
|
| 367 |
fig = go.Figure()
|
| 368 |
+
fig.add_trace(go.Scatter(x=x_vals,y=y_actual,mode="lines",name="Actual"))
|
| 369 |
+
fig.add_trace(go.Scatter(x=x_vals,y=y_forecast,mode="lines",name="Forecast ENTSO-E"))
|
| 370 |
+
fig.update_layout(title=f"Forecasts vs Actual for {selected_variable}",xaxis_title="Date",yaxis_title="Value [MW]")
|
|
|
|
| 371 |
st.plotly_chart(fig)
|
| 372 |
|
| 373 |
+
|
| 374 |
# Scatter plots for error distribution
|
| 375 |
st.subheader('Error Distribution')
|
| 376 |
st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
|
|
|
|
| 379 |
# Get the corresponding columns for the selected variable
|
| 380 |
actual_col, forecast_col = variable_options[selected_variable]
|
| 381 |
|
| 382 |
+
if forecast_col in data.columns:
|
| 383 |
+
# grab the two series, drop any NaNs, and align on their common timestamps
|
| 384 |
+
obs = data[actual_col].dropna()
|
| 385 |
+
pred = data[forecast_col].dropna()
|
| 386 |
+
idx = obs.index.intersection(pred.index)
|
| 387 |
+
obs = obs.loc[idx]
|
| 388 |
+
pred = pred.loc[idx]
|
| 389 |
+
|
| 390 |
+
# convert to pure Python lists
|
| 391 |
+
x_vals = obs.tolist()
|
| 392 |
+
y_vals = pred.tolist()
|
| 393 |
+
|
| 394 |
+
fig = go.Figure()
|
| 395 |
+
fig.add_trace(go.Scatter(x=x_vals,y=y_vals,mode='markers',name=f'{selected_variable}'))
|
| 396 |
+
fig.update_layout(title=f'Error Distribution for {selected_variable}',xaxis_title='Observed [MW]',yaxis_title='Forecast ENTSO-E [MW]')
|
| 397 |
+
|
| 398 |
st.plotly_chart(fig)
|
| 399 |
+
|
| 400 |
st.subheader('Accuracy Metrics (Sorted by rMAE):')
|
| 401 |
|
| 402 |
date_range = st.date_input(
|
|
|
|
| 410 |
else:
|
| 411 |
st.error("Please select a valid date range.")
|
| 412 |
st.stop()
|
| 413 |
+
output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. On the right is a radar plot with the rMAE."
|
|
|
|
|
|
|
| 414 |
st.write(output_text)
|
| 415 |
|
| 416 |
+
data_metrics = data.loc[start_date:end_date]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
+
accuracy_metrics = pd.DataFrame(columns=['MAE', 'RMSE' ,'rMAE'], index=list(variable_options.keys()))
|
| 419 |
|
| 420 |
+
for variable in variable_options.keys():
|
| 421 |
+
actual_col, forecast_col = variable_options[variable]
|
| 422 |
+
obs = data_metrics[actual_col]
|
| 423 |
+
pred = data_metrics[forecast_col]
|
| 424 |
+
error = pred - obs
|
| 425 |
+
|
| 426 |
+
mae = round(np.mean(np.abs(error)),2)
|
| 427 |
+
if 'Load' in actual_col:
|
| 428 |
+
persistence = obs.shift(168) # Weekly persistence
|
| 429 |
+
else:
|
| 430 |
+
persistence = obs.shift(24) # Daily persistence
|
| 431 |
+
|
| 432 |
+
# Using the whole year's data for rMAE calculations
|
| 433 |
+
rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
|
| 434 |
+
rmse = round(np.sqrt(np.mean((error)**2)), 2)
|
| 435 |
+
row_label = variable #'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
|
| 436 |
+
accuracy_metrics.loc[row_label] = [mae, rmse, rmae]
|
|
|
|
|
|
|
| 437 |
|
| 438 |
accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
|
| 439 |
+
accuracy_metrics.sort_values(by=accuracy_metrics.columns[-1], ascending=True, inplace=True)
|
| 440 |
accuracy_metrics = accuracy_metrics.round(4)
|
| 441 |
|
| 442 |
+
col1, col2 = st.columns([1, 1])
|
| 443 |
|
| 444 |
with col1:
|
| 445 |
+
# (optional) some top-margin before the table
|
| 446 |
st.markdown(
|
| 447 |
"""
|
| 448 |
<style>
|
| 449 |
+
.small-chart-container {
|
| 450 |
+
margin-top: 0px;
|
| 451 |
}
|
| 452 |
</style>
|
| 453 |
""",
|
| 454 |
unsafe_allow_html=True
|
| 455 |
)
|
| 456 |
st.dataframe(accuracy_metrics)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
|
| 458 |
with col2:
|
| 459 |
+
# prepare the data
|
|
|
|
| 460 |
rmae_values = accuracy_metrics['rMAE'].tolist()
|
| 461 |
+
categories = accuracy_metrics.index.tolist()
|
| 462 |
+
|
| 463 |
+
# build the radar
|
| 464 |
+
fig = go.Figure(
|
| 465 |
+
go.Scatterpolar(
|
| 466 |
+
r=rmae_values,
|
| 467 |
+
theta=categories,
|
| 468 |
+
fill='toself',
|
| 469 |
+
name='rMAE'
|
| 470 |
+
)
|
| 471 |
+
)
|
| 472 |
+
|
| 473 |
+
# 👉 shrink the total size, and give extra left/right margin for your labels
|
| 474 |
fig.update_layout(
|
| 475 |
+
width=300, # make the whole plot a bit smaller
|
| 476 |
+
height=300,
|
| 477 |
+
margin=dict(
|
| 478 |
+
l=50, # more space on the left for long category names
|
| 479 |
+
r=60, # and on the right, if needed
|
| 480 |
+
t=20,
|
| 481 |
+
b=20
|
| 482 |
+
),
|
| 483 |
polar=dict(
|
| 484 |
+
angularaxis=dict(
|
| 485 |
+
tickfont=dict(size=11) # if you want slightly smaller ticks
|
| 486 |
+
),
|
| 487 |
radialaxis=dict(
|
| 488 |
visible=True,
|
| 489 |
+
range=[0, max(rmae_values)*1.2]
|
| 490 |
+
)
|
| 491 |
+
),
|
| 492 |
showlegend=False
|
| 493 |
)
|
| 494 |
+
|
| 495 |
+
# wrap in a div so you can still control vertical spacing via CSS
|
| 496 |
+
st.markdown('<div class="small-chart-container">', unsafe_allow_html=True)
|
| 497 |
+
st.plotly_chart(fig, use_container_width=False)
|
| 498 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
| 499 |
|
| 500 |
st.subheader('ACF plots of Errors')
|
| 501 |
st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
|
|
|
|
| 519 |
|
| 520 |
# Optionally calculate and store ACF values for further analysis if needed
|
| 521 |
acf_values = acf(error.dropna(), nlags=240)
|
| 522 |
+
|
| 523 |
elif section == 'Insights':
|
| 524 |
st.header("Insights")
|
| 525 |
|
|
|
|
| 531 |
|
| 532 |
# Resample data based on the selected resolution
|
| 533 |
if resolution == 'Hourly':
|
| 534 |
+
resampled_data = data
|
| 535 |
elif resolution == 'Daily':
|
| 536 |
+
resampled_data = data.resample('D').mean() # Resample to daily mean
|
| 537 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
|
| 539 |
+
resampled_data.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in resampled_data.columns]
|
|
|
|
| 540 |
|
| 541 |
# Drop missing values
|
| 542 |
+
selected_df = resampled_data.dropna()
|
| 543 |
|
| 544 |
# Create the scatter plots using seaborn's pairplot
|
| 545 |
sns.set_theme(style="ticks")
|
|
|
|
| 550 |
|
| 551 |
elif selected_country == 'Overall':
|
| 552 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
def calculate_net_load_error(df, country_code):
    """Compute the latest net-load forecast error for one bidding zone.

    Net load is Load minus renewable generation (wind onshore/offshore,
    solar); the error is forecast net load minus observed net load at the
    most recent timestamp with complete data.

    Parameters
    ----------
    df : pd.DataFrame
        Time-indexed zone data with 'Load_entsoe'/'Load_forecast_entsoe'
        and optional renewable actual/forecast columns.
    country_code : str
        Zone identifier; used only for diagnostics when no data is usable.

    Returns
    -------
    tuple
        (error_in_MW, date_string) or (None, None) when the data is
        insufficient.
    """
    # Drop entirely-NaN columns first so one fully-missing series does not
    # wipe out every row in the row-wise dropna that follows.
    filter_df = df.dropna(axis=1, how='all')
    filter_df = filter_df.dropna()

    if filter_df.empty:
        # No complete rows left for this zone.
        print(country_code)
        return None, None

    # Guard: without both load columns the net-load error is undefined
    # (e.g. 'Load_entsoe' may have been dropped above for being all NaN);
    # the original code raised KeyError here instead of returning None.
    if ('Load_entsoe' not in filter_df.columns
            or 'Load_forecast_entsoe' not in filter_df.columns):
        print(country_code)
        return None, None

    net_load = filter_df['Load_entsoe'].copy()
    for col in ['Wind_onshore_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe']:
        if col in filter_df.columns:
            net_load -= filter_df[col]

    net_load_forecast = filter_df['Load_forecast_entsoe'].copy()
    for col in ['Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe', 'Wind_offshore_forecast_entsoe']:
        if col in filter_df.columns:
            net_load_forecast -= filter_df[col]

    # Calculate the error based on the latest values
    error = (net_load_forecast - net_load).iloc[-1]
    date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M")  # Get the latest date in string format

    return error, date
|
| 576 |
|
| 577 |
def plot_net_load_error_map(data_dict):
|
| 578 |
+
# 1) compute your errors as before
|
| 579 |
+
missing_zones={'ME','IE_SEM','MK','CY','BA','AL','XK'}
|
| 580 |
+
net_load_errors = {
|
| 581 |
+
country_code: calculate_net_load_error(data, country_code)
|
| 582 |
+
for country_code, data in data_dict.items()
|
| 583 |
+
}
|
| 584 |
df_net_load_error = pd.DataFrame({
|
| 585 |
+
"zoneName": list(net_load_errors),
|
| 586 |
+
"net_load_error": [v[0] for v in net_load_errors.values()],
|
| 587 |
+
"date": [v[1] for v in net_load_errors.values()],
|
| 588 |
})
|
| 589 |
|
| 590 |
+
# 2) split your zones into standard vs. fallback
|
| 591 |
+
selected = list(data_dict.keys())
|
| 592 |
+
standard_zones = [z for z in selected if z not in missing_zones]
|
| 593 |
+
fallback_zones = [z for z in selected if z in missing_zones]
|
| 594 |
+
|
| 595 |
+
# 3a) load the standard ones with entsoe.load_zones
|
| 596 |
date = pd.Timestamp.now()
|
| 597 |
+
geo_std = load_zones(standard_zones, date).reset_index()
|
| 598 |
|
| 599 |
+
# 3b) manually load the fallback ones
|
| 600 |
+
gdfs = []
|
| 601 |
+
for z in fallback_zones:
|
| 602 |
+
fn = f"{z}.geojson"
|
| 603 |
+
path = f'./geojson_missing/{fn}'
|
| 604 |
+
g = gpd.read_file(path)
|
| 605 |
+
g['zoneName'] = z
|
| 606 |
+
gdfs.append(g)
|
| 607 |
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
+
geo_fb = pd.concat(gdfs, ignore_index=True) if gdfs else gpd.GeoDataFrame()
|
| 610 |
+
|
| 611 |
+
# 4) combine
|
| 612 |
+
geo_data = pd.concat([geo_std, geo_fb], ignore_index=True)
|
| 613 |
# Merge net_load_error and date into geo_data
|
| 614 |
geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
|
| 615 |
|
|
|
|
| 646 |
geo_data,
|
| 647 |
style_function=style_function,
|
| 648 |
tooltip=folium.GeoJsonTooltip(
|
| 649 |
+
fields=["zoneName", "net_load_error", "date"],
|
| 650 |
aliases=["Country:", "Net Load Error [MW]:", "Date:"],
|
| 651 |
localize=True
|
| 652 |
)
|
|
|
|
| 656 |
colormap.add_to(m)
|
| 657 |
|
| 658 |
# Display the map
|
| 659 |
+
_=st_folium(m, width=700, height=600)
|
| 660 |
|
| 661 |
def calculate_mae(actual, forecast):
    """Return the mean absolute error between observed and forecast values."""
    deviation = np.abs(actual - forecast)
    return np.mean(deviation)
|
|
|
|
| 664 |
def calculate_persistence_mae(data, shift_hours):
    """MAE of the naive persistence baseline (series shifted by `shift_hours`).

    NaNs introduced at the start by the shift are skipped by the mean.
    """
    baseline = data.shift(shift_hours)
    return np.mean(np.abs(data - baseline))
|
| 666 |
|
| 667 |
+
def calculate_rmae_for_country(df, variable_options):
    """Compute the relative MAE (rMAE) per variable for one zone.

    rMAE = MAE of the ENTSO-E forecast divided by the MAE of a naive
    persistence baseline. Load uses a weekly (168 h) baseline; generation
    variables use a daily (24 h) baseline — the same convention used in the
    accuracy-metrics section of this app.

    Parameters
    ----------
    df : pd.DataFrame
        Zone data (already NaN-filtered by the caller).
    variable_options : dict
        Label -> (actual_col, forecast_col), as produced by
        filter_variable_options.

    Returns
    -------
    dict
        Label -> rMAE for every available variable; variables absent from
        *variable_options* are set to None so radar plots stay aligned.
    """
    rmae = {}
    for variable, (actual_col, forecast_col) in variable_options.items():
        # BUGFIX: the previous version first computed Load with the 168 h
        # baseline and then overwrote it with a 24 h baseline inside this
        # loop; the weekly baseline for Load is now applied exactly once.
        shift_hours = 168 if variable == 'Load' else 24
        rmae[variable] = (
            calculate_mae(df[actual_col], df[forecast_col])
            / calculate_persistence_mae(df[actual_col], shift_hours)
        )

    # Variables with no usable data in this zone get None instead of
    # raising KeyError downstream.
    all_opt = ["Load", "Solar", "Wind Onshore", "Wind Offshore"]
    for missing in all_opt:
        if missing not in rmae:
            rmae[missing] = None

    return rmae
|
| 682 |
|
| 683 |
def create_rmae_dataframe(data_dict):
    """Assemble a per-country rMAE table across all four variables.

    Parameters
    ----------
    data_dict : dict
        Country code -> zone DataFrame.

    Returns
    -------
    pd.DataFrame
        One row per country with columns Country, Load, Wind Onshore,
        Wind Offshore and Solar (None where a variable is unavailable).
    """
    columns = {'Country': [], 'Load': [], 'Wind Onshore': [], 'Wind Offshore': [], 'Solar': []}

    for country_name, df in data_dict.items():
        clean = df.dropna()
        print(country_name)
        # Only variables with usable actual+forecast data enter the rMAE calc.
        options, _flagged = filter_variable_options(clean)
        country_rmae = calculate_rmae_for_country(clean, options)

        columns['Country'].append(country_name)
        for variable, value in country_rmae.items():
            columns[variable].append(value)

    return pd.DataFrame(columns)
|
| 699 |
|
|
|
|
| 701 |
fig = go.Figure()
|
| 702 |
|
| 703 |
# Dynamically adjust angles to exclude Wind_offshore if all values are NaN
|
| 704 |
+
angles = ['Load']
|
| 705 |
+
if not rmae_df['Wind Offshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
|
| 706 |
+
angles.append('Wind Offshore')
|
| 707 |
+
if not rmae_df['Wind Onshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
|
| 708 |
+
angles.append('Wind Onshore')
|
| 709 |
+
if not rmae_df['Solar'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
|
| 710 |
+
angles.append('Solar')
|
| 711 |
+
|
| 712 |
for _, row in rmae_df.iterrows():
|
| 713 |
fig.add_trace(go.Scatterpolar(
|
| 714 |
r=[row[angle] for angle in angles],
|
|
|
|
| 748 |
|
| 749 |
# Plot radar chart for the selected countries
|
| 750 |
plot_rmae_radar_chart(filtered_rmae_df)
|
|
|
|
|
|
|
|
|
|
|
|