Update streamlit_app.py

streamlit_app.py  CHANGED  (+546, -10)

@@ -82,7 +82,7 @@ class AttentionResultsExplorer:
             st.warning(f"Could not load cached config, downloading fresh: {str(e)}")

         # Download from GitHub
-        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/…
+        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/master/experiment_config.yaml"
         response = self._make_github_request(config_url, "experiment configuration file")

         if response is None:
@@ -207,8 +207,9 @@ class AttentionResultsExplorer:

     def _ensure_specific_data_downloaded(self, language, config, model):
         """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
         base_path = f"results_{language}/{config}/{model}"
-        local_path = self.base_path / f"results_{language}" / config / …
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name

         # Check if we already have this specific combination cached
         if local_path.exists() and self.use_cache:
@@ -227,7 +228,8 @@ class AttentionResultsExplorer:

     def _download_specific_model_data(self, language, config, model):
         """Download only the specific model data needed"""
-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"

         # List of essential directories to download for a model
         essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
@@ -251,7 +253,8 @@ class AttentionResultsExplorer:
             contents = response.json()

             # Create local directory
-            …
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
             local_dir.mkdir(parents=True, exist_ok=True)

             # Download all files in this directory
@@ -518,7 +521,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
         if metadata_path.exists():
             with open(metadata_path, 'r') as f:
                 return json.load(f)
@@ -529,7 +533,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
         if not uas_dir.exists():
             return {}

@@ -564,7 +569,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
         if not heads_dir.exists():
             return {}

@@ -577,7 +583,7 @@ class AttentionResultsExplorer:
                 status_text = st.empty()

                 for i, csv_file in enumerate(csv_files):
-                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{…
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
                     status_text.text(f"Loading head matching data: {relation}")

                     try:
@@ -599,7 +605,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
         if var_path.exists():
             try:
                 return pd.read_csv(var_path, index_col=0)
@@ -612,7 +619,536 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
+        if not figures_dir.exists():
+            return []
+        return list(figures_dir.glob("*.pdf"))
+
+    def _handle_rate_limit_error(self, response):
+        """Handle GitHub API rate limit errors with detailed user feedback"""
+        if response.status_code in (403, 429):
+            # Check if it's a rate limit error
+            if 'rate limit' in response.text.lower() or 'api rate limit' in response.text.lower():
+                # Extract rate limit information from headers
+                remaining = response.headers.get('x-ratelimit-remaining', 'unknown')
+                reset_timestamp = response.headers.get('x-ratelimit-reset')
+                limit = response.headers.get('x-ratelimit-limit', 'unknown')
+
+                # Calculate reset time
+                reset_time_str = "unknown"
+                if reset_timestamp:
+                    try:
+                        reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                        reset_time_str = reset_time.strftime("%Y-%m-%d %H:%M:%S UTC")
+
+                        # Calculate time until reset
+                        now = datetime.now(timezone.utc)
+                        time_until_reset = reset_time - now
+                        minutes_until_reset = int(time_until_reset.total_seconds() / 60)
+
+                        if minutes_until_reset > 0:
+                            reset_time_str += f" (in {minutes_until_reset} minutes)"
+                    except (ValueError, TypeError):
+                        pass
+
+                # Display comprehensive rate limit information
+                st.error("🚫 **GitHub API Rate Limit Exceeded**")
+
+                with st.expander("📊 Rate Limit Details", expanded=True):
+                    col1, col2 = st.columns(2)
+
+                    with col1:
+                        st.metric("Requests Remaining", remaining)
+                        st.metric("Rate Limit", limit)
+
+                    with col2:
+                        st.metric("Reset Time", reset_time_str)
+                        if reset_timestamp:
+                            try:
+                                reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                                now = datetime.now(timezone.utc)
+                                time_until_reset = reset_time - now
+                                if time_until_reset.total_seconds() > 0:
+                                    st.metric("Time Until Reset", f"{int(time_until_reset.total_seconds() / 60)} minutes")
+                            except (ValueError, TypeError):
+                                pass
+
+                return True  # Indicates rate limit error was handled
+
+        return False  # Not a rate limit error
+
+    def _make_github_request(self, url, description="GitHub API request", silent_404=False):
+        """Make a GitHub API request with rate limit handling"""
+        try:
+            # Add GitHub token if available
+            headers = {}
+            github_token = os.environ.get('GITHUB_TOKEN')
+            if github_token:
+                headers['Authorization'] = f'token {github_token}'
+
+            response = requests.get(url, headers=headers)
+
+            # Check for rate limit before raising for status
+            if self._handle_rate_limit_error(response):
+                return None  # Rate limit handled, return None
+
+            # Handle 404 errors silently if requested (for optional directories)
+            if response.status_code == 404 and silent_404:
+                return None
+
+            response.raise_for_status()
+            return response
+
+        except requests.exceptions.RequestException as e:
+            if hasattr(e, 'response') and e.response is not None:
+                # Handle 404 silently if requested
+                if e.response.status_code == 404 and silent_404:
+                    return None
+
+                if not self._handle_rate_limit_error(e.response):
+                    st.warning(f"Request failed for {description}: {str(e)}")
+            else:
+                st.warning(f"Network error for {description}: {str(e)}")
+            return None
+
+    def _model_name_to_folder_name(self, model_name):
+        """Convert model name from config format to folder format
+
+        Examples:
+        - 'PlanTL-GOB-ES/roberta-base-ca' -> 'roberta-base-ca'
+        - 'microsoft/deberta-v3-base' -> 'deberta-v3-base'
+        - 'bert-base-uncased' -> 'bert-base-uncased' (no change)
+        """
+        if '/' in model_name:
+            return model_name.split('/')[-1]
+        return model_name
+
+    def _get_available_languages_local(self):
+        """Get available languages from local cache"""
+        if not self.base_path.exists():
+            return []
+        result_dirs = [d.name for d in self.base_path.iterdir()
+                       if d.is_dir() and d.name.startswith("results_")]
+        languages = [d.replace("results_", "") for d in result_dirs]
+        return sorted(languages)
+
+    def _ensure_specific_data_downloaded(self, language, config, model):
+        """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_path = f"results_{language}/{config}/{model}"
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name
+
+        # Check if we already have this specific combination cached
+        if local_path.exists() and self.use_cache:
+            # Quick check if essential files exist
+            metadata_path = local_path / "metadata" / "metadata.json"
+            if metadata_path.exists():
+                return  # Already have the data
+
+        with st.spinner(f"📥 Downloading data for {language.upper()}/{config}/{model}..."):
+            try:
+                self._download_specific_model_data(language, config, model)
+                st.success(f"✅ Downloaded {language.upper()}/{model} data!")
+            except Exception as e:
+                st.error(f"❌ Failed to download specific data: {str(e)}")
+                raise
+
+    def _download_specific_model_data(self, language, config, model):
+        """Download only the specific model data needed"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"
+
+        # List of essential directories to download for a model
+        essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
+
+        for dir_name in essential_dirs:
+            remote_path = f"{base_remote_path}/{dir_name}"
+            try:
+                self._download_directory_targeted(dir_name, remote_path, language, config, model)
+            except Exception as e:
+                st.warning(f"Could not download {dir_name} for {model}: {str(e)}")
+
+    def _download_directory_targeted(self, dir_name, remote_path, language, config, model):
+        """Download a specific directory for a model"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/{remote_path}"
+
+        response = self._make_github_request(api_url, f"directory {dir_name}", silent_404=True)
+        if response is None:
+            return  # Rate limit, 404, or other error
+
+        try:
+            contents = response.json()
+
+            # Create local directory
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            # Download all files in this directory
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+
+        except Exception as e:
+            st.warning(f"Could not download directory {dir_name}: {str(e)}")
+
+    def _get_available_configs_from_github(self, language):
+        """Get available configurations for a language from GitHub"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+
+        response = self._make_github_request(api_url, f"configurations for {language}")
+        if response is None:
+            return []
+
+        try:
+            contents = response.json()
+            configs = [item['name'] for item in contents if item['type'] == 'dir']
+            return sorted(configs)
+
+        except Exception as e:
+            st.warning(f"Could not parse configurations for {language}: {str(e)}")
+            return []
+
+    def _discover_config_parameters(self, language=None):
+        """Dynamically discover configuration parameters from available configs
+
+        Now uses the first language-model pair from experiment config to discover
+        valid configuration parameters, since configurations are consistent across
+        all language-model combinations.
+        """
+        try:
+            # Get the first language-model pair from experiment config
+            if language is None:
+                language, model = self._get_first_language_model_pair()
+                if language is None or model is None:
+                    st.warning("Could not find any language-model pairs in experiment config")
+                    return {}
+                st.info(f"🔍 Discovering configurations using {language.upper()}/{model} (configurations are consistent across all languages and models)")
+            else:
+                # If language is specified, try to get first model for that language
+                models = self._get_models_for_language(language)
+                if not models:
+                    st.warning(f"No models found for language {language}")
+                    return {}
+                model = models[0]
+
+            available_configs = self._get_experimental_configs(language)
+            if not available_configs:
+                return {}
+
+            # Parse all configurations to extract unique parameters
+            all_params = set()
+            param_values = {}
+
+            for config in available_configs:
+                params = self._parse_config_params(config)
+                for param, value in params.items():
+                    all_params.add(param)
+                    if param not in param_values:
+                        param_values[param] = set()
+                    param_values[param].add(value)
+
+            # Convert sets to sorted lists for consistent UI
+            return {param: sorted(list(values)) for param, values in param_values.items()}
+
+        except Exception as e:
+            st.warning(f"Could not discover configuration parameters: {str(e)}")
+            return {}
+
+    def _build_config_from_params(self, param_dict):
+        """Build configuration string from parameter dictionary"""
+        config_parts = []
+        for param, value in sorted(param_dict.items()):
+            config_parts.append(f"{param}_{value}")
+        return "+".join(config_parts)
+
+    def _find_best_matching_config(self, language, target_params):
+        """Find the configuration that best matches the target parameters"""
+        available_configs = self._get_experimental_configs(language)
+
+        best_match = None
+        best_score = -1
+
+        for config in available_configs:
+            config_params = self._parse_config_params(config)
+
+            # Calculate match score
+            score = 0
+            total_params = len(target_params)
+
+            for param, target_value in target_params.items():
+                if param in config_params and config_params[param] == target_value:
+                    score += 1
+
+            # Prefer configs with exact parameter count
+            if len(config_params) == total_params:
+                score += 0.5
+
+            if score > best_score:
+                best_score = score
+                best_match = config
+
+        return best_match, best_score == len(target_params)
+
+    def _download_repository(self):
+        """Download repository data from GitHub"""
+        st.info("🔄 Downloading results data from GitHub... This may take a moment.")
+
+        # GitHub API to get the repository contents
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
+
+        try:
+            # Get list of result directories
+            response = requests.get(api_url)
+            response.raise_for_status()
+            contents = response.json()
+
+            result_dirs = [item['name'] for item in contents
+                           if item['type'] == 'dir' and item['name'].startswith('results_')]
+
+            st.write(f"Found {len(result_dirs)} result directories: {', '.join(result_dirs)}")
+
+            # Download each result directory
+            progress_bar = st.progress(0)
+            for i, result_dir in enumerate(result_dirs):
+                st.write(f"Downloading {result_dir}...")
+                self._download_directory(result_dir)
+                progress_bar.progress((i + 1) / len(result_dirs))
+
+            st.success("✅ Download completed!")
+
+        except Exception as e:
+            st.error(f"❌ Error downloading repository: {str(e)}")
+            st.error("Please check the repository URL and your internet connection.")
+            raise
+
+    def _parse_config_params(self, config_name):
+        """Parse configuration parameters into a dictionary"""
+        parts = config_name.split('+')
+        params = {}
+        for part in parts:
+            if '_' in part:
+                key_parts = part.split('_')
+                if len(key_parts) >= 2:
+                    key = '_'.join(key_parts[:-1])
+                    value = key_parts[-1]
+                    params[key] = value == 'True'
+        return params
+
+    def _download_directory(self, dir_name, path=""):
+        """Recursively download a directory from GitHub"""
+        url = f"https://api.github.com/repos/{self.github_repo}/contents/{path}{dir_name}"
+
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+            contents = response.json()
+
+            local_dir = self.cache_dir / path / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+                elif item['type'] == 'dir':
+                    self._download_directory(item['name'], f"{path}{dir_name}/")
+
+        except Exception as e:
+            st.warning(f"Could not download {dir_name}: {str(e)}")
+
+    def _download_file(self, file_info, local_dir):
+        """Download a single file from GitHub"""
+        try:
+            # Use the rate limit handling for file downloads too
+            file_response = self._make_github_request(file_info['download_url'], f"file {file_info['name']}")
+            if file_response is None:
+                return  # Rate limit or other error
+
+            # Save to local cache
+            local_file = local_dir / file_info['name']
+
+            # Handle different file types
+            if file_info['name'].endswith(('.csv', '.json')):
+                with open(local_file, 'w', encoding='utf-8') as f:
+                    f.write(file_response.text)
+            else:  # Binary files like PDFs
+                with open(local_file, 'wb') as f:
+                    f.write(file_response.content)
+
+        except Exception as e:
+            st.warning(f"Could not download file {file_info['name']}: {str(e)}")
+
+    def _get_available_languages(self):
+        """Get all available language directories"""
+        return self.available_languages
+
+    def _get_experimental_configs(self, language):
+        """Get all experimental configurations for a language from GitHub API"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+        response = self._make_github_request(api_url, f"experimental configs for {language}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                configs = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(configs)
+            except Exception as e:
+                st.warning(f"Could not parse experimental configs for {language}: {str(e)}")
+
+        # Fallback to local cache if available
+        lang_dir = self.base_path / f"results_{language}"
+        if lang_dir.exists():
+            configs = [d.name for d in lang_dir.iterdir() if d.is_dir()]
+            return sorted(configs)
+        return []
+
+    def _find_matching_config(self, language, target_params):
+        """Find the first matching configuration from target parameters"""
+        return self._find_best_matching_config(language, target_params)
+
+    def _get_models(self, language, config):
+        """Get all models for a language and configuration from experiment config"""
+        # First try to get models from experiment config
+        models = self._get_models_for_language(language)
+
+        if models:
+            return models
+
+        # Fallback to GitHub API directory listing if config unavailable
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
+        response = self._make_github_request(api_url, f"models for {language}/{config}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                models = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(models)
+            except Exception as e:
+                st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
+
+        # Final fallback to local cache if available
+        config_dir = self.base_path / f"results_{language}" / config
+        if config_dir.exists():
+            models = [d.name for d in config_dir.iterdir() if d.is_dir()]
+            return sorted(models)
+        return []
+
+    def _parse_config_name(self, config_name):
+        """Parse configuration name into readable format"""
+        parts = config_name.split('+')
+        config_dict = {}
+        for part in parts:
+            if '_' in part:
+                key, value = part.split('_', 1)
+                config_dict[key.replace('_', ' ').title()] = value
+        return config_dict
+
+    def _load_metadata(self, language, config, model):
+        """Load metadata for a specific combination"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
+        if metadata_path.exists():
+            with open(metadata_path, 'r') as f:
+                return json.load(f)
+        return None
+
+    def _load_uas_scores(self, language, config, model):
+        """Load UAS scores data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
+        if not uas_dir.exists():
+            return {}
+
+        uas_data = {}
+        csv_files = list(uas_dir.glob("uas_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading UAS scores data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("uas_", "")
+                    status_text.text(f"Loading UAS data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        uas_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return uas_data
+
+    def _load_head_matching(self, language, config, model):
+        """Load head matching data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
+        if not heads_dir.exists():
+            return {}
+
+        heads_data = {}
+        csv_files = list(heads_dir.glob("heads_matching_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading head matching data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
+                    status_text.text(f"Loading head matching data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        heads_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return heads_data
+
+    def _load_variability(self, language, config, model):
+        """Load variability data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
+        if var_path.exists():
+            try:
+                return pd.read_csv(var_path, index_col=0)
+            except Exception as e:
+                st.warning(f"Could not load variability data: {e}")
+        return None
+
+    def _get_available_figures(self, language, config, model):
+        """Get all available figure files"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
         if not figures_dir.exists():
             return []
         return list(figures_dir.glob("*.pdf"))
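
Note: the rate-limit handling added in _handle_rate_limit_error boils down to reading GitHub's standard x-ratelimit-* response headers and turning the epoch reset timestamp into a human-readable countdown. A minimal sketch of that arithmetic outside Streamlit, using hypothetical header values in place of a real response:

from datetime import datetime, timezone

headers = {  # hypothetical values standing in for response.headers
    "x-ratelimit-limit": "60",
    "x-ratelimit-remaining": "0",
    "x-ratelimit-reset": "1735689600",  # epoch seconds, as GitHub sends it
}

reset_time = datetime.fromtimestamp(int(headers["x-ratelimit-reset"]), tz=timezone.utc)
minutes_left = int((reset_time - datetime.now(timezone.utc)).total_seconds() / 60)
print(f"{headers['x-ratelimit-remaining']}/{headers['x-ratelimit-limit']} requests left; "
      f"resets at {reset_time:%Y-%m-%d %H:%M:%S} UTC (~{minutes_left} min)")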
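
Most of the path changes in this commit hinge on _model_name_to_folder_name: a Hugging Face-style model id keeps only its final path segment when mapped onto the results_<language>/<config>/<model> cache layout. A standalone sketch of that mapping; the base directory, language, and config name here are hypothetical:

from pathlib import Path

def model_name_to_folder_name(model_name: str) -> str:
    # Same rule as the method in the diff: drop any org prefix
    return model_name.split("/")[-1] if "/" in model_name else model_name

base_path = Path("results_cache")  # stand-in for self.base_path
model = "PlanTL-GOB-ES/roberta-base-ca"
local_dir = base_path / "results_ca" / "remove_punct_True" / model_name_to_folder_name(model) / "metadata"
print(local_dir)  # results_cache/results_ca/remove_punct_True/roberta-base-ca/metadata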
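
The config-string convention assumed by _parse_config_params and _build_config_from_params is '+'-joined key_value parts whose values are the literal strings 'True'/'False'; parsing coerces them to Python bools, and building relies on str(True) formatting back to 'True'. A condensed round trip of that convention, with a hypothetical config folder name:

def parse_config_params(config_name: str) -> dict:
    params = {}
    for part in config_name.split("+"):
        if "_" in part:
            *key_parts, value = part.split("_")
            params["_".join(key_parts)] = (value == "True")
    return params

def build_config_from_params(param_dict: dict) -> str:
    return "+".join(f"{param}_{value}" for param, value in sorted(param_dict.items()))

name = "remove_punct_True+use_gold_False"  # hypothetical config folder name
params = parse_config_params(name)
print(params)                            # {'remove_punct': True, 'use_gold': False}
print(build_config_from_params(params))  # remove_punct_True+use_gold_False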