Spaces:
Sleeping
Sleeping
Update streamlit_app.py
Browse files- streamlit_app.py +131 -8
streamlit_app.py
CHANGED
|
@@ -15,6 +15,7 @@ import tempfile
|
|
| 15 |
import shutil
|
| 16 |
import time
|
| 17 |
from datetime import datetime, timezone
|
|
|
|
| 18 |
|
| 19 |
# Set page config
|
| 20 |
st.set_page_config(
|
|
@@ -68,8 +69,63 @@ class AttentionResultsExplorer:
|
|
| 68 |
self.available_languages = self._get_available_languages_from_github()
|
| 69 |
self.relation_types = None
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
def _get_available_languages_from_github(self):
|
| 72 |
-
"""Get available languages from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
|
| 74 |
|
| 75 |
response = self._make_github_request(api_url, "available languages")
|
|
@@ -89,6 +145,56 @@ class AttentionResultsExplorer:
|
|
| 89 |
st.warning(f"Could not parse language list from GitHub: {str(e)}")
|
| 90 |
# Fallback to local cache if available
|
| 91 |
return self._get_available_languages_local()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
def _get_available_languages_local(self):
|
| 94 |
"""Get available languages from local cache"""
|
|
@@ -176,15 +282,25 @@ class AttentionResultsExplorer:
|
|
| 176 |
def _discover_config_parameters(self, language=None):
|
| 177 |
"""Dynamically discover configuration parameters from available configs
|
| 178 |
|
| 179 |
-
|
| 180 |
-
configurations are consistent across
|
|
|
|
| 181 |
"""
|
| 182 |
try:
|
| 183 |
-
#
|
| 184 |
if language is None:
|
| 185 |
-
|
|
|
|
|
|
|
| 186 |
return {}
|
| 187 |
-
language
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
available_configs = self._get_experimental_configs(language)
|
| 190 |
if not available_configs:
|
|
@@ -361,7 +477,14 @@ class AttentionResultsExplorer:
|
|
| 361 |
return self._find_best_matching_config(language, target_params)
|
| 362 |
|
| 363 |
def _get_models(self, language, config):
|
| 364 |
-
"""Get all models for a language and configuration from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
|
| 366 |
response = self._make_github_request(api_url, f"models for {language}/{config}")
|
| 367 |
|
|
@@ -373,7 +496,7 @@ class AttentionResultsExplorer:
|
|
| 373 |
except Exception as e:
|
| 374 |
st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
|
| 375 |
|
| 376 |
-
#
|
| 377 |
config_dir = self.base_path / f"results_{language}" / config
|
| 378 |
if config_dir.exists():
|
| 379 |
models = [d.name for d in config_dir.iterdir() if d.is_dir()]
|
|
|
|
| 15 |
import shutil
|
| 16 |
import time
|
| 17 |
from datetime import datetime, timezone
|
| 18 |
+
import yaml
|
| 19 |
|
| 20 |
# Set page config
|
| 21 |
st.set_page_config(
|
|
|
|
| 69 |
self.available_languages = self._get_available_languages_from_github()
|
| 70 |
self.relation_types = None
|
| 71 |
|
| 72 |
+
def _download_experiment_config(self):
    """Return the parsed ``experiment_config.yaml``, preferring the local cache.

    Resolution order:

    1. If ``self.use_cache`` is true and a cached copy exists under
       ``self.cache_dir``, parse and return it.
    2. Otherwise request the file from ``raw.githubusercontent.com`` through
       ``self._make_github_request``.
    3. If that request yields ``None``, fall back to the cached copy (even
       when ``self.use_cache`` is false) as a last resort.

    A freshly downloaded file is written to the cache only after it has been
    parsed successfully, so an invalid download never replaces a usable cache.

    Returns:
        The object produced by ``yaml.safe_load`` for the configuration file,
        or ``None`` when the configuration could not be obtained or parsed.
    """
    config_path = self.cache_dir / "experiment_config.yaml"

    # Check if we have a cached version and use_cache is enabled
    if config_path.exists() and self.use_cache:
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                return yaml.safe_load(f)
        except Exception as e:
            st.warning(f"Could not load cached config, downloading fresh: {str(e)}")

    # Download from GitHub
    config_url = f"https://raw.githubusercontent.com/{self.github_repo}/refs/heads/master/experiment_config.yaml"
    response = self._make_github_request(config_url, "experiment configuration file")

    if response is None:
        # Try to load from cache as fallback
        if config_path.exists():
            try:
                with open(config_path, 'r', encoding='utf-8') as f:
                    return yaml.safe_load(f)
            except Exception as e:
                # Still best-effort (the caller gets None), but say why the
                # fallback failed instead of hiding it.
                st.warning(f"Could not load cached config: {str(e)}")
        return None

    try:
        config_content = response.text
        # safe_load accepts a str directly; no StringIO wrapper is needed.
        parsed_config = yaml.safe_load(config_content)
    except Exception as e:
        st.error(f"Could not parse experiment configuration: {str(e)}")
        return None

    # Save to cache only after a successful parse. A failed write must not
    # discard the configuration that was just downloaded.
    try:
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(config_content)
    except OSError as e:
        st.warning(f"Could not cache experiment configuration: {str(e)}")

    return parsed_config
|
| 110 |
+
|
| 111 |
def _get_available_languages_from_github(self):
    """Return the sorted language keys declared in the experiment config.

    The languages are the keys of the ``languages`` mapping in the object
    returned by ``self._download_experiment_config()``. When that object is
    ``None``, or reading the keys raises, the result of
    ``self._get_available_languages_from_directories()`` is returned instead
    (after a warning in the latter case).
    """
    experiment_config = self._download_experiment_config()

    if experiment_config is not None:
        try:
            return sorted(experiment_config.get('languages', {}).keys())
        except Exception as e:
            st.warning(f"Could not parse languages from config: {str(e)}")

    # No config, or it could not be interpreted: use directory-based discovery.
    return self._get_available_languages_from_directories()
|
| 126 |
+
|
| 127 |
+
def _get_available_languages_from_directories(self):
|
| 128 |
+
"""Fallback method: Get available languages from GitHub API directory listing"""
|
| 129 |
api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
|
| 130 |
|
| 131 |
response = self._make_github_request(api_url, "available languages")
|
|
|
|
| 145 |
st.warning(f"Could not parse language list from GitHub: {str(e)}")
|
| 146 |
# Fallback to local cache if available
|
| 147 |
return self._get_available_languages_local()
|
| 148 |
+
|
| 149 |
+
def _get_models_for_language(self, language):
    """Get all models for a specific language from the experiment config.

    Combines the ``models`` list under ``languages[language]`` with the
    top-level ``multilingual_models`` list.

    Args:
        language: Key to look up under the config's ``languages`` mapping.

    Returns:
        A sorted list of unique model names. The list is empty when the
        experiment config is unavailable, declares no models for the
        language, or cannot be interpreted (a warning is shown in that
        last case).
    """
    config = self._download_experiment_config()

    if config is None:
        return []

    try:
        # A YAML key with no value parses to None, and dict.get() applies its
        # default only to *missing* keys. Normalise with `or` so an empty
        # entry does not also discard the multilingual models.
        languages = config.get('languages') or {}
        language_entry = languages.get(language) or {}

        # Get language-specific models
        language_models = language_entry.get('models') or []

        # Get multilingual models
        multilingual_models = config.get('multilingual_models') or []

        # Combine both lists, remove duplicates and sort
        return sorted(set(language_models + multilingual_models))

    except Exception as e:
        st.warning(f"Could not get models for {language}: {str(e)}")
        return []
|
| 170 |
+
|
| 171 |
+
def _get_first_language_model_pair(self):
    """Get the first language-model pair from the experiment config.

    Walks the config's ``languages`` mapping in order and returns the first
    language that declares a non-empty ``models`` list, paired with that
    list's first entry. If no language declares its own models, the first
    language is paired with the first entry of ``multilingual_models``.

    Returns:
        A ``(language, model)`` tuple, or ``(None, None)`` when the config
        is unavailable, contains no usable pair, or cannot be interpreted
        (a warning is shown in that last case).
    """
    config = self._download_experiment_config()

    if config is None:
        return None, None

    try:
        # Empty YAML nodes parse to None; dict.get() defaults cover only
        # missing keys, so normalise explicitly.
        languages = config.get('languages') or {}
        multilingual_models = config.get('multilingual_models') or []

        # Find first language with models
        for language, lang_config in languages.items():
            # Skip an empty language entry instead of aborting the search.
            models = (lang_config or {}).get('models') or []
            if models:
                return language, models[0]

        # If no language-specific models, use first language with first multilingual model
        if multilingual_models and languages:
            first_language = next(iter(languages))
            return first_language, multilingual_models[0]

        return None, None

    except Exception as e:
        st.warning(f"Could not find language-model pair: {str(e)}")
        return None, None
|
| 198 |
|
| 199 |
def _get_available_languages_local(self):
|
| 200 |
"""Get available languages from local cache"""
|
|
|
|
| 282 |
def _discover_config_parameters(self, language=None):
|
| 283 |
"""Dynamically discover configuration parameters from available configs
|
| 284 |
|
| 285 |
+
Now uses the first language-model pair from experiment config to discover
|
| 286 |
+
valid configuration parameters, since configurations are consistent across
|
| 287 |
+
all language-model combinations.
|
| 288 |
"""
|
| 289 |
try:
|
| 290 |
+
# Get the first language-model pair from experiment config
|
| 291 |
if language is None:
|
| 292 |
+
language, model = self._get_first_language_model_pair()
|
| 293 |
+
if language is None or model is None:
|
| 294 |
+
st.warning("Could not find any language-model pairs in experiment config")
|
| 295 |
return {}
|
| 296 |
+
st.info(f"🔍 Discovering configurations using {language.upper()}/{model} (configurations are consistent across all languages and models)")
|
| 297 |
+
else:
|
| 298 |
+
# If language is specified, try to get first model for that language
|
| 299 |
+
models = self._get_models_for_language(language)
|
| 300 |
+
if not models:
|
| 301 |
+
st.warning(f"No models found for language {language}")
|
| 302 |
+
return {}
|
| 303 |
+
model = models[0]
|
| 304 |
|
| 305 |
available_configs = self._get_experimental_configs(language)
|
| 306 |
if not available_configs:
|
|
|
|
| 477 |
return self._find_best_matching_config(language, target_params)
|
| 478 |
|
| 479 |
def _get_models(self, language, config):
|
| 480 |
+
"""Get all models for a language and configuration from experiment config"""
|
| 481 |
+
# First try to get models from experiment config
|
| 482 |
+
models = self._get_models_for_language(language)
|
| 483 |
+
|
| 484 |
+
if models:
|
| 485 |
+
return models
|
| 486 |
+
|
| 487 |
+
# Fallback to GitHub API directory listing if config unavailable
|
| 488 |
api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
|
| 489 |
response = self._make_github_request(api_url, f"models for {language}/{config}")
|
| 490 |
|
|
|
|
| 496 |
except Exception as e:
|
| 497 |
st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
|
| 498 |
|
| 499 |
+
# Final fallback to local cache if available
|
| 500 |
config_dir = self.base_path / f"results_{language}" / config
|
| 501 |
if config_dir.exists():
|
| 502 |
models = [d.name for d in config_dir.iterdir() if d.is_dir()]
|