Update streamlit_app.py

streamlit_app.py  CHANGED  (+546, -10)

@@ -82,7 +82,7 @@ class AttentionResultsExplorer:
             st.warning(f"Could not load cached config, downloading fresh: {str(e)}")

         # Download from GitHub
-        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/…
+        config_url = f"https://raw.githubusercontent.com/{self.github_repo}/master/experiment_config.yaml"
         response = self._make_github_request(config_url, "experiment configuration file")

         if response is None:
@@ -207,8 +207,9 @@ class AttentionResultsExplorer:

     def _ensure_specific_data_downloaded(self, language, config, model):
         """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
         base_path = f"results_{language}/{config}/{model}"
-        local_path = self.base_path / f"results_{language}" / config / …
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name

         # Check if we already have this specific combination cached
         if local_path.exists() and self.use_cache:
@@ -227,7 +228,8 @@ class AttentionResultsExplorer:

     def _download_specific_model_data(self, language, config, model):
         """Download only the specific model data needed"""
-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"

         # List of essential directories to download for a model
         essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
@@ -251,7 +253,8 @@ class AttentionResultsExplorer:
             contents = response.json()

             # Create local directory
-            …
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
             local_dir.mkdir(parents=True, exist_ok=True)

             # Download all files in this directory
@@ -518,7 +521,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
         if metadata_path.exists():
             with open(metadata_path, 'r') as f:
                 return json.load(f)
@@ -529,7 +533,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
         if not uas_dir.exists():
             return {}

@@ -564,7 +569,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
         if not heads_dir.exists():
             return {}

@@ -577,7 +583,7 @@ class AttentionResultsExplorer:
                 status_text = st.empty()

                 for i, csv_file in enumerate(csv_files):
-                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{…
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
                     status_text.text(f"Loading head matching data: {relation}")

                     try:
@@ -599,7 +605,8 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
         if var_path.exists():
             try:
                 return pd.read_csv(var_path, index_col=0)
@@ -612,7 +619,536 @@ class AttentionResultsExplorer:
         # Ensure we have the specific data downloaded
         self._ensure_specific_data_downloaded(language, config, model)

-        …
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
+        if not figures_dir.exists():
+            return []
+        return list(figures_dir.glob("*.pdf"))
+
+    def _handle_rate_limit_error(self, response):
+        """Handle GitHub API rate limit errors with detailed user feedback"""
+        if response.status_code in (403, 429):
+            # Check if it's a rate limit error
+            if 'rate limit' in response.text.lower() or 'api rate limit' in response.text.lower():
+                # Extract rate limit information from headers
+                remaining = response.headers.get('x-ratelimit-remaining', 'unknown')
+                reset_timestamp = response.headers.get('x-ratelimit-reset')
+                limit = response.headers.get('x-ratelimit-limit', 'unknown')
+
+                # Calculate reset time
+                reset_time_str = "unknown"
+                if reset_timestamp:
+                    try:
+                        reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                        reset_time_str = reset_time.strftime("%Y-%m-%d %H:%M:%S UTC")
+
+                        # Calculate time until reset
+                        now = datetime.now(timezone.utc)
+                        time_until_reset = reset_time - now
+                        minutes_until_reset = int(time_until_reset.total_seconds() / 60)
+
+                        if minutes_until_reset > 0:
+                            reset_time_str += f" (in {minutes_until_reset} minutes)"
+                    except (ValueError, TypeError):
+                        pass
+
+                # Display comprehensive rate limit information
+                st.error("🚫 **GitHub API Rate Limit Exceeded**")
+
+                with st.expander("📊 Rate Limit Details", expanded=True):
+                    col1, col2 = st.columns(2)
+
+                    with col1:
+                        st.metric("Requests Remaining", remaining)
+                        st.metric("Rate Limit", limit)
+
+                    with col2:
+                        st.metric("Reset Time", reset_time_str)
+                        if reset_timestamp:
+                            try:
+                                reset_time = datetime.fromtimestamp(int(reset_timestamp), tz=timezone.utc)
+                                now = datetime.now(timezone.utc)
+                                time_until_reset = reset_time - now
+                                if time_until_reset.total_seconds() > 0:
+                                    st.metric("Time Until Reset", f"{int(time_until_reset.total_seconds() / 60)} minutes")
+                            except (ValueError, TypeError):
+                                pass
+
+                return True  # Indicates rate limit error was handled
+
+        return False  # Not a rate limit error
+
+    def _make_github_request(self, url, description="GitHub API request", silent_404=False):
+        """Make a GitHub API request with rate limit handling"""
+        try:
+            # Add GitHub token if available
+            headers = {}
+            github_token = os.environ.get('GITHUB_TOKEN')
+            if github_token:
+                headers['Authorization'] = f'token {github_token}'
+
+            response = requests.get(url, headers=headers)
+
+            # Check for rate limit before raising for status
+            if self._handle_rate_limit_error(response):
+                return None  # Rate limit handled, return None
+
+            # Handle 404 errors silently if requested (for optional directories)
+            if response.status_code == 404 and silent_404:
+                return None
+
+            response.raise_for_status()
+            return response
+
+        except requests.exceptions.RequestException as e:
+            if hasattr(e, 'response') and e.response is not None:
+                # Handle 404 silently if requested
+                if e.response.status_code == 404 and silent_404:
+                    return None
+
+                if not self._handle_rate_limit_error(e.response):
+                    st.warning(f"Request failed for {description}: {str(e)}")
+            else:
+                st.warning(f"Network error for {description}: {str(e)}")
+            return None
+
+    def _model_name_to_folder_name(self, model_name):
+        """Convert model name from config format to folder format
+
+        Examples:
+        - 'PlanTL-GOB-ES/roberta-base-ca' -> 'roberta-base-ca'
+        - 'microsoft/deberta-v3-base' -> 'deberta-v3-base'
+        - 'bert-base-uncased' -> 'bert-base-uncased' (no change)
+        """
+        if '/' in model_name:
+            return model_name.split('/')[-1]
+        return model_name
+
+    def _get_available_languages_local(self):
+        """Get available languages from local cache"""
+        if not self.base_path.exists():
+            return []
+        result_dirs = [d.name for d in self.base_path.iterdir()
+                       if d.is_dir() and d.name.startswith("results_")]
+        languages = [d.replace("results_", "") for d in result_dirs]
+        return sorted(languages)
+
+    def _ensure_specific_data_downloaded(self, language, config, model):
+        """Download specific files for a language/config/model combination if not cached"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_path = f"results_{language}/{config}/{model}"
+        local_path = self.base_path / f"results_{language}" / config / folder_model_name
+
+        # Check if we already have this specific combination cached
+        if local_path.exists() and self.use_cache:
+            # Quick check if essential files exist
+            metadata_path = local_path / "metadata" / "metadata.json"
+            if metadata_path.exists():
+                return  # Already have the data
+
+        with st.spinner(f"📥 Downloading data for {language.upper()}/{config}/{model}..."):
+            try:
+                self._download_specific_model_data(language, config, model)
+                st.success(f"✅ Downloaded {language.upper()}/{model} data!")
+            except Exception as e:
+                st.error(f"❌ Failed to download specific data: {str(e)}")
+                raise
+
+    def _download_specific_model_data(self, language, config, model):
+        """Download only the specific model data needed"""
+        folder_model_name = self._model_name_to_folder_name(model)
+        base_remote_path = f"results_{language}/{config}/{folder_model_name}"
+
+        # List of essential directories to download for a model
+        essential_dirs = ["metadata", "uas_scores", "number_of_heads_matching", "variability", "figures"]
+
+        for dir_name in essential_dirs:
+            remote_path = f"{base_remote_path}/{dir_name}"
+            try:
+                self._download_directory_targeted(dir_name, remote_path, language, config, model)
+            except Exception as e:
+                st.warning(f"Could not download {dir_name} for {model}: {str(e)}")
+
+    def _download_directory_targeted(self, dir_name, remote_path, language, config, model):
+        """Download a specific directory for a model"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/{remote_path}"
+
+        response = self._make_github_request(api_url, f"directory {dir_name}", silent_404=True)
+        if response is None:
+            return  # Rate limit, 404, or other error
+
+        try:
+            contents = response.json()
+
+            # Create local directory
+            folder_model_name = self._model_name_to_folder_name(model)
+            local_dir = self.base_path / f"results_{language}" / config / folder_model_name / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            # Download all files in this directory
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+
+        except Exception as e:
+            st.warning(f"Could not download directory {dir_name}: {str(e)}")
+
+    def _get_available_configs_from_github(self, language):
+        """Get available configurations for a language from GitHub"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+
+        response = self._make_github_request(api_url, f"configurations for {language}")
+        if response is None:
+            return []
+
+        try:
+            contents = response.json()
+            configs = [item['name'] for item in contents if item['type'] == 'dir']
+            return sorted(configs)
+
+        except Exception as e:
+            st.warning(f"Could not parse configurations for {language}: {str(e)}")
+            return []
+
+    def _discover_config_parameters(self, language=None):
+        """Dynamically discover configuration parameters from available configs
+
+        Now uses the first language-model pair from experiment config to discover
+        valid configuration parameters, since configurations are consistent across
+        all language-model combinations.
+        """
+        try:
+            # Get the first language-model pair from experiment config
+            if language is None:
+                language, model = self._get_first_language_model_pair()
+                if language is None or model is None:
+                    st.warning("Could not find any language-model pairs in experiment config")
+                    return {}
+                st.info(f"🔍 Discovering configurations using {language.upper()}/{model} (configurations are consistent across all languages and models)")
+            else:
+                # If language is specified, try to get first model for that language
+                models = self._get_models_for_language(language)
+                if not models:
+                    st.warning(f"No models found for language {language}")
+                    return {}
+                model = models[0]
+
+            available_configs = self._get_experimental_configs(language)
+            if not available_configs:
+                return {}
+
+            # Parse all configurations to extract unique parameters
+            all_params = set()
+            param_values = {}
+
+            for config in available_configs:
+                params = self._parse_config_params(config)
+                for param, value in params.items():
+                    all_params.add(param)
+                    if param not in param_values:
+                        param_values[param] = set()
+                    param_values[param].add(value)
+
+            # Convert sets to sorted lists for consistent UI
+            return {param: sorted(list(values)) for param, values in param_values.items()}
+
+        except Exception as e:
+            st.warning(f"Could not discover configuration parameters: {str(e)}")
+            return {}
+
+    def _build_config_from_params(self, param_dict):
+        """Build configuration string from parameter dictionary"""
+        config_parts = []
+        for param, value in sorted(param_dict.items()):
+            config_parts.append(f"{param}_{value}")
+        return "+".join(config_parts)
+
+    def _find_best_matching_config(self, language, target_params):
+        """Find the configuration that best matches the target parameters"""
+        available_configs = self._get_experimental_configs(language)
+
+        best_match = None
+        best_score = -1
+
+        for config in available_configs:
+            config_params = self._parse_config_params(config)
+
+            # Calculate match score
+            score = 0
+            total_params = len(target_params)
+
+            for param, target_value in target_params.items():
+                if param in config_params and config_params[param] == target_value:
+                    score += 1
+
+            # Prefer configs with exact parameter count
+            if len(config_params) == total_params:
+                score += 0.5
+
+            if score > best_score:
+                best_score = score
+                best_match = config
+
+        return best_match, best_score == len(target_params)
+
+    def _download_repository(self):
+        """Download repository data from GitHub"""
+        st.info("🔄 Downloading results data from GitHub... This may take a moment.")
+
+        # GitHub API to get the repository contents
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
+
+        try:
+            # Get list of result directories
+            response = requests.get(api_url)
+            response.raise_for_status()
+            contents = response.json()
+
+            result_dirs = [item['name'] for item in contents
+                           if item['type'] == 'dir' and item['name'].startswith('results_')]
+
+            st.write(f"Found {len(result_dirs)} result directories: {', '.join(result_dirs)}")
+
+            # Download each result directory
+            progress_bar = st.progress(0)
+            for i, result_dir in enumerate(result_dirs):
+                st.write(f"Downloading {result_dir}...")
+                self._download_directory(result_dir)
+                progress_bar.progress((i + 1) / len(result_dirs))
+
+            st.success("✅ Download completed!")
+
+        except Exception as e:
+            st.error(f"❌ Error downloading repository: {str(e)}")
+            st.error("Please check the repository URL and your internet connection.")
+            raise
+
+    def _parse_config_params(self, config_name):
+        """Parse configuration parameters into a dictionary"""
+        parts = config_name.split('+')
+        params = {}
+        for part in parts:
+            if '_' in part:
+                key_parts = part.split('_')
+                if len(key_parts) >= 2:
+                    key = '_'.join(key_parts[:-1])
+                    value = key_parts[-1]
+                    params[key] = value == 'True'
+        return params
+
+    def _download_directory(self, dir_name, path=""):
+        """Recursively download a directory from GitHub"""
+        url = f"https://api.github.com/repos/{self.github_repo}/contents/{path}{dir_name}"
+
+        try:
+            response = requests.get(url)
+            response.raise_for_status()
+            contents = response.json()
+
+            local_dir = self.cache_dir / path / dir_name
+            local_dir.mkdir(parents=True, exist_ok=True)
+
+            for item in contents:
+                if item['type'] == 'file':
+                    self._download_file(item, local_dir)
+                elif item['type'] == 'dir':
+                    self._download_directory(item['name'], f"{path}{dir_name}/")
+
+        except Exception as e:
+            st.warning(f"Could not download {dir_name}: {str(e)}")
+
+    def _download_file(self, file_info, local_dir):
+        """Download a single file from GitHub"""
+        try:
+            # Use the rate limit handling for file downloads too
+            file_response = self._make_github_request(file_info['download_url'], f"file {file_info['name']}")
+            if file_response is None:
+                return  # Rate limit or other error
+
+            # Save to local cache
+            local_file = local_dir / file_info['name']
+
+            # Handle different file types
+            if file_info['name'].endswith(('.csv', '.json')):
+                with open(local_file, 'w', encoding='utf-8') as f:
+                    f.write(file_response.text)
+            else:  # Binary files like PDFs
+                with open(local_file, 'wb') as f:
+                    f.write(file_response.content)
+
+        except Exception as e:
+            st.warning(f"Could not download file {file_info['name']}: {str(e)}")
+
+    def _get_available_languages(self):
+        """Get all available language directories"""
+        return self.available_languages
+
+    def _get_experimental_configs(self, language):
+        """Get all experimental configurations for a language from GitHub API"""
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}"
+        response = self._make_github_request(api_url, f"experimental configs for {language}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                configs = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(configs)
+            except Exception as e:
+                st.warning(f"Could not parse experimental configs for {language}: {str(e)}")
+
+        # Fallback to local cache if available
+        lang_dir = self.base_path / f"results_{language}"
+        if lang_dir.exists():
+            configs = [d.name for d in lang_dir.iterdir() if d.is_dir()]
+            return sorted(configs)
+        return []
+
+    def _find_matching_config(self, language, target_params):
+        """Find the first matching configuration from target parameters"""
+        return self._find_best_matching_config(language, target_params)
+
+    def _get_models(self, language, config):
+        """Get all models for a language and configuration from experiment config"""
+        # First try to get models from experiment config
+        models = self._get_models_for_language(language)
+
+        if models:
+            return models
+
+        # Fallback to GitHub API directory listing if config unavailable
+        api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
+        response = self._make_github_request(api_url, f"models for {language}/{config}")
+
+        if response is not None:
+            try:
+                contents = response.json()
+                models = [item['name'] for item in contents if item['type'] == 'dir']
+                return sorted(models)
+            except Exception as e:
+                st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
+
+        # Final fallback to local cache if available
+        config_dir = self.base_path / f"results_{language}" / config
+        if config_dir.exists():
+            models = [d.name for d in config_dir.iterdir() if d.is_dir()]
+            return sorted(models)
+        return []
+
+    def _parse_config_name(self, config_name):
+        """Parse configuration name into readable format"""
+        parts = config_name.split('+')
+        config_dict = {}
+        for part in parts:
+            if '_' in part:
+                key, value = part.split('_', 1)
+                config_dict[key.replace('_', ' ').title()] = value
+        return config_dict
+
+    def _load_metadata(self, language, config, model):
+        """Load metadata for a specific combination"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        metadata_path = self.base_path / f"results_{language}" / config / folder_model_name / "metadata" / "metadata.json"
+        if metadata_path.exists():
+            with open(metadata_path, 'r') as f:
+                return json.load(f)
+        return None
+
+    def _load_uas_scores(self, language, config, model):
+        """Load UAS scores data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        uas_dir = self.base_path / f"results_{language}" / config / folder_model_name / "uas_scores"
+        if not uas_dir.exists():
+            return {}
+
+        uas_data = {}
+        csv_files = list(uas_dir.glob("uas_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading UAS scores data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("uas_", "")
+                    status_text.text(f"Loading UAS data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        uas_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return uas_data
+
+    def _load_head_matching(self, language, config, model):
+        """Load head matching data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        heads_dir = self.base_path / f"results_{language}" / config / folder_model_name / "number_of_heads_matching"
+        if not heads_dir.exists():
+            return {}
+
+        heads_data = {}
+        csv_files = list(heads_dir.glob("heads_matching_*.csv"))
+
+        if csv_files:
+            with st.spinner("Loading head matching data..."):
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+
+                for i, csv_file in enumerate(csv_files):
+                    relation = csv_file.stem.replace("heads_matching_", "").replace(f"_{folder_model_name}", "")
+                    status_text.text(f"Loading head matching data: {relation}")
+
+                    try:
+                        df = pd.read_csv(csv_file, index_col=0)
+                        heads_data[relation] = df
+                    except Exception as e:
+                        st.warning(f"Could not load {csv_file.name}: {e}")
+
+                    progress_bar.progress((i + 1) / len(csv_files))
+                    time.sleep(0.01)  # Small delay for smoother progress
+
+                progress_bar.empty()
+                status_text.empty()
+
+        return heads_data
+
+    def _load_variability(self, language, config, model):
+        """Load variability data"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        var_path = self.base_path / f"results_{language}" / config / folder_model_name / "variability" / "variability_list.csv"
+        if var_path.exists():
+            try:
+                return pd.read_csv(var_path, index_col=0)
+            except Exception as e:
+                st.warning(f"Could not load variability data: {e}")
+        return None
+
+    def _get_available_figures(self, language, config, model):
+        """Get all available figure files"""
+        # Ensure we have the specific data downloaded
+        self._ensure_specific_data_downloaded(language, config, model)
+
+        folder_model_name = self._model_name_to_folder_name(model)
+        figures_dir = self.base_path / f"results_{language}" / config / folder_model_name / "figures"
         if not figures_dir.exists():
             return []
         return list(figures_dir.glob("*.pdf"))
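
Note: the rate-limit handling added in _handle_rate_limit_error boils down to reading GitHub's standard x-ratelimit-* response headers and turning the epoch reset timestamp into a human-readable countdown. A minimal sketch of that arithmetic outside Streamlit, using hypothetical header values in place of a real response:

from datetime import datetime, timezone

headers = {  # hypothetical values standing in for response.headers
    "x-ratelimit-limit": "60",
    "x-ratelimit-remaining": "0",
    "x-ratelimit-reset": "1735689600",  # epoch seconds, as GitHub sends it
}

reset_time = datetime.fromtimestamp(int(headers["x-ratelimit-reset"]), tz=timezone.utc)
minutes_left = int((reset_time - datetime.now(timezone.utc)).total_seconds() / 60)
print(f"{headers['x-ratelimit-remaining']}/{headers['x-ratelimit-limit']} requests left; "
      f"resets at {reset_time:%Y-%m-%d %H:%M:%S} UTC (~{minutes_left} min)")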
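
Most of the path changes in this commit hinge on _model_name_to_folder_name: a Hugging Face-style model id keeps only its final path segment when mapped onto the results_<language>/<config>/<model> cache layout. A standalone sketch of that mapping; the base directory, language, and config name here are hypothetical:

from pathlib import Path

def model_name_to_folder_name(model_name: str) -> str:
    # Same rule as the method in the diff: drop any org prefix
    return model_name.split("/")[-1] if "/" in model_name else model_name

base_path = Path("results_cache")  # stand-in for self.base_path
model = "PlanTL-GOB-ES/roberta-base-ca"
local_dir = base_path / "results_ca" / "remove_punct_True" / model_name_to_folder_name(model) / "metadata"
print(local_dir)  # results_cache/results_ca/remove_punct_True/roberta-base-ca/metadata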
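
The config-string convention assumed by _parse_config_params and _build_config_from_params is '+'-joined key_value parts whose values are the literal strings 'True'/'False'; parsing coerces them to Python bools, and building relies on str(True) formatting back to 'True'. A condensed round trip of that convention, with a hypothetical config folder name:

def parse_config_params(config_name: str) -> dict:
    params = {}
    for part in config_name.split("+"):
        if "_" in part:
            *key_parts, value = part.split("_")
            params["_".join(key_parts)] = (value == "True")
    return params

def build_config_from_params(param_dict: dict) -> str:
    return "+".join(f"{param}_{value}" for param, value in sorted(param_dict.items()))

name = "remove_punct_True+use_gold_False"  # hypothetical config folder name
params = parse_config_params(name)
print(params)                            # {'remove_punct': True, 'use_gold': False}
print(build_config_from_params(params))  # remove_punct_True+use_gold_False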