Spaces:
Running
Running
wony617
committed on
Commit
·
7e4dd83
1
Parent(s):
fe6c90f
Initial for supporting smolagent translation
Browse files
- agent/handler.py +24 -15
- agent/workflow.py +5 -4
- app.py +7 -1
- example.env +0 -1
- logger/github_logger.py +3 -3
- translator/project_config.py +48 -0
- translator/retriever.py +39 -23
agent/handler.py
CHANGED
|
@@ -13,12 +13,14 @@ from agent.workflow import (
|
|
| 13 |
)
|
| 14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
| 15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
# State management
|
| 19 |
class ChatState:
|
| 20 |
def __init__(self):
|
| 21 |
-
self.step = "welcome" # welcome -> find_files -> translate -> create_github_pr
|
|
|
|
| 22 |
self.target_language = "ko"
|
| 23 |
self.k_files = 10
|
| 24 |
self.files_to_translate = []
|
|
@@ -53,25 +55,26 @@ def _extract_content_for_display(content: str) -> str:
|
|
| 53 |
|
| 54 |
|
| 55 |
def get_welcome_message():
|
| 56 |
-
"""Initial welcome message with
|
| 57 |
return """**π Welcome to π Hugging Face i18n Translation Agent!**
|
| 58 |
|
| 59 |
I'll help you find files that need translation and translate them in a streamlined workflow.
|
| 60 |
|
| 61 |
-
|
| 62 |
|
| 63 |
-
Use the **`Quick Controls`** on the right or **ask me `what`, `how`, or `help`** to get started.
|
| 64 |
"""
|
| 65 |
|
| 66 |
|
| 67 |
-
def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
| 68 |
"""Process file search request and update Gradio UI components."""
|
| 69 |
global state
|
|
|
|
| 70 |
state.target_language = lang
|
| 71 |
state.k_files = k
|
| 72 |
state.step = "find_files"
|
| 73 |
|
| 74 |
-
status_report, files_list = report_translation_target_files(lang, k)
|
| 75 |
state.files_to_translate = (
|
| 76 |
[file[0] for file in files_list]
|
| 77 |
if files_list
|
|
@@ -87,8 +90,10 @@ def process_file_search_handler(lang: str, k: int, history: list) -> tuple:
|
|
| 87 |
"""
|
| 88 |
|
| 89 |
if state.files_to_translate:
|
|
|
|
| 90 |
for i, file in enumerate(state.files_to_translate, 1):
|
| 91 |
-
|
|
|
|
| 92 |
|
| 93 |
# if len(state.files_to_translate) > 5:
|
| 94 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
|
@@ -138,9 +143,8 @@ def start_translation_process():
|
|
| 138 |
p.parent.mkdir(parents=True, exist_ok=True)
|
| 139 |
p.write_text(translated, encoding="utf-8")
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
)
|
| 144 |
print("Compeleted translation:\n")
|
| 145 |
print(translated)
|
| 146 |
print("----------------------------")
|
|
@@ -226,12 +230,12 @@ def handle_user_message(message, history):
|
|
| 226 |
|
| 227 |
def update_status():
|
| 228 |
if state.step == "welcome":
|
| 229 |
-
return """
|
| 230 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 231 |
<div><strong>π Step:</strong> Welcome</div>
|
|
|
|
| 232 |
<div><strong>π Files:</strong> 0</div>
|
| 233 |
-
<div><strong>π Language:</strong>
|
| 234 |
-
<div><strong>β³ Progress:</strong> Ready</div>
|
| 235 |
</div>
|
| 236 |
"""
|
| 237 |
|
|
@@ -267,6 +271,7 @@ def update_status():
|
|
| 267 |
status_html = f"""
|
| 268 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 269 |
<div><strong>π Step:</strong> {step_map.get(state.step, state.step)}</div>
|
|
|
|
| 270 |
<div><strong>π Files:</strong> {len(state.files_to_translate)}</div>
|
| 271 |
<div><strong>π Language:</strong> {state.target_language}</div>
|
| 272 |
<div><strong>β³ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
|
@@ -292,14 +297,18 @@ def update_github_config(token, owner, repo, reference_pr_url):
|
|
| 292 |
if token:
|
| 293 |
os.environ["GITHUB_TOKEN"] = token
|
| 294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
# Save GitHub configuration to state
|
| 296 |
state.github_config.update(
|
| 297 |
{
|
| 298 |
"token": token,
|
| 299 |
"owner": owner,
|
| 300 |
"repo_name": repo,
|
| 301 |
-
"reference_pr_url": reference_pr_url
|
| 302 |
-
or state.github_config["reference_pr_url"],
|
| 303 |
}
|
| 304 |
)
|
| 305 |
|
|
|
|
| 13 |
)
|
| 14 |
from pr_generator.searcher import find_reference_pr_simple_stream
|
| 15 |
from translator.content import get_full_prompt, get_content, preprocess_content
|
| 16 |
+
from translator.project_config import get_available_projects, get_project_config
|
| 17 |
|
| 18 |
|
| 19 |
# State management
|
| 20 |
class ChatState:
|
| 21 |
def __init__(self):
|
| 22 |
+
self.step = "welcome" # welcome -> select_project -> find_files -> translate -> create_github_pr
|
| 23 |
+
self.selected_project = "transformers" # Default project
|
| 24 |
self.target_language = "ko"
|
| 25 |
self.k_files = 10
|
| 26 |
self.files_to_translate = []
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def get_welcome_message():
|
| 58 |
+
"""Initial welcome message with project selection"""
|
| 59 |
return """**π Welcome to π Hugging Face i18n Translation Agent!**
|
| 60 |
|
| 61 |
I'll help you find files that need translation and translate them in a streamlined workflow.
|
| 62 |
|
| 63 |
+
**π― First, select which project you want to translate:**
|
| 64 |
|
| 65 |
+
Use the **`Quick Controls`** on the right to select a project, or **ask me `what`, `how`, or `help`** to get started.
|
| 66 |
"""
|
| 67 |
|
| 68 |
|
| 69 |
+
def process_file_search_handler(project: str, lang: str, k: int, history: list) -> tuple:
|
| 70 |
"""Process file search request and update Gradio UI components."""
|
| 71 |
global state
|
| 72 |
+
state.selected_project = project
|
| 73 |
state.target_language = lang
|
| 74 |
state.k_files = k
|
| 75 |
state.step = "find_files"
|
| 76 |
|
| 77 |
+
status_report, files_list = report_translation_target_files(project, lang, k)
|
| 78 |
state.files_to_translate = (
|
| 79 |
[file[0] for file in files_list]
|
| 80 |
if files_list
|
|
|
|
| 90 |
"""
|
| 91 |
|
| 92 |
if state.files_to_translate:
|
| 93 |
+
config = get_project_config(state.selected_project)
|
| 94 |
for i, file in enumerate(state.files_to_translate, 1):
|
| 95 |
+
file_link = f"{config.repo_url}/blob/main/{file}"
|
| 96 |
+
response += f"\n{i}. [`{file}`]({file_link})"
|
| 97 |
|
| 98 |
# if len(state.files_to_translate) > 5:
|
| 99 |
# response += f"\n... and {len(state.files_to_translate) - 5} more files"
|
|
|
|
| 143 |
p.parent.mkdir(parents=True, exist_ok=True)
|
| 144 |
p.write_text(translated, encoding="utf-8")
|
| 145 |
|
| 146 |
+
config = get_project_config(state.selected_project)
|
| 147 |
+
original_file_link = f"{config.repo_url}/blob/main/{current_file}"
|
|
|
|
| 148 |
print("Compeleted translation:\n")
|
| 149 |
print(translated)
|
| 150 |
print("----------------------------")
|
|
|
|
| 230 |
|
| 231 |
def update_status():
|
| 232 |
if state.step == "welcome":
|
| 233 |
+
return f"""
|
| 234 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px;padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 235 |
<div><strong>π Step:</strong> Welcome</div>
|
| 236 |
+
<div><strong>π― Project:</strong> {state.selected_project}</div>
|
| 237 |
<div><strong>π Files:</strong> 0</div>
|
| 238 |
+
<div><strong>π Language:</strong> {state.target_language}</div>
|
|
|
|
| 239 |
</div>
|
| 240 |
"""
|
| 241 |
|
|
|
|
| 271 |
status_html = f"""
|
| 272 |
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; padding: 10px; background: rgba(0, 0, 0, 0.25); border-radius: 8px;">
|
| 273 |
<div><strong>π Step:</strong> {step_map.get(state.step, state.step)}</div>
|
| 274 |
+
<div><strong>π― Project:</strong> {state.selected_project}</div>
|
| 275 |
<div><strong>π Files:</strong> {len(state.files_to_translate)}</div>
|
| 276 |
<div><strong>π Language:</strong> {state.target_language}</div>
|
| 277 |
<div><strong>β³ Progress:</strong> {progress_map.get(state.step, 'In progress')}</div>
|
|
|
|
| 297 |
if token:
|
| 298 |
os.environ["GITHUB_TOKEN"] = token
|
| 299 |
|
| 300 |
+
# Get default reference PR URL from project config if not provided
|
| 301 |
+
if not reference_pr_url:
|
| 302 |
+
config = get_project_config(state.selected_project)
|
| 303 |
+
reference_pr_url = config.reference_pr_url
|
| 304 |
+
|
| 305 |
# Save GitHub configuration to state
|
| 306 |
state.github_config.update(
|
| 307 |
{
|
| 308 |
"token": token,
|
| 309 |
"owner": owner,
|
| 310 |
"repo_name": repo,
|
| 311 |
+
"reference_pr_url": reference_pr_url,
|
|
|
|
| 312 |
}
|
| 313 |
)
|
| 314 |
|
agent/workflow.py
CHANGED
|
@@ -26,19 +26,20 @@ from logger.github_logger import GitHubLogger
|
|
| 26 |
|
| 27 |
|
| 28 |
def report_translation_target_files(
|
| 29 |
-
translate_lang: str, top_k: int = 1
|
| 30 |
) -> tuple[str, list[list[str]]]:
|
| 31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
| 32 |
|
| 33 |
Args:
|
|
|
|
| 34 |
translate_lang: Target language to translate
|
| 35 |
top_k: Number of top-first files to return for translation. (Default 1)
|
| 36 |
"""
|
| 37 |
# Get files in progress
|
| 38 |
-
docs_in_progress, pr_info_list = get_github_issue_open_pr(translate_lang)
|
| 39 |
|
| 40 |
# Get all available files for translation
|
| 41 |
-
all_status_report, all_filepath_list = report(translate_lang, top_k * 2) # Get more to account for filtering
|
| 42 |
|
| 43 |
# Filter out files that are already in progress
|
| 44 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
|
@@ -52,7 +53,7 @@ def report_translation_target_files(
|
|
| 52 |
if docs_in_progress:
|
| 53 |
status_report += f"\n\nπ€ Found {len(docs_in_progress)} files in progress for translation:"
|
| 54 |
for i, file in enumerate(docs_in_progress):
|
| 55 |
-
status_report += f"\n{i+1}. `{file}
|
| 56 |
status_report += f"\n\nπ Showing {len(filepath_list)} available files (excluding in-progress):"
|
| 57 |
|
| 58 |
return status_report, [[file] for file in filepath_list]
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
def report_translation_target_files(
|
| 29 |
+
project: str, translate_lang: str, top_k: int = 1
|
| 30 |
) -> tuple[str, list[list[str]]]:
|
| 31 |
"""Return the top-k files that need translation, excluding files already in progress.
|
| 32 |
|
| 33 |
Args:
|
| 34 |
+
project: Project to translate (e.g., "transformers", "smolagents")
|
| 35 |
translate_lang: Target language to translate
|
| 36 |
top_k: Number of top-first files to return for translation. (Default 1)
|
| 37 |
"""
|
| 38 |
# Get files in progress
|
| 39 |
+
docs_in_progress, pr_info_list = get_github_issue_open_pr(project, translate_lang)
|
| 40 |
|
| 41 |
# Get all available files for translation
|
| 42 |
+
all_status_report, all_filepath_list = report(project, translate_lang, top_k * 2) # Get more to account for filtering
|
| 43 |
|
| 44 |
# Filter out files that are already in progress
|
| 45 |
available_files = [f for f in all_filepath_list if f not in docs_in_progress]
|
|
|
|
| 53 |
if docs_in_progress:
|
| 54 |
status_report += f"\n\nπ€ Found {len(docs_in_progress)} files in progress for translation:"
|
| 55 |
for i, file in enumerate(docs_in_progress):
|
| 56 |
+
status_report += f"\n{i+1}. [`{file}`]({pr_info_list[i]})"
|
| 57 |
status_report += f"\n\nπ Showing {len(filepath_list)} available files (excluding in-progress):"
|
| 58 |
|
| 59 |
return status_report, [[file] for file in filepath_list]
|
app.py
CHANGED
|
@@ -19,6 +19,7 @@ from agent.handler import (
|
|
| 19 |
update_github_config,
|
| 20 |
)
|
| 21 |
from translator.model import Languages
|
|
|
|
| 22 |
|
| 23 |
load_dotenv()
|
| 24 |
|
|
@@ -125,6 +126,11 @@ with gr.Blocks(
|
|
| 125 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
| 126 |
with gr.TabItem("1. Find Files", id=0):
|
| 127 |
with gr.Group():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
lang_dropdown = gr.Radio(
|
| 129 |
choices=[language.value for language in Languages],
|
| 130 |
label="π Translate To",
|
|
@@ -226,7 +232,7 @@ with gr.Blocks(
|
|
| 226 |
|
| 227 |
find_btn.click(
|
| 228 |
fn=process_file_search_handler,
|
| 229 |
-
inputs=[lang_dropdown, k_input, chatbot],
|
| 230 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
| 231 |
)
|
| 232 |
|
|
|
|
| 19 |
update_github_config,
|
| 20 |
)
|
| 21 |
from translator.model import Languages
|
| 22 |
+
from translator.project_config import get_available_projects
|
| 23 |
|
| 24 |
load_dotenv()
|
| 25 |
|
|
|
|
| 126 |
with gr.Tabs(elem_classes="simple-tabs") as control_tabs:
|
| 127 |
with gr.TabItem("1. Find Files", id=0):
|
| 128 |
with gr.Group():
|
| 129 |
+
project_dropdown = gr.Radio(
|
| 130 |
+
choices=get_available_projects(),
|
| 131 |
+
label="π― Select Project",
|
| 132 |
+
value="transformers",
|
| 133 |
+
)
|
| 134 |
lang_dropdown = gr.Radio(
|
| 135 |
choices=[language.value for language in Languages],
|
| 136 |
label="π Translate To",
|
|
|
|
| 232 |
|
| 233 |
find_btn.click(
|
| 234 |
fn=process_file_search_handler,
|
| 235 |
+
inputs=[project_dropdown, lang_dropdown, k_input, chatbot],
|
| 236 |
outputs=[chatbot, msg_input, status_display, control_tabs, files_to_translate],
|
| 237 |
)
|
| 238 |
|
example.env
CHANGED
|
@@ -13,6 +13,5 @@ HF_SPACE_NAME=
|
|
| 13 |
|
| 14 |
# Secrets for logging to Github
|
| 15 |
LOG_REPO=
|
| 16 |
-
LOG_GITHUB_TOKEN=
|
| 17 |
LOG_BRANCH=
|
| 18 |
LOG_FILE_PATH=
|
|
|
|
| 13 |
|
| 14 |
# Secrets for logging to Github
|
| 15 |
LOG_REPO=
|
|
|
|
| 16 |
LOG_BRANCH=
|
| 17 |
LOG_FILE_PATH=
|
logger/github_logger.py
CHANGED
|
@@ -12,7 +12,7 @@ class GitHubLogger:
|
|
| 12 |
"""Dedicated logger that appends JSONL entries to a GitHub repo/branch/file.
|
| 13 |
|
| 14 |
Env vars:
|
| 15 |
-
-
|
| 16 |
- LOG_REPO (format: owner/repo)
|
| 17 |
- LOG_BRANCH (default: 'log_event')
|
| 18 |
- LOG_FILE_PATH (default: 'pr_success.log')
|
|
@@ -21,9 +21,9 @@ class GitHubLogger:
|
|
| 21 |
def __init__(self):
|
| 22 |
if not LIBS_OK:
|
| 23 |
raise ImportError("PyGithub not installed. Please install PyGithub.")
|
| 24 |
-
token = os.environ.get("
|
| 25 |
if not token:
|
| 26 |
-
raise ValueError("Missing
|
| 27 |
self._client = Github(token)
|
| 28 |
|
| 29 |
repo_spec = os.environ.get("LOG_REPO")
|
|
|
|
| 12 |
"""Dedicated logger that appends JSONL entries to a GitHub repo/branch/file.
|
| 13 |
|
| 14 |
Env vars:
|
| 15 |
+
- GITHUB_TOKEN
|
| 16 |
- LOG_REPO (format: owner/repo)
|
| 17 |
- LOG_BRANCH (default: 'log_event')
|
| 18 |
- LOG_FILE_PATH (default: 'pr_success.log')
|
|
|
|
| 21 |
def __init__(self):
|
| 22 |
if not LIBS_OK:
|
| 23 |
raise ImportError("PyGithub not installed. Please install PyGithub.")
|
| 24 |
+
token = os.environ.get("GITHUB_TOKEN")
|
| 25 |
if not token:
|
| 26 |
+
raise ValueError("Missing GITHUB_TOKEN for logging")
|
| 27 |
self._client = Github(token)
|
| 28 |
|
| 29 |
repo_spec = os.environ.get("LOG_REPO")
|
translator/project_config.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Project configuration for different HuggingFace repositories."""
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Dict
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataclass
|
| 8 |
+
class ProjectConfig:
|
| 9 |
+
"""Configuration for a specific HuggingFace project."""
|
| 10 |
+
name: str
|
| 11 |
+
repo_url: str
|
| 12 |
+
api_url: str
|
| 13 |
+
docs_path: str
|
| 14 |
+
github_issues: Dict[str, str] # language -> issue_id
|
| 15 |
+
reference_pr_url: str
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Project configurations
|
| 19 |
+
PROJECTS = {
|
| 20 |
+
"transformers": ProjectConfig(
|
| 21 |
+
name="Transformers",
|
| 22 |
+
repo_url="https://github.com/huggingface/transformers",
|
| 23 |
+
api_url="https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1",
|
| 24 |
+
docs_path="docs/source",
|
| 25 |
+
github_issues={"ko": "20179"},
|
| 26 |
+
reference_pr_url="https://github.com/huggingface/transformers/pull/24968"
|
| 27 |
+
),
|
| 28 |
+
"smolagents": ProjectConfig(
|
| 29 |
+
name="SmolAgents",
|
| 30 |
+
repo_url="https://github.com/huggingface/smolagents",
|
| 31 |
+
api_url="https://api.github.com/repos/huggingface/smolagents/git/trees/main?recursive=1",
|
| 32 |
+
docs_path="docs/source",
|
| 33 |
+
github_issues={"ko": "20179"}, # To be filled when issue is created
|
| 34 |
+
reference_pr_url="https://github.com/huggingface/smolagents/pull/1581" # To be filled with actual PR URL
|
| 35 |
+
)
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_project_config(project_key: str) -> ProjectConfig:
|
| 40 |
+
"""Get project configuration by key."""
|
| 41 |
+
if project_key not in PROJECTS:
|
| 42 |
+
raise ValueError(f"Unknown project: {project_key}. Available: {list(PROJECTS.keys())}")
|
| 43 |
+
return PROJECTS[project_key]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def get_available_projects() -> list[str]:
|
| 47 |
+
"""Get list of available project keys."""
|
| 48 |
+
return list(PROJECTS.keys())
|
translator/retriever.py
CHANGED
|
@@ -5,15 +5,22 @@ from pathlib import Path
|
|
| 5 |
import requests
|
| 6 |
|
| 7 |
from .model import Languages, Summary, TranslationDoc
|
|
|
|
| 8 |
|
| 9 |
-
URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1"
|
| 10 |
|
| 11 |
-
|
| 12 |
-
def get_github_repo_files():
|
| 13 |
"""
|
| 14 |
Get github repo files
|
| 15 |
"""
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
data = response.json()
|
| 19 |
all_items = data.get("tree", [])
|
|
@@ -26,27 +33,33 @@ def get_github_repo_files():
|
|
| 26 |
return file_paths
|
| 27 |
|
| 28 |
|
| 29 |
-
def get_github_issue_open_pr(lang: str = "ko"):
|
| 30 |
"""
|
| 31 |
-
Get open PR in the github issue, filtered by title
|
| 32 |
"""
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
)
|
| 39 |
|
| 40 |
headers = {
|
| 41 |
"Accept": "application/vnd.github+json",
|
| 42 |
}
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
all_open_prs = []
|
| 45 |
page = 1
|
| 46 |
per_page = 100 # Maximum allowed by GitHub API
|
| 47 |
|
| 48 |
while True:
|
| 49 |
-
|
|
|
|
| 50 |
response = requests.get(url, headers=headers)
|
| 51 |
|
| 52 |
if response.status_code != 200:
|
|
@@ -63,17 +76,20 @@ def get_github_issue_open_pr(lang: str = "ko"):
|
|
| 63 |
if len(page_prs) < per_page:
|
| 64 |
break
|
| 65 |
|
| 66 |
-
filtered_prs = [pr for pr in all_open_prs if
|
| 67 |
|
| 68 |
-
|
|
|
|
| 69 |
|
| 70 |
-
filenames = [
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
if
|
| 74 |
-
|
|
|
|
|
|
|
| 75 |
pr_info_list = [
|
| 76 |
-
f"
|
| 77 |
for pr in filtered_prs
|
| 78 |
]
|
| 79 |
return filenames, pr_info_list
|
|
@@ -99,11 +115,11 @@ def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
|
|
| 99 |
return report, first_missing_docs
|
| 100 |
|
| 101 |
|
| 102 |
-
def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
|
| 103 |
"""
|
| 104 |
Generate a report for the translated docs
|
| 105 |
"""
|
| 106 |
-
docs_file = get_github_repo_files()
|
| 107 |
|
| 108 |
base_docs_path = Path("docs/source")
|
| 109 |
en_docs_path = Path("docs/source/en")
|
|
|
|
| 5 |
import requests
|
| 6 |
|
| 7 |
from .model import Languages, Summary, TranslationDoc
|
| 8 |
+
from .project_config import get_project_config
|
| 9 |
|
|
|
|
| 10 |
|
| 11 |
+
def get_github_repo_files(project: str = "transformers"):
|
|
|
|
| 12 |
"""
|
| 13 |
Get github repo files
|
| 14 |
"""
|
| 15 |
+
config = get_project_config(project)
|
| 16 |
+
|
| 17 |
+
# Add GitHub token if available to avoid rate limiting
|
| 18 |
+
headers = {}
|
| 19 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
| 20 |
+
if github_token:
|
| 21 |
+
headers["Authorization"] = f"token {github_token}"
|
| 22 |
+
|
| 23 |
+
response = requests.get(config.api_url, headers=headers)
|
| 24 |
|
| 25 |
data = response.json()
|
| 26 |
all_items = data.get("tree", [])
|
|
|
|
| 33 |
return file_paths
|
| 34 |
|
| 35 |
|
| 36 |
+
def get_github_issue_open_pr(project: str = "transformers", lang: str = "ko"):
|
| 37 |
"""
|
| 38 |
+
Get open PR in the github issue, filtered by title containing '[i18n-KO]'.
|
| 39 |
"""
|
| 40 |
+
config = get_project_config(project)
|
| 41 |
+
issue_id = config.github_issues.get(lang)
|
| 42 |
+
|
| 43 |
+
# For projects without GitHub issue tracking, still search for PRs
|
| 44 |
+
if not issue_id:
|
| 45 |
+
raise ValueError(f"β οΈ No GitHub issue registered for {project}.")
|
| 46 |
|
| 47 |
headers = {
|
| 48 |
"Accept": "application/vnd.github+json",
|
| 49 |
}
|
| 50 |
|
| 51 |
+
# Add GitHub token if available to avoid rate limiting
|
| 52 |
+
github_token = os.environ.get("GITHUB_TOKEN")
|
| 53 |
+
if github_token:
|
| 54 |
+
headers["Authorization"] = f"token {github_token}"
|
| 55 |
+
|
| 56 |
all_open_prs = []
|
| 57 |
page = 1
|
| 58 |
per_page = 100 # Maximum allowed by GitHub API
|
| 59 |
|
| 60 |
while True:
|
| 61 |
+
repo_path = config.repo_url.replace("https://github.com/", "")
|
| 62 |
+
url = f"https://api.github.com/repos/{repo_path}/pulls?state=open&page={page}&per_page={per_page}"
|
| 63 |
response = requests.get(url, headers=headers)
|
| 64 |
|
| 65 |
if response.status_code != 200:
|
|
|
|
| 76 |
if len(page_prs) < per_page:
|
| 77 |
break
|
| 78 |
|
| 79 |
+
filtered_prs = [pr for pr in all_open_prs if "[i18n-KO]" in pr["title"]]
|
| 80 |
|
| 81 |
+
# Pattern to match both `filename.md` and filename.md formats
|
| 82 |
+
pattern = re.compile(r"(?:`([^`]+\.md)`|(\w+\.md))")
|
| 83 |
|
| 84 |
+
filenames = []
|
| 85 |
+
for pr in filtered_prs:
|
| 86 |
+
match = pattern.search(pr["title"])
|
| 87 |
+
if match:
|
| 88 |
+
# Use group 1 (with backticks) or group 2 (without backticks)
|
| 89 |
+
filename = match.group(1) or match.group(2)
|
| 90 |
+
filenames.append("docs/source/en/" + filename)
|
| 91 |
pr_info_list = [
|
| 92 |
+
f"{config.repo_url}/pull/{pr['url'].rstrip('/').split('/')[-1]}"
|
| 93 |
for pr in filtered_prs
|
| 94 |
]
|
| 95 |
return filenames, pr_info_list
|
|
|
|
| 115 |
return report, first_missing_docs
|
| 116 |
|
| 117 |
|
| 118 |
+
def report(project: str, target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
|
| 119 |
"""
|
| 120 |
Generate a report for the translated docs
|
| 121 |
"""
|
| 122 |
+
docs_file = get_github_repo_files(project)
|
| 123 |
|
| 124 |
base_docs_path = Path("docs/source")
|
| 125 |
en_docs_path = Path("docs/source/en")
|