File size: 17,298 Bytes
22556a8
 
 
 
 
 
d229b84
 
 
 
 
 
 
 
 
 
 
22556a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8957aec
732763a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22556a8
732763a
 
 
22556a8
732763a
eaea5aa
 
 
 
22556a8
732763a
22556a8
732763a
 
 
22556a8
732763a
 
 
 
 
 
 
 
 
 
eaea5aa
732763a
 
eaea5aa
 
732763a
22556a8
 
732763a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22556a8
732763a
 
 
 
 
 
 
22556a8
8957aec
732763a
22556a8
 
 
732763a
22556a8
 
eaea5aa
 
22556a8
 
 
8957aec
22556a8
732763a
22556a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8957aec
22556a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d229b84
 
22556a8
 
 
 
 
 
 
 
 
 
 
 
8957aec
22556a8
 
 
8957aec
 
22556a8
8957aec
 
 
 
 
 
 
 
22556a8
 
 
 
 
eaea5aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
import yaml
import requests
from typing import Dict, List, Any
import os

class TocTreeHandler:
    def __init__(self, project: str = "transformers"):
        """Set up the handler for one documentation project.

        Looks up the project configuration and derives, from its ``repo_url``,
        the raw-content URLs of the English and Korean ``_toctree.yml`` files
        on the ``main`` branch.

        Args:
            project: Project key handed to ``get_project_config``.
        """
        # Function-scope import of the project configuration lookup.
        from translator.project_config import get_project_config

        self.project = project
        config = get_project_config(project)
        self.project_config = config

        # "https://github.com/<owner>/<repo>" -> "<owner>/<repo>"
        github_prefix = "https://github.com/"
        repo_slug = config.repo_url.replace(github_prefix, "")

        # Both toctrees live under docs/source/<lang>/ of the same repository.
        docs_root = f"https://raw.githubusercontent.com/{repo_slug}/main/docs/source"
        self.en_toctree_url = f"{docs_root}/en/_toctree.yml"
        self.ko_toctree_url = f"{docs_root}/ko/_toctree.yml"
        self.local_docs_path = "docs/source/ko"
    
    def fetch_toctree(self, url: str, timeout: float = 30.0) -> Dict[str, Any]:
        """Download a YAML document and return its parsed content.

        Args:
            url: Address of the YAML file to download.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` waits indefinitely on an unresponsive
                host, which would stall the whole workflow.

        Returns:
            Whatever ``yaml.safe_load`` produces for the response body. The
            other methods of this class treat a toctree as a list of dict
            entries.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.RequestException: On connection failure or timeout.
        """
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return yaml.safe_load(response.text)
    
    def get_en_toctree(self) -> Dict[str, Any]:
        """Get English toctree structure"""
        return self.fetch_toctree(self.en_toctree_url)
    
    def get_ko_toctree(self) -> Dict[str, Any]:
        """Get Korean toctree structure"""
        return self.fetch_toctree(self.ko_toctree_url)
    
    def extract_title_mappings(self, en_data: List[Dict], ko_data: List[Dict]) -> Dict[str, str]:
        """Extract title mappings between English and Korean"""
        mappings = {}
        
        def process_section(en_section: Dict, ko_section: Dict):
            if 'local' in en_section and 'local' in ko_section:
                if en_section['local'] == ko_section['local']:
                    en_title = en_section.get('title', '')
                    ko_title = ko_section.get('title', '')
                    if en_title and ko_title:
                        mappings[en_title] = ko_title
            
            if 'sections' in en_section and 'sections' in ko_section:
                en_sections = en_section['sections']
                ko_sections = ko_section['sections']
                
                for i, en_sub in enumerate(en_sections):
                    if i < len(ko_sections):
                        process_section(en_sub, ko_sections[i])
        
        for i, en_item in enumerate(en_data):
            if i < len(ko_data):
                process_section(en_item, ko_data[i])
        
        return mappings
    
    def translate_title(self, en_title: str) -> str:
        """Translate English title to Korean using LLM"""
        try:
            from translator.content import llm_translate
            
            prompt = f"""Translate the following English documentation title to Korean. Return only the translated title, nothing else.

English title: {en_title}

Korean title:"""
            
            callback_result, translated_title = llm_translate(prompt)
            return translated_title.strip()
        except Exception as e:
            print(f"Error translating title '{en_title}': {e}")
            return en_title
    
    def create_local_toctree(self, en_title: str, local_file_path: str) -> Dict[str, str]:
        """Create local toctree entry with Korean title and local path"""
        try:
            # First try to get Korean title from existing mappings
            en_data = self.get_en_toctree()
            ko_data = self.get_ko_toctree()
            
            title_mappings = self.extract_title_mappings(en_data, ko_data)
            ko_title = title_mappings.get(en_title)
            
            # If no existing mapping, translate the title
            if not ko_title:
                ko_title = self.translate_title(en_title)
            
            return {
                'local': local_file_path,
                'title': ko_title
            }
        except Exception as e:
            print(f"Error creating local toctree: {e}")
            return {
                'local': local_file_path,
                'title': en_title
            }
   
    def find_and_update_translation_entry(self, ko_toctree_data, target_local: str, english_title: str, korean_title: str):
        """Find entry with '(λ²ˆμ—­μ€‘) μ˜μ–΄μ œλͺ©' and update it"""
        target_title_pattern = f"(λ²ˆμ—­μ€‘) {english_title}"
        
        def process_item(item):
            if isinstance(item, dict):
                # Check if title matches the pattern
                if item.get('title') == target_title_pattern:
                    # Update local path and title
                    item['local'] = target_local
                    item['title'] = korean_title
                    return True
                
                # Process sections recursively
                if 'sections' in item:
                    for section in item['sections']:
                        if process_item(section):
                            return True
            return False
        
        # Process the toctree data
        if isinstance(ko_toctree_data, list):
            for item in ko_toctree_data:
                if process_item(item):
                    return True
        return False

    def create_updated_toctree_with_replacement(self, ko_toctree: list, target_local: str) -> list:
        """Update Korean toctree by finding and updating translation entry"""
        try:
            # Step 1: Get English toctree and find the English title for target_local
            en_toctree = self.get_en_toctree()
            english_title = self.find_title_for_local(en_toctree, target_local)
            
            if not english_title:
                print(f"⚠️ Toctree entry not found: '{target_local}' not in English toctree")
                print(f"πŸ” Attempting to find appropriate section for new entry...")
                # Try to add new entry in appropriate location
                return self.add_new_toctree_entry(ko_toctree, target_local)
            
            print(f"Found English title: {english_title} for local: {target_local}")
            
            # Step 2: Translate the English title to Korean
            korean_title = self.translate_title(english_title)
            print(f"Translated Korean title: {korean_title}")
            
            # Step 3: Make a deep copy to avoid modifying original
            import copy
            updated_toctree = copy.deepcopy(ko_toctree)
            
            # Step 4: Find and update the "(λ²ˆμ—­μ€‘) μ˜μ–΄μ œλͺ©" entry
            updated = self.find_and_update_translation_entry(
                updated_toctree, target_local, english_title, korean_title
            )
            
            if updated:
                print(f"βœ… Successfully updated translation entry: local={target_local}, title={korean_title}")
                return updated_toctree
            else:
                print(f"⚠️ Toctree update skipped: '(λ²ˆμ—­μ€‘) {english_title}' entry not found")
                print(f"πŸ“‹ This may be a new file not yet added to Korean toctree")
                return ko_toctree
                
        except Exception as e:
            print(f"Error creating updated toctree: {e}")
            return ko_toctree

    def find_title_for_local(self, toctree_data, target_local: str):
        """Find title for given local path in toctree"""
        def search_item(item):
            if isinstance(item, dict):
                if item.get('local') == target_local:
                    return item.get('title', '')
                
                if 'sections' in item:
                    for section in item['sections']:
                        result = search_item(section)
                        if result:
                            return result
            return None
        
        if isinstance(toctree_data, list):
            for item in toctree_data:
                result = search_item(item)
                if result:
                    return result
        return None

    def process_pr_commit(self, filepath: str):
        """Process PR commit by updating Korean toctree with translated entry"""
        # Get filepath without prefix
        filepath_without_prefix = filepath.replace("docs/source/en/", "").replace(".md", "")
        
        # Get Korean toctree
        ko_toctree = self.get_ko_toctree()
        
        # Use diff-merge algorithm to add new entry
        updated_ko_toctree = self.add_new_toctree_entry(ko_toctree, filepath_without_prefix)
        
        if not updated_ko_toctree:
            print(f"Failed to create updated Korean toctree for local: {filepath_without_prefix}")
            return
        
        print(f"Successfully updated Korean toctree")
        
        # Store the updated toctree for commit
        self.updated_ko_toctree = updated_ko_toctree

    def commit_and_push_toctree(self, pr_agent, owner: str, repo_name: str, branch_name: str):
        """Commit and push toctree updates as a separate commit"""
        try:
            # Use the updated toctree created by LLM
            if not hasattr(self, 'updated_ko_toctree') or not self.updated_ko_toctree:
                print("No updated Korean toctree available")
                return {"status": "error", "message": "No updated toctree to commit"}
                
            ko_data = self.updated_ko_toctree
            
            # Convert to YAML string
            toctree_content = yaml.dump(ko_data, allow_unicode=True, default_flow_style=False, sort_keys=False)
            
            # Create toctree commit message
            commit_message = "docs: update Korean documentation table of contents"
            
            # Commit toctree file
            file_result = pr_agent.create_or_update_file(
                owner=owner,
                repo_name=repo_name,
                path="docs/source/ko/_toctree.yml",
                message=commit_message,
                content=toctree_content,
                branch_name=branch_name
            )
            
            if file_result.startswith("SUCCESS"):
                return {
                    "status": "success", 
                    "message": f"Toctree committed successfully: {file_result}",
                    "commit_message": commit_message
                }
            else:
                return {
                    "status": "error", 
                    "message": f"Toctree commit failed: {file_result}"
                }
                
        except Exception as e:
            return {
                "status": "error", 
                "message": f"Error committing toctree: {str(e)}"
            }

    def update_toctree_after_translation(
        self,
        translation_result: dict, 
        filepath: str, 
        pr_agent, 
        github_config: dict,
        project: str = "transformers"
    ) -> dict:
        """Update toctree after successful translation PR.
        
        Args:
            translation_result: Result from translation PR workflow
            filepath: Original file path
            pr_agent: GitHub PR agent instance
            github_config: GitHub configuration dictionary
            
        Returns:
            Dictionary with toctree update result
        """
        if translation_result["status"] == "error":
            return None
            
        try:
            # Process toctree update with LLM
            self.process_pr_commit(filepath)
            # Commit toctree as separate commit
            if self.updated_ko_toctree:
                return self.commit_and_push_toctree(
                    pr_agent=pr_agent,
                    owner=github_config["owner"],
                    repo_name=github_config["repo_name"],
                    branch_name=translation_result["branch"]
                )

        except Exception as e:
            return {
                "status": "error",
                "message": f"Error updating toctree: {str(e)}"
            }
    
    def add_new_toctree_entry(self, ko_toctree: list, target_local: str) -> list:
        """Add new toctree entry using diff-merge algorithm"""
        try:
            import copy
            updated_toctree = copy.deepcopy(ko_toctree)
            
            # Generate new entry
            filename = target_local.split('/')[-1].replace('_', ' ').title()
            korean_title = self.translate_title(filename)
            new_entry = {
                'local': target_local,
                'title': korean_title
            }
            
            # Get English toctree for structure reference
            en_toctree = self.get_en_toctree()
            
            # Use diff-merge algorithm
            if self.merge_toctree_sections(en_toctree, updated_toctree, target_local, new_entry):
                return updated_toctree
            else:
                # Fallback: add to root level
                updated_toctree.append(new_entry)
                print(f"βœ… Added new entry at root level: {target_local} -> {korean_title}")
                return updated_toctree
                
        except Exception as e:
            print(f"❌ Error adding new toctree entry: {e}")
            return ko_toctree
    
    def merge_toctree_sections(self, en_toctree: list, ko_toctree: list, target_local: str, new_entry: dict) -> bool:
        """Merge English toctree structure into Korean toctree for target_local"""
        for en_section in en_toctree:
            en_title = en_section.get('title')
            
            # Check if this English section contains our target
            if self.contains_target(en_section, target_local):
                # Find matching Korean section
                ko_section = self.find_matching_section(ko_toctree, en_title)
                
                if ko_section:
                    # Section exists - merge subsections
                    return self.merge_subsections(en_section, ko_section, target_local, new_entry)
                else:
                    # Section doesn't exist - create new section
                    new_ko_section = self.create_section_with_order(en_section, target_local, new_entry)
                    ko_toctree.append(new_ko_section)
                    print(f"βœ… Created new section '{new_ko_section.get('title')}' with ordered structure")
                    return True
        return False
    
    def contains_target(self, section: dict, target_local: str) -> bool:
        """Check if section contains target_local recursively"""
        if 'sections' in section:
            for subsection in section['sections']:
                if subsection.get('local') == target_local:
                    return True
                if self.contains_target(subsection, target_local):
                    return True
        return False
    
    def find_matching_section(self, ko_toctree: list, en_title: str) -> dict:
        """Find Korean section that matches English title"""
        # Try exact match first
        for item in ko_toctree:
            if item.get('title') == en_title:
                return item
        
        # Try translated title match
        try:
            translated_title = self.translate_title(en_title)
            for item in ko_toctree:
                if item.get('title') == translated_title:
                    return item
        except:
            pass
                
        return None
    
    def merge_subsections(self, en_section: dict, ko_section: dict, target_local: str, new_entry: dict) -> bool:
        """Merge subsections while maintaining order"""
        if 'sections' not in en_section:
            return False
            
        # Find target index in English sections
        target_index = -1
        for i, en_subsection in enumerate(en_section['sections']):
            if en_subsection.get('local') == target_local:
                target_index = i
                break
        
        if target_index == -1:
            return False
            
        # Ensure Korean section has sections array
        if 'sections' not in ko_section:
            ko_section['sections'] = []
            
        # Insert at correct position
        self.insert_at_correct_position(ko_section, target_index, new_entry)
        return True
    
    def insert_at_correct_position(self, ko_section: dict, target_index: int, new_entry: dict):
        """Insert entry at correct position, expanding array if needed"""
        sections = ko_section['sections']
        
        # Expand sections array if needed
        while len(sections) <= target_index:
            sections.append(None)  # Placeholder
        
        # Insert new entry
        sections[target_index] = new_entry
        
        # Clean up None placeholders at the end
        while sections and sections[-1] is None:
            sections.pop()
    
    def create_section_with_order(self, en_section: dict, target_local: str, new_entry: dict) -> dict:
        """Create new Korean section with only the translated entry"""
        new_ko_section = {
            'title': self.translate_title(en_section.get('title')),
            'isExpanded': en_section.get('isExpanded', False),
            'sections': [new_entry]  # Only add the translated entry
        }
        
        return new_ko_section