import re from collections import OrderedDict from ProjectPageAgent.html_finder import HtmlFinder import os _LINK_CSS_RE = re.compile( r'''(?isx) ]*? href\s*=\s* (?: "([^"]+?\.css(?:\?[^"]*)?)" | '([^']+?\.css(?:\?[^']*)?)' | ([^\s"'=<>`]+?\.css(?:\?[^\s"'=<>`]*)?) ) [^>]*?> ''' ) _IMPORT_CSS_RE = re.compile( r'''(?isx) @import \s+(?:url\()? \s* (?: "([^"]+?\.css(?:\?[^"]*)?)" | '([^']+?\.css(?:\?[^']*)?)' | ([^'")\s;]+?\.css(?:\?[^'")\s;]+)?) ) \s* \)? ''' ) def _first_nonempty(groups_list): out = [] for groups in groups_list: for g in groups: if g: out.append(g) break return out def extract_css_paths(html: str): links = _first_nonempty(_LINK_CSS_RE.findall(html)) imports = _first_nonempty(_IMPORT_CSS_RE.findall(html)) seen = OrderedDict() for u in links + imports: u = u.strip() if u and u not in seen: seen[u] = True return list(seen.keys()) def check_css(generated_html: str, template_html: str): generated_css = extract_css_paths(generated_html) template_css = extract_css_paths(template_html) print(f'num of css in generated page: {len(generated_css)}') print(f'num of css in template page: {len(template_css)}') template_css_name = {css.strip().split('/')[-1]: css for css in template_css} errors = {} for css in generated_css: if css.startswith('http'): continue if css not in template_css: match = template_css_name.get(css.strip().split('/')[-1], None) if match is not None: errors[css] = match else: print(f"[⚠️ Warning] Missing CSS match for {css}") new_html = generated_html for css, new_css in errors.items(): if new_css: new_html = new_html.replace(css, new_css) return new_html if __name__ == "__main__": templates_root = '/home/jimu/Project_resources/project_page/page_assets/' html_finder = HtmlFinder(specific_name='index.html') count = 0 for page in os.listdir('generated_FastVGGT'): print(page) count += 1 with open(html_finder.find_html(os.path.join('generated_FastVGGT', page)), 'r') as f: generated_html = f.read() with open(html_finder.find_html(os.path.join(templates_root, page)), 'r') as f: template_html = f.read() _ = check_css(generated_html, template_html, page) print(count)