AutoPage / ProjectPageAgent /css_checker.py
Mqleet's picture
upd code
fcaa164
raw
history blame
2.68 kB
import re
from collections import OrderedDict
from ProjectPageAgent.html_finder import HtmlFinder
import os
_LINK_CSS_RE = re.compile(
r'''(?isx)
<link[^>]*?
href\s*=\s*
(?:
"([^"]+?\.css(?:\?[^"]*)?)" |
'([^']+?\.css(?:\?[^']*)?)' |
([^\s"'=<>`]+?\.css(?:\?[^\s"'=<>`]*)?)
)
[^>]*?>
'''
)
_IMPORT_CSS_RE = re.compile(
r'''(?isx)
@import
\s+(?:url\()?
\s*
(?:
"([^"]+?\.css(?:\?[^"]*)?)" |
'([^']+?\.css(?:\?[^']*)?)' |
([^'")\s;]+?\.css(?:\?[^'")\s;]+)?)
)
\s*
\)?
'''
)
def _first_nonempty(groups_list):
out = []
for groups in groups_list:
for g in groups:
if g:
out.append(g)
break
return out
def extract_css_paths(html: str):
links = _first_nonempty(_LINK_CSS_RE.findall(html))
imports = _first_nonempty(_IMPORT_CSS_RE.findall(html))
seen = OrderedDict()
for u in links + imports:
u = u.strip()
if u and u not in seen:
seen[u] = True
return list(seen.keys())
def check_css(generated_html: str, template_html: str):
generated_css = extract_css_paths(generated_html)
template_css = extract_css_paths(template_html)
print(f'num of css in generated page: {len(generated_css)}')
print(f'num of css in template page: {len(template_css)}')
template_css_name = {css.strip().split('/')[-1]: css for css in template_css}
errors = {}
for css in generated_css:
if css.startswith('http'):
continue
if css not in template_css:
match = template_css_name.get(css.strip().split('/')[-1], None)
if match is not None:
errors[css] = match
else:
print(f"[⚠️ Warning] Missing CSS match for {css}")
new_html = generated_html
for css, new_css in errors.items():
if new_css:
new_html = new_html.replace(css, new_css)
return new_html
if __name__ == "__main__":
templates_root = '/home/jimu/Project_resources/project_page/page_assets/'
html_finder = HtmlFinder(specific_name='index.html')
count = 0
for page in os.listdir('generated_FastVGGT'):
print(page)
count += 1
with open(html_finder.find_html(os.path.join('generated_FastVGGT', page)), 'r') as f:
generated_html = f.read()
with open(html_finder.find_html(os.path.join(templates_root, page)), 'r') as f:
template_html = f.read()
_ = check_css(generated_html, template_html, page)
print(count)