Spaces:
Sleeping
Sleeping
| """ | |
| cssselect.parser | |
| ================ | |
| Tokenizer, parser and parsed objects for CSS selectors. | |
| :copyright: (c) 2007-2012 Ian Bicking and contributors. | |
| See AUTHORS for more details. | |
| :license: BSD, see LICENSE for more details. | |
| """ | |
| from __future__ import annotations | |
| import operator | |
| import re | |
| import sys | |
| from typing import TYPE_CHECKING, Literal, Optional, Protocol, Union, cast, overload | |
| if TYPE_CHECKING: | |
| from collections.abc import Iterable, Iterator, Sequence | |
| # typing.Self requires Python 3.11 | |
| from typing_extensions import Self | |
def ascii_lower(string: str) -> str:
    """Return *string* with only the ASCII letters ``A``-``Z`` lower-cased.

    The text is round-tripped through UTF-8 so the case folding is done by
    ``bytes.lower()``, which leaves every non-ASCII byte untouched.
    """
    encoded = string.encode("utf8")
    lowered = encoded.lower()
    return lowered.decode("utf8")
class SelectorError(Exception):
    """Common parent for :class:`SelectorSyntaxError` and
    :class:`ExpressionError`.

    You can just use ``except SelectorError:`` when calling
    :meth:`~GenericTranslator.css_to_xpath` and handle both exception types.
    """
class SelectorSyntaxError(SelectorError, SyntaxError):
    """Raised when parsing a selector that does not match the grammar.

    Inherits from both :class:`SelectorError` and the built-in
    :class:`SyntaxError`, so either can be used in an ``except`` clause.
    """
| #### Parsed objects | |
# Any node that can appear in a parsed selector tree.  The members are given
# as strings (forward references) because the classes are defined further
# down in this module.
Tree = Union[
    "Element",
    "Hash",
    "Class",
    "Function",
    "Pseudo",
    "Attrib",
    "Negation",
    "Relation",
    "Matching",
    "SpecificityAdjustment",
    "CombinedSelector",
]
# A pseudo-element is either a functional one (``::name(args)``) or just the
# identifier as a string.
PseudoElement = Union["FunctionalPseudoElement", str]
class Selector:
    """
    Represents a parsed selector.

    :meth:`~GenericTranslator.selector_to_xpath` accepts this object,
    but ignores :attr:`pseudo_element`. It is the user’s responsibility
    to account for pseudo-elements and reject selectors with unknown
    or unsupported pseudo-elements.
    """

    def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None:
        self.parsed_tree = tree
        # Plain identifiers are normalised to ASCII lower case; functional
        # pseudo-elements are stored as given.
        is_plain_ident = pseudo_element is not None and not isinstance(
            pseudo_element, FunctionalPseudoElement
        )
        if is_plain_ident:
            pseudo_element = ascii_lower(pseudo_element)
        #: A :class:`FunctionalPseudoElement`,
        #: or the identifier for the pseudo-element as a string,
        #: or ``None``.
        #:
        #: +-------------------------+----------------+--------------------------------+
        #: |                         | Selector       | Pseudo-element                 |
        #: +=========================+================+================================+
        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | Older syntax            | ``a:before``   | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
        #: | not in Selectors3       |                |                                |
        #: +-------------------------+----------------+--------------------------------+
        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
        #: +-------------------------+----------------+--------------------------------+
        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
        #: +-------------------------+----------------+--------------------------------+
        #:
        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
        self.pseudo_element = pseudo_element

    def __repr__(self) -> str:
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = repr(pseudo)
        else:
            suffix = f"::{pseudo}" if pseudo else ""
        return f"{self.__class__.__name__}[{self.parsed_tree!r}{suffix}]"

    def canonical(self) -> str:
        """Return a CSS representation for this selector (a string)"""
        pseudo = self.pseudo_element
        if isinstance(pseudo, FunctionalPseudoElement):
            suffix = f"::{pseudo.canonical()}"
        else:
            suffix = f"::{pseudo}" if pseudo else ""
        text = f"{self.parsed_tree.canonical()}{suffix}"
        # Drop the leading universal "*" unless it is the whole selector.
        return text.lstrip("*") if len(text) > 1 else text

    def specificity(self) -> tuple[int, int, int]:
        """Return the specificity_ of this selector as a tuple of 3 integers.

        A pseudo-element, when present, adds one to the last component.

        .. _specificity: http://www.w3.org/TR/selectors/#specificity
        """
        a, b, c = self.parsed_tree.specificity()
        return (a, b, c + 1) if self.pseudo_element else (a, b, c)
class Class:
    """
    Represents selector.class_name
    """

    def __init__(self, selector: Tree, class_name: str) -> None:
        self.selector = selector
        self.class_name = class_name

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self.selector!r}.{self.class_name}]"

    def canonical(self) -> str:
        """Return the CSS text: the inner selector followed by ``.class_name``."""
        prefix = self.selector.canonical()
        return f"{prefix}.{self.class_name}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the inner selector's specificity with the middle count bumped by one."""
        a, b, c = self.selector.specificity()
        return a, b + 1, c
class FunctionalPseudoElement:
    """
    Represents selector::name(arguments)

    .. attribute:: name

        The name (identifier) of the pseudo-element, as a string.

    .. attribute:: arguments

        The arguments of the pseudo-element, as a list of tokens.

        **Note:** tokens are not part of the public API,
        and may change between cssselect versions.
        Use at your own risks.
    """

    def __init__(self, name: str, arguments: Sequence[Token]):
        # The name is stored in ASCII lower case.
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [tok.value for tok in self.arguments]
        return f"{self.__class__.__name__}[::{self.name}({values!r})]"

    def argument_types(self) -> list[str]:
        """Return the ``type`` of every argument token, in order."""
        return [tok.type for tok in self.arguments]

    def canonical(self) -> str:
        """Return ``name(args)`` with the arguments' CSS text concatenated."""
        rendered = "".join(tok.css() for tok in self.arguments)
        return f"{self.name}({rendered})"
class Function:
    """
    Represents selector:name(expr)
    """

    def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None:
        self.selector = selector
        # The function name is stored in ASCII lower case.
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self) -> str:
        values = [tok.value for tok in self.arguments]
        return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({values!r})]"

    def argument_types(self) -> list[str]:
        """Return the ``type`` of every argument token, in order."""
        return [tok.type for tok in self.arguments]

    def canonical(self) -> str:
        """Return the CSS text: inner selector, ``:name`` and the joined arguments."""
        rendered = "".join(tok.css() for tok in self.arguments)
        return f"{self.selector.canonical()}:{self.name}({rendered})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the inner selector's specificity with the middle count bumped by one."""
        a, b, c = self.selector.specificity()
        return a, b + 1, c
class Pseudo:
    """
    Represents selector:ident
    """

    def __init__(self, selector: Tree, ident: str) -> None:
        self.selector = selector
        # The identifier is stored in ASCII lower case.
        self.ident = ascii_lower(ident)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self.selector!r}:{self.ident}]"

    def canonical(self) -> str:
        """Return the CSS text: the inner selector followed by ``:ident``."""
        prefix = self.selector.canonical()
        return f"{prefix}:{self.ident}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the inner selector's specificity with the middle count bumped by one."""
        a, b, c = self.selector.specificity()
        return a, b + 1, c
class Negation:
    """
    Represents selector:not(subselector)
    """

    def __init__(self, selector: Tree, subselector: Tree) -> None:
        self.selector = selector
        self.subselector = subselector

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self.selector!r}:not({self.subselector!r})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:not(subselector)``."""
        inner = self.subselector.canonical()
        # Drop the leading universal "*" unless it is the whole argument.
        if len(inner) > 1:
            inner = inner.lstrip("*")
        outer = self.selector.canonical()
        return f"{outer}:not({inner})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the component-wise sum of both selectors' specificities."""
        outer = self.selector.specificity()
        inner = self.subselector.specificity()
        return outer[0] + inner[0], outer[1] + inner[1], outer[2] + inner[2]
class Relation:
    """
    Represents selector:has(subselector)
    """

    def __init__(self, selector: Tree, combinator: Token, subselector: Selector):
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self.selector!r}:has({self.subselector!r})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:has(subselector)``.

        If the sub-selector supports indexing, its first item is rendered;
        otherwise the sub-selector itself is rendered.
        """
        target = self.subselector
        try:
            inner = target[0].canonical()  # type: ignore[index]
        except TypeError:
            inner = target.canonical()
        # Drop the leading universal "*" unless it is the whole argument.
        if len(inner) > 1:
            inner = inner.lstrip("*")
        return f"{self.selector.canonical()}:has({inner})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the sum of the outer selector's and the sub-selector's specificity.

        If the sub-selector supports indexing, its last item is used.
        """
        target = self.subselector
        outer = self.selector.specificity()
        try:
            inner = target[-1].specificity()  # type: ignore[index]
        except TypeError:
            inner = target.specificity()
        a2, b2, c2 = inner
        return outer[0] + a2, outer[1] + b2, outer[2] + c2
class Matching:
    """
    Represents selector:is(selector_list)
    """

    def __init__(self, selector: Tree, selector_list: Iterable[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        args_str = ", ".join(repr(s) for s in self.selector_list)
        return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:is(arg, arg, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the leading universal "*" unless it is the whole argument,
            # so a lone "*" argument is not rendered as an empty string.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        args_str = ", ".join(selector_arguments)
        return f"{self.selector.canonical()}:is({args_str})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the specificity of ``selector:is(...)``.

        ``:is()`` counts as its most specific argument; that value is added
        to the specificity of the selector the pseudo-class is attached to.
        An empty argument list contributes ``(0, 0, 0)``.
        """
        a1, b1, c1 = self.selector.specificity()
        a2, b2, c2 = max(
            (x.specificity() for x in self.selector_list), default=(0, 0, 0)
        )
        return a1 + a2, b1 + b2, c1 + c2
class SpecificityAdjustment:
    """
    Represents selector:where(selector_list)

    Same as selector:is(selector_list), but the pseudo-class itself always
    contributes a specificity of 0.
    """

    def __init__(self, selector: Tree, selector_list: list[Tree]):
        self.selector = selector
        self.selector_list = selector_list

    def __repr__(self) -> str:
        args_str = ", ".join(repr(s) for s in self.selector_list)
        return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]"

    def canonical(self) -> str:
        """Return the CSS text ``selector:where(arg, arg, ...)``."""
        selector_arguments = []
        for s in self.selector_list:
            selarg = s.canonical()
            # Drop the leading universal "*" unless it is the whole argument,
            # so a lone "*" argument is not rendered as an empty string.
            if len(selarg) > 1:
                selarg = selarg.lstrip("*")
            selector_arguments.append(selarg)
        args_str = ", ".join(selector_arguments)
        return f"{self.selector.canonical()}:where({args_str})"

    def specificity(self) -> tuple[int, int, int]:
        """Return the specificity of ``selector:where(...)``.

        The arguments of ``:where()`` add nothing, so the result is just the
        specificity of the selector the pseudo-class is attached to.
        """
        a, b, c = self.selector.specificity()
        return a, b, c
class Attrib:
    """
    Represents selector[namespace|attrib operator value]
    """

    # The two @overload stubs exist only for type checkers: the "exists"
    # operator takes no value, every other operator takes a token.
    @overload
    def __init__(
        self,
        selector: Tree,
        namespace: str | None,
        attrib: str,
        operator: Literal["exists"],
        value: None,
    ) -> None: ...

    @overload
    def __init__(
        self,
        selector: Tree,
        namespace: str | None,
        attrib: str,
        operator: str,
        value: Token,
    ) -> None: ...

    def __init__(
        self,
        selector: Tree,
        namespace: str | None,
        attrib: str,
        operator: str,
        value: Token | None,
    ) -> None:
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        self.operator = operator
        self.value = value

    def __repr__(self) -> str:
        attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
        if self.operator == "exists":
            return f"{self.__class__.__name__}[{self.selector!r}[{attrib}]]"
        assert self.value is not None
        return f"{self.__class__.__name__}[{self.selector!r}[{attrib} {self.operator} {self.value.value!r}]]"

    def canonical(self) -> str:
        """Return the CSS text ``selector[attrib]`` or ``selector[attrib<op><value>]``."""
        attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
        if self.operator == "exists":
            op = attrib
        else:
            assert self.value is not None
            op = f"{attrib}{self.operator}{self.value.css()}"
        return f"{self.selector.canonical()}[{op}]"

    def specificity(self) -> tuple[int, int, int]:
        """Return the inner selector's specificity with the middle count bumped by one."""
        a, b, c = self.selector.specificity()
        b += 1
        return a, b, c
class Element:
    """
    Represents namespace|element

    `None` is for the universal selector '*'
    """

    def __init__(
        self, namespace: str | None = None, element: str | None = None
    ) -> None:
        self.namespace = namespace
        self.element = element

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self.canonical()}]"

    def canonical(self) -> str:
        """Return ``element`` or ``namespace|element``, with ``*`` for a missing element."""
        name = self.element or "*"
        if not self.namespace:
            return name
        return f"{self.namespace}|{name}"

    def specificity(self) -> tuple[int, int, int]:
        """Return ``(0, 0, 1)`` for a named element and ``(0, 0, 0)`` for the universal one."""
        return (0, 0, 1) if self.element else (0, 0, 0)
class Hash:
    """
    Represents selector#id
    """

    def __init__(self, selector: Tree, id: str) -> None:
        self.selector = selector
        self.id = id

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]"

    def canonical(self) -> str:
        """Return the CSS text: the inner selector followed by ``#id``."""
        prefix = self.selector.canonical()
        return f"{prefix}#{self.id}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the inner selector's specificity with the first count bumped by one."""
        a, b, c = self.selector.specificity()
        return a + 1, b, c
class CombinedSelector:
    """Represents two selectors joined by a combinator (``selector <comb> subselector``)."""

    def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None:
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self) -> str:
        # A single-space combinator is shown as "<followed>".
        if self.combinator == " ":
            shown = "<followed>"
        else:
            shown = self.combinator
        return (
            f"{self.__class__.__name__}[{self.selector!r} {shown} {self.subselector!r}]"
        )

    def canonical(self) -> str:
        """Return the CSS text with the combinator surrounded by single spaces."""
        right = self.subselector.canonical()
        # Drop the leading universal "*" unless it is the whole right-hand side.
        if len(right) > 1:
            right = right.lstrip("*")
        left = self.selector.canonical()
        return f"{left} {self.combinator} {right}"

    def specificity(self) -> tuple[int, int, int]:
        """Return the component-wise sum of both sides' specificities."""
        left = self.selector.specificity()
        right = self.subselector.specificity()
        return left[0] + right[0], left[1] + right[1], left[2] + right[2]
| #### Parser | |
# Fast-path patterns used by parse().  Each one matches the whole input,
# allowing surrounding CSS whitespace (space, tab, CR, LF, form feed).

# foo -- a bare element name made of ASCII letters; group 1 is the name.
_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")
# foo#bar or #bar -- group 1 is the (possibly empty) element name,
# group 2 the id.
_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")
# foo.bar or .bar -- group 1 is the (possibly empty) element name,
# group 2 the class name, which must start with an ASCII letter.
_class_re = re.compile(
    r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$"
)
def parse(css: str) -> list[Selector]:
    """Parse a CSS *group of selectors*.

    If you don't care about pseudo-elements or selector specificity,
    you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.

    :param css:
        A *group of selectors* as a string.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors.
    :returns:
        A list of parsed :class:`Selector` objects, one for each
        selector in the comma-separated group.

    """
    # Fast paths: a bare element, ``el#id`` / ``#id`` and ``el.cls`` / ``.cls``
    # are recognised with a single regex each, bypassing the tokenizer.
    if m := _el_re.match(css):
        return [Selector(Element(element=m.group(1)))]

    if (m := _id_re.match(css)) is not None:
        tag, ident = m.group(1) or None, m.group(2)
        return [Selector(Hash(Element(element=tag), ident))]

    if (m := _class_re.match(css)) is not None:
        tag, class_name = m.group(1) or None, m.group(2)
        return [Selector(Class(Element(element=tag), class_name))]

    # General case: tokenize and run the full parser.
    stream = TokenStream(tokenize(css))
    stream.source = css
    return list(parse_selector_group(stream))
def parse_selector_group(stream: TokenStream) -> Iterator[Selector]:
    """Yield one :class:`Selector` per comma-separated selector in *stream*."""
    stream.skip_whitespace()
    while True:
        yield Selector(*parse_selector(stream))
        # Anything other than a comma ends the group.
        if stream.peek() != ("DELIM", ","):
            return
        stream.next()
        stream.skip_whitespace()
def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]:
    """Parse one selector: simple selectors chained by combinators.

    Parsing stops at EOF or at a ``,``.

    :returns: ``(tree, pseudo_element)``; the pseudo-element is ``None``
        when the selector has none.
    :raises SelectorSyntaxError: if a pseudo-element is followed by more
        selector text.
    """
    tree, pseudo_element = parse_simple_selector(stream)
    while True:
        stream.skip_whitespace()
        upcoming = stream.peek()
        if upcoming in (("EOF", None), ("DELIM", ",")):
            return tree, pseudo_element
        if pseudo_element:
            raise SelectorSyntaxError(
                f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
            )
        if not upcoming.is_delim("+", ">", "~"):
            # By exclusion, the last parse_simple_selector() ended
            # at peek == ' '
            combinator = " "
        else:
            # An explicit combinator
            combinator = cast(str, stream.next().value)
            stream.skip_whitespace()
        right_hand, pseudo_element = parse_simple_selector(stream)
        tree = CombinedSelector(tree, combinator, right_hand)
def parse_simple_selector(
    stream: TokenStream, inside_negation: bool = False
) -> tuple[Tree, PseudoElement | None]:
    """Parse an optional element name followed by any number of qualifiers.

    Qualifiers are ``#id``, ``.class``, ``|element``, ``[attrib]``,
    ``:pseudo``, ``:function(...)`` and ``::pseudo-element``.

    :param stream: The token stream to consume from.
    :param inside_negation:
        When true, a ``)`` also terminates the selector (used for the
        arguments of ``:not()``, ``:is()`` and ``:where()``), and a nested
        ``:not()`` is rejected.
    :returns: ``(tree, pseudo_element)``; the pseudo-element is ``None``
        when the selector has none.
    :raises SelectorSyntaxError: on any input that is not a valid simple
        selector, including when no token was consumed at all.
    """
    stream.skip_whitespace()
    # Remembered so that an empty selector can be detected at the end.
    selector_start = len(stream.used)
    peek = stream.peek()
    # Optional leading "element", "*", "ns|element" or "*|element".
    if peek.type == "IDENT" or peek == ("DELIM", "*"):
        if peek.type == "IDENT":
            namespace = stream.next().value
        else:
            stream.next()
            namespace = None
        if stream.peek() == ("DELIM", "|"):
            stream.next()
            element = stream.next_ident_or_star()
        else:
            # No "|" follows: what was read is the element name itself.
            element = namespace
            namespace = None
    else:
        element = namespace = None
    result: Tree = Element(namespace, element)
    pseudo_element: PseudoElement | None = None
    while 1:
        peek = stream.peek()
        # Whitespace, EOF, a comma or a combinator ends the simple selector;
        # so does ")" when parsing a function argument.
        if (
            peek.type in ("S", "EOF")
            or peek.is_delim(",", "+", ">", "~")
            or (inside_negation and peek == ("DELIM", ")"))
        ):
            break
        # Nothing may follow a pseudo-element.
        if pseudo_element:
            raise SelectorSyntaxError(
                f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
            )
        if peek.type == "HASH":
            result = Hash(result, cast(str, stream.next().value))
        elif peek == ("DELIM", "."):
            stream.next()
            result = Class(result, stream.next_ident())
        elif peek == ("DELIM", "|"):
            stream.next()
            result = Element(None, stream.next_ident())
        elif peek == ("DELIM", "["):
            stream.next()
            result = parse_attrib(result, stream)
        elif peek == ("DELIM", ":"):
            stream.next()
            if stream.peek() == ("DELIM", ":"):
                # "::" introduces a pseudo-element, optionally functional.
                stream.next()
                pseudo_element = stream.next_ident()
                if stream.peek() == ("DELIM", "("):
                    stream.next()
                    pseudo_element = FunctionalPseudoElement(
                        pseudo_element, parse_arguments(stream)
                    )
                continue
            ident = stream.next_ident()
            if ident.lower() in ("first-line", "first-letter", "before", "after"):
                # Special case: CSS 2.1 pseudo-elements can have a single ':'
                # Any new pseudo-element must have two.
                pseudo_element = str(ident)
                continue
            if stream.peek() != ("DELIM", "("):
                # Plain pseudo-class (no argument list).
                result = Pseudo(result, ident)
                # A bare ":scope" is only accepted when the ":" and the ident
                # just consumed are the first tokens of the stream (optionally
                # after one whitespace token) or directly follow a ","
                # (optionally with one whitespace token in between).
                if repr(result) == "Pseudo[Element[*]:scope]" and not (
                    len(stream.used) == 2
                    or (len(stream.used) == 3 and stream.used[0].type == "S")
                    or (len(stream.used) >= 3 and stream.used[-3].is_delim(","))
                    or (
                        len(stream.used) >= 4
                        and stream.used[-3].type == "S"
                        and stream.used[-4].is_delim(",")
                    )
                ):
                    raise SelectorSyntaxError(
                        'Got immediate child pseudo-element ":scope" '
                        "not at the start of a selector"
                    )
                continue
            # Functional pseudo-class: consume "(" and dispatch on the name.
            stream.next()
            stream.skip_whitespace()
            if ident.lower() == "not":
                if inside_negation:
                    raise SelectorSyntaxError("Got nested :not()")
                argument, argument_pseudo_element = parse_simple_selector(
                    stream, inside_negation=True
                )
                next = stream.next()
                if argument_pseudo_element:
                    raise SelectorSyntaxError(
                        f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next.pos}"
                    )
                if next != ("DELIM", ")"):
                    raise SelectorSyntaxError(f"Expected ')', got {next}")
                result = Negation(result, argument)
            elif ident.lower() == "has":
                combinator, arguments = parse_relative_selector(stream)
                result = Relation(result, combinator, arguments)
            elif ident.lower() in ("matches", "is"):
                selectors = parse_simple_selector_arguments(stream)
                result = Matching(result, selectors)
            elif ident.lower() == "where":
                selectors = parse_simple_selector_arguments(stream)
                result = SpecificityAdjustment(result, selectors)
            else:
                result = Function(result, ident, parse_arguments(stream))
        else:
            raise SelectorSyntaxError(f"Expected selector, got {peek}")
    # No token was consumed: there was no selector here at all.
    if len(stream.used) == selector_start:
        raise SelectorSyntaxError(f"Expected selector, got {stream.peek()}")
    return result, pseudo_element
def parse_arguments(stream: TokenStream) -> list[Token]:
    """Collect the argument tokens of a functional pseudo up to the closing ``)``.

    Whitespace between arguments is skipped.  Accepted tokens are IDENT,
    STRING, NUMBER and the ``+`` / ``-`` delimiters.

    :raises SelectorSyntaxError: on any other token.
    """
    signs = [("DELIM", "+"), ("DELIM", "-")]
    collected: list[Token] = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token.type in ("IDENT", "STRING", "NUMBER") or token in signs:
            collected.append(token)
            continue
        if token == ("DELIM", ")"):
            return collected
        raise SelectorSyntaxError(f"Expected an argument, got {token}")
def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]:
    """Parse the argument of ``:has()`` up to the closing ``)``.

    An optional leading ``+``, ``-``, ``>`` or ``~`` delimiter is taken as
    the combinator; otherwise a single-space DELIM token is used.  The values
    of the remaining tokens are concatenated and handed to :func:`parse`.

    :returns: ``(combinator, selector)`` where *selector* is the first
        selector parsed from the collected text.
    :raises SelectorSyntaxError: on a token other than IDENT, STRING,
        NUMBER, ``.``, ``*`` or the closing ``)``.
    """
    stream.skip_whitespace()
    token = stream.next()
    if token in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]:
        combinator = token
        stream.skip_whitespace()
        token = stream.next()
    else:
        combinator = Token("DELIM", " ", pos=0)

    pieces: list[str] = []
    while True:
        if token.type in ("IDENT", "STRING", "NUMBER") or token in [
            ("DELIM", "."),
            ("DELIM", "*"),
        ]:
            pieces.append(cast(str, token.value))
        elif token == ("DELIM", ")"):
            parsed = parse("".join(pieces))
            return combinator, parsed[0]
        else:
            raise SelectorSyntaxError(f"Expected an argument, got {token}")
        token = stream.next()
def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]:
    """Parse the comma-separated simple selectors of ``:is()`` / ``:where()``.

    Consumes tokens up to and including the closing ``)``.

    :returns: The parsed selector trees, in source order.
    :raises SelectorSyntaxError:
        if an argument carries a pseudo-element, or if an argument is
        followed by anything other than ``,`` or ``)`` (including the end
        of input).
    """
    arguments: list[Tree] = []
    while True:
        result, pseudo_element = parse_simple_selector(stream, True)
        if pseudo_element:
            raise SelectorSyntaxError(
                f"Got pseudo-element ::{pseudo_element} inside function"
            )
        stream.skip_whitespace()
        token = stream.next()
        if token == ("DELIM", ")"):
            arguments.append(result)
            return arguments
        if token != ("DELIM", ","):
            # Includes EOF: an unterminated argument list is a syntax error,
            # not a reason to read past the end of the token stream.
            raise SelectorSyntaxError(f"Expected an argument, got {token}")
        arguments.append(result)
        # Only whitespace may be skipped after the comma; the next token
        # belongs to the following argument and must not be discarded.
        stream.skip_whitespace()
def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib:
    """Parse an attribute selector; the opening ``[`` is already consumed.

    Handles ``[attr]``, ``[ns|attr]``, ``[*|attr]`` and the operator forms
    ``=``, ``^=``, ``$=``, ``*=``, ``~=``, ``|=`` and ``!=`` followed by an
    IDENT or STRING value.

    :returns: An :class:`Attrib` wrapping *selector*.
    :raises SelectorSyntaxError: on any malformed attribute selector.
    """
    pipe = ("DELIM", "|")
    equals = ("DELIM", "=")
    closing = ("DELIM", "]")

    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    # A "*" is only meaningful as a namespace, so "|" must follow it.
    if attrib is None and stream.peek() != pipe:
        raise SelectorSyntaxError(f"Expected '|', got {stream.peek()}")

    namespace: str | None = None
    op: str | None = None
    if stream.peek() == pipe:
        stream.next()
        if stream.peek() == equals:
            # "attr|=value": the pipe was part of the operator.
            stream.next()
            op = "|="
        else:
            # "ns|attr": what was read first is the namespace.
            namespace, attrib = attrib, stream.next_ident()

    if op is None:
        stream.skip_whitespace()
        token = stream.next()
        if token == closing:
            return Attrib(selector, namespace, cast(str, attrib), "exists", None)
        if token == equals:
            op = "="
        elif token.is_delim("^", "$", "*", "~", "|", "!") and (
            stream.peek() == equals
        ):
            op = cast(str, token.value) + "="
            stream.next()
        else:
            raise SelectorSyntaxError(f"Operator expected, got {token}")

    stream.skip_whitespace()
    value = stream.next()
    if value.type not in ("IDENT", "STRING"):
        raise SelectorSyntaxError(f"Expected string or ident, got {value}")

    stream.skip_whitespace()
    token = stream.next()
    if token != closing:
        raise SelectorSyntaxError(f"Expected ']', got {token}")
    return Attrib(selector, namespace, cast(str, attrib), op, value)
def parse_series(tokens: Iterable[Token]) -> tuple[int, int]:
    """
    Parses the arguments for :nth-child() and friends.

    :param tokens: The argument tokens; any iterable, including a one-shot
        iterator.
    :raises ValueError: if a STRING token is present, or if the joined text
        is not ``odd``, ``even``, an integer, or of the form ``an+b``.
    :returns: ``(a, b)``

    """
    # Materialise first: the tokens are walked twice below, which would
    # silently yield nothing on the second pass for an iterator.
    tokens = list(tokens)
    if any(token.type == "STRING" for token in tokens):
        raise ValueError("String tokens not allowed in series.")
    s = "".join(cast(str, token.value) for token in tokens).strip()
    if s == "odd":
        return 2, 1
    if s == "even":
        return 2, 0
    if s == "n":
        return 1, 0
    if "n" not in s:
        # Just b
        return 0, int(s)
    a, b = s.split("n", 1)
    a_as_int: int
    if not a:
        a_as_int = 1
    elif a in {"-", "+"}:
        # A bare sign means a coefficient of -1 / +1.
        a_as_int = int(a + "1")
    else:
        a_as_int = int(a)
    b_as_int = int(b) if b else 0
    return a_as_int, b_as_int
| #### Token objects | |
class Token(tuple[str, Optional[str]]):  # noqa: SLOT001
    """A ``(type, value)`` tuple that also carries its offset in the source.

    Being a tuple, a token compares equal to plain ``(type, value)`` pairs,
    e.g. ``token == ("DELIM", ",")``; :attr:`pos` takes no part in equality.
    """

    # The @overload stubs exist only for type checkers: only the EOF token
    # has no value.
    @overload
    def __new__(
        cls,
        type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"],
        value: str,
        pos: int,
    ) -> Self: ...

    @overload
    def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ...

    def __new__(cls, type_: str, value: str | None, pos: int) -> Self:
        obj = tuple.__new__(cls, (type_, value))
        obj.pos = pos
        return obj

    def __repr__(self) -> str:
        return f"<{self.type} '{self.value}' at {self.pos}>"

    def is_delim(self, *values: str) -> bool:
        """Return whether this is a DELIM token whose value is one of *values*."""
        return self.type == "DELIM" and self.value in values

    # Offset of the token in the source string, as passed to the constructor.
    pos: int

    @property
    def type(self) -> str:
        """The token type, i.e. the first tuple item."""
        return self[0]

    @property
    def value(self) -> str | None:
        """The token value, i.e. the second tuple item."""
        return self[1]

    def css(self) -> str:
        """Return the token as CSS text; STRING values are rendered via ``repr()``."""
        if self.type == "STRING":
            return repr(self.value)
        return cast(str, self.value)
class EOFToken(Token):
    """The end-of-input token: type ``"EOF"`` with a ``None`` value."""

    def __new__(cls, pos: int) -> Self:
        eof = Token.__new__(cls, "EOF", None, pos)
        return eof

    def __repr__(self) -> str:
        # There is no value to show for EOF.
        return f"<{self.type} at {self.pos}>"
| #### Tokenizer | |
class TokenMacros:
    """Named regex fragments substituted into token patterns via ``%(name)s``."""

    # Backslash, 1-6 hex digits, then one optional whitespace terminator.
    unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
    # A unicode escape, or a backslash before any non-newline, non-hex char.
    escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
    # Inside strings a backslash-newline is allowed as well.
    string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
    nonascii = r"[^\0-\177]"
    nmchar = "[_a-z0-9-]|" + escape + "|" + nonascii
    nmstart = "[_a-z]|" + escape + "|" + nonascii
class MatchFunc(Protocol):
    """Call signature of the bound ``match`` method that ``_compile`` returns."""

    def __call__(
        self, string: str, pos: int = ..., endpos: int = ...
    ) -> re.Match[str] | None: ...
def _compile(pattern: str) -> MatchFunc:
    """Expand ``%(name)s`` macros from :class:`TokenMacros` and compile *pattern*.

    The pattern is compiled case-insensitively; the compiled pattern's bound
    ``match`` method is returned rather than the pattern object.
    """
    expanded = pattern % vars(TokenMacros)
    compiled = re.compile(expanded, re.IGNORECASE)
    return compiled.match
# Token matchers used by tokenize().  Each is the bound ``match`` method of a
# case-insensitive pattern built by _compile(), called as ``m(s, pos=...)``.
_match_whitespace = _compile(r"[ \t\r\n\f]+")
_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
_match_hash = _compile("#(?:%(nmchar)s)+")
_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
# String bodies, keyed by the opening quote character.  The patterns can
# match the empty string, and do not include the quotes themselves.
_match_string_by_quote = {
    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}
# Bound ``sub`` methods used to resolve escapes in token values.
_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub
_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub
# Same as r'\1', but faster on CPython
_replace_simple = operator.methodcaller("group", 1)
| def _replace_unicode(match: re.Match[str]) -> str: | |
| codepoint = int(match.group(1), 16) | |
| if codepoint > sys.maxunicode: | |
| codepoint = 0xFFFD | |
| return chr(codepoint) | |
def unescape_ident(value: str) -> str:
    """Resolve the escapes in *value*: hex escapes first, then ``\\x`` escapes."""
    return _sub_simple_escape(
        _replace_simple, _sub_unicode_escape(_replace_unicode, value)
    )
def tokenize(s: str) -> Iterator[Token]:
    """Yield the tokens of *s*, ending with an :class:`EOFToken`.

    At each position the candidates are tried in this order: whitespace,
    identifier, hash, quoted string, number, ``/* ... */`` comment; anything
    else becomes a one-character DELIM token.  A whitespace run yields a
    single ``S`` token with value ``" "``; comments yield nothing.

    :raises SelectorSyntaxError: on an unclosed or invalid string.
    """
    pos = 0
    end = len(s)
    while pos < end:
        if m := _match_whitespace(s, pos=pos):
            yield Token("S", " ", pos)
            pos = m.end()
            continue

        if m := _match_ident(s, pos=pos):
            yield Token("IDENT", unescape_ident(m.group()), pos)
            pos = m.end()
            continue

        if m := _match_hash(s, pos=pos):
            # The value excludes the leading "#".
            yield Token("HASH", unescape_ident(m.group()[1:]), pos)
            pos = m.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            m = _match_string_by_quote[quote](s, pos=pos + 1)
            assert m, "Should have found at least an empty match"
            closing = m.end()
            if closing == end:
                raise SelectorSyntaxError(f"Unclosed string at {pos}")
            if s[closing] != quote:
                raise SelectorSyntaxError(f"Invalid string at {pos}")
            # Escaped newlines are removed before the other escapes resolve.
            text = unescape_ident(_sub_newline_escape("", m.group()))
            yield Token("STRING", text, pos)
            pos = closing + 1
            continue

        if m := _match_number(s, pos=pos):
            yield Token("NUMBER", m.group(), pos)
            pos = m.end()
            continue

        if s.startswith("/*", pos):
            # Skip the comment; an unterminated one runs to the end of input.
            close = s.find("*/", pos + 2)
            pos = end if close == -1 else close + 2
            continue

        yield Token("DELIM", s[pos], pos)
        pos += 1

    assert pos == end
    yield EOFToken(pos)
class TokenStream:
    """A token iterator with one token of look-ahead and a consumption history.

    :attr:`used` lists every token returned by :meth:`next`, in order; a
    token that was only peeked is not included.
    """

    def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None:
        self.used: list[Token] = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked: Token | None = None
        self._peeking = False
        self.next_token = self.tokens.__next__

    def next(self) -> Token:
        """Consume and return the next token, recording it in :attr:`used`."""
        if not self._peeking:
            token = self.next_token()
        else:
            # Hand out the token that peek() already fetched.
            self._peeking = False
            assert self.peeked is not None
            token = self.peeked
        self.used.append(token)
        return token

    def peek(self) -> Token:
        """Return the next token without consuming it."""
        if not self._peeking:
            self.peeked = self.next_token()
            self._peeking = True
        assert self.peeked is not None
        return self.peeked

    def next_ident(self) -> str:
        """Consume the next token and return its value; it must be an IDENT."""
        token = self.next()
        if token.type != "IDENT":
            raise SelectorSyntaxError(f"Expected ident, got {token}")
        return cast(str, token.value)

    def next_ident_or_star(self) -> str | None:
        """Consume an IDENT (returning its value) or a ``*`` (returning ``None``)."""
        token = self.next()
        if token.type == "IDENT":
            return token.value
        if token == ("DELIM", "*"):
            return None
        raise SelectorSyntaxError(f"Expected ident or '*', got {token}")

    def skip_whitespace(self) -> None:
        """Consume the next token if it is a whitespace (``S``) token."""
        if self.peek().type == "S":
            self.next()