Spaces:

lsottani
/

RAG_file_preprocessing

Sleeping

App Files Files Community

RAG_file_preprocessing / venv /lib /python3.9 /site-packages /cssselect /parser.py

lsottani

Upload folder using huggingface_hub

d9f69e5 verified 2 months ago

raw

history blame contribute delete

33.3 kB

	"""
	cssselect.parser
	================

	Tokenizer, parser and parsed objects for CSS selectors.


	:copyright: (c) 2007-2012 Ian Bicking and contributors.
	See AUTHORS for more details.
	:license: BSD, see LICENSE for more details.

	"""

	from __future__ import annotations

	import operator
	import re
	import sys
	from typing import TYPE_CHECKING, Literal, Optional, Protocol, Union, cast, overload

	if TYPE_CHECKING:
	from collections.abc import Iterable, Iterator, Sequence

	# typing.Self requires Python 3.11
	from typing_extensions import Self


	def ascii_lower(string: str) -> str:
	"""Lower-case, but only in the ASCII range."""
	return string.encode("utf8").lower().decode("utf8")


	class SelectorError(Exception):
	"""Common parent for :class:`SelectorSyntaxError` and
	:class:`ExpressionError`.

	You can just use ``except SelectorError:`` when calling
	:meth:`~GenericTranslator.css_to_xpath` and handle both exceptions types.

	"""


	class SelectorSyntaxError(SelectorError, SyntaxError):
	"""Parsing a selector that does not match the grammar."""


	#### Parsed objects

	Tree = Union[
	"Element",
	"Hash",
	"Class",
	"Function",
	"Pseudo",
	"Attrib",
	"Negation",
	"Relation",
	"Matching",
	"SpecificityAdjustment",
	"CombinedSelector",
	]
	PseudoElement = Union["FunctionalPseudoElement", str]


	class Selector:
	"""
	Represents a parsed selector.

	:meth:`~GenericTranslator.selector_to_xpath` accepts this object,
	but ignores :attr:`pseudo_element`. It is the user’s responsibility
	to account for pseudo-elements and reject selectors with unknown
	or unsupported pseudo-elements.

	"""

	def __init__(self, tree: Tree, pseudo_element: PseudoElement \| None = None) -> None:
	self.parsed_tree = tree
	if pseudo_element is not None and not isinstance(
	pseudo_element, FunctionalPseudoElement
	):
	pseudo_element = ascii_lower(pseudo_element)
	#: A :class:`FunctionalPseudoElement`,
	#: or the identifier for the pseudo-element as a string,
	# or ``None``.
	#:
	#: +-------------------------+----------------+--------------------------------+
	#: \| \| Selector \| Pseudo-element \|
	#: +=========================+================+================================+
	#: \| CSS3 syntax \| ``a::before`` \| ``'before'`` \|
	#: +-------------------------+----------------+--------------------------------+
	#: \| Older syntax \| ``a:before`` \| ``'before'`` \|
	#: +-------------------------+----------------+--------------------------------+
	#: \| From the Lists3_ draft, \| ``li::marker`` \| ``'marker'`` \|
	#: \| not in Selectors3 \| \| \|
	#: +-------------------------+----------------+--------------------------------+
	#: \| Invalid pseudo-class \| ``li:marker`` \| ``None`` \|
	#: +-------------------------+----------------+--------------------------------+
	#: \| Functional \| ``a::foo(2)`` \| ``FunctionalPseudoElement(…)`` \|
	#: +-------------------------+----------------+--------------------------------+
	#:
	#: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
	self.pseudo_element = pseudo_element

	def __repr__(self) -> str:
	if isinstance(self.pseudo_element, FunctionalPseudoElement):
	pseudo_element = repr(self.pseudo_element)
	elif self.pseudo_element:
	pseudo_element = f"::{self.pseudo_element}"
	else:
	pseudo_element = ""
	return f"{self.__class__.__name__}[{self.parsed_tree!r}{pseudo_element}]"

	def canonical(self) -> str:
	"""Return a CSS representation for this selector (a string)"""
	if isinstance(self.pseudo_element, FunctionalPseudoElement):
	pseudo_element = f"::{self.pseudo_element.canonical()}"
	elif self.pseudo_element:
	pseudo_element = f"::{self.pseudo_element}"
	else:
	pseudo_element = ""
	res = f"{self.parsed_tree.canonical()}{pseudo_element}"
	if len(res) > 1:
	res = res.lstrip("*")
	return res

	def specificity(self) -> tuple[int, int, int]:
	"""Return the specificity_ of this selector as a tuple of 3 integers.

	.. _specificity: http://www.w3.org/TR/selectors/#specificity

	"""
	a, b, c = self.parsed_tree.specificity()
	if self.pseudo_element:
	c += 1
	return a, b, c


	class Class:
	"""
	Represents selector.class_name
	"""

	def __init__(self, selector: Tree, class_name: str) -> None:
	self.selector = selector
	self.class_name = class_name

	def __repr__(self) -> str:
	return f"{self.__class__.__name__}[{self.selector!r}.{self.class_name}]"

	def canonical(self) -> str:
	return f"{self.selector.canonical()}.{self.class_name}"

	def specificity(self) -> tuple[int, int, int]:
	a, b, c = self.selector.specificity()
	b += 1
	return a, b, c


	class FunctionalPseudoElement:
	"""
	Represents selector::name(arguments)

	.. attribute:: name

	The name (identifier) of the pseudo-element, as a string.

	.. attribute:: arguments

	The arguments of the pseudo-element, as a list of tokens.

	Note: tokens are not part of the public API,
	and may change between cssselect versions.
	Use at your own risks.

	"""

	def __init__(self, name: str, arguments: Sequence[Token]):
	self.name = ascii_lower(name)
	self.arguments = arguments

	def __repr__(self) -> str:
	token_values = [token.value for token in self.arguments]
	return f"{self.__class__.__name__}[::{self.name}({token_values!r})]"

	def argument_types(self) -> list[str]:
	return [token.type for token in self.arguments]

	def canonical(self) -> str:
	args = "".join(token.css() for token in self.arguments)
	return f"{self.name}({args})"


	class Function:
	"""
	Represents selector:name(expr)
	"""

	def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None:
	self.selector = selector
	self.name = ascii_lower(name)
	self.arguments = arguments

	def __repr__(self) -> str:
	token_values = [token.value for token in self.arguments]
	return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({token_values!r})]"

	def argument_types(self) -> list[str]:
	return [token.type for token in self.arguments]

	def canonical(self) -> str:
	args = "".join(token.css() for token in self.arguments)
	return f"{self.selector.canonical()}:{self.name}({args})"

	def specificity(self) -> tuple[int, int, int]:
	a, b, c = self.selector.specificity()
	b += 1
	return a, b, c


	class Pseudo:
	"""
	Represents selector:ident
	"""

	def __init__(self, selector: Tree, ident: str) -> None:
	self.selector = selector
	self.ident = ascii_lower(ident)

	def __repr__(self) -> str:
	return f"{self.__class__.__name__}[{self.selector!r}:{self.ident}]"

	def canonical(self) -> str:
	return f"{self.selector.canonical()}:{self.ident}"

	def specificity(self) -> tuple[int, int, int]:
	a, b, c = self.selector.specificity()
	b += 1
	return a, b, c


	class Negation:
	"""
	Represents selector:not(subselector)
	"""

	def __init__(self, selector: Tree, subselector: Tree) -> None:
	self.selector = selector
	self.subselector = subselector

	def __repr__(self) -> str:
	return f"{self.__class__.__name__}[{self.selector!r}:not({self.subselector!r})]"

	def canonical(self) -> str:
	subsel = self.subselector.canonical()
	if len(subsel) > 1:
	subsel = subsel.lstrip("*")
	return f"{self.selector.canonical()}:not({subsel})"

	def specificity(self) -> tuple[int, int, int]:
	a1, b1, c1 = self.selector.specificity()
	a2, b2, c2 = self.subselector.specificity()
	return a1 + a2, b1 + b2, c1 + c2


	class Relation:
	"""
	Represents selector:has(subselector)
	"""

	def __init__(self, selector: Tree, combinator: Token, subselector: Selector):
	self.selector = selector
	self.combinator = combinator
	self.subselector = subselector

	def __repr__(self) -> str:
	return f"{self.__class__.__name__}[{self.selector!r}:has({self.subselector!r})]"

	def canonical(self) -> str:
	try:
	subsel = self.subselector[0].canonical() # type: ignore[index]
	except TypeError:
	subsel = self.subselector.canonical()
	if len(subsel) > 1:
	subsel = subsel.lstrip("*")
	return f"{self.selector.canonical()}:has({subsel})"

	def specificity(self) -> tuple[int, int, int]:
	a1, b1, c1 = self.selector.specificity()
	try:
	a2, b2, c2 = self.subselector[-1].specificity() # type: ignore[index]
	except TypeError:
	a2, b2, c2 = self.subselector.specificity()
	return a1 + a2, b1 + b2, c1 + c2


	class Matching:
	"""
	Represents selector:is(selector_list)
	"""

	def __init__(self, selector: Tree, selector_list: Iterable[Tree]):
	self.selector = selector
	self.selector_list = selector_list

	def __repr__(self) -> str:
	args_str = ", ".join(repr(s) for s in self.selector_list)
	return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]"

	def canonical(self) -> str:
	selector_arguments = []
	for s in self.selector_list:
	selarg = s.canonical()
	selector_arguments.append(selarg.lstrip("*"))
	args_str = ", ".join(str(s) for s in selector_arguments)
	return f"{self.selector.canonical()}:is({args_str})"

	def specificity(self) -> tuple[int, int, int]:
	return max(x.specificity() for x in self.selector_list)


	class SpecificityAdjustment:
	"""
	Represents selector:where(selector_list)
	Same as selector:is(selector_list), but its specificity is always 0
	"""

	def __init__(self, selector: Tree, selector_list: list[Tree]):
	self.selector = selector
	self.selector_list = selector_list

	def __repr__(self) -> str:
	args_str = ", ".join(repr(s) for s in self.selector_list)
	return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]"

	def canonical(self) -> str:
	selector_arguments = []
	for s in self.selector_list:
	selarg = s.canonical()
	selector_arguments.append(selarg.lstrip("*"))
	args_str = ", ".join(str(s) for s in selector_arguments)
	return f"{self.selector.canonical()}:where({args_str})"

	def specificity(self) -> tuple[int, int, int]:
	return 0, 0, 0


	class Attrib:
	"""
	Represents selector[namespace\|attrib operator value]
	"""

	@overload
	def __init__(
	self,
	selector: Tree,
	namespace: str \| None,
	attrib: str,
	operator: Literal["exists"],
	value: None,
	) -> None: ...

	@overload
	def __init__(
	self,
	selector: Tree,
	namespace: str \| None,
	attrib: str,
	operator: str,
	value: Token,
	) -> None: ...

	def __init__(
	self,
	selector: Tree,
	namespace: str \| None,
	attrib: str,
	operator: str,
	value: Token \| None,
	) -> None:
	self.selector = selector
	self.namespace = namespace
	self.attrib = attrib
	self.operator = operator
	self.value = value

	def __repr__(self) -> str:
	attrib = f"{self.namespace}\|{self.attrib}" if self.namespace else self.attrib
	if self.operator == "exists":
	return f"{self.__class__.__name__}[{self.selector!r}[{attrib}]]"
	assert self.value is not None
	return f"{self.__class__.__name__}[{self.selector!r}[{attrib} {self.operator} {self.value.value!r}]]"

	def canonical(self) -> str:
	attrib = f"{self.namespace}\|{self.attrib}" if self.namespace else self.attrib

	if self.operator == "exists":
	op = attrib
	else:
	assert self.value is not None
	op = f"{attrib}{self.operator}{self.value.css()}"

	return f"{self.selector.canonical()}[{op}]"

	def specificity(self) -> tuple[int, int, int]:
	a, b, c = self.selector.specificity()
	b += 1
	return a, b, c


	class Element:
	"""
	Represents namespace\|element

	`None` is for the universal selector '*'

	"""

	def __init__(
	self, namespace: str \| None = None, element: str \| None = None
	) -> None:
	self.namespace = namespace
	self.element = element

	def __repr__(self) -> str:
	return f"{self.__class__.__name__}[{self.canonical()}]"

	def canonical(self) -> str:
	element = self.element or "*"
	if self.namespace:
	element = f"{self.namespace}\|{element}"
	return element

	def specificity(self) -> tuple[int, int, int]:
	if self.element:
	return 0, 0, 1
	return 0, 0, 0


	class Hash:
	"""
	Represents selector#id
	"""

	def __init__(self, selector: Tree, id: str) -> None:
	self.selector = selector
	self.id = id

	def __repr__(self) -> str:
	return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]"

	def canonical(self) -> str:
	return f"{self.selector.canonical()}#{self.id}"

	def specificity(self) -> tuple[int, int, int]:
	a, b, c = self.selector.specificity()
	a += 1
	return a, b, c


	class CombinedSelector:
	def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None:
	assert selector is not None
	self.selector = selector
	self.combinator = combinator
	self.subselector = subselector

	def __repr__(self) -> str:
	comb = "<followed>" if self.combinator == " " else self.combinator
	return (
	f"{self.__class__.__name__}[{self.selector!r} {comb} {self.subselector!r}]"
	)

	def canonical(self) -> str:
	subsel = self.subselector.canonical()
	if len(subsel) > 1:
	subsel = subsel.lstrip("*")
	return f"{self.selector.canonical()} {self.combinator} {subsel}"

	def specificity(self) -> tuple[int, int, int]:
	a1, b1, c1 = self.selector.specificity()
	a2, b2, c2 = self.subselector.specificity()
	return a1 + a2, b1 + b2, c1 + c2


	#### Parser

	# foo
	_el_re = re.compile(r"^[ \t\r\n\f]([a-zA-Z]+)[ \t\r\n\f]$")

	# foo#bar or #bar
	_id_re = re.compile(r"^[ \t\r\n\f]([a-zA-Z])#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")

	# foo.bar or .bar
	_class_re = re.compile(
	r"^[ \t\r\n\f]([a-zA-Z])\.([a-zA-Z][a-zA-Z0-9_-])[ \t\r\n\f]$"
	)


	def parse(css: str) -> list[Selector]:
	"""Parse a CSS group of selectors.

	If you don't care about pseudo-elements or selector specificity,
	you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.

	:param css:
	A group of selectors as a string.
	:raises:
	:class:`SelectorSyntaxError` on invalid selectors.
	:returns:
	A list of parsed :class:`Selector` objects, one for each
	selector in the comma-separated group.

	"""
	# Fast path for simple cases
	match = _el_re.match(css)
	if match:
	return [Selector(Element(element=match.group(1)))]
	match = _id_re.match(css)
	if match is not None:
	return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))]
	match = _class_re.match(css)
	if match is not None:
	return [
	Selector(Class(Element(element=match.group(1) or None), match.group(2)))
	]

	stream = TokenStream(tokenize(css))
	stream.source = css
	return list(parse_selector_group(stream))


	# except SelectorSyntaxError:
	# e = sys.exc_info()[1]
	# message = "%s at %s -> %r" % (
	# e, stream.used, stream.peek())
	# e.msg = message
	# e.args = tuple([message])
	# raise


	def parse_selector_group(stream: TokenStream) -> Iterator[Selector]:
	stream.skip_whitespace()
	while 1:
	yield Selector(*parse_selector(stream))
	if stream.peek() == ("DELIM", ","):
	stream.next()
	stream.skip_whitespace()
	else:
	break


	def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement \| None]:
	result, pseudo_element = parse_simple_selector(stream)
	while 1:
	stream.skip_whitespace()
	peek = stream.peek()
	if peek in (("EOF", None), ("DELIM", ",")):
	break
	if pseudo_element:
	raise SelectorSyntaxError(
	f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
	)
	if peek.is_delim("+", ">", "~"):
	# A combinator
	combinator = cast(str, stream.next().value)
	stream.skip_whitespace()
	else:
	# By exclusion, the last parse_simple_selector() ended
	# at peek == ' '
	combinator = " "
	next_selector, pseudo_element = parse_simple_selector(stream)
	result = CombinedSelector(result, combinator, next_selector)
	return result, pseudo_element


	def parse_simple_selector(
	stream: TokenStream, inside_negation: bool = False
	) -> tuple[Tree, PseudoElement \| None]:
	stream.skip_whitespace()
	selector_start = len(stream.used)
	peek = stream.peek()
	if peek.type == "IDENT" or peek == ("DELIM", "*"):
	if peek.type == "IDENT":
	namespace = stream.next().value
	else:
	stream.next()
	namespace = None
	if stream.peek() == ("DELIM", "\|"):
	stream.next()
	element = stream.next_ident_or_star()
	else:
	element = namespace
	namespace = None
	else:
	element = namespace = None
	result: Tree = Element(namespace, element)
	pseudo_element: PseudoElement \| None = None
	while 1:
	peek = stream.peek()
	if (
	peek.type in ("S", "EOF")
	or peek.is_delim(",", "+", ">", "~")
	or (inside_negation and peek == ("DELIM", ")"))
	):
	break
	if pseudo_element:
	raise SelectorSyntaxError(
	f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
	)
	if peek.type == "HASH":
	result = Hash(result, cast(str, stream.next().value))
	elif peek == ("DELIM", "."):
	stream.next()
	result = Class(result, stream.next_ident())
	elif peek == ("DELIM", "\|"):
	stream.next()
	result = Element(None, stream.next_ident())
	elif peek == ("DELIM", "["):
	stream.next()
	result = parse_attrib(result, stream)
	elif peek == ("DELIM", ":"):
	stream.next()
	if stream.peek() == ("DELIM", ":"):
	stream.next()
	pseudo_element = stream.next_ident()
	if stream.peek() == ("DELIM", "("):
	stream.next()
	pseudo_element = FunctionalPseudoElement(
	pseudo_element, parse_arguments(stream)
	)
	continue
	ident = stream.next_ident()
	if ident.lower() in ("first-line", "first-letter", "before", "after"):
	# Special case: CSS 2.1 pseudo-elements can have a single ':'
	# Any new pseudo-element must have two.
	pseudo_element = str(ident)
	continue
	if stream.peek() != ("DELIM", "("):
	result = Pseudo(result, ident)
	if repr(result) == "Pseudo[Element[*]:scope]" and not (
	len(stream.used) == 2
	or (len(stream.used) == 3 and stream.used[0].type == "S")
	or (len(stream.used) >= 3 and stream.used[-3].is_delim(","))
	or (
	len(stream.used) >= 4
	and stream.used[-3].type == "S"
	and stream.used[-4].is_delim(",")
	)
	):
	raise SelectorSyntaxError(
	'Got immediate child pseudo-element ":scope" '
	"not at the start of a selector"
	)
	continue
	stream.next()
	stream.skip_whitespace()
	if ident.lower() == "not":
	if inside_negation:
	raise SelectorSyntaxError("Got nested :not()")
	argument, argument_pseudo_element = parse_simple_selector(
	stream, inside_negation=True
	)
	next = stream.next()
	if argument_pseudo_element:
	raise SelectorSyntaxError(
	f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next.pos}"
	)
	if next != ("DELIM", ")"):
	raise SelectorSyntaxError(f"Expected ')', got {next}")
	result = Negation(result, argument)
	elif ident.lower() == "has":
	combinator, arguments = parse_relative_selector(stream)
	result = Relation(result, combinator, arguments)

	elif ident.lower() in ("matches", "is"):
	selectors = parse_simple_selector_arguments(stream)
	result = Matching(result, selectors)
	elif ident.lower() == "where":
	selectors = parse_simple_selector_arguments(stream)
	result = SpecificityAdjustment(result, selectors)
	else:
	result = Function(result, ident, parse_arguments(stream))
	else:
	raise SelectorSyntaxError(f"Expected selector, got {peek}")
	if len(stream.used) == selector_start:
	raise SelectorSyntaxError(f"Expected selector, got {stream.peek()}")
	return result, pseudo_element


	def parse_arguments(stream: TokenStream) -> list[Token]:
	arguments: list[Token] = []
	while 1: # noqa: RET503
	stream.skip_whitespace()
	next = stream.next()
	if next.type in ("IDENT", "STRING", "NUMBER") or next in [
	("DELIM", "+"),
	("DELIM", "-"),
	]:
	arguments.append(next)
	elif next == ("DELIM", ")"):
	return arguments
	else:
	raise SelectorSyntaxError(f"Expected an argument, got {next}")


	def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]:
	stream.skip_whitespace()
	subselector = ""
	next = stream.next()

	if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]:
	combinator = next
	stream.skip_whitespace()
	next = stream.next()
	else:
	combinator = Token("DELIM", " ", pos=0)

	while 1: # noqa: RET503
	if next.type in ("IDENT", "STRING", "NUMBER") or next in [
	("DELIM", "."),
	("DELIM", "*"),
	]:
	subselector += cast(str, next.value)
	elif next == ("DELIM", ")"):
	result = parse(subselector)
	return combinator, result[0]
	else:
	raise SelectorSyntaxError(f"Expected an argument, got {next}")
	next = stream.next()


	def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]:
	arguments = []
	while 1:
	result, pseudo_element = parse_simple_selector(stream, True)
	if pseudo_element:
	raise SelectorSyntaxError(
	f"Got pseudo-element ::{pseudo_element} inside function"
	)
	stream.skip_whitespace()
	next = stream.next()
	if next in (("EOF", None), ("DELIM", ",")):
	stream.next()
	stream.skip_whitespace()
	arguments.append(result)
	elif next == ("DELIM", ")"):
	arguments.append(result)
	break
	else:
	raise SelectorSyntaxError(f"Expected an argument, got {next}")
	return arguments


	def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib:
	stream.skip_whitespace()
	attrib = stream.next_ident_or_star()
	if attrib is None and stream.peek() != ("DELIM", "\|"):
	raise SelectorSyntaxError(f"Expected '\|', got {stream.peek()}")
	namespace: str \| None
	op: str \| None
	if stream.peek() == ("DELIM", "\|"):
	stream.next()
	if stream.peek() == ("DELIM", "="):
	namespace = None
	stream.next()
	op = "\|="
	else:
	namespace = attrib
	attrib = stream.next_ident()
	op = None
	else:
	namespace = op = None
	if op is None:
	stream.skip_whitespace()
	next = stream.next()
	if next == ("DELIM", "]"):
	return Attrib(selector, namespace, cast(str, attrib), "exists", None)
	if next == ("DELIM", "="):
	op = "="
	elif next.is_delim("^", "$", "*", "~", "\|", "!") and (
	stream.peek() == ("DELIM", "=")
	):
	op = cast(str, next.value) + "="
	stream.next()
	else:
	raise SelectorSyntaxError(f"Operator expected, got {next}")
	stream.skip_whitespace()
	value = stream.next()
	if value.type not in ("IDENT", "STRING"):
	raise SelectorSyntaxError(f"Expected string or ident, got {value}")
	stream.skip_whitespace()
	next = stream.next()
	if next != ("DELIM", "]"):
	raise SelectorSyntaxError(f"Expected ']', got {next}")
	return Attrib(selector, namespace, cast(str, attrib), op, value)


	def parse_series(tokens: Iterable[Token]) -> tuple[int, int]:
	"""
	Parses the arguments for :nth-child() and friends.

	:raises: A list of tokens
	:returns: :``(a, b)``

	"""
	for token in tokens:
	if token.type == "STRING":
	raise ValueError("String tokens not allowed in series.")
	s = "".join(cast(str, token.value) for token in tokens).strip()
	if s == "odd":
	return 2, 1
	if s == "even":
	return 2, 0
	if s == "n":
	return 1, 0
	if "n" not in s:
	# Just b
	return 0, int(s)
	a, b = s.split("n", 1)
	a_as_int: int
	if not a:
	a_as_int = 1
	elif a in {"-", "+"}:
	a_as_int = int(a + "1")
	else:
	a_as_int = int(a)
	b_as_int = int(b) if b else 0
	return a_as_int, b_as_int


	#### Token objects


	class Token(tuple[str, Optional[str]]): # noqa: SLOT001
	@overload
	def __new__(
	cls,
	type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"],
	value: str,
	pos: int,
	) -> Self: ...

	@overload
	def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ...

	def __new__(cls, type_: str, value: str \| None, pos: int) -> Self:
	obj = tuple.__new__(cls, (type_, value))
	obj.pos = pos
	return obj

	def __repr__(self) -> str:
	return f"<{self.type} '{self.value}' at {self.pos}>"

	def is_delim(self, *values: str) -> bool:
	return self.type == "DELIM" and self.value in values

	pos: int

	@property
	def type(self) -> str:
	return self[0]

	@property
	def value(self) -> str \| None:
	return self[1]

	def css(self) -> str:
	if self.type == "STRING":
	return repr(self.value)
	return cast(str, self.value)


	class EOFToken(Token):
	def __new__(cls, pos: int) -> Self:
	return Token.__new__(cls, "EOF", None, pos)

	def __repr__(self) -> str:
	return f"<{self.type} at {self.pos}>"


	#### Tokenizer


	class TokenMacros:
	unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n\|[ \n\r\t\f])?"
	escape = unicode_escape + r"\|\\[^\n\r\f0-9a-f]"
	string_escape = r"\\(?:\n\|\r\n\|\r\|\f)\|" + escape
	nonascii = r"[^\0-\177]"
	nmchar = f"[_a-z0-9-]\|{escape}\|{nonascii}"
	nmstart = f"[_a-z]\|{escape}\|{nonascii}"


	class MatchFunc(Protocol):
	def __call__(
	self, string: str, pos: int = ..., endpos: int = ...
	) -> re.Match[str] \| None: ...


	def _compile(pattern: str) -> MatchFunc:
	return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match


	_match_whitespace = _compile(r"[ \t\r\n\f]+")
	_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+\|[0-9]+)")
	_match_hash = _compile("#(?:%(nmchar)s)+")
	_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
	_match_string_by_quote = {
	"'": _compile(r"([^\n\r\f\\']\|%(string_escape)s)*"),
	'"': _compile(r'([^\n\r\f\\"]\|%(string_escape)s)*'),
	}

	_sub_simple_escape = re.compile(r"\\(.)").sub
	_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub
	_sub_newline_escape = re.compile(r"\\(?:\n\|\r\n\|\r\|\f)").sub

	# Same as r'\1', but faster on CPython
	_replace_simple = operator.methodcaller("group", 1)


	def _replace_unicode(match: re.Match[str]) -> str:
	codepoint = int(match.group(1), 16)
	if codepoint > sys.maxunicode:
	codepoint = 0xFFFD
	return chr(codepoint)


	def unescape_ident(value: str) -> str:
	value = _sub_unicode_escape(_replace_unicode, value)
	return _sub_simple_escape(_replace_simple, value)


	def tokenize(s: str) -> Iterator[Token]:
	pos = 0
	len_s = len(s)
	while pos < len_s:
	match = _match_whitespace(s, pos=pos)
	if match:
	yield Token("S", " ", pos)
	pos = match.end()
	continue

	match = _match_ident(s, pos=pos)
	if match:
	value = _sub_simple_escape(
	_replace_simple, _sub_unicode_escape(_replace_unicode, match.group())
	)
	yield Token("IDENT", value, pos)
	pos = match.end()
	continue

	match = _match_hash(s, pos=pos)
	if match:
	value = _sub_simple_escape(
	_replace_simple,
	_sub_unicode_escape(_replace_unicode, match.group()[1:]),
	)
	yield Token("HASH", value, pos)
	pos = match.end()
	continue

	quote = s[pos]
	if quote in _match_string_by_quote:
	match = _match_string_by_quote[quote](s, pos=pos + 1)
	assert match, "Should have found at least an empty match"
	end_pos = match.end()
	if end_pos == len_s:
	raise SelectorSyntaxError(f"Unclosed string at {pos}")
	if s[end_pos] != quote:
	raise SelectorSyntaxError(f"Invalid string at {pos}")
	value = _sub_simple_escape(
	_replace_simple,
	_sub_unicode_escape(
	_replace_unicode, _sub_newline_escape("", match.group())
	),
	)
	yield Token("STRING", value, pos)
	pos = end_pos + 1
	continue

	match = _match_number(s, pos=pos)
	if match:
	value = match.group()
	yield Token("NUMBER", value, pos)
	pos = match.end()
	continue

	pos2 = pos + 2
	if s[pos:pos2] == "/*":
	pos = s.find("*/", pos2)
	if pos == -1:
	pos = len_s
	else:
	pos += 2
	continue

	yield Token("DELIM", s[pos], pos)
	pos += 1

	assert pos == len_s
	yield EOFToken(pos)


	class TokenStream:
	def __init__(self, tokens: Iterable[Token], source: str \| None = None) -> None:
	self.used: list[Token] = []
	self.tokens = iter(tokens)
	self.source = source
	self.peeked: Token \| None = None
	self._peeking = False
	self.next_token = self.tokens.__next__

	def next(self) -> Token:
	if self._peeking:
	self._peeking = False
	assert self.peeked is not None
	self.used.append(self.peeked)
	return self.peeked
	next = self.next_token()
	self.used.append(next)
	return next

	def peek(self) -> Token:
	if not self._peeking:
	self.peeked = self.next_token()
	self._peeking = True
	assert self.peeked is not None
	return self.peeked

	def next_ident(self) -> str:
	next = self.next()
	if next.type != "IDENT":
	raise SelectorSyntaxError(f"Expected ident, got {next}")
	return cast(str, next.value)

	def next_ident_or_star(self) -> str \| None:
	next = self.next()
	if next.type == "IDENT":
	return next.value
	if next == ("DELIM", "*"):
	return None
	raise SelectorSyntaxError(f"Expected ident or '*', got {next}")

	def skip_whitespace(self) -> None:
	peek = self.peek()
	if peek.type == "S":
	self.next()