| """Handwritten parser of dependency specifiers. | |
| The docstring for each __parse_* function contains ENBF-inspired grammar representing | |
| the implementation. | |
| """ | |
| import ast | |
| from typing import Any, List, NamedTuple, Optional, Tuple, Union | |
| from ._tokenizer import DEFAULT_RULES, Tokenizer | |


class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError


class Variable(Node):
    def serialize(self) -> str:
        return str(self)


class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    def serialize(self) -> str:
        return str(self)
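

# Illustrative behavior of the Node subclasses (a sketch, not a test suite):
# Variable and Op serialize to their raw text, while Value wraps its text in
# double quotes so it round-trips as a quoted string in a marker expression.
#
#   Variable("os_name").serialize()  -> 'os_name'
#   Value("posix").serialize()       -> '"posix"'
#   Op("==").serialize()             -> '=='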


MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]

# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]


class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: List[str]
    specifier: str
    marker: Optional[MarkerList]


# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
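

# Example (an illustrative sketch of the result shape; see ParsedRequirement):
#
#   parse_requirement('name[quux]>=1.0; python_version < "3.8"')
#   -> ParsedRequirement(name='name', url='', extras=['quux'], specifier='>=1.0',
#                        marker=[(Variable('python_version'), Op('<'), Value('3.8'))])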


def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """
    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)
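

# Example of the URL branch (an illustrative sketch):
#
#   parse_requirement('name @ file:///tmp/pkg.whl')
#   -> ParsedRequirement(name='name', url='file:///tmp/pkg.whl', extras=[],
#                        specifier='', marker=None)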


def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """
    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker


def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: List[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
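

# Example (an illustrative sketch of extras parsing):
#
#   parse_requirement('name[foo, bar]')
#   -> ParsedRequirement(name='name', url='', extras=['foo', 'bar'],
#                        specifier='', marker=None)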


def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
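

# Example (an illustrative sketch): whitespace around commas is consumed, so
# the parsed specifier text comes back comma-joined.
#
#   parse_requirement('name >=1.0, <2.0')
#   -> ParsedRequirement(name='name', url='', extras=[], specifier='>=1.0,<2.0',
#                        marker=None)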


# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))


def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    retval = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return retval


def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression
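

# Example (an illustrative sketch): the result is a flat list alternating
# between marker items and boolean operator strings.
#
#   parse_marker('os_name == "nt" or os_name == "posix"')
#   -> [(Variable('os_name'), Op('=='), Value('nt')),
#       'or',
#       (Variable('os_name'), Op('=='), Value('posix'))]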


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker
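

# Example (an illustrative sketch): a parenthesized marker nests as a sub-list.
#
#   parse_marker('(os_name == "nt")')
#   -> [[(Variable('os_name'), Op('=='), Value('nt'))]]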


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )


def process_env_var(env_var: str) -> Variable:
    if (
        env_var == "platform_python_implementation"
        or env_var == "python_implementation"
    ):
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)
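

# Example (an illustrative sketch): both spellings normalize to the canonical
# platform_python_implementation variable.
#
#   process_env_var('python_implementation')
#   -> Variable('platform_python_implementation')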


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
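

# Example (an illustrative sketch): the quoted source text is evaluated as a
# Python string literal, so either quote style and escape sequences work.
#
#   process_python_str("'3.8'")  -> Value('3.8')
#   process_python_str('"nt"')   -> Value('nt')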


def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of "
            "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )
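

# Example (an illustrative sketch): 'not in' is recognized as two tokens but
# collapses to a single Op node.
#
#   parse_marker('"nt" not in os_name')
#   -> [(Value('nt'), Op('not in'), Variable('os_name'))]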