mirror of
https://github.com/python/cpython.git
synced 2026-01-27 13:15:25 +00:00
The behaviour of Cut in nested parentheses, Repeat, Opt, and similar is somewhat chaotic. Apparently even the academic papers on PEG aren't as clear as they could be. And it doesn't really matter. Python only uses top-level cuts. When that changes, we can clarify as much as necessary (and even change the implementation to make sense for what we'll need). Document that this is deliberately unspecified, and add a test to make sure any decision is deliberate, tested and documented.
85 lines
3.0 KiB
Python
85 lines
3.0 KiB
Python
from typing import Any
|
|
|
|
from pegen import grammar
|
|
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
|
|
|
|
|
|
class ValidationError(Exception):
    """Signal that a grammar rule failed one of the validator checks."""
|
|
|
|
|
|
class GrammarValidator(GrammarVisitor):
    """Base class for checks that are applied to a grammar one rule at a time.

    Subclasses implement ``visit_<NodeType>`` methods and raise
    ``ValidationError`` when a rule violates the check.
    """

    def __init__(self, grammar: grammar.Grammar) -> None:
        """Remember the grammar being validated.

        Args:
            grammar: The grammar whose rules will be passed to
                ``validate_rule``.
        """
        self.grammar = grammar
        # Name of the rule currently being validated; None outside of
        # validate_rule().  Subclasses use it in their error messages.
        self.rulename: str | None = None

    def validate_rule(self, rulename: str, node: Rule) -> None:
        """Visit *node* with ``self.rulename`` set to *rulename*.

        Args:
            rulename: Name of the rule, exposed to the ``visit_*`` methods
                through ``self.rulename``.
            node: The rule to visit.

        Any exception raised while visiting propagates to the caller.
        """
        self.rulename = rulename
        try:
            self.visit(node)
        finally:
            # Reset even when a check raises, so a validator that is reused
            # after a failure does not keep reporting a stale rule name.
            self.rulename = None
|
|
|
|
|
|
class SubRuleValidator(GrammarValidator):
    """Reject rules where an earlier alternative shadows a later one.

    If the items of one alternative are a prefix of the items of a later
    alternative in the same ``Rhs``, the later one is reported as
    unreachable.
    """

    def visit_Rhs(self, node: Rhs) -> None:
        """Check every alternative of *node* against each one after it.

        This method does not visit the children of *node* itself, so only
        the alternatives listed directly in ``node.alts`` are compared.

        Raises:
            ValidationError: If an alternative is shadowed by an earlier one.
        """
        for index, alt in enumerate(node.alts):
            for other_alt in node.alts[index + 1 :]:
                self.check_intersection(alt, other_alt)

    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
        """Raise if *first_alt* is an item-wise prefix of *second_alt*.

        The comparison is done item by item on the string form of each item
        rather than on the string form of the whole alternative.  A plain
        ``str.startswith`` test would also fire when the first alternative
        merely ends in the middle of an item of the second one (``a`` versus
        ``ab c`` or ``a? b``), wrongly rejecting valid grammars.

        NOTE(review): relies on ``Alt.items`` from ``pegen.grammar`` -- confirm
        against that module.

        Raises:
            ValidationError: If *second_alt* can never be visited.
        """
        first_items = [str(item) for item in first_alt.items]
        second_items = [str(item) for item in second_alt.items]
        if second_items[: len(first_items)] == first_items:
            raise ValidationError(
                f"In {self.rulename} there is an alternative that will "
                f"never be visited:\n{second_alt}"
            )
|
|
|
|
|
|
class RaiseRuleValidator(GrammarValidator):
    """Forbid RAISE_SYNTAX_ERROR actions outside of ``invalid`` rules."""

    def visit_Alt(self, node: Alt) -> None:
        """Raise if *node*'s action mentions RAISE_SYNTAX_ERROR.

        Alternatives of rules whose name starts with ``invalid`` are exempt.
        Alternatives without an action are always accepted.

        Raises:
            ValidationError: If the action contains ``RAISE_SYNTAX_ERROR``
                and the current rule is not exempt.
        """
        rule = self.rulename
        if rule and rule.startswith("invalid"):
            # Raising is allowed in invalid rules.
            return

        action = node.action
        if not action or "RAISE_SYNTAX_ERROR" not in action:
            return

        raise ValidationError(
            f"In {rule!r} there is an alternative that contains "
            f"RAISE_SYNTAX_ERROR; this is only allowed in invalid_ rules"
        )
|
|
|
|
|
|
class CutValidator(GrammarValidator):
    """Reject any Cut that does not sit directly in a rule's alternative.

    The documentation currently states, for simplicity, that a Cut affects
    the alternatives of the *rule* it appears in.  The implementation,
    however, makes a cut local to its enclosing Rhs (for example a
    parenthesized list of choices).  On top of that, academic papers about
    PEG "desugar" repeats and optional items into choices with an empty
    alternative, which would therefore contain a Cut's effect.

    When such a nested cut is added, please update the documentation and
    tests, then get rid of this validator.

    See gh-143054.
    """

    def visit(self, node: Any, parents: tuple[Any, ...] = ()) -> None:
        """Visit *node*, extending the ancestor chain with *node* itself."""
        chain = (*parents, node)
        super().visit(node, parents=chain)

    def visit_Cut(self, node: Alt, parents: tuple[Any, ...] = ()) -> None:
        """Raise unless the cut's ancestry is exactly Rule/Rhs/Alt/NamedItem.

        *parents* is the chain built by ``visit``; its last entry is the
        cut node itself.

        Raises:
            ValidationError: If the chain of node type names differs from
                the expected top-level one.
        """
        # NOTE(review): the annotation says ``Alt`` but the method name and
        # the expected chain below suggest *node* is a Cut -- confirm.
        ancestry = [type(ancestor).__name__ for ancestor in parents]
        if ancestry == ['Rule', 'Rhs', 'Alt', 'NamedItem', 'Cut']:
            return

        raise ValidationError(
            f"Rule {self.rulename!r} contains cut that's not on the "
            "top level. "
            "The intended semantics of such cases need "
            "to be clarified; see the CutValidator docstring."
            f"\nThe cut is inside: {ancestry}"
        )
|
|
|
|
def validate_grammar(the_grammar: grammar.Grammar) -> None:
    """Run every direct ``GrammarValidator`` subclass over *the_grammar*.

    A fresh validator is created per subclass and applied to each entry of
    ``the_grammar.rules`` in turn.  Nothing is caught here, so the first
    exception raised by a validator propagates to the caller.
    """
    for checker_type in GrammarValidator.__subclasses__():
        check = checker_type(the_grammar).validate_rule
        for name, rule_node in the_grammar.rules.items():
            check(name, rule_node)
|