gh-142236: Improve error location for missing comma in string concatenations (#142330)

This commit is contained in:
Pablo Galindo Salgado 2025-12-11 14:47:26 +00:00 committed by GitHub
parent a78f43b001
commit c433986005
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 49 additions and 3 deletions

View File

@ -1251,8 +1251,7 @@ invalid_expression:
# !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
| !(NAME STRING | SOFT_KEYWORD) a=disjunction b=expression_without_invalid {
_PyPegen_check_legacy_stmt(p, a) ? NULL : p->tokens[p->mark-1]->level == 0 ? NULL :
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
_PyPegen_raise_error_for_missing_comma(p, a, b) }
| a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
| a=disjunction 'if' b=disjunction 'else' !expression {
RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("expected expression after 'else', but statement is given") }

View File

@ -3336,6 +3336,20 @@ case(34)
lineno=3
)
def test_multiline_string_concat_missing_comma_points_to_last_string(self):
# gh-142236: For multi-line string concatenations with a missing comma,
# the error should point to the last string, not the first.
self._check_error(
"print(\n"
' "line1"\n'
' "line2"\n'
' "line3"\n'
" x=1\n"
")",
"Perhaps you forgot a comma",
lineno=4, # Points to "line3", the last string
)
@support.cpython_only
def test_syntax_error_on_deeply_nested_blocks(self):
# This raises a SyntaxError, it used to raise a SystemError. Context

View File

@ -0,0 +1,3 @@
Improve the "Perhaps you forgot a comma?" syntax error for multi-line string
concatenations to point to the last string instead of the first, making it
easier to locate where the comma is missing. Patch by Pablo Galindo.

View File

@ -947,6 +947,35 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
return 0;
}
/*
 * Raise the "Perhaps you forgot a comma?" SyntaxError for two adjacent
 * expressions `a b` that were parsed with no comma between them.
 *
 *   p  parser state; the level of the most recently consumed token
 *      (p->tokens[p->mark - 1]) decides whether the error applies.
 *   a  the first expression (left of the missing comma).
 *   b  the expression that directly follows `a`.
 *
 * Returns NULL without raising when _PyPegen_check_legacy_stmt() accepts
 * `a`, or when the last consumed token is at level 0. Otherwise returns
 * whatever RAISE_ERROR_KNOWN_LOCATION returns.
 */
void *
_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b)
{
    // Don't raise for legacy statements like "print x" or "exec x"
    if (_PyPegen_check_legacy_stmt(p, a)) {
        return NULL;
    }
    // Only raise inside parentheses/brackets (level > 0)
    if (p->tokens[p->mark - 1]->level == 0) {
        return NULL;
    }
    // For multi-line expressions (like string concatenations), point to the
    // last line instead of the first for a more helpful error message.
    if (a->end_lineno > a->lineno) {
        // a->col_offset was measured on the FIRST line of `a`. It is reused
        // as the start column on the last line because the pieces of a
        // concatenation typically share the same indentation. When they do
        // not (the last line is less indented), that column can sit at or
        // past a->end_col_offset, which would make the range empty or
        // inverted; fall back to the start of the line in that case.
        return RAISE_ERROR_KNOWN_LOCATION(
            p, PyExc_SyntaxError, a->end_lineno,
            a->col_offset < a->end_col_offset ? a->col_offset : 0,
            a->end_lineno, a->end_col_offset,
            "invalid syntax. Perhaps you forgot a comma?"
        );
    }
    // Single-line `a`: highlight from the start of `a` to the end of `b`.
    return RAISE_ERROR_KNOWN_LOCATION(
        p, PyExc_SyntaxError, a->lineno, a->col_offset,
        b->end_lineno, b->end_col_offset,
        "invalid syntax. Perhaps you forgot a comma?"
    );
}
static ResultTokenWithMetadata *
result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
{

2
Parser/parser.c generated
View File

@ -21445,7 +21445,7 @@ invalid_expression_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!(NAME STRING | SOFT_KEYWORD) disjunction expression_without_invalid"));
_res = _PyPegen_check_legacy_stmt ( p , a ) ? NULL : p -> tokens [p -> mark - 1] -> level == 0 ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE ( a , b , "invalid syntax. Perhaps you forgot a comma?" );
_res = _PyPegen_raise_error_for_missing_comma ( p , a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
p->level--;

View File

@ -358,6 +358,7 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
// Raises the "invalid syntax. Perhaps you forgot a comma?" SyntaxError for
// adjacent expressions `a b`; returns NULL without raising when `a` is a
// legacy statement or the last consumed token is at bracket level 0.
void *_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b);
ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
int, int, PyArena *);