GH-143493: Conform to spec for generator expressions while supporting virtual iterators (GH-143569)

* Moves the `GET_ITER` instruction into the generator function preamble.
  This means the iterable is converted into an iterator during generator
  creation, as documented, but keeps it in the same code object allowing
  optimization.
This commit is contained in:
Mark Shannon 2026-01-16 09:11:58 +00:00 committed by GitHub
parent c461aa99e2
commit ae53da5758
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 117 additions and 88 deletions

View File

@ -27,7 +27,7 @@ int _PyCfg_OptimizeCodeUnit(struct _PyCfgBuilder *g, PyObject *consts, PyObject
struct _PyCfgBuilder* _PyCfg_FromInstructionSequence(_PyInstructionSequence *seq);
int _PyCfg_ToInstructionSequence(struct _PyCfgBuilder *g, _PyInstructionSequence *seq);
int _PyCfg_OptimizedCfgToInstructionSequence(struct _PyCfgBuilder *g, _PyCompile_CodeUnitMetadata *umd,
int code_flags, int *stackdepth, int *nlocalsplus,
int *stackdepth, int *nlocalsplus,
_PyInstructionSequence *seq);
PyCodeObject *

View File

@ -73,6 +73,7 @@ int _PyInstructionSequence_SetAnnotationsCode(_PyInstructionSequence *seq,
_PyInstructionSequence *annotations);
int _PyInstructionSequence_AddNested(_PyInstructionSequence *seq, _PyInstructionSequence *nested);
void PyInstructionSequence_Fini(_PyInstructionSequence *seq);
_PyInstruction _PyInstructionSequence_GetInstruction(_PyInstructionSequence *seq, int pos);
extern PyTypeObject _PyInstructionSequence_Type;
#define _PyInstructionSequence_Check(v) Py_IS_TYPE((v), &_PyInstructionSequence_Type)

View File

@ -290,6 +290,7 @@ Known values:
Python 3.15a4 3657 (Add BINARY_OP_SUBSCR_USTR_INT)
Python 3.15a4 3658 (Optimize bytecode for list/set called on genexp)
Python 3.15a4 3659 (Add CALL_FUNCTION_EX specialization)
Python 3.15a4 3660 (Change generator preamble code)
Python 3.16 will start with 3700
@ -303,7 +304,7 @@ PC/launcher.c must also be updated.
*/
#define PYC_MAGIC_NUMBER 3659
#define PYC_MAGIC_NUMBER 3660
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
(little-endian) and then appending b'\r\n'. */
#define PYC_MAGIC_NUMBER_TOKEN \

View File

@ -1298,7 +1298,7 @@ class TestSpecifics(unittest.TestCase):
x
in
y)
genexp_lines = [0, 4, 2, 0, 4]
genexp_lines = [4, 0, 4, 2, 0, 4]
genexp_code = return_genexp.__code__.co_consts[0]
code_lines = self.get_code_lines(genexp_code)

View File

@ -875,11 +875,11 @@ dis_nested_2 = """%s
Disassembly of <code object <genexpr> at 0x..., file "%s", line %d>:
-- COPY_FREE_VARS 1
%4d RETURN_GENERATOR
%4d LOAD_FAST 0 (.0)
GET_ITER
RETURN_GENERATOR
POP_TOP
L1: RESUME 0
LOAD_FAST 0 (.0)
GET_ITER
L2: FOR_ITER 14 (to L3)
STORE_FAST 1 (z)
LOAD_DEREF 2 (x)
@ -897,7 +897,7 @@ Disassembly of <code object <genexpr> at 0x..., file "%s", line %d>:
-- L4: CALL_INTRINSIC_1 3 (INTRINSIC_STOPITERATION_ERROR)
RERAISE 1
ExceptionTable:
L1 to L4 -> L4 [0] lasti
L1 to L4 -> L4 [2] lasti
""" % (dis_nested_1,
__file__,
_h.__code__.co_firstlineno + 3,

View File

@ -357,21 +357,25 @@ class ModifyUnderlyingIterableTest(unittest.TestCase):
yield x
return gen(range(10))
def process_tests(self, get_generator):
for obj in self.iterables:
g_obj = get_generator(obj)
with self.subTest(g_obj=g_obj, obj=obj):
self.assertListEqual(list(g_obj), list(obj))
def process_tests(self, get_generator, changes_iterable):
if changes_iterable:
for obj in self.iterables:
g_obj = get_generator(obj)
with self.subTest(g_obj=g_obj, obj=obj):
self.assertListEqual(list(g_obj), list(obj))
g_iter = get_generator(iter(obj))
with self.subTest(g_iter=g_iter, obj=obj):
self.assertListEqual(list(g_iter), list(obj))
g_iter = get_generator(iter(obj))
with self.subTest(g_iter=g_iter, obj=obj):
self.assertListEqual(list(g_iter), list(obj))
err_regex = "'.*' object is not iterable"
for obj in self.non_iterables:
g_obj = get_generator(obj)
with self.subTest(g_obj=g_obj):
self.assertRaisesRegex(TypeError, err_regex, list, g_obj)
if changes_iterable:
self.assertRaisesRegex(TypeError, err_regex, list, g_obj)
else:
next(g_obj)
def test_modify_f_locals(self):
def modify_f_locals(g, local, obj):
@ -384,22 +388,22 @@ class ModifyUnderlyingIterableTest(unittest.TestCase):
def get_generator_genfunc(obj):
return modify_f_locals(self.genfunc(), 'it', obj)
self.process_tests(get_generator_genexpr)
self.process_tests(get_generator_genfunc)
self.process_tests(get_generator_genexpr, False)
self.process_tests(get_generator_genfunc, True)
def test_new_gen_from_gi_code(self):
def new_gen_from_gi_code(g, obj):
generator_func = types.FunctionType(g.gi_code, {})
return generator_func(obj)
def get_generator_genexpr(obj):
return new_gen_from_gi_code(self.genexpr(), obj)
for obj in self.non_iterables:
with self.assertRaises(TypeError):
new_gen_from_gi_code(self.genexpr(), obj)
def get_generator_genfunc(obj):
return new_gen_from_gi_code(self.genfunc(), obj)
self.process_tests(get_generator_genexpr)
self.process_tests(get_generator_genfunc)
self.process_tests(get_generator_genfunc, True)
class ExceptionTest(unittest.TestCase):

View File

@ -0,0 +1,3 @@
Generator expressions in 3.15 now conform to the documented behavior when
the iterable does not support iteration. This matches the behavior in 3.14
and earlier.

View File

@ -3189,14 +3189,15 @@ dummy_func(
#ifdef Py_STATS
_Py_GatherStats_GetIter(iterable);
#endif
/* before: [obj]; after [getiter(obj)] */
PyTypeObject *tp = PyStackRef_TYPE(iterable);
if (tp == &PyTuple_Type || tp == &PyList_Type) {
/* Leave iterable on stack and push tagged 0 */
iter = iterable;
DEAD(iterable);
index_or_null = PyStackRef_TagInt(0);
}
else {
/* Pop iterable, and push iterator then NULL */
PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable));
PyStackRef_CLOSE(iterable);
ERROR_IF(iter_o == NULL);
@ -5033,7 +5034,7 @@ dummy_func(
PyFunctionObject *func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
ERROR_IF(gen == NULL);
assert(STACK_LEVEL() == 0);
assert(STACK_LEVEL() <= 2);
SAVE_STACK();
_PyInterpreterFrame *gen_frame = &gen->gi_iframe;
frame->instr_ptr++;

View File

@ -227,19 +227,25 @@ static int codegen_call_helper(compiler *c, location loc,
static int codegen_try_except(compiler *, stmt_ty);
static int codegen_try_star_except(compiler *, stmt_ty);
/* Describes where the outermost iterable (or iterator) of a comprehension is
 * when code generation for its generators begins. The comprehension
 * generator helpers use it to decide whether they must load the iterable
 * and whether they must emit GET_ITER / GET_AITER themselves. */
typedef enum {
/* Nothing is on the stack yet. For the outermost generator the iterable is
 * loaded from local 0 (the implicit argument); GET_ITER / GET_AITER is
 * then emitted by the generator helper. */
ITERABLE_IN_LOCAL = 0,
/* The iterable has already been evaluated onto the stack (the inlined
 * comprehension case); GET_ITER / GET_AITER is still emitted by the
 * generator helper. */
ITERABLE_ON_STACK = 1,
/* The iterator is already on the stack (the generator-expression case,
 * where LOAD_FAST 0 and GET_ITER / GET_AITER were inserted at the start of
 * the instruction sequence), so the generator helper emits neither. */
ITERATOR_ON_STACK = 2,
} IterStackPosition;
static int codegen_sync_comprehension_generator(
compiler *c, location loc,
asdl_comprehension_seq *generators, int gen_index,
int depth,
expr_ty elt, expr_ty val, int type,
int iter_on_stack);
IterStackPosition iter_pos);
static int codegen_async_comprehension_generator(
compiler *c, location loc,
asdl_comprehension_seq *generators, int gen_index,
int depth,
expr_ty elt, expr_ty val, int type,
int iter_on_stack);
IterStackPosition iter_pos);
static int codegen_pattern(compiler *, pattern_ty, pattern_context *);
static int codegen_match(compiler *, stmt_ty);
@ -665,6 +671,18 @@ codegen_enter_scope(compiler *c, identifier name, int scope_type,
if (scope_type == COMPILE_SCOPE_MODULE) {
loc.lineno = 0;
}
/* Add the generator prefix instructions. */
PySTEntryObject *ste = SYMTABLE_ENTRY(c);
if (ste->ste_coroutine || ste->ste_generator) {
/* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
* of 0. This is because RETURN_GENERATOR pushes the generator
before returning. */
location loc = LOCATION(lineno, lineno, -1, -1);
ADDOP(c, loc, RETURN_GENERATOR);
ADDOP(c, loc, POP_TOP);
}
ADDOP_I(c, loc, RESUME, RESUME_AT_FUNC_START);
if (scope_type == COMPILE_SCOPE_MODULE) {
ADDOP(c, loc, ANNOTATIONS_PLACEHOLDER);
@ -1187,10 +1205,15 @@ codegen_wrap_in_stopiteration_handler(compiler *c)
{
NEW_JUMP_TARGET_LABEL(c, handler);
/* Insert SETUP_CLEANUP at start */
/* Insert SETUP_CLEANUP just before RESUME */
instr_sequence *seq = INSTR_SEQUENCE(c);
int resume = 0;
while (_PyInstructionSequence_GetInstruction(seq, resume).i_opcode != RESUME) {
resume++;
}
RETURN_IF_ERROR(
_PyInstructionSequence_InsertInstruction(
INSTR_SEQUENCE(c), 0,
seq, resume,
SETUP_CLEANUP, handler.id, NO_LOCATION));
ADDOP_LOAD_CONST(c, NO_LOCATION, Py_None);
@ -4401,18 +4424,18 @@ codegen_comprehension_generator(compiler *c, location loc,
asdl_comprehension_seq *generators, int gen_index,
int depth,
expr_ty elt, expr_ty val, int type,
int iter_on_stack)
IterStackPosition iter_pos)
{
comprehension_ty gen;
gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);
if (gen->is_async) {
return codegen_async_comprehension_generator(
c, loc, generators, gen_index, depth, elt, val, type,
iter_on_stack);
iter_pos);
} else {
return codegen_sync_comprehension_generator(
c, loc, generators, gen_index, depth, elt, val, type,
iter_on_stack);
iter_pos);
}
}
@ -4421,7 +4444,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
asdl_comprehension_seq *generators,
int gen_index, int depth,
expr_ty elt, expr_ty val, int type,
int iter_on_stack)
IterStackPosition iter_pos)
{
/* generate code for the iterator, then each of the ifs,
and then write to the element */
@ -4433,7 +4456,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
comprehension_ty gen = (comprehension_ty)asdl_seq_GET(generators,
gen_index);
if (!iter_on_stack) {
if (iter_pos == ITERABLE_IN_LOCAL) {
if (gen_index == 0) {
assert(METADATA(c)->u_argcount == 1);
ADDOP_I(c, loc, LOAD_FAST, 0);
@ -4468,9 +4491,12 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
}
if (IS_JUMP_TARGET_LABEL(start)) {
depth += 2;
ADDOP(c, LOC(gen->iter), GET_ITER);
if (iter_pos != ITERATOR_ON_STACK) {
ADDOP(c, LOC(gen->iter), GET_ITER);
depth += 1;
}
USE_LABEL(c, start);
depth += 1;
ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor);
}
VISIT(c, expr, gen->target);
@ -4486,7 +4512,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
RETURN_IF_ERROR(
codegen_comprehension_generator(c, loc,
generators, gen_index, depth,
elt, val, type, 0));
elt, val, type, ITERABLE_IN_LOCAL));
}
location elt_loc = LOC(elt);
@ -4545,7 +4571,7 @@ codegen_async_comprehension_generator(compiler *c, location loc,
asdl_comprehension_seq *generators,
int gen_index, int depth,
expr_ty elt, expr_ty val, int type,
int iter_on_stack)
IterStackPosition iter_pos)
{
NEW_JUMP_TARGET_LABEL(c, start);
NEW_JUMP_TARGET_LABEL(c, send);
@ -4555,7 +4581,7 @@ codegen_async_comprehension_generator(compiler *c, location loc,
comprehension_ty gen = (comprehension_ty)asdl_seq_GET(generators,
gen_index);
if (!iter_on_stack) {
if (iter_pos == ITERABLE_IN_LOCAL) {
if (gen_index == 0) {
assert(METADATA(c)->u_argcount == 1);
ADDOP_I(c, loc, LOAD_FAST, 0);
@ -4565,7 +4591,9 @@ codegen_async_comprehension_generator(compiler *c, location loc,
VISIT(c, expr, gen->iter);
}
}
ADDOP(c, LOC(gen->iter), GET_AITER);
if (iter_pos != ITERATOR_ON_STACK) {
ADDOP(c, LOC(gen->iter), GET_AITER);
}
USE_LABEL(c, start);
/* Runtime will push a block here, so we need to account for that */
@ -4795,11 +4823,13 @@ codegen_comprehension(compiler *c, expr_ty e, int type,
location loc = LOC(e);
outermost = (comprehension_ty) asdl_seq_GET(generators, 0);
IterStackPosition iter_state;
if (is_inlined) {
VISIT(c, expr, outermost->iter);
if (push_inlined_comprehension_state(c, loc, entry, &inline_state)) {
goto error;
}
iter_state = ITERABLE_ON_STACK;
}
else {
/* Receive outermost iter as an implicit argument */
@ -4810,6 +4840,23 @@ codegen_comprehension(compiler *c, expr_ty e, int type,
(void *)e, e->lineno, NULL, &umd) < 0) {
goto error;
}
if (type == COMP_GENEXP) {
/* Insert GET_ITER before RETURN_GENERATOR.
https://docs.python.org/3/reference/expressions.html#generator-expressions */
RETURN_IF_ERROR(
_PyInstructionSequence_InsertInstruction(
INSTR_SEQUENCE(c), 0,
LOAD_FAST, 0, LOC(outermost->iter)));
RETURN_IF_ERROR(
_PyInstructionSequence_InsertInstruction(
INSTR_SEQUENCE(c), 1,
outermost->is_async ? GET_AITER : GET_ITER,
0, LOC(outermost->iter)));
iter_state = ITERATOR_ON_STACK;
}
else {
iter_state = ITERABLE_IN_LOCAL;
}
}
Py_CLEAR(entry);
@ -4836,9 +4883,8 @@ codegen_comprehension(compiler *c, expr_ty e, int type,
ADDOP_I(c, loc, SWAP, 2);
}
}
if (codegen_comprehension_generator(c, loc, generators, 0, 0,
elt, val, type, is_inlined) < 0) {
elt, val, type, iter_state) < 0) {
goto error_in_scope;
}

View File

@ -1443,7 +1443,7 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
int stackdepth;
int nlocalsplus;
if (_PyCfg_OptimizedCfgToInstructionSequence(g, &u->u_metadata, code_flags,
if (_PyCfg_OptimizedCfgToInstructionSequence(g, &u->u_metadata,
&stackdepth, &nlocalsplus,
&optimized_instrs) < 0) {
goto error;
@ -1718,7 +1718,7 @@ _PyCompile_Assemble(_PyCompile_CodeUnitMetadata *umd, PyObject *filename,
int code_flags = 0;
int stackdepth, nlocalsplus;
if (_PyCfg_OptimizedCfgToInstructionSequence(g, umd, code_flags,
if (_PyCfg_OptimizedCfgToInstructionSequence(g, umd,
&stackdepth, &nlocalsplus,
&optimized_instrs) < 0) {
goto error;

View File

@ -10890,7 +10890,6 @@
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -1;
#endif
PyTypeObject *tp = PyStackRef_TYPE(iterable);
if (tp == &PyTuple_Type || tp == &PyList_Type) {
iter = iterable;
@ -16238,7 +16237,7 @@
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
assert(STACK_LEVEL() == 0);
assert(STACK_LEVEL() <= 2);
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyInterpreterFrame *gen_frame = &gen->gi_iframe;
frame->instr_ptr++;

View File

@ -850,7 +850,7 @@ calculate_stackdepth(cfg_builder *g)
int new_depth = depth + effects.net;
if (new_depth < 0) {
PyErr_Format(PyExc_ValueError,
"Invalid CFG, stack underflow");
"Invalid CFG, stack underflow at line %d", instr->i_loc.lineno);
goto error;
}
maxdepth = Py_MAX(maxdepth, depth);
@ -971,6 +971,9 @@ label_exception_targets(basicblock *entryblock) {
last_yield_except_depth = -1;
}
}
else if (instr->i_opcode == RETURN_GENERATOR) {
instr->i_except = NULL;
}
else {
instr->i_except = handler;
}
@ -3718,36 +3721,10 @@ error:
static int
insert_prefix_instructions(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock,
int *fixed, int nfreevars, int code_flags)
int *fixed, int nfreevars)
{
assert(umd->u_firstlineno > 0);
/* Add the generator prefix instructions. */
if (IS_GENERATOR(code_flags)) {
/* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
* of 0. This is because RETURN_GENERATOR pushes an element
* with _PyFrame_StackPush before switching stacks.
*/
location loc = LOCATION(umd->u_firstlineno, umd->u_firstlineno, -1, -1);
cfg_instr make_gen = {
.i_opcode = RETURN_GENERATOR,
.i_oparg = 0,
.i_loc = loc,
.i_target = NULL,
.i_except = NULL,
};
RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 0, &make_gen));
cfg_instr pop_top = {
.i_opcode = POP_TOP,
.i_oparg = 0,
.i_loc = loc,
.i_target = NULL,
.i_except = NULL,
};
RETURN_IF_ERROR(basicblock_insert_instruction(entryblock, 1, &pop_top));
}
/* Set up cells for any variable that escapes, to be put in a closure. */
const int ncellvars = (int)PyDict_GET_SIZE(umd->u_cellvars);
if (ncellvars) {
@ -3845,7 +3822,7 @@ fix_cell_offsets(_PyCompile_CodeUnitMetadata *umd, basicblock *entryblock, int *
}
static int
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_flags)
prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g)
{
assert(PyDict_GET_SIZE(umd->u_varnames) < INT_MAX);
assert(PyDict_GET_SIZE(umd->u_cellvars) < INT_MAX);
@ -3862,7 +3839,7 @@ prepare_localsplus(_PyCompile_CodeUnitMetadata *umd, cfg_builder *g, int code_fl
}
// This must be called before fix_cell_offsets().
if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
if (insert_prefix_instructions(umd, g->g_entryblock, cellfixedoffsets, nfreevars)) {
PyMem_Free(cellfixedoffsets);
return ERROR;
}
@ -3983,7 +3960,7 @@ _PyCfg_ToInstructionSequence(cfg_builder *g, _PyInstructionSequence *seq)
int
_PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
_PyCompile_CodeUnitMetadata *umd, int code_flags,
_PyCompile_CodeUnitMetadata *umd,
int *stackdepth, int *nlocalsplus,
_PyInstructionSequence *seq)
{
@ -3994,16 +3971,7 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g,
return ERROR;
}
/* prepare_localsplus adds instructions for generators that push
* and pop an item on the stack. This assertion makes sure there
* is space on the stack for that.
* It should always be true, because a generator must have at
* least one expression or call to INTRINSIC_STOPITERATION_ERROR,
* which requires stackspace.
*/
assert(!(IS_GENERATOR(code_flags) && *stackdepth == 0));
*nlocalsplus = prepare_localsplus(umd, g, code_flags);
*nlocalsplus = prepare_localsplus(umd, g);
if (*nlocalsplus < 0) {
return ERROR;
}

View File

@ -6226,7 +6226,6 @@
_Py_GatherStats_GetIter(iterable);
stack_pointer = _PyFrame_GetStackPointer(frame);
#endif
PyTypeObject *tp = PyStackRef_TYPE(iterable);
if (tp == &PyTuple_Type || tp == &PyList_Type) {
iter = iterable;
@ -10424,7 +10423,7 @@
if (gen == NULL) {
JUMP_TO_LABEL(error);
}
assert(STACK_LEVEL() == 0);
assert(STACK_LEVEL() <= 2);
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyInterpreterFrame *gen_frame = &gen->gi_iframe;
frame->instr_ptr++;

View File

@ -154,6 +154,13 @@ _PyInstructionSequence_InsertInstruction(instr_sequence *seq, int pos,
return SUCCESS;
}
/* Return, by value, the instruction stored at index `pos` of `seq`.
 *
 * `pos` must satisfy 0 <= pos < seq->s_used. This is checked with an
 * assert only, so callers are responsible for passing a valid index. */
_PyInstruction
_PyInstructionSequence_GetInstruction(instr_sequence *seq, int pos)
{
    assert(0 <= pos && pos < seq->s_used);
    _PyInstruction instr = seq->s_instrs[pos];
    return instr;
}
int
_PyInstructionSequence_SetAnnotationsCode(instr_sequence *seq,
instr_sequence *annotations)