gh-142863: optimize list and set calls with generator expressions (#142864)

This commit is contained in:
Kumar Aditya 2026-01-06 23:23:28 +05:30 committed by GitHub
parent e79c9b7031
commit 74bb3ca1f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 75 additions and 19 deletions

View File

@ -288,6 +288,7 @@ Known values:
Python 3.15a1 3655 (Fix miscompilation of some module-level annotations)
Python 3.15a2 3656 (Add TRACE_RECORD instruction, for platforms with switch based interpreter)
Python 3.15a4 3657 (Add BINARY_OP_SUBSCR_USTR_INT)
Python 3.15a4 3658 (Optimize bytecode for list/set called on genexp)
Python 3.16 will start with 3700
@ -301,7 +302,7 @@ PC/launcher.c must also be updated.
*/
#define PYC_MAGIC_NUMBER 3657
#define PYC_MAGIC_NUMBER 3658
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
(little-endian) and then appending b'\r\n'. */
#define PYC_MAGIC_NUMBER_TOKEN \

View File

@ -73,7 +73,9 @@ extern "C" {
#define CONSTANT_BUILTIN_TUPLE 2
#define CONSTANT_BUILTIN_ALL 3
#define CONSTANT_BUILTIN_ANY 4
#define NUM_COMMON_CONSTANTS 5
#define CONSTANT_BUILTIN_LIST 5
#define CONSTANT_BUILTIN_SET 6
#define NUM_COMMON_CONSTANTS 7
/* Values used in the oparg for RESUME */
#define RESUME_AT_FUNC_START 0

View File

@ -40,7 +40,8 @@ _intrinsic_1_descs = _opcode.get_intrinsic1_descs()
_intrinsic_2_descs = _opcode.get_intrinsic2_descs()
_special_method_names = _opcode.get_special_method_names()
_common_constants = [builtins.AssertionError, builtins.NotImplementedError,
builtins.tuple, builtins.all, builtins.any]
builtins.tuple, builtins.all, builtins.any, builtins.list,
builtins.set]
_nb_ops = _opcode.get_nb_ops()
hascompare = [opmap["COMPARE_OP"]]

View File

@ -246,7 +246,7 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
S = [10, 20, 30]
self.assertEqual(any(x > 42 for x in S), False)
def test_all_any_tuple_optimization(self):
def test_all_any_tuple_list_set_optimization(self):
def f_all():
return all(x-2 for x in [1,2,3])
@ -256,7 +256,13 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
def f_tuple():
return tuple(2*x for x in [1,2,3])
funcs = [f_all, f_any, f_tuple]
def f_list():
return list(2*x for x in [1,2,3])
def f_set():
return set(2*x for x in [1,2,3])
funcs = [f_all, f_any, f_tuple, f_list, f_set]
for f in funcs:
# check that generator code object is not duplicated
@ -266,33 +272,35 @@ class BuiltinTest(ComplexesAreIdenticalMixin, unittest.TestCase):
# check that overriding the builtins works
global all, any, tuple
saved = all, any, tuple
global all, any, tuple, list, set
saved = all, any, tuple, list, set
try:
all = lambda x : "all"
any = lambda x : "any"
tuple = lambda x : "tuple"
list = lambda x : "list"
set = lambda x : "set"
overridden_outputs = [f() for f in funcs]
finally:
all, any, tuple = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
all, any, tuple, list, set = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
# Now repeat, overriding the builtins module as well
saved = all, any, tuple
saved = all, any, tuple, list, set
try:
builtins.all = all = lambda x : "all"
builtins.any = any = lambda x : "any"
builtins.tuple = tuple = lambda x : "tuple"
builtins.list = list = lambda x : "list"
builtins.set = set = lambda x : "set"
overridden_outputs = [f() for f in funcs]
finally:
all, any, tuple = saved
builtins.all, builtins.any, builtins.tuple = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple'])
all, any, tuple, list, set = saved
builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
def test_ascii(self):
self.assertEqual(ascii(''), '\'\'')

View File

@ -827,7 +827,14 @@ Disassembly of <code object foo at 0x..., file "%s", line %d>:
%4d RESUME 0
%4d LOAD_GLOBAL 1 (list + NULL)
%4d LOAD_GLOBAL 0 (list)
COPY 1
LOAD_COMMON_CONSTANT 5 (list)
IS_OP 0 (is)
POP_JUMP_IF_FALSE 22 (to L3)
NOT_TAKEN
POP_TOP
BUILD_LIST 0
LOAD_FAST_BORROW 0 (x)
BUILD_TUPLE 1
LOAD_CONST %d (<code object <genexpr> at 0x..., file "%s", line %d>)
@ -835,6 +842,21 @@ Disassembly of <code object foo at 0x..., file "%s", line %d>:
SET_FUNCTION_ATTRIBUTE 8 (closure)
LOAD_DEREF 1 (y)
CALL 0
PUSH_NULL
L1: FOR_ITER 3 (to L2)
LIST_APPEND 3
JUMP_BACKWARD 5 (to L1)
L2: END_FOR
POP_ITER
RETURN_VALUE
L3: PUSH_NULL
LOAD_FAST_BORROW 0 (x)
BUILD_TUPLE 1
LOAD_CONST 1 (<code object <genexpr> at 0x..., file "%s", line %d>)
MAKE_FUNCTION
SET_FUNCTION_ATTRIBUTE 8 (closure)
LOAD_DEREF 1 (y)
CALL 0
CALL 1
RETURN_VALUE
""" % (dis_nested_0,
@ -845,6 +867,8 @@ Disassembly of <code object foo at 0x..., file "%s", line %d>:
1 if __debug__ else 0,
__file__,
_h.__code__.co_firstlineno + 3,
__file__,
_h.__code__.co_firstlineno + 3,
)
dis_nested_2 = """%s

View File

@ -0,0 +1 @@
Generate optimized bytecode when calling :class:`list` or :class:`set` with a generator expression.

View File

@ -3892,6 +3892,12 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "tuple")) {
const_oparg = CONSTANT_BUILTIN_TUPLE;
}
else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "list")) {
const_oparg = CONSTANT_BUILTIN_LIST;
}
else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "set")) {
const_oparg = CONSTANT_BUILTIN_SET;
}
if (const_oparg != -1) {
ADDOP_I(c, loc, COPY, 1); // the function
ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, const_oparg);
@ -3899,8 +3905,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, skip_optimization);
ADDOP(c, loc, POP_TOP);
if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
ADDOP_I(c, loc, BUILD_LIST, 0);
} else if (const_oparg == CONSTANT_BUILTIN_SET) {
ADDOP_I(c, loc, BUILD_SET, 0);
}
expr_ty generator_exp = asdl_seq_GET(args, 0);
VISIT(c, expr, generator_exp);
@ -3911,9 +3919,12 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
ADDOP(c, loc, PUSH_NULL); // Push NULL index for loop
USE_LABEL(c, loop);
ADDOP_JUMP(c, loc, FOR_ITER, cleanup);
if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
ADDOP_I(c, loc, LIST_APPEND, 3);
ADDOP_JUMP(c, loc, JUMP, loop);
} else if (const_oparg == CONSTANT_BUILTIN_SET) {
ADDOP_I(c, loc, SET_ADD, 3);
ADDOP_JUMP(c, loc, JUMP, loop);
}
else {
ADDOP(c, loc, TO_BOOL);
@ -3921,7 +3932,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
}
ADDOP(c, NO_LOCATION, POP_ITER);
if (const_oparg != CONSTANT_BUILTIN_TUPLE) {
if (const_oparg != CONSTANT_BUILTIN_TUPLE &&
const_oparg != CONSTANT_BUILTIN_LIST &&
const_oparg != CONSTANT_BUILTIN_SET) {
ADDOP_LOAD_CONST(c, loc, initial_res == Py_True ? Py_False : Py_True);
}
ADDOP_JUMP(c, loc, JUMP, end);
@ -3931,6 +3944,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
ADDOP(c, NO_LOCATION, POP_ITER);
if (const_oparg == CONSTANT_BUILTIN_TUPLE) {
ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_LIST_TO_TUPLE);
} else if (const_oparg == CONSTANT_BUILTIN_LIST) {
// result is already a list
} else if (const_oparg == CONSTANT_BUILTIN_SET) {
// result is already a set
}
else {
ADDOP_LOAD_CONST(c, loc, initial_res);

View File

@ -832,6 +832,8 @@ pycore_init_builtins(PyThreadState *tstate)
interp->common_consts[CONSTANT_BUILTIN_TUPLE] = (PyObject*)&PyTuple_Type;
interp->common_consts[CONSTANT_BUILTIN_ALL] = all;
interp->common_consts[CONSTANT_BUILTIN_ANY] = any;
interp->common_consts[CONSTANT_BUILTIN_LIST] = (PyObject*)&PyList_Type;
interp->common_consts[CONSTANT_BUILTIN_SET] = (PyObject*)&PySet_Type;
for (int i=0; i < NUM_COMMON_CONSTANTS; i++) {
assert(interp->common_consts[i] != NULL);