GH-132732: Use pure op machinery to optimize COMPARE_OP_INT/FLOAT/STR (#137062)

Co-authored-by: Ken Jin <kenjin4096@gmail.com>
This commit is contained in:
Savannah Bailey 2025-07-25 19:02:04 -07:00 committed by GitHub
parent 1e69cd1634
commit f7c380ef67
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 237 additions and 39 deletions

View File

@ -862,7 +862,8 @@ class TestUopsOptimization(unittest.TestCase):
uops = get_opnames(ex)
self.assertNotIn("_GUARD_TOS_INT", uops)
self.assertNotIn("_GUARD_NOS_INT", uops)
self.assertIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops)
# Try again, but between the runs, set the global to a float.
# This should result in no executor the second time.
ns = {}
@ -1462,7 +1463,8 @@ class TestUopsOptimization(unittest.TestCase):
self.assertEqual(res, 3)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_BINARY_OP_ADD_INT", uops)
self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops)
self.assertNotIn("_GUARD_NOS_INT", uops)
self.assertNotIn("_GUARD_TOS_INT", uops)
@ -1612,7 +1614,7 @@ class TestUopsOptimization(unittest.TestCase):
# But all of the appends we care about are still there:
self.assertEqual(uops.count("_CALL_LIST_APPEND"), len("ABCDEFG"))
def test_compare_pop_two_load_const_inline_borrow(self):
def test_compare_op_int_pop_two_load_const_inline_borrow(self):
def testfunc(n):
x = 0
for _ in range(n):
@ -1629,6 +1631,40 @@ class TestUopsOptimization(unittest.TestCase):
self.assertNotIn("_COMPARE_OP_INT", uops)
self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops)
def test_compare_op_str_pop_two_load_const_inline_borrow(self):
def testfunc(n):
x = 0
for _ in range(n):
a = "foo"
b = "foo"
if a == b:
x += 1
return x
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertEqual(res, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertNotIn("_COMPARE_OP_STR", uops)
self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops)
def test_compare_op_float_pop_two_load_const_inline_borrow(self):
def testfunc(n):
x = 0
for _ in range(n):
a = 1.0
b = 1.0
if a == b:
x += 1
return x
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
self.assertEqual(res, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
self.assertNotIn("_COMPARE_OP_FLOAT", uops)
self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops)
def test_to_bool_bool_contains_op_set(self):
"""
Test that _TO_BOOL_BOOL is removed from code like:

View File

@ -0,0 +1 @@
Optimize constant comparison for ``_COMPARE_OP_INT``, ``_COMPARE_OP_FLOAT`` and ``_COMPARE_OP_STR`` in JIT builds

View File

@ -28,6 +28,7 @@
#include "pycore_range.h"
#include "pycore_unicodeobject.h"
#include "pycore_ceval.h"
#include "pycore_floatobject.h"
#include <stdarg.h>
#include <stdbool.h>

View File

@ -430,31 +430,17 @@ dummy_func(void) {
}
op(_COMPARE_OP_INT, (left, right -- res)) {
if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
assert(PyLong_CheckExact(sym_get_const(ctx, left)));
assert(PyLong_CheckExact(sym_get_const(ctx, right)));
PyObject *tmp = PyObject_RichCompare(sym_get_const(ctx, left),
sym_get_const(ctx, right),
oparg >> 5);
if (tmp == NULL) {
goto error;
}
assert(PyBool_Check(tmp));
assert(_Py_IsImmortal(tmp));
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)tmp);
res = sym_new_const(ctx, tmp);
Py_DECREF(tmp);
}
else {
res = sym_new_type(ctx, &PyBool_Type);
}
REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
res = sym_new_type(ctx, &PyBool_Type);
}
op(_COMPARE_OP_FLOAT, (left, right -- res)) {
REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
res = sym_new_type(ctx, &PyBool_Type);
}
op(_COMPARE_OP_STR, (left, right -- res)) {
REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
res = sym_new_type(ctx, &PyBool_Type);
}

View File

@ -436,6 +436,14 @@
PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -479,6 +487,14 @@
PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -522,6 +538,14 @@
PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -584,6 +608,14 @@
}
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -629,6 +661,14 @@
}
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -674,6 +714,14 @@
}
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -746,6 +794,14 @@
res_stackref = PyStackRef_FromPyObjectSteal(res_o);
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
@ -1598,7 +1654,45 @@
}
case _COMPARE_OP_FLOAT: {
JitOptRef right;
JitOptRef left;
JitOptRef res;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (
sym_is_safe_const(ctx, left) &&
sym_is_safe_const(ctx, right)
) {
JitOptRef left_sym = left;
JitOptRef right_sym = right;
_PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
_PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
_PyStackRef res_stackref;
/* Start of uop copied from bytecodes for constant evaluation */
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
STAT_INC(COMPARE_OP, hit);
double dleft = PyFloat_AS_DOUBLE(left_o);
double dright = PyFloat_AS_DOUBLE(right_o);
int sign_ish = COMPARISON_BIT(dleft, dright);
PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc);
res_stackref = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False;
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
res = sym_new_type(ctx, &PyBool_Type);
stack_pointer[-2] = res;
stack_pointer += -1;
@ -1612,34 +1706,93 @@
JitOptRef res;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
assert(PyLong_CheckExact(sym_get_const(ctx, left)));
assert(PyLong_CheckExact(sym_get_const(ctx, right)));
PyObject *tmp = PyObject_RichCompare(sym_get_const(ctx, left),
sym_get_const(ctx, right),
oparg >> 5);
if (tmp == NULL) {
goto error;
if (
sym_is_safe_const(ctx, left) &&
sym_is_safe_const(ctx, right)
) {
JitOptRef left_sym = left;
JitOptRef right_sym = right;
_PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
_PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
_PyStackRef res_stackref;
/* Start of uop copied from bytecodes for constant evaluation */
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(_PyLong_IsCompact((PyLongObject *)left_o));
assert(_PyLong_IsCompact((PyLongObject *)right_o));
STAT_INC(COMPARE_OP, hit);
assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 &&
_PyLong_DigitCount((PyLongObject *)right_o) <= 1);
Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left_o);
Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o);
int sign_ish = COMPARISON_BIT(ileft, iright);
PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
res_stackref = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False;
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
assert(PyBool_Check(tmp));
assert(_Py_IsImmortal(tmp));
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)tmp);
res = sym_new_const(ctx, tmp);
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
Py_DECREF(tmp);
break;
}
else {
res = sym_new_type(ctx, &PyBool_Type);
stack_pointer += -1;
}
stack_pointer[-1] = res;
res = sym_new_type(ctx, &PyBool_Type);
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _COMPARE_OP_STR: {
JitOptRef right;
JitOptRef left;
JitOptRef res;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (
sym_is_safe_const(ctx, left) &&
sym_is_safe_const(ctx, right)
) {
JitOptRef left_sym = left;
JitOptRef right_sym = right;
_PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
_PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
_PyStackRef res_stackref;
/* Start of uop copied from bytecodes for constant evaluation */
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
STAT_INC(COMPARE_OP, hit);
int eq = _PyUnicode_Equal(left_o, right_o);
assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE);
PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc);
assert(eq == 0 || eq == 1);
assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS);
assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS);
res_stackref = ((COMPARISON_NOT_EQUALS + eq) & oparg) ? PyStackRef_True : PyStackRef_False;
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
break;
}
res = sym_new_type(ctx, &PyBool_Type);
stack_pointer[-2] = res;
stack_pointer += -1;
@ -2730,6 +2883,14 @@
res_stackref = PyStackRef_FromPyObjectSteal(res_o);
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
stack_pointer[-2] = res;
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());

View File

@ -232,6 +232,19 @@ class OptimizerEmitter(Emitter):
emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n")
else:
emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n")
if len(used_stack_inputs) == 2 and len(self.original_uop.stack.outputs) == 1:
outp = self.original_uop.stack.outputs[0]
if not outp.peek:
emitter.emit(f"""
if (sym_is_const(ctx, {outp.name})) {{
PyObject *result = sym_get_const(ctx, {outp.name});
if (_Py_IsImmortal(result)) {{
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}}
}}""")
storage.flush(self.out)
emitter.emit("break;\n")
emitter.emit("}\n")