https://github.com/python/cpython/commit/5d416324c56cd6f262fa123f41b97b48631bea79
commit: 5d416324c56cd6f262fa123f41b97b48631bea79
branch: main
author: zSirius <[email protected]>
committer: Eclips4 <[email protected]>
date: 2026-04-26T15:15:24+03:00
summary:
gh-146455: Fix O(N²) in add_const() after constant folding moved to CFG (#146456)
The add_const() function in flowgraph.c uses a linear search over the
consts list to find the index of a constant. After gh-126835 moved
constant folding from the AST optimizer to the CFG optimizer, this
function is now called N times for N inner tuple elements during
fold_tuple_of_constants(), resulting in O(N²) total time.
Fix by maintaining an auxiliary _Py_hashtable_t that maps object
pointers to their indices in the consts list, providing O(1) lookup.
For a file with 100,000 constant 2-tuples:
- Before: 10.38s (add_const occupies 83.76% of CPU time)
- After: 1.48s
files:
A Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst
M Python/flowgraph.c
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst
new file mode 100644
index 00000000000000..4d7537f2529da6
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst
@@ -0,0 +1 @@
+Fix O(N²) compile-time regression in constant folding after it was moved from AST to CFG optimizer.
diff --git a/Python/flowgraph.c b/Python/flowgraph.c
index 202e3bacf2e1bf..2cb2d32a410613 100644
--- a/Python/flowgraph.c
+++ b/Python/flowgraph.c
@@ -6,6 +6,7 @@
#include "pycore_intrinsics.h"
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
#include "pycore_long.h" // _PY_IS_SMALL_INT()
+#include "pycore_hashtable.h" // _Py_hashtable_t
#include "pycore_opcode_utils.h"
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
@@ -1333,30 +1334,38 @@ get_const_value(int opcode, int oparg, PyObject
*co_consts)
// Steals a reference to newconst.
static int
-add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
+add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
+ _Py_hashtable_t *consts_index)
{
if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
Py_DECREF(newconst);
return -1;
}
- Py_ssize_t index;
- for (index = 0; index < PyList_GET_SIZE(consts); index++) {
- if (PyList_GET_ITEM(consts, index) == newconst) {
- break;
- }
+ _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void
*)newconst);
+ if (entry != NULL) {
+ Py_DECREF(newconst);
+ return (int)(uintptr_t)entry->value;
}
- if (index == PyList_GET_SIZE(consts)) {
- if ((size_t)index >= (size_t)INT_MAX - 1) {
- PyErr_SetString(PyExc_OverflowError, "too many constants");
- Py_DECREF(newconst);
- return -1;
- }
- if (PyList_Append(consts, newconst)) {
- Py_DECREF(newconst);
- return -1;
- }
+
+ Py_ssize_t index = PyList_GET_SIZE(consts);
+ if ((size_t)index >= (size_t)INT_MAX - 1) {
+ PyErr_SetString(PyExc_OverflowError, "too many constants");
+ Py_DECREF(newconst);
+ return -1;
+ }
+ if (PyList_Append(consts, newconst)) {
+ Py_DECREF(newconst);
+ return -1;
+ }
+
+ if (_Py_hashtable_set(consts_index, (void *)newconst, (void
*)(uintptr_t)index) < 0) {
+ PyList_SetSlice(consts, index, index + 1, NULL);
+ Py_DECREF(newconst);
+ PyErr_NoMemory();
+ return -1;
}
+
Py_DECREF(newconst);
return (int)index;
}
@@ -1432,7 +1441,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject
*newconst,
/* Steals reference to "newconst" */
static int
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
- PyObject *consts, PyObject *const_cache)
+ PyObject *consts, PyObject *const_cache,
+ _Py_hashtable_t *consts_index)
{
int res = maybe_instr_make_load_smallint(instr, newconst, consts,
const_cache);
if (res < 0) {
@@ -1442,7 +1452,7 @@ instr_make_load_const(cfg_instr *instr, PyObject
*newconst,
if (res > 0) {
return SUCCESS;
}
- int oparg = add_const(newconst, consts, const_cache);
+ int oparg = add_const(newconst, consts, const_cache, consts_index);
RETURN_IF_ERROR(oparg);
INSTR_SET_OP1(instr, LOAD_CONST, oparg);
return SUCCESS;
@@ -1455,7 +1465,8 @@ instr_make_load_const(cfg_instr *instr, PyObject
*newconst,
Called with codestr pointing to the first LOAD_CONST.
*/
static int
-fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject
*const_cache)
+fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
+ PyObject *const_cache, _Py_hashtable_t *consts_index)
{
/* Pre-conditions */
assert(PyDict_CheckExact(const_cache));
@@ -1492,7 +1503,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject
*consts, PyObject *const
}
nop_out(const_instrs, seq_size);
- return instr_make_load_const(instr, const_tuple, consts, const_cache);
+ return instr_make_load_const(instr, const_tuple, consts, const_cache,
consts_index);
}
/* Replace:
@@ -1510,7 +1521,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject
*consts, PyObject *const
*/
static int
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
- PyObject *consts, PyObject *const_cache)
+ PyObject *consts, PyObject *const_cache,
+ _Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@@ -1562,7 +1574,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int
i,
nop_out(&instr, 1);
}
assert(consts_found == 0);
- return instr_make_load_const(intrinsic, newconst, consts,
const_cache);
+ return instr_make_load_const(intrinsic, newconst, consts,
const_cache, consts_index);
}
if (expect_append) {
@@ -1598,7 +1610,8 @@ Optimize lists and sets for:
*/
static int
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
- PyObject *consts, PyObject *const_cache)
+ PyObject *consts, PyObject *const_cache,
+ _Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@@ -1648,7 +1661,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
Py_SETREF(const_result, frozenset);
}
- int index = add_const(const_result, consts, const_cache);
+ int index = add_const(const_result, consts, const_cache, consts_index);
RETURN_IF_ERROR(index);
nop_out(const_instrs, seq_size);
@@ -1845,7 +1858,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
}
static int
-fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject
*const_cache)
+fold_const_binop(basicblock *bb, int i, PyObject *consts,
+ PyObject *const_cache, _Py_hashtable_t *consts_index)
{
#define BINOP_OPERAND_COUNT 2
assert(PyDict_CheckExact(const_cache));
@@ -1887,7 +1901,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts,
PyObject *const_cache)
}
nop_out(operands_instrs, BINOP_OPERAND_COUNT);
- return instr_make_load_const(binop, newconst, consts, const_cache);
+ return instr_make_load_const(binop, newconst, consts, const_cache,
consts_index);
}
static PyObject *
@@ -1933,7 +1947,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int
oparg)
}
static int
-fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject
*const_cache)
+fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
+ PyObject *const_cache, _Py_hashtable_t *consts_index)
{
#define UNARYOP_OPERAND_COUNT 1
assert(PyDict_CheckExact(const_cache));
@@ -1970,7 +1985,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject
*consts, PyObject *const_cach
assert(PyBool_Check(newconst));
}
nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
- return instr_make_load_const(unaryop, newconst, consts, const_cache);
+ return instr_make_load_const(unaryop, newconst, consts, const_cache,
consts_index);
}
#define VISITED (-1)
@@ -2165,7 +2180,8 @@ apply_static_swaps(basicblock *block, int i)
}
static int
-basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject
*consts)
+basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
+ PyObject *consts, _Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@@ -2283,7 +2299,7 @@ basicblock_optimize_load_const(PyObject *const_cache,
basicblock *bb, PyObject *
return ERROR;
}
cnt = PyBool_FromLong(is_true);
- int index = add_const(cnt, consts, const_cache);
+ int index = add_const(cnt, consts, const_cache, consts_index);
if (index < 0) {
return ERROR;
}
@@ -2297,15 +2313,17 @@ basicblock_optimize_load_const(PyObject *const_cache,
basicblock *bb, PyObject *
}
static int
-optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
+optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
+ _Py_hashtable_t *consts_index) {
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
- RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b,
consts));
+ RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts,
consts_index));
}
return SUCCESS;
}
static int
-optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
+optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
+ _Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@@ -2345,11 +2363,11 @@ optimize_basic_block(PyObject *const_cache, basicblock
*bb, PyObject *consts)
continue;
}
}
- RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts,
const_cache));
+ RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts,
const_cache, consts_index));
break;
case BUILD_LIST:
case BUILD_SET:
- RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts,
const_cache));
+ RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts,
const_cache, consts_index));
break;
case POP_JUMP_IF_NOT_NONE:
case POP_JUMP_IF_NONE:
@@ -2484,7 +2502,7 @@ optimize_basic_block(PyObject *const_cache, basicblock
*bb, PyObject *consts)
_Py_FALLTHROUGH;
case UNARY_INVERT:
case UNARY_NEGATIVE:
- RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts,
const_cache));
+ RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache,
consts_index));
break;
case CALL_INTRINSIC_1:
if (oparg == INTRINSIC_LIST_TO_TUPLE) {
@@ -2492,15 +2510,15 @@ optimize_basic_block(PyObject *const_cache, basicblock
*bb, PyObject *consts)
INSTR_SET_OP0(inst, NOP);
}
else {
-
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts,
const_cache));
+
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts,
const_cache, consts_index));
}
}
else if (oparg == INTRINSIC_UNARY_POSITIVE) {
- RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts,
const_cache));
+ RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts,
const_cache, consts_index));
}
break;
case BINARY_OP:
- RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
+ RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache,
consts_index));
break;
}
}
@@ -2545,16 +2563,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
NOPs. Later those NOPs are removed.
*/
static int
-optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int
firstlineno)
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
+ _Py_hashtable_t *consts_index, int firstlineno)
{
assert(PyDict_CheckExact(const_cache));
RETURN_IF_ERROR(check_cfg(g));
RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
- RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
+ RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
- RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
+ RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts,
consts_index));
}
RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
@@ -3674,7 +3693,33 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject
*consts, PyObject *const_cache,
RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
/** Optimization **/
- RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
+
+ _Py_hashtable_t *consts_index = _Py_hashtable_new(
+ _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
+ if (consts_index == NULL) {
+ PyErr_NoMemory();
+ return ERROR;
+ }
+
+ for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
+ PyObject *item = PyList_GET_ITEM(consts, i);
+ if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
+ continue;
+ }
+ if (_Py_hashtable_set(consts_index, (void *)item,
+ (void *)(uintptr_t)i) < 0) {
+ _Py_hashtable_destroy(consts_index);
+ PyErr_NoMemory();
+ return ERROR;
+ }
+ }
+
+ int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
+
+ _Py_hashtable_destroy(consts_index);
+
+ RETURN_IF_ERROR(ret);
+
RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
RETURN_IF_ERROR(
add_checks_for_loads_of_uninitialized_variables(
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]