https://github.com/python/cpython/commit/618b726d68ccd7ae933ced615fd384d62a42ac51
commit: 618b726d68ccd7ae933ced615fd384d62a42ac51
branch: main
author: Hai Zhu <[email protected]>
committer: markshannon <[email protected]>
date: 2026-04-24T10:37:01+01:00
summary:
gh-146073: Add fitness/exit quality mechanism for JIT trace frontend (GH-148089)
* Replaces ad-hoc logic for ending traces with a simple inequality:
  `fitness < exit_quality` (a rough sketch of the rule follows this summary)
* Fitness starts high and is reduced for branches, backward edges, calls, and
  trace length
* Exit quality reflects how good a spot that instruction is to end a trace:
  closing a loop is very high, specializable instructions are very low, and
  the others fall in between.
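A rough, stand-alone sketch of the rule (the helper names here are made up for
illustration; the real logic lives in _PyJit_translate_single_bytecode_to_trace
in Python/optimizer.c below):

    #include <stdbool.h>
    #include <stdint.h>

    /* The trace stops as soon as the remaining fitness budget is lower than
     * the quality of the current exit point. */
    static bool
    trace_should_stop(int32_t fitness, int32_t exit_quality)
    {
        return fitness < exit_quality;
    }

    /* Each translated bytecode is charged for the trace-buffer slots it
     * consumed plus any branch/frame penalties, so long or branchy traces
     * run out of fitness sooner. */
    static int32_t
    charge_bytecode(int32_t fitness, int32_t slots_used, int32_t penalties)
    {
        return fitness - slots_used - penalties;
    }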
files:
M Include/cpython/pystats.h
M Include/internal/pycore_interp_structs.h
M Include/internal/pycore_optimizer.h
M Lib/test/test_capi/test_opt.py
M Modules/_testinternalcapi/test_cases.c.h
M Python/bytecodes.c
M Python/generated_cases.c.h
M Python/optimizer.c
M Python/pystate.c
M Python/pystats.c
diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index e473110eca7415..5d1f44988a6df1 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -144,6 +144,7 @@ typedef struct _optimization_stats {
uint64_t unknown_callee;
uint64_t trace_immediately_deopts;
uint64_t executors_invalidated;
+ uint64_t fitness_terminated_traces;
UOpStats opcode[PYSTATS_MAX_UOP_ID + 1];
uint64_t unsupported_opcode[256];
uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 2bfb84da36cbc8..01adadd1485189 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -449,6 +449,9 @@ typedef struct _PyOptimizationConfig {
uint16_t side_exit_initial_value;
uint16_t side_exit_initial_backoff;
+ // Trace fitness thresholds
+ uint16_t fitness_initial;
+
// Optimization flags
bool specialization_enabled;
bool uops_optimize_enabled;
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index a809a7e25526f6..7c2e0e95a80c3f 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -15,6 +15,50 @@ extern "C" {
#include "pycore_optimizer_types.h"
#include <stdbool.h>
+/* Fitness controls how long a trace can grow.
+ * Starts at FITNESS_INITIAL, then decreases from per-bytecode buffer usage
+ * plus branch/frame heuristics. The trace stops when fitness drops below the
+ * current exit_quality.
+ *
+ * Design targets for the constants below:
+ * 1. Reaching the abstract frame-depth limit should drop fitness below
+ * EXIT_QUALITY_SPECIALIZABLE.
+ * 2. A backward edge should leave budget for roughly N_BACKWARD_SLACK more
+ * bytecodes, assuming AVG_SLOTS_PER_INSTRUCTION.
+ * 3. Roughly seven balanced branches should reduce fitness to
+ * EXIT_QUALITY_DEFAULT after per-slot costs.
+ * 4. A push followed by a matching return is net-zero on frame-specific
+ * fitness, excluding per-slot costs.
+ */
+#define MAX_TARGET_LENGTH (UOP_MAX_TRACE_LENGTH / 2)
+#define OPTIMIZER_EFFECTIVENESS 2
+#define FITNESS_INITIAL (MAX_TARGET_LENGTH * OPTIMIZER_EFFECTIVENESS)
+
+/* Exit quality thresholds: trace stops when fitness < exit_quality.
+ * Higher = trace is more willing to stop here. */
+#define EXIT_QUALITY_CLOSE_LOOP (FITNESS_INITIAL - AVG_SLOTS_PER_INSTRUCTION*4)
+#define EXIT_QUALITY_ENTER_EXECUTOR (FITNESS_INITIAL * 1 / 8)
+#define EXIT_QUALITY_DEFAULT (FITNESS_INITIAL / 40)
+#define EXIT_QUALITY_SPECIALIZABLE (FITNESS_INITIAL / 80)
+
+/* Estimated buffer slots per bytecode, used only to derive heuristics.
+ * Runtime charging uses trace-buffer capacity consumed for each bytecode. */
+#define AVG_SLOTS_PER_INSTRUCTION 6
+
+/* Heuristic backward-edge exit quality: leave room for about 1 unroll and
+ * N_BACKWARD_SLACK more bytecodes before reaching EXIT_QUALITY_CLOSE_LOOP,
+ * based on AVG_SLOTS_PER_INSTRUCTION. */
+#define N_BACKWARD_SLACK 10
+#define EXIT_QUALITY_BACKWARD_EDGE (EXIT_QUALITY_CLOSE_LOOP / 2 - N_BACKWARD_SLACK * AVG_SLOTS_PER_INSTRUCTION)
+
+/* Penalty for a balanced branch.
+ * It is sized so repeated balanced branches can drive a trace toward
+ * EXIT_QUALITY_DEFAULT, while compute_branch_penalty() keeps any single branch
+ * from dominating the budget.
+ */
+#define FITNESS_BRANCH_BALANCED ((FITNESS_INITIAL - EXIT_QUALITY_DEFAULT - \
+                                 (MAX_TARGET_LENGTH / 14 * AVG_SLOTS_PER_INSTRUCTION)) / (14))
+
typedef struct _PyJitUopBuffer {
_PyUOpInstruction *start;
@@ -103,7 +147,8 @@ typedef struct _PyJitTracerPreviousState {
} _PyJitTracerPreviousState;
typedef struct _PyJitTracerTranslatorState {
- int jump_backward_seen;
+    int32_t fitness;     // Current trace fitness, starts high, decrements
+ int frame_depth; // Current inline depth (0 = root frame)
} _PyJitTracerTranslatorState;
typedef struct _PyJitTracerState {
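For a rough sense of scale, assuming UOP_MAX_TRACE_LENGTH is 800 purely for
illustration (its actual value is defined elsewhere and not shown in this
diff), the macros above work out to:

    MAX_TARGET_LENGTH           = 800 / 2               = 400
    FITNESS_INITIAL             = 400 * 2               = 800
    EXIT_QUALITY_CLOSE_LOOP     = 800 - 6 * 4           = 776
    EXIT_QUALITY_BACKWARD_EDGE  = 776 / 2 - 10 * 6      = 328
    EXIT_QUALITY_ENTER_EXECUTOR = 800 / 8               = 100
    EXIT_QUALITY_DEFAULT        = 800 / 40              = 20
    EXIT_QUALITY_SPECIALIZABLE  = 800 / 80              = 10
    FITNESS_BRANCH_BALANCED     = (800 - 20 - 168) / 14 = 43

so reaching the loop-closing instruction ends the trace almost immediately,
while a merely specializable instruction only ends it once the budget is
nearly exhausted.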
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 59266b000ed4df..39075fc64cf02b 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -1427,9 +1427,13 @@ def testfunc(n):
for _ in gen(n):
pass
testfunc(TIER2_THRESHOLD * 2)
+ # The generator may be inlined into testfunc's trace,
+ # so check whichever executor contains _YIELD_VALUE.
gen_ex = get_first_executor(gen)
- self.assertIsNotNone(gen_ex)
- uops = get_opnames(gen_ex)
+ testfunc_ex = get_first_executor(testfunc)
+ ex = gen_ex or testfunc_ex
+ self.assertIsNotNone(ex)
+ uops = get_opnames(ex)
self.assertNotIn("_MAKE_HEAP_SAFE", uops)
self.assertIn("_YIELD_VALUE", uops)
diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h
index cd579491e4cd2e..8897854078bd45 100644
--- a/Modules/_testinternalcapi/test_cases.c.h
+++ b/Modules/_testinternalcapi/test_cases.c.h
@@ -5913,7 +5913,7 @@
int og_oparg = (oparg & ~255) | executor->vm_data.oparg;
next_instr = this_instr;
if (_PyJit_EnterExecutorShouldStopTracing(og_opcode)) {
- if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[og_opcode]]) {
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
}
opcode = og_opcode;
@@ -12500,7 +12500,10 @@
tracer->prev_state.instr_frame = frame;
tracer->prev_state.instr_oparg = oparg;
        tracer->prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
- if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]
+ // Branch opcodes use the cache for branch history, not
+ // specialization counters. Don't reset it.
+ && !IS_CONDITIONAL_JUMP_OPCODE(opcode)) {
(&next_instr[1])->counter = trigger_backoff_counter();
}
    const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 7de889b93b71a7..59db0eb399b121 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3529,7 +3529,7 @@ dummy_func(
int og_oparg = (oparg & ~255) | executor->vm_data.oparg;
next_instr = this_instr;
if (_PyJit_EnterExecutorShouldStopTracing(og_opcode)) {
- if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[og_opcode]]) {
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
}
opcode = og_opcode;
@@ -6541,7 +6541,10 @@ dummy_func(
tracer->prev_state.instr_frame = frame;
tracer->prev_state.instr_oparg = oparg;
        tracer->prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
- if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]
+ // Branch opcodes use the cache for branch history, not
+ // specialization counters. Don't reset it.
+ && !IS_CONDITIONAL_JUMP_OPCODE(opcode)) {
(&next_instr[1])->counter = trigger_backoff_counter();
}
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index e84886ed04020c..dccee0e4a3b110 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -5913,7 +5913,7 @@
int og_oparg = (oparg & ~255) | executor->vm_data.oparg;
next_instr = this_instr;
if (_PyJit_EnterExecutorShouldStopTracing(og_opcode)) {
- if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[og_opcode]]) {
PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter);
}
opcode = og_opcode;
@@ -12497,7 +12497,10 @@
tracer->prev_state.instr_frame = frame;
tracer->prev_state.instr_oparg = oparg;
        tracer->prev_state.instr_stacklevel = PyStackRef_IsNone(frame->f_executable) ? 2 : STACK_LEVEL();
- if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) {
+ if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]
+ // Branch opcodes use the cache for branch history, not
+ // specialization counters. Don't reset it.
+ && !IS_CONDITIONAL_JUMP_OPCODE(opcode)) {
(&next_instr[1])->counter = trigger_backoff_counter();
}
    const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 60f3e541be25cf..a389c0f4072817 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -551,8 +551,6 @@ dynamic_exit_uop[MAX_UOP_ID + 1] = {
};
-#define CONFIDENCE_RANGE 1000
-#define CONFIDENCE_CUTOFF 333
#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
@@ -600,6 +598,54 @@ add_to_trace(
((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
+/* Branch penalty: 0 for a fully biased branch and FITNESS_BRANCH_BALANCED for
+ * a balanced or fully off-trace branch. This keeps any single branch from
+ * consuming more than one balanced-branch cost.
+ */
+static inline int
+compute_branch_penalty(uint16_t history)
+{
+ bool branch_taken = history & 1;
+ int taken_count = _Py_popcount32((uint32_t)history);
+ int on_trace_count = branch_taken ? taken_count : 16 - taken_count;
+ int off_trace = 16 - on_trace_count;
+ int penalty = off_trace * FITNESS_BRANCH_BALANCED / 8;
+ if (penalty > FITNESS_BRANCH_BALANCED) {
+ penalty = FITNESS_BRANCH_BALANCED;
+ }
+ return penalty;
+}
+
+/* Compute exit quality for the current trace position.
+ * Higher values mean better places to stop the trace. */
+static inline int32_t
+compute_exit_quality(_Py_CODEUNIT *target_instr, int opcode,
+ const _PyJitTracerState *tracer)
+{
+ if (target_instr == tracer->initial_state.close_loop_instr) {
+ return EXIT_QUALITY_CLOSE_LOOP;
+ }
+ else if (target_instr->op.code == ENTER_EXECUTOR) {
+ return EXIT_QUALITY_ENTER_EXECUTOR;
+ }
+ else if (opcode == JUMP_BACKWARD_JIT ||
+ opcode == JUMP_BACKWARD ||
+ opcode == JUMP_BACKWARD_NO_INTERRUPT) {
+ return EXIT_QUALITY_BACKWARD_EDGE;
+ }
+ else if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]] > 0) {
+ return EXIT_QUALITY_SPECIALIZABLE;
+ }
+ return EXIT_QUALITY_DEFAULT;
+}
+
+/* Frame penalty: (MAX_ABSTRACT_FRAME_DEPTH-1) pushes exhaust fitness. */
+static inline int32_t
+compute_frame_penalty(uint16_t fitness_initial)
+{
+ return (int32_t)fitness_initial / (MAX_ABSTRACT_FRAME_DEPTH - 1) + 1;
+}
+
static int
is_terminator(const _PyUOpInstruction *uop)
{
@@ -736,13 +782,11 @@ _PyJit_translate_single_bytecode_to_trace(
DPRINTF(2, "Unsupported: oparg too large\n");
unsupported:
{
- // Rewind to previous instruction and replace with _EXIT_TRACE.
_PyUOpInstruction *curr = uop_buffer_last(trace);
while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) {
trace->next--;
curr = uop_buffer_last(trace);
}
- assert(curr->opcode == _SET_IP || uop_buffer_length(trace) == 2);
if (curr->opcode == _SET_IP) {
int32_t old_target = (int32_t)uop_get_target(curr);
curr->opcode = _DEOPT;
@@ -765,11 +809,28 @@ _PyJit_translate_single_bytecode_to_trace(
return 1;
}
+ // Stop the trace if fitness has dropped below the exit quality threshold.
+ _PyJitTracerTranslatorState *ts = &tracer->translator_state;
+ int32_t eq = compute_exit_quality(target_instr, opcode, tracer);
+ DPRINTF(3, "Fitness check: %s(%d) fitness=%d, exit_quality=%d, depth=%d\n",
+ _PyOpcode_OpName[opcode], oparg, ts->fitness, eq, ts->frame_depth);
+
+ if (ts->fitness < eq) {
+        // Heuristic exit: leave operand1=0 so the side exit increments chain_depth.
+ ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
+ OPT_STAT_INC(fitness_terminated_traces);
+ DPRINTF(2, "Fitness terminated: %s(%d) fitness=%d < exit_quality=%d\n",
+ _PyOpcode_OpName[opcode], oparg, ts->fitness, eq);
+ goto done;
+ }
+
+ // Snapshot remaining space so the later fitness charge reflects all buffer
+ // space this bytecode consumed, including reserved tail slots.
+ int32_t remaining_before = uop_buffer_remaining_space(trace);
+
// One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
trace->end -= 2;
-    const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
-
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
assert(!_PyErr_Occurred(tstate));
@@ -790,13 +851,11 @@ _PyJit_translate_single_bytecode_to_trace(
// _GUARD_IP leads to an exit.
trace->end -= needs_guard_ip;
+#if Py_DEBUG
+    const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
    int space_needed = expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode));
- if (uop_buffer_remaining_space(trace) < space_needed) {
- DPRINTF(2, "No room for expansions and guards (need %d, got %d)\n",
- space_needed, uop_buffer_remaining_space(trace));
- OPT_STAT_INC(trace_too_long);
- goto done;
- }
+ assert(uop_buffer_remaining_space(trace) > space_needed);
+#endif
ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
@@ -818,6 +877,12 @@ _PyJit_translate_single_bytecode_to_trace(
            assert(jump_happened ? (next_instr == computed_jump_instr) : (next_instr == computed_next_instr));
            uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_happened];
            ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(jump_happened ? computed_next_instr : computed_jump_instr, old_code));
+ int bp = compute_branch_penalty(target_instr[1].cache);
+ tracer->translator_state.fitness -= bp;
+            DPRINTF(3, "    branch penalty: -%d (history=0x%04x, taken=%d) -> fitness=%d\n",
+ bp, target_instr[1].cache, jump_happened,
+ tracer->translator_state.fitness);
+
break;
}
case JUMP_BACKWARD_JIT:
@@ -825,29 +890,9 @@ _PyJit_translate_single_bytecode_to_trace(
case JUMP_BACKWARD_NO_JIT:
case JUMP_BACKWARD:
ADD_TO_TRACE(_CHECK_PERIODIC, 0, 0, target);
- _Py_FALLTHROUGH;
+ break;
case JUMP_BACKWARD_NO_INTERRUPT:
- {
-            if ((next_instr != tracer->initial_state.close_loop_instr) &&
-                (next_instr != tracer->initial_state.start_instr) &&
-                uop_buffer_length(&tracer->code_buffer) > CODE_SIZE_NO_PROGRESS &&
-                // For side exits, we don't want to terminate them early.
-                tracer->initial_state.exit == NULL &&
-                // These are coroutines, and we want to unroll those usually.
-                opcode != JUMP_BACKWARD_NO_INTERRUPT) {
-                // We encountered a JUMP_BACKWARD but not to the top of our own loop.
-                // We don't want to continue tracing as we might get stuck in the
-                // inner loop. Instead, end the trace where the executor of the
-                // inner loop might start and let the traces rejoin.
-                OPT_STAT_INC(inner_loop);
-                ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-                uop_buffer_last(trace)->operand1 = true; // is_control_flow
-                DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr,
-                        tracer->initial_state.close_loop_instr, tracer->initial_state.start_instr);
-                goto done;
-            }
break;
- }
case RESUME:
case RESUME_CHECK:
@@ -948,6 +993,39 @@ _PyJit_translate_single_bytecode_to_trace(
assert(next->op.code == STORE_FAST);
operand = next->op.arg;
}
+            else if (uop == _PUSH_FRAME) {
+                _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state;
+                ts_depth->frame_depth++;
+                assert(ts_depth->frame_depth < MAX_ABSTRACT_FRAME_DEPTH);
+                int32_t frame_penalty = compute_frame_penalty(tstate->interp->opt_config.fitness_initial);
+                ts_depth->fitness -= frame_penalty;
+                DPRINTF(3, "    _PUSH_FRAME: depth=%d, penalty=-%d -> fitness=%d\n",
+                        ts_depth->frame_depth, frame_penalty,
+                        ts_depth->fitness);
+            }
+            else if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) {
+                _PyJitTracerTranslatorState *ts_depth = &tracer->translator_state;
+                int32_t frame_penalty = compute_frame_penalty(tstate->interp->opt_config.fitness_initial);
+                if (ts_depth->frame_depth <= 0) {
+                    // Returning past the traced root is normal for guarded
+                    // caller continuation. Charge a small penalty so these
+                    // paths still terminate.
+                    int32_t underflow_penalty = frame_penalty / 4;
+                    ts_depth->fitness -= underflow_penalty;
+                    DPRINTF(3, "    %s: underflow penalty=-%d -> fitness=%d\n",
+                            _PyOpcode_uop_name[uop], underflow_penalty,
+                            ts_depth->fitness);
+                }
+                else {
+                    // Symmetric with push: net-zero frame impact.
+                    ts_depth->fitness += frame_penalty;
+                    ts_depth->frame_depth--;
+                    DPRINTF(3, "    %s: return reward=+%d, depth=%d -> fitness=%d\n",
+                            _PyOpcode_uop_name[uop], frame_penalty,
+                            ts_depth->frame_depth,
+                            ts_depth->fitness);
+                }
+            }
else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
                PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
tracer->prev_state.recorded_values[record_idx] = NULL;
@@ -990,13 +1068,20 @@ _PyJit_translate_single_bytecode_to_trace(
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
goto done;
}
- DPRINTF(2, "Trace continuing\n");
+ // Charge fitness by trace-buffer capacity consumed for this bytecode,
+ // including both emitted uops and tail reservations.
+ {
+        int32_t slots_used = remaining_before - uop_buffer_remaining_space(trace);
+ tracer->translator_state.fitness -= slots_used;
+ DPRINTF(3, " per-insn cost: -%d -> fitness=%d\n", slots_used,
+ tracer->translator_state.fitness);
+ }
+    DPRINTF(2, "Trace continuing (fitness=%d)\n", tracer->translator_state.fitness);
return 1;
done:
DPRINTF(2, "Trace done\n");
if (!is_terminator(uop_buffer_last(trace))) {
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
- uop_buffer_last(trace)->operand1 = true; // is_control_flow
}
return 0;
}
@@ -1077,6 +1162,13 @@ _PyJit_TryInitializeTracing(
    assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL));
tracer->initial_state.jump_backward_instr = curr_instr;
+ const _PyOptimizationConfig *cfg = &tstate->interp->opt_config;
+ _PyJitTracerTranslatorState *ts = &tracer->translator_state;
+ ts->fitness = cfg->fitness_initial;
+ ts->frame_depth = 0;
+ DPRINTF(3, "Fitness init: chain_depth=%d, fitness=%d\n",
+ chain_depth, ts->fitness);
+
tracer->is_tracing = true;
return 1;
}
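For illustration, a stand-alone re-implementation of compute_branch_penalty
with made-up values (FITNESS_BRANCH_BALANCED is taken as 43 from the rough
arithmetic earlier, and _Py_popcount32 is replaced by a compiler builtin):

    #include <stdint.h>
    #include <stdio.h>

    #define FITNESS_BRANCH_BALANCED 43   /* illustrative value only */

    static int
    branch_penalty(uint16_t history)
    {
        int branch_taken = history & 1;
        int taken_count = __builtin_popcount((uint32_t)history);
        int on_trace = branch_taken ? taken_count : 16 - taken_count;
        int off_trace = 16 - on_trace;
        int penalty = off_trace * FITNESS_BRANCH_BALANCED / 8;
        return penalty > FITNESS_BRANCH_BALANCED ? FITNESS_BRANCH_BALANCED : penalty;
    }

    int
    main(void)
    {
        printf("always on-trace : %d\n", branch_penalty(0xFFFF)); /* 0: fully biased branch is free */
        printf("50/50 history   : %d\n", branch_penalty(0x5555)); /* 43: one balanced-branch cost */
        printf("mostly off-trace: %d\n", branch_penalty(0x0001)); /* 43: capped at the balanced cost */
        return 0;
    }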
diff --git a/Python/pystate.c b/Python/pystate.c
index b7c838a1c156ae..2df24597e65785 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -630,6 +630,11 @@ init_interpreter(PyInterpreterState *interp,
"PYTHON_JIT_SIDE_EXIT_INITIAL_BACKOFF",
SIDE_EXIT_INITIAL_BACKOFF, 0, MAX_BACKOFF);
+ // Trace fitness configuration
+ init_policy(&interp->opt_config.fitness_initial,
+ "PYTHON_JIT_FITNESS_INITIAL",
+                FITNESS_INITIAL, EXIT_QUALITY_CLOSE_LOOP, UOP_MAX_TRACE_LENGTH - 1);
+
    interp->opt_config.specialization_enabled = !is_env_enabled("PYTHON_SPECIALIZATION_OFF");
    interp->opt_config.uops_optimize_enabled = !is_env_disabled("PYTHON_UOPS_OPTIMIZE");
if (interp != &runtime->_main_interpreter) {
diff --git a/Python/pystats.c b/Python/pystats.c
index a057ad884566d8..2fac2db1b738c7 100644
--- a/Python/pystats.c
+++ b/Python/pystats.c
@@ -274,6 +274,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
    fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
    fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
    fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);
+    fprintf(out, "Optimization fitness terminated: %" PRIu64 "\n", stats->fitness_terminated_traces);
print_histogram(out, "Trace length", stats->trace_length_hist);
print_histogram(out, "Trace run length", stats->trace_run_length_hist);