From 60aaed74368f7735c989d6f650416ec4fce08419 Mon Sep 17 00:00:00 2001 From: Sacul0457Deve <183588943+Sacul0457@users.noreply.github.com.> Date: Sat, 11 Apr 2026 09:17:03 +0800 Subject: [PATCH 1/3] optimize _CALL_BUILTIN_FAST_WITH_KEYWORDS --- Include/internal/pycore_ceval.h | 2 +- Include/internal/pycore_opcode_metadata.h | 4 +-- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 10 ++++---- Lib/test/test_capi/test_opt.py | 7 ++++++ Modules/_testinternalcapi/test_cases.c.h | 30 +++++++++++++++++------ Python/bytecodes.c | 17 +++++++------ Python/ceval.c | 14 ++--------- Python/executor_cases.c.h | 20 +++++++-------- Python/generated_cases.c.h | 30 +++++++++++++++++------ Python/optimizer_bytecodes.c | 4 +++ Python/optimizer_cases.c.h | 10 +++----- 12 files changed, 92 insertions(+), 58 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index c12f72216b313f..d93d5e5f828c44 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -439,7 +439,7 @@ _Py_BuiltinCallFast_StackRef( int total_args); PyAPI_FUNC(PyObject *) -_Py_BuiltinCallFastWithKeywords_StackRefSteal( +_Py_BuiltinCallFastWithKeywords_StackRef( _PyStackRef callable, _PyStackRef *arguments, int total_args); diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5b50b58fc83b00..add868fc9b2628 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1125,7 +1125,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_SYNC_SP_FLAG | HAS_NEEDS_GUARD_IP_FLAG | HAS_RECORDS_VALUE_FLAG }, [CALL_BUILTIN_CLASS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | 
HAS_RECORDS_VALUE_FLAG }, [CALL_BUILTIN_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [CALL_BUILTIN_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [CALL_EX_NON_PY_GENERAL] = { true, INSTR_FMT_IXC, HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_EX_PY] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_SYNC_SP_FLAG | HAS_NEEDS_GUARD_IP_FLAG | HAS_RECORDS_VALUE_FLAG }, @@ -1376,7 +1376,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_BOUND_METHOD_GENERAL] = { .nuops = 8, .uops = { { _RECORD_BOUND_METHOD, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION, 2, 1 }, { _EXPAND_METHOD, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_CLASS] = { .nuops = 3, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _CALL_BUILTIN_CLASS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_BUILTIN_FAST] = { .nuops = 6, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _POP_TOP_OPARG, OPARG_SIMPLE, 3 }, { _POP_TOP, 
OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .nuops = 4, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .nuops = 6, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _POP_TOP_OPARG, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_BUILTIN_O] = { .nuops = 6, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_O, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_O, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_EX_NON_PY_GENERAL] = { .nuops = 4, .uops = { { _CHECK_IS_NOT_PY_CALLABLE_EX, OPARG_SIMPLE, 1 }, { _MAKE_CALLARGS_A_TUPLE, OPARG_SIMPLE, 1 }, { _CALL_FUNCTION_EX_NON_PY_GENERAL, OPARG_SIMPLE, 1 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 1 } } }, [CALL_EX_PY] = { .nuops = 7, .uops = { { _RECORD_4OS, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _MAKE_CALLARGS_A_TUPLE, OPARG_SIMPLE, 1 }, { _CHECK_IS_PY_CALLABLE_EX, OPARG_SIMPLE, 1 }, { _PY_FRAME_EX, OPARG_SIMPLE, 1 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 1 }, { _PUSH_FRAME, OPARG_SIMPLE, 1 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 50abb537d692a0..9ca606b61b0342 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -508,7 +508,7 @@ extern "C" { #define _BUILD_TUPLE_r01 718 #define _CALL_BUILTIN_CLASS_r01 719 #define _CALL_BUILTIN_FAST_r00 720 -#define _CALL_BUILTIN_FAST_WITH_KEYWORDS_r01 721 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS_r00 721 #define 
_CALL_BUILTIN_O_r03 722 #define _CALL_FUNCTION_EX_NON_PY_GENERAL_r31 723 #define _CALL_INTRINSIC_1_r12 724 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index e29ce36e91ebab..895016b0401c71 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -313,7 +313,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_CALLABLE_BUILTIN_FAST] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CALL_BUILTIN_FAST] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_EXIT_FLAG, - [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_LEN] = HAS_EXIT_FLAG, [_CALL_LEN] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GUARD_CALLABLE_ISINSTANCE] = HAS_EXIT_FLAG, @@ -2938,7 +2938,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .best = { 0, 0, 0, 0 }, .entries = { - { 1, 0, _CALL_BUILTIN_FAST_WITH_KEYWORDS_r01 }, + { 0, 0, _CALL_BUILTIN_FAST_WITH_KEYWORDS_r00 }, { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, @@ -4424,7 +4424,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_GUARD_CALLABLE_BUILTIN_FAST_r00] = _GUARD_CALLABLE_BUILTIN_FAST, [_CALL_BUILTIN_FAST_r00] = _CALL_BUILTIN_FAST, [_GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS_r00] = _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS, - [_CALL_BUILTIN_FAST_WITH_KEYWORDS_r01] = _CALL_BUILTIN_FAST_WITH_KEYWORDS, + [_CALL_BUILTIN_FAST_WITH_KEYWORDS_r00] = _CALL_BUILTIN_FAST_WITH_KEYWORDS, [_GUARD_CALLABLE_LEN_r03] = _GUARD_CALLABLE_LEN, [_GUARD_CALLABLE_LEN_r13] = _GUARD_CALLABLE_LEN, [_GUARD_CALLABLE_LEN_r23] = _GUARD_CALLABLE_LEN, @@ -4819,7 +4819,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_CALL_BUILTIN_FAST] = 
"_CALL_BUILTIN_FAST", [_CALL_BUILTIN_FAST_r00] = "_CALL_BUILTIN_FAST_r00", [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = "_CALL_BUILTIN_FAST_WITH_KEYWORDS", - [_CALL_BUILTIN_FAST_WITH_KEYWORDS_r01] = "_CALL_BUILTIN_FAST_WITH_KEYWORDS_r01", + [_CALL_BUILTIN_FAST_WITH_KEYWORDS_r00] = "_CALL_BUILTIN_FAST_WITH_KEYWORDS_r00", [_CALL_BUILTIN_O] = "_CALL_BUILTIN_O", [_CALL_BUILTIN_O_r03] = "_CALL_BUILTIN_O_r03", [_CALL_FUNCTION_EX_NON_PY_GENERAL] = "_CALL_FUNCTION_EX_NON_PY_GENERAL", @@ -6515,7 +6515,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS: return 0; case _CALL_BUILTIN_FAST_WITH_KEYWORDS: - return 2 + oparg; + return 0; case _GUARD_CALLABLE_LEN: return 0; case _CALL_LEN: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 097a3ac4baf294..883bc9466668ff 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2746,7 +2746,12 @@ def test_call_builtin_fast_with_keywords(self): def testfunc(n): x = 0 for _ in range(n): + # _CALL_BUILTIN_FAST_WITH_KEYWORDS: 1 _POP_TOP_NOP, 1 POP_TOP + # _LIST_EXTEND: 1 _POP_TOP_NOP, y = sorted([3, 1, 2]) + + # _BINARY_OP_SUBSCR_LIST_INT: 2 _POP_TOP_NOP + # _BINARY_OP_ADD_INT: 1 _POP_TOP_NOP x += y[0] return x @@ -2756,6 +2761,8 @@ def testfunc(n): uops = get_opnames(ex) self.assertIn("_CALL_BUILTIN_FAST_WITH_KEYWORDS", uops) self.assertNotIn("_GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS", uops) + self.assertGreaterEqual(count_ops(ex, "_POP_TOP_NOP"), 5) + self.assertGreaterEqual(count_ops(ex, "_POP_TOP"), 3) def test_call_method_descriptor_o(self): def testfunc(n): diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index c2e69d35070d78..eca8f104d346a0 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -2433,7 +2433,7 @@ _PyStackRef callable; _PyStackRef self_or_null; _PyStackRef *args; - _PyStackRef res; + _PyStackRef value; /* Skip 1 cache 
entry */ /* Skip 2 cache entries */ // _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS @@ -2463,20 +2463,36 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRefSteal(callable, arguments, total_args); + PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRef(callable, arguments, total_args); stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { - stack_pointer += -2 - oparg; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } - res = PyStackRef_FromPyObjectSteal(res_o); + _PyStackRef temp = callable; + callable = PyStackRef_FromPyObjectSteal(res_o); + stack_pointer[-2 - oparg] = callable; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(temp); + stack_pointer = _PyFrame_GetStackPointer(frame); } - // _CHECK_PERIODIC_AT_END + // _POP_TOP_OPARG { - stack_pointer[-2 - oparg] = res; + args = &stack_pointer[-oparg]; + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyStackRef_CloseStack(args, oparg); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = self_or_null; stack_pointer += -1 - oparg; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _CHECK_PERIODIC_AT_END + { _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a7d2d652e687dc..39900e3def34e9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4689,7 +4689,7 @@ dummy_func( EXIT_IF(PyCFunction_GET_FLAGS(callable_o) != (METH_FASTCALL | METH_KEYWORDS)); } - op(_CALL_BUILTIN_FAST_WITH_KEYWORDS, (callable, self_or_null, args[oparg] -- res)) { + op(_CALL_BUILTIN_FAST_WITH_KEYWORDS, (callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { /* Builtin 
METH_FASTCALL | METH_KEYWORDS functions */ int total_args = oparg; _PyStackRef *arguments = args; @@ -4698,12 +4698,13 @@ dummy_func( total_args++; } STAT_INC(CALL, hit); - PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRefSteal(callable, arguments, total_args); - DEAD(args); - DEAD(self_or_null); - DEAD(callable); - ERROR_IF(res_o == NULL); - res = PyStackRef_FromPyObjectSteal(res_o); + PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRef(callable, arguments, total_args); + if (res_o == NULL) { + ERROR_NO_POP(); + } + _PyStackRef temp = callable; + callable = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(temp); } macro(CALL_BUILTIN_FAST_WITH_KEYWORDS) = @@ -4712,6 +4713,8 @@ dummy_func( unused/2 + _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS + _CALL_BUILTIN_FAST_WITH_KEYWORDS + + _POP_TOP_OPARG + + POP_TOP + _CHECK_PERIODIC_AT_END; macro(CALL_LEN) = diff --git a/Python/ceval.c b/Python/ceval.c index 3f024ad67f2c4b..c5ec8ae5253209 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -832,7 +832,7 @@ _Py_BuiltinCallFast_StackRef( } PyObject * -_Py_BuiltinCallFastWithKeywords_StackRefSteal( +_Py_BuiltinCallFastWithKeywords_StackRef( _PyStackRef callable, _PyStackRef *arguments, int total_args) @@ -840,8 +840,7 @@ _Py_BuiltinCallFastWithKeywords_StackRefSteal( PyObject *res; STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); if (CONVERSION_FAILED(args_o)) { - res = NULL; - goto cleanup; + return NULL; } PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyCFunctionFastWithKeywords cfunc = @@ -849,15 +848,6 @@ _Py_BuiltinCallFastWithKeywords_StackRefSteal( res = cfunc(PyCFunction_GET_SELF(callable_o), args_o, total_args, NULL); STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); assert((res != NULL) ^ (PyErr_Occurred() != NULL)); -cleanup: - // arguments is a pointer into the GC visible stack, - // so we must NULL out values as we clear them. 
- for (int i = total_args-1; i >= 0; i--) { - _PyStackRef tmp = arguments[i]; - arguments[i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - PyStackRef_CLOSE(callable); return res; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ed050570909932..1b01114ecbe1fc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -16674,13 +16674,12 @@ break; } - case _CALL_BUILTIN_FAST_WITH_KEYWORDS_r01: { + case _CALL_BUILTIN_FAST_WITH_KEYWORDS_r00: { CHECK_CURRENT_CACHED_VALUES(0); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef *args; _PyStackRef self_or_null; _PyStackRef callable; - _PyStackRef res; oparg = CURRENT_OPARG(); args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; @@ -16693,21 +16692,22 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRefSteal(callable, arguments, total_args); + PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRef(callable, arguments, total_args); stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { - stack_pointer += -2 - oparg; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } - res = PyStackRef_FromPyObjectSteal(res_o); - _tos_cache0 = res; + _PyStackRef temp = callable; + callable = PyStackRef_FromPyObjectSteal(res_o); + stack_pointer[-2 - oparg] = callable; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(temp); + stack_pointer = _PyFrame_GetStackPointer(frame); + _tos_cache0 = PyStackRef_ZERO_BITS; _tos_cache1 = PyStackRef_ZERO_BITS; _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); - stack_pointer += -2 - oparg; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2a2751e8a84e79..846da3c1de3de9 100644 --- 
a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2433,7 +2433,7 @@ _PyStackRef callable; _PyStackRef self_or_null; _PyStackRef *args; - _PyStackRef res; + _PyStackRef value; /* Skip 1 cache entry */ /* Skip 2 cache entries */ // _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS @@ -2463,20 +2463,36 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRefSteal(callable, arguments, total_args); + PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRef(callable, arguments, total_args); stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { - stack_pointer += -2 - oparg; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } - res = PyStackRef_FromPyObjectSteal(res_o); + _PyStackRef temp = callable; + callable = PyStackRef_FromPyObjectSteal(res_o); + stack_pointer[-2 - oparg] = callable; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_CLOSE(temp); + stack_pointer = _PyFrame_GetStackPointer(frame); } - // _CHECK_PERIODIC_AT_END + // _POP_TOP_OPARG { - stack_pointer[-2 - oparg] = res; + args = &stack_pointer[-oparg]; + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyStackRef_CloseStack(args, oparg); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = self_or_null; stack_pointer += -1 - oparg; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _CHECK_PERIODIC_AT_END + { _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 50005d4e968786..a0f81d9447e95f 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1325,6 +1325,10 @@ dummy_func(void) { } } + op(_CALL_BUILTIN_FAST_WITH_KEYWORDS, 
(callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { + callable = sym_new_not_null(ctx); + } + op(_CALL_BUILTIN_O, (callable, self_or_null, args[oparg] -- res, c, s)) { res = sym_new_not_null(ctx); c = callable; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 34b535538a5c56..1a101c5b945d3a 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -4022,12 +4022,10 @@ } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { - JitOptRef res; - res = sym_new_not_null(ctx); - CHECK_STACK_BOUNDS(-1 - oparg); - stack_pointer[-2 - oparg] = res; - stack_pointer += -1 - oparg; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + JitOptRef callable; + callable = stack_pointer[-2 - oparg]; + callable = sym_new_not_null(ctx); + stack_pointer[-2 - oparg] = callable; break; } From d9e17fe04fe9f0b2a7c4862995235fbb91a8aace Mon Sep 17 00:00:00 2001 From: Sacul0457Deve <183588943+Sacul0457@users.noreply.github.com.> Date: Sat, 11 Apr 2026 09:24:31 +0800 Subject: [PATCH 2/3] regen files --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 676b8a1f3a53e6..a0d968c146dae5 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1376,7 +1376,7 @@ _PyOpcode_macro_expansion[256] = { [CALL_BOUND_METHOD_GENERAL] = { .nuops = 8, .uops = { { _RECORD_BOUND_METHOD, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION, 2, 1 }, { _EXPAND_METHOD, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_CLASS] = { .nuops = 4, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_CLASS, OPARG_SIMPLE, 3 
}, { _CALL_BUILTIN_CLASS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_BUILTIN_FAST] = { .nuops = 6, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _POP_TOP_OPARG, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .nuops = 4, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .nuops = 6, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _POP_TOP_OPARG, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_BUILTIN_O] = { .nuops = 7, .uops = { { _RECORD_CALLABLE, OPARG_SIMPLE, 0 }, { _GUARD_CALLABLE_BUILTIN_O, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_LIMIT, OPARG_SIMPLE, 3 }, { _CALL_BUILTIN_O, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _POP_TOP, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 3 } } }, [CALL_EX_NON_PY_GENERAL] = { .nuops = 4, .uops = { { _CHECK_IS_NOT_PY_CALLABLE_EX, OPARG_SIMPLE, 1 }, { _MAKE_CALLARGS_A_TUPLE, OPARG_SIMPLE, 1 }, { _CALL_FUNCTION_EX_NON_PY_GENERAL, OPARG_SIMPLE, 1 }, { _CHECK_PERIODIC_AT_END, OPARG_REPLACED, 1 } } }, [CALL_EX_PY] = { .nuops = 7, .uops = { { _RECORD_4OS, OPARG_SIMPLE, 0 }, { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _MAKE_CALLARGS_A_TUPLE, OPARG_SIMPLE, 1 }, { _CHECK_IS_PY_CALLABLE_EX, OPARG_SIMPLE, 1 }, { _PY_FRAME_EX, OPARG_SIMPLE, 1 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 1 }, { _PUSH_FRAME, OPARG_SIMPLE, 1 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h 
index 12b2bf32d77748..6022bc4255788e 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -509,7 +509,7 @@ extern "C" { #define _BUILD_TUPLE_r01 719 #define _CALL_BUILTIN_CLASS_r01 720 #define _CALL_BUILTIN_FAST_r00 721 -#define _CALL_BUILTIN_FAST_WITH_KEYWORDS_r01 722 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS_r00 722 #define _CALL_BUILTIN_O_r03 723 #define _CALL_FUNCTION_EX_NON_PY_GENERAL_r31 724 #define _CALL_INTRINSIC_1_r12 725 From 0e00ac8754655f9a3625e9d01284fdf3e38d4937 Mon Sep 17 00:00:00 2001 From: Sacul0457Deve <183588943+Sacul0457@users.noreply.github.com.> Date: Sat, 11 Apr 2026 12:53:09 +0800 Subject: [PATCH 3/3] address review and revert test --- Lib/test/test_capi/test_opt.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 76d2737e790bbe..2a1abe178e5d84 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2761,12 +2761,7 @@ def test_call_builtin_fast_with_keywords(self): def testfunc(n): x = 0 for _ in range(n): - # _CALL_BUILTIN_FAST_WITH_KEYWORDS: 1 _POP_TOP_NOP, 1 POP_TOP - # _LIST_EXTEND: 1 _POP_TOP_NOP, y = sorted([3, 1, 2]) - - # _BINARY_OP_SUBSCR_LIST_INT: 2 _POP_TOP_NOP - # _BINARY_OP_ADD_INT: 1 _POP_TOP_NOP x += y[0] return x @@ -2776,8 +2771,6 @@ def testfunc(n): uops = get_opnames(ex) self.assertIn("_CALL_BUILTIN_FAST_WITH_KEYWORDS", uops) self.assertNotIn("_GUARD_CALLABLE_BUILTIN_FAST_WITH_KEYWORDS", uops) - self.assertGreaterEqual(count_ops(ex, "_POP_TOP_NOP"), 5) - self.assertGreaterEqual(count_ops(ex, "_POP_TOP"), 3) def test_call_method_descriptor_o(self): def testfunc(n):