diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 15d5bd6684549e..374ac4082afb14 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1289,7 +1289,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [SET_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [STORE_ATTR] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG }, @@ -1501,7 +1501,7 @@ _PyOpcode_macro_expansion[256] = { [SET_FUNCTION_ATTRIBUTE] = { .nuops = 1, .uops = { { _SET_FUNCTION_ATTRIBUTE, OPARG_SIMPLE, 0 } } }, [SET_UPDATE] = { .nuops = 2, .uops = { { _SET_UPDATE, OPARG_SIMPLE, 0 }, { _POP_TOP, OPARG_SIMPLE, 0 } } }, [STORE_ATTR] = { .nuops = 1, .uops = { { _STORE_ATTR, OPARG_SIMPLE, 3 } } }, - [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 5, .uops = { { _LOCK_OBJECT, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION_LOCKED, 2, 1 }, { _GUARD_DORV_NO_DICT, OPARG_SIMPLE, 3 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, + [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 6, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _LOCK_OBJECT, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION_LOCKED, 2, 1 }, { _GUARD_DORV_NO_DICT, OPARG_SIMPLE, 3 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, [STORE_ATTR_SLOT] = { .nuops = 4, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_SLOT, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, [STORE_ATTR_WITH_HINT] = { .nuops = 4, .uops = { { _RECORD_TOS_TYPE, OPARG_SIMPLE, 1 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_WITH_HINT, 1, 3 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, [STORE_DEREF] = { .nuops = 1, .uops = { { _STORE_DEREF, OPARG_SIMPLE, 0 } } }, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 8d3da8b5a22968..6b6c89813d381a 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1518,6 +1518,29 @@ class Foo: Foo.attr = 0 self.assertFalse(ex.is_valid()) + def test_guard_type_version_locked_removed(self): + """ + Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are + eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__. + """ + + class Foo: + def __init__(self): + self.a = 1 + self.b = 2 + self.c = 3 + + def thing(n): + for _ in range(n): + Foo() + + res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + opnames = list(iter_opnames(ex)) + guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED") + # Only the first store needs the guard; the rest should be NOPed. + self.assertEqual(guard_locked_count, 1) + def test_type_version_doesnt_segfault(self): """ Tests that setting a type version doesn't cause a segfault when later looking at the stack. @@ -1539,6 +1562,98 @@ def fn(a): fn(A()) + def test_init_resolves_callable(self): + """ + _CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant, + enabling the optimizer to propagate type information through the frame + and eliminate redundant function version and arg count checks. + """ + class MyPoint: + def __init__(self, x, y): + # If __init__ callable is propagated through, then + # These will get promoted from globals to constants. + self.x = range(1) + self.y = range(1) + + def testfunc(n): + for _ in range(n): + p = MyPoint(1.0, 2.0) + + _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # The __init__ call should be traced through via _PUSH_FRAME + self.assertIn("_PUSH_FRAME", uops) + # __init__ resolution allows promotion of range to constant + self.assertNotIn("_LOAD_GLOBAL_BUILTINS", uops) + + def test_guard_type_version_locked_propagates(self): + """ + _GUARD_TYPE_VERSION_LOCKED should set the type version on the + symbol so repeated accesses to the same type can benefit. + """ + class Item: + def __init__(self, val): + self.val = val + + def get(self): + return self.val + + def get2(self): + return self.val + 1 + + def testfunc(n): + item = Item(42) + total = 0 + for _ in range(n): + # Two method calls on the same object — the second + # should benefit from type info set by the first. + total += item.get() + item.get2() + return total + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD * (42 + 43)) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # Both methods should be traced through + self.assertEqual(uops.count("_PUSH_FRAME"), 2) + # Type version propagation: one guard covers both method lookups + self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1) + # Function checks eliminated (type info resolves the callable) + self.assertNotIn("_CHECK_FUNCTION_VERSION", uops) + self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops) + + def test_method_chain_guard_elimination(self): + """ + Calling two methods on the same object should share the outer + type guard — only one _GUARD_TYPE_VERSION for the two lookups. + """ + class Calc: + def __init__(self, val): + self.val = val + + def add(self, x): + self.val += x + return self + + def testfunc(n): + c = Calc(0) + for _ in range(n): + c.add(1).add(2) + return c.val + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD * 3) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # Both add() calls should be inlined + push_count = uops.count("_PUSH_FRAME") + self.assertEqual(push_count, 2) + # Only one outer type version guard for the two method lookups + # on the same object c (the second lookup reuses type info) + guard_version_count = uops.count("_GUARD_TYPE_VERSION") + self.assertEqual(guard_version_count, 1) + def test_func_guards_removed_or_reduced(self): def testfunc(n): for i in range(n): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 92f5b5d530cbff..bddf5deff96724 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2994,6 +2994,7 @@ dummy_func( macro(STORE_ATTR_INSTANCE_VALUE) = unused/1 + + _RECORD_TOS_TYPE + _LOCK_OBJECT + _GUARD_TYPE_VERSION_LOCKED + _GUARD_DORV_NO_DICT + diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 2fd235a2dda149..b6c09694c24b07 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -134,6 +134,20 @@ dummy_func(void) { assert(!PyJitRef_IsUnique(value)); } + op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) { + assert(type_version); + if (sym_matches_type_version(owner, type_version)) { + ADD_OP(_NOP, 0, 0); + } else { + PyTypeObject *probable_type = sym_get_probable_type(owner); + if (probable_type->tp_version_tag == type_version && sym_set_type_version(owner, type_version)) { + // Promote the probable type version to a known one. + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)probable_type); + _Py_BloomFilter_Add(dependencies, probable_type); + } + } + } + op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) { (void)offset; (void)value; @@ -1027,9 +1041,27 @@ dummy_func(void) { } op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { - (void)type_version; (void)args; - callable = sym_new_not_null(ctx); + PyObject *probable_callable = sym_get_probable_value(callable); + assert(probable_callable != NULL); + assert(PyType_Check(probable_callable)); + PyTypeObject *tp = (PyTypeObject *)probable_callable; + if (tp->tp_version_tag == type_version) { + // If the type version has not changed since we last saw it, + // then we know this __init__ is definitely the same one as in the cache. + // We can promote callable to a known constant. This does not need a + // type watcher, as we do not remove this _CHECK_AND_ALLOCATE_OBJECT guard. + // TODO: split up _CHECK_AND_ALLOCATE_OBJECT to the check then alloate, so we can + // eliminate the check. + PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable; + PyObject *init = cls->_spec_cache.init; + assert(init != NULL); + assert(PyFunction_Check(init)); + callable = sym_new_const(ctx, init); + } + else { + callable = sym_new_not_null(ctx); + } self_or_null = sym_new_not_null(ctx); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8634189c707510..629059d9044537 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2386,6 +2386,19 @@ } case _GUARD_TYPE_VERSION_LOCKED: { + JitOptRef owner; + owner = stack_pointer[-1]; + uint32_t type_version = (uint32_t)this_instr->operand0; + assert(type_version); + if (sym_matches_type_version(owner, type_version)) { + ADD_OP(_NOP, 0, 0); + } else { + PyTypeObject *probable_type = sym_get_probable_type(owner); + if (probable_type->tp_version_tag == type_version && sym_set_type_version(owner, type_version)) { + PyType_Watch(TYPE_WATCHER_ID, (PyObject *)probable_type); + _Py_BloomFilter_Add(dependencies, probable_type); + } + } break; } @@ -3776,9 +3789,21 @@ self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; uint32_t type_version = (uint32_t)this_instr->operand0; - (void)type_version; (void)args; - callable = sym_new_not_null(ctx); + PyObject *probable_callable = sym_get_probable_value(callable); + assert(probable_callable != NULL); + assert(PyType_Check(probable_callable)); + PyTypeObject *tp = (PyTypeObject *)probable_callable; + if (tp->tp_version_tag == type_version) { + PyHeapTypeObject *cls = (PyHeapTypeObject *)probable_callable; + PyObject *init = cls->_spec_cache.init; + assert(init != NULL); + assert(PyFunction_Check(init)); + callable = sym_new_const(ctx, init); + } + else { + callable = sym_new_not_null(ctx); + } self_or_null = sym_new_not_null(ctx); stack_pointer[-2 - oparg] = callable; stack_pointer[-1 - oparg] = self_or_null; diff --git a/Python/record_functions.c.h b/Python/record_functions.c.h index 958cdab34c8ff4..d3b0c3e6c7f60d 100644 --- a/Python/record_functions.c.h +++ b/Python/record_functions.c.h @@ -101,6 +101,7 @@ const uint8_t _PyOpcode_RecordFunctionIndices[256] = { [LOAD_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX, [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = _RECORD_TOS_TYPE_INDEX, [LOAD_ATTR_PROPERTY] = _RECORD_TOS_TYPE_INDEX, + [STORE_ATTR_INSTANCE_VALUE] = _RECORD_TOS_TYPE_INDEX, [STORE_ATTR_WITH_HINT] = _RECORD_TOS_TYPE_INDEX, [STORE_ATTR_SLOT] = _RECORD_TOS_TYPE_INDEX, [FOR_ITER_GEN] = _RECORD_NOS_GEN_FUNC_INDEX,