diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/ClassInstanceFlow.qll b/python/ql/lib/semmle/python/dataflow/new/internal/ClassInstanceFlow.qll new file mode 100644 index 000000000000..5558175d04ba --- /dev/null +++ b/python/ql/lib/semmle/python/dataflow/new/internal/ClassInstanceFlow.qll @@ -0,0 +1,118 @@ +/** + * Provides a reusable data-flow configuration for tracking class instances + * through global data-flow with full path support. + * + * This module is designed for quality queries that check whether instances + * of certain classes reach operations that require a specific interface + * (e.g., `__contains__`, `__iter__`, `__hash__`). + * + * The configuration uses two flow states: + * - `TrackingClass`: tracking a reference to the class itself + * - `TrackingInstance`: tracking an instance of the class + * + * At instantiation points (e.g., `cls()`), the state transitions from + * `TrackingClass` to `TrackingInstance`. Sinks are only matched in the + * `TrackingInstance` state. + */ + +private import python +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.ApiGraphs + +/** A flow state for tracking class references and their instances. */ +abstract class ClassInstanceFlowState extends string { + bindingset[this] + ClassInstanceFlowState() { any() } +} + +/** A state signifying that the tracked value is a reference to the class itself. */ +class TrackingClass extends ClassInstanceFlowState { + TrackingClass() { this = "TrackingClass" } +} + +/** A state signifying that the tracked value is an instance of the class. */ +class TrackingInstance extends ClassInstanceFlowState { + TrackingInstance() { this = "TrackingInstance" } +} + +/** + * Signature module for parameterizing `ClassInstanceFlow` per query. + */ +signature module ClassInstanceFlowSig { + /** Holds if `cls` is a class whose instances should be tracked to sinks. */ + predicate isRelevantClass(Class cls); + + /** Holds if `sink` is a location where reaching instances indicate a violation. */ + predicate isInstanceSink(DataFlow::Node sink); + + /** + * Holds if an `isinstance` check against `checkedType` should act as a barrier, + * suppressing alerts when the instance has been verified to have the expected interface. + */ + predicate isGuardType(DataFlow::Node checkedType); +} + +/** + * Constructs a global data-flow configuration for tracking instances of + * relevant classes from their definition to violation sinks. + */ +module ClassInstanceFlow { + /** + * Holds if `guard` is an `isinstance` call checking `node` against a type + * that should suppress the alert. + */ + private predicate isinstanceGuard(DataFlow::GuardNode guard, ControlFlowNode node, boolean branch) { + exists(DataFlow::CallCfgNode isinstance_call | + isinstance_call = API::builtin("isinstance").getACall() and + isinstance_call.getArg(0).asCfgNode() = node and + ( + Sig::isGuardType(isinstance_call.getArg(1)) + or + // Also handle tuples of types: isinstance(x, (T1, T2)) + Sig::isGuardType(DataFlow::exprNode(isinstance_call.getArg(1).asExpr().(Tuple).getAnElt())) + ) and + guard = isinstance_call.asCfgNode() and + branch = true + ) + } + + private module Config implements DataFlow::StateConfigSig { + class FlowState = ClassInstanceFlowState; + + predicate isSource(DataFlow::Node source, FlowState state) { + exists(ClassExpr ce | + Sig::isRelevantClass(ce.getInnerScope()) and + source.asExpr() = ce and + state instanceof TrackingClass + ) + } + + predicate isSink(DataFlow::Node sink, FlowState state) { + Sig::isInstanceSink(sink) and + state instanceof TrackingInstance + } + + predicate isBarrier(DataFlow::Node node) { + node = DataFlow::BarrierGuard::getABarrierNode() + } + + predicate isAdditionalFlowStep( + DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo + ) { + // Instantiation: class reference at the call function position + // flows to the call result as an instance. + stateFrom instanceof TrackingClass and + stateTo instanceof TrackingInstance and + exists(CallNode call | + nodeFrom.asCfgNode() = call.getFunction() and + nodeTo.asCfgNode() = call and + // Exclude decorator applications, where the result is a proxy + // rather than a typical instance. + not call.getNode() = any(FunctionExpr fe).getADecoratorCall() + ) + } + } + + module Flow = DataFlow::GlobalWithState; +} diff --git a/python/ql/src/Expressions/ContainsNonContainer.ql b/python/ql/src/Expressions/ContainsNonContainer.ql index de8c38795675..ad2e10de017b 100644 --- a/python/ql/src/Expressions/ContainsNonContainer.ql +++ b/python/ql/src/Expressions/ContainsNonContainer.ql @@ -1,7 +1,7 @@ /** * @name Membership test with a non-container * @description A membership test, such as 'item in sequence', with a non-container on the right hand side will raise a 'TypeError'. - * @kind problem + * @kind path-problem * @tags quality * reliability * correctness @@ -12,25 +12,47 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.dataflow.new.internal.ClassInstanceFlow +private import semmle.python.ApiGraphs -predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) { - exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() | +predicate rhs_in_expr(Expr rhs, Compare cmp) { + exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs | op instanceof In or op instanceof NotIn ) } +module ContainsNonContainerSig implements ClassInstanceFlowSig { + predicate isRelevantClass(Class cls) { + not DuckTyping::isContainer(cls) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and + not exists(CallNode setattr_call | + setattr_call.getFunction().(NameNode).getId() = "setattr" and + setattr_call.getArg(0).(NameNode).getId() = cls.getName() and + setattr_call.getScope() = cls.getScope() + ) + } + + predicate isInstanceSink(DataFlow::Node sink) { rhs_in_expr(sink.asExpr(), _) } + + predicate isGuardType(DataFlow::Node checkedType) { + checkedType = + API::builtin(["list", "tuple", "set", "frozenset", "dict", "str", "bytes", "bytearray"]) + .getAValueReachableFromSource() + } +} + +module ContainsNonContainerFlow = ClassInstanceFlow; + +import ContainsNonContainerFlow::Flow::PathGraph + from - ControlFlowNodeWithPointsTo non_seq, Compare cmp, Value v, ClassValue cls, ControlFlowNode origin + ContainsNonContainerFlow::Flow::PathNode source, ContainsNonContainerFlow::Flow::PathNode sink, + ClassExpr ce where - rhs_in_expr(non_seq, cmp) and - non_seq.pointsTo(_, v, origin) and - v.getClass() = cls and - not Types::failedInference(cls, _) and - not cls.hasAttribute("__contains__") and - not cls.hasAttribute("__iter__") and - not cls.hasAttribute("__getitem__") and - not cls = ClassValue::nonetype() and - not cls = Value::named("types.MappingProxyType") -select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin, - "target", cls, cls.getName() + ContainsNonContainerFlow::Flow::flowPath(source, sink) and + source.getNode().asExpr() = ce +select sink.getNode(), source, sink, + "This test may raise an Exception as the $@ may be of non-container class $@.", source.getNode(), + "target", ce.getInnerScope(), ce.getInnerScope().getName() diff --git a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected index cf6d78d0d36b..eca57f44333a 100644 --- a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected +++ b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected @@ -1,2 +1,22 @@ -| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | -| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | +edges +| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | provenance | | +| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | provenance | | +| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | provenance | | +| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | | +| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | provenance | | +| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | provenance | Config | +| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | expressions_test.py:88:5:88:7 | ControlFlowNode for seq | provenance | | +nodes +| expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | semmle.label | ControlFlowNode for ClassExpr | +| expressions_test.py:77:7:77:11 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter | +| expressions_test.py:88:5:88:7 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq | +| expressions_test.py:88:11:88:15 | ControlFlowNode for XIter | semmle.label | ControlFlowNode for XIter | +| expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | semmle.label | ControlFlowNode for XIter() | +| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq | +| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq | +| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | semmle.label | ControlFlowNode for seq | +subpaths +#select +| expressions_test.py:89:13:89:15 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:89:13:89:15 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | +| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | +| expressions_test.py:91:17:91:19 | ControlFlowNode for seq | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | expressions_test.py:91:17:91:19 | ControlFlowNode for seq | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:77:1:77:20 | ControlFlowNode for ClassExpr | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | diff --git a/python/ql/test/query-tests/Expressions/general/expressions_test.py b/python/ql/test/query-tests/Expressions/general/expressions_test.py index 5e07b58e2041..f9d522d328b3 100644 --- a/python/ql/test/query-tests/Expressions/general/expressions_test.py +++ b/python/ql/test/query-tests/Expressions/general/expressions_test.py @@ -279,3 +279,62 @@ def local(): def apply(f): pass apply(foo)([1]) + +# Class used as a decorator: the runtime value at attribute access is the +# function's return value, not the decorator class instance. +class cached_property(object): + def __init__(self, func): + self.func = func + def __get__(self, obj, cls): + val = self.func(obj) + setattr(obj, self.func.__name__, val) + return val + +class MyForm(object): + @cached_property + def changed_data(self): + return [1, 2, 3] + +def test_decorator_class(form): + f = MyForm() + # OK: cached_property is a descriptor; the actual runtime value is a list. + if "name" in f.changed_data: + pass + +# Class with dynamically added methods via setattr: we cannot statically +# determine its full interface, so we should not flag it. +class DynamicProxy(object): + def __init__(self, args): + self._args = args + +for method_name in ["__contains__", "__iter__", "__len__"]: + def wrapper(self, *args, __method_name=method_name): + pass + setattr(DynamicProxy, method_name, wrapper) + +def test_dynamic_methods(): + proxy = DynamicProxy(()) + # OK: __contains__ is added dynamically via setattr. + if "name" in proxy: + pass + +# isinstance guard should suppress non-container warning +def guarded_contains(x): + obj = XIter() + if isinstance(obj, dict): + if x in obj: # OK: guarded by isinstance + pass + +def guarded_contains_tuple(x): + obj = XIter() + if isinstance(obj, (list, dict, set)): + if x in obj: # OK: guarded by isinstance with tuple of types + pass + +# Negated isinstance guard: early return when NOT a container +def guarded_contains_negated(x): + obj = XIter() + if not isinstance(obj, dict): + return + if x in obj: # OK: guarded by negated isinstance + early return + pass