diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
index 1db6c08f5f43..d01bdaea9822 100644
--- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
+++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll
@@ -1988,6 +1988,38 @@ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall
 module DuckTyping {
   private import semmle.python.ApiGraphs
 
+  /**
+   * Holds if `name` is a globally defined name (a builtin or VM-defined name).
+   */
+  predicate globallyDefinedName(string name) {
+    exists(API::builtin(name))
+    or
+    name = "WindowsError"
+    or
+    name = "_" and exists(Module m | m.getName() = "gettext")
+    or
+    name in ["__file__", "__builtins__", "__name__"]
+  }
+
+  /**
+   * Holds if `name` is monkey-patched into the builtins module.
+   */
+  predicate monkeyPatchedBuiltin(string name) {
+    any(DataFlow::AttrWrite aw)
+        .writes(API::moduleImport("builtins").getAValueReachableFromSource(), name, _)
+    or
+    // B.__dict__["name"] = value
+    exists(SubscriptNode subscr |
+      subscr.isStore() and
+      subscr.getObject() =
+        API::moduleImport("builtins")
+            .getMember("__dict__")
+            .getAValueReachableFromSource()
+            .asCfgNode() and
+      subscr.getIndex().getNode().(StringLiteral).getText() = name
+    )
+  }
+
   /**
    * Holds if `cls` or any of its resolved superclasses declares a method with the given `name`.
    */
@@ -2158,4 +2190,415 @@ module DuckTyping {
     or
     f.getADecorator().(Name).getId() = "property"
   }
+
+  /** Gets the name of the builtin class of the immutable literal `lit`. */
+  string getClassName(ImmutableLiteral lit) {
+    lit instanceof IntegerLiteral and result = "int"
+    or
+    lit instanceof FloatLiteral and result = "float"
+    or
+    lit instanceof ImaginaryLiteral and result = "complex"
+    or
+    lit instanceof NegativeIntegerLiteral and result = "int"
+    or
+    lit instanceof StringLiteral and result = "str"
+    or
+    lit instanceof BooleanLiteral and result = "bool"
+    or
+    lit instanceof None and result = "NoneType"
+  }
+}
+
+/**
+ * Provides a class hierarchy for exception types, covering both builtin
+ * exceptions (from typeshed models) and user-defined exception classes.
+ */
+module ExceptionTypes {
+  private import semmle.python.ApiGraphs
+  private import semmle.python.frameworks.data.internal.ApiGraphModels
+
+  /** Holds if `name` is a builtin exception class name. */
+  predicate builtinException(string name) {
+    typeModel("builtins.BaseException~Subclass", "builtins." + name, "")
+  }
+
+  /** Holds if builtin exception `sub` is a direct subclass of builtin exception `base`. */
+  private predicate builtinExceptionSubclass(string base, string sub) {
+    typeModel("builtins." + base + "~Subclass", "builtins." + sub, "")
+  }
+
+  /** An exception type, either a builtin exception or a user-defined exception class. */
+  newtype TExceptType =
+    /** A user-defined exception class. */
+    TUserExceptType(Class c) or
+    /** A builtin exception class, identified by name. */
+    TBuiltinExceptType(string name) { builtinException(name) }
+
+  /** An exception type, either a builtin exception or a user-defined exception class. */
+  class ExceptType extends TExceptType {
+    /** Gets the name of this exception type. */
+    string getName() { none() }
+
+    /** Gets a data-flow node that refers to this exception type. */
+    DataFlow::Node getAUse() { none() }
+
+    /** Gets a direct superclass of this exception type. */
+    ExceptType getADirectSuperclass() { none() }
+
+    /** Gets a string representation of this exception type. */
+    string toString() { result = this.getName() }
+
+    /** Gets a data-flow node that refers to an instance of this exception type. */
+    DataFlow::Node getAnInstance() { none() }
+
+    /** Holds if this is a legal exception type (a subclass of `BaseException`). */
+    predicate isLegalExceptionType() { this.getADirectSuperclass*() instanceof BaseException }
+
+    /**
+     * Holds if this exception type is raised by `r`, either as a class reference
+     * (e.g. `raise ValueError`) or as an instantiation (e.g. `raise ValueError("msg")`).
+     */
+    predicate isRaisedBy(Raise r) {
+      exists(Expr raised | raised = r.getRaised() |
+        this.getAUse().asExpr() in [raised, raised.(Call).getFunc()]
+        or
+        this.getAnInstance().asExpr() = raised
+      )
+    }
+
+    /** Holds if this exception type may be raised at control flow node `r`. */
+    predicate isRaisedAt(ControlFlowNode r) {
+      this.isRaisedBy(r.getNode())
+      or
+      exists(Function callee |
+        resolveCall(r, callee, _) and
+        this.isRaisedIn(callee)
+      )
+    }
+
+    /**
+     * Holds if this exception type may be raised in function `f`, either
+     * directly via `raise` statements or transitively through calls to other functions.
+     */
+    predicate isRaisedIn(Function f) { this.isRaisedAt(any(ControlFlowNode r | r.getScope() = f)) }
+
+    /** Holds if this exception type is handled by the `except` clause at `handler`. */
+    predicate isHandledAt(ExceptFlowNode handler) {
+      exists(ExceptStmt ex, Expr typeExpr | ex = handler.getNode() |
+        (
+          typeExpr = ex.getType()
+          or
+          typeExpr = ex.getType().(Tuple).getAnElt()
+        ) and
+        this.getAUse().asExpr() = typeExpr
+      )
+      or
+      // A bare `except:` has no type expression; it is modelled as handling `BaseException`
+      not exists(handler.getNode().(ExceptStmt).getType()) and
+      this instanceof BaseException
+    }
+
+    /**
+     * Holds if this element is at the specified location.
+     * The location spans column `startColumn` of line `startLine` to
+     * column `endColumn` of line `endLine` in file `filePath`.
+     * For more information, see
+     * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
+     */
+    predicate hasLocationInfo(
+      string filePath, int startLine, int startColumn, int endLine, int endColumn
+    ) {
+      none()
+    }
+  }
+
+  /** A user-defined exception class. */
+  class UserExceptType extends ExceptType, TUserExceptType {
+    Class cls;
+
+    UserExceptType() { this = TUserExceptType(cls) }
+
+    /** Gets the underlying class. */
+    Class asClass() { result = cls }
+
+    override string getName() { result = cls.getName() }
+
+    override DataFlow::Node getAUse() { result = classTracker(cls) }
+
+    override DataFlow::Node getAnInstance() { result = classInstanceTracker(cls) }
+
+    override ExceptType getADirectSuperclass() {
+      result.(UserExceptType).asClass() = getADirectSuperclass(cls)
+      or
+      result.(BuiltinExceptType).getAUse().asExpr() = cls.getABase()
+    }
+
+    override predicate hasLocationInfo(
+      string filePath, int startLine, int startColumn, int endLine, int endColumn
+    ) {
+      cls.getLocation().hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn)
+    }
+  }
+
+  /** A builtin exception class, identified by name. */
+  class BuiltinExceptType extends ExceptType, TBuiltinExceptType {
+    string name;
+
+    BuiltinExceptType() { this = TBuiltinExceptType(name) }
+
+    /** Gets the builtin name. */
+    string asBuiltinName() { result = name }
+
+    override string getName() { result = name }
+
+    override DataFlow::Node getAUse() { result = API::builtin(name).getAValueReachableFromSource() }
+
+    override DataFlow::Node getAnInstance() {
+      result = API::builtin(name).getAnInstance().getAValueReachableFromSource()
+    }
+
+    override ExceptType getADirectSuperclass() {
+      builtinExceptionSubclass(result.(BuiltinExceptType).asBuiltinName(), name) and
+      result != this
+    }
+
+    override predicate hasLocationInfo(
+      string filePath, int startLine, int startColumn, int endLine, int endColumn
+    ) {
+      filePath = "" and
+      startLine = 0 and
+      startColumn = 0 and
+      endLine = 0 and
+      endColumn = 0
+    }
+  }
+
+  /** The builtin `BaseException` type. */
+  class BaseException extends BuiltinExceptType {
+    BaseException() { name = "BaseException" }
+  }
+
+  /** The builtin `NameError` exception type. */
+  class NameError extends BuiltinExceptType {
+    NameError() { name = "NameError" }
+  }
+
+  /**
+   * Holds if the exception edge from `r` to `handler` is unlikely because
+   * none of the exception types that `r` may raise are handled by `handler`.
+   */
+  predicate unlikelyExceptionEdge(ControlFlowNode r, ExceptFlowNode handler) {
+    handler = r.getAnExceptionalSuccessor() and
+    // We can determine at least one raised type
+    exists(ExceptType t | t.isRaisedAt(r)) and
+    // But none of them are handled by this handler
+    not exists(ExceptType raised, ExceptType handled |
+      raised.isRaisedAt(r) and
+      handled.isHandledAt(handler) and
+      raised.getADirectSuperclass*() = handled
+    )
+  }
+}
+
+/**
+ * Provides predicates for reasoning about the reachability of control flow nodes
+ * and basic blocks.
+ */
+module Reachability {
+  private import semmle.python.ApiGraphs
+  import ExceptionTypes
+
+  /**
+   * Holds if `call` is a call to a function that is known to never return normally:
+   * `exit()`, `quit()`, `sys.exit()`, `os._exit()`, `os.abort()`, or a `neverReturns` callee.
+   */
+  predicate isCallToNeverReturningFunction(CallNode call) {
+    // Known never-returning builtins/stdlib functions via API graphs
+    call = API::builtin("exit").getACall().asCfgNode()
+    or
+    call = API::builtin("quit").getACall().asCfgNode()
+    or
+    call = API::moduleImport("sys").getMember("exit").getACall().asCfgNode()
+    or
+    call = API::moduleImport("os").getMember("_exit").getACall().asCfgNode()
+    or
+    call = API::moduleImport("os").getMember("abort").getACall().asCfgNode()
+    or
+    // Calls that resolve to a function for which `neverReturns` holds
+    exists(Function target |
+      resolveCall(call, target, _) and
+      neverReturns(target)
+    )
+  }
+
+  /**
+   * Holds if function `f` never returns normally, because every normal exit
+   * is dominated by a call to a never-returning function or an unconditional raise.
+   */
+  predicate neverReturns(Function f) {
+    exists(f.getANormalExit()) and
+    forall(BasicBlock exit | exit = f.getANormalExit().getBasicBlock() |
+      exists(BasicBlock raising |
+        raising.dominates(exit) and
+        (
+          isCallToNeverReturningFunction(raising.getLastNode())
+          or
+          raising.getLastNode().getNode() instanceof Raise
+        )
+      )
+    )
+  }
+
+  /**
+   * Holds if `node` has an exceptional successor but is unlikely to raise an exception,
+   * because it is a `Name` node or the entry node of a scope.
+   */
+  private predicate unlikelyToRaise(ControlFlowNode node) {
+    exists(node.getAnExceptionalSuccessor()) and
+    (
+      node.getNode() instanceof Name
+      or
+      exists(Scope s | s.getEntryNode() = node)
+    )
+  }
+
+  /**
+   * Holds if it is highly unlikely for control to flow from `node` to `succ`.
+   */
+  predicate unlikelySuccessor(ControlFlowNode node, ControlFlowNode succ) {
+    // Exceptional edge where the raised type doesn't match the handler
+    unlikelyExceptionEdge(node, succ)
+    or
+    // Normal successor of a never-returning call
+    isCallToNeverReturningFunction(node) and
+    succ = node.getASuccessor() and
+    not succ = node.getAnExceptionalSuccessor() and
+    not succ.getNode() instanceof Yield
+    or
+    // Exception edge from a node that is unlikely to raise
+    unlikelyToRaise(node) and
+    succ = node.getAnExceptionalSuccessor()
+    or
+    // True branch of `if False:` or `if TYPE_CHECKING:`
+    isAlwaysFalseGuard(node) and
+    succ = node.getATrueSuccessor()
+  }
+
+  /**
+   * Holds if `node` is a condition that is always `False` at runtime.
+   * This covers `if False:` and `if typing.TYPE_CHECKING:`.
+   */
+  private predicate isAlwaysFalseGuard(ControlFlowNode node) {
+    node.getNode() instanceof False
+    or
+    node =
+      API::moduleImport("typing")
+          .getMember("TYPE_CHECKING")
+          .getAValueReachableFromSource()
+          .asCfgNode()
+  }
+
+  private predicate startBbLikelyReachable(BasicBlock b) {
+    exists(Scope s | s.getEntryNode() = b.getNode(_))
+    or
+    exists(BasicBlock pred |
+      pred = b.getAPredecessor() and
+      endBbLikelyReachable(pred) and
+      not unlikelySuccessor(pred.getLastNode(), b)
+    )
+  }
+
+  private predicate endBbLikelyReachable(BasicBlock b) {
+    startBbLikelyReachable(b) and
+    not exists(ControlFlowNode p, ControlFlowNode s |
+      unlikelySuccessor(p, s) and
+      p = b.getNode(_) and
+      s = b.getNode(_) and
+      not p = b.getLastNode()
+    )
+  }
+
+  /**
+   * Holds if basic block `b` is likely to be reachable from the entry of its
+   * enclosing scope.
+   */
+  predicate likelyReachable(BasicBlock b) { startBbLikelyReachable(b) }
+
+  /**
+   * Holds if it is unlikely that `node` can be reached during execution.
+   */
+  predicate unlikelyReachable(ControlFlowNode node) {
+    not startBbLikelyReachable(node.getBasicBlock())
+    or
+    exists(BasicBlock b |
+      startBbLikelyReachable(b) and
+      not endBbLikelyReachable(b) and
+      exists(ControlFlowNode p, int i, int j |
+        unlikelySuccessor(p, _) and
+        p = b.getNode(i) and
+        node = b.getNode(j) and
+        i < j
+      )
+    )
+  }
+
+  /**
+   * Holds if `var` is an SSA variable that is implicitly defined (a builtin,
+   * VM-defined name, or `__path__` in a package init).
+   */
+  private predicate implicitlyDefined(SsaVariable var) {
+    not exists(var.getDefinition()) and
+    not py_ssa_phi(var, _) and
+    exists(GlobalVariable gv | var.getVariable() = gv |
+      DuckTyping::globallyDefinedName(gv.getId())
+      or
+      gv.getId() = "__path__" and gv.getScope().(Module).isPackageInit()
+    )
+  }
+
+  /**
+   * Gets a phi input of `var` whose incoming edge is not an `unlikelySuccessor` edge.
+   */
+  private SsaVariable getAPrunedPhiInput(SsaVariable var) {
+    result = var.getAPhiInput() and
+    exists(BasicBlock incoming | incoming = var.getPredecessorBlockForPhiArgument(result) |
+      not unlikelySuccessor(incoming.getLastNode(), var.getDefinition().getBasicBlock().firstNode())
+    )
+  }
+
+  /**
+   * Gets a predecessor block for the phi node `var`, excluding `unlikelySuccessor` edges.
+   */
+  private BasicBlock getAPrunedPredecessorBlockForPhi(SsaVariable var) {
+    result = var.getAPredecessorBlockForPhi() and
+    not unlikelySuccessor(result.getLastNode(), var.getDefinition().getBasicBlock().firstNode())
+  }
+
+  /**
+   * Holds if the SSA variable `var` may be undefined at some use.
+   */
+  private predicate ssaMaybeUndefined(SsaVariable var) {
+    // No definition, not a phi, not implicitly defined
+    not exists(var.getDefinition()) and not py_ssa_phi(var, _) and not implicitlyDefined(var)
+    or
+    // Defined by a deletion
+    var.getDefinition().isDelete()
+    or
+    // A phi input may be undefined
+    exists(SsaVariable input | input = getAPrunedPhiInput(var) | ssaMaybeUndefined(input))
+    or
+    // A likely-reachable, unpruned phi predecessor that no phi input's definition dominates
+    exists(BasicBlock incoming |
+      likelyReachable(incoming) and
+      incoming = getAPrunedPredecessorBlockForPhi(var) and
+      not var.getAPhiInput().getDefinition().getBasicBlock().dominates(incoming)
+    )
+  }
+
+  /**
+   * Holds if the name `u` may be undefined at its use.
+   */
+  predicate maybeUndefined(Name u) {
+    exists(SsaVariable var | var.getAUse().getNode() = u | ssaMaybeUndefined(var))
+  }
 }
diff --git a/python/ql/src/Exceptions/IncorrectExceptOrder.ql b/python/ql/src/Exceptions/IncorrectExceptOrder.ql
index 6eb1b39b0e64..436bc00be4ab 100644
--- a/python/ql/src/Exceptions/IncorrectExceptOrder.ql
+++ b/python/ql/src/Exceptions/IncorrectExceptOrder.ql
@@ -15,74 +15,7 @@
 
 import python
 import semmle.python.dataflow.new.internal.DataFlowDispatch
-import semmle.python.ApiGraphs
-import semmle.python.frameworks.data.internal.ApiGraphModels
-
-predicate builtinException(string name) {
-  typeModel("builtins.BaseException~Subclass", "builtins." + name, "")
-}
-
-predicate builtinExceptionSubclass(string base, string sub) {
-  typeModel("builtins." + base + "~Subclass", "builtins." + sub, "")
-}
-
-newtype TExceptType =
-  TClass(Class c) or
-  TBuiltin(string name) { builtinException(name) }
-
-class ExceptType extends TExceptType {
-  Class asClass() { this = TClass(result) }
-
-  string asBuiltinName() { this = TBuiltin(result) }
-
-  predicate isBuiltin() { this = TBuiltin(_) }
-
-  string getName() {
-    result = this.asClass().getName()
-    or
-    result = this.asBuiltinName()
-  }
-
-  string toString() { result = this.getName() }
-
-  DataFlow::Node getAUse() {
-    result = classTracker(this.asClass())
-    or
-    API::builtin(this.asBuiltinName()).asSource().flowsTo(result)
-  }
-
-  ExceptType getADirectSuperclass() {
-    result.asClass() = getADirectSuperclass(this.asClass())
-    or
-    result.isBuiltin() and
-    result.getAUse().asExpr() = this.asClass().getABase()
-    or
-    builtinExceptionSubclass(result.asBuiltinName(), this.asBuiltinName()) and
-    this != result
-  }
-
-  /**
-   * Holds if this element is at the specified location.
-   * The location spans column `startColumn` of line `startLine` to
-   * column `endColumn` of line `endLine` in file `filepath`.
-   * For more information, see
-   * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
-   */
-  predicate hasLocationInfo(
-    string filePath, int startLine, int startColumn, int endLine, int endColumn
-  ) {
-    this.asClass()
-        .getLocation()
-        .hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn)
-    or
-    this.isBuiltin() and
-    filePath = "" and
-    startLine = 0 and
-    startColumn = 0 and
-    endLine = 0 and
-    endColumn = 0
-  }
-}
+private import ExceptionTypes
 
 predicate incorrectExceptOrder(ExceptStmt ex1, ExceptType cls1, ExceptStmt ex2, ExceptType cls2) {
   exists(int i, int j, Try t |