From 57d662d69d1c40c65247cf61d896bf3209df63b2 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 18 Feb 2026 17:12:32 +0000 Subject: [PATCH 01/30] Python: Use API graphs instead of points-to for simple built-ins Removes the use of points-to for accessing various built-ins from three of the queries. In order for this to work I had to extend the lists of known built-ins slightly. --- .../python/dataflow/new/internal/Builtins.qll | 8 +++--- python/ql/src/Expressions/UseofApply.ql | 9 ++++--- python/ql/src/Imports/DeprecatedModule.ql | 4 +-- .../ql/src/Statements/ModificationOfLocals.ql | 8 +++--- .../ql/src/Statements/SideEffectInAssert.ql | 12 ++++----- python/ql/src/Statements/UnnecessaryDelete.ql | 8 +++--- python/ql/src/Statements/UnreachableCode.ql | 8 ++---- python/ql/src/Statements/UseOfExit.ql | 6 +++-- .../SuspiciousUnusedLoopIterationVariable.ql | 27 +++++++------------ 9 files changed, 42 insertions(+), 48 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll index 6a66d241083a..764af5d9dc57 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll @@ -32,7 +32,9 @@ module Builtins { "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError", "UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError", // Added for compatibility - "exec" + "exec", + // Added by the `site` module (available by default unless `-S` is used) + "copyright", "credits", "exit", "quit" ] or // Built-in constants shared between Python 2 and 3 @@ -51,8 +53,8 @@ module Builtins { or // Python 2 only result in [ - "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr", - "unicode", "xrange" + "apply", "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", + "unichr", "unicode", "xrange" ] } diff --git a/python/ql/src/Expressions/UseofApply.ql b/python/ql/src/Expressions/UseofApply.ql index 2012f2d93618..f1068eca837c 100644 --- a/python/ql/src/Expressions/UseofApply.ql +++ b/python/ql/src/Expressions/UseofApply.ql @@ -10,9 +10,10 @@ */ import python -private import LegacyPointsTo -private import semmle.python.types.Builtins +private import semmle.python.ApiGraphs -from CallNode call, ControlFlowNodeWithPointsTo func -where major_version() = 2 and call.getFunction() = func and func.pointsTo(Value::named("apply")) +from CallNode call +where + major_version() = 2 and + call = API::builtin("apply").getACall().asCfgNode() select call, "Call to the obsolete builtin function 'apply'." diff --git a/python/ql/src/Imports/DeprecatedModule.ql b/python/ql/src/Imports/DeprecatedModule.ql index 3d529943fb8f..62d772313343 100644 --- a/python/ql/src/Imports/DeprecatedModule.ql +++ b/python/ql/src/Imports/DeprecatedModule.ql @@ -11,7 +11,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs /** * Holds if the module `name` was deprecated in Python version `major`.`minor`, @@ -80,7 +80,7 @@ where name = imp.getName() and deprecated_module(name, instead, _, _) and not exists(Try try, ExceptStmt except | except = try.getAHandler() | - except.getType().(ExprWithPointsTo).pointsTo(ClassValue::importError()) and + except.getType() = API::builtin("ImportError").getAValueReachableFromSource().asExpr() and except.containsInScope(imp) ) select imp, deprecation_message(name) + replacement_message(name) diff --git a/python/ql/src/Statements/ModificationOfLocals.ql b/python/ql/src/Statements/ModificationOfLocals.ql index e4791a410f7a..82529cbd6d0b 100644 --- a/python/ql/src/Statements/ModificationOfLocals.ql +++ b/python/ql/src/Statements/ModificationOfLocals.ql @@ -12,10 +12,10 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs -predicate originIsLocals(ControlFlowNodeWithPointsTo n) { - n.pointsTo(_, _, Value::named("locals").getACall()) +predicate originIsLocals(ControlFlowNode n) { + API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n } predicate modification_of_locals(ControlFlowNode f) { @@ -37,5 +37,5 @@ where // in module level scope `locals() == globals()` // see https://docs.python.org/3/library/functions.html#locals // FP report in https://github.com/github/codeql/issues/6674 - not a.getScope() instanceof ModuleScope + not a.getScope() instanceof Module select a, "Modification of the locals() dictionary will have no effect on the local variables." diff --git a/python/ql/src/Statements/SideEffectInAssert.ql b/python/ql/src/Statements/SideEffectInAssert.ql index 902b6c4c9c16..7ac96030c04e 100644 --- a/python/ql/src/Statements/SideEffectInAssert.ql +++ b/python/ql/src/Statements/SideEffectInAssert.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate func_with_side_effects(Expr e) { exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() | @@ -24,11 +24,11 @@ predicate func_with_side_effects(Expr e) { } predicate call_with_side_effect(Call e) { - e.getAFlowNode() = Value::named("subprocess.call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_output").getACall() + e.getAFlowNode() = + API::moduleImport("subprocess") + .getMember(["call", "check_call", "check_output"]) + .getACall() + .asCfgNode() } predicate probable_side_effect(Expr e) { diff --git a/python/ql/src/Statements/UnnecessaryDelete.ql b/python/ql/src/Statements/UnnecessaryDelete.ql index c7b80ecc66ac..da239f814054 100644 --- a/python/ql/src/Statements/UnnecessaryDelete.ql +++ b/python/ql/src/Statements/UnnecessaryDelete.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate isInsideLoop(AstNode node) { node.getParentNode() instanceof While @@ -33,9 +33,9 @@ where not isInsideLoop(del) and // False positive: calling `sys.exc_info` within a function results in a // reference cycle, and an explicit call to `del` helps break this cycle. - not exists(FunctionValue ex | - ex = Value::named("sys.exc_info") and - ex.getACall().getScope() = f + not exists(API::CallNode call | + call = API::moduleImport("sys").getMember("exc_info").getACall() and + call.getScope() = f ) select del, "Unnecessary deletion of local variable $@ in function $@.", e, e.toString(), f, f.getName() diff --git a/python/ql/src/Statements/UnreachableCode.ql b/python/ql/src/Statements/UnreachableCode.ql index 55582ed2f061..200e073cff6b 100644 --- a/python/ql/src/Statements/UnreachableCode.ql +++ b/python/ql/src/Statements/UnreachableCode.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate typing_import(ImportingStmt is) { exists(Module m | @@ -34,11 +34,7 @@ predicate unique_yield(Stmt s) { /** Holds if `contextlib.suppress` may be used in the same scope as `s` */ predicate suppression_in_scope(Stmt s) { exists(With w | - w.getContextExpr() - .(Call) - .getFunc() - .(ExprWithPointsTo) - .pointsTo(Value::named("contextlib.suppress")) and + w.getContextExpr() = API::moduleImport("contextlib").getMember("suppress").getACall().asExpr() and w.getScope() = s.getScope() ) } diff --git a/python/ql/src/Statements/UseOfExit.ql b/python/ql/src/Statements/UseOfExit.ql index 437ff93b5371..2310a839f67b 100644 --- a/python/ql/src/Statements/UseOfExit.ql +++ b/python/ql/src/Statements/UseOfExit.ql @@ -12,10 +12,12 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs from CallNode call, string name -where call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::siteQuitter(name)) +where + name = ["exit", "quit"] and + call = API::builtin(name).getACall().asCfgNode() select call, "The '" + name + "' site.Quitter object may not exist if the 'site' module is not loaded or is modified." diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql index 87900c48fc58..18c832406675 100644 --- a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql @@ -12,7 +12,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs import Definition predicate is_increment(Stmt s) { @@ -41,23 +41,16 @@ predicate one_item_only(For f) { ) } -predicate points_to_call_to_range(ControlFlowNode f) { - /* (x)range is a function in Py2 and a class in Py3, so we must treat it as a plain object */ - exists(Value range | - range = Value::named("range") or - range = Value::named("xrange") - | - f = range.getACall() - ) +/** Holds if `node` is a call to `range`, `xrange`, or `list(range(...))`. */ +predicate call_to_range(DataFlow::Node node) { + node = API::builtin(["range", "xrange"]).getACall() or - /* In case points-to fails due to 'from six.moves import range' or similar. */ - exists(string range | f.getNode().(Call).getFunc().(Name).getId() = range | - range = "range" or range = "xrange" - ) + /* Handle 'from six.moves import range' or similar. */ + node = API::moduleImport("six").getMember("moves").getMember(["range", "xrange"]).getACall() or /* Handle list(range(...)) and list(list(range(...))) */ - f.(CallNode).(ControlFlowNodeWithPointsTo).pointsTo().getClass() = ClassValue::list() and - points_to_call_to_range(f.(CallNode).getArg(0)) + node = API::builtin("list").getACall() and + call_to_range(node.(DataFlow::CallCfgNode).getArg(0)) } /** Whether n is a use of a variable that is a not effectively a constant. */ @@ -102,8 +95,8 @@ from For f, Variable v, string msg where f.getTarget() = v.getAnAccess() and not f.getAStmt().contains(v.getAnAccess()) and - not points_to_call_to_range(f.getIter().getAFlowNode()) and - not points_to_call_to_range(get_comp_iterable(f)) and + not call_to_range(DataFlow::exprNode(f.getIter())) and + not call_to_range(DataFlow::exprNode(get_comp_iterable(f).getNode())) and not name_acceptable_for_unused_variable(v) and not f.getScope().getName() = "genexpr" and not empty_loop(f) and From 61b4980a94b89fe7b434922244041f4485ac084f Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 19 Feb 2026 15:06:47 +0000 Subject: [PATCH 02/30] Python: Port `py/print-during-import` Uses a (perhaps) slightly coarser approximation of what modules are imported, but it's probably fine. --- python/ql/src/Statements/TopLevelPrint.ql | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/python/ql/src/Statements/TopLevelPrint.ql b/python/ql/src/Statements/TopLevelPrint.ql index 12095f7a484d..1896d41908e3 100644 --- a/python/ql/src/Statements/TopLevelPrint.ql +++ b/python/ql/src/Statements/TopLevelPrint.ql @@ -12,7 +12,6 @@ */ import python -private import LegacyPointsTo predicate main_eq_name(If i) { exists(Name n, StringLiteral m, Compare c | @@ -32,10 +31,19 @@ predicate is_print_stmt(Stmt s) { ) } +/** + * Holds if module `m` is likely used as a module (imported by another module), + * as opposed to being exclusively used as a script. + */ +predicate is_used_as_module(Module m) { + m.isPackageInit() + or + exists(ImportingStmt i | i.getAnImportedModuleName() = m.getName()) +} + from Stmt p where is_print_stmt(p) and - // TODO: Need to discuss how we would like to handle ModuleObject.getKind in the glorious future - exists(ModuleValue m | m.getScope() = p.getScope() and m.isUsedAsModule()) and + is_used_as_module(p.getScope()) and not exists(If i | main_eq_name(i) and i.getASubStatement().getASubStatement*() = p) select p, "Print statement may execute during import." From ed6a6bcc83e86410e006c444fc19ceb39022187b Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 13:37:25 +0000 Subject: [PATCH 03/30] Python: Introduce `DuckTyping` module This module (which for convenience currently resides inside `DataFlowDispatch`, but this may change later) contains convenience predicates for bridging the gap between the data-flow layer and the old points-to analysis. --- .../new/internal/DataFlowDispatch.qll | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index d4444c6795bf..0b6730fa8415 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1956,3 +1956,95 @@ private module OutNodes { * `kind`. */ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) } + +/** + * Provides predicates for approximating type properties of user-defined classes + * based on their structure (method declarations, base classes). + * + * This module should _not_ be used in the call graph computation itself, as parts of it may depend + * on layers that themselves build upon the call graph (e.g. API graphs). + */ +module DuckTyping { + private import semmle.python.ApiGraphs + + /** + * Holds if `cls` or any of its resolved superclasses declares a method with the given `name`. + */ + predicate hasMethod(Class cls, string name) { + cls.getAMethod().getName() = name + or + hasMethod(getADirectSuperclass(cls), name) + } + + /** + * Holds if `cls` has a base class that cannot be resolved to a user-defined class + * and is not just `object`, meaning it may inherit methods from an unknown class. + */ + predicate hasUnresolvedBase(Class cls) { + exists(Expr base | base = cls.getABase() | + not base = classTracker(_).asExpr() and + not base = API::builtin("object").getAValueReachableFromSource().asExpr() + ) + } + + /** + * Holds if `cls` supports the container protocol, i.e. it declares + * `__contains__`, `__iter__`, or `__getitem__`. + */ + predicate isContainer(Class cls) { + hasMethod(cls, "__contains__") or + hasMethod(cls, "__iter__") or + hasMethod(cls, "__getitem__") + } + + /** + * Holds if `cls` supports the iterable protocol, i.e. it declares + * `__iter__` or `__getitem__`. + */ + predicate isIterable(Class cls) { + hasMethod(cls, "__iter__") or + hasMethod(cls, "__getitem__") + } + + /** + * Holds if `cls` supports the iterator protocol, i.e. it declares + * both `__iter__` and `__next__`. + */ + predicate isIterator(Class cls) { + hasMethod(cls, "__iter__") and + hasMethod(cls, "__next__") + } + + /** + * Holds if `cls` supports the context manager protocol, i.e. it declares + * both `__enter__` and `__exit__`. + */ + predicate isContextManager(Class cls) { + hasMethod(cls, "__enter__") and + hasMethod(cls, "__exit__") + } + + /** + * Holds if `cls` supports the descriptor protocol, i.e. it declares + * `__get__`, `__set__`, or `__delete__`. + */ + predicate isDescriptor(Class cls) { + hasMethod(cls, "__get__") or + hasMethod(cls, "__set__") or + hasMethod(cls, "__delete__") + } + + /** + * Holds if `cls` is callable, i.e. it declares `__call__`. + */ + predicate isCallable(Class cls) { hasMethod(cls, "__call__") } + + /** + * Holds if `cls` supports the mapping protocol, i.e. it declares + * `__getitem__` and `__keys__`, or `__getitem__` and `__iter__`. + */ + predicate isMapping(Class cls) { + hasMethod(cls, "__getitem__") and + (hasMethod(cls, "keys") or hasMethod(cls, "__iter__")) + } +} From be61d3d41122b839a945a79c6ab3a3399285eff4 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 14:52:14 +0000 Subject: [PATCH 04/30] Python: Port ContainsNonContainer.ql Uses the new `DuckTyping` module to handle recognising whether a class is a container or not. Only trivial test changes (one version uses "class", the other "Class"). Note that the ported query has no understanding of built-in classes. At some point we'll likely want to replace `hasUnresolvedBase` (which will hold for any class that extends a built-in) with something that's aware of the built-in classes. --- .../src/Expressions/ContainsNonContainer.ql | 24 ++++++++----------- .../general/ContainsNonContainer.expected | 4 ++-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/python/ql/src/Expressions/ContainsNonContainer.ql b/python/ql/src/Expressions/ContainsNonContainer.ql index de8c38795675..a1b989020586 100644 --- a/python/ql/src/Expressions/ContainsNonContainer.ql +++ b/python/ql/src/Expressions/ContainsNonContainer.ql @@ -12,25 +12,21 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch -predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) { - exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() | +predicate rhs_in_expr(Expr rhs, Compare cmp) { + exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs | op instanceof In or op instanceof NotIn ) } -from - ControlFlowNodeWithPointsTo non_seq, Compare cmp, Value v, ClassValue cls, ControlFlowNode origin +from Compare cmp, DataFlow::LocalSourceNode origin, DataFlow::Node rhs, Class cls where - rhs_in_expr(non_seq, cmp) and - non_seq.pointsTo(_, v, origin) and - v.getClass() = cls and - not Types::failedInference(cls, _) and - not cls.hasAttribute("__contains__") and - not cls.hasAttribute("__iter__") and - not cls.hasAttribute("__getitem__") and - not cls = ClassValue::nonetype() and - not cls = Value::named("types.MappingProxyType") + origin = classInstanceTracker(cls) and + origin.flowsTo(rhs) and + not DuckTyping::isContainer(cls) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and + rhs_in_expr(rhs.asExpr(), cmp) select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin, "target", cls, cls.getName() diff --git a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected index cf6d78d0d36b..132852c73f1c 100644 --- a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected +++ b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected @@ -1,2 +1,2 @@ -| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | -| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | +| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | +| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | From 0acad039e25d65915b4266d9d7de6d41e9e39c5d Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 14:53:39 +0000 Subject: [PATCH 05/30] Python: Port NonIteratorInForLoop.ql Same comment as for the preceding commit. We lose one test result due to the fact that we don't know what to do about `for ... in 1` (because `1` is an instance of a built-in). I'm going to defer addressing this until we get some modelling of built-in types. --- .../ql/src/Statements/NonIteratorInForLoop.ql | 21 +++++++++---------- .../iter/NonIteratorInForLoop.expected | 3 +-- .../general/NonIteratorInForLoop.expected | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/python/ql/src/Statements/NonIteratorInForLoop.ql b/python/ql/src/Statements/NonIteratorInForLoop.ql index f8e6e51b55ff..44dfa967bff9 100644 --- a/python/ql/src/Statements/NonIteratorInForLoop.ql +++ b/python/ql/src/Statements/NonIteratorInForLoop.ql @@ -12,16 +12,15 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from For loop, ControlFlowNodeWithPointsTo iter, Value v, ClassValue t, ControlFlowNode origin +from For loop, Expr iter, Class cls where - loop.getIter().getAFlowNode() = iter and - iter.pointsTo(_, v, origin) and - v.getClass() = t and - not t.isIterable() and - not t.failedInference(_) and - not v = Value::named("None") and - not t.isDescriptorType() -select loop, "This for-loop may attempt to iterate over a $@ of class $@.", origin, - "non-iterable instance", t, t.getName() + iter = loop.getIter() and + classInstanceTracker(cls).asExpr() = iter and + not DuckTyping::isIterable(cls) and + not DuckTyping::isDescriptor(cls) and + not (loop.isAsync() and DuckTyping::hasMethod(cls, "__aiter__")) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) +select loop, "This for-loop may attempt to iterate over a $@ of class $@.", iter, + "non-iterable instance", cls, cls.getName() diff --git a/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected b/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected index c59db3b2b657..396382d0b9f9 100644 --- a/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected +++ b/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected @@ -1,2 +1 @@ -| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | ControlFlowNode for MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | class MissingAiter | MissingAiter | -| statements_test.py:34:5:34:19 | For | This for-loop may attempt to iterate over a $@ of class $@. | statements_test.py:34:18:34:18 | ControlFlowNode for IntegerLiteral | non-iterable instance | file://:0:0:0:0 | builtin-class int | int | +| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | Class MissingAiter | MissingAiter | diff --git a/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected b/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected index 4c79685061f5..d11b87191ba6 100644 --- a/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected +++ b/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected @@ -1 +1 @@ -| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | ControlFlowNode for NonIterator() | non-iterable instance | test.py:45:1:45:26 | class NonIterator | NonIterator | +| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | NonIterator() | non-iterable instance | test.py:45:1:45:26 | Class NonIterator | NonIterator | From da164543f774f94c6c25408502e1285b65203186 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 14:54:19 +0000 Subject: [PATCH 06/30] Python: Port ShouldUseWithStatement.ql Only trivial test changes. --- python/ql/src/Statements/ShouldUseWithStatement.ql | 14 ++++---------- .../general/ShouldUseWithStatement.expected | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/python/ql/src/Statements/ShouldUseWithStatement.ql b/python/ql/src/Statements/ShouldUseWithStatement.ql index eb5cf9237d57..20bf053f6daa 100644 --- a/python/ql/src/Statements/ShouldUseWithStatement.ql +++ b/python/ql/src/Statements/ShouldUseWithStatement.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") } @@ -23,18 +23,12 @@ predicate only_stmt_in_finally(Try t, Call c) { ) } -predicate points_to_context_manager(ControlFlowNodeWithPointsTo f, ClassValue cls) { - forex(Value v | f.pointsTo(v) | v.getClass() = cls) and - cls.isContextManager() -} - -from Call close, Try t, ClassValue cls +from Call close, Try t, Class cls where only_stmt_in_finally(t, close) and calls_close(close) and - exists(ControlFlowNode f | f = close.getFunc().getAFlowNode().(AttrNode).getObject() | - points_to_context_manager(f, cls) - ) + classInstanceTracker(cls).asExpr() = close.getFunc().(Attribute).getObject() and + DuckTyping::isContextManager(cls) select close, "Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.", cls, cls.getName() diff --git a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected index d062717bbf25..50ff6cc1f914 100644 --- a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected +++ b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected @@ -1 +1 @@ -| test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | class CM | CM | +| test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | Class CM | CM | From 49289470afeeeb9da761b73590badc0e1d568dae Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:03:15 +0000 Subject: [PATCH 07/30] Python: Port UnusedExceptionObject.ql Depending on whether other queries depend on this, we may end up moving the exception utility functions to a more central location. --- .../src/Statements/UnusedExceptionObject.ql | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/python/ql/src/Statements/UnusedExceptionObject.ql b/python/ql/src/Statements/UnusedExceptionObject.ql index 9a6a3650b7e6..890cdc963aca 100644 --- a/python/ql/src/Statements/UnusedExceptionObject.ql +++ b/python/ql/src/Statements/UnusedExceptionObject.ql @@ -12,11 +12,49 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.dataflow.new.internal.Builtins +private import semmle.python.ApiGraphs -from Call call, ClassValue ex +/** + * Holds if `cls` is a user-defined exception class, i.e. it transitively + * extends one of the builtin exception base classes. + */ +predicate isUserDefinedExceptionClass(Class cls) { + cls.getABase() = + API::builtin(["BaseException", "Exception"]).getAValueReachableFromSource().asExpr() + or + isUserDefinedExceptionClass(getADirectSuperclass(cls)) +} + +/** + * Gets the name of a builtin exception class. + */ +string getBuiltinExceptionName() { + result = Builtins::getBuiltinName() and + ( + result.matches("%Error") or + result.matches("%Exception") or + result.matches("%Warning") or + result = + ["GeneratorExit", "KeyboardInterrupt", "StopIteration", "StopAsyncIteration", "SystemExit"] + ) +} + +/** + * Holds if `call` is an instantiation of an exception class. + */ +predicate isExceptionInstantiation(Call call) { + exists(Class cls | + classTracker(cls).asExpr() = call.getFunc() and + isUserDefinedExceptionClass(cls) + ) + or + call.getFunc() = API::builtin(getBuiltinExceptionName()).getAValueReachableFromSource().asExpr() +} + +from Call call where - call.getFunc().(ExprWithPointsTo).pointsTo(ex) and - ex.getASuperType() = ClassValue::exception() and + isExceptionInstantiation(call) and exists(ExprStmt s | s.getValue() = call) select call, "Instantiating an exception, but not raising it, has no effect." From 70a67aab53184507fb1cb7ef1eaf9ab583df378a Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:19:18 +0000 Subject: [PATCH 08/30] Python: Add `DuckTyping::isNewStyle` Approximates the behaviour of `Types::isNewStyle` but without depending on points-to --- .../new/internal/DataFlowDispatch.qll | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 0b6730fa8415..67af03fc07df 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2047,4 +2047,30 @@ module DuckTyping { hasMethod(cls, "__getitem__") and (hasMethod(cls, "keys") or hasMethod(cls, "__iter__")) } + + /** + * Holds if `cls` is a new-style class. In Python 3, all classes are new-style. + * In Python 2, a class is new-style if it (transitively) inherits from `object`, + * or has a declared `__metaclass__`, or has an unresolved base class. + */ + predicate isNewStyle(Class cls) { + major_version() = 3 + or + major_version() = 2 and + ( + cls.getABase() = API::builtin("object").getAValueReachableFromSource().asExpr() + or + isNewStyle(getADirectSuperclass(cls)) + or + hasUnresolvedBase(cls) + or + exists(cls.getMetaClass()) + or + // Module-level __metaclass__ = type makes all classes in the module new-style + exists(Assign a | + a.getScope() = cls.getEnclosingModule() and + a.getATarget().(Name).getId() = "__metaclass__" + ) + ) + } } From 815a7adf846cdb5c9c7a274662110491e9c34778 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:56:14 +0000 Subject: [PATCH 09/30] Python: Add declares/getAttribute API These could arguably be moved to `Class` itself, but for now I'm choosing to limit the changes to the `DuckTyping` module (until we decide on a proper API). --- .../dataflow/new/internal/DataFlowDispatch.qll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 67af03fc07df..533a565e2eaa 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2034,6 +2034,23 @@ module DuckTyping { hasMethod(cls, "__delete__") } + /** + * Holds if `cls` directly assigns to an attribute named `name` in its class body. + * This covers attribute assignments like `x = value`, but not method definitions. + */ + predicate declaresAttribute(Class cls, string name) { exists(getAnAttributeValue(cls, name)) } + + /** + * Gets the value expression assigned to attribute `name` directly in the class body of `cls`. + */ + Expr getAnAttributeValue(Class cls, string name) { + exists(Assign a | + a.getScope() = cls and + a.getATarget().(Name).getId() = name and + result = a.getValue() + ) + } + /** * Holds if `cls` is callable, i.e. it declares `__call__`. */ From 2af8b2929cd40dd628b028e30c7ee9e5001068f9 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:25:57 +0000 Subject: [PATCH 10/30] Python: Port SlotsInOldStyleClass.ql Only trivial test changes. --- python/ql/src/Classes/SlotsInOldStyleClass.ql | 8 +++++--- .../Classes/new-style/SlotsInOldStyleClass.expected | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/ql/src/Classes/SlotsInOldStyleClass.ql b/python/ql/src/Classes/SlotsInOldStyleClass.ql index 2f91a88cf64e..70a7997113e6 100644 --- a/python/ql/src/Classes/SlotsInOldStyleClass.ql +++ b/python/ql/src/Classes/SlotsInOldStyleClass.ql @@ -12,9 +12,11 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from ClassObject c -where not c.isNewStyle() and c.declaresAttribute("__slots__") and not c.failedInference() +from Class c +where + not DuckTyping::isNewStyle(c) and + DuckTyping::declaresAttribute(c, "__slots__") select c, "Using '__slots__' in an old style class just creates a class attribute called '__slots__'." diff --git a/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected b/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected index ccad85bd3846..14d30913b93a 100644 --- a/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected +++ b/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected @@ -1 +1 @@ -| newstyle_test.py:4:1:4:16 | class OldStyle1 | Using '__slots__' in an old style class just creates a class attribute called '__slots__'. | +| newstyle_test.py:4:1:4:16 | Class OldStyle1 | Using '__slots__' in an old style class just creates a class attribute called '__slots__'. | From 671571db4ceeadeeb96f196ccf6e27d0011942eb Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:26:27 +0000 Subject: [PATCH 11/30] Python: Port SuperInOldStyleClass.ql --- python/ql/src/Classes/SuperInOldStyleClass.ql | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/ql/src/Classes/SuperInOldStyleClass.ql b/python/ql/src/Classes/SuperInOldStyleClass.ql index a6a272b1b3b7..bc6541052de0 100644 --- a/python/ql/src/Classes/SuperInOldStyleClass.ql +++ b/python/ql/src/Classes/SuperInOldStyleClass.ql @@ -11,14 +11,13 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate uses_of_super_in_old_style_class(Call s) { - exists(Function f, ClassObject c | + exists(Function f, Class c | s.getScope() = f and - f.getScope() = c.getPyClass() and - not c.failedInference() and - not c.isNewStyle() and + f.getScope() = c and + not DuckTyping::isNewStyle(c) and s.getFunc().(Name).getId() = "super" ) } From ecd8fe60579014af77d70ce1d153e2cc953bbe6d Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:27:32 +0000 Subject: [PATCH 12/30] Python: Port PropertyInOldStyleClass.ql Only trivial test changes. --- python/ql/src/Classes/PropertyInOldStyleClass.ql | 9 ++++++--- .../Classes/new-style/PropertyInOldStyleClass.expected | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/ql/src/Classes/PropertyInOldStyleClass.ql b/python/ql/src/Classes/PropertyInOldStyleClass.ql index 2fd7b1d14cf2..80a5ef06bbee 100644 --- a/python/ql/src/Classes/PropertyInOldStyleClass.ql +++ b/python/ql/src/Classes/PropertyInOldStyleClass.ql @@ -11,10 +11,13 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from PropertyObject prop, ClassObject cls -where cls.declaredAttribute(_) = prop and not cls.failedInference() and not cls.isNewStyle() +from Function prop, Class cls +where + prop.getScope() = cls and + prop.getADecorator().(Name).getId() = "property" and + not DuckTyping::isNewStyle(cls) select prop, "Property " + prop.getName() + " will not work properly, as class " + cls.getName() + " is an old-style class." diff --git a/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected b/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected index 9fb3e582cb7b..1e5be51fbaff 100644 --- a/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected +++ b/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected @@ -1 +1 @@ -| property_old_style.py:8:6:8:13 | Property piosc | Property piosc will not work properly, as class OldStyle is an old-style class. | +| property_old_style.py:9:5:9:20 | Function piosc | Property piosc will not work properly, as class OldStyle is an old-style class. | From c30a25fb58fa84f3fd934de490099c2e2f5f0941 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:29:48 +0000 Subject: [PATCH 13/30] Python: Port InconsistentMRO.ql For this one we actually lose a test result. However, this is kind of to be expected since we no longer have the "precise" MRO that the points-to analysis computes. Honestly, I'm on the fence about even keeping this query at all. It seems like it might be superfluous in a world with good Python type checking. --- python/ql/src/Classes/InconsistentMRO.ql | 19 ++++++++++++------- .../inconsistent-mro/InconsistentMRO.expected | 2 +- .../inconsistent-mro/InconsistentMRO.expected | 3 +-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/python/ql/src/Classes/InconsistentMRO.ql b/python/ql/src/Classes/InconsistentMRO.ql index aa319d56114e..73f6bf9240b8 100644 --- a/python/ql/src/Classes/InconsistentMRO.ql +++ b/python/ql/src/Classes/InconsistentMRO.ql @@ -12,19 +12,24 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -ClassObject left_base(ClassObject type, ClassObject base) { - exists(int i | i > 0 and type.getBaseType(i) = base and result = type.getBaseType(i - 1)) +/** + * Gets the `i`th base class of `cls`, if it can be resolved to a user-defined class. + */ +Class getBaseType(Class cls, int i) { cls.getBase(i) = classTracker(result).asExpr() } + +Class left_base(Class type, Class base) { + exists(int i | i > 0 and getBaseType(type, i) = base and result = getBaseType(type, i - 1)) } -predicate invalid_mro(ClassObject t, ClassObject left, ClassObject right) { - t.isNewStyle() and +predicate invalid_mro(Class t, Class left, Class right) { + DuckTyping::isNewStyle(t) and left = left_base(t, right) and - left = right.getAnImproperSuperType() + left = getADirectSuperclass*(right) } -from ClassObject t, ClassObject left, ClassObject right +from Class t, Class left, Class right where invalid_mro(t, left, right) select t, "Construction of class " + t.getName() + diff --git a/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected b/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected index ebec9bb61e0b..93d12fc0a454 100644 --- a/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected +++ b/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected @@ -1 +1 @@ -| inconsistent_mro.py:9:1:9:14 | class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | class X | X | inconsistent_mro.py:6:1:6:11 | class Y | Y | +| inconsistent_mro.py:9:1:9:14 | Class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | Class X | X | inconsistent_mro.py:6:1:6:11 | Class Y | Y | diff --git a/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected b/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected index ca0a64f70e1b..93d12fc0a454 100644 --- a/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected +++ b/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected @@ -1,2 +1 @@ -| inconsistent_mro.py:9:1:9:14 | class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | class X | X | inconsistent_mro.py:6:1:6:11 | class Y | Y | -| inconsistent_mro.py:16:1:16:19 | class N | Construction of class N can fail due to invalid method resolution order(MRO) for bases $@ and $@. | file://:Compiled Code:0:0:0:0 | builtin-class object | object | inconsistent_mro.py:12:1:12:8 | class O | O | +| inconsistent_mro.py:9:1:9:14 | Class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | Class X | X | inconsistent_mro.py:6:1:6:11 | Class Y | Y | From 933af14f12afce49c129aacbd228fd0192132008 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 16:33:25 +0000 Subject: [PATCH 14/30] Python: Port HashedButNoHash.ql This one is a bit more involved. Of note is the fact that it at present only uses local flow when determining the origin of some value (whereas the points-to version used global flow). It may be desirable to rewrite this query to use global data-flow, but this should be done with some care (as using "all unhashable objects" as the set of sources is somewhat iffy with respect to performance). For that reason, I'm sticking to mostly local flow (except for well behaved things like types and built-ins). --- python/ql/src/Expressions/HashedButNoHash.ql | 115 +++++++++++------- .../general/HashedButNoHash.expected | 2 +- 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/python/ql/src/Expressions/HashedButNoHash.ql b/python/ql/src/Expressions/HashedButNoHash.ql index 705806bf3605..93edc7c9c6cc 100644 --- a/python/ql/src/Expressions/HashedButNoHash.ql +++ b/python/ql/src/Expressions/HashedButNoHash.ql @@ -12,76 +12,97 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.ApiGraphs -/* - * This assumes that any indexing operation where the value is not a sequence or numpy array involves hashing. - * For sequences, the index must be an int, which are hashable, so we don't need to treat them specially. - * For numpy arrays, the index may be a list, which are not hashable and needs to be treated specially. +/** + * Holds if `cls` explicitly sets `__hash__` to `None`, making instances unhashable. */ - -predicate numpy_array_type(ClassValue na) { - exists(ModuleValue np | np.getName() = "numpy" or np.getName() = "numpy.core" | - na.getASuperType() = np.attr("ndarray") - ) +predicate setsHashToNone(Class cls) { + DuckTyping::getAnAttributeValue(cls, "__hash__") instanceof None } -predicate has_custom_getitem(Value v) { - v.getClass().lookup("__getitem__") instanceof PythonFunctionValue +/** + * Holds if `cls` is a user-defined class whose instances are unhashable. + * A new-style class without `__hash__` is unhashable, as is one that explicitly + * sets `__hash__ = None`. + */ +predicate isUnhashableUserClass(Class cls) { + DuckTyping::isNewStyle(cls) and + not DuckTyping::hasMethod(cls, "__hash__") and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) or - numpy_array_type(v.getClass()) + setsHashToNone(cls) } -predicate explicitly_hashed(ControlFlowNode f) { - exists(CallNode c, GlobalVariable hash | - c.getArg(0) = f and c.getFunction().(NameNode).uses(hash) and hash.getId() = "hash" +/** + * Gets the name of a builtin type whose instances are unhashable. + */ +string getUnhashableBuiltinName() { result = ["list", "set", "dict", "bytearray"] } + +/** + * Holds if `origin` is a local source node tracking an unhashable instance that + * flows to `node`, with `clsName` describing the class for the alert. + */ +predicate isUnhashable(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) { + exists(Class c | + isUnhashableUserClass(c) and + origin = classInstanceTracker(c) and + origin.flowsTo(node) and + clsName = c.getName() ) + or + clsName = getUnhashableBuiltinName() and + origin = API::builtin(clsName).getAnInstance().asSource() and + origin.flowsTo(node) +} + +predicate explicitly_hashed(DataFlow::Node node) { + node = API::builtin("hash").getACall().getArg(0) } -predicate unhashable_subscript(ControlFlowNode f, ClassValue c, ControlFlowNode origin) { - is_unhashable(f, c, origin) and - exists(SubscriptNode sub | sub.getIndex() = f | - exists(Value custom_getitem | - sub.getObject().(ControlFlowNodeWithPointsTo).pointsTo(custom_getitem) and - not has_custom_getitem(custom_getitem) - ) +/** + * Holds if the subscript object in `sub[...]` is known to use hashing for indexing, + * i.e. it does not have a custom `__getitem__` that could accept unhashable indices. + */ +predicate subscriptUsesHashing(Subscript sub) { + DataFlow::exprNode(sub.getObject()) = + API::builtin("dict").getAnInstance().getAValueReachableFromSource() + or + exists(Class cls | + classInstanceTracker(cls) + .(DataFlow::LocalSourceNode) + .flowsTo(DataFlow::exprNode(sub.getObject())) and + not DuckTyping::hasMethod(cls, "__getitem__") ) } -predicate is_unhashable(ControlFlowNodeWithPointsTo f, ClassValue cls, ControlFlowNode origin) { - exists(Value v | f.pointsTo(v, origin) and v.getClass() = cls | - not cls.hasAttribute("__hash__") and not cls.failedInference(_) and cls.isNewStyle() - or - cls.lookup("__hash__") = Value::named("None") +predicate unhashable_subscript(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) { + exists(Subscript sub | + node = DataFlow::exprNode(sub.getIndex()) and + subscriptUsesHashing(sub) + | + isUnhashable(origin, node, clsName) ) } /** - * Holds if `f` is inside a `try` that catches `TypeError`. For example: - * - * try: - * ... f ... - * except TypeError: - * ... - * - * This predicate is used to eliminate false positive results. If `hash` - * is called on an unhashable object then a `TypeError` will be thrown. - * But this is not a bug if the code catches the `TypeError` and handles - * it. + * Holds if `e` is inside a `try` that catches `TypeError`. */ -predicate typeerror_is_caught(ControlFlowNode f) { +predicate typeerror_is_caught(Expr e) { exists(Try try | - try.getBody().contains(f.getNode()) and - try.getAHandler().getType().(ExprWithPointsTo).pointsTo(ClassValue::typeError()) + try.getBody().contains(e) and + try.getAHandler().getType() = API::builtin("TypeError").getAValueReachableFromSource().asExpr() ) } -from ControlFlowNode f, ClassValue c, ControlFlowNode origin +from DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName where - not typeerror_is_caught(f) and + not typeerror_is_caught(node.asExpr()) and ( - explicitly_hashed(f) and is_unhashable(f, c, origin) + explicitly_hashed(node) and isUnhashable(origin, node, clsName) or - unhashable_subscript(f, c, origin) + unhashable_subscript(origin, node, clsName) ) -select f.getNode(), "This $@ of $@ is unhashable.", origin, "instance", c, c.getQualifiedName() +select node, "This $@ of $@ is unhashable.", origin, "instance", origin, clsName diff --git a/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected b/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected index 9589547056f7..536a7281d2d6 100644 --- a/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected +++ b/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected @@ -1 +1 @@ -| expressions_test.py:42:20:42:25 | unhash | This $@ of $@ is unhashable. | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | instance | file://:0:0:0:0 | builtin-class list | list | +| expressions_test.py:42:20:42:25 | ControlFlowNode for unhash | This $@ of $@ is unhashable. | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | instance | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | list | From 38347633e1630d614466fae6c005191e0b299b65 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 16:54:34 +0000 Subject: [PATCH 15/30] Python: Port UselessClass.ql No test changes. --- python/ql/src/Classes/UselessClass.ql | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/python/ql/src/Classes/UselessClass.ql b/python/ql/src/Classes/UselessClass.ql index 740c74bf96d9..229e42fd292f 100644 --- a/python/ql/src/Classes/UselessClass.ql +++ b/python/ql/src/Classes/UselessClass.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate fewer_than_two_public_methods(Class cls, int methods) { (methods = 0 or methods = 1) and @@ -25,13 +25,8 @@ predicate does_not_define_special_method(Class cls) { } predicate no_inheritance(Class c) { - not exists(ClassValue cls, ClassValue other | - cls.getScope() = c and - other != ClassValue::object() - | - other.getABaseType() = cls or - cls.getABaseType() = other - ) and + not exists(getADirectSubclass(c)) and + not exists(getADirectSuperclass(c)) and not exists(Expr base | base = c.getABase() | not base instanceof Name or base.(Name).getId() != "object" ) From 096bcd5b4182c0d3bfc7dfdf34e8414ad2b2e28c Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 16:54:49 +0000 Subject: [PATCH 16/30] Python: Port ShouldBeContextManager.ql Only trivial test changes. --- python/ql/src/Classes/ShouldBeContextManager.ql | 8 +++++--- .../ShouldBeContextManager.expected | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/ql/src/Classes/ShouldBeContextManager.ql b/python/ql/src/Classes/ShouldBeContextManager.ql index 6aec0f0e0ab0..9a50d841a740 100644 --- a/python/ql/src/Classes/ShouldBeContextManager.ql +++ b/python/ql/src/Classes/ShouldBeContextManager.ql @@ -14,10 +14,12 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from ClassValue c -where not c.isBuiltin() and not c.isContextManager() and exists(c.declaredAttribute("__del__")) +from Class c +where + not DuckTyping::isContextManager(c) and + DuckTyping::hasMethod(c, "__del__") select c, "Class " + c.getName() + " implements __del__ (presumably to release some resource). Consider making it a context manager." diff --git a/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected b/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected index 47c773804ae8..3cd6d92ff641 100644 --- a/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected +++ b/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected @@ -1,2 +1,2 @@ -| should_be_context_manager.py:3:1:3:22 | class MegaDel | Class MegaDel implements __del__ (presumably to release some resource). Consider making it a context manager. | -| should_be_context_manager.py:16:1:16:22 | class MiniDel | Class MiniDel implements __del__ (presumably to release some resource). Consider making it a context manager. | +| should_be_context_manager.py:3:1:3:22 | Class MegaDel | Class MegaDel implements __del__ (presumably to release some resource). Consider making it a context manager. | +| should_be_context_manager.py:16:1:16:22 | Class MiniDel | Class MiniDel implements __del__ (presumably to release some resource). Consider making it a context manager. | From 849da2859564b9aad36b064572dbc25585e5a2fe Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 23 Feb 2026 13:36:31 +0000 Subject: [PATCH 17/30] Python: Port WrongNameForArgumentInClassInstantiation.ql --- .../new/internal/DataFlowDispatch.qll | 6 ++++ ...rongNameForArgumentInClassInstantiation.ql | 30 ++++++++++++++++--- ...meForArgumentInClassInstantiation.expected | 8 ++--- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 533a565e2eaa..a85256d303f9 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2090,4 +2090,10 @@ module DuckTyping { ) ) } + + /** + * Gets the `__init__` function that will be invoked when `cls` is constructed, + * resolved according to the MRO. + */ + Function getInit(Class cls) { result = invokedFunctionFromClassConstruction(cls, "__init__") } } diff --git a/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql index 5b50855dcdf7..984114111f50 100644 --- a/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql +++ b/python/ql/src/Classes/WrongNameForArgumentInClassInstantiation.ql @@ -15,12 +15,34 @@ */ import python -import Expressions.CallArgs -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from Call call, ClassValue cls, string name, FunctionValue init +/** + * Holds if `name` is a legal argument name for calling `init`. + */ +bindingset[name] +predicate isLegalArgumentName(Function init, string name) { + exists(init.getArgByName(name)) + or + init.hasKwArg() +} + +/** + * Holds if `call` constructs class `cls` and passes a keyword argument `name` + * that does not correspond to any parameter of `cls.__init__`. + */ +predicate illegally_named_parameter(Call call, Class cls, string name) { + exists(Function init | + resolveClassCall(call.getAFlowNode(), cls) and + init = DuckTyping::getInit(cls) and + name = call.getANamedArgumentName() and + not isLegalArgumentName(init, name) + ) +} + +from Call call, Class cls, string name, Function init where illegally_named_parameter(call, cls, name) and - init = get_function_or_initializer(cls) + init = DuckTyping::getInit(cls) select call, "Keyword argument '" + name + "' is not a supported parameter name of $@.", init, init.getQualifiedName() diff --git a/python/ql/test/query-tests/Classes/Arguments/WrongNameForArgumentInClassInstantiation.expected b/python/ql/test/query-tests/Classes/Arguments/WrongNameForArgumentInClassInstantiation.expected index 4bdb11347768..cf89ddfc44f0 100644 --- a/python/ql/test/query-tests/Classes/Arguments/WrongNameForArgumentInClassInstantiation.expected +++ b/python/ql/test/query-tests/Classes/Arguments/WrongNameForArgumentInClassInstantiation.expected @@ -1,4 +1,4 @@ -| wrong_arguments.py:65:1:65:7 | F0() | Keyword argument 'y' is not a supported parameter name of $@. | wrong_arguments.py:4:5:4:26 | Function F0.__init__ | F0.__init__ | -| wrong_arguments.py:66:1:66:7 | F1() | Keyword argument 'z' is not a supported parameter name of $@. | wrong_arguments.py:8:5:8:36 | Function F1.__init__ | F1.__init__ | -| wrong_arguments.py:67:1:67:12 | F2() | Keyword argument 'y' is not a supported parameter name of $@. | wrong_arguments.py:12:5:12:30 | Function F2.__init__ | F2.__init__ | -| wrong_arguments.py:92:1:92:27 | F6() | Keyword argument 'z' is not a supported parameter name of $@. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | +| wrong_arguments.py:65:1:65:7 | F0() | Keyword argument 'y' is not a supported parameter name of $@. | wrong_arguments.py:4:5:4:26 | Function __init__ | F0.__init__ | +| wrong_arguments.py:66:1:66:7 | F1() | Keyword argument 'z' is not a supported parameter name of $@. | wrong_arguments.py:8:5:8:36 | Function __init__ | F1.__init__ | +| wrong_arguments.py:67:1:67:12 | F2() | Keyword argument 'y' is not a supported parameter name of $@. | wrong_arguments.py:12:5:12:30 | Function __init__ | F2.__init__ | +| wrong_arguments.py:92:1:92:27 | F6() | Keyword argument 'z' is not a supported parameter name of $@. | wrong_arguments.py:28:5:28:30 | Function __init__ | F6.__init__ | From 52d45a1aa290ed584d65f304a2319ed5a19092c5 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 23 Feb 2026 14:43:49 +0000 Subject: [PATCH 18/30] Python: Port WrongNumberArgumentsInClassInstantiation.ql Included test changes are trivial `toString` changes. --- ...rongNumberArgumentsInClassInstantiation.ql | 59 +++++++++++++++++-- ...mberArgumentsInClassInstantiation.expected | 26 ++++---- 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql index 263a1a336a1a..7309860f287b 100644 --- a/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql +++ b/python/ql/src/Classes/WrongNumberArgumentsInClassInstantiation.ql @@ -14,10 +14,61 @@ */ import python -import Expressions.CallArgs -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from Call call, ClassValue cls, string too, string should, int limit, FunctionValue init +/** + * Gets the number of positional arguments in `call`, including elements of any + * literal list passed as `*args`, plus keyword arguments that don't match + * keyword-only parameters (when the function doesn't accept `**kwargs`). + */ +int positional_arg_count(Call call, Function init) { + exists(int positional_keywords | + ( + not init.hasKwArg() and + positional_keywords = + count(Keyword kw | + kw = call.getAKeyword() and + not init.getAKeywordOnlyArg().getId() = kw.getArg() + ) + or + init.hasKwArg() and positional_keywords = 0 + ) and + result = + count(call.getAnArg()) + count(call.getStarargs().(List).getAnElt()) + positional_keywords + ) +} + +/** + * Holds if `call` constructs `cls` with too many arguments, where `limit` is the maximum. + */ +predicate too_many_args(Call call, Class cls, int limit) { + exists(Function init | + resolveClassCall(call.getAFlowNode(), cls) and + init = DuckTyping::getInit(cls) and + not init.hasVarArg() and + // Subtract 1 from max to account for `self` parameter + limit = init.getMaxPositionalArguments() - 1 and + limit >= 0 and + positional_arg_count(call, init) > limit + ) +} + +/** + * Holds if `call` constructs `cls` with too few arguments, where `limit` is the minimum. + */ +predicate too_few_args(Call call, Class cls, int limit) { + exists(Function init | + resolveClassCall(call.getAFlowNode(), cls) and + init = DuckTyping::getInit(cls) and + not exists(call.getStarargs()) and + not exists(call.getKwargs()) and + // Subtract 1 from min to account for `self` parameter + limit = init.getMinPositionalArguments() - 1 and + count(call.getAnArg()) + count(call.getAKeyword()) < limit + ) +} + +from Call call, Class cls, string too, string should, int limit, Function init where ( too_many_args(call, cls, limit) and @@ -28,6 +79,6 @@ where too = "too few arguments" and should = "no fewer than " ) and - init = get_function_or_initializer(cls) + init = DuckTyping::getInit(cls) select call, "Call to $@ with " + too + "; should be " + should + limit.toString() + ".", init, init.getQualifiedName() diff --git a/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected b/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected index f0df1fe58c98..290350259642 100644 --- a/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected +++ b/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected @@ -1,15 +1,15 @@ -| wrong_arguments.py:37:1:37:4 | F0() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:4:5:4:26 | Function F0.__init__ | F0.__init__ | -| wrong_arguments.py:38:1:38:4 | F1() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:8:5:8:36 | Function F1.__init__ | F1.__init__ | -| wrong_arguments.py:39:1:39:4 | F2() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:12:5:12:30 | Function F2.__init__ | F2.__init__ | -| wrong_arguments.py:40:1:40:4 | F3() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:16:5:16:40 | Function F3.__init__ | F3.__init__ | -| wrong_arguments.py:41:1:41:4 | F4() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:20:5:20:31 | Function F4.__init__ | F4.__init__ | -| wrong_arguments.py:42:1:42:4 | F5() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:24:5:24:42 | Function F5.__init__ | F5.__init__ | -| wrong_arguments.py:43:1:43:5 | F6() | Call to $@ with too few arguments; should be no fewer than 2. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | -| wrong_arguments.py:44:1:44:7 | F7() | Call to $@ with too few arguments; should be no fewer than 3. | wrong_arguments.py:32:5:32:33 | Function F7.__init__ | F7.__init__ | -| wrong_arguments.py:48:1:48:7 | F0() | Call to $@ with too many arguments; should be no more than 1. | wrong_arguments.py:4:5:4:26 | Function F0.__init__ | F0.__init__ | -| wrong_arguments.py:49:1:49:9 | F1() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:8:5:8:36 | Function F1.__init__ | F1.__init__ | -| wrong_arguments.py:50:1:50:9 | F5() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:24:5:24:42 | Function F5.__init__ | F5.__init__ | -| wrong_arguments.py:51:1:51:9 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | -| wrong_arguments.py:52:1:52:11 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | | wrong_arguments.py:85:1:85:12 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | | wrong_arguments.py:86:1:86:7 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | +| wrong_arguments.py:37:1:37:4 | F0() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:4:5:4:26 | Function __init__ | F0.__init__ | +| wrong_arguments.py:38:1:38:4 | F1() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:8:5:8:36 | Function __init__ | F1.__init__ | +| wrong_arguments.py:39:1:39:4 | F2() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:12:5:12:30 | Function __init__ | F2.__init__ | +| wrong_arguments.py:40:1:40:4 | F3() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:16:5:16:40 | Function __init__ | F3.__init__ | +| wrong_arguments.py:41:1:41:4 | F4() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:20:5:20:31 | Function __init__ | F4.__init__ | +| wrong_arguments.py:42:1:42:4 | F5() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:24:5:24:42 | Function __init__ | F5.__init__ | +| wrong_arguments.py:43:1:43:5 | F6() | Call to $@ with too few arguments; should be no fewer than 2. | wrong_arguments.py:28:5:28:30 | Function __init__ | F6.__init__ | +| wrong_arguments.py:44:1:44:7 | F7() | Call to $@ with too few arguments; should be no fewer than 3. | wrong_arguments.py:32:5:32:33 | Function __init__ | F7.__init__ | +| wrong_arguments.py:48:1:48:7 | F0() | Call to $@ with too many arguments; should be no more than 1. | wrong_arguments.py:4:5:4:26 | Function __init__ | F0.__init__ | +| wrong_arguments.py:49:1:49:9 | F1() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:8:5:8:36 | Function __init__ | F1.__init__ | +| wrong_arguments.py:50:1:50:9 | F5() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:24:5:24:42 | Function __init__ | F5.__init__ | +| wrong_arguments.py:51:1:51:9 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function __init__ | F6.__init__ | +| wrong_arguments.py:52:1:52:11 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function __init__ | F6.__init__ | From 9c54c418daa654a0804035d8e0418ca191c3c8a1 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 23 Feb 2026 15:40:09 +0000 Subject: [PATCH 19/30] Python: Remove missing results These results are missing because we no longer (unlike the points-to analysis) track how many elements a given tuple has. This is something we might want to implement in the future (most likely through an `int`-indexed type tracker). --- .../Arguments/WrongNumberArgumentsInClassInstantiation.expected | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected b/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected index 290350259642..bccdcf51d66e 100644 --- a/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected +++ b/python/ql/test/query-tests/Classes/Arguments/WrongNumberArgumentsInClassInstantiation.expected @@ -1,5 +1,3 @@ -| wrong_arguments.py:85:1:85:12 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | -| wrong_arguments.py:86:1:86:7 | F6() | Call to $@ with too many arguments; should be no more than 2. | wrong_arguments.py:28:5:28:30 | Function F6.__init__ | F6.__init__ | | wrong_arguments.py:37:1:37:4 | F0() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:4:5:4:26 | Function __init__ | F0.__init__ | | wrong_arguments.py:38:1:38:4 | F1() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:8:5:8:36 | Function __init__ | F1.__init__ | | wrong_arguments.py:39:1:39:4 | F2() | Call to $@ with too few arguments; should be no fewer than 1. | wrong_arguments.py:12:5:12:30 | Function __init__ | F2.__init__ | From 6d339b58c55007564c650931f7d700678643fd71 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 23 Feb 2026 15:55:58 +0000 Subject: [PATCH 20/30] Python: Extend DuckTyping module Adds `overridesMethod` and `isPropertyAccessor`. --- .../dataflow/new/internal/DataFlowDispatch.qll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index a85256d303f9..3d048ce72ebb 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2096,4 +2096,21 @@ module DuckTyping { * resolved according to the MRO. */ Function getInit(Class cls) { result = invokedFunctionFromClassConstruction(cls, "__init__") } + + /** + * Holds if `f` overrides a method in a superclass with the same name. + */ + predicate overridesMethod(Function f) { + exists(Class cls | f.getScope() = cls | hasMethod(getADirectSuperclass(cls), f.getName())) + } + + /** + * Holds if `f` is a property accessor (decorated with `@property`, `@name.setter`, + * or `@name.deleter`). + */ + predicate isPropertyAccessor(Function f) { + exists(Attribute a | a = f.getADecorator() | a.getName() = "setter" or a.getName() = "deleter") + or + f.getADecorator().(Name).getId() = "property" + } } From 1112a18cf7f5368c4f5b267d1c576c2080c24b8e Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 23 Feb 2026 15:51:25 +0000 Subject: [PATCH 21/30] Python: Port DeprecatedSliceMethod.ql Only trivial test changes. --- python/ql/src/Functions/DeprecatedSliceMethod.ql | 11 ++++++----- .../Functions/general/DeprecatedSliceMethod.expected | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/ql/src/Functions/DeprecatedSliceMethod.ql b/python/ql/src/Functions/DeprecatedSliceMethod.ql index 3e9cdb681d9a..937b3f46a4ff 100644 --- a/python/ql/src/Functions/DeprecatedSliceMethod.ql +++ b/python/ql/src/Functions/DeprecatedSliceMethod.ql @@ -10,16 +10,17 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate slice_method_name(string name) { name = "__getslice__" or name = "__setslice__" or name = "__delslice__" } -from PythonFunctionValue f, string meth +from Function f, string meth where - f.getScope().isMethod() and - not f.isOverridingMethod() and + f.isMethod() and slice_method_name(meth) and - f.getName() = meth + f.getName() = meth and + not DuckTyping::overridesMethod(f) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(f.getScope())) select f, meth + " method has been deprecated since Python 2.0." diff --git a/python/ql/test/query-tests/Functions/general/DeprecatedSliceMethod.expected b/python/ql/test/query-tests/Functions/general/DeprecatedSliceMethod.expected index 1a6df6eca47d..c9875b7a8054 100644 --- a/python/ql/test/query-tests/Functions/general/DeprecatedSliceMethod.expected +++ b/python/ql/test/query-tests/Functions/general/DeprecatedSliceMethod.expected @@ -1,3 +1,3 @@ -| functions_test.py:95:5:95:40 | Function DeprecatedSliceMethods.__getslice__ | __getslice__ method has been deprecated since Python 2.0. | -| functions_test.py:98:5:98:47 | Function DeprecatedSliceMethods.__setslice__ | __setslice__ method has been deprecated since Python 2.0. | -| functions_test.py:101:5:101:40 | Function DeprecatedSliceMethods.__delslice__ | __delslice__ method has been deprecated since Python 2.0. | +| functions_test.py:95:5:95:40 | Function __getslice__ | __getslice__ method has been deprecated since Python 2.0. | +| functions_test.py:98:5:98:47 | Function __setslice__ | __setslice__ method has been deprecated since Python 2.0. | +| functions_test.py:101:5:101:40 | Function __delslice__ | __delslice__ method has been deprecated since Python 2.0. | From 5bd96a5fb7add8f00b04649c0fb09ff8abc3ae28 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 23 Feb 2026 15:55:05 +0000 Subject: [PATCH 22/30] Python: Port DocStrings.ql --- python/ql/src/Statements/DocStrings.ql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/ql/src/Statements/DocStrings.ql b/python/ql/src/Statements/DocStrings.ql index df7b09a963e0..f71b204c0182 100644 --- a/python/ql/src/Statements/DocStrings.ql +++ b/python/ql/src/Statements/DocStrings.ql @@ -17,7 +17,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate needs_docstring(Scope s) { s.isPublic() and @@ -29,15 +29,15 @@ predicate needs_docstring(Scope s) { } predicate function_needs_docstring(FunctionMetrics f) { - not exists(FunctionValue fo, FunctionValue base | fo.overrides(base) and fo.getScope() = f | - not function_needs_docstring(base.getScope()) + not exists(Function base | + DuckTyping::overridesMethod(f) and + base.getScope() = getADirectSuperclass+(f.getScope()) and + base.getName() = f.getName() and + not function_needs_docstring(base) ) and f.getName() != "lambda" and (f.getNumberOfLinesOfCode() - count(f.getADecorator())) > 2 and - not exists(PythonPropertyObject p | - p.getGetter().getFunction() = f or - p.getSetter().getFunction() = f - ) + not DuckTyping::isPropertyAccessor(f) } string scope_type(Scope s) { From c1e2d9f5a43333995700218b3de72b71dc82684f Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 15:14:15 +0000 Subject: [PATCH 23/30] Python: Move exception modelling to DataFlowDispatch.qll This analysis will is needed for the reachability modelling (which tracks things like which exceptions are caught by which handles), so it makes more sense for it to move to `DataFlowDispatch` for now. --- .../new/internal/DataFlowDispatch.qll | 110 ++++++++++++++++++ .../ql/src/Exceptions/IncorrectExceptOrder.ql | 69 +---------- 2 files changed, 111 insertions(+), 68 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 3d048ce72ebb..fc78773a0e63 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2114,3 +2114,113 @@ module DuckTyping { f.getADecorator().(Name).getId() = "property" } } + +/** + * Provides a class hierarchy for exception types, covering both builtin + * exceptions (from typeshed models) and user-defined exception classes. + */ +module ExceptionTypes { + private import semmle.python.ApiGraphs + private import semmle.python.frameworks.data.internal.ApiGraphModels + + /** Holds if `name` is a builtin exception class name. */ + predicate builtinException(string name) { + typeModel("builtins.BaseException~Subclass", "builtins." + name, "") + } + + /** Holds if builtin exception `sub` is a direct subclass of builtin exception `base`. */ + private predicate builtinExceptionSubclass(string base, string sub) { + typeModel("builtins." + base + "~Subclass", "builtins." + sub, "") + } + + /** An exception type, either a builtin exception or a user-defined exception class. */ + newtype TExceptType = + /** A user-defined exception class. */ + TUserExceptType(Class c) or + /** A builtin exception class, identified by name. */ + TBuiltinExceptType(string name) { builtinException(name) } + + /** An exception type, either a builtin exception or a user-defined exception class. */ + class ExceptType extends TExceptType { + /** Gets the name of this exception type. */ + string getName() { none() } + + /** Gets a data-flow node that refers to this exception type. */ + DataFlow::Node getAUse() { none() } + + /** Gets a direct superclass of this exception type. */ + ExceptType getADirectSuperclass() { none() } + + /** Gets a string representation of this exception type. */ + string toString() { result = this.getName() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startColumn` of line `startLine` to + * column `endColumn` of line `endLine` in file `filepath`. + * For more information, see + * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filePath, int startLine, int startColumn, int endLine, int endColumn + ) { + none() + } + } + + /** A user-defined exception class. */ + class UserExceptType extends ExceptType, TUserExceptType { + Class cls; + + UserExceptType() { this = TUserExceptType(cls) } + + /** Gets the underlying class. */ + Class asClass() { result = cls } + + override string getName() { result = cls.getName() } + + override DataFlow::Node getAUse() { result = classTracker(cls) } + + override ExceptType getADirectSuperclass() { + result.(UserExceptType).asClass() = getADirectSuperclass(cls) + or + result.(BuiltinExceptType).getAUse().asExpr() = cls.getABase() + } + + override predicate hasLocationInfo( + string filePath, int startLine, int startColumn, int endLine, int endColumn + ) { + cls.getLocation().hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) + } + } + + /** A builtin exception class, identified by name. */ + class BuiltinExceptType extends ExceptType, TBuiltinExceptType { + string name; + + BuiltinExceptType() { this = TBuiltinExceptType(name) } + + /** Gets the builtin name. */ + string asBuiltinName() { result = name } + + override string getName() { result = name } + + override DataFlow::Node getAUse() { API::builtin(name).asSource().flowsTo(result) } + + override ExceptType getADirectSuperclass() { + builtinExceptionSubclass(result.(BuiltinExceptType).asBuiltinName(), name) and + result != this + } + + override predicate hasLocationInfo( + string filePath, int startLine, int startColumn, int endLine, int endColumn + ) { + filePath = "" and + startLine = 0 and + startColumn = 0 and + endLine = 0 and + endColumn = 0 + } + } +} + diff --git a/python/ql/src/Exceptions/IncorrectExceptOrder.ql b/python/ql/src/Exceptions/IncorrectExceptOrder.ql index 6eb1b39b0e64..436bc00be4ab 100644 --- a/python/ql/src/Exceptions/IncorrectExceptOrder.ql +++ b/python/ql/src/Exceptions/IncorrectExceptOrder.ql @@ -15,74 +15,7 @@ import python import semmle.python.dataflow.new.internal.DataFlowDispatch -import semmle.python.ApiGraphs -import semmle.python.frameworks.data.internal.ApiGraphModels - -predicate builtinException(string name) { - typeModel("builtins.BaseException~Subclass", "builtins." + name, "") -} - -predicate builtinExceptionSubclass(string base, string sub) { - typeModel("builtins." + base + "~Subclass", "builtins." + sub, "") -} - -newtype TExceptType = - TClass(Class c) or - TBuiltin(string name) { builtinException(name) } - -class ExceptType extends TExceptType { - Class asClass() { this = TClass(result) } - - string asBuiltinName() { this = TBuiltin(result) } - - predicate isBuiltin() { this = TBuiltin(_) } - - string getName() { - result = this.asClass().getName() - or - result = this.asBuiltinName() - } - - string toString() { result = this.getName() } - - DataFlow::Node getAUse() { - result = classTracker(this.asClass()) - or - API::builtin(this.asBuiltinName()).asSource().flowsTo(result) - } - - ExceptType getADirectSuperclass() { - result.asClass() = getADirectSuperclass(this.asClass()) - or - result.isBuiltin() and - result.getAUse().asExpr() = this.asClass().getABase() - or - builtinExceptionSubclass(result.asBuiltinName(), this.asBuiltinName()) and - this != result - } - - /** - * Holds if this element is at the specified location. - * The location spans column `startColumn` of line `startLine` to - * column `endColumn` of line `endLine` in file `filepath`. - * For more information, see - * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). - */ - predicate hasLocationInfo( - string filePath, int startLine, int startColumn, int endLine, int endColumn - ) { - this.asClass() - .getLocation() - .hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) - or - this.isBuiltin() and - filePath = "" and - startLine = 0 and - startColumn = 0 and - endLine = 0 and - endColumn = 0 - } -} +private import ExceptionTypes predicate incorrectExceptOrder(ExceptStmt ex1, ExceptType cls1, ExceptStmt ex2, ExceptType cls2) { exists(int i, int j, Try t | From f6962a2b7ed0bb2c15555537b3cbdc9cf52dbd7c Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 16:05:32 +0000 Subject: [PATCH 24/30] Python: Add Reachability module The implementation is essentially the same as the one from `BasicBlockWithPointsTo`, with the main difference being that this one uses the exception machinery we just added (and some extensions added in this commit). --- .../new/internal/DataFlowDispatch.qll | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index fc78773a0e63..51a4d43f3a6f 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2154,6 +2154,41 @@ module ExceptionTypes { /** Gets a string representation of this exception type. */ string toString() { result = this.getName() } + /** Holds if this exception type may be raised at control flow node `r`. */ + predicate isRaisedAt(ControlFlowNode r) { + exists(Expr raised | + raised = r.getNode().(Raise).getRaised() and + this.getAUse().asExpr() in [raised, raised.(Call).getFunc()] + ) + or + exists(Function callee | + resolveCall(r, callee, _) and + this.isRaisedIn(callee) + ) + } + + /** + * Holds if this exception type may be raised in function `f`, either + * directly via `raise` statements or transitively through calls to other functions. + */ + predicate isRaisedIn(Function f) { this.isRaisedAt(any(ControlFlowNode r | r.getScope() = f)) } + + /** Holds if this exception type is handled by the `except` clause at `handler`. */ + predicate isHandledAt(ExceptFlowNode handler) { + exists(ExceptStmt ex, Expr typeExpr | ex = handler.getNode() | + ( + typeExpr = ex.getType() + or + typeExpr = ex.getType().(Tuple).getAnElt() + ) and + this.getAUse().asExpr() = typeExpr + ) + or + // A bare `except:` handles everything + not exists(handler.getNode().(ExceptStmt).getType()) and + this.(BuiltinExceptType).getName() = "BaseException" + } + /** * Holds if this element is at the specified location. * The location spans column `startColumn` of line `startLine` to @@ -2222,5 +2257,128 @@ module ExceptionTypes { endColumn = 0 } } + + /** + * Holds if the exception edge from `r` to `handler` is unlikely because + * none of the exception types that `r` may raise are handled by `handler`. + */ + predicate unlikelyExceptionEdge(ControlFlowNode r, ExceptFlowNode handler) { + handler = r.getAnExceptionalSuccessor() and + // We can determine at least one raised type + exists(ExceptType t | t.isRaisedAt(r)) and + // But none of them are handled by this handler + not exists(ExceptType raised, ExceptType handled | + raised.isRaisedAt(r) and + handled.isHandledAt(handler) and + raised.getADirectSuperclass*() = handled + ) + } } +/** + * Provides predicates for reasoning about the reachability of control flow nodes + * and basic blocks. + */ +module Reachability { + private import semmle.python.ApiGraphs + import ExceptionTypes + + /** + * Holds if `call` is a call to a function that is known to never return normally + * (e.g. `sys.exit()`, `os._exit()`, `os.abort()`). + */ + predicate isCallToNeverReturningFunction(CallNode call) { + // Known never-returning builtins/stdlib functions via API graphs + call = API::builtin("exit").getACall().asCfgNode() + or + call = API::builtin("quit").getACall().asCfgNode() + or + call = API::moduleImport("sys").getMember("exit").getACall().asCfgNode() + or + call = API::moduleImport("os").getMember("_exit").getACall().asCfgNode() + or + call = API::moduleImport("os").getMember("abort").getACall().asCfgNode() + or + // User-defined functions that only contain raise statements (no normal returns) + exists(Function target | + resolveCall(call, target, _) and + neverReturns(target) + ) + } + + /** + * Holds if function `f` never returns normally, because every normal exit + * is dominated by a call to a never-returning function or an unconditional raise. + */ + predicate neverReturns(Function f) { + exists(f.getANormalExit()) and + forall(BasicBlock exit | exit = f.getANormalExit().getBasicBlock() | + exists(BasicBlock raising | + raising.dominates(exit) and + ( + isCallToNeverReturningFunction(raising.getLastNode()) + or + raising.getLastNode().getNode() instanceof Raise + ) + ) + ) + } + + /** + * Holds if it is highly unlikely for control to flow from `node` to `succ`. + */ + predicate unlikelySuccessor(ControlFlowNode node, ControlFlowNode succ) { + // Exceptional edge where the raised type doesn't match the handler + unlikelyExceptionEdge(node, succ) + or + // Normal successor of a never-returning call + isCallToNeverReturningFunction(node) and + succ = node.getASuccessor() and + not succ = node.getAnExceptionalSuccessor() and + not succ.getNode() instanceof Yield + } + + private predicate startBbLikelyReachable(BasicBlock b) { + exists(Scope s | s.getEntryNode() = b.getNode(_)) + or + exists(BasicBlock pred | + pred = b.getAPredecessor() and + endBbLikelyReachable(pred) and + not unlikelySuccessor(pred.getLastNode(), b) + ) + } + + private predicate endBbLikelyReachable(BasicBlock b) { + startBbLikelyReachable(b) and + not exists(ControlFlowNode p, ControlFlowNode s | + unlikelySuccessor(p, s) and + p = b.getNode(_) and + s = b.getNode(_) and + not p = b.getLastNode() + ) + } + + /** + * Holds if basic block `b` is likely to be reachable from the entry of its + * enclosing scope. + */ + predicate likelyReachable(BasicBlock b) { startBbLikelyReachable(b) } + + /** + * Holds if it is unlikely that `node` can be reached during execution. + */ + predicate unlikelyReachable(ControlFlowNode node) { + not startBbLikelyReachable(node.getBasicBlock()) + or + exists(BasicBlock b | + startBbLikelyReachable(b) and + not endBbLikelyReachable(b) and + exists(ControlFlowNode p, int i, int j | + unlikelySuccessor(p, _) and + p = b.getNode(i) and + node = b.getNode(j) and + i < j + ) + ) + } +} From 109edade72831d4592e3d3c6f84bfa954ebec24d Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 16:11:12 +0000 Subject: [PATCH 25/30] Python: Port getCyclomaticComplexity function Note that this does not give the exact same results as the old function, however it's not clear to me that the old results were actually correct (it _looks_ like `read()` might be doing an IO operation, but in fact `read` is not defined, so at best this will raise a NameError, not an IOError). --- python/ql/lib/LegacyPointsTo.qll | 32 ------------------- python/ql/lib/semmle/python/Metrics.qll | 27 ++++++++++++++++ .../new/internal/DataFlowDispatch.qll | 17 ++++++++++ python/ql/src/Metrics/CyclomaticComplexity.ql | 4 +-- .../cyclo/CyclomaticComplexity.expected | 2 +- 5 files changed, 47 insertions(+), 35 deletions(-) diff --git a/python/ql/lib/LegacyPointsTo.qll b/python/ql/lib/LegacyPointsTo.qll index ffea2d93b66c..ed04d9e9e79a 100644 --- a/python/ql/lib/LegacyPointsTo.qll +++ b/python/ql/lib/LegacyPointsTo.qll @@ -433,38 +433,6 @@ private predicate exits_early(BasicBlock b) { /** The metrics for a function that require points-to analysis */ class FunctionMetricsWithPointsTo extends FunctionMetrics { - /** - * Gets the cyclomatic complexity of the function: - * The number of linearly independent paths through the source code. - * Computed as E - N + 2P, - * where - * E = the number of edges of the graph. - * N = the number of nodes of the graph. - * P = the number of connected components, which for a single function is 1. - */ - int getCyclomaticComplexity() { - exists(int e, int n | - n = count(BasicBlockWithPointsTo b | b = this.getABasicBlock() and b.likelyReachable()) and - e = - count(BasicBlockWithPointsTo b1, BasicBlockWithPointsTo b2 | - b1 = this.getABasicBlock() and - b1.likelyReachable() and - b2 = this.getABasicBlock() and - b2.likelyReachable() and - b2 = b1.getASuccessor() and - not b1.unlikelySuccessor(b2) - ) - | - result = e - n + 2 - ) - } - - private BasicBlock getABasicBlock() { - result = this.getEntryNode().getBasicBlock() - or - exists(BasicBlock mid | mid = this.getABasicBlock() and result = mid.getASuccessor()) - } - /** * Dependency of Callables * One callable "this" depends on another callable "result" diff --git a/python/ql/lib/semmle/python/Metrics.qll b/python/ql/lib/semmle/python/Metrics.qll index 26560bad25c9..fcbfdbe58b36 100644 --- a/python/ql/lib/semmle/python/Metrics.qll +++ b/python/ql/lib/semmle/python/Metrics.qll @@ -1,5 +1,6 @@ import python private import semmle.python.SelfAttribute +private import semmle.python.dataflow.new.internal.DataFlowDispatch /** The metrics for a function */ class FunctionMetrics extends Function { @@ -27,6 +28,32 @@ class FunctionMetrics extends Function { int getStatementNestingDepth() { result = max(Stmt s | s.getScope() = this | getNestingDepth(s)) } int getNumberOfCalls() { result = count(Call c | c.getScope() = this) } + + /** + * Gets the cyclomatic complexity of the function: + * The number of linearly independent paths through the source code. + * Computed as E - N + 2P, + * where + * E = the number of edges of the graph. + * N = the number of nodes of the graph. + * P = the number of connected components, which for a single function is 1. + */ + int getCyclomaticComplexity() { + exists(int n, int e | + n = count(BasicBlock b | b.getScope() = this and Reachability::likelyReachable(b)) and + e = + count(BasicBlock b1, BasicBlock b2 | + b1.getScope() = this and + Reachability::likelyReachable(b1) and + b2.getScope() = this and + Reachability::likelyReachable(b2) and + b2 = b1.getASuccessor() and + not Reachability::unlikelySuccessor(b1.getLastNode(), b2.firstNode()) + ) + | + result = e - n + 2 + ) + } } /** The metrics for a class */ diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 51a4d43f3a6f..e3af9d8c0e59 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2324,6 +2324,19 @@ module Reachability { ) } + /** + * Holds if `node` is unlikely to raise an exception. This includes entry nodes + * and simple name lookups. + */ + private predicate unlikelyToRaise(ControlFlowNode node) { + exists(node.getAnExceptionalSuccessor()) and + ( + node.getNode() instanceof Name + or + exists(Scope s | s.getEntryNode() = node) + ) + } + /** * Holds if it is highly unlikely for control to flow from `node` to `succ`. */ @@ -2336,6 +2349,10 @@ module Reachability { succ = node.getASuccessor() and not succ = node.getAnExceptionalSuccessor() and not succ.getNode() instanceof Yield + or + // Exception edge from a node that is unlikely to raise + unlikelyToRaise(node) and + succ = node.getAnExceptionalSuccessor() } private predicate startBbLikelyReachable(BasicBlock b) { diff --git a/python/ql/src/Metrics/CyclomaticComplexity.ql b/python/ql/src/Metrics/CyclomaticComplexity.ql index 1d9874ac12c6..089d128cd6a6 100644 --- a/python/ql/src/Metrics/CyclomaticComplexity.ql +++ b/python/ql/src/Metrics/CyclomaticComplexity.ql @@ -13,8 +13,8 @@ */ import python -private import LegacyPointsTo +import semmle.python.Metrics -from FunctionMetricsWithPointsTo func, int complexity +from FunctionMetrics func, int complexity where complexity = func.getCyclomaticComplexity() select func, complexity order by complexity desc diff --git a/python/ql/test/query-tests/Metrics/cyclo/CyclomaticComplexity.expected b/python/ql/test/query-tests/Metrics/cyclo/CyclomaticComplexity.expected index fd69f810c217..485db3046c07 100644 --- a/python/ql/test/query-tests/Metrics/cyclo/CyclomaticComplexity.expected +++ b/python/ql/test/query-tests/Metrics/cyclo/CyclomaticComplexity.expected @@ -1,6 +1,6 @@ +| code.py:35:1:35:21 | Function exceptions | 5 | | code.py:23:1:23:17 | Function nested | 4 | | code.py:12:1:12:21 | Function two_branch | 3 | | code.py:6:1:6:18 | Function one_branch | 2 | -| code.py:35:1:35:21 | Function exceptions | 2 | | code.py:1:1:1:16 | Function f_linear | 1 | | code.py:45:1:45:39 | Function must_be_positive | 1 | From c4c797bd2a4a0b05739179902f3964b4e52e8f6d Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 16:11:56 +0000 Subject: [PATCH 26/30] Python: Port OverlyComplexDelMethod.ql Only trivial test changes. --- python/ql/src/Functions/OverlyComplexDelMethod.ql | 11 +++++------ .../Functions/general/OverlyComplexDelMethod.expected | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/python/ql/src/Functions/OverlyComplexDelMethod.ql b/python/ql/src/Functions/OverlyComplexDelMethod.ql index 6b08a40fa290..3510c0daac4d 100644 --- a/python/ql/src/Functions/OverlyComplexDelMethod.ql +++ b/python/ql/src/Functions/OverlyComplexDelMethod.ql @@ -12,12 +12,11 @@ */ import python -private import LegacyPointsTo +import semmle.python.Metrics -from FunctionValue method +from FunctionMetrics method where - exists(ClassValue c | - c.declaredAttribute("__del__") = method and - method.getScope().(FunctionMetricsWithPointsTo).getCyclomaticComplexity() > 3 - ) + method.getName() = "__del__" and + method.isMethod() and + method.getCyclomaticComplexity() > 3 select method, "Overly complex '__del__' method." diff --git a/python/ql/test/query-tests/Functions/general/OverlyComplexDelMethod.expected b/python/ql/test/query-tests/Functions/general/OverlyComplexDelMethod.expected index 2eff178d972f..84c08d894268 100644 --- a/python/ql/test/query-tests/Functions/general/OverlyComplexDelMethod.expected +++ b/python/ql/test/query-tests/Functions/general/OverlyComplexDelMethod.expected @@ -1 +1 @@ -| protocols.py:74:5:74:22 | Function MegaDel.__del__ | Overly complex '__del__' method. | +| protocols.py:74:5:74:22 | Function __del__ | Overly complex '__del__' method. | From c5f29c6166c746bcd1e745b23456914a00dded65 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 16:12:24 +0000 Subject: [PATCH 27/30] Python: Port ConsistentReturns.ql No test changes. --- python/ql/src/Functions/ConsistentReturns.ql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/src/Functions/ConsistentReturns.ql b/python/ql/src/Functions/ConsistentReturns.ql index 9beeafc63723..c4ac492193eb 100644 --- a/python/ql/src/Functions/ConsistentReturns.ql +++ b/python/ql/src/Functions/ConsistentReturns.ql @@ -12,7 +12,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate explicitly_returns_non_none(Function func) { exists(Return return | @@ -22,8 +22,8 @@ predicate explicitly_returns_non_none(Function func) { } predicate has_implicit_return(Function func) { - exists(ControlFlowNodeWithPointsTo fallthru | - fallthru = func.getFallthroughNode() and not fallthru.unlikelyReachable() + exists(ControlFlowNode fallthru | + fallthru = func.getFallthroughNode() and not Reachability::unlikelyReachable(fallthru) ) or exists(Return return | return.getScope() = func and not exists(return.getValue())) From 1593cbf11542eb1b380bffb630ac1d795c50cd96 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 21:04:39 +0000 Subject: [PATCH 28/30] Python: Extend ExceptionTypes API Adds support for finding instances, and adds a `BaseException` convenience class. --- .../new/internal/DataFlowDispatch.qll | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index e3af9d8c0e59..45d3b4102a12 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2154,12 +2154,27 @@ module ExceptionTypes { /** Gets a string representation of this exception type. */ string toString() { result = this.getName() } - /** Holds if this exception type may be raised at control flow node `r`. */ - predicate isRaisedAt(ControlFlowNode r) { - exists(Expr raised | - raised = r.getNode().(Raise).getRaised() and + /** Gets a data-flow node that refers to an instance of this exception type. */ + DataFlow::Node getAnInstance() { none() } + + /** Holds if this is a legal exception type (a subclass of `BaseException`). */ + predicate isLegalExceptionType() { this.getADirectSuperclass*() instanceof BaseException } + + /** + * Holds if this exception type is raised by `r`, either as a class reference + * (e.g. `raise ValueError`) or as an instantiation (e.g. `raise ValueError("msg")`). + */ + predicate isRaisedBy(Raise r) { + exists(Expr raised | raised = r.getRaised() | this.getAUse().asExpr() in [raised, raised.(Call).getFunc()] + or + this.getAnInstance().asExpr() = raised ) + } + + /** Holds if this exception type may be raised at control flow node `r`. */ + predicate isRaisedAt(ControlFlowNode r) { + this.isRaisedBy(r.getNode()) or exists(Function callee | resolveCall(r, callee, _) and @@ -2186,7 +2201,7 @@ module ExceptionTypes { or // A bare `except:` handles everything not exists(handler.getNode().(ExceptStmt).getType()) and - this.(BuiltinExceptType).getName() = "BaseException" + this instanceof BaseException } /** @@ -2216,6 +2231,8 @@ module ExceptionTypes { override DataFlow::Node getAUse() { result = classTracker(cls) } + override DataFlow::Node getAnInstance() { result = classInstanceTracker(cls) } + override ExceptType getADirectSuperclass() { result.(UserExceptType).asClass() = getADirectSuperclass(cls) or @@ -2240,7 +2257,11 @@ module ExceptionTypes { override string getName() { result = name } - override DataFlow::Node getAUse() { API::builtin(name).asSource().flowsTo(result) } + override DataFlow::Node getAUse() { result = API::builtin(name).getAValueReachableFromSource() } + + override DataFlow::Node getAnInstance() { + result = API::builtin(name).getAnInstance().getAValueReachableFromSource() + } override ExceptType getADirectSuperclass() { builtinExceptionSubclass(result.(BuiltinExceptType).asBuiltinName(), name) and @@ -2258,6 +2279,11 @@ module ExceptionTypes { } } + /** The builtin `BaseException` type. */ + class BaseException extends BuiltinExceptType { + BaseException() { name = "BaseException" } + } + /** * Holds if the exception edge from `r` to `handler` is unlikely because * none of the exception types that `r` may raise are handled by `handler`. From bca03d166579a5664e2ae73769643ab676a32380 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 22:24:19 +0000 Subject: [PATCH 29/30] Python: Port IllegalRaise.ql Adds a convenient way to get the class name for an immutable literal (to maintain the same output format as was provided by the points-to version). I don't know if people are in the habit of writing `raise 5`, but I guess `raise "NotImplemented"` (wrong on so many levels) is not entirely impossible. No test changes. --- .../new/internal/DataFlowDispatch.qll | 17 ++++++ python/ql/src/Exceptions/IllegalRaise.ql | 53 +++++++++++++++---- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 45d3b4102a12..10bc99ce941f 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2113,6 +2113,23 @@ module DuckTyping { or f.getADecorator().(Name).getId() = "property" } + + /** Gets the name of the builtin class of the immutable literal `lit`. */ + string getClassName(ImmutableLiteral lit) { + lit instanceof IntegerLiteral and result = "int" + or + lit instanceof FloatLiteral and result = "float" + or + lit instanceof ImaginaryLiteral and result = "complex" + or + lit instanceof NegativeIntegerLiteral and result = "int" + or + lit instanceof StringLiteral and result = "str" + or + lit instanceof BooleanLiteral and result = "bool" + or + lit instanceof None and result = "NoneType" + } } /** diff --git a/python/ql/src/Exceptions/IllegalRaise.ql b/python/ql/src/Exceptions/IllegalRaise.ql index f9f263552b18..3ae99a55f7cc 100644 --- a/python/ql/src/Exceptions/IllegalRaise.ql +++ b/python/ql/src/Exceptions/IllegalRaise.ql @@ -12,15 +12,48 @@ */ import python -import Raising -import Exceptions.NotImplemented -private import LegacyPointsTo +import semmle.python.dataflow.new.internal.DataFlowDispatch +import semmle.python.ApiGraphs +private import ExceptionTypes -from Raise r, ClassValue t +/** + * Holds if `r` raises an instance of a builtin non-exception class named `name`. + */ +private predicate raisesNonExceptionBuiltin(Raise r, string name) { + exists(Expr raised | raised = r.getRaised() | + API::builtin(name).getAValueReachableFromSource().asExpr() = raised + or + API::builtin(name).getAValueReachableFromSource().asExpr() = raised.(Call).getFunc() and + // Exclude `type` since `type(x)` returns the class of `x`, not a `type` instance + not name = "type" + ) and + not builtinException(name) +} + +from Raise r, string msg where - type_or_typeof(r, t, _) and - not t.isLegalExceptionType() and - not t.failedInference(_) and - not use_of_not_implemented_in_raise(r, _) -select r, - "Illegal class '" + t.getName() + "' raised; will result in a TypeError being raised instead." + not raisesNonExceptionBuiltin(r, "NotImplemented") and + ( + exists(ExceptType t | + t.isRaisedBy(r) and + not t.isLegalExceptionType() and + not t.getName() = "None" and + msg = + "Illegal class '" + t.getName() + + "' raised; will result in a TypeError being raised instead." + ) + or + exists(ImmutableLiteral lit | lit = r.getRaised() | + msg = + "Illegal class '" + DuckTyping::getClassName(lit) + + "' raised; will result in a TypeError being raised instead." + ) + or + exists(string name | + raisesNonExceptionBuiltin(r, name) and + not r.getRaised() instanceof ImmutableLiteral and + not name = "None" and + msg = "Illegal class '" + name + "' raised; will result in a TypeError being raised instead." + ) + ) +select r, msg From 679f9208612061ed0ff65d8f37b9d82c57722c59 Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 24 Feb 2026 22:30:27 +0000 Subject: [PATCH 30/30] Python: Port IllegalExceptionHandlerType.ql A few relevant changes compared to the points-to version: - we've lost `origin`, so we can no longer point to where the illegal type lives. I opted to keep the output message the same, mirroring what we were already doing in IllegalRaise.ql. - We no longer track literal values flowing in from elsewhere, so we lost a single test result where the handled "type" is the result of calling a float-returning function. Apart from that, the only test changes are cosmetic. --- .../Exceptions/IllegalExceptionHandlerType.ql | 46 +++++++++++++------ .../IllegalExceptionHandlerType.expected | 7 ++- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/python/ql/src/Exceptions/IllegalExceptionHandlerType.ql b/python/ql/src/Exceptions/IllegalExceptionHandlerType.ql index dc1c3f2fa359..e603abb64cb7 100644 --- a/python/ql/src/Exceptions/IllegalExceptionHandlerType.ql +++ b/python/ql/src/Exceptions/IllegalExceptionHandlerType.ql @@ -12,20 +12,38 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.internal.DataFlowDispatch +private import ExceptionTypes -from ExceptFlowNodeWithPointsTo ex, Value t, ClassValue c, ControlFlowNode origin, string what +/** + * Gets an expression used as a handler type in the `except` clause at `ex`, + * either directly or as an element of a tuple. + */ +Expr handlerExpr(ExceptStmt ex) { + result = ex.getType() or + result = ex.getType().(Tuple).getAnElt() +} + +/** + * Gets an exception type used in the `except` clause at `ex`, + * where that type is not a legal exception type. + */ +ExceptType illegalHandlerType(ExceptStmt ex) { + result.getAUse().asExpr() = handlerExpr(ex) and + not result.isLegalExceptionType() +} + +from ExceptStmt ex, string msg where - ex.handledException(t, c, origin) and - ( - exists(ClassValue x | x = t | - not x.isLegalExceptionType() and - not x.failedInference(_) and - what = "class '" + x.getName() + "'" - ) - or - not t instanceof ClassValue and - what = "instance of '" + c.getName() + "'" + exists(ExceptType t | t = illegalHandlerType(ex) | + msg = + "Non-exception class '" + t.getName() + + "' in exception handler which will never match raised exception." + ) + or + exists(ImmutableLiteral lit | lit = handlerExpr(ex) and not lit instanceof None | + msg = + "Non-exception class '" + DuckTyping::getClassName(lit) + + "' in exception handler which will never match raised exception." ) -select ex.getNode(), - "Non-exception $@ in exception handler which will never match raised exception.", origin, what +select ex, msg diff --git a/python/ql/test/query-tests/Exceptions/general/IllegalExceptionHandlerType.expected b/python/ql/test/query-tests/Exceptions/general/IllegalExceptionHandlerType.expected index 5ba0c7163713..f366f9de865d 100644 --- a/python/ql/test/query-tests/Exceptions/general/IllegalExceptionHandlerType.expected +++ b/python/ql/test/query-tests/Exceptions/general/IllegalExceptionHandlerType.expected @@ -1,4 +1,3 @@ -| exceptions_test.py:51:5:51:25 | ExceptStmt | Non-exception $@ in exception handler which will never match raised exception. | exceptions_test.py:33:1:33:28 | ControlFlowNode for ClassExpr | class 'NotException1' | -| exceptions_test.py:54:5:54:25 | ExceptStmt | Non-exception $@ in exception handler which will never match raised exception. | exceptions_test.py:36:1:36:28 | ControlFlowNode for ClassExpr | class 'NotException2' | -| exceptions_test.py:138:5:138:22 | ExceptStmt | Non-exception $@ in exception handler which will never match raised exception. | exceptions_test.py:133:12:133:14 | ControlFlowNode for FloatLiteral | instance of 'float' | -| pypy_test.py:14:5:14:14 | ExceptStmt | Non-exception $@ in exception handler which will never match raised exception. | pypy_test.py:14:12:14:13 | ControlFlowNode for IntegerLiteral | instance of 'int' | +| exceptions_test.py:51:5:51:25 | ExceptStmt | Non-exception class 'NotException1' in exception handler which will never match raised exception. | +| exceptions_test.py:54:5:54:25 | ExceptStmt | Non-exception class 'NotException2' in exception handler which will never match raised exception. | +| pypy_test.py:14:5:14:14 | ExceptStmt | Non-exception class 'int' in exception handler which will never match raised exception. |