Merge pull request #8457 from RasmusWL/add-dataflow-consistency-query · github/codeql@2fed0ad · GitHub
[go: up one dir, main page]

Skip to content

Commit 2fed0ad

Browse files
authored
Merge pull request #8457 from RasmusWL/add-dataflow-consistency-query
Python: Add dataflow consistency query
2 parents 4c8861a + 2c10160 commit 2fed0ad

File tree

53 files changed

+149
-701
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+149
-701
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/**
2+
* Provides consistency queries for checking invariants in the language-specific
3+
* data-flow classes and predicates.
4+
*/
5+
6+
private import python
7+
import semmle.python.dataflow.new.DataFlow::DataFlow
8+
private import semmle.python.dataflow.new.internal.DataFlowImplSpecific
9+
private import semmle.python.dataflow.new.internal.DataFlowDispatch
10+
private import semmle.python.dataflow.new.internal.TaintTrackingImplSpecific
11+
private import codeql.dataflow.internal.DataFlowImplConsistency
12+
13+
private module Input implements InputSig<PythonDataFlow> {
14+
private import Private
15+
private import Public
16+
17+
/**
 * Holds if `n` is an argument of some call at a `*args` (star-args) position or
 * at a `**kwargs` (dict-splat) position.
 *
 * Judging by the name and the TODOs below, such arguments are exempted from the
 * "argument has a post-update node" consistency check because post-update nodes
 * are not yet implemented for them.
 */
predicate argHasPostUpdateExclude(ArgumentNode n) {
18+
// TODO: Implement post-updates for *args, see tests added in https://github.com/github/codeql/pull/14936
19+
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isStarArgs(_))
20+
or
21+
// TODO: Implement post-updates for **kwargs, see tests added in https://github.com/github/codeql/pull/14936
22+
exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat())
23+
}
24+
25+
/**
 * Holds if `n` is a synthetic `**kwargs` parameter node
 * (`SynthDictSplatParameterNode`).
 *
 * Such nodes are excluded from the reverse-read consistency check; the
 * rationale is given in the comment inside the body.
 */
predicate reverseReadExclude(Node n) {
26+
// since `self`/`cls` parameters can be marked as implicit argument to `super()`,
27+
// they will have PostUpdateNodes. We have a read-step from the synthetic `**kwargs`
28+
// parameter, but dataflow-consistency queries should _not_ complain about there not
29+
// being a post-update node for the synthetic `**kwargs` parameter.
30+
n instanceof SynthDictSplatParameterNode
31+
}
32+
33+
/**
 * Holds if `p` is the parameter node of a parameter of the function that is the
 * scope of `c`, `p` is the parameter of `c` at position `pos`, and that
 * parameter is available both by index (`getArg(_)`) and by name
 * (`getArgByName(_)`).
 *
 * Such parameter nodes legitimately have more than one parameter position, so
 * they are excluded from the unique-parameter-position consistency check.
 */
predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
34+
// For normal parameters that can both be passed as positional arguments or keyword
35+
// arguments, we currently have parameter positions for both cases.
36+
//
37+
// TODO: Figure out how bad breaking this consistency check is
38+
exists(Function func, Parameter param |
39+
c.getScope() = func and
40+
p = parameterNode(param) and
41+
c.getParameter(pos) = p and
42+
param = func.getArg(_) and
43+
param = func.getArgByName(_)
44+
)
45+
}
46+
47+
/**
 * Holds if the file containing `call` has no relative path.
 *
 * NOTE(review): presumably this filters out calls in files outside the source
 * root (e.g. library code) from the unique-enclosing-callable check -- confirm.
 */
predicate uniqueCallEnclosingCallableExclude(DataFlowCall call) {
48+
not exists(call.getLocation().getFile().getRelativePath())
49+
}
50+
51+
/**
 * Holds if the file containing `n` has no relative path.
 *
 * NOTE(review): presumably this filters out nodes in files outside the source
 * root (e.g. library code) from the identity-local-step check -- confirm.
 */
predicate identityLocalStepExclude(Node n) {
52+
not exists(n.getLocation().getFile().getRelativePath())
53+
}
54+
55+
/**
 * Holds if `arg` being an argument of `call` and also of another call is a
 * known, accepted situation that the multiple-argument-call consistency check
 * should not report.
 *
 * The body is a disjunction of five cases:
 * 1. several `DataFlowCall`s share one `CallNode` and therefore one argument node;
 * 2. two calls of a bound method share the object of the attribute read as
 *    their self argument;
 * 3. a `getattr(obj, ...)` call and a call using `obj` as self argument;
 * 4. a two-argument `super(...)` call and the method call on its result;
 * 5. two calls in the same function that both use the function's first
 *    parameter as self argument.
 */
predicate multipleArgumentCallExclude(ArgumentNode arg, DataFlowCall call) {
56+
// since we can have multiple DataFlowCall for a CallNode (for example if it can
57+
// resolve to multiple functions), but we only make _one_ ArgumentNode for each
58+
// argument in the CallNode, we end up violating this consistency check in those
59+
// cases. (see `getCallArg` in DataFlowDispatch.qll)
60+
exists(DataFlowCall other, CallNode cfgCall | other != call |
61+
call.getNode() = cfgCall and
62+
other.getNode() = cfgCall and
63+
isArgumentNode(arg, call, _) and
64+
isArgumentNode(arg, other, _)
65+
)
66+
or
67+
// bound methods that refer to the same self argument.
68+
// Example: In `bm = self.foo; bm(); bm()` both bm() calls use the same `self` as
69+
// the (pos self) argument
70+
exists(AttrRead attr, DataFlowCall other | other != call |
71+
// for simple cases we can track the function back to the attr read but when the
72+
// call appears in the body of a list-comprehension, we can't do that, and simply
73+
// allow it instead.
74+
(
75+
call.getScope() = attr.getScope() and
76+
any(CfgNode n | n.asCfgNode() = call.getNode().(CallNode).getFunction()).getALocalSource() =
77+
attr
78+
or
79+
not exists(call.getScope().(Function).getDefinition()) and
80+
call.getScope().getScope+() = attr.getScope()
81+
) and
82+
(
83+
other.getScope() = attr.getScope() and
84+
any(CfgNode n | n.asCfgNode() = other.getNode().(CallNode).getFunction()).getALocalSource() =
85+
attr
86+
or
87+
not exists(other.getScope().(Function).getDefinition()) and
88+
other.getScope().getScope+() = attr.getScope()
89+
) and
90+
arg = attr.getObject() and
91+
arg = call.getArgument(any(ArgumentPosition p | p.isSelf())) and
92+
arg = other.getArgument(any(ArgumentPosition p | p.isSelf()))
93+
)
94+
or
95+
// `f = getattr(obj, "foo"); f()` where `obj` is used as (pos self) argument for
96+
// `f()` call
97+
exists(DataFlowCall getAttrCall, DataFlowCall methodCall, AttrRead attr |
98+
call in [getAttrCall, methodCall]
99+
|
100+
arg = getAttrCall.getArgument(any(ArgumentPosition p | p.isPositional(0))) and
101+
arg = methodCall.getArgument(any(ArgumentPosition p | p.isSelf())) and
102+
attr.getObject() = arg and
103+
attr.(CfgNode).getNode() = getAttrCall.getNode()
104+
)
105+
or
106+
// In the code `super(Base, self).foo()` we use `self` as an argument in both the
107+
// super() call (pos 1) and in the .foo() call (pos self).
108+
exists(DataFlowCall superCall, DataFlowCall methodCall | call in [superCall, methodCall] |
109+
exists(superCallTwoArgumentTracker(_, arg)) and
110+
arg = superCall.getArgument(any(ArgumentPosition p | p.isPositional(1))) and
111+
arg = methodCall.getArgument(any(ArgumentPosition p | p.isSelf()))
112+
)
113+
or
114+
// in the code `def func(self): super().foo(); super().bar()` we use `self` as the
115+
// (pos self) argument in both .foo() and .bar() calls.
// NOTE(review): the condition below does not itself mention `super`; it matches
// any two distinct calls located in `f` that share `f`'s first parameter as
// their self argument -- confirm this breadth is intended.
116+
exists(Function f, DataFlowCall other | other != call |
117+
exprNode(f.getArg(0)) = arg and
118+
call.getNode().getScope() = f and
119+
arg = call.getArgument(any(ArgumentPosition p | p.isSelf())) and
120+
arg = other.getArgument(any(ArgumentPosition p | p.isSelf())) and
121+
other.getNode().getScope() = f
122+
)
123+
}
124+
}
125+
126+
import MakeConsistency<PythonDataFlow, PythonTaintTracking, Input>

python/ql/test/experimental/dataflow/TestUtil/DataFlowConsistency.qll

Lines changed: 0 additions & 6 deletions
This file was deleted.

python/ql/test/experimental/dataflow/basic/dataflow-consistency.expected

Lines changed: 0 additions & 27 deletions
This file was deleted.

python/ql/test/experimental/dataflow/basic/dataflow-consistency.ql

Lines changed: 0 additions & 2 deletions
This file was deleted.

python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.expected

Lines changed: 0 additions & 27 deletions
This file was deleted.

python/ql/test/experimental/dataflow/callgraph_crosstalk/dataflow-consistency.ql

Lines changed: 0 additions & 2 deletions
This file was deleted.

python/ql/test/experimental/dataflow/calls/dataflow-consistency.expected

Lines changed: 0 additions & 27 deletions
This file was deleted.

python/ql/test/experimental/dataflow/calls/dataflow-consistency.ql

Lines changed: 0 additions & 2 deletions
This file was deleted.

python/ql/test/experimental/dataflow/consistency/dataflow-consistency.expected

Lines changed: 0 additions & 27 deletions
This file was deleted.

python/ql/test/experimental/dataflow/consistency/dataflow-consistency.ql

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)
0