-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Expand file tree
/
Copy pathpruner.py
More file actions
473 lines (409 loc) · 14.6 KB
/
pruner.py
File metadata and controls
473 lines (409 loc) · 14.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
'''
Prune the flow-graph, eliminating edges with impossible constraints.
For example:
1. if x:
2. if x == 0:
3. pass
The edge from `x == 0` to pass (line 2 to line 3) is impossible as `x` cannot be zero to
reach line 2.
While code like the above is unlikely in source code, it is quite common after splitting.
'''
from semmle.python import ast
import cmath
from collections import defaultdict
from semmle.python.passes.ast_pass import ASTVisitor
import semmle.util as util
from semmle.python.ast import Lt, LtE, Eq, NotEq, Gt, GtE, Is, IsNot
__all__ = [ 'do_pruning' ]
INT_TYPES = int
# Classes representing constraint on branches, for pruning.
# For example, the constraint `x` allows pruning and edge with the constraint `x == 0`
# since if `x` is True it cannot be zero.
class Truthy(object):
'''A test of the form `x` or `not x`'''
def __init__(self, sense):
self.sense = sense
def invert(self):
return (VAR_IS_TRUE, VAR_IS_FALSE)[self.sense]
def contradicts(self, other):
'''Holds if self and other are contradictory.'''
if self.sense:
return other.constrainsVariableToBeFalse()
else:
return other.constrainsVariableToBeTrue()
def constrainsVariableToBeTrue(self):
'''Holds if this constrains the variable such that `bool(var) is True`'''
return self.sense
def constrainsVariableToBeFalse(self):
'''Holds if this constrains the variable such that `bool(var) is False`'''
return not self.sense
def __repr__(self):
return "True" if self.sense else "False"
class IsNone(object):
'''A test of the form `x is None` or `x is not None`'''
def __init__(self, sense):
self.sense = sense
def contradicts(self, other):
if self is VAR_IS_NONE:
return other is VAR_IS_NOT_NONE or other is VAR_IS_TRUE
else:
return other is VAR_IS_NONE
def invert(self):
return (VAR_IS_NONE, VAR_IS_NOT_NONE)[self.sense]
def constrainsVariableToBeTrue(self):
return False
def constrainsVariableToBeFalse(self):
return self is VAR_IS_NONE
def __repr__(self):
return "Is None" if self.sense else "Is Not None"
class ComparedToConst(object):
'''A test of the form `x == k`, `x < k`, etc.'''
def __init__(self, op, k):
#We can treat is/is not as ==/!= as we only
#compare with simple literals which are always interned.
if op is Is:
op = Eq
elif op is IsNot:
op = NotEq
self.op = op
self.k = k
def invert(self):
return ComparedToConst(INVERT_OP[self.op], self.k)
def constrainsVariableToBeTrue(self):
if self.op == Eq:
return self.k != 0
if self.op == NotEq:
return self.k == 0
if self.op == GtE:
return self.k > 0
if self.op == Gt:
return self.k >= 0
if self.op == LtE:
return self.k < 0
if self.op == Lt:
return self.k <= 0
return False
def constrainsVariableToBeFalse(self):
return self.op == Eq and self.k == 0
def contradicts(self, other):
if self.constrainsVariableToBeTrue() and other is VAR_IS_FALSE:
return True
if self.constrainsVariableToBeFalse() and other is VAR_IS_TRUE:
return True
if self.op == Eq and other is VAR_IS_NONE:
return True
if not isinstance(other, ComparedToConst):
return False
if self.op == Eq:
if other.op == NotEq:
return self.k == other.k
if other.op == Eq:
return self.k != other.k
if other.op == Lt:
return self.k >= other.k
if other.op == LtE:
return self.k > other.k
if other.op == Gt:
return self.k <= other.k
if other.op == GtE:
return self.k < other.k
return False
if self.op == Lt:
if other.op == Eq or other.op == Gt or other.op == GtE:
return self.k <= other.k
return False
if self.op == LtE:
if other.op == Eq or other.op == GtE:
return self.k < other.k
if other.op == Gt:
return self.k <= other.k
return False
if other.op in (NotEq, Gt, GtE):
return False
return other.contradicts(self)
def __repr__(self):
return "%s %d" % (OP_NAME[self.op], self.k)
INVERT_OP = {
Eq: NotEq,
NotEq: Eq,
Lt: GtE,
LtE: Gt,
Gt: LtE,
GtE: Lt
}
OP_NAME = {
Eq: "==",
NotEq: "!=",
Lt: "<",
LtE: "<=",
Gt: ">",
GtE: ">=",
}
VAR_IS_TRUE = Truthy(True)
VAR_IS_FALSE = Truthy(False)
VAR_IS_NONE = IsNone(True)
VAR_IS_NOT_NONE = IsNone(False)
NAME_CONSTS = {
"True" : VAR_IS_TRUE,
"False": VAR_IS_FALSE,
"None": VAR_IS_NONE,
}
class SkippedVisitor(ASTVisitor):
def __init__(self):
self.nodes = set()
def visit_Subscript(self, node):
if isinstance(node.value, ast.Name):
self.nodes.add(node.value)
def visit_Attribute(self, node):
if isinstance(node.value, ast.Name):
self.nodes.add(node.value)
class NotBooleanTestVisitor(ASTVisitor):
"""Visitor that checks if a test is not a boolean test."""
def __init__(self):
self.nodes = set()
def visit_MatchLiteralPattern(self, node):
# MatchLiteralPatterns _look_ like boolean tests in that they have both a true ("matched")
# and false ("didn't match") successor, but are not.
# Thus, without this check, we would interpret
#
# match x:
# case False:
# pass
#
# (and similarly for True) as if it was a boolean test. This would cause the true edge
# (leading to pass) to be pruned later on.
self.nodes.add(node.literal)
class NonlocalVisitor(ASTVisitor):
def __init__(self):
self.names = set()
def visit_Nonlocal(self, node):
for name in node.names:
self.names.add(name)
class GlobalVisitor(ASTVisitor):
def __init__(self):
self.names = set()
def visit_Global(self, node):
for name in node.names:
self.names.add(name)
class KeptVisitor(ASTVisitor):
def __init__(self):
self.nodes = set()
#Keep imports
def visit_alias(self, node):
bool_const = const_value(node.value)
if bool_const is None:
return
defn = node.asname
if hasattr(defn, 'variable'):
self.nodes.add(defn)
def skipped_variables(tree, graph, use_map):
'''Returns a collection of SsaVariables that
are skipped as possibly mutated.
Variables are skipped if their values may be mutated
in such a way that it might alter their boolean value.
This means that they have an attribute accessed, or are subscripted.
However, modules are always true, so are never skipped.
'''
variables = use_map.values()
skiplist = set()
v = SkippedVisitor()
v.visit(tree)
ast_skiplist = v.nodes
for node, var in use_map.items():
if node.node in ast_skiplist:
skiplist.add(var)
v = KeptVisitor()
v.visit(tree)
ast_keeplist = v.nodes
keeplist = set()
for var in variables:
defn = graph.get_ssa_definition(var)
if defn and defn.node in ast_keeplist:
keeplist.add(var)
return skiplist - keeplist
def get_branching_edges(tree, graph, use_map):
''''Returns an iterator of pred, succ, var, bool tuples
representing edges and the boolean value or None-ness that
the ssa variable holds on that edge.
'''
for pred, succ, ann in graph.edges():
if ann not in (util.TRUE_EDGE, util.FALSE_EDGE):
continue
#Handle 'not' expressions.
invert = ann == util.FALSE_EDGE
test = pred
while isinstance(test.node, ast.UnaryOp):
if not isinstance(test.node.op, ast.Not):
break
preds = graph.pred[test]
if len(preds) != 1:
break
test = preds[0]
invert = not invert
t = comparison_kind(graph, test)
if t is None:
continue
val, use = t
if invert:
val = val.invert()
if use in use_map:
yield pred, succ, use_map[use], val
def effective_constants_definitions(bool_const_defns, graph, branching_edges):
'''Returns a mapping of var -> list of (node, effective-constant)
representing the effective boolean constant definitions.
A constant definition is an assignment to a
SSA variable 'var' such that bool(var) is a constant
for all uses of that variable dominated by the definition.
A (SSA) variable is effectively constant if it assigned
a constant, or it is guarded by a test.
'''
consts = defaultdict(list)
for var in graph.ssa_variables():
defn = graph.get_ssa_definition(var)
if not defn or defn.node not in bool_const_defns:
continue
consts[var].append((defn, bool_const_defns[defn.node]))
for pred, succ, var, bval in branching_edges:
if len(graph.pred[succ]) != 1:
continue
consts[var].append((succ, bval))
return consts
def do_pruning(tree, graph):
v = BoolConstVisitor()
v.visit(tree)
not_boolean_test = NotBooleanTestVisitor()
not_boolean_test.visit(tree)
nonlocals = NonlocalVisitor()
nonlocals.visit(tree)
global_vars = GlobalVisitor()
global_vars.visit(tree)
bool_const_defns = v.const_defns
#Need to repeatedly do this until we reach a fixed point
while True:
use_map = {}
for node, var in graph.ssa_uses():
if isinstance(node.node, ast.Name):
use_map[node] = var
skiplist = skipped_variables(tree, graph, use_map)
edges = list(get_branching_edges(tree, graph, use_map))
consts = effective_constants_definitions(bool_const_defns, graph, edges)
dominated_by = {}
#Look for effectively constant definitions that dominate edges on
#which the relative variable has the inverse sense.
#Put edges to be removed in a set, as an edge could be removed for
#multiple reasons.
to_be_removed = set()
for pred, succ, var, bval in edges:
if var not in consts:
continue
if var in skiplist and bval in (VAR_IS_TRUE, VAR_IS_FALSE):
continue
if var.variable.id in nonlocals.names:
continue
if var.variable.id in global_vars.names:
continue
for defn, const_kind in consts[var]:
if not const_kind.contradicts(bval):
continue
if defn not in dominated_by:
dominated_by[defn] = graph.dominated_by(defn)
if pred in dominated_by[defn]:
to_be_removed.add((pred, succ))
#Delete simply dead edges (like `if False: ...` )
for pred, succ, ann in graph.edges():
if ann == util.TRUE_EDGE:
val = VAR_IS_TRUE
elif ann == util.FALSE_EDGE:
val = VAR_IS_FALSE
else:
continue
b = const_value(pred.node)
if b is None:
continue
if pred.node in not_boolean_test.nodes:
continue
if b.contradicts(val):
to_be_removed.add((pred, succ))
if not to_be_removed:
break
for pred, succ in to_be_removed:
graph.remove_edge(pred, succ)
graph.clear_computed()
class BoolConstVisitor(ASTVisitor):
'''Look for assignments of a boolean constant to a variable.
self.const_defns holds a mapping from the AST node for the definition
to the True/False value for the constant.'''
def __init__(self):
self.const_defns = {}
def visit_alias(self, node):
bool_const = const_value(node.value)
if bool_const is None:
return
defn = node.asname
if hasattr(defn, 'variable'):
self.const_defns[defn] = bool_const
def visit_Assign(self, node):
bool_const = const_value(node.value)
if bool_const is None:
return
for defn in node.targets:
if hasattr(defn, 'variable'):
self.const_defns[defn] = bool_const
def _comparison(test):
# Comparisons to None or ints
if isinstance(test, ast.Compare) and len(test.ops) == 1:
left = test.left
right = test.comparators[0]
if not hasattr(left, "variable"):
return None
if isinstance(right, ast.Name) and right.id == "None":
if isinstance(test.ops[0], ast.Is):
return VAR_IS_NONE
if isinstance(test.ops[0], ast.IsNot):
return VAR_IS_NOT_NONE
if isinstance(right, ast.Num) and isinstance(right.n, INT_TYPES):
return ComparedToConst(type(test.ops[0]), right.n)
return None
def comparison_kind(graph, test):
# Comparisons to None or ints
val = _comparison(test.node)
if val is None:
if hasattr(test.node, "variable"):
return VAR_IS_TRUE, test
return None
use_set = graph.pred[graph.pred[test][0]]
if len(use_set) != 1:
return None
use = use_set[0]
return val, use
def const_value(ast_node):
'''Returns the boolean value of a boolean or numeric constant AST node or None if not a constant.
NaN is not a constant.'''
if isinstance(ast_node, ast.Name):
if ast_node.id in ("True", "False", "None"):
return NAME_CONSTS[ast_node.id]
else:
return None
if isinstance(ast_node, ast.ImportExpr):
#Modules always evaluate True
return VAR_IS_TRUE
if isinstance(ast_node, ast.Num):
n = ast_node.n
elif isinstance(ast_node, ast.UnaryOp):
if isinstance(ast_node.op, ast.USub) and isinstance(ast_node.operand, ast.Num):
n = ast_node.operand.n
elif isinstance(ast_node.op, ast.Not):
not_value = const_value(ast_node.operand)
if not_value is None:
return None
return not_value.invert()
else:
return None
else:
return None
#Check for NaN, but be careful not to overflow
#Handle integers first as they may overflow cmath.isnan()
if not isinstance(n, INT_TYPES) and cmath.isnan(n):
return None
#Now have an int or a normal float or complex
return ComparedToConst(Eq, n)