from blib2to3.pgen2 import token from ast import literal_eval from semmle.python import ast from blib2to3.pgen2.parse import ParseError import sys LOAD = ast.Load() STORE = ast.Store() PARAM = ast.Param() DEL = ast.Del() POSITIONAL = 1 KEYWORD = 2 class ParseTreeVisitor(object): '''Standard tree-walking visitor, using `node.name` rather than `type(node).__name__` ''' def visit(self, node, extra_arg=None): method = 'visit_' + node.name if extra_arg is None: return getattr(self, method)(node) else: return getattr(self, method)(node, extra_arg) class Convertor(ParseTreeVisitor): ''' Walk the conrete parse tree, returning an AST. The CPT is specified by blib2to3/Grammar.txt. The AST specified by semmle/python/master.py. Each `visit_X` method takes a `X` node in the CFG and produces some part of the AST, usually a single node. ''' def __init__(self, logger): self.logger = logger # To handle f-strings nested inside other f-strings, we must keep track of the stack of # surrounding prefixes while walking the tree. This is necessary because inside an f-string # like `f"hello{f'to{you}dear'}world"`, the string part containing "world" has (in terms of # the concrete parse tree) a prefix of `}`, which doesn't tell us how to interpret it (in # particular, we can't tell if it's a raw string or not). So instead we look at the top of # the prefix stack to figure out what the "current prefix" is. The nested f-string in the # example above demonstrates why we must do this as a stack -- we must restore the outer # `f"` prefix when we're done with the inner `f'`-prefix string. # # The stack manipulation itself takes place in the `visit_FSTRING_START` and # `visit_FSTRING_END` methods. The text wrangling takes place in the `parse_string` helper # function. self.outer_prefix_stack = [] def visit_file_input(self, node): body = [] for s in [self.visit(s) for s in node.children if s.name not in ("ENDMARKER", "NEWLINE")]: if isinstance(s, list): body.extend(s) else: body.append(s) result = ast.Module(body) set_location(result, node) return result def visit_import_from(self, node): level = 0 index = 1 module_start = node.children[index].start while is_token(node.children[index], "."): level += 1 index += 1 if is_token(node.children[index], "import"): module_end = node.children[index-1].end index += 1 module_name = None else: module_end = node.children[index].end module_name = self.visit(node.children[index]) index += 2 if is_token(node.children[index], "*"): module = ast.ImportExpr(level, module_name, False) set_location(module, module_start, module_end) result = ast.ImportFrom(module) set_location(result, node) return result if is_token(node.children[index], "("): import_as_names = node.children[index+1] else: import_as_names = node.children[index] aliases = [] for import_as_name in import_as_names.children[::2]: module = ast.ImportExpr(level, module_name, False) set_location(module, module_start, module_end) aliases.append(self._import_as_name(import_as_name, module)) result = ast.Import(aliases) set_location(result, node) return result #Helper for visit_import_from def _import_as_name(self, node, module): name = node.children[0].value if len(node.children) == 3: asname = node.children[2] else: asname = node.children[0] expr = ast.ImportMember(module, name) set_location(expr, node) rhs = make_name(asname.value, STORE, asname.start, asname.end) result = ast.alias(expr, rhs) set_location(result, node) return result def visit_small_stmt(self, node): return self.visit(node.children[0]) def visit_simple_stmt(self, node): return [self.visit(s) for s in node.children if s.name not in ("SEMI", "NEWLINE")] def visit_stmt(self, node): return self.visit(node.children[0]) def visit_compound_stmt(self, node): return self.visit(node.children[0]) def visit_pass_stmt(self, node): p = ast.Pass() set_location(p, node) return p def visit_classdef(self, node): if len(node.children) == 4: cls, name, colon, suite = node.children args, keywords = [], [] elif len(node.children) == 7: cls, name, _, args, _, colon, suite = node.children args, keywords = self.visit(args) else: assert len(node.children) == 6 cls, name, _, _, colon, suite = node.children args, keywords = [], [] start = cls.start end = colon.end suite = self.visit(suite) inner = ast.Class(name.value, suite) set_location(inner, start, end) cls_expr = ast.ClassExpr(name.value, [], args, keywords, inner) set_location(cls_expr, start, end) name_expr = make_name(name.value, STORE, name.start, name.end) result = ast.Assign(cls_expr, [name_expr]) set_location(result, start, end) return result def visit_arglist(self, node): all_args = self._visit_list(node.children[::2]) args = [ arg for kind, arg in all_args if kind is POSITIONAL ] keywords = [ arg for kind, arg in all_args if kind is KEYWORD ] return args, keywords def visit_argument(self, node): child = node.children[0] if is_token(child, "*"): kind, arg = POSITIONAL, ast.Starred(self.visit(node.children[1], LOAD), LOAD) elif is_token(child, "**"): kind, arg = KEYWORD, ast.DictUnpacking(self.visit(node.children[1], LOAD)) elif len(node.children) == 3 and is_token(node.children[1], "="): try: name = get_node_value(child) except Exception: #Not a legal name name = None self.logger.warning("Illegal name for keyword on line %s", child.start[0]) kind, arg = KEYWORD, ast.keyword(name, self.visit(node.children[2], LOAD)) else: arg = self.visit(child, LOAD) if len(node.children) == 1: return POSITIONAL, arg elif len(node.children) == 3 and is_token(node.children[1], ":="): return POSITIONAL, self.visit_namedexpr_test(node, LOAD) generators = self.visit(node.children[1]) kind, arg = POSITIONAL, ast.GeneratorExp(arg, generators) set_location(arg, node) rewrite_comp(arg) set_location(arg, node) return kind, arg def visit_namedexpr_test(self, node, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) target = self.visit(node.children[0], STORE) value = self.visit(node.children[-1], LOAD) result = ast.AssignExpr(value, target) set_location(result, node) return result def visit_test(self, node, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) else: if ctx is not LOAD: context_error(node) body = self.visit(node.children[0], ctx) test = self.visit(node.children[2], ctx) orelse = self.visit(node.children[4], ctx) ifexp = ast.IfExp(test, body, orelse) set_location(ifexp, node) return ifexp def visit_or_test(self, node, ctx): return self._boolop(node, ast.Or, ctx) def visit_and_test(self, node, ctx): return self._boolop(node, ast.And, ctx) def visit_not_test(self, node, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) if ctx is not LOAD: context_error(node) result = ast.UnaryOp( ast.Not(), self.visit(node.children[1], ctx) ) set_location(result, node) return result # Helper for `or` and `and`. def _boolop(self, node, opcls, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) values = [ self.visit(s, ctx) for s in node.children[::2] ] result = ast.BoolOp(opcls(), values) set_location(result, node) return result # Helper for various binary expression visitors. def _binary(self, node, opfact, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) if ctx is not LOAD: context_error(node) children = iter(node.children) result = self.visit(next(children), LOAD) for op in children: item = next(children) rhs = self.visit(item, LOAD) result = ast.BinOp(result, opfact(op), rhs) set_location(result, node.start, item.end) return result def visit_suite(self, node): if len(node.children) == 1: return self.visit(node.children[0]) result = [] for s in [self.visit(s) for s in node.children[2:-1]]: if isinstance(s, list): result.extend(s) else: result.append(s) return result def visit_expr_stmt(self, node): if len(node.children) == 1: result = ast.Expr(self.visit(node.children[0], LOAD)) set_location(result, node) return result if len(node.children) > 1 and is_token(node.children[1], "="): return self._assign(node) if len(node.children) == 2: # Annotated assignment target = self.visit(node.children[0], STORE) ann = node.children[1] type_anno = self.visit(ann.children[1], LOAD) if len(ann.children) > 2: value = self.visit(ann.children[3], LOAD) else: value = None result = ast.AnnAssign(value, type_anno, target) else: #Augmented assignment lhs = self.visit(node.children[0], LOAD) op = self.visit(node.children[1]) rhs = self.visit(node.children[2], LOAD) expr = ast.BinOp(lhs, op, rhs) set_location(expr, node) result = ast.AugAssign(expr) set_location(result, node) return result def visit_augassign(self, node): return AUG_ASSIGN_OPS[node.children[0].value]() #Helper for visit_expr_stmt (for assignment) def _assign(self, node): targets = [ self.visit(t, STORE) for t in node.children[:-1:2]] result = ast.Assign(self.visit(node.children[-1], LOAD), targets) set_location(result, node) return result def visit_testlist(self, node, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) elts = self._visit_list(node.children[::2], ctx) result = ast.Tuple(elts, ctx) set_location(result, node) return result visit_testlist_star_expr = visit_testlist def visit_comparison(self, node, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) if ctx is not LOAD: context_error(node) left = self.visit(node.children[0], ctx) ops = [ self.visit(op) for op in node.children[1::2]] comps = [ self.visit(op, ctx) for op in node.children[2::2]] result = ast.Compare(left, ops, comps) set_location(result, node) return result def visit_comp_op(self, node): if len(node.children) == 1: return COMP_OP_CLASSES[node.children[0].value]() else: assert len(node.children) == 2 return ast.IsNot() if node.children[0].value == "is" else ast.NotIn() def visit_expr(self, node, ctx): return self._binary(node, lambda _: ast.BitOr(), ctx) def visit_xor_expr(self, node, ctx): return self._binary(node, lambda _: ast.BitXor(), ctx) def visit_and_expr(self, node, ctx): return self._binary(node, lambda _: ast.BitAnd(), ctx) def visit_shift_expr(self, node, ctx): return self._binary( node, lambda op: ast.LShift() if op.value == "<<" else ast.RShift(), ctx ) def visit_arith_expr(self, node, ctx): return self._binary( node, lambda op: ast.Add() if op.value == "+" else ast.Sub(), ctx ) def visit_term(self, node, ctx): return self._binary( node, lambda op: TERM_OP_CLASSES[op.value](), ctx ) def visit_factor(self, node, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) result = ast.UnaryOp( FACTOR_OP_CLASSES[node.children[0].value](), self.visit(node.children[1], ctx) ) set_location(result, node) return result def visit_power(self, node, ctx): '''This part of the Grammar is formulated in a slightly awkward way, so we need to recursively handle the `await` prefix, then the `** factor` suffix, then the atom and trailers. ''' # Because `await` was a valid identifier in earlier versions of Python, # we cannot assume it indicates an `await` expression. We therefore # have to look at what follows in order to make a decision. The # relevant part of the grammar is # # power: ['await'] atom trailer* ['**' factor] # # The case we wish to identify is when 'await' appears, but as an # `atom`, and not an `await` token. # # Because `atom` nodes may no longer be present (see # `SKIP_IF_SINGLE_CHILD_NAMES` in `__init__.py`) we instead look at the # node following the (potentially) skipped `atom`. In particular, if # the following node is a `trailer` or "**" token, we know that the # given node cannot be an `await` token, and must be an `atom` instead. try: next_node = node.children[1] next_is_atom = next_node.name != "trailer" and not is_token(next_node, "**") except (IndexError, AttributeError): # IndexError if `node` has at most one child. # AttributeError if `next_node` is a `Leaf` instead of a `Node`. next_is_atom = False if is_token(node.children[0], "await") and next_is_atom: if ctx is not LOAD: context_error(node) pow = self._power(node.children[1:], ctx) result = ast.Await(pow) set_location(result, node) return result else: return self._power(node.children, ctx) #Helper for visit_power def _power(self, children, ctx): start = children[0].start if len(children) > 1 and is_token(children[-2], "**"): if ctx is not LOAD: context_error(children[0]) trailers = children[1:-2] pow_expr = self.visit(children[-1], ctx) else: trailers = children[1:] pow_expr = None if trailers: expr = self.visit(children[0], LOAD) for trailer in trailers[:-1]: expr = self._apply_trailer(expr, trailer, start, LOAD) expr = self._apply_trailer(expr, trailers[-1], start, ctx) else: expr = self.visit(children[0], ctx) if pow_expr: expr = ast.BinOp(expr, ast.Pow(), pow_expr) set_location(expr, children[0].start, children[-1].end) return expr #Helper for _power def _atom(self, children, ctx): start = children[0].start if len(children) == 1: return self.visit(children[0], ctx) atom = self.visit(children[0], LOAD) for trailer in children[1:-1]: atom = self._apply_trailer(atom, trailer, start, LOAD) atom = self._apply_trailer(atom, children[-1], start, ctx) return atom #Helper for _atom def _apply_trailer(self, atom, trailer, start, ctx): children = trailer.children left = children[0] if is_token(left, "("): if is_token(children[1], ")"): args, keywords = [], [] end = children[1].end else: args, keywords = self.visit(children[1]) end = children[2].end result = ast.Call(atom, args, keywords) elif is_token(left, "["): result = ast.Subscript(atom, self.visit(children[1], LOAD), ctx) end = children[2].end else: assert is_token(left, ".") result = ast.Attribute(atom, children[1].value, ctx) end = children[1].end set_location(result, start, end) return result def visit_atom(self, node, ctx): left = node.children[0] if left.value in "[({": n = node.children[1] if hasattr(n, "value") and n.value in "])}": if n.value == ")": result = ast.Tuple([], ctx) elif n.value == "]": result = ast.List([], ctx) else: result = ast.Dict([]) set_location(result, node) return result else: result = self.visit(node.children[1], ctx) if left.value == "(": result.parenthesised = True else: #Meaningful bracketing set_location(result, node) if isinstance(result, (ast.GeneratorExp, ast.ListComp, ast.SetComp, ast.DictComp)): rewrite_comp(result) return result if left.type == token.NAME: return make_name(left.value, ctx, left.start, left.end) if ctx is not LOAD: context_error(node) if left.type == token.NUMBER: val = get_numeric_value(left) result = ast.Num(val, left.value) set_location(result, left) return result if left.value == ".": assert len(node.children) == 3 and node.children[2].value == "." result = ast.Ellipsis() set_location(result, node) return result assert left.type == token.BACKQUOTE result = ast.Repr(self.visit(node.children[1], LOAD)) set_location(result, node) return result def visit_STRING(self, node, ctx): if ctx is not LOAD: context_error(node) outer_prefix = self.outer_prefix_stack[-1] if self.outer_prefix_stack else None prefix, s = parse_string(node.value, self.logger, outer_prefix) text = get_text(node.value, outer_prefix) result = ast.StringPart(prefix, text, s) set_location(result, node) return result def visit_NUMBER(self, node, ctx): if ctx is not LOAD: context_error(node) val = get_numeric_value(node) result = ast.Num(val, node.value) set_location(result, node) return result def visit_funcdef(self, node, is_async=False): # funcdef: 'def' NAME parameters ['->' test] ':' suite name = node.children[1].value if node.children[3].value == "->": return_type = self.visit(node.children[4], LOAD) end = node.children[5].end body = self.visit(node.children[6]) else: return_type = None end = node.children[3].end body = self.visit(node.children[4]) start = node.children[0].start params = node.children[2] if len(params.children) == 2: args, vararg, kwonlyargs, kwarg = [], None, [], None else: args, vararg, kwonlyargs, kwarg = self._get_parameters(params.children[1]) func = ast.Function(name, [], args, vararg, kwonlyargs, kwarg, body, is_async) set_location(func, start, end) if len(params.children) == 2: args = ast.arguments([], [], [], None, None, []) else: args = self._get_defaults_and_annotations(params.children[1]) funcexpr = ast.FunctionExpr(name, args, return_type, func) set_location(funcexpr, start, end) name_expr = make_name(name, STORE, node.children[1].start, node.children[1].end) result = ast.Assign(funcexpr, [name_expr]) set_location(result, start, end) return result #Helper for visit_funcdef and visit_lambdef def _get_parameters(self, node): '''Returns the quadruple: args, vararg, kwonlyargs, kwarg ''' args = [] vararg = None kwonlyargs = [] kwarg = None children = iter(node.children) arg = None for child in children: if is_token(child, "*"): try: child = next(children) except StopIteration: pass else: if not is_token(child, ","): vararg = self.visit(child, PARAM) break if is_token(child, ","): pass elif is_token(child, "/"): pass elif is_token(child, "="): next(children) elif is_token(child, "**"): child = next(children) kwarg = self.visit(child, PARAM) else: arg = self.visit(child, PARAM) args.append(arg) #kwonly args for child in children: if is_token(child, ","): pass elif is_token(child, "="): next(children) elif is_token(child, "**"): child = next(children) kwarg = self.visit(child, PARAM) else: arg = self.visit(child, PARAM) kwonlyargs.append(arg) return args, vararg, kwonlyargs, kwarg #Helper for visit_funcdef and visit_lambdef def _get_defaults_and_annotations(self, node): defaults = [] kw_defaults = [] annotations = [] varargannotation = None kwargannotation = None kw_annotations = [] children = iter(node.children) # Because we want the i'th element of `kw_defaults` to be the default value for # the i'th keyword-only argument, when encountering the combined token for the # argument name and optional annotation, we add a `None` to `kw_defaults` assuming # that there is no default value. If there turns out to be a default value, we # remove the `None` and add the real default value. Like-wise for `defaults`. # positional-only args and "normal" args for child in children: if is_token(child, "*"): try: child = next(children) except StopIteration: pass else: if not is_token(child, ","): varargannotation = self.visit(child, LOAD) break if is_token(child, ","): pass elif is_token(child, "/"): pass elif is_token(child, "="): child = next(children) defaults.pop() defaults.append(self.visit(child, LOAD)) elif is_token(child, "**"): child = next(children) kwargannotation = self.visit(child, LOAD) arg = None else: # Preemptively assume there is no default argument (indicated by None) defaults.append(None) annotations.append(self.visit(child, LOAD)) #kwonly args for child in children: if is_token(child, ","): pass elif is_token(child, "="): child = next(children) kw_defaults.pop() kw_defaults.append(self.visit(child, LOAD)) elif is_token(child, "**"): child = next(children) kwargannotation = self.visit(child, LOAD) else: # Preemptively assume there is no default argument (indicated by None) kw_defaults.append(None) kw_annotations.append(self.visit(child, LOAD)) result = ast.arguments(defaults, kw_defaults, annotations, varargannotation, kwargannotation, kw_annotations) set_location(result, node) return result def visit_tfpdef(self, node, ctx): # TO DO Support tuple parameters # No one uses them any more, so this isn't super important. child = node.children[0] if is_token(child, "("): return None return self.visit(child, ctx) def visit_tname(self, node, ctx): if ctx is PARAM: child = node.children[0] return make_name(child.value, ctx, child.start, child.end) elif len(node.children) > 1: return self.visit(node.children[2], ctx) else: return None def visit_decorated(self, node): asgn = self.visit(node.children[1]) value = asgn.value for deco in reversed(node.children[0].children): defn = value decorator = self.visit(deco) value = ast.Call(decorator, [defn], []) copy_location(decorator, value) asgn.value = value return asgn def visit_decorators(self, node): return self._visit_list(node.children) def visit_decorator(self, node): namedexpr_test = node.children[1] result = self.visit_namedexpr_test(namedexpr_test, LOAD) set_location(result, namedexpr_test) return result def _visit_list(self, items, ctx=None): if ctx is None: return [ self.visit(i) for i in items ] else: return [ self.visit(i, ctx) for i in items ] def visit_dotted_name(self, node): return ".".join(name.value for name in node.children[::2]) def visit_NAME(self, name, ctx): return make_name(name.value, ctx, name.start, name.end) def visit_listmaker(self, node, ctx): if len(node.children) == 1 or is_token(node.children[1], ","): items = [self.visit(c, ctx) for c in node.children[::2]] result = ast.List(items, ctx) else: if ctx is not LOAD: context_error(node) elt = self.visit(node.children[0], ctx) generators = self.visit(node.children[1]) result = ast.ListComp(elt, generators) set_location(result, node) return result def visit_testlist_gexp(self, node, ctx): if len(node.children) == 1: return self.visit(node.children[0], ctx) if is_token(node.children[1], ","): items = [self.visit(c, ctx) for c in node.children[::2]] result = ast.Tuple(items, ctx) else: if ctx is not LOAD: context_error(node) elt = self.visit(node.children[0], ctx) generators = self.visit(node.children[1]) result = ast.GeneratorExp(elt, generators) set_location(result, node) return result def visit_comp_for(self, node): is_async = is_token(node.children[0], "async") target = self.visit(node.children[1+is_async], STORE) iter = self.visit(node.children[3+is_async], LOAD) if len(node.children) == 5+is_async: ifs = [] end = iter._end comp_iter = self.visit(node.children[4+is_async]) while comp_iter and not isinstance(comp_iter[0], ast.comprehension): ifs.append(comp_iter[0]) end = comp_iter[0]._end comp_iter = comp_iter[1:] comp = ast.comprehension(target, iter, ifs) comp.is_async = is_async set_location(comp, node.children[0].start, end) return [comp] + comp_iter else: comp = ast.comprehension(target, iter, []) comp.is_async = is_async set_location(comp, node) return [comp] visit_old_comp_for = visit_comp_for def visit_comp_iter(self, node): return self.visit(node.children[0]) def visit_comp_if(self, node): cond = self.visit(node.children[1], LOAD) if len(node.children) == 3: comp_list = self.visit(node.children[2]) return [cond] + comp_list else: return [cond] visit_old_comp_if = visit_comp_if visit_old_comp_iter = visit_comp_iter def visit_exprlist(self, node, ctx): #Despite the name this returns a single expression if len(node.children) == 1: return self.visit(node.children[0], ctx) else: elts = self._visit_list(node.children[::2], ctx) result = ast.Tuple(elts, ctx) set_location(result, node) return result visit_testlist_safe = visit_exprlist def visit_old_test(self, node, ctx): return self.visit(node.children[0], ctx) def visit_if_stmt(self, node): endindex = len(node.children) if is_token(node.children[-3], "else"): orelse = self.visit(node.children[-1]) endindex -= 3 else: orelse = None while endindex: test = self.visit(node.children[endindex-3], LOAD) body = self.visit(node.children[endindex-1]) result = ast.If(test, body, orelse) start = node.children[endindex-4].start end = node.children[endindex-2].end set_location(result, start, end) orelse = [result] endindex -= 4 return result def visit_import_stmt(self, node): return self.visit(node.children[0]) def visit_import_name(self, node): aliases = self.visit(node.children[1]) result = ast.Import(aliases) set_location(result, node) return result def visit_dotted_as_names(self, node): return self._visit_list(node.children[::2]) def visit_dotted_as_name(self, node): child0 = node.children[0] dotted_name = self.visit(child0) if len(node.children) == 3: value = ast.ImportExpr(0, dotted_name, False) child2 = node.children[2] asname = make_name(child2.value, STORE, child2.start, child2.end) else: value = ast.ImportExpr(0, dotted_name, True) topname = dotted_name.split(".")[0] asname = make_name(topname, STORE, child0.start, child0.end) set_location(value, child0) result = ast.alias(value, asname) set_location(result, node) return result def visit_dictsetmaker(self, node, ctx): if ctx is not LOAD: context_error(node) if is_token(node.children[0], "**") or len(node.children) > 1 and is_token(node.children[1], ":"): return self._dictmaker(node) else: return self._setmaker(node) #Helper for visit_dictsetmaker (for dictionaries) def _dictmaker(self, node): if len(node.children) == 4 and is_token(node.children[1], ":") and not is_token(node.children[3], ","): #Comprehension form key = self.visit(node.children[0], LOAD) value = self.visit(node.children[2], LOAD) generators = self.visit(node.children[3]) result = ast.DictComp(key, value, generators) set_location(result, node) return result index = 0 items = [] while len(node.children) > index: if is_token(node.children[index], "**"): d = self.visit(node.children[index+1], LOAD) item = ast.DictUnpacking(d) set_location(item, node.children[index].start, node.children[index+1].end) index += 3 else: key = self.visit(node.children[index], LOAD) value = self.visit(node.children[index+2], LOAD) item = ast.KeyValuePair(key, value) set_location(item, node.children[index].start, node.children[index+2].end) index += 4 items.append(item) result = ast.Dict(items) set_location(result, node) return result #Helper for visit_dictsetmaker (for sets) def _setmaker(self, node): if len(node.children) == 2 and not is_token(node.children[1], ","): #Comprehension form elt = self.visit(node.children[0], LOAD) generators = self.visit(node.children[1]) result = ast.SetComp(elt, generators) set_location(result, node) return result items = self._visit_list(node.children[::2], LOAD) result = ast.Set(items) set_location(result, node) return result def visit_while_stmt(self, node): test = self.visit(node.children[1], LOAD) body = self.visit(node.children[3]) if len(node.children) == 7: orelse = self.visit(node.children[6]) else: orelse = None result = ast.While(test, body, orelse) set_location(result, node.children[0].start, node.children[2].end) return result def visit_flow_stmt(self, node): return self.visit(node.children[0]) def visit_break_stmt(self, node): result = ast.Break() set_location(result, node) return result def visit_continue_stmt(self, node): result = ast.Continue() set_location(result, node) return result def visit_return_stmt(self, node): if len(node.children) == 2: result = ast.Return(self.visit(node.children[1], LOAD)) else: result = ast.Return(None) set_location(result, node) return result def visit_raise_stmt(self, node): result = ast.Raise() set_location(result, node) if len(node.children) == 1: return result result.exc = self.visit(node.children[1], LOAD) if len(node.children) > 3: if is_token(node.children[2], "from"): result.cause = self.visit(node.children[3], LOAD) else: result.type = result.exc del result.exc result.inst = self.visit(node.children[3], LOAD) if len(node.children) == 6: result.tback = self.visit(node.children[5], LOAD) return result def visit_yield_stmt(self, node): result = ast.Expr(self.visit(node.children[0], LOAD)) set_location(result, node) return result def visit_yield_expr(self, node, ctx): if ctx is not LOAD: context_error(node) if len(node.children) == 1: result = ast.Yield(None) else: if is_token(node.children[1].children[0], "from"): result = ast.YieldFrom(self.visit(node.children[1].children[1], LOAD)) else: result = ast.Yield(self.visit(node.children[1].children[0], LOAD)) set_location(result, node) return result def visit_try_stmt(self, node): body = self.visit(node.children[2]) index = 3 handlers = [] while len(node.children) > index and not hasattr(node.children[index], "value"): #Except block. type, name = self.visit(node.children[index]) handler_body = self.visit(node.children[index+2]) handler = ast.ExceptStmt(type, name, handler_body) set_location(handler, node.children[index].start , node.children[index+1].end) handlers.append(handler) index += 3 if len(node.children) > index and is_token(node.children[index], "else"): orelse = self.visit(node.children[index+2]) else: orelse = [] if is_token(node.children[-3], "finally"): finalbody = self.visit(node.children[-1]) else: finalbody = [] result = ast.Try(body, orelse, handlers, finalbody) set_location(result, node.start, node.children[1].end) return result def visit_except_clause(self, node): type, name = None, None if len(node.children) > 1: type = self.visit(node.children[1], LOAD) if len(node.children) > 3: name = self.visit(node.children[3], STORE) return type, name def visit_del_stmt(self, node): if len(node.children) > 1: result = ast.Delete(self._visit_list(node.children[1].children[::2], DEL)) else: result = ast.Delete([]) set_location(result, node) return result visit_subscriptlist = visit_testlist visit_testlist1 = visit_testlist def visit_subscript(self, node, ctx): if len(node.children) == 1 and not is_token(node.children[0], ":"): return self.visit(node.children[0], ctx) values = [None, None, None] index = 0 for child in node.children: if is_token(child, ":"): index += 1 else: values[index] = self.visit(child, LOAD) result = ast.Slice(*values) set_location(result, node) return result def visit_sliceop(self, node, ctx): if ctx is not LOAD: context_error(node) if len(node.children) == 2: return self.visit(node.children[1], LOAD) else: return None def visit_assert_stmt(self, node): test = self.visit(node.children[1], LOAD) if len(node.children) > 2: msg = self.visit(node.children[3], LOAD) else: msg = None result = ast.Assert(test, msg) set_location(result, node) return result def visit_for_stmt(self, node, is_async=False): target = self.visit(node.children[1], STORE) iter = self.visit(node.children[3], LOAD) body = self.visit(node.children[5]) if len(node.children) == 9: orelse = self.visit(node.children[8]) else: orelse = None result = ast.For(target, iter, body, orelse) result.is_async = is_async set_location(result, node.children[0].start, node.children[4].end) return result def visit_global_stmt(self, node): cls = ast.Global if node.children[0].value == "global" else ast.Nonlocal names = [child.value for child in node.children[1::2]] result = cls(names) set_location(result, node) return result def visit_lambdef(self, node, ctx): if ctx is not LOAD: context_error(node) test = self.visit(node.children[-1], LOAD) stmt = ast.Return(test) set_location(stmt, node.children[-1]) if is_token(node.children[1], ":"): args, vararg, kwonlyargs, kwarg = [], None, [], None else: args, vararg, kwonlyargs, kwarg = self._get_parameters(node.children[1]) func = ast.Function("lambda", [], args, vararg, kwonlyargs, kwarg, [stmt], False) set_location(func, node) if is_token(node.children[1], ":"): args = ast.arguments([], [], [], None, None, []) else: args = self._get_defaults_and_annotations(node.children[1]) result = ast.Lambda(args, func) set_location(result, node) return result visit_old_lambdef = visit_lambdef visit_vfpdef = visit_tfpdef def visit_vname(self, node, ctx): if ctx is PARAM: child = node.children[0] return make_name(child.value, ctx, child.start, child.end) else: return None def visit_star_expr(self, node, ctx): result = ast.Starred(self.visit(node.children[1], ctx), ctx) set_location(result, node) return result def visit_with_stmt(self, node, is_async=False): body = self.visit(node.children[-1]) for item in node.children[-3:0:-2]: ctx_mngr, opt_vars = self.visit(item) withstmt = ast.With(ctx_mngr, opt_vars, body) set_location(withstmt, item) body = [withstmt] set_location(withstmt, node.children[0].start, node.children[-2].end) withstmt.is_async = is_async return withstmt def visit_with_item(self, node): ctx_mngr = self.visit(node.children[0], LOAD) if len(node.children) == 1: return ctx_mngr, None else: return ctx_mngr, self.visit(node.children[2], STORE) def visit_async_stmt(self, node): return self.visit(node.children[1], True) visit_async_funcdef = visit_async_stmt def visit_print_stmt(self, node): if len(node.children) > 1 and is_token(node.children[1], ">>"): dest = self.visit(node.children[2], LOAD) items = node.children[4::2] else: dest = None items = node.children[1::2] values = self._visit_list(items, LOAD) nl = not is_token(node.children[-1], ",") result = ast.Print(dest, values, nl) set_location(result, node) return result def visit_exec_stmt(self, node): body = self.visit(node.children[1], LOAD) globals, locals = None, None if len(node.children) > 3: globals = self.visit(node.children[3], LOAD) if len(node.children) > 5: locals = self.visit(node.children[5], LOAD) result = ast.Exec(body, globals, locals) set_location(result, node) return result def visit_special_operation(self, node, ctx): if ctx is not LOAD: context_error(node) name = node.children[0].value if len(node.children) == 3: args = [] else: args = self._visit_list(node.children[2].children[::2], LOAD) result = ast.SpecialOperation(name, args) set_location(result, node) return result def visit_string(self, node, ctx): def convert_parts_to_expr(): if not current_parts: return None if len(current_parts) == 1: string = ast.Str(current_parts[0].s, current_parts[0].prefix, None) else: # Our string parts may be any combination of byte and unicode # strings, as this is valid in Python 2. We therefore decode # the strings into unicode before concatenating. text = "".join(decode_str(p.s) for p in current_parts) string = ast.Str(text, current_parts[0].prefix, current_parts[:]) start = current_parts[0].lineno, current_parts[0].col_offset set_location(string, start, current_parts[-1]._end) current_parts[:] = [] return string if ctx is not LOAD: context_error(node) parts = [] for p in self._visit_list(node.children, LOAD): if isinstance(p, list): parts.extend(p) else: parts.append(p) current_parts = [] exprs = [] for part in parts: if part is None: #Conversion -- currently ignored. pass elif isinstance(part, ast.StringPart): current_parts.append(part) else: assert isinstance(part, ast.expr), part string = convert_parts_to_expr() if string: exprs.append(string) exprs.append(part) string = convert_parts_to_expr() if string: exprs.append(string) if len(exprs) == 1: return exprs[0] result = ast.JoinedStr(exprs) set_location(result, node) return result def visit_fstring_part(self, node, ctx): nodes_to_visit = [] for node in node.children: if node.name == 'format_specifier': # Flatten format_specifiers first nodes_to_visit += [ n for n in node.children if not n.name == 'FSTRING_SPEC' ] else: nodes_to_visit += [node] return self._visit_list(nodes_to_visit, ctx) def visit_format_specifier(self, node, ctx): # This will currently never be visited because of the above flattening assert ctx is LOAD #Currently ignored return None def visit_CONVERSION(self, node, ctx): return None def visit_COLON(self, node, ctx): return None def visit_EQUAL(self, node, ctx): return None def visit_FSTRING_START(self, node, ctx): string = self.visit_STRING(node, ctx) # Push the current prefix onto the prefix stack self.outer_prefix_stack.append(string.prefix) return string def visit_FSTRING_END(self, node, ctx): string = self.visit_STRING(node, ctx) # We're done with this f-string, so pop its prefix off the prefix stack self.outer_prefix_stack.pop() return string visit_FSTRING_MID = visit_STRING # In the following function, we decode to `latin-1` in order to preserve # the byte values present in the string. This is an undocumented feature of # this encoding. See also the `test_python_sanity.py` test file in `/tests`. def decode_str(s): if isinstance(s, bytes): return str(s, 'latin-1') else: return s def context_error(node): s = SyntaxError("Invalid context") s.lineno, s.offset = node.start raise s def is_token(node, text): '''Holds if `node` is a token (terminal) and its textual value is `text`''' return hasattr(node, "value") and node.value == text def get_node_value(node): '''Get the value from a NAME node, stripping redundant CPT nodes''' while hasattr(node, "children"): assert len(node.children) == 1 node = node.children[0] return node.value #Mapping from comparison operator strings to ast classes. COMP_OP_CLASSES = { "<": ast.Lt, "<=": ast.LtE, ">": ast.Gt, ">=": ast.GtE, "==": ast.Eq, "<>": ast.NotEq, "!=": ast.NotEq, "in": ast.In, "not in": ast.NotIn, "is": ast.Is, "is not": ast.IsNot, } #Mapping from multiplicative operator strings to ast classes. TERM_OP_CLASSES = { '*': ast.Mult, '/': ast.Div, '%': ast.Mod, '//': ast.FloorDiv, '@': ast.MatMult, } #Mapping from additive operator strings to ast classes. FACTOR_OP_CLASSES = { '+': ast.UAdd, '-': ast.USub, '~': ast.Invert, } #Mapping from assignment operator strings to ast classes. AUG_ASSIGN_OPS = { '+=': ast.Add, '-=': ast.Sub, '*=': ast.Mult, '/=': ast.Div, '%=': ast.Mod, '&=': ast.BitAnd, '|=': ast.BitOr, '^=': ast.BitXor, '<<=': ast.LShift, '>>=': ast.RShift, '**=': ast.Pow, '//=': ast.FloorDiv, '@=': ast.MatMult, } def make_name(name, ctx, start, end): '''Create a `Name` ast node''' variable = ast.Variable(name) node = ast.Name(variable, ctx) set_location(node, start, end) return node def set_location(astnode, cptnode_or_start, end=None): '''Set the location of `astnode` from either the CPT node or pair of locations. ''' if end is None: astnode.lineno, astnode.col_offset = cptnode_or_start.start astnode._end = cptnode_or_start.end else: astnode.lineno, astnode.col_offset = cptnode_or_start astnode._end = end def split_full_prefix(s): """Splits a prefix (or a string starting with a prefix) into prefix and quote parts.""" quote_start = 0 # First, locate the end of the prefix (and the start of the quotes) while s[quote_start] not in "'\"}": quote_start += 1 # Next, find the end of the quotes. This is either one character past `quote_start`, or three # (for triple-quoted strings). if s[quote_start:quote_start + 3] in ("'''",'"""'): prefix_end = quote_start + 3 else: prefix_end = quote_start + 1 return s[:quote_start], s[quote_start:prefix_end] def split_string(s, outer_prefix): """Splits a string into prefix, quotes, and content.""" s_prefix, s_quotes = split_full_prefix(s) quote_start = len(s_prefix) prefix_end = quote_start + len(s_quotes) # If the string starts with `}`, it is a non-inital string part of an f-string. In this case we # must use the prefix and quotes from the outer f-string. if s[0] == '}': prefix, quotes = split_full_prefix(outer_prefix) else: prefix, quotes = s_prefix, s_quotes # The string either ends with a `{` (if it comes before an interpolation inside an f-string) # or else it ends with the same quotes as it begins with. if s[-1] == "{": content = s[prefix_end:-1] else: content = s[prefix_end:-len(quotes)] return prefix.lower(), quotes, content def get_text(s, outer_prefix): """Returns a cleaned-up text version of the string, normalizing the quotes and removing any format string marker.""" prefix, quotes, content = split_string(s, outer_prefix) return prefix.strip("fF") + quotes + content + quotes def parse_string(s, logger, outer_prefix): '''Gets the prefix and escaped string text''' prefix, quotes, content = split_string(s, outer_prefix) saved_content = content try: ends_with_illegal_character = False # If the string ends with the same quote character as the outer quotes (and/or backslashes) # (e.g. the first string part of `f"""hello"{0}"""`), we must take care to not accidently create # the ending quotes at the wrong place. (`literal_eval` would be unhappy with `"""hello""""` # as an input.) To do this, we insert an extra space at the end (that we then must remember # to remove later on). if content.endswith(quotes[0]) or content.endswith('\\'): ends_with_illegal_character = True content = content + " " text = prefix.strip("fF") + quotes + content + quotes s = literal_eval(text) except Exception as ex: # Something has gone wrong, but we still have the original form - Should be OK. logger.warning("Unable to parse string %s: %s", text, ex) logger.traceback() ends_with_illegal_character = False s = saved_content if isinstance(s, bytes): try: s = s.decode(sys.getfilesystemencoding()) except UnicodeDecodeError: s = decode_str(s) if ends_with_illegal_character: s = s[:-1] return prefix + quotes, s ESCAPES = "" def get_numeric_value(node): '''Gets numeric value from a CPT leaf node.''' value = node.value value = value.replace("_", "") chars = set(value.lower()) try: if u'.' in chars or u'e' in chars or u'j' in chars: # Probable float or hex or imaginary return literal_eval(value) if len(value) > 1 and value[0] == u'0' and value[1] not in u'boxlBOXL': # Old-style octal value = u'0o' + value[1:] if value[-1] in u'lL': return literal_eval(value[:-1]) return literal_eval(value) except ValueError: raise ParseError("Not a valid numeric value", node.type, node.value, (node.start, node.end)) #This rewriting step is performed separately for two reasons. # 1. It is complicated # 2. In future, we may want to make the AST more like the syntax and less like the semantics. # Keeping step separate should make that a bit easier. def rewrite_comp(node): if hasattr(node, "function"): return gens = node.generators if hasattr(node, "elt"): elt = node.elt del node.elt else: elt = ast.Tuple([node.value, node.key], LOAD) elt.lineno = node.key.lineno elt.col_offset = node.key.col_offset elt._end = node.value._end del node.key del node.value y = ast.Yield(elt) copy_location(elt, y) stmt = ast.Expr(y) copy_location(elt, stmt) for gen in reversed(gens[1:]): for if_ in gen.ifs: stmt = ast.If(if_, [stmt], None) copy_location(if_, stmt) stmt = ast.For(gen.target, gen.iter, [stmt], None) if getattr(gen, "is_async", False): stmt.is_async = True copy_location(node, stmt) for if_ in gens[0].ifs: stmt = ast.If(if_, [stmt], None) copy_location(if_, stmt) p0 = ".0" pvar = ast.Variable(p0) arg = ast.Name(pvar, LOAD) copy_location(node, arg) stmt = ast.For(gens[0].target, arg, [stmt], None) if getattr(gens[0], "is_async", False): stmt.is_async = True copy_location(node, stmt) pvar = ast.Variable(p0) arg = ast.Name(pvar, PARAM) copy_location(node, arg) function = ast.Function(COMP_NAMES[type(node).__name__], [],[arg], None, None, None, [ stmt ]) copy_location(node, function) node.function = function node.iterable = gens[0].iter del node.generators COMP_NAMES = { 'GeneratorExp' : 'genexpr', 'DictComp' : 'dictcomp', 'ListComp' : 'listcomp', 'SetComp' : 'setcomp' } def copy_location(src, dest): '''Copy location from `src` to `dest`''' dest.lineno = src.lineno dest.col_offset = src.col_offset dest._end = src._end def convert(logger, cpt): '''Covert concrete parse tree as specified by blib2to3/Grammar.txt to the AST specified by semmle/python/master.py ''' return Convertor(logger).visit(cpt)