blob: 16d1d8ba4c6d9eb3fcc654bb4864eb89e09e5ea9 [file] [log] [blame]
Greg Steinfd342bf2000-08-03 17:39:13 +00001#
2# Copyright (C) 1997-1998 Greg Stein. All Rights Reserved.
3#
4# This module is provided under a BSD-ish license. See
5# http://www.opensource.org/licenses/bsd-license.html
6# and replace OWNER, ORGANIZATION, and YEAR as appropriate.
7#
8#
9# Written by Greg Stein (gstein@lyra.org)
10# and Bill Tutt (rassilon@lima.mudlib.org)
11# February 1997.
12#
13# Support for ast.Node subclasses written and other revisions by
Jeremy Hylton8612f1c2000-08-04 16:54:54 +000014# Jeremy Hylton (jeremy@beopen.com)
Greg Steinfd342bf2000-08-03 17:39:13 +000015#
16
Jeremy Hyltonfa974a92000-03-06 18:50:48 +000017"""Parse tree transformation module.
18
19Transforms Python source code into an abstract syntax tree (AST)
20defined in the ast module.
21
22The simplest ways to invoke this module are via parse and parseFile.
23parse(buf) -> AST
24parseFile(path) -> AST
25"""
26
Jeremy Hyltonf968e852000-02-04 00:25:23 +000027#
28# The output tree has the following nodes:
29#
30# Source line numbers appear at the end of every one of these nodes.
31# If a line number doesn't apply, there will be a None instead.
32#
33# module: doc, node
34# stmt: [ node1, ..., nodeN ]
35# function: name, argnames, defaults, flags, doc, codeNode
36# lambda: argnames, defaults, flags, codeNode
37# classdef: name, bases, doc, codeNode
38# pass:
39# break:
40# continue:
41# for: assignNode, listNode, bodyNode, elseNode
42# while: testNode, bodyNode, elseNode
43# if: [ (testNode, suiteNode), ... ], elseNode
44# exec: expr1Node, expr2Node, expr3Node
45# from: modname, [ name1, ..., nameN ]
46# import: [ name1, ..., nameN ]
47# raise: expr1Node, expr2Node, expr3Node
48# tryfinally: trySuiteNode, finSuiteNode
49# tryexcept: trySuiteNode, [ (exprNode, assgnNode, suiteNode), ... ], elseNode
50# return: valueNode
51# const: value
52# print: [ node1, ..., nodeN ]
53# printnl: [ node1, ..., nodeN ]
54# discard: exprNode
55# assign: [ node1, ..., nodeN ], exprNode
56# ass_tuple: [ node1, ..., nodeN ]
57# ass_list: [ node1, ..., nodeN ]
58# ass_name: name, flags
59# ass_attr: exprNode, attrname, flags
60# list: [ node1, ..., nodeN ]
61# dict: [ (key1, val1), ..., (keyN, valN) ]
62# not: exprNode
63# compare: exprNode, [ (op, node), ..., (op, node) ]
64# name: name
65# global: [ name1, ..., nameN ]
66# backquote: node
67# getattr: exprNode, attrname
68# call_func: node, [ arg1, ..., argN ]
69# keyword: name, exprNode
70# subscript: exprNode, flags, [ sub1, ..., subN ]
71# ellipsis:
72# sliceobj: [ node1, ..., nodeN ]
73# slice: exprNode, flags, lowerNode, upperNode
74# assert: expr1, expr2
75#
76# Compiled as "binary" ops:
77# tuple: [ node1, ..., nodeN ]
78# or: [ node1, ..., nodeN ]
79# and: [ node1, ..., nodeN ]
80# bitor: [ node1, ..., nodeN ]
81# bitxor: [ node1, ..., nodeN ]
82# bitand: [ node1, ..., nodeN ]
83#
84# Operations easily evaluated on constants:
85# <<: exprNode, shiftNode
86# >>: exprNode, shiftNode
87# +: leftNode, rightNode
88# -: leftNode, rightNode
89# *: leftNode, rightNode
90# /: leftNode, rightNode
91# %: leftNode, rightNode
92# power: leftNode, rightNode
93# unary+: node
94# unary-: node
95# invert: node
96#
97
Jeremy Hyltonf968e852000-02-04 00:25:23 +000098import ast
99import parser
100import symbol
101import token
102import string
103
104import pprint
105
106error = 'walker.error'
107
Jeremy Hylton9605c112000-02-08 18:57:32 +0000108from consts import CO_VARARGS, CO_VARKEYWORDS
109from consts import OP_ASSIGN, OP_DELETE, OP_APPLY
Jeremy Hyltonf968e852000-02-04 00:25:23 +0000110
def parseFile(path):
    """Read the Python source file at *path* and return its AST.

    The file is closed even when reading it fails; the text is then
    handed to parse().
    """
    f = open(path)
    try:
        src = f.read()
    finally:
        f.close()
    return parse(src)
116
def parse(buf):
    """Transform the Python source text *buf* into an AST.

    A fresh Transformer is created for every call.
    """
    transformer = Transformer()
    return transformer.parsesuite(buf)
119
def asList(nodes):
    """Return a plain-list copy of *nodes* with nested items expanded.

    Items exposing an ``asList`` attribute are replaced by the result of
    calling it; tuples and lists are converted recursively (tuples stay
    tuples); every other item is copied through unchanged.
    """
    tuple_type = type(())
    list_type = type([])
    converted = []
    for entry in nodes:
        if hasattr(entry, "asList"):
            value = entry.asList()
        elif type(entry) is tuple_type:
            value = tuple(asList(entry))
        elif type(entry) is list_type:
            value = asList(entry)
        else:
            value = entry
        converted.append(value)
    return converted
133
134def Node(*args):
135 kind = args[0]
136 if ast.nodes.has_key(kind):
137 try:
138 return apply(ast.nodes[kind], args[1:])
139 except TypeError:
140 print ast.nodes[kind], len(args), args
141 raise
142 else:
143 raise error, "Can't find appropriate Node type."
144 #return apply(ast.Node, args)
145
146class Transformer:
147 """Utility object for transforming Python parse trees.
148
149 Exposes the following methods:
150 tree = transform(ast_tree)
151 tree = parsesuite(text)
152 tree = parseexpr(text)
153 tree = parsefile(fileob | filename)
154 """
155
def __init__(self):
    """Build the symbol-number -> handler dispatch table.

    Every grammar symbol listed in symbol.sym_name that has a same-named
    attribute on this instance is mapped to that attribute.
    """
    handlers = { }
    for number, symname in symbol.sym_name.items():
        if not hasattr(self, symname):
            continue
        handlers[number] = getattr(self, symname)
    self._dispatch = handlers
161
def transform(self, tree):
    """Transform an AST into a modified parse tree.

    A tuple or list is taken to be a parse tree already; anything else
    is first converted with parser.ast2tuple (line numbers requested).
    """
    if type(tree) not in (type(()), type([])):
        tree = parser.ast2tuple(tree, 1)
    return self.compile_node(tree)
167
def parsesuite(self, text):
    """Return a modified parse tree for the given suite text."""
    # Hack for handling non-native line endings on non-DOS like OSs:
    # every carriage return is removed before the text is parsed.
    cleaned = string.replace(text, '\x0d', '')
    suite_ast = parser.suite(cleaned)
    return self.transform(suite_ast)
173
def parseexpr(self, text):
    """Return a modified parse tree for the given expression text."""
    expr_ast = parser.expr(text)
    return self.transform(expr_ast)
177
def parsefile(self, file):
    """Return a modified parse tree for the contents of the given file.

    *file* is either a file name (a string) or an object with a read()
    method.  A file opened here from a name is closed again before the
    text is parsed; a file object supplied by the caller is left open.
    """
    if type(file) != type(''):
        return self.parsesuite(file.read())
    handle = open(file)
    try:
        text = handle.read()
    finally:
        handle.close()
    return self.parsesuite(text)
183
184 # --------------------------------------------------------------
185 #
186 # PRIVATE METHODS
187 #
188
189 def compile_node(self, node):
190 ### emit a line-number node?
191 n = node[0]
192 if n == symbol.single_input:
193 return self.single_input(node[1:])
194 if n == symbol.file_input:
195 return self.file_input(node[1:])
196 if n == symbol.eval_input:
197 return self.eval_input(node[1:])
198 if n == symbol.lambdef:
199 return self.lambdef(node[1:])
200 if n == symbol.funcdef:
201 return self.funcdef(node[1:])
202 if n == symbol.classdef:
203 return self.classdef(node[1:])
204
205 raise error, ('unexpected node type', n)
206
def single_input(self, node):
    """Transform the children of a single_input node.

    A lone NEWLINE yields a 'pass' node; anything else is compiled as a
    statement from the first child.
    """
    ### do we want to do anything about being "interactive" ?

    # NEWLINE | simple_stmt | compound_stmt NEWLINE
    first = node[0]
    if first[0] == token.NEWLINE:
        return Node('pass')
    return self.com_stmt(first)
216
def file_input(self, nodelist):
    """Transform the children of a file_input node into a 'module' node.

    ENDMARKER and NEWLINE children are skipped; the statements of every
    other child are gathered into a single 'stmt' node.
    """
    doc = self.get_docstring(nodelist, symbol.file_input)
    skipped = (token.ENDMARKER, token.NEWLINE)
    body = [ ]
    for child in nodelist:
        if child[0] in skipped:
            continue
        self.com_append_stmt(body, child)
    return Node('module', doc, Node('stmt', body))
224
def eval_input(self, nodelist):
    """Transform an eval_input node by compiling its first child."""
    # from the built-in function input()
    ### is this sufficient?
    expression = nodelist[0]
    return self.com_node(expression)
229
def funcdef(self, nodelist):
    """Transform the children of a funcdef node into a 'function' node.

    The node's lineno is the line of the function's NAME token.
    """
    # funcdef: 'def' NAME parameters ':' suite
    # parameters: '(' [varargslist] ')'

    lineno = nodelist[1][2]
    name = nodelist[1][1]
    # child of 'parameters' that follows the opening parenthesis
    params = nodelist[2][2]
    body = nodelist[4]

    if params[0] != symbol.varargslist:
        names = defaults = ()
        flags = 0
    else:
        names, defaults, flags = self.com_arglist(params[1:])
    doc = self.get_docstring(body)

    # code for function
    code = self.com_node(body)

    result = Node('function', name, names, defaults, flags, doc, code)
    result.lineno = lineno
    return result
251
def lambdef(self, nodelist):
    """Transform a lambdef node into a 'lambda' node.

    The indices used here treat index 1 as the 'lambda' token (its line
    number becomes the node's lineno) and index 2 as the optional
    varargslist; the last element is the body expression.
    """
    # lambdef: 'lambda' [varargslist] ':' test
    maybe_args = nodelist[2]
    if maybe_args[0] != symbol.varargslist:
        names = defaults = ()
        flags = 0
    else:
        names, defaults, flags = self.com_arglist(maybe_args[1:])

    # code for lambda
    code = self.com_node(nodelist[-1])

    result = Node('lambda', names, defaults, flags, code)
    result.lineno = nodelist[1][2]
    return result
266
def classdef(self, nodelist):
    """Transform the children of a classdef node into a 'class' node.

    The node's lineno is the line of the class's NAME token.
    """
    # classdef: 'class' NAME ['(' testlist ')'] ':' suite

    name = nodelist[1][1]
    body = nodelist[-1]
    doc = self.get_docstring(body)
    if nodelist[2][0] != token.COLON:
        # a parenthesised base list is present
        bases = self.com_bases(nodelist[3])
    else:
        bases = []

    # code for class
    code = self.com_node(body)

    result = Node('class', name, bases, doc, code)
    result.lineno = nodelist[1][2]
    return result
283
def stmt(self, nodelist):
    """Transform a wrapper statement node by compiling its first child."""
    child = nodelist[0]
    return self.com_stmt(child)

# these node types are handled exactly like 'stmt'
small_stmt = flow_stmt = compound_stmt = stmt
290
def simple_stmt(self, nodelist):
    """Transform a simple_stmt node into one 'stmt' node.

    Every second child, starting with the first, is compiled and its
    statements appended.
    """
    # small_stmt (';' small_stmt)* [';'] NEWLINE
    collected = [ ]
    index = 0
    while index < len(nodelist):
        self.com_append_stmt(collected, nodelist[index])
        index = index + 2
    return Node('stmt', collected)
297
def _undispatchable(self, nodelist):
    """Reject a node type that has no transformation of its own."""
    raise error

# Each of these symbols gets a handler that simply raises error.
parameters = _undispatchable
varargslist = _undispatchable
fpdef = _undispatchable
fplist = _undispatchable
dotted_name = _undispatchable
comp_op = _undispatchable
trailer = _undispatchable
sliceop = _undispatchable
argument = _undispatchable
324
325 # --------------------------------------------------------------
326 #
327 # STATEMENT NODES (invoked by com_node())
328 #
329
def expr_stmt(self, nodelist):
    """Transform an expression statement.

    A single child becomes a 'discard' node; otherwise the last child is
    the value and every earlier operand becomes an assignment target of
    an 'assign' node whose lineno is that of the first '=' token.
    """
    # testlist ('=' testlist)*
    value = self.com_node(nodelist[-1])
    if len(nodelist) == 1:
        return Node('discard', value)
    targets = [ ]
    index = 0
    stop = len(nodelist) - 2
    while index < stop:
        targets.append(self.com_assign(nodelist[index], OP_ASSIGN))
        index = index + 2
    result = Node('assign', targets, value)
    result.lineno = nodelist[1][2]
    return result
341
def print_stmt(self, nodelist):
    """Transform a print statement into a 'print' or 'printnl' node.

    A trailing comma selects 'print'; otherwise 'printnl' is produced.
    """
    # print: (test ',')* [test]
    items = [ ]
    index = 1
    while index < len(nodelist):
        items.append(self.com_node(nodelist[index]))
        index = index + 2
    if nodelist[-1][0] == token.COMMA:
        kind = 'print'
    else:
        kind = 'printnl'
    result = Node(kind, items)
    result.lineno = nodelist[0][2]
    return result
354
def del_stmt(self, nodelist):
    """Transform a del statement via com_assign with OP_DELETE."""
    targets = nodelist[1]
    return self.com_assign(targets, OP_DELETE)
357
def pass_stmt(self, nodelist):
    """Transform a pass statement into a 'pass' node with its lineno."""
    # pass:
    result = Node('pass')
    keyword = nodelist[0]
    result.lineno = keyword[2]
    return result
363
def break_stmt(self, nodelist):
    """Transform a break statement into a 'break' node with its lineno."""
    # break:
    result = Node('break')
    keyword = nodelist[0]
    result.lineno = keyword[2]
    return result
369
def continue_stmt(self, nodelist):
    """Transform a continue statement into a 'continue' node with its lineno."""
    # continue
    result = Node('continue')
    keyword = nodelist[0]
    result.lineno = keyword[2]
    return result
375
def return_stmt(self, nodelist):
    """Transform a return statement into a 'return' node.

    A bare ``return`` gets a constant None as its value.
    """
    # return: [testlist]
    if len(nodelist) >= 2:
        value = self.com_node(nodelist[1])
    else:
        value = Node('const', None)
    result = Node('return', value)
    result.lineno = nodelist[0][2]
    return result
385
def raise_stmt(self, nodelist):
    """Transform a raise statement into a 'raise' node.

    Missing operands are represented by None.
    """
    # raise: [test [',' test [',' test]]]
    count = len(nodelist)
    expr1 = expr2 = expr3 = None
    # the operands are compiled from the last one to the first
    if count > 5:
        expr3 = self.com_node(nodelist[5])
    if count > 3:
        expr2 = self.com_node(nodelist[3])
    if count > 1:
        expr1 = self.com_node(nodelist[1])
    result = Node('raise', expr1, expr2, expr3)
    result.lineno = nodelist[0][2]
    return result
403
def import_stmt(self, nodelist):
    """Transform an import statement into an 'import' or 'from' node.

    The two forms are told apart by the first letter of the leading
    keyword.
    """
    # import: dotted_name (',' dotted_name)* |
    # from: dotted_name 'import' ('*' | NAME (',' NAME)*)
    keyword = nodelist[0]
    names = [ ]
    if keyword[1][0] != 'f':
        for i in range(1, len(nodelist), 2):
            names.append(self.com_dotted_name(nodelist[i]))
        result = Node('import', names)
    else:
        for i in range(3, len(nodelist), 2):
            # note: nodelist[i] could be (token.STAR, '*') or (token.NAME, name)
            names.append(nodelist[i][1])
        result = Node('from', self.com_dotted_name(nodelist[1]), names)
    result.lineno = keyword[2]
    return result
421
def global_stmt(self, nodelist):
    """Transform a global statement into a 'global' node listing its names."""
    # global: NAME (',' NAME)*
    names = [ ]
    index = 1
    while index < len(nodelist):
        names.append(nodelist[index][1])
        index = index + 2
    result = Node('global', names)
    result.lineno = nodelist[0][2]
    return result
430
def exec_stmt(self, nodelist):
    """Transform an exec statement into an 'exec' node.

    The optional second and third expressions default to None.
    """
    # exec_stmt: 'exec' expr ['in' expr [',' expr]]
    count = len(nodelist)
    expr2 = expr3 = None
    expr1 = self.com_node(nodelist[1])
    if count >= 4:
        expr2 = self.com_node(nodelist[3])
        if count >= 6:
            expr3 = self.com_node(nodelist[5])

    result = Node('exec', expr1, expr2, expr3)
    result.lineno = nodelist[0][2]
    return result
446
def assert_stmt(self, nodelist):
    """Transform an assert statement into an 'assert' node.

    When no second expression is given, a 'name' node for None is used.
    """
    # 'assert': test, [',' test]
    expr1 = self.com_node(nodelist[1])
    if len(nodelist) != 4:
        expr2 = Node('name', 'None')
    else:
        expr2 = self.com_node(nodelist[3])
    result = Node('assert', expr1, expr2)
    result.lineno = nodelist[0][2]
    return result
457
def if_stmt(self, nodelist):
    """Transform an if statement into an 'if' node.

    The node holds a list of (test, suite) pairs, one for the 'if' and
    one per 'elif', plus the else suite or None.
    """
    # if: test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
    count = len(nodelist)
    tests = [ ]
    start = 0
    while start < count - 3:
        cond = self.com_node(nodelist[start + 1])
        body = self.com_node(nodelist[start + 3])
        tests.append((cond, body))
        start = start + 4

    elseNode = None
    if count % 4 == 3:
        # three trailing children: 'else' ':' suite
        elseNode = self.com_node(nodelist[-1])
    result = Node('if', tests, elseNode)
    result.lineno = nodelist[0][2]
    return result
474
def while_stmt(self, nodelist):
    """Transform a while statement into a 'while' node (else suite or None)."""
    # 'while' test ':' suite ['else' ':' suite]

    cond = self.com_node(nodelist[1])
    body = self.com_node(nodelist[3])

    alternative = None
    if len(nodelist) > 4:
        alternative = self.com_node(nodelist[6])

    result = Node('while', cond, body, alternative)
    result.lineno = nodelist[0][2]
    return result
489
def for_stmt(self, nodelist):
    """Transform a for statement into a 'for' node (else suite or None)."""
    # 'for' exprlist 'in' exprlist ':' suite ['else' ':' suite]

    target = self.com_assign(nodelist[1], OP_ASSIGN)
    sequence = self.com_node(nodelist[3])
    body = self.com_node(nodelist[5])

    alternative = None
    if len(nodelist) > 8:
        alternative = self.com_node(nodelist[8])

    result = Node('for', target, sequence, body, alternative)
    result.lineno = nodelist[0][2]
    return result
505
def try_stmt(self, nodelist):
    """Transform a try statement, choosing the except or finally form."""
    # 'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite]
    # | 'try' ':' suite 'finally' ':' suite
    if nodelist[3][0] == symbol.except_clause:
        return self.com_try_except(nodelist)
    return self.com_try_finally(nodelist)
513
def suite(self, nodelist):
    """Transform a suite node into a 'stmt' node.

    A one-child suite is compiled directly; otherwise only the stmt
    children contribute statements.
    """
    # simple_stmt | NEWLINE INDENT NEWLINE* (stmt NEWLINE*)+ DEDENT
    if len(nodelist) == 1:
        return self.com_stmt(nodelist[0])

    body = [ ]
    for child in nodelist:
        if child[0] != symbol.stmt:
            continue
        self.com_append_stmt(body, child)
    return Node('stmt', body)
524
525 # --------------------------------------------------------------
526 #
527 # EXPRESSION NODES (invoked by com_node())
528 #
529
def testlist(self, nodelist):
    """Transform a testlist or exprlist through com_binary as a 'tuple'."""
    # testlist: expr (',' expr)* [',']
    # exprlist: expr (',' expr)* [',']
    kind = 'tuple'
    return self.com_binary(kind, nodelist)

# exprlist nodes are handled the same way
exprlist = testlist
536
def test(self, nodelist):
    """Transform a test node: a lone lambdef child or an 'or' chain."""
    # and_test ('or' and_test)* | lambdef
    if len(nodelist) != 1 or nodelist[0][0] != symbol.lambdef:
        return self.com_binary('or', nodelist)
    # the lambdef handler is given the child node as a whole
    return self.lambdef(nodelist[0])
542
def and_test(self, nodelist):
    """Transform an and_test node through com_binary as 'and'."""
    # not_test ('and' not_test)*
    kind = 'and'
    return self.com_binary(kind, nodelist)
546
def not_test(self, nodelist):
    """Transform a not_test node.

    With two children the compiled operand is wrapped in a 'not' node;
    otherwise the compiled operand is returned as is.
    """
    # 'not' not_test | comparison
    operand = self.com_node(nodelist[-1])
    if len(nodelist) != 2:
        return operand
    negated = Node('not', operand)
    negated.lineno = nodelist[0][2]
    return negated
555
def comparison(self, nodelist):
    """Transform a comparison node.

    A single operand is returned compiled.  Otherwise a 'compare' node
    is built from the first operand and a list of (operator, operand)
    pairs; its lineno comes from the last operator seen.
    """
    # comparison: expr (comp_op expr)*
    first = self.com_node(nodelist[0])
    if len(nodelist) == 1:
        return first

    pairs = [ ]
    for i in range(2, len(nodelist), 2):
        op_node = nodelist[i-1]

        # comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '=='
        #          | 'in' | 'not' 'in' | 'is' | 'is' 'not'
        op_tok = op_node[1]
        if op_tok[0] != token.NAME:
            op = _cmp_types[op_tok[0]]
        elif len(op_node) != 3:
            # one-word operator: 'in' or 'is'
            op = op_tok[1]
        elif op_tok[1] == 'not':
            op = 'not in'
        else:
            op = 'is not'

        lineno = op_tok[2]
        pairs.append((op, self.com_node(nodelist[i])))

    # we need a special "compare" node so that we can distinguish
    #   3 < x < 5   from    (3 < x) < 5
    # the two have very different semantics and results (note that the
    # latter form is always true)

    result = Node('compare', first, pairs)
    result.lineno = lineno
    return result
590
def expr(self, nodelist):
    """Transform an expr node through com_binary as 'bitor'."""
    # xor_expr ('|' xor_expr)*
    kind = 'bitor'
    return self.com_binary(kind, nodelist)
594
def xor_expr(self, nodelist):
    """Transform an xor_expr node through com_binary as 'bitxor'."""
    # and_expr ('^' and_expr)*
    kind = 'bitxor'
    return self.com_binary(kind, nodelist)
598
def and_expr(self, nodelist):
    """Transform an and_expr node through com_binary as 'bitand'."""
    # shift_expr ('&' shift_expr)*
    kind = 'bitand'
    return self.com_binary(kind, nodelist)
602
def shift_expr(self, nodelist):
    """Transform a shift_expr node into left-nested '<<' / '>>' nodes.

    Every node built here takes its lineno from the first operator token.
    """
    # arith_expr ('<<'|'>>' arith_expr)*
    node = self.com_node(nodelist[0])
    for i in range(2, len(nodelist), 2):
        right = self.com_node(nodelist[i])
        if nodelist[i-1][0] == token.LEFTSHIFT:
            kind = '<<'
        else:
            kind = '>>'
        node = Node(kind, [node, right])
        node.lineno = nodelist[1][2]
    return node
615
def arith_expr(self, nodelist):
    """Transform an arith_expr node into left-nested '+' / '-' nodes.

    Every node built here takes its lineno from the first operator token.
    """
    node = self.com_node(nodelist[0])
    for i in range(2, len(nodelist), 2):
        right = self.com_node(nodelist[i])
        if nodelist[i-1][0] == token.PLUS:
            kind = '+'
        else:
            kind = '-'
        node = Node(kind, [node, right])
        node.lineno = nodelist[1][2]
    return node
627
def term(self, nodelist):
    """Transform a term node into left-nested '*', '/' and '%' nodes.

    Every node built here takes its lineno from the first operator token.
    """
    node = self.com_node(nodelist[0])
    for i in range(2, len(nodelist), 2):
        right = self.com_node(nodelist[i])
        op = nodelist[i-1][0]
        if op == token.STAR:
            kind = '*'
        elif op == token.SLASH:
            kind = '/'
        else:
            kind = '%'
        node = Node(kind, [node, right])
        node.lineno = nodelist[1][2]
    return node
642
def factor(self, nodelist):
    """Transform a factor node.

    A leading '+', '-' or '~' wraps the compiled operand in a 'unary+',
    'unary-' or 'invert' node; otherwise the operand is returned as is.
    """
    t = nodelist[0][0]
    operand = self.com_node(nodelist[-1])
    if t == token.PLUS:
        kind = 'unary+'
    elif t == token.MINUS:
        kind = 'unary-'
    elif t == token.TILDE:
        kind = 'invert'
    else:
        return operand
    result = Node(kind, operand)
    result.lineno = nodelist[0][2]
    return result
656
def power(self, nodelist):
    """Transform a power node.

    Trailers are applied to the compiled atom in order.  At the first
    '**' a 'power' node of the value so far and the following operand is
    returned immediately.
    """
    # power: atom trailer* ('**' factor)*
    node = self.com_node(nodelist[0])
    i = 1
    while i < len(nodelist):
        elt = nodelist[i]
        if elt[0] == token.DOUBLESTAR:
            result = Node('power', [node, self.com_node(nodelist[i+1])])
            result.lineno = elt[2]
            return result

        node = self.com_apply_trailer(node, elt)
        i = i + 1

    return node
669
670 def atom(self, nodelist):
671 t = nodelist[0][0]
672 if t == token.LPAR:
673 if nodelist[1][0] == token.RPAR:
Jeremy Hylton42907792000-02-14 18:32:46 +0000674 n = Node('tuple', ())
Jeremy Hyltonf968e852000-02-04 00:25:23 +0000675 n.lineno = nodelist[0][2]
676 return n
677 return self.com_node(nodelist[1])
678
679 if t == token.LSQB:
680 if nodelist[1][0] == token.RSQB:
Jeremy Hylton42907792000-02-14 18:32:46 +0000681 n = Node('list', ())
Jeremy Hyltonf968e852000-02-04 00:25:23 +0000682 n.lineno = nodelist[0][2]
683 return n
684 return self.com_list_constructor(nodelist[1])
685
686 if t == token.LBRACE:
687 if nodelist[1][0] == token.RBRACE:
Jeremy Hylton42907792000-02-14 18:32:46 +0000688 return Node('dict', ())
Jeremy Hyltonf968e852000-02-04 00:25:23 +0000689 return self.com_dictmaker(nodelist[1])
690
691 if t == token.BACKQUOTE:
692 n = Node('backquote', self.com_node(nodelist[1]))
693 n.lineno = nodelist[0][2]
694 return n
695
696 if t == token.NUMBER:
697 ### need to verify this matches compile.c
698 k = eval(nodelist[0][1])
699 n = Node('const', k)
700 n.lineno = nodelist[0][2]
701 return n
702
703 if t == token.STRING:
704 ### need to verify this matches compile.c
705 k = ''
706 for node in nodelist:
707 k = k + eval(node[1])
708 n = Node('const', k)
709 n.lineno = nodelist[0][2]
710 return n
711
712 if t == token.NAME:
713 ### any processing to do?
714 n = Node('name', nodelist[0][1])
715 n.lineno = nodelist[0][2]
716 return n
717
718 raise error, "unknown node type"
719
720 # --------------------------------------------------------------
721 #
722 # INTERNAL PARSING UTILITIES
723 #
724
725 def com_node(self, node):
726 # Note: compile.c has handling in com_node for del_stmt, pass_stmt,
727 # break_stmt, stmt, small_stmt, flow_stmt, simple_stmt,
728 # and compound_stmt.
729 # We'll just dispatch them.
730
731 #
732 # A ';' at the end of a line can make a NEWLINE token appear here,
733 # Render it harmless. (genc discards ('discard', ('const', xxxx)) Nodes)
734 #
735 if node[0] == token.NEWLINE:
736 return Node('discard', Node('const', None))
737
738 if node[0] not in _legal_node_types:
739 raise error, 'illegal node passed to com_node: %s' % node[0]
740
741 return self._dispatch[node[0]](node[1:])
742
def com_arglist(self, nodelist):
    """Collect a formal parameter list.

    Returns (names, defaults, flags): the parameter names in order, the
    compiled default values, and CO_VARARGS / CO_VARKEYWORDS bits for
    '*name' and '**name' parameters.
    """
    # varargslist:
    #   (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME]
    #  | fpdef ['=' test] (',' fpdef ['=' test])* [',']
    #  | ('**'|'*' '*') NAME)
    # fpdef: NAME | '(' fplist ')'
    # fplist: fpdef (',' fpdef)* [',']
    names = [ ]
    defaults = [ ]
    flags = 0
    count = len(nodelist)

    i = 0
    while i < count:
        node = nodelist[i]
        if node[0] in (token.STAR, token.DOUBLESTAR):
            if node[0] == token.STAR:
                node = nodelist[i+1]
                if node[0] == token.NAME:
                    names.append(node[1])
                    flags = flags | CO_VARARGS
                    # step over '*', NAME and the following comma
                    i = i + 3

            if i < count:
                # should be DOUBLESTAR or STAR STAR
                if nodelist[i][0] == token.DOUBLESTAR:
                    node = nodelist[i+1]
                else:
                    node = nodelist[i+2]
                names.append(node[1])
                flags = flags | CO_VARKEYWORDS

            # star parameters always come last
            break

        # fpdef: NAME | '(' fplist ')'
        names.append(self.com_fpdef(node))

        i = i + 1
        if i >= count:
            break

        if nodelist[i][0] == token.EQUAL:
            defaults.append(self.com_node(nodelist[i + 1]))
            i = i + 2
        elif defaults:
            # Treat "(a=1, b)" as "(a=1, b=None)"
            defaults.append(Node('const', None))

        i = i + 1

    return names, defaults, flags
793
def com_fpdef(self, node):
    """Return the name of a formal parameter, or the result of
    com_fplist for a parenthesised sub-list."""
    # fpdef: NAME | '(' fplist ')'
    head = node[1]
    if head[0] != token.LPAR:
        return head[1]
    return self.com_fplist(node[2])
799
def com_fplist(self, node):
    """Return the names of a parenthesised parameter sub-list.

    A one-element list collapses to that element's name; otherwise a
    tuple of names is returned.
    """
    # fplist: fpdef (',' fpdef)* [',']
    if len(node) == 2:
        return self.com_fpdef(node[1])
    names = [ ]
    index = 1
    while index < len(node):
        names.append(self.com_fpdef(node[index]))
        index = index + 2
    return tuple(names)
808
def com_dotted_name(self, node):
    """Return the dotted name spelled by a dotted_name node as a string.

    The NAME children are joined with '.'; every other element (the
    node's type number, the '.' tokens) is ignored.  Children may be
    tuples or lists, matching the two tree forms transform() accepts.
    """
    sequence_types = (type(()), type([]))
    parts = [ ]
    for child in node:
        if type(child) in sequence_types and child[0] == token.NAME:
            parts.append(child[1])
    return '.'.join(parts)
816
def com_bases(self, node):
    """Compile every second child of *node*, starting at index 1, and
    return the results as a list (the base classes)."""
    compiled = [ ]
    index = 1
    while index < len(node):
        compiled.append(self.com_node(node[index]))
        index = index + 2
    return compiled
822
def com_try_finally(self, nodelist):
    """Build a 'tryfinally' node from the try suite and the finally suite."""
    # try_fin_stmt: "try" ":" suite "finally" ":" suite
    body = self.com_node(nodelist[2])
    final = self.com_node(nodelist[5])
    result = Node('tryfinally', body, final)
    result.lineno = nodelist[0][2]
    return result
828
def com_try_except(self, nodelist):
    """Build a 'tryexcept' node.

    The node holds the compiled try suite, a list of
    (expression, assignment target, suite) triples -- one per except
    clause, with None for absent parts -- and the else suite or None.
    The try suite is compiled exactly once.
    """
    # try_except: 'try' ':' suite (except_clause ':' suite)* ['else' suite]
    #tryexcept:  [TryNode, [except_clauses], elseNode)]
    stmt = self.com_node(nodelist[2])
    clauses = []
    elseNode = None
    # clauses come in groups of three: header, ':' and suite
    for i in range(3, len(nodelist), 3):
        node = nodelist[i]
        if node[0] == symbol.except_clause:
            # except_clause: 'except' [expr [',' expr]] */
            if len(node) > 2:
                expr1 = self.com_node(node[2])
                if len(node) > 4:
                    expr2 = self.com_assign(node[4], OP_ASSIGN)
                else:
                    expr2 = None
            else:
                expr1 = expr2 = None
            clauses.append((expr1, expr2, self.com_node(nodelist[i+2])))

        if node[0] == token.NAME:
            # a NAME token in header position is the 'else' keyword
            elseNode = self.com_node(nodelist[i+2])
    n = Node('tryexcept', stmt, clauses, elseNode)
    n.lineno = nodelist[0][2]
    return n
854
def com_assign(self, node, assigning):
    """Return a node suitable for use as an "lvalue".

    *assigning* is passed through to the node that is finally built.
    Constructs that cannot be assigned to raise SyntaxError.
    """
    # loop to avoid trivial recursion
    while 1:
        kind = node[0]
        if kind in (symbol.exprlist, symbol.testlist):
            if len(node) > 2:
                return self.com_assign_tuple(node, assigning)
            node = node[1]
        elif kind in _assign_types:
            # more than one child means an operator is applied
            if len(node) > 2:
                raise SyntaxError("can't assign to operator")
            node = node[1]
        elif kind == symbol.power:
            if node[1][0] != symbol.atom:
                raise SyntaxError("can't assign to operator")
            if len(node) > 2:
                primary = self.com_node(node[1])
                # all trailers but the last are applied as loads
                for i in range(2, len(node)-1):
                    ch = node[i]
                    if ch[0] == token.DOUBLESTAR:
                        raise SyntaxError("can't assign to operator")
                    primary = self.com_apply_trailer(primary, ch)
                return self.com_assign_trailer(primary, node[-1], assigning)
            node = node[1]
        elif kind == symbol.atom:
            first = node[1][0]
            if first == token.LPAR:
                node = node[2]
                if node[0] == token.RPAR:
                    raise SyntaxError("can't assign to ()")
            elif first == token.LSQB:
                node = node[2]
                if node[0] == token.RSQB:
                    raise SyntaxError("can't assign to []")
                return self.com_assign_list(node, assigning)
            elif first == token.NAME:
                return self.com_assign_name(node[1], assigning)
            else:
                raise SyntaxError("can't assign to literal")
        else:
            raise SyntaxError("bad assignment")
897
def com_assign_tuple(self, node, assigning):
    """Build an 'ass_tuple' node from every second child of *node*."""
    targets = [ ]
    index = 1
    while index < len(node):
        targets.append(self.com_assign(node[index], assigning))
        index = index + 2
    return Node('ass_tuple', targets)
903
def com_assign_list(self, node, assigning):
    """Build an 'ass_list' node from every second child of *node*."""
    targets = [ ]
    index = 1
    while index < len(node):
        targets.append(self.com_assign(node[index], assigning))
        index = index + 2
    return Node('ass_list', targets)
909
def com_assign_name(self, node, assigning):
    """Build an 'ass_name' node from a NAME token, keeping its line number."""
    name = node[1]
    result = Node('ass_name', name, assigning)
    result.lineno = node[2]
    return result
Jeremy Hyltonf968e852000-02-04 00:25:23 +0000914
def com_assign_trailer(self, primary, node, assigning):
    """Build the assignment form of the final trailer applied to *primary*.

    Attribute and subscript trailers are delegated; a call trailer or
    any other trailer raises SyntaxError.
    """
    kind = node[1][0]
    if kind == token.DOT:
        return self.com_assign_attr(primary, node[2], assigning)
    if kind == token.LSQB:
        return self.com_subscriptlist(primary, node[2], assigning)
    if kind == token.LPAR:
        raise SyntaxError("can't assign to function call")
    raise SyntaxError("unknown trailer type: %s" % kind)
924
def com_assign_attr(self, primary, node, assigning):
    """Build an 'ass_attr' node for the attribute named by *node*."""
    attrname = node[1]
    return Node('ass_attr', primary, attrname, assigning)
927
def com_binary(self, type, nodelist):
    """Compile 'NODE (OP NODE)*' into (type, [ node1, ..., nodeN ]).

    A single operand is returned compiled, without a wrapping node.
    """
    if len(nodelist) == 1:
        return self.com_node(nodelist[0])
    operands = [ ]
    index = 0
    while index < len(nodelist):
        operands.append(self.com_node(nodelist[index]))
        index = index + 2
    return Node(type, operands)
936
def com_stmt(self, node):
    """Compile *node* and return the result as a 'stmt' node.

    A result that already is a 'stmt' node is returned unchanged; any
    other result is wrapped in a new one-element 'stmt' node.  A result
    that cannot be indexed lets the indexing error propagate.
    """
    result = self.com_node(node)
    if result[0] == 'stmt':
        return result
    return Node('stmt', [ result ])
947
def com_append_stmt(self, stmts, node):
    """Compile *node* and add the outcome to the list *stmts* in place.

    The contents of a 'stmt' result are spliced onto the end of *stmts*;
    any other result is appended as a single item.  A result that cannot
    be indexed lets the indexing error propagate.
    """
    result = self.com_node(node)
    if result[0] == 'stmt':
        # flatten nested statement lists
        stmts[len(stmts):] = result[1]
    else:
        stmts.append(result)
958
def com_list_constructor(self, nodelist):
    """Build a 'list' node from every second child, starting at index 1."""
    values = [ ]
    index = 1
    while index < len(nodelist):
        values.append(self.com_node(nodelist[index]))
        index = index + 2
    return Node('list', values)
964
def com_dictmaker(self, nodelist):
    """Build a 'dict' node holding a list of (key, value) node pairs."""
    # dictmaker: test ':' test (',' test ':' value)* [',']
    pairs = [ ]
    index = 1
    while index < len(nodelist):
        key = self.com_node(nodelist[index])
        value = self.com_node(nodelist[index+2])
        pairs.append((key, value))
        # step over key, ':', value and ','
        index = index + 4
    return Node('dict', pairs)
971
def com_apply_trailer(self, primaryNode, nodelist):
    """Apply a trailer node (call, attribute or subscript) to *primaryNode*.

    Any other trailer raises SyntaxError.
    """
    kind = nodelist[1][0]
    operand = nodelist[2]
    if kind == token.DOT:
        return self.com_select_member(primaryNode, operand)
    if kind == token.LSQB:
        return self.com_subscriptlist(primaryNode, operand, OP_APPLY)
    if kind == token.LPAR:
        return self.com_call_function(primaryNode, operand)

    raise SyntaxError('unknown node type: %s' % kind)
982
def com_select_member(self, primaryNode, nodelist):
    """Build a 'getattr' node for the NAME token *nodelist*.

    Raises SyntaxError when the token is not a NAME.
    """
    if nodelist[0] != token.NAME:
        raise SyntaxError("member must be a name")
    attrname = nodelist[1]
    result = Node('getattr', primaryNode, attrname)
    result.lineno = nodelist[2]
    return result
989
def com_call_function(self, primaryNode, nodelist):
    """Build a 'call_func' node for a call on *primaryNode*.

    *nodelist* is either the closing parenthesis token (no arguments) or
    the argument list node.  Ordinary arguments are compiled through
    com_argument; '*' and '**' arguments are compiled separately and
    passed as the last two node arguments.  Raises SyntaxError for a
    repeated '*' or '**' argument or an unexpected token.
    """
    if nodelist[0] == token.RPAR:
        return Node('call_func', primaryNode, [ ])
    args = [ ]
    kw = 0
    len_nodelist = len(nodelist)
    for i in range(1, len_nodelist, 2):
        node = nodelist[i]
        if node[0] == token.STAR or node[0] == token.DOUBLESTAR:
            break
        kw, result = self.com_argument(node, kw)
        args.append(result)
    else:
        # Not broken by a star arg, so skip the last one we processed.
        i = i + 1
    if i < len_nodelist and nodelist[i][0] == token.COMMA:
        # need to accept an application that looks like "f(a, b,)"
        i = i + 1
    star_node = dstar_node = None
    while i < len_nodelist:
        tok = nodelist[i]
        ch = nodelist[i+1]
        # step over the star token, its operand and the following comma
        i = i + 3
        if tok[0] == token.STAR:
            if star_node is not None:
                raise SyntaxError('already have the varargs identifier')
            star_node = self.com_node(ch)
        elif tok[0] == token.DOUBLESTAR:
            if dstar_node is not None:
                raise SyntaxError('already have the kwargs identifier')
            dstar_node = self.com_node(ch)
        else:
            # tok is a sequence: wrap it so '%' formats it as one value
            raise SyntaxError('unknown node type: %s' % (tok,))

    return Node('call_func', primaryNode, args, star_node, dstar_node)
Jeremy Hyltonf968e852000-02-04 00:25:23 +00001024
def com_argument(self, nodelist, kw):
    """Compile one call argument.

    Returns (is_keyword, node): (0, compiled value) for a positional
    argument, (1, 'keyword' node) for ``name=value``.  Raises
    SyntaxError for a positional argument after a keyword one (*kw*
    true) or when the keyword part is not a plain name.
    """
    if len(nodelist) == 2:
        if kw:
            raise SyntaxError("non-keyword arg after keyword arg")
        return 0, self.com_node(nodelist[1])
    value = self.com_node(nodelist[3])
    # descend through single-child wrappers to reach the NAME token
    n = nodelist[1]
    while len(n) == 2 and n[0] != token.NAME:
        n = n[1]
    if n[0] != token.NAME:
        raise SyntaxError("keyword can't be an expression (%s)"%n[0])
    keyword = Node('keyword', n[1], value)
    keyword.lineno = n[2]
    return 1, keyword
Jeremy Hyltonf968e852000-02-04 00:25:23 +00001039
def com_subscriptlist(self, primary, nodelist, assigning):
    """Build a 'slice' or 'subscript' node for *primary*.

    A single subscript containing a colon and no sliceop becomes a
    simple 'slice'; everything else becomes a 'subscript' node with one
    entry per subscript.
    """
    # slicing:      simple_slicing | extended_slicing
    # simple_slicing:   primary "[" short_slice "]"
    # extended_slicing: primary "[" slice_list "]"
    # slice_list:   slice_item ("," slice_item)* [","]

    # backwards compat slice for '[i:j]'
    if len(nodelist) == 2:
        sub = nodelist[1]
        has_colon = (sub[1][0] == token.COLON or
                     (len(sub) > 2 and sub[2][0] == token.COLON))
        if has_colon and sub[-1][0] != symbol.sliceop:
            return self.com_slice(primary, sub, assigning)

    subscripts = [ ]
    index = 1
    while index < len(nodelist):
        subscripts.append(self.com_subscript(nodelist[index]))
        index = index + 2

    return Node('subscript', primary, assigning, subscripts)
1059
def com_subscript(self, node):
    """Compile one slice_item.

    Returns an 'ellipsis' Node when the item starts with two DOT tokens,
    the result of com_sliceobj when the item starts with a colon or has
    more than one child, and otherwise the compiled single expression.
    """
    # slice_item: expression | proper_slice | ellipsis
    first = node[1]
    kind = first[0]
    if kind == token.DOT and node[2][0] == token.DOT:
        return Node('ellipsis')
    if kind != token.COLON and len(node) <= 2:
        # exactly one child and it is not a colon: a plain expression
        return self.com_node(first)
    return self.com_sliceobj(node)
1068
def com_sliceobj(self, node):
    """Compile a proper_slice into a 'sliceobj' Node.

    The node's item list always starts with the lower and upper bounds;
    a bound that is absent is represented by Node('const', None).  One
    further entry is appended for every child that follows the bounds.
    """
    # proper_slice: short_slice | long_slice
    # short_slice:  [lower_bound] ":" [upper_bound]
    # long_slice:   short_slice ":" [stride]
    # lower_bound:  expression
    # upper_bound:  expression
    # stride:       expression
    #
    # Note: a stride may be further slicing...

    count = len(node)

    # lower bound
    if node[1][0] == token.COLON:
        lower = Node('const', None)
        pos = 2
    else:
        lower = self.com_node(node[1])
        # node[2] is the COLON, so the upper bound can only be at 3
        pos = 3

    # upper bound
    if pos < count and node[pos][0] == symbol.test:
        upper = self.com_node(node[pos])
        pos = pos + 1
    else:
        upper = Node('const', None)

    items = [lower, upper]

    # a short_slice has been built.  look for long_slice now by looking
    # for strides...
    while pos < count:
        rest = node[pos]
        if len(rest) == 2:
            # a two-element child carries no stride expression
            stride = Node('const', None)
        else:
            stride = self.com_node(rest[2])
        items.append(stride)
        pos = pos + 1

    return Node('sliceobj', items)
1105
def com_slice(self, primary, node, assigning):
    """Compile a simple [lower:upper] slice into a 'slice' Node.

    A bound that is not present in node is left as None.  primary and
    assigning are stored on the resulting node unchanged.
    """
    # short_slice: [lower_bound] ":" [upper_bound]
    lower = None
    upper = None
    size = len(node)
    if size == 4:
        # lower ":" upper
        lower = self.com_node(node[1])
        upper = self.com_node(node[3])
    elif size == 3:
        if node[1][0] != token.COLON:
            # lower ":"
            lower = self.com_node(node[1])
        else:
            # ":" upper
            upper = self.com_node(node[2])
    return Node('slice', primary, assigning, lower, upper)
1118
def get_docstring(self, node, n=None):
    """Return the docstring found at the start of node, or None.

    node -- a parse tree node.  When n is None the node's type is taken
            from node[0] and its children from node[1:]; otherwise node
            is treated as the list of children and n as the node type.

    The search recurses into the first stmt of a suite or file_input,
    then through the first child of statement nodes and through
    single-child nodes listed in _doc_nodes, until it reaches an atom.
    An atom whose first child is a STRING token yields the concatenation
    of all of its evaluated children.
    """
    if n is None:
        n, node = node[0], node[1:]

    if n == symbol.suite and len(node) == 1:
        return self.get_docstring(node[0])

    if n == symbol.suite or n == symbol.file_input:
        # only the first stmt child is inspected
        for child in node:
            if child[0] == symbol.stmt:
                return self.get_docstring(child)
        return None

    if n == symbol.atom:
        if node[0][0] != token.STRING:
            return None
        text = ''
        for tok in node:
            # NOTE(review): eval() is applied to the text of each token;
            # this presumably is always a string literal produced by the
            # parser -- confirm no caller passes hand-built trees.
            text = text + eval(tok[1])
        return text

    if n in (symbol.stmt, symbol.simple_stmt, symbol.small_stmt):
        return self.get_docstring(node[0])

    if n in _doc_nodes and len(node) == 1:
        return self.get_docstring(node[0])

    return None
1147
1148
# Node types that get_docstring() descends through (when they have a
# single child) while looking for a leading string atom.
_doc_nodes = [
    symbol.expr_stmt, symbol.testlist,
    # boolean and comparison levels
    symbol.test, symbol.and_test, symbol.not_test, symbol.comparison,
    # bitwise and shift levels
    symbol.expr, symbol.xor_expr, symbol.and_expr, symbol.shift_expr,
    # arithmetic levels
    symbol.arith_expr, symbol.term, symbol.factor, symbol.power,
    ]
1165
# comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '=='
#        | 'in' | 'not' 'in' | 'is' | 'is' 'not'
#
# Maps a comparison operator's token type to the operator string.  Both
# EQEQUAL and EQUAL map to '=='.  The keyword operators listed above
# have no entry here.
_cmp_types = {
    token.LESS: '<',
    token.GREATER: '>',
    token.EQEQUAL: '==',
    token.EQUAL: '==',
    token.LESSEQUAL: '<=',
    token.GREATEREQUAL: '>=',
    token.NOTEQUAL: '!=',
    }
1177
# Grammar symbols accepted as node types.
# NOTE(review): the code that consults this list is outside this part of
# the file -- confirm how it is used before adding or removing entries.
_legal_node_types = [
    # definitions
    symbol.funcdef, symbol.classdef,
    # statement wrappers
    symbol.stmt, symbol.small_stmt, symbol.flow_stmt,
    symbol.simple_stmt, symbol.compound_stmt,
    # simple statements
    symbol.expr_stmt, symbol.print_stmt, symbol.del_stmt,
    symbol.pass_stmt, symbol.break_stmt, symbol.continue_stmt,
    symbol.return_stmt, symbol.raise_stmt, symbol.import_stmt,
    symbol.global_stmt, symbol.exec_stmt, symbol.assert_stmt,
    # compound statements
    symbol.if_stmt, symbol.while_stmt, symbol.for_stmt, symbol.try_stmt,
    symbol.suite,
    # expressions
    symbol.testlist, symbol.test, symbol.and_test, symbol.not_test,
    symbol.comparison, symbol.exprlist, symbol.expr, symbol.xor_expr,
    symbol.and_expr, symbol.shift_expr, symbol.arith_expr, symbol.term,
    symbol.factor, symbol.power, symbol.atom,
    ]
1219
# Expression-level grammar symbols, from test down to factor.
# NOTE(review): presumably the wrapper nodes that assignment-target
# handling elsewhere in this file looks through -- confirm against the
# code that reads this list.
_assign_types = [
    symbol.test, symbol.and_test, symbol.not_test, symbol.comparison,
    symbol.expr, symbol.xor_expr, symbol.and_expr, symbol.shift_expr,
    symbol.arith_expr, symbol.term, symbol.factor,
    ]
1233
Jeremy Hyltonf968e852000-02-04 00:25:23 +00001234