Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 1 | # Module 'parser' |
| 2 | # |
| 3 | # Parse S-expressions output by the Panel Editor |
| 4 | # (which is written in Scheme so it can't help writing S-expressions). |
| 5 | # |
| 6 | # See notes at end of file. |
# Emit a Py3k DeprecationWarning on import: this module does not exist in
# Python 3.0.  stacklevel=2 attributes the warning to the importing module,
# and the helper name is deleted so it does not leak into this namespace.
from warnings import warnpy3k
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
del warnpy3k
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 10 | |
| 11 | |
# Character classes driving the tokenizer.  A separator is anything that
# terminates an atom: an operator, whitespace, the comment character or
# the string-quote character.
whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Split the string s into a list of S-expression tokens (strings).
# A ';' comment discards the remainder of s; '"' strings honor backslash
# escapes; '(' , ')' and quote are single-character tokens; everything
# else is an atom running up to the next separator.
#
def tokenize_string(s):
    result = []
    while s:
        ch = s[:1]
        if ch in whitespace:
            # Blanks between tokens carry no information; drop them.
            s = s[1:]
            continue
        if ch == ';':
            # Comment: everything up to the end of s is discarded.
            break
        if ch == '"':
            # String literal: scan for the closing quote, skipping the
            # character after each backslash.  An unterminated string
            # simply runs to the end of s.
            pos = 1
            size = len(s)
            while pos < size:
                ch = s[pos]
                pos = pos + 1
                if ch == '"':
                    break
                if ch == '\\':
                    pos = pos + 1
            result.append(s[:pos])
            s = s[pos:]
        elif ch in operators:
            # Parenthesis or quote: always a one-character token.
            result.append(ch)
            s = s[1:]
        else:
            # Atom: extends until the next separator character.
            pos = 1
            size = len(s)
            while pos < size:
                if s[pos] in separators:
                    break
                pos = pos + 1
            result.append(s[:pos])
            s = s[pos:]
    return result
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 50 | |
| 51 | |
# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    # iter() with an empty-string sentinel keeps calling fp.readline()
    # until end of file; each line's tokens are appended in order.
    result = []
    for line in iter(fp.readline, ''):
        result.extend(tokenize_string(line))
    return result
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 62 | |
| 63 | |
# Exception raised by parse_expr.
# NOTE: this is a Python-1.x-style *string* exception; callers are
# expected to catch it with "except syntax_error".  It predates the
# exception-class hierarchy and does not work in Python 3.
syntax_error = 'syntax error'
| 67 | |
| 68 | |
| 69 | # Parse an S-expression. |
| 70 | # Input is a list of tokens as returned by tokenize_*(). |
| 71 | # Return a pair (expr, tokens) |
| 72 | # where expr is a list representing the s-expression, |
| 73 | # and tokens contains the remaining tokens. |
| 74 | # May raise syntax_error. |
| 75 | # |
| 76 | def parse_expr(tokens): |
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 77 | if (not tokens) or tokens[0] != '(': |
| 78 | raise syntax_error, 'expected "("' |
| 79 | tokens = tokens[1:] |
| 80 | expr = [] |
| 81 | while 1: |
| 82 | if not tokens: |
| 83 | raise syntax_error, 'missing ")"' |
| 84 | if tokens[0] == ')': |
| 85 | return expr, tokens[1:] |
| 86 | elif tokens[0] == '(': |
| 87 | subexpr, tokens = parse_expr(tokens) |
| 88 | expr.append(subexpr) |
| 89 | else: |
| 90 | expr.append(tokens[0]) |
| 91 | tokens = tokens[1:] |
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 92 | |
| 93 | |
# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    # Tokenize everything up front, then peel off one top-level
    # expression at a time until the token stream is exhausted.
    remaining = tokenize_file(fp)
    result = []
    while remaining:
        parsed, remaining = parse_expr(remaining)
        result.append(parsed)
    return result
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 104 | |
| 105 | |
| 106 | # EXAMPLE: |
| 107 | # |
| 108 | # The input |
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 109 | # '(hip (hop hur-ray))' |
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 110 | # |
| 111 | # passed to tokenize_string() returns the token list |
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 112 | # ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')'] |
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 113 | # |
| 114 | # When this is passed to parse_expr() it returns the expression |
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 115 | # ['hip', ['hop', 'hur-ray']] |
# plus an empty token list (because there are no tokens left).
| 117 | # |
| 118 | # When a file containing the example is passed to parse_file() it returns |
| 119 | # a list whose only element is the output of parse_expr() above: |
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 120 | # [['hip', ['hop', 'hur-ray']]] |
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 121 | |
| 122 | |
| 123 | # TOKENIZING: |
| 124 | # |
| 125 | # Comments start with semicolon (;) and continue till the end of the line. |
| 126 | # |
| 127 | # Tokens are separated by whitespace, except the following characters |
| 128 | # always form a separate token (outside strings): |
Tim Peters | 182b5ac | 2004-07-18 06:16:08 +0000 | [diff] [blame] | 129 | # ( ) ' |
Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 130 | # Strings are enclosed in double quotes (") and backslash (\) is used |
| 131 | # as escape character in strings. |