blob: 362156d4c1903f3018394918b0a76d085c4d8f72 [file] [log] [blame]
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +00001# Module 'parser'
2#
3# Parse S-expressions output by the Panel Editor
4# (which is written in Scheme so it can't help writing S-expressions).
5#
6# See notes at end of file.
Brett Cannon11ae6e72008-05-15 03:49:00 +00007from warnings import warnpy3k
8warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
9del warnpy3k
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +000010
11
12whitespace = ' \t\n'
13operators = '()\''
14separators = operators + whitespace + ';' + '"'
15
16
17# Tokenize a string.
18# Return a list of tokens (strings).
19#
20def tokenize_string(s):
Tim Peters182b5ac2004-07-18 06:16:08 +000021 tokens = []
22 while s:
23 c = s[:1]
24 if c in whitespace:
25 s = s[1:]
26 elif c == ';':
27 s = ''
28 elif c == '"':
29 n = len(s)
30 i = 1
31 while i < n:
32 c = s[i]
33 i = i+1
34 if c == '"': break
35 if c == '\\': i = i+1
36 tokens.append(s[:i])
37 s = s[i:]
38 elif c in operators:
39 tokens.append(c)
40 s = s[1:]
41 else:
42 n = len(s)
43 i = 1
44 while i < n:
45 if s[i] in separators: break
46 i = i+1
47 tokens.append(s[:i])
48 s = s[i:]
49 return tokens
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +000050
51
52# Tokenize a whole file (given as file object, not as file name).
53# Return a list of tokens (strings).
54#
55def tokenize_file(fp):
Tim Peters182b5ac2004-07-18 06:16:08 +000056 tokens = []
57 while 1:
58 line = fp.readline()
59 if not line: break
60 tokens = tokens + tokenize_string(line)
61 return tokens
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +000062
63
64# Exception raised by parse_exr.
65#
66syntax_error = 'syntax error'
67
68
69# Parse an S-expression.
70# Input is a list of tokens as returned by tokenize_*().
71# Return a pair (expr, tokens)
72# where expr is a list representing the s-expression,
73# and tokens contains the remaining tokens.
74# May raise syntax_error.
75#
76def parse_expr(tokens):
Tim Peters182b5ac2004-07-18 06:16:08 +000077 if (not tokens) or tokens[0] != '(':
78 raise syntax_error, 'expected "("'
79 tokens = tokens[1:]
80 expr = []
81 while 1:
82 if not tokens:
83 raise syntax_error, 'missing ")"'
84 if tokens[0] == ')':
85 return expr, tokens[1:]
86 elif tokens[0] == '(':
87 subexpr, tokens = parse_expr(tokens)
88 expr.append(subexpr)
89 else:
90 expr.append(tokens[0])
91 tokens = tokens[1:]
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +000092
93
94# Parse a file (given as file object, not as file name).
95# Return a list of parsed S-expressions found at the top level.
96#
97def parse_file(fp):
Tim Peters182b5ac2004-07-18 06:16:08 +000098 tokens = tokenize_file(fp)
99 exprlist = []
100 while tokens:
101 expr, tokens = parse_expr(tokens)
102 exprlist.append(expr)
103 return exprlist
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +0000104
105
106# EXAMPLE:
107#
108# The input
Tim Peters182b5ac2004-07-18 06:16:08 +0000109# '(hip (hop hur-ray))'
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +0000110#
111# passed to tokenize_string() returns the token list
Tim Peters182b5ac2004-07-18 06:16:08 +0000112# ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +0000113#
114# When this is passed to parse_expr() it returns the expression
Tim Peters182b5ac2004-07-18 06:16:08 +0000115# ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
117#
118# When a file containing the example is passed to parse_file() it returns
119# a list whose only element is the output of parse_expr() above:
Tim Peters182b5ac2004-07-18 06:16:08 +0000120# [['hip', ['hop', 'hur-ray']]]
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +0000121
122
123# TOKENIZING:
124#
125# Comments start with semicolon (;) and continue till the end of the line.
126#
127# Tokens are separated by whitespace, except the following characters
128# always form a separate token (outside strings):
Tim Peters182b5ac2004-07-18 06:16:08 +0000129# ( ) '
Guido van Rossum1ce7c6f1997-01-15 19:19:19 +0000130# Strings are enclosed in double quotes (") and backslash (\) is used
131# as escape character in strings.