import re
import sys
import unittest

sys.path.insert(0, '..')
from pycparser.c_lexer import CLexer

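# Helpers: pull every remaining token out of a lexer that has already been
# fed input; iter() with a None sentinel stops once clex.token() returns None.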
def token_list(clex):
    return list(iter(clex.token, None))


def token_types(clex):
    return [i.type for i in token_list(clex)]

class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. The error_func passed to the lexer therefore
        fails the test as soon as it is called.
    """
    def error_func(self, msg, line, column):
        self.fail(msg)

    def type_lookup_func(self, typ):
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('i ^= 1;', ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

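    # type_lookup_func above reports names starting with 'mytype' as typedef
    # names, so the lexer should emit TYPEID for them instead of ID.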
    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3', ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
             'LBRACE', 'RBRACE',
             'RBRACKET',
             'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
             'LOR',
             'LNOT', 'ID',
             'AND',
             'NOT', 'ID',
             'CONDOP', 'ID'])

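        # Runs of operator characters are matched greedily: '|||' lexes as
        # LOR then OR, and '&&&' as LAND then AND.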
        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
             'LOR', 'OR',
             'LAND', 'AND',
             'XOR',
             'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
             'ARROW', 'CONDOP',
             'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
             'RPAREN', 'TIMES', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

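        # A bare '0' matches the octal-constant rule, hence INT_CONST_OCT below.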
        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
             'ID', 'GT', 'INT_CONST_OCT',
             'CONDOP',
             'ID',
             'COLON',
             'MINUS', 'INT_CONST_DEC'])

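        # Tokenization is greedy (maximal munch): 'a+++b' lexes as ID,
        # PLUSPLUS, PLUS, ID rather than ID, PLUS, PLUSPLUS, ID.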
        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
             'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
             'ID', 'LT', 'ID', 'SEMI',
             'PLUSPLUS', 'ID',
             'RPAREN'])

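        # 'self' is not a C keyword, so it lexes as a plain ID, here used
        # both as a label and as the goto target.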
        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
             'LBRACE',
             'CASE', 'ID', 'COLON',
             'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
             'BREAK', 'SEMI',
             'DEFAULT', 'COLON',
             'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
             'RBRACE'])

    def test_preprocessor(self):
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        str = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        """

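        # The #line directives inside str above should update both the reported
        # line number and the current filename for the tokens that follow.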
        #~ self.clex.filename
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')


# Error message patterns expected from the lexer, kept in one place to make
# updates easier if the error wording ever changes.
#
ERR_ILLEGAL_CHAR = 'Illegal character'
ERR_OCTAL = 'Invalid octal constant'
ERR_UNMATCHED_QUOTE = 'Unmatched \''
ERR_INVALID_CCONST = 'Invalid char constant'
ERR_STRING_ESCAPE = 'String contains invalid escape'

ERR_FILENAME_BEFORE_LINE = 'filename before line'
ERR_LINENUM_MISSING = 'line number missing'
ERR_INVALID_LINE_DIRECTIVE = 'invalid #line directive'

class TestCLexerErrors(unittest.TestCase):
    """ Test lexing of erroneous strings.
        Works by passing an error function that saves the error
        in an attribute for later perusal.
    """
    def error_func(self, msg, line, column):
        self.error = msg

    def type_lookup_func(self, typ):
        return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, str, error_like):
        # feed the string to the lexer
        self.clex.input(str)

        # Pulls all tokens from the string. Errors will
        # be written into self.error by the error_func
        # callback
        #
        token_types(self.clex)

        # compare the error to the expected
        self.assertTrue(re.search(error_like, self.error),
                        "\nExpected error matching: %s\nGot: %s" %
                        (error_like, self.error))

        # clear last error, for the sake of subsequent invocations
        self.error = ""

    def test_trivial_tokens(self):
        self.assertLexerError('@', ERR_ILLEGAL_CHAR)
        self.assertLexerError('$', ERR_ILLEGAL_CHAR)
        self.assertLexerError('`', ERR_ILLEGAL_CHAR)
        self.assertLexerError('\\', ERR_ILLEGAL_CHAR)

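    # 8 and 9 are not octal digits, so constants starting with 0 that
    # contain them should be rejected.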
    def test_integer_constants(self):
        self.assertLexerError('029', ERR_OCTAL)
        self.assertLexerError('012345678', ERR_OCTAL)

    def test_char_constants(self):
        self.assertLexerError("'", ERR_UNMATCHED_QUOTE)
        self.assertLexerError("'b\n", ERR_UNMATCHED_QUOTE)

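        # Multi-character constants and unknown escape sequences should be
        # reported as invalid char constants.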
        self.assertLexerError("'jx'", ERR_INVALID_CCONST)
        self.assertLexerError(r"'\*'", ERR_INVALID_CCONST)
        self.assertLexerError(r"'\9'", ERR_INVALID_CCONST)
        self.assertLexerError(r"L'\9'", ERR_INVALID_CCONST)

    def test_string_literals(self):
        self.assertLexerError(r'"jx\9"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'"hekllo\* on ix"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'L"hekllo\* on ix"', ERR_STRING_ESCAPE)

    def test_preprocessor(self):
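        # Malformed #line directives: a filename given before any line number,
        # a non-numeric argument, and a missing line number entirely.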
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)


if __name__ == '__main__':
    unittest.main()