import re
import sys
import unittest

sys.path.insert(0, '..')
from pycparser.c_lexer import CLexer


def token_list(clex):
    return list(iter(clex.token, None))


def token_types(clex):
    return [i.type for i in token_list(clex)]

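# Illustrative usage sketch (not exercised by the tests themselves), assuming
# a CLexer built and fed the way setUp()/assertTokensTypes() do below:
#
#   clex.input('i ^= 1;')
#   token_types(clex)    # -> ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI']
#
# token_list() returns the token objects themselves, whose .type, .value and
# .lineno attributes are checked directly in the preprocessor tests below.
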
class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        raises an exception.
    """
    def error_func(self, msg, line, column):
        self.fail(msg)

    def type_lookup_func(self, typ):
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('i ^= 1;', ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

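    # type_lookup_func() above reports any name starting with 'mytype' as a
    # typedef name, so such identifiers should lex as TYPEID rather than ID.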
    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('12l', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872lU', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872llu', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000uLL', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000LLu', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3', ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

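    # C99 hexadecimal floating constants: a hex significand followed by a
    # binary exponent introduced by 'p' or 'P'.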
    def test_hexadecimal_floating_constants(self):
        self.assertTokensTypes('0xDE.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0x.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0X12.P0', ['HEX_FLOAT_CONST'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
                'LBRACE', 'RBRACE',
                'RBRACKET',
                'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
                'LOR',
                'LNOT', 'ID',
                'AND',
                'NOT', 'ID',
                'CONDOP', 'ID'])

        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
                'LOR', 'OR',
                'LAND', 'AND',
                'XOR',
                'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
                'ARROW', 'CONDOP',
                'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
                'RPAREN', 'TIMES', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
                'ID', 'GT', 'INT_CONST_OCT',
                'CONDOP',
                'ID',
                'COLON',
                'MINUS', 'INT_CONST_DEC'])

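        # longest-match lexing ("maximal munch"): 'a+++b' scans as a ++ + b,
        # not a + ++b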
        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
                'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
                'ID', 'LT', 'ID', 'SEMI',
                'PLUSPLUS', 'ID',
                'RPAREN'])

        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
            {
                case TYPE_ID:
                    m = 5;
                    break;
                default:
                    m = 8;
            }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
                'LBRACE',
                'CASE', 'ID', 'COLON',
                'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                'BREAK', 'SEMI',
                'DEFAULT', 'COLON',
                'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                'RBRACE'])

    def test_preprocessor_line(self):
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        str = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """
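        # What the assertions below expect from this snippet: '546' is read
        # on physical line 2; the '#line 66 "kwas\df.h"' directive makes the
        # following tokens report line 66 onward in kwas\df.h; the bare '# 9'
        # directive resets only the line number; the later #line directives
        # switch to "..\~..\test.h" (line 10) and "include/me.h" (line 99999).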

        #~ self.clex.filename
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')

    def test_preprocessor_line_funny(self):
        str = r'''
        #line 10 "..\6\joe.h"
        10
        '''
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\6\joe.h')


    def test_preprocessor_pragma(self):
        str = r'''
        42
        #pragma helo me
        #pragma once
        # pragma omp parallel private(th_id)
        #pragma {pack: 2, smack: 3}
        #pragma <includeme.h> "nowit.h"
        #pragma "string"
        #pragma id 124124 and numbers 0235495
        59
        '''

        # Check that pragmas are ignored but the line number advances
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        t2 = self.clex.token()
        self.assertEqual(t2.type, 'INT_CONST_DEC')
        self.assertEqual(t2.lineno, 10)
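        # '59' sits on line 10 of the snippet: '42' on line 2 plus the seven
        # pragma lines all advance the count, even though the pragmas
        # themselves produce no tokens.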


# Keeps all the errors the lexer spits out in one place, to allow
# easier modification if the error syntax changes.
#
ERR_ILLEGAL_CHAR = 'Illegal character'
ERR_OCTAL = 'Invalid octal constant'
ERR_UNMATCHED_QUOTE = 'Unmatched \''
ERR_INVALID_CCONST = 'Invalid char constant'
ERR_STRING_ESCAPE = 'String contains invalid escape'

ERR_FILENAME_BEFORE_LINE = 'filename before line'
ERR_LINENUM_MISSING = 'line number missing'
ERR_INVALID_LINE_DIRECTIVE = 'invalid #line directive'
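# assertLexerError() below matches these against the reported message with
# re.search(), so a distinctive substring of the lexer's error text is enough.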


class TestCLexerErrors(unittest.TestCase):
    """ Test lexing of erroneous strings.
        Works by passing an error function that saves the error
        in an attribute for later perusal.
    """
    def error_func(self, msg, line, column):
        self.error = msg

    def type_lookup_func(self, typ):
        return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, str, error_like):
        # feed the string to the lexer
        self.clex.input(str)

        # Pulls all tokens from the string. Errors will
        # be written into self.error by the error_func
        # callback
        #
        token_types(self.clex)

        # compare the error to the expected
        self.assertTrue(re.search(error_like, self.error),
            "\nExpected error matching: %s\nGot: %s" %
                (error_like, self.error))

        # clear last error, for the sake of subsequent invocations
        self.error = ""

    def test_trivial_tokens(self):
        self.assertLexerError('@', ERR_ILLEGAL_CHAR)
        self.assertLexerError('$', ERR_ILLEGAL_CHAR)
        self.assertLexerError('`', ERR_ILLEGAL_CHAR)
        self.assertLexerError('\\', ERR_ILLEGAL_CHAR)

    def test_integer_constants(self):
        self.assertLexerError('029', ERR_OCTAL)
        self.assertLexerError('012345678', ERR_OCTAL)

    def test_char_constants(self):
        self.assertLexerError("'", ERR_UNMATCHED_QUOTE)
        self.assertLexerError("'b\n", ERR_UNMATCHED_QUOTE)

        self.assertLexerError("'jx'", ERR_INVALID_CCONST)
        self.assertLexerError(r"'\*'", ERR_INVALID_CCONST)

    def test_string_literals(self):
        self.assertLexerError(r'"jx\9"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'"hekllo\* on ix"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'L"hekllo\* on ix"', ERR_STRING_ESCAPE)

    def test_preprocessor(self):
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)


if __name__ == '__main__':
    unittest.main()