blob: 14242b8733e44a9b98d041957aeb08ea92172c32 [file] [log] [blame]
Eli Bendersky3b1b08d2012-06-15 12:37:54 +03001import re
2import sys
3import unittest
Eli Bendersky3921e8e2010-05-21 09:05:39 +03004
Eli Bendersky3b1b08d2012-06-15 12:37:54 +03005sys.path.insert(0, '..')
6from pycparser.c_lexer import CLexer
7
8
def token_list(clex):
    """ Drain the lexer and return all remaining tokens as a list.

        clex.token is called repeatedly; the first None it returns
        marks the end of input and is not included in the result.
    """
    # Two-argument iter(): call clex.token() until it yields the
    # sentinel value None.
    return list(iter(clex.token, None))
11
12
def token_types(clex):
    """ Drain the lexer and return the `type` attribute of every
        remaining token, in the order the tokens were produced.
    """
    return [tok.type for tok in token_list(clex)]
15
16
class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        fails the running test as soon as it is called.
    """
    def error_func(self, msg, line, column):
        """ Error callback handed to the lexer: any call fails the
            test, with the lexer's message as the failure reason.
        """
        self.fail(msg)

    def type_lookup_func(self, typ):
        """ Type-lookup callback handed to the lexer: only names
            that start with 'mytype' are reported as types.
        """
        return typ.startswith('mytype')

    def setUp(self):
        # A fresh, non-optimized lexer for every test method.
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        """ Lex `str` and assert that the sequence of token types
            produced equals the list `types`.
        """
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('$dollar cent$', ['ID', 'ID'])
        self.assertTokensTypes(
            'i ^= 1;',
            ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes(
            '0x0666e+3',
            ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_hexadecimal_floating_constants(self):
        self.assertTokensTypes('0xDE.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0x.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0X12.P0', ['HEX_FLOAT_CONST'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        # Not a raw string on purpose: the C literal contains a real
        # tab character.
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
                'LBRACE', 'RBRACE',
            'RBRACKET',
            'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
            'LOR',
            'LNOT', 'ID',
            'AND',
            'NOT', 'ID',
            'CONDOP', 'ID'])

        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
            'LOR', 'OR',
            'LAND', 'AND',
            'XOR',
            'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
            'ARROW', 'CONDOP',
            'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
            'RPAREN', 'TIMES', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        # A lone 0 is expected to lex as an octal constant.
        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
                'ID', 'GT', 'INT_CONST_OCT',
                'CONDOP',
                'ID',
                'COLON',
                'MINUS', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
                        'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
                        'ID', 'LT', 'ID', 'SEMI',
                        'PLUSPLUS', 'ID',
                    'RPAREN'])

        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
                'LBRACE',
                    'CASE', 'ID', 'COLON',
                        'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                        'BREAK', 'SEMI',
                    'DEFAULT', 'COLON',
                        'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                'RBRACE'])

    def test_preprocessor_line(self):
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        # The line structure of this snippet matters: the expected
        # line numbers below depend on it.
        source = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        self.clex.input(source)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        # Pull three more tokens; only the last one ('armo', which
        # follows the '# 9' directive) is checked.
        for _ in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')

    def test_preprocessor_line_funny(self):
        source = r'''
        #line 10 "..\6\joe.h"
        10
        '''
        self.clex.input(source)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\6\joe.h')

    def test_preprocessor_pragma(self):
        source = r'''
        42
        #pragma helo me
        #pragma once
        # pragma omp parallel private(th_id)
        #pragma {pack: 2, smack: 3}
        #pragma <includeme.h> "nowit.h"
        #pragma "string"
        #pragma id 124124 and numbers 0235495
        59
        '''

        # Check that pragmas are ignored but the line number advances
        self.clex.input(source)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        t2 = self.clex.token()
        self.assertEqual(t2.type, 'INT_CONST_DEC')
        self.assertEqual(t2.lineno, 10)
304
Eli Bendersky3b1b08d2012-06-15 12:37:54 +0300305
306
# Keeps all the errors the lexer spits in one place, to allow
# easier modification if the error syntax changes.
#
# The error tests pass these strings to re.search() as patterns, so
# they are matched against the reported error message as regular
# expressions.
#
ERR_ILLEGAL_CHAR = 'Illegal character'
ERR_OCTAL = 'Invalid octal constant'
ERR_UNMATCHED_QUOTE = "Unmatched '"
ERR_INVALID_CCONST = 'Invalid char constant'
ERR_STRING_ESCAPE = 'String contains invalid escape'

ERR_FILENAME_BEFORE_LINE = 'filename before line'
ERR_LINENUM_MISSING = 'line number missing'
ERR_INVALID_LINE_DIRECTIVE = 'invalid #line directive'
319
320
class TestCLexerErrors(unittest.TestCase):
    """ Test lexing of erroneous strings.
        Works by passing an error function that saves the error
        in an attribute for later perusal.
    """
    def error_func(self, msg, line, column):
        """ Error callback handed to the lexer: remember the message
            so the test can inspect it afterwards.
        """
        self.error = msg

    def type_lookup_func(self, typ):
        """ Type-lookup callback handed to the lexer: no name is
            ever reported as a type.
        """
        return False

    def setUp(self):
        # A fresh, non-optimized lexer and an empty error slot for
        # every test method.
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, str, error_like):
        """ Lex `str` to exhaustion and assert that the last error
            reported by the lexer matches the regular expression
            `error_like` (searched with re.search).
        """
        # feed the string to the lexer
        self.clex.input(str)

        # Pulls all tokens from the string. Errors will
        # be written into self.error by the error_func
        # callback
        #
        token_types(self.clex)

        # compare the error to the expected
        self.assertTrue(re.search(error_like, self.error),
            "\nExpected error matching: %s\nGot: %s" %
                (error_like, self.error))

        # clear last error, for the sake of subsequent invocations
        self.error = ""

    def test_trivial_tokens(self):
        self.assertLexerError('@', ERR_ILLEGAL_CHAR)
        self.assertLexerError('`', ERR_ILLEGAL_CHAR)
        self.assertLexerError('\\', ERR_ILLEGAL_CHAR)

    def test_integer_constants(self):
        self.assertLexerError('029', ERR_OCTAL)
        self.assertLexerError('012345678', ERR_OCTAL)

    def test_char_constants(self):
        self.assertLexerError("'", ERR_UNMATCHED_QUOTE)
        self.assertLexerError("'b\n", ERR_UNMATCHED_QUOTE)

        self.assertLexerError("'jx'", ERR_INVALID_CCONST)
        # Raw string: the input must contain a literal backslash, and
        # '\*' is not a valid Python escape sequence.
        self.assertLexerError(r"'\*'", ERR_INVALID_CCONST)

    def test_string_literals(self):
        # Raw strings keep the backslashes literal without relying on
        # Python passing unknown escapes ('\9', '\*') through.
        self.assertLexerError(r'"jx\9"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'"hekllo\* on ix"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'L"hekllo\* on ix"', ERR_STRING_ESCAPE)

    def test_preprocessor(self):
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)
380
381
# Run every test case in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
Eli Bendersky86f2eee2013-01-18 06:04:01 -0800384
Eli Bendersky3b1b08d2012-06-15 12:37:54 +0300385