import re
import sys
import unittest

sys.path.insert(0, '..')
from pycparser.c_lexer import CLexer


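# Pulls tokens from the lexer until it returns None (end of input)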
def token_list(clex):
    return list(iter(clex.token, None))


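# The type names of all remaining tokens in the lexer's input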
def token_types(clex):
    return [i.type for i in token_list(clex)]


class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        raises an exception.
    """
    def error_func(self, msg, line, column):
        self.fail(msg)

    def type_lookup_func(self, typ):
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
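        # a keyword glued to more identifier characters is just an ID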
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('i ^= 1;', ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest is a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3', ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_hexadecimal_floating_constants(self):
        self.assertTokensTypes('0xDE.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0x.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0X12.P0', ['HEX_FLOAT_CONST'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
             'LBRACE', 'RBRACE',
             'RBRACKET',
             'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
             'LOR',
             'LNOT', 'ID',
             'AND',
             'NOT', 'ID',
             'CONDOP', 'ID'])

        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
             'LOR', 'OR',
             'LAND', 'AND',
             'XOR',
             'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
             'ARROW', 'CONDOP',
             'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
             'RPAREN', 'TIMES', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

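        # a lone '0' matches the octal-constant rule, hence INT_CONST_OCT below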
        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
             'ID', 'GT', 'INT_CONST_OCT',
             'CONDOP',
             'ID',
             'COLON',
             'MINUS', 'INT_CONST_DEC'])

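        # maximal munch: 'a+++b' lexes as 'a', '++', '+', 'b'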
        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
             'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
             'ID', 'LT', 'ID', 'SEMI',
             'PLUSPLUS', 'ID',
             'RPAREN'])

        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
             'LBRACE',
             'CASE', 'ID', 'COLON',
             'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
             'BREAK', 'SEMI',
             'DEFAULT', 'COLON',
             'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
             'RBRACE'])

    def test_preprocessor(self):
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        str = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        #~ self.clex.filename
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

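        # consume three more tokens ('4', 'dsf', 'armo'); the last one should
        # carry the line number set by the '# 9' directive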
        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')

    def test_preprocessor_pragma(self):
        str = r'''
        42
        #pragma helo me
        #pragma once
        # pragma omp parallel private(th_id)
        #pragma {pack: 2, smack: 3}
        #pragma <includeme.h> "nowit.h"
        #pragma "string"
        #pragma id 124124 and numbers 0235495
        59
        '''

        # Check that pragmas are ignored but the line number advances
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        t2 = self.clex.token()
        self.assertEqual(t2.type, 'INT_CONST_DEC')
        self.assertEqual(t2.lineno, 10)


# Keeps all the errors the lexer spits out in one place, to allow
# easier modification if the error syntax changes.
#
ERR_ILLEGAL_CHAR = 'Illegal character'
ERR_OCTAL = 'Invalid octal constant'
ERR_UNMATCHED_QUOTE = 'Unmatched \''
ERR_INVALID_CCONST = 'Invalid char constant'
ERR_STRING_ESCAPE = 'String contains invalid escape'

ERR_FILENAME_BEFORE_LINE = 'filename before line'
ERR_LINENUM_MISSING = 'line number missing'
ERR_INVALID_LINE_DIRECTIVE = 'invalid #line directive'


class TestCLexerErrors(unittest.TestCase):
    """ Test lexing of erroneous strings.
        Works by passing an error function that saves the error
        in an attribute for later perusal.
    """
    def error_func(self, msg, line, column):
        self.error = msg

    def type_lookup_func(self, typ):
        return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, str, error_like):
        # feed the string to the lexer
        self.clex.input(str)

        # Pulls all tokens from the string. Errors will
        # be written into self.error by the error_func
        # callback
        #
        token_types(self.clex)

        # compare the error to the expected
        self.assertTrue(re.search(error_like, self.error),
            "\nExpected error matching: %s\nGot: %s" %
                (error_like, self.error))

        # clear last error, for the sake of subsequent invocations
        self.error = ""

    def test_trivial_tokens(self):
        self.assertLexerError('@', ERR_ILLEGAL_CHAR)
        self.assertLexerError('$', ERR_ILLEGAL_CHAR)
        self.assertLexerError('`', ERR_ILLEGAL_CHAR)
        self.assertLexerError('\\', ERR_ILLEGAL_CHAR)

    def test_integer_constants(self):
        self.assertLexerError('029', ERR_OCTAL)
        self.assertLexerError('012345678', ERR_OCTAL)

    def test_char_constants(self):
        self.assertLexerError("'", ERR_UNMATCHED_QUOTE)
        self.assertLexerError("'b\n", ERR_UNMATCHED_QUOTE)

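        # multi-character constants and unknown escapes are reported as
        # invalid char constants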
        self.assertLexerError("'jx'", ERR_INVALID_CCONST)
        self.assertLexerError("'\*'", ERR_INVALID_CCONST)
        self.assertLexerError("'\9'", ERR_INVALID_CCONST)
        self.assertLexerError("L'\9'", ERR_INVALID_CCONST)

    def test_string_literals(self):
        self.assertLexerError('"jx\9"', ERR_STRING_ESCAPE)
        self.assertLexerError('"hekllo\* on ix"', ERR_STRING_ESCAPE)
        self.assertLexerError('L"hekllo\* on ix"', ERR_STRING_ESCAPE)

    def test_preprocessor(self):
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)


if __name__ == '__main__':
    unittest.main()