Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 1 | import re
|
| 2 | import sys
|
| 3 | import unittest
|
| 4 |
|
| 5 | sys.path.insert(0, '..')
|
| 6 | from pycparser.c_lexer import CLexer
|
| 7 |
|
| 8 |
|
def token_list(clex):
    """Pull tokens from *clex* until it is exhausted and return them as a list.

    The lexer signals exhaustion by returning None from token().
    """
    tokens = []
    while True:
        tok = clex.token()
        if tok is None:
            return tokens
        tokens.append(tok)
|
| 11 |
|
| 12 |
|
def token_types(clex):
    """Return the .type attribute of every token remaining in *clex*, in order."""
    return [tok.type for tok in token_list(clex)]
|
| 15 |
|
| 16 |
|
class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        raises an exception.
    """
    def error_func(self, msg, line, column):
        # Any lexer error in these tests means the test has failed.
        self.fail(msg)

    def type_lookup_func(self, typ):
        # Identifiers beginning with 'mytype' are treated as typedef
        # names so that TYPEID recognition can be exercised.
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, source, types):
        """ Assert that lexing `source` yields exactly the token types
            listed in `types`.
            (parameter renamed from `str`, which shadowed the builtin)
        """
        self.clex.input(source)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('i ^= 1;', ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3', ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
                'LBRACE', 'RBRACE',
                'RBRACKET',
                'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
                'LOR',
                'LNOT', 'ID',
                'AND',
                'NOT', 'ID',
                'CONDOP', 'ID'])

        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
                'LOR', 'OR',
                'LAND', 'AND',
                'XOR',
                'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
                'ARROW', 'CONDOP',
                'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
                'RPAREN', 'TIMES', 'INT_CONST_DEC'],)

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
                'ID', 'GT', 'INT_CONST_OCT',
                'CONDOP',
                'ID',
                'COLON',
                'MINUS', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
                'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
                'ID', 'LT', 'ID', 'SEMI',
                'PLUSPLUS', 'ID',
                'RPAREN'])

        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
                'LBRACE',
                'CASE', 'ID', 'COLON',
                'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                'BREAK', 'SEMI',
                'DEFAULT', 'COLON',
                'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
                'RBRACE'])

    def test_preprocessor(self):
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

        # Local renamed from `str`, which shadowed the builtin.
        pp_source = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        self.clex.input(pp_source)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        # Pull the next three tokens ('4', 'dsf', 'armo'); `t` ends up
        # holding 'armo', whose line number follows the '# 9' directive.
        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')
|
| 261 |
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 262 |
|
| 263 |
|
# Keeps all the errors the lexer spits in one place, to allow
# easier modification if the error syntax changes.
#
# NOTE: these are used as regular-expression patterns (via re.search
# in TestCLexerErrors.assertLexerError), matched as substrings of the
# actual error messages.
#
# Lexical errors:
ERR_ILLEGAL_CHAR = 'Illegal character'
ERR_OCTAL = 'Invalid octal constant'
ERR_UNMATCHED_QUOTE = 'Unmatched \''
ERR_INVALID_CCONST = 'Invalid char constant'
ERR_STRING_ESCAPE = 'String contains invalid escape'

# #line-directive errors:
ERR_FILENAME_BEFORE_LINE = 'filename before line'
ERR_LINENUM_MISSING = 'line number missing'
ERR_INVALID_LINE_DIRECTIVE = 'invalid #line directive'
|
| 276 |
|
| 277 |
|
class TestCLexerErrors(unittest.TestCase):
    """ Test lexing of erroneous strings.
        Works by passing an error functions that saves the error
        in an attribute for later perusal.
    """
    def error_func(self, msg, line, column):
        # Record the message so the test can inspect it later.
        self.error = msg

    def type_lookup_func(self, typ):
        # No typedef names exist in these tests.
        return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, source, error_like):
        """ Assert that lexing `source` produces an error message
            matching the regex `error_like`.
            (parameter renamed from `str`, which shadowed the builtin)
        """
        # feed the string to the lexer
        self.clex.input(source)

        # Pulls all tokens from the string. Errors will
        # be written into self.error by the error_func
        # callback
        #
        token_types(self.clex)

        # compare the error to the expected
        # (assertTrue replaces the deprecated failUnless alias, which
        # was removed in Python 3.12)
        self.assertTrue(re.search(error_like, self.error),
            "\nExpected error matching: %s\nGot: %s" %
                (error_like, self.error))

        # clear last error, for the sake of subsequent invocations
        self.error = ""

    def test_trivial_tokens(self):
        self.assertLexerError('@', ERR_ILLEGAL_CHAR)
        self.assertLexerError('$', ERR_ILLEGAL_CHAR)
        self.assertLexerError('`', ERR_ILLEGAL_CHAR)
        self.assertLexerError('\\', ERR_ILLEGAL_CHAR)

    def test_integer_constants(self):
        self.assertLexerError('029', ERR_OCTAL)
        self.assertLexerError('012345678', ERR_OCTAL)

    def test_char_constants(self):
        self.assertLexerError("'", ERR_UNMATCHED_QUOTE)
        self.assertLexerError("'b\n", ERR_UNMATCHED_QUOTE)

        self.assertLexerError("'jx'", ERR_INVALID_CCONST)
        self.assertLexerError("'\*'", ERR_INVALID_CCONST)
        self.assertLexerError("'\9'", ERR_INVALID_CCONST)
        self.assertLexerError("L'\9'", ERR_INVALID_CCONST)

    def test_string_literals(self):
        self.assertLexerError('"jx\9"', ERR_STRING_ESCAPE)
        self.assertLexerError('"hekllo\* on ix"', ERR_STRING_ESCAPE)
        self.assertLexerError('L"hekllo\* on ix"', ERR_STRING_ESCAPE)

    def test_preprocessor(self):
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)
|
| 340 |
|
| 341 |
|
if __name__ == '__main__':
    # Run the full test suite when this file is executed directly.
    unittest.main()
|
| 344 | |
| 345 |
|