import re
import sys
import unittest

sys.path.insert(0, '..')
from pycparser.c_lexer import CLexer

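# Helpers that drain the lexer: clex.token() returns the next token, or None
# once the input is exhausted, so iter() with a None sentinel collects them all.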
def token_list(clex):
    return list(iter(clex.token, None))


def token_types(clex):
    return [i.type for i in token_list(clex)]

class TestCLexerNoErrors(unittest.TestCase):
    """ Test lexing of strings that are not supposed to cause
        errors. Therefore, the error_func passed to the lexer
        fails the test if it is ever called.
    """
    def error_func(self, msg, line, column):
        self.fail(msg)

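    # The lexer invokes these callbacks whenever it lexes '{' or '}' (the
    # parser uses them to track scopes); these tests don't need that, so
    # they are no-ops.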
    def on_lbrace_func(self):
        pass

    def on_rbrace_func(self):
        pass

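    # Stands in for the parser's type-name lookup: any identifier starting
    # with 'mytype' is reported as a typedef name, so the lexer emits TYPEID
    # for it instead of ID.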
    def type_lookup_func(self, typ):
        if typ.startswith('mytype'):
            return True
        else:
            return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.on_lbrace_func,
                           self.on_rbrace_func, self.type_lookup_func)
        self.clex.build(optimize=False)

    def assertTokensTypes(self, str, types):
        self.clex.input(str)
        self.assertEqual(token_types(self.clex), types)

    def test_trivial_tokens(self):
        self.assertTokensTypes('1', ['INT_CONST_DEC'])
        self.assertTokensTypes('-', ['MINUS'])
        self.assertTokensTypes('volatile', ['VOLATILE'])
        self.assertTokensTypes('...', ['ELLIPSIS'])
        self.assertTokensTypes('++', ['PLUSPLUS'])
        self.assertTokensTypes('case int', ['CASE', 'INT'])
        self.assertTokensTypes('caseint', ['ID'])
        self.assertTokensTypes('$dollar cent$', ['ID', 'ID'])
        self.assertTokensTypes('i ^= 1;', ['ID', 'XOREQUAL', 'INT_CONST_DEC', 'SEMI'])

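    # Whether an identifier lexes as ID or TYPEID is decided solely by
    # type_lookup_func above, not by the lexer itself.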
    def test_id_typeid(self):
        self.assertTokensTypes('myt', ['ID'])
        self.assertTokensTypes('mytype', ['TYPEID'])
        self.assertTokensTypes('mytype6 var', ['TYPEID', 'ID'])

    def test_integer_constants(self):
        self.assertTokensTypes('12', ['INT_CONST_DEC'])
        self.assertTokensTypes('12u', ['INT_CONST_DEC'])
        self.assertTokensTypes('12l', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872Ul', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872lU', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872LL', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872ull', ['INT_CONST_DEC'])
        self.assertTokensTypes('199872llu', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000uLL', ['INT_CONST_DEC'])
        self.assertTokensTypes('1009843200000LLu', ['INT_CONST_DEC'])

        self.assertTokensTypes('077', ['INT_CONST_OCT'])
        self.assertTokensTypes('0123456L', ['INT_CONST_OCT'])

        self.assertTokensTypes('0xf7', ['INT_CONST_HEX'])
        self.assertTokensTypes('0b110', ['INT_CONST_BIN'])
        self.assertTokensTypes('0x01202AAbbf7Ul', ['INT_CONST_HEX'])

        # no 0 before x, so ID catches it
        self.assertTokensTypes('xf7', ['ID'])

        # - is MINUS, the rest is a constant
        self.assertTokensTypes('-1', ['MINUS', 'INT_CONST_DEC'])

    def test_special_names(self):
        self.assertTokensTypes('sizeof offsetof', ['SIZEOF', 'OFFSETOF'])

    def test_floating_constants(self):
        self.assertTokensTypes('1.5f', ['FLOAT_CONST'])
        self.assertTokensTypes('01.5', ['FLOAT_CONST'])
        self.assertTokensTypes('.15L', ['FLOAT_CONST'])
        self.assertTokensTypes('0.', ['FLOAT_CONST'])

        # but just a period is a period
        self.assertTokensTypes('.', ['PERIOD'])

        self.assertTokensTypes('3.3e-3', ['FLOAT_CONST'])
        self.assertTokensTypes('.7e25L', ['FLOAT_CONST'])
        self.assertTokensTypes('6.e+125f', ['FLOAT_CONST'])
        self.assertTokensTypes('666e666', ['FLOAT_CONST'])
        self.assertTokensTypes('00666e+3', ['FLOAT_CONST'])

        # but this is a hex integer + 3
        self.assertTokensTypes('0x0666e+3', ['INT_CONST_HEX', 'PLUS', 'INT_CONST_DEC'])

    def test_hexadecimal_floating_constants(self):
        self.assertTokensTypes('0xDE.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0x.488641p0', ['HEX_FLOAT_CONST'])
        self.assertTokensTypes('0X12.P0', ['HEX_FLOAT_CONST'])

    def test_char_constants(self):
        self.assertTokensTypes(r"""'x'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'x'""", ['WCHAR_CONST'])
        self.assertTokensTypes(r"""'\t'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\''""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\?'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\012'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""'\x2f12'""", ['CHAR_CONST'])
        self.assertTokensTypes(r"""L'\xaf'""", ['WCHAR_CONST'])

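    # The brace callbacks should fire once per brace, in input order, even
    # when the braces are unbalanced.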
    def test_on_rbrace_lbrace(self):
        braces = []
        def on_lbrace():
            braces.append('{')
        def on_rbrace():
            braces.append('}')
        clex = CLexer(self.error_func, on_lbrace, on_rbrace,
                      self.type_lookup_func)
        clex.build(optimize=False)
        clex.input('hello { there } } and again }}{')
        token_list(clex)
        self.assertEqual(braces, ['{', '}', '}', '}', '}', '{'])

    def test_string_literal(self):
        self.assertTokensTypes('"a string"', ['STRING_LITERAL'])
        self.assertTokensTypes('L"ing"', ['WSTRING_LITERAL'])
        self.assertTokensTypes(
            '"i am a string too \t"',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"esc\ape \"\'\? \0234 chars \rule"''',
            ['STRING_LITERAL'])
        self.assertTokensTypes(
            r'''"hello 'joe' wanna give it a \"go\"?"''',
            ['STRING_LITERAL'])

    def test_mess(self):
        self.assertTokensTypes(
            r'[{}]()',
            ['LBRACKET',
             'LBRACE', 'RBRACE',
             'RBRACKET',
             'LPAREN', 'RPAREN'])

        self.assertTokensTypes(
            r'()||!C&~Z?J',
            ['LPAREN', 'RPAREN',
             'LOR',
             'LNOT', 'ID',
             'AND',
             'NOT', 'ID',
             'CONDOP', 'ID'])

        self.assertTokensTypes(
            r'+-*/%|||&&&^><>=<===!=',
            ['PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
             'LOR', 'OR',
             'LAND', 'AND',
             'XOR',
             'GT', 'LT', 'GE', 'LE', 'EQ', 'NE'])

        self.assertTokensTypes(
            r'++--->?.,;:',
            ['PLUSPLUS', 'MINUSMINUS',
             'ARROW', 'CONDOP',
             'PERIOD', 'COMMA', 'SEMI', 'COLON'])

    def test_exprs(self):
        self.assertTokensTypes(
            'bb-cc',
            ['ID', 'MINUS', 'ID'])

        self.assertTokensTypes(
            'foo & 0xFF',
            ['ID', 'AND', 'INT_CONST_HEX'])

        self.assertTokensTypes(
            '(2+k) * 62',
            ['LPAREN', 'INT_CONST_DEC', 'PLUS', 'ID',
             'RPAREN', 'TIMES', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x | y >> z',
            ['ID', 'OR', 'ID', 'RSHIFT', 'ID'])

        self.assertTokensTypes(
            'x <<= z << 5',
            ['ID', 'LSHIFTEQUAL', 'ID', 'LSHIFT', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'x = y > 0 ? y : -6',
            ['ID', 'EQUALS',
             'ID', 'GT', 'INT_CONST_OCT',
             'CONDOP',
             'ID',
             'COLON',
             'MINUS', 'INT_CONST_DEC'])

        self.assertTokensTypes(
            'a+++b',
            ['ID', 'PLUSPLUS', 'PLUS', 'ID'])

    def test_statements(self):
        self.assertTokensTypes(
            'for (int i = 0; i < n; ++i)',
            ['FOR', 'LPAREN',
             'INT', 'ID', 'EQUALS', 'INT_CONST_OCT', 'SEMI',
             'ID', 'LT', 'ID', 'SEMI',
             'PLUSPLUS', 'ID',
             'RPAREN'])

        self.assertTokensTypes(
            'self: goto self;',
            ['ID', 'COLON', 'GOTO', 'ID', 'SEMI'])

        self.assertTokensTypes(
            """ switch (typ)
                {
                    case TYPE_ID:
                        m = 5;
                        break;
                    default:
                        m = 8;
                }""",
            ['SWITCH', 'LPAREN', 'ID', 'RPAREN',
             'LBRACE',
             'CASE', 'ID', 'COLON',
             'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
             'BREAK', 'SEMI',
             'DEFAULT', 'COLON',
             'ID', 'EQUALS', 'INT_CONST_DEC', 'SEMI',
             'RBRACE'])

    def test_preprocessor_line(self):
        self.assertTokensTypes('#abracadabra', ['PPHASH', 'ID'])

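        # A #line directive should update both the line number and the file
        # name that the lexer reports for the tokens that follow it.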
        str = r"""
        546
        #line 66 "kwas\df.h"
        id 4
        dsf
        # 9
        armo
        #line 10 "..\~..\test.h"
        tok1
        #line 99999 "include/me.h"
        tok2
        """

        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 2)

        t2 = self.clex.token()
        self.assertEqual(t2.type, 'ID')
        self.assertEqual(t2.value, 'id')
        self.assertEqual(t2.lineno, 66)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        for i in range(3):
            t = self.clex.token()

        self.assertEqual(t.type, 'ID')
        self.assertEqual(t.value, 'armo')
        self.assertEqual(t.lineno, 9)
        self.assertEqual(self.clex.filename, r'kwas\df.h')

        t4 = self.clex.token()
        self.assertEqual(t4.type, 'ID')
        self.assertEqual(t4.value, 'tok1')
        self.assertEqual(t4.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\~..\test.h')

        t5 = self.clex.token()
        self.assertEqual(t5.type, 'ID')
        self.assertEqual(t5.value, 'tok2')
        self.assertEqual(t5.lineno, 99999)
        self.assertEqual(self.clex.filename, r'include/me.h')

    def test_preprocessor_line_funny(self):
        str = r'''
        #line 10 "..\6\joe.h"
        10
        '''
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        self.assertEqual(t1.lineno, 10)
        self.assertEqual(self.clex.filename, r'..\6\joe.h')

    def test_preprocessor_pragma(self):
        str = r'''
        42
        #pragma helo me
        #pragma once
        # pragma omp parallel private(th_id)
        #pragma {pack: 2, smack: 3}
        #pragma <includeme.h> "nowit.h"
        #pragma "string"
        #pragma somestring="some_other_string"
        #pragma id 124124 and numbers 0235495
        59
        '''

        # Check that pragmas are ignored but the line number advances
        self.clex.input(str)
        self.clex.reset_lineno()

        t1 = self.clex.token()
        self.assertEqual(t1.type, 'INT_CONST_DEC')
        t2 = self.clex.token()
        self.assertEqual(t2.type, 'INT_CONST_DEC')
        self.assertEqual(t2.lineno, 11)


# Keeps all the error messages the lexer emits in one place, to allow
# easier modification if the error message format changes. These strings
# are matched as regular expressions against the reported error (see
# assertLexerError below).
#
ERR_ILLEGAL_CHAR = 'Illegal character'
ERR_OCTAL = 'Invalid octal constant'
ERR_UNMATCHED_QUOTE = 'Unmatched \''
ERR_INVALID_CCONST = 'Invalid char constant'
ERR_STRING_ESCAPE = 'String contains invalid escape'

ERR_FILENAME_BEFORE_LINE = 'filename before line'
ERR_LINENUM_MISSING = 'line number missing'
ERR_INVALID_LINE_DIRECTIVE = 'invalid #line directive'

class TestCLexerErrors(unittest.TestCase):
    """ Test lexing of erroneous strings.
        Works by passing an error function that saves the error
        message in an attribute for later inspection.
    """
    def error_func(self, msg, line, column):
        self.error = msg

    def on_lbrace_func(self):
        pass

    def on_rbrace_func(self):
        pass

    def type_lookup_func(self, typ):
        return False

    def setUp(self):
        self.clex = CLexer(self.error_func, self.on_lbrace_func,
                           self.on_rbrace_func, self.type_lookup_func)
        self.clex.build(optimize=False)
        self.error = ""

    def assertLexerError(self, str, error_like):
        # feed the string to the lexer
        self.clex.input(str)

        # Pulls all tokens from the string. Errors will
        # be written into self.error by the error_func
        # callback
        #
        token_types(self.clex)

        # compare the error to the expected
        self.assertTrue(re.search(error_like, self.error),
                        "\nExpected error matching: %s\nGot: %s" %
                        (error_like, self.error))

        # clear last error, for the sake of subsequent invocations
        self.error = ""

    def test_trivial_tokens(self):
        self.assertLexerError('@', ERR_ILLEGAL_CHAR)
        self.assertLexerError('`', ERR_ILLEGAL_CHAR)
        self.assertLexerError('\\', ERR_ILLEGAL_CHAR)

    def test_integer_constants(self):
        self.assertLexerError('029', ERR_OCTAL)
        self.assertLexerError('012345678', ERR_OCTAL)

    def test_char_constants(self):
        self.assertLexerError("'", ERR_UNMATCHED_QUOTE)
        self.assertLexerError("'b\n", ERR_UNMATCHED_QUOTE)

        self.assertLexerError("'jx'", ERR_INVALID_CCONST)
        self.assertLexerError(r"'\*'", ERR_INVALID_CCONST)

    def test_string_literals(self):
        self.assertLexerError(r'"jx\9"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'"hekllo\* on ix"', ERR_STRING_ESCAPE)
        self.assertLexerError(r'L"hekllo\* on ix"', ERR_STRING_ESCAPE)

    def test_preprocessor(self):
        self.assertLexerError('#line "ka"', ERR_FILENAME_BEFORE_LINE)
        self.assertLexerError('#line df', ERR_INVALID_LINE_DIRECTIVE)
        self.assertLexerError('#line \n', ERR_LINENUM_MISSING)


if __name__ == '__main__':
    unittest.main()