blob: 393f0a827a5ee06fc1c36b988d567f75b2596e44 [file] [log] [blame]
#------------------------------------------------------------------------------
# pycparser: c_lexer.py
#
# CLexer class: lexer for the C language
#
# Copyright (C) 2008-2013, Eli Bendersky
# License: BSD
#------------------------------------------------------------------------------
Eli Bendersky3921e8e2010-05-21 09:05:39 +03009import re
10import sys
11
Eli Bendersky97b1ee02012-12-24 15:15:22 -080012from .ply import lex
13from .ply.lex import TOKEN
Eli Bendersky3921e8e2010-05-21 09:05:39 +030014
15
class CLexer(object):
    """ A lexer for the C language. After building it, set the
        input text with input(), and call token() to get new
        tokens.

        The public attribute filename can be set to an initial
        filename, but the lexer will update it upon #line
        directives.
    """
    def __init__(self, error_func, on_lbrace_func, on_rbrace_func,
                 type_lookup_func):
        """ Create a new Lexer.

            error_func:
                An error function. Will be called with an error
                message, line and column as arguments, in case of
                an error during lexing.

            on_lbrace_func, on_rbrace_func:
                Called when an LBRACE or RBRACE is encountered
                (likely to push/pop type_lookup_func's scope)

            type_lookup_func:
                A type lookup function. Given a string, it must
                return True IFF this string is a name of a type
                that was defined with a typedef earlier.
        """
        self.error_func = error_func
        self.on_lbrace_func = on_lbrace_func
        self.on_rbrace_func = on_rbrace_func
        self.type_lookup_func = type_lookup_func
        self.filename = ''

        # Keeps track of the last token returned from self.token()
        self.last_token = None

        # Allow either "# line" or "# <num>" to support GCC's
        # cpp output.
        #
        # Both patterns are raw strings so that \W and \d reach the
        # regex engine untouched instead of being (invalid) string
        # escapes; [ \t] still matches a tab because the re module
        # understands \t itself.
        #
        self.line_pattern = re.compile(r'([ \t]*line\W)|([ \t]*\d+)')
        self.pragma_pattern = re.compile(r'[ \t]*pragma\W')

    def build(self, **kwargs):
        """ Builds the lexer from the specification. Must be
            called after the lexer object is created.

            This method exists separately, because the PLY
            manual warns against calling lex.lex inside
            __init__

            Any keyword arguments are forwarded to lex.lex.
        """
        self.lexer = lex.lex(object=self, **kwargs)

    def reset_lineno(self):
        """ Resets the internal line number counter of the lexer.
        """
        self.lexer.lineno = 1

    def input(self, text):
        """ Sets the text the underlying PLY lexer will tokenize.
        """
        self.lexer.input(text)

    def token(self):
        """ Fetches the next token from the underlying PLY lexer,
            remembers it in self.last_token and returns it.
        """
        self.last_token = self.lexer.token()
        return self.last_token

    def find_tok_column(self, token):
        """ Find the column of the token in its line.

            The column is 1-based: rfind yields -1 when no newline
            precedes the token, so a token at offset 0 is reported
            at column 1.
        """
        last_cr = self.lexer.lexdata.rfind('\n', 0, token.lexpos)
        return token.lexpos - last_cr

    ######################--   PRIVATE   --######################

    ##
    ## Internal auxiliary methods
    ##
    def _error(self, msg, token):
        """ Reports msg through error_func together with the line and
            column of token, then skips one character of input so that
            lexing can continue past the offending position.
        """
        line, column = self._make_tok_location(token)
        self.error_func(msg, line, column)
        self.lexer.skip(1)

    def _make_tok_location(self, token):
        """ Returns the (line, column) pair for token.
        """
        return (token.lineno, self.find_tok_column(token))

    ##
    ## Reserved keywords
    ##
    keywords = (
        '_BOOL', '_COMPLEX', 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST',
        'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
        'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'REGISTER',
        'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT',
        'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
        'VOLATILE', 'WHILE',
    )

    # Maps the C spelling of each keyword to its token type. All
    # keywords are the lower-cased token name, except _Bool and
    # _Complex which are mixed-case in C.
    keyword_map = {}
    for keyword in keywords:
        if keyword == '_BOOL':
            keyword_map['_Bool'] = keyword
        elif keyword == '_COMPLEX':
            keyword_map['_Complex'] = keyword
        else:
            keyword_map[keyword.lower()] = keyword

    ##
    ## All the tokens recognized by the lexer
    ##
    tokens = keywords + (
        # Identifiers
        'ID',

        # Type identifiers (identifiers previously defined as
        # types with typedef)
        'TYPEID',

        # constants
        'INT_CONST_DEC', 'INT_CONST_OCT', 'INT_CONST_HEX',
        'FLOAT_CONST', 'HEX_FLOAT_CONST',
        'CHAR_CONST',
        'WCHAR_CONST',

        # String literals
        'STRING_LITERAL',
        'WSTRING_LITERAL',

        # Operators
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
        'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
        'LOR', 'LAND', 'LNOT',
        'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

        # Assignment
        'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL',
        'PLUSEQUAL', 'MINUSEQUAL',
        'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL',
        'OREQUAL',

        # Increment/decrement
        'PLUSPLUS', 'MINUSMINUS',

        # Structure dereference (->)
        'ARROW',

        # Conditional operator (?)
        'CONDOP',

        # Delimiters
        'LPAREN', 'RPAREN',         # ( )
        'LBRACKET', 'RBRACKET',     # [ ]
        'LBRACE', 'RBRACE',         # { }
        'COMMA', 'PERIOD',          # , .
        'SEMI', 'COLON',            # ; :

        # Ellipsis (...)
        'ELLIPSIS',

        # pre-processor
        'PPHASH',      # '#'
    )

    ##
    ## Regexes for use in tokens
    ##
    ##

    # valid C identifiers (K&R2: A.2.3), plus '$' (supported by some compilers)
    identifier = r'[a-zA-Z_$][0-9a-zA-Z_$]*'

    hex_prefix = '0[xX]'
    hex_digits = '[0-9a-fA-F]+'

    # integer constants (K&R2: A.2.5.1)
    integer_suffix_opt = r'(([uU]ll)|([uU]LL)|(ll[uU]?)|(LL[uU]?)|([uU][lL])|([lL][uU]?)|[uU])?'
    decimal_constant = '(0'+integer_suffix_opt+')|([1-9][0-9]*'+integer_suffix_opt+')'
    octal_constant = '0[0-7]*'+integer_suffix_opt
    hex_constant = hex_prefix+hex_digits+integer_suffix_opt

    bad_octal_constant = '0[0-7]*[89]'

    # character constants (K&R2: A.2.5.2)
    # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
    # directives with Windows paths as filenames (..\..\dir\file)
    # For the same reason, decimal_escape allows all digit sequences. We want to
    # parse all correct code, even if it means to sometimes parse incorrect
    # code.
    #
    simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
    decimal_escape = r"""(\d+)"""
    hex_escape = r"""(x[0-9a-fA-F]+)"""
    bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""

    escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
    cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
    char_const = "'"+cconst_char+"'"
    wchar_const = 'L'+char_const
    unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
    bad_char_const = r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+bad_escape+r"""[^'\n]*')"""

    # string literals (K&R2: A.2.6)
    string_char = r"""([^"\\\n]|"""+escape_sequence+')'
    string_literal = '"'+string_char+'*"'
    wstring_literal = 'L'+string_literal
    bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'

    # floating constants (K&R2: A.2.5.3)
    exponent_part = r"""([eE][-+]?[0-9]+)"""
    fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
    floating_constant = '(((('+fractional_constant+')'+exponent_part+'?)|([0-9]+'+exponent_part+'))[FfLl]?)'
    binary_exponent_part = r'''([pP][+-]?[0-9]+)'''
    hex_fractional_constant = '((('+hex_digits+r""")?\."""+hex_digits+')|('+hex_digits+r"""\.))"""
    hex_floating_constant = '('+hex_prefix+'('+hex_digits+'|'+hex_fractional_constant+')'+binary_exponent_part+'[FfLl]?)'

    ##
    ## Lexer states: used for preprocessor \n-terminated directives
    ##
    states = (
        # ppline: preprocessor line directives
        #
        ('ppline', 'exclusive'),

        # pppragma: pragma
        #
        ('pppragma', 'exclusive'),
    )

    # NOTE: the docstring of a PLY rule function is its regex, so the
    # rule functions below are documented with comments only.

    # A '#' either starts a #line / #pragma directive (handled silently
    # in a dedicated lexer state) or is returned as a PPHASH token.
    def t_PPHASH(self, t):
        r'[ \t]*\#'
        if self.line_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
            t.lexer.begin('ppline')
            self.pp_line = self.pp_filename = None
        elif self.pragma_pattern.match(t.lexer.lexdata, pos=t.lexer.lexpos):
            t.lexer.begin('pppragma')
        else:
            t.type = 'PPHASH'
            return t

    ##
    ## Rules for the ppline state
    ##

    # The file name is only accepted after the line number was seen;
    # it is stored without its surrounding double quotes.
    @TOKEN(string_literal)
    def t_ppline_FILENAME(self, t):
        if self.pp_line is None:
            self._error('filename before line number in #line', t)
        else:
            self.pp_filename = t.value.strip('"')

    @TOKEN(decimal_constant)
    def t_ppline_LINE_NUMBER(self, t):
        if self.pp_line is None:
            self.pp_line = t.value
        else:
            # Ignore: GCC's cpp sometimes inserts a numeric flag
            # after the file name
            pass

    # End of the directive: apply the collected line number and file
    # name, then go back to normal lexing.
    def t_ppline_NEWLINE(self, t):
        r'\n'

        if self.pp_line is None:
            self._error('line number missing in #line', t)
        else:
            self.lexer.lineno = int(self.pp_line)

            if self.pp_filename is not None:
                self.filename = self.pp_filename

        t.lexer.begin('INITIAL')

    def t_ppline_PPLINE(self, t):
        r'line'
        pass

    t_ppline_ignore = ' \t'

    def t_ppline_error(self, t):
        self._error('invalid #line directive', t)

    ##
    ## Rules for the pppragma state
    ##

    # A pragma ends at the newline; its contents are discarded.
    def t_pppragma_NEWLINE(self, t):
        r'\n'
        t.lexer.lineno += 1
        t.lexer.begin('INITIAL')

    def t_pppragma_PPPRAGMA(self, t):
        r'pragma'
        pass

    t_pppragma_ignore = ' \t<>.-{}();+-*/$%@&^~!?:,0123456789'

    @TOKEN(string_literal)
    def t_pppragma_STR(self, t): pass

    @TOKEN(identifier)
    def t_pppragma_ID(self, t): pass

    def t_pppragma_error(self, t):
        self._error('invalid #pragma directive', t)

    ##
    ## Rules for the normal state
    ##
    t_ignore = ' \t'

    # Newlines
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    # Operators
    t_PLUS              = r'\+'
    t_MINUS             = r'-'
    t_TIMES             = r'\*'
    t_DIVIDE            = r'/'
    t_MOD               = r'%'
    t_OR                = r'\|'
    t_AND               = r'&'
    t_NOT               = r'~'
    t_XOR               = r'\^'
    t_LSHIFT            = r'<<'
    t_RSHIFT            = r'>>'
    t_LOR               = r'\|\|'
    t_LAND              = r'&&'
    t_LNOT              = r'!'
    t_LT                = r'<'
    t_GT                = r'>'
    t_LE                = r'<='
    t_GE                = r'>='
    t_EQ                = r'=='
    t_NE                = r'!='

    # Assignment operators
    t_EQUALS            = r'='
    t_TIMESEQUAL        = r'\*='
    t_DIVEQUAL          = r'/='
    t_MODEQUAL          = r'%='
    t_PLUSEQUAL         = r'\+='
    t_MINUSEQUAL        = r'-='
    t_LSHIFTEQUAL       = r'<<='
    t_RSHIFTEQUAL       = r'>>='
    t_ANDEQUAL          = r'&='
    t_OREQUAL           = r'\|='
    t_XOREQUAL          = r'\^='

    # Increment/decrement
    t_PLUSPLUS          = r'\+\+'
    t_MINUSMINUS        = r'--'

    # ->
    t_ARROW             = r'->'

    # ?
    t_CONDOP            = r'\?'

    # Delimiters
    t_LPAREN            = r'\('
    t_RPAREN            = r'\)'
    t_LBRACKET          = r'\['
    t_RBRACKET          = r'\]'
    t_COMMA             = r','
    t_PERIOD            = r'\.'
    t_SEMI              = r';'
    t_COLON             = r':'
    t_ELLIPSIS          = r'\.\.\.'

    # Scope delimiters
    # To see why on_lbrace_func is needed, consider:
    #   typedef char TT;
    #   void foo(int TT) { TT = 10; }
    #   TT x = 5;
    # Outside the function, TT is a typedef, but inside (starting and ending
    # with the braces) it's a parameter.  The trouble begins with yacc's
    # lookahead token.  If we open a new scope in brace_open, then TT has
    # already been read and incorrectly interpreted as TYPEID.  So, we need
    # to open and close scopes from within the lexer.
    # Similar for the TT immediately outside the end of the function.
    #
    @TOKEN(r'\{')
    def t_LBRACE(self, t):
        self.on_lbrace_func()
        return t

    @TOKEN(r'\}')
    def t_RBRACE(self, t):
        self.on_rbrace_func()
        return t

    t_STRING_LITERAL = string_literal

    # The following floating and integer constants are defined as
    # functions to impose a strict order (otherwise, decimal
    # is placed before the others because its regex is longer,
    # and this is bad)
    #
    @TOKEN(floating_constant)
    def t_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_floating_constant)
    def t_HEX_FLOAT_CONST(self, t):
        return t

    @TOKEN(hex_constant)
    def t_INT_CONST_HEX(self, t):
        return t

    @TOKEN(bad_octal_constant)
    def t_BAD_CONST_OCT(self, t):
        msg = "Invalid octal constant"
        self._error(msg, t)

    @TOKEN(octal_constant)
    def t_INT_CONST_OCT(self, t):
        return t

    @TOKEN(decimal_constant)
    def t_INT_CONST_DEC(self, t):
        return t

    # Must come before bad_char_const, to prevent it from
    # catching valid char constants as invalid
    #
    @TOKEN(char_const)
    def t_CHAR_CONST(self, t):
        return t

    @TOKEN(wchar_const)
    def t_WCHAR_CONST(self, t):
        return t

    @TOKEN(unmatched_quote)
    def t_UNMATCHED_QUOTE(self, t):
        msg = "Unmatched '"
        self._error(msg, t)

    @TOKEN(bad_char_const)
    def t_BAD_CHAR_CONST(self, t):
        msg = "Invalid char constant %s" % t.value
        self._error(msg, t)

    @TOKEN(wstring_literal)
    def t_WSTRING_LITERAL(self, t):
        return t

    # unmatched string literals are caught by the preprocessor

    @TOKEN(bad_string_literal)
    def t_BAD_STRING_LITERAL(self, t):
        msg = "String contains invalid escape code"
        self._error(msg, t)

    # Identifiers: keywords get their own token type; any other name
    # that type_lookup_func recognizes becomes a TYPEID, the rest ID.
    @TOKEN(identifier)
    def t_ID(self, t):
        t.type = self.keyword_map.get(t.value, "ID")
        if t.type == 'ID' and self.type_lookup_func(t.value):
            t.type = "TYPEID"
        return t

    def t_error(self, t):
        msg = 'Illegal character %s' % repr(t.value[0])
        self._error(msg, t)