#-----------------------------------------------------------------------------
# ply: lex.py
#
# Author: David M. Beazley (dave@dabeaz.com)
#
# Copyright (C) 2001-2006, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# See the file COPYING for a complete copy of the LGPL.
#-----------------------------------------------------------------------------

__version__ = "2.2"

import re, sys, types

from . import util


# Regular expression used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')

# Available instance types. This is used when lexers are defined by a class.
# In Python 3, InstanceType and ObjectType are no more; they've passed on,
# ceased to be, they are ex-classes, along with old-style classes.

try:
    _INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
    _INSTANCETYPE = object

# Exception raised when an invalid token is encountered and no default error
# handler is defined.
class LexError(Exception):
    def __init__(self,message,s):
        self.args = (message,)
        self.text = s

# Token class
class LexToken(object):
    def __str__(self):
        return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
    def __repr__(self):
        return str(self)
    def skip(self,n):
        self.lexer.skip(n)

# -----------------------------------------------------------------------------
# Lexer class
#
# This class encapsulates all of the methods and data associated with a lexer.
#
#    input() - Store a new string in the lexer
#    token() - Get the next token
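#
# A minimal usage sketch (the token rules and input string are hypothetical;
# the lexer itself is normally constructed by the lex() function defined below):
#
#     lexer = lex.lex()          # Build lexer from the calling module's rules
#     lexer.input("3 + 4 * 10")
#     while 1:
#         tok = lexer.token()    # Returns None when the input is exhausted
#         if not tok: break
#         print(tok.type, tok.value, tok.lineno, tok.lexpos)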
# -----------------------------------------------------------------------------

class Lexer:
    def __init__(self):
        self.lexre = None             # Master regular expression. This is a list of
                                      # tuples (re,findex) where re is a compiled
                                      # regular expression and findex is a list
                                      # mapping regex group numbers to rules
        self.lexretext = None         # Current regular expression strings
        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
        self.lexstate = "INITIAL"     # Current lexer state
        self.lexstatestack = []       # Stack of lexer states
        self.lexstateinfo = None      # State information
        self.lexstateignore = {}      # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}      # Dictionary of error functions for each state
        self.lexreflags = 0           # Optional re compile flags
        self.lexdata = None           # Actual input data (as a string)
        self.lexpos = 0               # Current position in input text
        self.lexlen = 0               # Length of the input text
        self.lexerrorf = None         # Error rule (if any)
        self.lextokens = None         # List of valid tokens
        self.lexignore = ""           # Ignored characters
        self.lexliterals = ""         # Literal characters that can be passed through
        self.lexmodule = None         # Module
        self.lineno = 1               # Current line number
        self.lexdebug = 0             # Debugging mode
        self.lexoptimize = 0          # Optimized mode

    def clone(self,object=None):
        c = Lexer()
        c.lexstatere = self.lexstatere
        c.lexstateinfo = self.lexstateinfo
        c.lexstateretext = self.lexstateretext
        c.lexstate = self.lexstate
        c.lexstatestack = self.lexstatestack
        c.lexstateignore = self.lexstateignore
        c.lexstateerrorf = self.lexstateerrorf
        c.lexreflags = self.lexreflags
        c.lexdata = self.lexdata
        c.lexpos = self.lexpos
        c.lexlen = self.lexlen
        c.lextokens = self.lextokens
        c.lexdebug = self.lexdebug
        c.lineno = self.lineno
        c.lexoptimize = self.lexoptimize
        c.lexliterals = self.lexliterals
        c.lexmodule = self.lexmodule

        # If the object parameter has been supplied, it means we are attaching the
        # lexer to a new object. In this case, we have to rebind all methods in
        # the lexstatere and lexstateerrorf tables.

        if object:
            newtab = { }
            for key, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        if not f or not f[0]:
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object,f[0].__name__),f[1]))
                    newre.append((cre,newfindex))
                newtab[key] = newre
            c.lexstatere = newtab
            c.lexstateerrorf = { }
            for key, ef in self.lexstateerrorf.items():
                c.lexstateerrorf[key] = getattr(object,ef.__name__)
            c.lexmodule = object

        # Set up other attributes
        c.begin(c.lexstate)
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self,tabfile):
        tf = open(tabfile+".py","w")
        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
        tf.write("_lextokens = %s\n" % repr(self.lextokens))
        tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
        tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))

        tabre = { }
        for key, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1])))
            tabre[key] = titem

        tf.write("_lexstatere = %s\n" % repr(tabre))
        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))

        taberr = { }
        for key, ef in self.lexstateerrorf.items():
            if ef:
                taberr[key] = ef.__name__
            else:
                taberr[key] = None
        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
        tf.close()

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self,tabfile,fdict):
        exec("import %s" % tabfile)
        lextab = sys.modules[tabfile]   # exec() can't bind a function-local name in Python 3
        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = { }
        self.lexstateretext = { }
        for key,lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for i in range(len(lre)):
                titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict)))
                txtitem.append(lre[i][0])
            self.lexstatere[key] = titem
            self.lexstateretext[key] = txtitem
        self.lexstateerrorf = { }
        for key,ef in lextab._lexstateerrorf.items():
            self.lexstateerrorf[key] = fdict[ef]
        self.begin('INITIAL')

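    # ------------------------------------------------------------
    # A sketch of how the two table-file methods above are driven
    # (an assumption based on lex() below; "mylextab" is a
    # hypothetical module name):
    #
    #     lexer = lex.lex(optimize=1,lextab="mylextab")
    #
    # The first run validates the rules and writes mylextab.py via
    # writetab(); later runs import it via readtab() and skip rule
    # validation entirely.
    # ------------------------------------------------------------
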
    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self,s):
        if not (isinstance(s,util.bytes_type) or isinstance(s, util.string_type)):
            raise ValueError("Expected a string")
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self,state):
        if state not in self.lexstatere:
            raise ValueError("Undefined state")
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state,"")
        self.lexerrorf = self.lexstateerrorf.get(state,None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self,state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

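    # ------------------------------------------------------------
    # A sketch of how rules typically drive the state machinery
    # above (hypothetical rule and state names; the 'comment' state
    # must be declared in the module's 'states' list):
    #
    #     states = (('comment','exclusive'),)
    #
    #     def t_COMMENTSTART(t):
    #         r'/\*'
    #         t.lexer.push_state('comment')
    #
    #     def t_comment_END(t):
    #         r'\*/'
    #         t.lexer.pop_state()
    # ------------------------------------------------------------
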
    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self,n):
        self.lexpos += n

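    # ------------------------------------------------------------
    # The usual caller of skip() is a t_error rule that discards one
    # bad character and resumes scanning (a hypothetical sketch; see
    # the error handling in token() below):
    #
    #     def t_error(t):
    #         print("Illegal character '%s'" % t.value[0])
    #         t.lexer.skip(1)
    # ------------------------------------------------------------
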
    # ------------------------------------------------------------
    # token() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible. Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # Short-circuit scanning of whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre,lexindexfunc in self.lexre:
                m = lexre.match(lexdata,lexpos)
                if not m: continue

                # Set last match in lexer so that rules can access it if they want
                self.lexmatch = m

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos
                tok.lexer = self

                lexpos = m.end()
                i = m.lastindex
                func,tok.type = lexindexfunc[i]
                self.lexpos = lexpos

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type: return tok
                    break

                # If func is not callable, it's an ignored token defined as a string
                if not callable(func):
                    break

                # If token is processed by a function, call it
                newtok = func(tok)

                # Every rule function must return a token; if it returns nothing,
                # we just move on to the next token
                if not newtok:
                    lexpos = self.lexpos        # This is here in case user has updated lexpos.
                    break

                # Verify type of the token. If not in the token map, raise an error
                if not self.lexoptimize:
                    if newtok.type not in self.lextokens:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func.__code__.co_filename, func.__code__.co_firstlineno,
                            func.__name__, newtok.type),lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.lexer = self
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok: continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError("No input string given with input()")
        return None

# -----------------------------------------------------------------------------
# _validate_file()
#
# This checks to see if there are duplicate t_rulename() functions or strings
# in the lexer input file. This is done using a simple regular expression
# match on each line in the file.
# -----------------------------------------------------------------------------

def _validate_file(filename):
    import os.path
    base,ext = os.path.splitext(filename)
    if ext != '.py': return 1          # No idea what the file is. Return OK

    try:
        f = open(filename)
        lines = f.readlines()
        f.close()
    except IOError:
        return 1                       # Oh well

    fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
    sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
    counthash = { }
    linen = 1
    noerror = 1
    for l in lines:
        m = fre.match(l)
        if not m:
            m = sre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                print("%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev))
                noerror = 0
        linen += 1
    return noerror

# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------

def _funcs_to_names(funclist):
    result = []
    for f in funclist:
        if f and f[0]:
            result.append((f[0].__name__,f[1]))
        else:
            result.append(f)
    return result

# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------

def _names_to_funcs(namelist,fdict):
    result = []
    for n in namelist:
        if n and n[0]:
            result.append((fdict[n[0]],n[1]))
        else:
            result.append(n)
    return result

# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression. Given limitations in the Python re
# module, it may be necessary to break the master regex into separate expressions.
# -----------------------------------------------------------------------------

def _form_master_re(relist,reflags,ldict):
    if not relist: return []
    regex = "|".join(relist)
    try:
        lexre = re.compile(regex,re.VERBOSE | reflags)

        # Build the index to function map for the matching engine
        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
        for f,i in lexre.groupindex.items():
            handle = ldict.get(f,None)
            if type(handle) in (types.FunctionType, types.MethodType):
                lexindexfunc[i] = (handle,handle.__name__[2:])
            elif handle is not None:
                # Rule was specified as a string; there is no action function,
                # so just record the token name (None for ignored tokens)
                if f.find("ignore_") > 0:
                    lexindexfunc[i] = (None,None)
                else:
                    lexindexfunc[i] = (None, f[2:])

        return [(lexre,lexindexfunc)],[regex]
    except Exception as e:
        m = int(len(relist)/2)
        if m == 0: m = 1
        llist, lre = _form_master_re(relist[:m],reflags,ldict)
        rlist, rre = _form_master_re(relist[m:],reflags,ldict)
        return llist+rlist, lre+rre

# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token. For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------

def _statetoken(s,names):
    nonstate = 1
    parts = s.split("_")
    for i in range(1,len(parts)):
        if parts[i] not in names and parts[i] != 'ANY': break
    if i > 1:
        states = tuple(parts[1:i])
    else:
        states = ('INITIAL',)

    if 'ANY' in states:
        states = tuple(names.keys())

    tokenname = "_".join(parts[i:])
    return (states,tokenname)

# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
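#
# A minimal sketch of the module-level definitions this function consumes
# (the token names, rules, and import path are hypothetical; adjust the
# import to wherever this package is vendored):
#
#     import ply.lex as lex
#
#     tokens = ('NUMBER','PLUS')
#
#     t_PLUS = r'\+'
#     t_ignore = ' \t'
#
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):
#         t.lexer.skip(1)
#
#     lexer = lex.lex()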
# -----------------------------------------------------------------------------
def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0):
    global lexer
    ldict = None
    stateinfo = { 'INITIAL' : 'inclusive'}
    error = 0
    files = { }
    lexobj = Lexer()
    lexobj.lexdebug = debug
    lexobj.lexoptimize = optimize
    global token,input

    if nowarn: warn = 0
    else: warn = 1

    if object: module = object

    if module:
        # User supplied a module object.
        if isinstance(module, types.ModuleType):
            ldict = module.__dict__
        elif isinstance(module, _INSTANCETYPE):
            _items = [(k,getattr(module,k)) for k in dir(module)]
            ldict = { }
            for (i,v) in _items:
                ldict[i] = v
        else:
            raise ValueError("Expected a module or instance")
        lexobj.lexmodule = module

    else:
        # No module given. We might be able to get information from the caller.
        try:
            raise RuntimeError
        except RuntimeError:
            e,b,t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back                    # Walk out to our calling function
            ldict = f.f_globals             # Grab its globals dictionary

    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj

        except ImportError:
            pass

    # Get the tokens, states, and literals variables (if any)
    if (module and isinstance(module,_INSTANCETYPE)):
        tokens = getattr(module,"tokens",None)
        states = getattr(module,"states",None)
        literals = getattr(module,"literals","")
    else:
        tokens = ldict.get("tokens",None)
        states = ldict.get("states",None)
        literals = ldict.get("literals","")

    if not tokens:
        raise SyntaxError("lex: module does not define 'tokens'")
    if not (isinstance(tokens,list) or isinstance(tokens,tuple)):
        raise SyntaxError("lex: tokens must be a list or tuple.")

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    if not optimize:
        for n in tokens:
            if not _is_identifier.match(n):
                print("lex: Bad token name '%s'" % n)
                error = 1
            if warn and n in lexobj.lextokens:
                print("lex: Warning. Token '%s' multiply defined." % n)
            lexobj.lextokens[n] = None
    else:
        for n in tokens: lexobj.lextokens[n] = None

    if debug:
        print("lex: tokens = '%s'" % list(lexobj.lextokens.keys()))

    try:
        for c in literals:
            if not (isinstance(c,util.bytes_type) or isinstance(c, util.string_type)) or len(c) > 1:
                print("lex: Invalid literal %s. Must be a single character" % repr(c))
                error = 1
                continue

    except TypeError:
        print("lex: Invalid literals specification. literals must be a sequence of characters.")
        error = 1

    lexobj.lexliterals = literals

    # Build statemap
    if states:
        if not (isinstance(states,tuple) or isinstance(states,list)):
            print("lex: states must be defined as a tuple or list.")
            error = 1
        else:
            for s in states:
                if not isinstance(s,tuple) or len(s) != 2:
                    print("lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s))
                    error = 1
                    continue
                name, statetype = s
                if isinstance(name, util.string_type):
                    original_name = name
                    name = util.encode_input(name)
                if not isinstance(name,util.bytes_type) or len(original_name) != len(name):
                    print("lex: state name %s must be a byte string" % repr(original_name))
                    error = 1
                    continue
                if not (statetype == 'inclusive' or statetype == 'exclusive'):
                    print("lex: state type for state %s must be 'inclusive' or 'exclusive'" % name)
                    error = 1
                    continue
                if name in stateinfo:
                    print("lex: state '%s' already defined." % name)
                    error = 1
                    continue
                stateinfo[name] = statetype

    # Get a list of symbols with the t_ prefix
    tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ]

    # Now build up a list of functions and a list of strings

    funcsym = { }        # Symbols defined as functions
    strsym = { }         # Symbols defined as strings
    toknames = { }       # Mapping of symbols to token names

    for s in stateinfo.keys():
        funcsym[s] = []
        strsym[s] = []

    ignore = { }         # Ignore strings by state
    errorf = { }         # Error functions by state

    if len(tsymbols) == 0:
        raise SyntaxError("lex: no rules of the form t_rulename are defined.")

    for f in tsymbols:
        t = ldict[f]
        states, tokname = _statetoken(f,stateinfo)
        toknames[f] = tokname

        if callable(t):
            for s in states: funcsym[s].append((f,t))
        elif (isinstance(t, util.bytes_type) or isinstance(t,util.string_type)):
            for s in states: strsym[s].append((f,t))
        else:
            print("lex: %s not defined as a function or string" % f)
            error = 1

    # Sort the functions by line number
    for f in funcsym.values():
        f.sort(key=lambda x: x[1].__code__.co_firstlineno)

    # Sort the strings by regular expression length
    for s in strsym.values():
        s.sort(key=lambda x: len(x[1]))

    regexs = { }

    # Build the master regular expressions
    for state in stateinfo.keys():
        regex_list = []

        # Add rules defined by functions first
        for fname, f in funcsym[state]:
            line = f.__code__.co_firstlineno
            file = f.__code__.co_filename
            files[file] = None
            tokname = toknames[fname]

            ismethod = isinstance(f, types.MethodType)

            if not optimize:
                nargs = f.__code__.co_argcount
                if ismethod:
                    reqargs = 2
                else:
                    reqargs = 1
                if nargs > reqargs:
                    print("%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__))
                    error = 1
                    continue

                if nargs < reqargs:
                    print("%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__))
                    error = 1
                    continue

                if tokname == 'ignore':
                    print("%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__))
                    error = 1
                    continue

            if tokname == 'error':
                errorf[state] = f
                continue

            if f.__doc__:
                if not optimize:
                    try:
                        c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags)
                        if c.match(""):
                            print("%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__))
                            error = 1
                            continue
                    except re.error as e:
                        print("%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e))
                        if '#' in f.__doc__:
                            print("%s:%d: Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__))
                        error = 1
                        continue

                if debug:
                    print("lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state))

                # Okay. The regular expression seemed okay. Let's append it to the master regular
                # expression we're building

                regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__))
            else:
                print("%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__))

        # Now add all of the simple rules
        for name,r in strsym[state]:
            tokname = toknames[name]

            if tokname == 'ignore':
                ignore[state] = r
                continue

            if not optimize:
                if tokname == 'error':
                    raise SyntaxError("lex: Rule '%s' must be defined as a function" % name)

                if tokname not in lexobj.lextokens and tokname.find("ignore_") < 0:
                    print("lex: Rule '%s' defined for an unspecified token %s." % (name,tokname))
                    error = 1
                    continue
                try:
                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags)
                    if (c.match("")):
                        print("lex: Regular expression for rule '%s' matches empty string." % name)
                        error = 1
                        continue
                except re.error as e:
                    print("lex: Invalid regular expression for rule '%s'. %s" % (name,e))
                    if '#' in r:
                        print("lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name)

                    error = 1
                    continue
                if debug:
                    print("lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state))

            regex_list.append("(?P<%s>%s)" % (name,r))

        if not regex_list:
            print("lex: No rules defined for state '%s'" % state)
            error = 1

        regexs[state] = regex_list


    if not optimize:
        for f in files.keys():
            if not _validate_file(f):
                error = 1

    if error:
        raise SyntaxError("lex: Unable to build lexer.")

    # From this point forward, we're reasonably confident that we can build the lexer.
    # No more errors will be generated, but there might be some warning messages.

    # Build the master regular expressions

    for state in regexs.keys():
        lexre, re_text = _form_master_re(regexs[state],reflags,ldict)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        if debug:
            for i in range(len(re_text)):
                print("lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i]))

    # For inclusive states, we need to add the INITIAL state
    for state,type in stateinfo.items():
        if state != "INITIAL" and type == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]

    # Set up ignore variables
    lexobj.lexstateignore = ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = errorf
    lexobj.lexerrorf = errorf.get("INITIAL",None)
    if warn and not lexobj.lexerrorf:
        print("lex: Warning. no t_error rule is defined.")

    # Check state information for ignore and error rules
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
            if warn and s not in errorf:
                print("lex: Warning. no error rule is defined for exclusive state '%s'" % s)
            if warn and s not in ignore and lexobj.lexignore:
                print("lex: Warning. no ignore rule is defined for exclusive state '%s'" % s)
        elif stype == 'inclusive':
            if s not in errorf:
                errorf[s] = errorf.get("INITIAL",None)
            if s not in ignore:
                ignore[s] = ignore.get("INITIAL","")


    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab)

    return lexobj

# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------

def runmain(lexer=None,data=None):
    if not data:
        try:
            filename = sys.argv[1]
            f = open(filename)
            data = f.read()
            f.close()
        except IndexError:
            print("Reading from standard input (type EOF to end):")
            data = sys.stdin.read()

    if lexer:
        _input = lexer.input
    else:
        _input = input
    _input(data)
    if lexer:
        _token = lexer.token
    else:
        _token = token

    while 1:
        tok = _token()
        if not tok: break
        print("(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos))

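# A typical invocation sketch (hypothetical): placed at the bottom of a
# module that defines token rules, so the file can be run directly on an
# input file named in sys.argv[1]:
#
#     if __name__ == '__main__':
#         lex.lex()
#         lex.runmain()
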

# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator can be used to attach a regular expression to a rule function
# when the pattern needs to be built programmatically rather than written as a
# literal docstring.
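#
# A minimal sketch (hypothetical pattern and rule name):
#
#     identifier = r'[a-zA-Z_][a-zA-Z_0-9]*'
#
#     @TOKEN(identifier)
#     def t_ID(t):
#         return t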
# -----------------------------------------------------------------------------

def TOKEN(r):
    def set_doc(f):
        f.__doc__ = r
        return f
    return set_doc

# Alternative spelling of the TOKEN decorator
Token = TOKEN