Blame - Doc/tools/partparse.py - platform/external/python/cpython2

blob: 2f072bbdf1e1ff9c68d8057d270cd74b33a3665f [file] [log] [blame]

Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1	#
				2	# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
				3	# and generate texinfo source.
				4	#
				5	# This is not a good example of good programming practices. In fact, this
				6	# file could use a complete rewrite, in order to become faster, more
				7	# easily extensible and maintainable.
				8	#
				9	# However, I added some comments in a few places for the pitiful person who
				10	# would ever need to take a look into this file.
				11	#
				12	# Have I been clear enough??
				13	#
				14	# -jh
				15
				16
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	17	import sys, string, regex, getopt, os
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	18
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	19	from types import IntType, ListType, StringType, TupleType
				20
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	21	# Different parse modes for phase 1
				22	MODE_REGULAR = 0
				23	MODE_VERBATIM = 1
				24	MODE_CS_SCAN = 2
				25	MODE_COMMENT = 3
				26	MODE_MATH = 4
				27	MODE_DMATH = 5
				28	MODE_GOBBLEWHITE = 6
				29
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	30	the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
				31	MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	32
				33	# Show the neighbourhood of the scanned buffer
				34	def epsilon(buf, where):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	35	wmt, wpt = where - 10, where + 10
				36	if wmt < 0:
				37	wmt = 0
				38	if wpt > len(buf):
				39	wpt = len(buf)
				40	return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	41
				42	# Should return the line number. never worked
				43	def lin():
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	44	global lineno
				45	return ' Line ' + `lineno` + '.'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	46
				47	# Displays the recursion level.
				48	def lv(lvl):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	49	return ' Level ' + `lvl` + '.'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	50
				51	# Combine the three previous functions. Used often.
				52	def lle(lvl, buf, where):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	53	return lv(lvl) + lin() + epsilon(buf, where)
				54
				55
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	56	# This class is only needed for _symbolic_ representation of the parse mode.
				57	class Mode:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	58	def __init__(self, arg):
				59	if arg not in the_modes:
				60	raise ValueError, 'mode not in the_modes'
				61	self.mode = arg
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	62
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	63	def __cmp__(self, other):
				64	if type(self) != type(other):
				65	other = mode(other)
				66	return cmp(self.mode, other.mode)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	67
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	68	def __repr__(self):
				69	if self.mode == MODE_REGULAR:
				70	return 'MODE_REGULAR'
				71	elif self.mode == MODE_VERBATIM:
				72	return 'MODE_VERBATIM'
				73	elif self.mode == MODE_CS_SCAN:
				74	return 'MODE_CS_SCAN'
				75	elif self.mode == MODE_COMMENT:
				76	return 'MODE_COMMENT'
				77	elif self.mode == MODE_MATH:
				78	return 'MODE_MATH'
				79	elif self.mode == MODE_DMATH:
				80	return 'MODE_DMATH'
				81	elif self.mode == MODE_GOBBLEWHITE:
				82	return 'MODE_GOBBLEWHITE'
				83	else:
				84	raise ValueError, 'mode not in the_modes'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	85
				86	# just a wrapper around a class initialisation
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	87	mode = Mode
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	88
				89
				90	# After phase 1, the text consists of chunks, with a certain type
				91	# this type will be assigned to the chtype member of the chunk
				92	# the where-field contains the file position where this is found
				93	# and the data field contains (1): a tuple describing start- end end
				94	# positions of the substring (can be used as slice for the buf-variable),
				95	# (2) just a string, mostly generated by the changeit routine,
				96	# or (3) a list, describing a (recursive) subgroup of chunks
				97	PLAIN = 0 # ASSUME PLAINTEXT, data = the text
				98	GROUP = 1 # GROUP ({}), data = [chunk, chunk,..]
				99	CSNAME = 2 # CONTROL SEQ TOKEN, data = the command
				100	COMMENT = 3 # data is the actual comment
				101	DMATH = 4 # DISPLAYMATH, data = [chunk, chunk,..]
				102	MATH = 5 # MATH, see DISPLAYMATH
				103	OTHER = 6 # CHAR WITH CATCODE OTHER, data = char
				104	ACTIVE = 7 # ACTIVE CHAR
				105	GOBBLEDWHITE = 8 # Gobbled LWSP, after CSNAME
				106	ENDLINE = 9 # END-OF-LINE, data = '\n'
				107	DENDLINE = 10 # DOUBLE EOL, data='\n', indicates \par
				108	ENV = 11 # LaTeX-environment
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	109	# data =(envname,[ch,ch,ch,.])
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	110	CSLINE = 12 # for texi: next chunk will be one group
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	111	# of args. Will be set all on 1 line
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	112	IGNORE = 13 # IGNORE this data
				113	ENDENV = 14 # TEMP END OF GROUP INDICATOR
				114	IF = 15 # IF-directive
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	115	# data = (flag,negate,[ch, ch, ch,...])
				116	the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
				117	GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	118
				119	# class, just to display symbolic name
				120	class ChunkType:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	121	def __init__(self, chunk_type):
				122	if chunk_type not in the_types:
				123	raise ValueError, 'chunk_type not in the_types'
				124	self.chunk_type = chunk_type
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	125
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	126	def __cmp__(self, other):
				127	if type(self) != type(other):
				128	other = chunk_type(other)
				129	return cmp(self.chunk_type, other.chunk_type)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	130
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	131	def __repr__(self):
				132	if self.chunk_type == PLAIN:
				133	return 'PLAIN'
				134	elif self.chunk_type == GROUP:
				135	return 'GROUP'
				136	elif self.chunk_type == CSNAME:
				137	return 'CSNAME'
				138	elif self.chunk_type == COMMENT:
				139	return 'COMMENT'
				140	elif self.chunk_type == DMATH:
				141	return 'DMATH'
				142	elif self.chunk_type == MATH:
				143	return 'MATH'
				144	elif self.chunk_type == OTHER:
				145	return 'OTHER'
				146	elif self.chunk_type == ACTIVE:
				147	return 'ACTIVE'
				148	elif self.chunk_type == GOBBLEDWHITE:
				149	return 'GOBBLEDWHITE'
				150	elif self.chunk_type == DENDLINE:
				151	return 'DENDLINE'
				152	elif self.chunk_type == ENDLINE:
				153	return 'ENDLINE'
				154	elif self.chunk_type == ENV:
				155	return 'ENV'
				156	elif self.chunk_type == CSLINE:
				157	return 'CSLINE'
				158	elif self.chunk_type == IGNORE:
				159	return 'IGNORE'
				160	elif self.chunk_type == ENDENV:
				161	return 'ENDENV'
				162	elif self.chunk_type == IF:
				163	return 'IF'
				164	else:
				165	raise ValueError, 'chunk_type not in the_types'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	166
				167	# ...and the wrapper
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	168	_all_chunk_types = {}
				169	for t in the_types:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	170	_all_chunk_types[t] = ChunkType(t)
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	171
				172	def chunk_type(t):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	173	return _all_chunk_types[t]
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	174
				175	# store a type object of the ChunkType-class-instance...
				176	chunk_type_type = type(chunk_type(0))
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	177
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	178	# this class contains a part of the parsed buffer
				179	class Chunk:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	180	def __init__(self, chtype, where, data):
				181	if type(chtype) != chunk_type_type:
				182	chtype = chunk_type(chtype)
				183	self.chtype = chtype
				184	if type(where) != IntType:
				185	raise TypeError, "'where' is not a number"
				186	self.where = where
				187	self.data = data
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	188
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	189	def __repr__(self):
				190	return 'chunk' + `self.chtype, self.where, self.data`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	191
				192	# and the wrapper
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	193	chunk = Chunk
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	194
				195
				196	error = 'partparse.error'
				197
				198	#
				199	# TeX's catcodes...
				200	#
				201	CC_ESCAPE = 0
				202	CC_LBRACE = 1
				203	CC_RBRACE = 2
				204	CC_MATHSHIFT = 3
				205	CC_ALIGNMENT = 4
				206	CC_ENDLINE = 5
				207	CC_PARAMETER = 6
				208	CC_SUPERSCRIPT = 7
				209	CC_SUBSCRIPT = 8
				210	CC_IGNORE = 9
				211	CC_WHITE = 10
				212	CC_LETTER = 11
				213	CC_OTHER = 12
				214	CC_ACTIVE = 13
				215	CC_COMMENT = 14
				216	CC_INVALID = 15
				217
				218	# and the names
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	219	cc_names = [
				220	'CC_ESCAPE',
				221	'CC_LBRACE',
				222	'CC_RBRACE',
				223	'CC_MATHSHIFT',
				224	'CC_ALIGNMENT',
				225	'CC_ENDLINE',
				226	'CC_PARAMETER',
				227	'CC_SUPERSCRIPT',
				228	'CC_SUBSCRIPT',
				229	'CC_IGNORE',
				230	'CC_WHITE',
				231	'CC_LETTER',
				232	'CC_OTHER',
				233	'CC_ACTIVE',
				234	'CC_COMMENT',
				235	'CC_INVALID',
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	236	]
				237
				238	# Show a list of catcode-name-symbols
				239	def pcl(codelist):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	240	result = ''
				241	for i in codelist:
				242	result = result + cc_names[i] + ', '
				243	return '[' + result[:-2] + ']'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	244
				245	# the name of the catcode (ACTIVE, OTHER, etc.)
				246	def pc(code):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	247	return cc_names[code]
				248
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	249
				250	# Which catcodes make the parser stop parsing regular plaintext
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	251	regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
				252	CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	253	CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]
				254
				255	# same for scanning a control sequence name
				256	csname_scancodes = [CC_LETTER]
				257
				258	# same for gobbling LWSP
				259	white_scancodes = [CC_WHITE]
				260	##white_scancodes = [CC_WHITE, CC_ENDLINE]
				261
				262	# make a list of all catcode id's, except for catcode ``other''
				263	all_but_other_codes = range(16)
				264	del all_but_other_codes[CC_OTHER]
				265	##print all_but_other_codes
				266
				267	# when does a comment end
				268	comment_stopcodes = [CC_ENDLINE]
				269
				270	# gather all characters together, specified by a list of catcodes
				271	def code2string(cc, codelist):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	272	##print 'code2string: codelist = ' + pcl(codelist),
				273	result = ''
				274	for category in codelist:
				275	if cc[category]:
				276	result = result + cc[category]
				277	##print 'result = ' + `result`
				278	return result
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	279
				280	# automatically generate all characters of catcode other, being the
				281	# complement set in the ASCII range (128 characters)
				282	def make_other_codes(cc):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	283	otherchars = range(256) # could be made 256, no problem
				284	for category in all_but_other_codes:
				285	if cc[category]:
				286	for c in cc[category]:
				287	otherchars[ord(c)] = None
				288	result = ''
				289	for i in otherchars:
				290	if i != None:
				291	result = result + chr(i)
				292	return result
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	293
				294	# catcode dump (which characters have which catcodes).
				295	def dump_cc(name, cc):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	296	##print '\t' + name
				297	##print '=' * (8+len(name))
				298	if len(cc) != 16:
				299	raise TypeError, 'cc not good cat class'
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	300	## for i in range(16):
				301	## print pc(i) + '\t' + `cc[i]`
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	302
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	303
				304	# In the beginning,....
				305	epoch_cc = [None] * 16
				306	##dump_cc('epoch_cc', epoch_cc)
				307
				308
				309	# INITEX
				310	initex_cc = epoch_cc[:]
				311	initex_cc[CC_ESCAPE] = '\\'
				312	initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
				313	'\n', '\0', ' '
				314	initex_cc[CC_LETTER] = string.uppercase + string.lowercase
				315	initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
				316	#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
				317	##dump_cc('initex_cc', initex_cc)
				318
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	319
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	320	# LPLAIN: LaTeX catcode setting (see lplain.tex)
				321	lplain_cc = initex_cc[:]
				322	lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
				323	lplain_cc[CC_MATHSHIFT] = '$'
				324	lplain_cc[CC_ALIGNMENT] = '&'
				325	lplain_cc[CC_PARAMETER] = '#'
				326	lplain_cc[CC_SUPERSCRIPT] = '^\x0B' # '^' and C-k
				327	lplain_cc[CC_SUBSCRIPT] = '_\x01' # '_' and C-a
				328	lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
				329	lplain_cc[CC_ACTIVE] = '~\x0C' # '~' and C-l
				330	lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
				331	##dump_cc('lplain_cc', lplain_cc)
				332
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	333
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	334	# Guido's LaTeX environment catcoded '_' as ``other''
				335	# my own purpose catlist
				336	my_cc = lplain_cc[:]
				337	my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
				338	my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_' # add it to OTHER list
				339	dump_cc('my_cc', my_cc)
				340
				341
				342
				343	# needed for un_re, my equivalent for regexp-quote in Emacs
				344	re_meaning = '\\[]^$'
				345
				346	def un_re(str):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	347	result = ''
				348	for i in str:
				349	if i in re_meaning:
				350	result = result + '\\'
				351	result = result + i
				352	return result
				353
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	354	# NOTE the negate ('^') operator in some of the regexps below
				355	def make_rc_regular(cc):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	356	# problems here if '[]' are included!!
				357	return regex.compile('[' + code2string(cc, regular_stopcodes) + ']')
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	358
				359	def make_rc_cs_scan(cc):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	360	return regex.compile('[^' + code2string(cc, csname_scancodes) + ']')
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	361
				362	def make_rc_comment(cc):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	363	return regex.compile('[' + code2string(cc, comment_stopcodes) + ']')
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	364
				365	def make_rc_endwhite(cc):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	366	return regex.compile('[^' + code2string(cc, white_scancodes) + ']')
				367
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	368
				369
# The compiled scanners used by parseit, all built from my_cc.

# regular: normal mode: finds the next character that ends plain text
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the character that ends a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the first character that is not white space
rc_endwhite = make_rc_endwhite(my_cc)
				377
				378
				379	# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
				380	# RECURSION-LEVEL will is incremented on entry.
				381	# result contains the list of chunks returned
				382	# together with this list, the buffer position is returned
				383
				384	# RECURSION-LEVEL will be set to zero again, when recursively a
				385	# {,D}MATH-mode scan has been enetered.
				386	# This has been done in order to better check for environment-mismatches
				387
				388	def parseit(buf, *rest):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	389	global lineno
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	390
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	391	if len(rest) == 3:
				392	parsemode, start, lvl = rest
				393	elif len(rest) == 2:
				394	parsemode, start, lvl = rest + (0, )
				395	elif len(rest) == 1:
				396	parsemode, start, lvl = rest + (0, 0)
				397	elif len(rest) == 0:
				398	parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
				399	else:
				400	raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
				401	result = []
				402	end = len(buf)
				403	if lvl == 0 and parsemode == mode(MODE_REGULAR):
				404	lineno = 1
				405	lvl = lvl + 1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	406
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	407	##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	408
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	409	#
				410	# some of the more regular modes...
				411	#
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	412
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	413	if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
				414	cstate = []
				415	newpos = start
				416	curpmode = parsemode
				417	while 1:
				418	where = newpos
				419	#print '\tnew round: ' + epsilon(buf, where)
				420	if where == end:
				421	if lvl > 1 or curpmode != mode(MODE_REGULAR):
				422	# not the way we started...
				423	raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
				424	# the real ending of lvl-1 parse
				425	return end, result
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	426
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	427	pos = rc_regular.search(buf, where)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	428
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	429	if pos < 0:
				430	pos = end
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	431
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	432	if pos != where:
				433	newpos, c = pos, chunk(PLAIN, where, (where, pos))
				434	result.append(c)
				435	continue
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	436
				437
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	438	#
				439	# ok, pos == where and pos != end
				440	#
				441	foundchar = buf[where]
				442	if foundchar in my_cc[CC_LBRACE]:
				443	# recursive subgroup parse...
				444	newpos, data = parseit(buf, curpmode, where+1, lvl)
				445	result.append(chunk(GROUP, where, data))
				446
				447	elif foundchar in my_cc[CC_RBRACE]:
				448	if lvl <= 1:
				449	raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
				450	if lvl == 1 and mode != mode(MODE_REGULAR):
				451	raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
				452	return where + 1, result
				453
				454	elif foundchar in my_cc[CC_ESCAPE]:
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	455	#
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	456	# call the routine that actually deals with
				457	# this problem. If do_ret is None, than
				458	# return the value of do_ret
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	459	#
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	460	# Note that handle_cs might call this routine
				461	# recursively again...
				462	#
				463	do_ret, newpos = handlecs(buf, where,
				464	curpmode, lvl, result, end)
				465	if do_ret != None:
				466	return do_ret
				467
				468	elif foundchar in my_cc[CC_COMMENT]:
				469	newpos, data = parseit(buf,
				470	mode(MODE_COMMENT), where+1, lvl)
				471	result.append(chunk(COMMENT, where, data))
				472
				473	elif foundchar in my_cc[CC_MATHSHIFT]:
				474	# note that recursive calls to math-mode
				475	# scanning are called with recursion-level 0
				476	# again, in order to check for bad mathend
				477	#
				478	if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]:
				479	#
				480	# double mathshift, e.g. '$$'
				481	#
				482	if curpmode == mode(MODE_REGULAR):
				483	newpos, data = parseit(buf,
				484	mode(MODE_DMATH),
				485	where+2, 0)
				486	result.append(chunk(DMATH,
				487	where, data))
				488	elif curpmode == mode(MODE_MATH):
				489	raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
				490	elif lvl != 1:
				491	raise error, 'bad mathend.' + lle(lvl, buf, where)
				492	else:
				493	return where + 2, result
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	494	else:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	495	#
				496	# single math shift, e.g. '$'
				497	#
				498	if curpmode == mode(MODE_REGULAR):
				499	newpos, data = parseit(buf,
				500	mode(MODE_MATH),
				501	where+1, 0)
				502	result.append(chunk(MATH,
				503	where, data))
				504	elif curpmode == mode(MODE_DMATH):
				505	raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
				506	elif lvl != 1:
				507	raise error, 'bad mathend.' + lv(lvl, buf, where)
				508	else:
				509	return where + 1, result
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	510
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	511	elif foundchar in my_cc[CC_IGNORE]:
				512	print 'warning: ignored char', `foundchar`
				513	newpos = where + 1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	514
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	515	elif foundchar in my_cc[CC_ACTIVE]:
				516	result.append(chunk(ACTIVE, where, foundchar))
				517	newpos = where + 1
				518
				519	elif foundchar in my_cc[CC_INVALID]:
				520	raise error, 'invalid char ' + `foundchar`
				521	newpos = where + 1
				522
				523	elif foundchar in my_cc[CC_ENDLINE]:
				524	#
				525	# after an end of line, eat the rest of
				526	# whitespace on the beginning of the next line
				527	# this is what LaTeX more or less does
				528	#
				529	# also, try to indicate double newlines (\par)
				530	#
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	531	lineno = lineno + 1
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	532	savedwhere = where
				533	newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
				534	if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]:
				535	result.append(chunk(DENDLINE, savedwhere, foundchar))
				536	else:
				537	result.append(chunk(ENDLINE, savedwhere, foundchar))
				538	else:
				539	result.append(chunk(OTHER, where, foundchar))
				540	newpos = where + 1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	541
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	542	elif parsemode == mode(MODE_CS_SCAN):
				543	#
				544	# scan for a control sequence token. `\ape', `\nut' or `\%'
				545	#
				546	if start == end:
				547	raise EOFError, 'can\'t find end of csname'
				548	pos = rc_cs_scan.search(buf, start)
				549	if pos < 0:
				550	pos = end
				551	if pos == start:
				552	# first non-letter right where we started the search
				553	# ---> the control sequence name consists of one single
				554	# character. Also: don't eat white space...
				555	if buf[pos] in my_cc[CC_ENDLINE]:
				556	lineno = lineno + 1
				557	pos = pos + 1
				558	return pos, (start, pos)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	559	else:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	560	spos = pos
				561	if buf[pos] == '\n':
				562	lineno = lineno + 1
				563	spos = pos + 1
				564	pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), spos, lvl)
				565	return pos2, (start, pos)
				566
				567	elif parsemode == mode(MODE_GOBBLEWHITE):
				568	if start == end:
				569	return start, ''
				570	pos = rc_endwhite.search(buf, start)
				571	if pos < 0:
				572	pos = start
				573	return pos, (start, pos)
				574
				575	elif parsemode == mode(MODE_COMMENT):
				576	pos = rc_comment.search(buf, start)
				577	lineno = lineno + 1
				578	if pos < 0:
				579	print 'no newline perhaps?'
				580	raise EOFError, 'can\'t find end of comment'
				581	pos = pos + 1
				582	pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
				583	return pos2, (start, pos)
				584
				585
				586	else:
				587	raise error, 'Unknown mode (' + `parsemode` + ')'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	588
				589
				590	#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
				591
				592	#boxcommands = 'mbox', 'fbox'
				593	#defcommands = 'def', 'newcommand'
				594
# the literal text that ends a verbatim environment...
endverbstr = '\\end{verbatim}'

# ...and the compiled pattern for it, quoted by un_re so the backslash
# is matched literally
re_endverb = regex.compile(un_re(endverbstr))
				598
				599	#
				600	# handlecs: helper function for parseit, for the special thing we might
				601	# wanna do after certain command control sequences
				602	# returns: None or return_data, newpos
				603	#
				604	# in the latter case, the calling function is instructed to immediately
				605	# return with the data in return_data
				606	#
				607	def handlecs(buf, where, curpmode, lvl, result, end):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	608	global lineno
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	609
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	610	# get the control sequence name...
				611	newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
				612	saveddata = data
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	613
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	614	if s(buf, data) in ('begin', 'end'):
				615	# skip the expected '{' and get the LaTeX-envname '}'
				616	newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
				617	if len(data) != 1:
				618	raise error, 'expected 1 chunk of data.' + \
				619	lle(lvl, buf, where)
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	620
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	621	# yucky, we've got an environment
				622	envname = s(buf, data[0].data)
				623	##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
				624	if s(buf, saveddata) == 'begin' and envname == 'verbatim':
				625	# verbatim deserves special treatment
				626	pos = re_endverb.search(buf, newpos)
				627	if pos < 0:
				628	raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
				629	result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
				630	newpos = pos + len(endverbstr)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	631
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	632	elif s(buf, saveddata) == 'begin':
				633	# start parsing recursively... If that parse returns
				634	# from an '\end{...}', then should the last item of
				635	# the returned data be a string containing the ended
				636	# environment
				637	newpos, data = parseit(buf, curpmode, newpos, lvl)
				638	if not data or type(data[-1]) is not StringType:
				639	raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
				640	retenv = data[-1]
				641	del data[-1]
				642	if retenv != envname:
				643	#[`retenv`, `envname`]
				644	raise error, 'environments do not match.' + \
				645	lle(lvl, buf, where) + \
				646	epsilon(buf, newpos)
				647	result.append(chunk(ENV, where, (retenv, data)))
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	648	else:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	649	# 'end'... append the environment name, as just
				650	# pointed out, and order parsit to return...
				651	result.append(envname)
				652	##print 'POINT of return: ' + epsilon(buf, newpos)
				653	# the tuple will be returned by parseit
				654	return (newpos, result), newpos
				655
				656	# end of \begin ... \end handling
				657
				658	elif s(buf, data)[0:2] == 'if':
				659	# another scary monster: the 'if' directive
				660	flag = s(buf, data)[2:]
				661
				662	# recursively call parseit, just like environment above..
				663	# the last item of data should contain the if-termination
				664	# e.g., 'else' of 'fi'
				665	newpos, data = parseit(buf, curpmode, newpos, lvl)
				666	if not data or data[-1] not in ('else', 'fi'):
				667	raise error, 'wrong if... termination' + \
				668	lle(lvl, buf, where) + epsilon(buf, newpos)
				669
				670	ifterm = data[-1]
				671	del data[-1]
				672	# 0 means dont_negate flag
				673	result.append(chunk(IF, where, (flag, 0, data)))
				674	if ifterm == 'else':
				675	# do the whole thing again, there is only one way
				676	# to end this one, by 'fi'
				677	newpos, data = parseit(buf, curpmode, newpos, lvl)
				678	if not data or data[-1] not in ('fi', ):
				679	raise error, 'wrong if...else... termination' \
				680	+ lle(lvl, buf, where) \
				681	+ epsilon(buf, newpos)
				682
				683	ifterm = data[-1]
				684	del data[-1]
				685	result.append(chunk(IF, where, (flag, 1, data)))
				686	#done implicitely: return None, newpos
				687
				688	elif s(buf, data) in ('else', 'fi'):
				689	result.append(s(buf, data))
				690	# order calling party to return tuple
				691	return (newpos, result), newpos
				692
				693	# end of \if, \else, ... \fi handling
				694
				695	elif s(buf, saveddata) == 'verb':
				696	x2 = saveddata[1]
				697	result.append(chunk(CSNAME, where, data))
				698	if x2 == end:
				699	raise error, 'premature end of command.' + lle(lvl, buf, where)
				700	delimchar = buf[x2]
				701	##print 'VERB: delimchar ' + `delimchar`
				702	pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
				703	if pos < 0:
				704	raise error, 'end of \'verb\' argument (' + \
				705	`delimchar` + ') not found.' + \
				706	lle(lvl, buf, where)
				707	result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
				708	newpos = pos + 1
				709	else:
				710	result.append(chunk(CSNAME, where, data))
				711	return None, newpos
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	712
				713	# this is just a function to get the string value if the possible data-tuple
				714	def s(buf, data):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	715	if type(data) is StringType:
				716	return data
				717	if len(data) != 2 or not (type(data[0]) is type(data[1]) is IntType):
				718	raise TypeError, 'expected tuple of 2 integers'
				719	x1, x2 = data
				720	return buf[x1:x2]
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	721
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	722
				723	##length, data1, i = getnextarg(length, buf, pp, i + 1)
				724
				725	# make a deep-copy of some chunks
				726	def crcopy(r):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	727	return map(chunkcopy, r)
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	728
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	729
				730	# copy a chunk, would better be a method of class Chunk...
				731	def chunkcopy(ch):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	732	if ch.chtype == chunk_type(GROUP):
				733	return chunk(GROUP, ch.where, map(chunkcopy, ch.data))
				734	else:
				735	return chunk(ch.chtype, ch.where, ch.data)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	736
				737
# get next argument for TeX-macro, flatten a group (insert between)
# or return Command Sequence token, or give back one character
#
# Arguments:
#   length -- the caller's running count of the chunks in pp
#   buf    -- source buffer, handed to s() to resolve (x1, x2) chunk data
#   pp     -- list of chunks; it is modified in place
#   item   -- index in pp at which the argument is expected
# Returns (length, newitem): the updated count and the index just past
# the argument, so that pp[item:newitem] is the argument.
# Raises error when there is no chunk left after skipping ENDLINEs.
# NOTE(review): block nesting was reconstructed from a listing that had
# lost its indentation -- confirm against the repository copy.
def getnextarg(length, buf, pp, item):

    ##wobj = Wobj()
    ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    # line ends in front of the argument are dropped from pp
    while item < length and pp[item].chtype == chunk_type(ENDLINE):
        del pp[item]
        length = length - 1
    if item >= length:
        raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
    if pp[item].chtype == chunk_type(GROUP):
        # replace the group by its (converted) contents and step past them
        newpp = pp[item].data
        del pp[item]
        length = length - 1
        changeit(buf, newpp)
        length = length + len(newpp)
        pp[item:item] = newpp
        item = item + len(newpp)
        if len(newpp) < 10:
            # only feeds the commented-out debug print below
            wobj = Wobj()
            dumpit(buf, wobj.write, newpp)
            ##print 'GETNEXTARG: inserted ' + `wobj.data`
        return length, item
    elif pp[item].chtype == chunk_type(PLAIN):
        #grab one char
        print 'WARNING: grabbing one char'
        if len(s(buf, pp[item].data)) > 1:
            # split off the first character as a chunk of its own; the
            # old chunk (now one position further) keeps the rest
            pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
            item, length = item+1, length+1
            pp[item].data = s(buf, pp[item].data)[1:]
        else:
            item = item+1
        return length, item
    else:
        # any other chunk type: report it and return without advancing
        ch = pp[item]
        try:
            str = `s(buf, ch.data)`
        except TypeError:
            str = `ch.data`
            if len(str) > 400:
                str = str[:400] + '...'
        print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
        return length, item
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	784
				785
# this one is needed to find the end of LaTeX's optional argument, like
# item[...]
# (getoptarg() calls its search() method on the text of PLAIN chunks)
re_endopt = regex.compile(']')
				789
				790	# get a LaTeX-optional argument, you know, the square braces '[' and ']'
				791	def getoptarg(length, buf, pp, item):
				792
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	793	wobj = Wobj()
				794	dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
				795	##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	796
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	797	if item >= length or \
				798	pp[item].chtype != chunk_type(PLAIN) or \
				799	s(buf, pp[item].data)[0] != '[':
				800	return length, item
				801
				802	pp[item].data = s(buf, pp[item].data)[1:]
				803	if len(pp[item].data) == 0:
				804	del pp[item]
				805	length = length-1
				806
				807	while 1:
				808	if item == length:
				809	raise error, 'No end of optional arg found'
				810	if pp[item].chtype == chunk_type(PLAIN):
				811	text = s(buf, pp[item].data)
				812	pos = re_endopt.search(text)
				813	if pos >= 0:
				814	pp[item].data = text[:pos]
				815	if pos == 0:
				816	del pp[item]
				817	length = length-1
				818	else:
				819	item=item+1
				820	text = text[pos+1:]
				821
				822	while text and text[0] in ' \t':
				823	text = text[1:]
				824
				825	if text:
				826	pp.insert(item, chunk(PLAIN, 0, text))
				827	length = length + 1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	828	return length, item
				829
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	830	item = item+1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	831
				832
				833	# Wobj just add write-requests to the ``data'' attribute
				834	class Wobj:
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	835	data = ''
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	836
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	837	def write(self, data):
				838	self.data = self.data + data
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	839
# commands that flattext() drops without a warning
ignoredcommands = (
    'bcode',
    'ecode',
)

# commands whose name is also their plaintext rendering
wordsselves = (
    'UNIX',
    'ABC',
    'C',
    'ASCII',
    'EOF',
    'LaTeX',
)

# \{ --> {, \} --> }, etc
themselves = (
    '{',
    '}',
    '.',
    '@',
    ' ',
    '\n',
) + wordsselves

# these map to themselves as well (see argargs macro in myformat.sty)
inargsselves = (
    ',',
    '[',
    ']',
    '(',
    ')',
)

# how flattext() shows the difference between emph and strong:
# a tuple is (text put before, text put after),
# code 1 means: fold to uppercase
markcmds = {
    'code': ('', ''),
    'var': 1,
    'emph': ('_', '_'),
    'strong': ('', ''),
}

# recognise pattern {\FONTCHANGE-CMD TEXT} and turn it into
# \MAPPED-FC-CMD{TEXT}
fontchanges = {
    'rm': 'r',
    'it': 'i',
    'em': 'emph',
    'bf': 'b',
    'tt': 't',
}

# transparent for these commands
for_texi = (
    'emph',
    'var',
    'strong',
    'code',
    'kbd',
    'key',
    'dfn',
    'samp',
    'file',
    'r',
    'i',
    't',
)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	859
				860
# try to remove macros and return flat text
#
# Arguments:
#   buf -- source buffer, handed to s() to resolve (x1, x2) chunk data
#   pp  -- list of chunks; the work is done on a crcopy() of it
# Returns the string that dumpit() writes for the flattened chunk list.
# NOTE(review): block nesting was reconstructed from a listing that had
# lost its indentation -- confirm against the repository copy.
def flattext(buf, pp):
    pp = crcopy(pp)
    ##print '---> FLATTEXT ' + `pp`
    wobj = Wobj()

    # i is the index of the chunk *after* ch; length shadows len(pp) and
    # is corrected by hand after every insertion or deletion
    i, length = 0, len(pp)
    while 1:
        if len(pp) != length:
            raise 'FATAL', 'inconsistent length'
        if i >= length:
            break
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type(PLAIN):
            pass
        elif ch.chtype == chunk_type(CSNAME):
            # commands standing for themselves simply become plain text
            if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
                ch.chtype = chunk_type(PLAIN)
            elif s(buf, ch.data) == 'e':
                # \e becomes a backslash
                ch.chtype = chunk_type(PLAIN)
                ch.data = '\\'
            elif len(s(buf, ch.data)) == 1 \
                 and s(buf, ch.data) in onlylatexspecial:
                ch.chtype = chunk_type(PLAIN)
                # if it is followed by an empty group,
                # remove that group, it was needed for
                # a true space
                if i < length \
                   and pp[i].chtype==chunk_type(GROUP) \
                   and len(pp[i].data) == 0:
                    del pp[i]
                    length = length-1

            elif s(buf, ch.data) in markcmds.keys():
                # flatten the argument recursively and fold it, marked up
                # as markcmds prescribes, into the command chunk itself
                length, newi = getnextarg(length, buf, pp, i)
                str = flattext(buf, pp[i:newi])
                del pp[i:newi]
                length = length - (newi - i)
                ch.chtype = chunk_type(PLAIN)
                markcmd = s(buf, ch.data)
                x = markcmds[markcmd]
                if type(x) == TupleType:
                    pre, after = x
                    str = pre+str+after
                elif x == 1:
                    str = string.upper(str)
                else:
                    raise 'FATAL', 'corrupt markcmds'
                ch.data = str
            else:
                # unknown command: remove it, warn unless it is listed in
                # ignoredcommands
                if s(buf, ch.data) not in ignoredcommands:
                    print 'WARNING: deleting command ' + `s(buf, ch.data)`
                    print 'PP' + `pp[i-1]`
                del pp[i-1]
                i, length = i-1, length-1
        elif ch.chtype == chunk_type(GROUP):
            # getnextarg() splices the group's contents into pp at i-1;
            # step back so that those chunks are examined next
            length, newi = getnextarg(length, buf, pp, i-1)
            i = i-1
##          str = flattext(buf, crcopy(pp[i-1:newi]))
##          del pp[i:newi]
##          length = length - (newi - i)
##          ch.chtype = chunk_type(PLAIN)
##          ch.data = str
        else:
            pass

    dumpit(buf, wobj.write, pp)
    ##print 'FLATTEXT: RETURNING ' + `wobj.data`
    return wobj.data
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	931
				932	# try to generate node names (a bit shorter than the chapter title)
				933	# note that the \nodename command (see elsewhere) overules these efforts
				934	def invent_node_names(text):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	935	words = string.split(text)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	936
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	937	##print 'WORDS ' + `words`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	938
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	939	if len(words) == 2 \
				940	and string.lower(words[0]) == 'built-in' \
				941	and string.lower(words[1]) not in ('modules', 'functions'):
				942	return words[1]
				943	if len(words) == 3 and string.lower(words[1]) == 'module':
				944	return words[2]
				945	if len(words) == 3 and string.lower(words[1]) == 'object':
				946	return string.join(words[0:2])
				947	if len(words) > 4 and string.lower(string.join(words[-4:])) == \
				948	'methods and data attributes':
				949	return string.join(words[:2])
				950	return text
				951
# the characters that rm_commas_etc() strips from node names
re_commas_etc = regex.compile('[,`\'@{}]')

# a (possibly empty) run of blanks and tabs; next_command_p() uses it to
# recognise PLAIN chunks that hold nothing else
re_whitespace = regex.compile('[ \t]*')
				955
				956
				957	##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
				958
				959	# look if the next non-white stuff is also a command, resulting in skipping
				960	# double endlines (DENDLINE) too, and thus omitting \par's
				961	# Sometimes this is too much, maybe consider DENDLINE's as stop
				962	def next_command_p(length, buf, pp, i, cmdname):
				963
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	964	while 1:
				965	if i >= len(pp):
				966	break
				967	ch = pp[i]
				968	i = i+1
				969	if ch.chtype == chunk_type(ENDLINE):
				970	continue
				971	if ch.chtype == chunk_type(DENDLINE):
				972	continue
				973	if ch.chtype == chunk_type(PLAIN):
				974	if re_whitespace.search(s(buf, ch.data)) == 0 and \
				975	re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)):
				976	continue
				977	return -1
				978	if ch.chtype == chunk_type(CSNAME):
				979	if s(buf, ch.data) == cmdname:
				980	return i # _after_ the command
				981	return -1
				982	return -1
				983
				984
# things that are special to LaTeX, but not to texi..
onlylatexspecial = '_~^$#&%'


# empty class, instances serve as plain attribute holders
class Struct:
    pass


# the two module-wide state objects; startchange() fills in their fields
hist, out = Struct(), Struct()
				992
				993	def startchange():
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	994	global hist, out
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	995
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	996	hist.inenv = []
				997	hist.nodenames = []
				998	hist.cindex = []
				999	hist.inargs = 0
				1000	hist.enumeratenesting, hist.itemizenesting = 0, 0
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1001
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1002	out.doublenodes = []
				1003	out.doublecindeces = []
				1004
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1005
# ready-made one-element chunk lists (all built with where = 0)
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
# (changeit() indexes this list with hist.itemizenesting)
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
# (indexed with hist.enumeratenesting)
enumeratesymbols = ['1', 'A', 'a']
				1015
				1016	##
				1017	## \begin{ {func,data,exc}desc }{name}...
				1018	## the resulting texi-code is dependent on the contents of indexsubitem
				1019	##
				1020
# indexsubitem: `['XXX', 'function']`
				1022	# funcdesc:
				1023	# deffn {`idxsi`} NAME (FUNCARGS)
				1024
				1025	# indexsubitem: `['XXX', 'method']`
				1026	# funcdesc:
				1027	# defmethod {`idxsi[0]`} NAME (FUNCARGS)
				1028
# indexsubitem: `['in', 'module', 'MODNAME']`
				1030	# datadesc:
				1031	# defcv data {`idxsi[1:]`} NAME
				1032	# excdesc:
				1033	# defcv exception {`idxsi[1:]`} NAME
				1034	# funcdesc:
				1035	# deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
				1036
# indexsubitem: `['OBJECT', 'attribute']`
				1038	# datadesc
				1039	# defcv attribute {`OBJECT`} NAME
				1040
				1041
				1042	## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
				1043	## or \funcline{NAME}{ARGS}
				1044	##
				1045	def do_funcdesc(length, buf, pp, i):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1046	startpoint = i-1
				1047	ch = pp[startpoint]
				1048	wh = ch.where
				1049	length, newi = getnextarg(length, buf, pp, i)
				1050	funcname = chunk(GROUP, wh, pp[i:newi])
				1051	del pp[i:newi]
				1052	length = length - (newi-i)
				1053	save = hist.inargs
				1054	hist.inargs = 1
				1055	length, newi = getnextarg(length, buf, pp, i)
				1056	hist.inargs = save
				1057	del save
				1058	the_args = [chunk(PLAIN, wh, '()'[0])] + pp[i:newi] + \
				1059	[chunk(PLAIN, wh, '()'[1])]
				1060	del pp[i:newi]
				1061	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1062
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1063	idxsi = hist.indexsubitem # words
				1064	command = ''
				1065	cat_class = ''
				1066	if idxsi and idxsi[-1] in ('method', 'protocol'):
				1067	command = 'defmethod'
				1068	cat_class = string.join(idxsi[:-1])
				1069	elif len(idxsi) == 2 and idxsi[1] == 'function':
				1070	command = 'deffn'
				1071	cat_class = string.join(idxsi)
				1072	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
				1073	command = 'deffn'
				1074	cat_class = 'function of ' + string.join(idxsi[1:])
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1075
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1076	if not command:
				1077	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1078
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1079	ch.chtype = chunk_type(CSLINE)
				1080	ch.data = command
				1081
				1082	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
				1083	cslinearg.append(chunk(PLAIN, wh, ' '))
				1084	cslinearg.append(funcname)
				1085	cslinearg.append(chunk(PLAIN, wh, ' '))
				1086	l = len(cslinearg)
				1087	cslinearg[l:l] = the_args
				1088
				1089	pp.insert(i, chunk(GROUP, wh, cslinearg))
				1090	i, length = i+1, length+1
				1091	hist.command = command
				1092	return length, i
				1093
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1094
				1095	## this routine will be called on \begin{excdesc}{NAME}
				1096	## or \excline{NAME}
				1097	##
				1098	def do_excdesc(length, buf, pp, i):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1099	startpoint = i-1
				1100	ch = pp[startpoint]
				1101	wh = ch.where
				1102	length, newi = getnextarg(length, buf, pp, i)
				1103	excname = chunk(GROUP, wh, pp[i:newi])
				1104	del pp[i:newi]
				1105	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1106
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1107	idxsi = hist.indexsubitem # words
				1108	command = ''
				1109	cat_class = ''
				1110	class_class = ''
				1111	if len(idxsi) == 2 and idxsi[1] == 'exception':
				1112	command = 'defvr'
				1113	cat_class = string.join(idxsi)
				1114	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
				1115	command = 'defcv'
				1116	cat_class = 'exception'
				1117	class_class = string.join(idxsi[1:])
				1118	elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
				1119	command = 'defcv'
				1120	cat_class = 'exception'
				1121	class_class = string.join(idxsi[2:])
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1122
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1123
				1124	if not command:
				1125	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
				1126
				1127	ch.chtype = chunk_type(CSLINE)
				1128	ch.data = command
				1129
				1130	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
				1131	cslinearg.append(chunk(PLAIN, wh, ' '))
				1132	if class_class:
				1133	cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1134	cslinearg.append(chunk(PLAIN, wh, ' '))
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1135	cslinearg.append(excname)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1136
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1137	pp.insert(i, chunk(GROUP, wh, cslinearg))
				1138	i, length = i+1, length+1
				1139	hist.command = command
				1140	return length, i
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1141
				1142	## same for datadesc or dataline...
				1143	def do_datadesc(length, buf, pp, i):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1144	startpoint = i-1
				1145	ch = pp[startpoint]
				1146	wh = ch.where
				1147	length, newi = getnextarg(length, buf, pp, i)
				1148	dataname = chunk(GROUP, wh, pp[i:newi])
				1149	del pp[i:newi]
				1150	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1151
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1152	idxsi = hist.indexsubitem # words
				1153	command = ''
				1154	cat_class = ''
				1155	class_class = ''
				1156	if idxsi[-1] in ('attribute', 'option'):
				1157	command = 'defcv'
				1158	cat_class = idxsi[-1]
				1159	class_class = string.join(idxsi[:-1])
				1160	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
				1161	command = 'defcv'
				1162	cat_class = 'data'
				1163	class_class = string.join(idxsi[1:])
				1164	elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
				1165	command = 'defcv'
				1166	cat_class = 'data'
				1167	class_class = string.join(idxsi[2:])
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1168
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1169
				1170	if not command:
				1171	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
				1172
				1173	ch.chtype = chunk_type(CSLINE)
				1174	ch.data = command
				1175
				1176	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
				1177	cslinearg.append(chunk(PLAIN, wh, ' '))
				1178	if class_class:
				1179	cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1180	cslinearg.append(chunk(PLAIN, wh, ' '))
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1181	cslinearg.append(dataname)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1182
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1183	pp.insert(i, chunk(GROUP, wh, cslinearg))
				1184	i, length = i+1, length+1
				1185	hist.command = command
				1186	return length, i
				1187
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1188
				1189	# regular indices: those that are not set in tt font by default....
				1190	regindices = ('cindex', )
				1191
				1192	# remove illegal characters from node names
				1193	def rm_commas_etc(text):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1194	result = ''
				1195	changed = 0
				1196	while 1:
				1197	pos = re_commas_etc.search(text)
				1198	if pos >= 0:
				1199	changed = 1
				1200	result = result + text[:pos]
				1201	text = text[pos+1:]
				1202	else:
				1203	result = result + text
				1204	break
				1205	if changed:
				1206	print 'Warning: nodename changhed to ' + `result`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1207
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1208	return result
				1209
# boolean flags, looked up by name when an \if... chunk is resolved
flags = {}
flags['texi'] = 1
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1212
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1213
				1214	##
				1215	## changeit: the actual routine, that changes the contents of the parsed
				1216	## chunks
				1217	##
				1218
				1219	def changeit(buf, pp):
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1220	global onlylatexspecial, hist, out
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1221
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1222	i, length = 0, len(pp)
				1223	while 1:
				1224	# sanity check: length should always equal len(pp)
				1225	if len(pp) != length:
				1226	raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
				1227	if i >= length:
				1228	break
				1229	ch = pp[i]
				1230	i = i + 1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1231
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1232	if type(ch) is StringType:
				1233	#normally, only chunks are present in pp,
				1234	# but in some cases, some extra info
				1235	# has been inserted, e.g., the \end{...} clauses
				1236	raise 'FATAL', 'got string, probably too many ' + `end`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1237
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1238	if ch.chtype == chunk_type(GROUP):
				1239	# check for {\em ...} constructs
				1240	if ch.data and \
				1241	ch.data[0].chtype == chunk_type(CSNAME) and \
				1242	s(buf, ch.data[0].data) in fontchanges.keys():
				1243	k = s(buf, ch.data[0].data)
				1244	del ch.data[0]
				1245	pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
				1246	length, i = length+1, i+1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1247
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1248	# recursively parse the contents of the group
				1249	changeit(buf, ch.data)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1250
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1251	elif ch.chtype == chunk_type(IF):
				1252	# \if...
				1253	flag, negate, data = ch.data
				1254	##print 'IF: flag, negate = ' + `flag, negate`
				1255	if flag not in flags.keys():
				1256	raise error, 'unknown flag ' + `flag`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1257
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1258	value = flags[flag]
				1259	if negate:
				1260	value = (not value)
				1261	del pp[i-1]
				1262	length, i = length-1, i-1
				1263	if value:
				1264	pp[i:i] = data
				1265	length = length + len(data)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1266
				1267
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1268	elif ch.chtype == chunk_type(ENV):
				1269	# \begin{...} ....
				1270	envname, data = ch.data
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1271
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1272	#push this environment name on stack
				1273	hist.inenv.insert(0, envname)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1274
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1275	#append an endenv chunk after grouped data
				1276	data.append(chunk(ENDENV, ch.where, envname))
				1277	##[`data`]
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1278
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1279	#delete this object
				1280	del pp[i-1]
				1281	i, length = i-1, length-1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1282
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1283	#insert found data
				1284	pp[i:i] = data
				1285	length = length + len(data)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1286
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1287	if envname == 'verbatim':
				1288	pp[i:i] = [chunk(CSLINE, ch.where, 'example'),
				1289	chunk(GROUP, ch.where, [])]
				1290	length, i = length+2, i+2
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1291
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1292	elif envname == 'itemize':
				1293	if hist.itemizenesting > len(itemizesymbols):
				1294	raise error, 'too deep itemize nesting'
				1295	ingroupch = [chunk(CSNAME, ch.where,
				1296	itemizesymbols[hist.itemizenesting])]
				1297	hist.itemizenesting = hist.itemizenesting + 1
				1298	pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),
				1299	chunk(GROUP, ch.where, ingroupch)]
				1300	length, i = length+2, i+2
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1301
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1302	elif envname == 'enumerate':
				1303	if hist.enumeratenesting > len(enumeratesymbols):
				1304	raise error, 'too deep enumerate nesting'
				1305	ingroupch = [chunk(PLAIN, ch.where,
				1306	enumeratesymbols[hist.enumeratenesting])]
				1307	hist.enumeratenesting = hist.enumeratenesting + 1
				1308	pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),
				1309	chunk(GROUP, ch.where, ingroupch)]
				1310	length, i = length+2, i+2
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1311
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1312	elif envname == 'description':
				1313	ingroupch = [chunk(CSNAME, ch.where, 'b')]
				1314	pp[i:i] = [chunk(CSLINE, ch.where, 'table'),
				1315	chunk(GROUP, ch.where, ingroupch)]
				1316	length, i = length+2, i+2
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1317
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1318	elif (envname == 'tableiii') or (envname == 'tableii'):
				1319	if (envname == 'tableii'):
				1320	ltable = 2
				1321	else:
				1322	ltable = 3
				1323	wh = ch.where
				1324	newcode = []
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1325
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1326	#delete tabular format description
				1327	# e.g., {\|l\|c\|l\|}
				1328	length, newi = getnextarg(length, buf, pp, i)
				1329	del pp[i:newi]
				1330	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1331
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1332	newcode.append(chunk(CSLINE, wh, 'table'))
				1333	ingroupch = [chunk(CSNAME, wh, 'asis')]
				1334	newcode.append(chunk(GROUP, wh, ingroupch))
				1335	newcode.append(chunk(CSLINE, wh, 'item'))
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1336
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1337	#get the name of macro for @item
				1338	# e.g., {code}
				1339	length, newi = getnextarg(length, buf, pp, i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1340
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1341	if newi-i != 1:
				1342	raise error, 'Sorry, expected 1 chunk argument'
				1343	if pp[i].chtype != chunk_type(PLAIN):
				1344	raise error, 'Sorry, expected plain text argument'
				1345	hist.itemargmacro = s(buf, pp[i].data)
				1346	del pp[i:newi]
				1347	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1348
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1349	itembody = []
				1350	for count in range(ltable):
				1351	length, newi = getnextarg(length, buf, pp, i)
				1352	emphgroup = [
				1353	chunk(CSNAME, wh, 'emph'),
				1354	chunk(GROUP, 0, pp[i:newi])]
				1355	del pp[i:newi]
				1356	length = length - (newi-i)
				1357	if count == 0:
				1358	itemarg = emphgroup
				1359	elif count == ltable-1:
				1360	itembody = itembody + \
				1361	[chunk(PLAIN, wh, ' --- ')] + emphgroup
				1362	else:
				1363	itembody = emphgroup
				1364	newcode.append(chunk(GROUP, wh, itemarg))
				1365	newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
				1366	pp[i:i] = newcode
				1367	l = len(newcode)
				1368	length, i = length+l, i+l
				1369	del newcode, l
				1370
				1371	if length != len(pp):
				1372	raise 'STILL, SOMETHING wrong', `i`
				1373
				1374
				1375	elif envname == 'funcdesc':
				1376	pp.insert(i, chunk(PLAIN, ch.where, ''))
				1377	i, length = i+1, length+1
				1378	length, i = do_funcdesc(length, buf, pp, i)
				1379
				1380	elif envname == 'excdesc':
				1381	pp.insert(i, chunk(PLAIN, ch.where, ''))
				1382	i, length = i+1, length+1
				1383	length, i = do_excdesc(length, buf, pp, i)
				1384
				1385	elif envname == 'datadesc':
				1386	pp.insert(i, chunk(PLAIN, ch.where, ''))
				1387	i, length = i+1, length+1
				1388	length, i = do_datadesc(length, buf, pp, i)
				1389
				1390	else:
				1391	print 'WARNING: don\'t know what to do with env ' + `envname`
				1392
				1393	elif ch.chtype == chunk_type(ENDENV):
				1394	envname = ch.data
				1395	if envname != hist.inenv[0]:
				1396	raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
				1397	del hist.inenv[0]
				1398	del pp[i-1]
				1399	i, length = i-1, length-1
				1400
				1401	if envname == 'verbatim':
				1402	pp[i:i] = [
				1403	chunk(CSLINE, ch.where, 'end'),
				1404	chunk(GROUP, ch.where, [
				1405	chunk(PLAIN, ch.where, 'example')])]
				1406	i, length = i+2, length+2
				1407	elif envname == 'itemize':
				1408	hist.itemizenesting = hist.itemizenesting - 1
				1409	pp[i:i] = [
				1410	chunk(CSLINE, ch.where, 'end'),
				1411	chunk(GROUP, ch.where, [
				1412	chunk(PLAIN, ch.where, 'itemize')])]
				1413	i, length = i+2, length+2
				1414	elif envname == 'enumerate':
				1415	hist.enumeratenesting = hist.enumeratenesting-1
				1416	pp[i:i] = [
				1417	chunk(CSLINE, ch.where, 'end'),
				1418	chunk(GROUP, ch.where, [
				1419	chunk(PLAIN, ch.where, 'enumerate')])]
				1420	i, length = i+2, length+2
				1421	elif envname == 'description':
				1422	pp[i:i] = [
				1423	chunk(CSLINE, ch.where, 'end'),
				1424	chunk(GROUP, ch.where, [
				1425	chunk(PLAIN, ch.where, 'table')])]
				1426	i, length = i+2, length+2
				1427	elif (envname == 'tableiii') or (envname == 'tableii'):
				1428	pp[i:i] = [
				1429	chunk(CSLINE, ch.where, 'end'),
				1430	chunk(GROUP, ch.where, [
				1431	chunk(PLAIN, ch.where, 'table')])]
				1432	i, length = i+2, length + 2
				1433	pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
				1434	i, length = i+1, length+1
				1435
				1436	elif envname in ('funcdesc', 'excdesc', 'datadesc'):
				1437	pp[i:i] = [
				1438	chunk(CSLINE, ch.where, 'end'),
				1439	chunk(GROUP, ch.where, [
				1440	chunk(PLAIN, ch.where, hist.command)])]
				1441	i, length = i+2, length+2
				1442	else:
				1443	print 'WARNING: ending env ' + `envname` + 'has no actions'
				1444
				1445	elif ch.chtype == chunk_type(CSNAME):
				1446	# control name transformations
				1447	if s(buf, ch.data) == 'optional':
				1448	pp[i-1].chtype = chunk_type (PLAIN)
				1449	pp[i-1].data = '['
				1450	if (i < length) and \
				1451	(pp[i].chtype == chunk_type(GROUP)):
				1452	cp=pp[i].data
				1453	pp[i:i+1]=cp + [
				1454	chunk(PLAIN, ch.where, ']')]
				1455	length = length+len(cp)
				1456	elif s(buf, ch.data) in ignoredcommands:
				1457	del pp[i-1]
				1458	i, length = i-1, length-1
				1459	elif s(buf, ch.data) == '@' and \
				1460	i != length and \
				1461	pp[i].chtype == chunk_type(PLAIN) and \
				1462	s(buf, pp[i].data)[0] == '.':
				1463	# \@. --> \. --> @.
				1464	ch.data = '.'
				1465	del pp[i]
				1466	length = length-1
				1467	elif s(buf, ch.data) == '\\':
				1468	# \\ --> \* --> @*
				1469	ch.data = '*'
				1470	elif len(s(buf, ch.data)) == 1 and \
				1471	s(buf, ch.data) in onlylatexspecial:
				1472	ch.chtype = chunk_type(PLAIN)
				1473	# check if such a command is followed by
				1474	# an empty group: e.g., `\%{}'. If so, remove
				1475	# this empty group too
				1476	if i < length and \
				1477	pp[i].chtype == chunk_type(GROUP) \
				1478	and len(pp[i].data) == 0:
				1479	del pp[i]
				1480	length = length-1
				1481
				1482	elif hist.inargs and s(buf, ch.data) in inargsselves:
				1483	# This is the special processing of the
				1484	# arguments of the \begin{funcdesc}... or
				1485	# \funcline... arguments
				1486	# \, --> , \[ --> [, \] --> ]
				1487	ch.chtype = chunk_type(PLAIN)
				1488
				1489	elif s(buf, ch.data) == 'renewcommand':
				1490	# \renewcommand{\indexsubitem}....
				1491	i, length = i-1, length-1
				1492	del pp[i]
				1493	length, newi = getnextarg(length, buf, pp, i)
				1494	if newi-i == 1 \
				1495	and i < length \
				1496	and pp[i].chtype == chunk_type(CSNAME) \
				1497	and s(buf, pp[i].data) == 'indexsubitem':
				1498	del pp[i:newi]
				1499	length = length - (newi-i)
				1500	length, newi = getnextarg(length, buf, pp, i)
				1501	text = flattext(buf, pp[i:newi])
				1502	if text[:1] != '(' or text[-1:] != ')':
				1503	raise error, 'expected indexsubitme enclosed in braces'
				1504	words = string.split(text[1:-1])
				1505	hist.indexsubitem = words
				1506	del text, words
				1507	else:
				1508	print 'WARNING: renewcommand with unsupported arg removed'
				1509	del pp[i:newi]
				1510	length = length - (newi-i)
				1511
				1512	elif s(buf, ch.data) == 'item':
				1513	ch.chtype = chunk_type(CSLINE)
				1514	length, newi = getoptarg(length, buf, pp, i)
				1515	ingroupch = pp[i:newi]
				1516	del pp[i:newi]
				1517	length = length - (newi-i)
				1518	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1519	i, length = i+1, length+1
				1520
				1521	elif s(buf, ch.data) == 'ttindex':
				1522	idxsi = hist.indexsubitem
				1523
				1524	cat_class = ''
				1525	if len(idxsi) >= 2 and idxsi[1] in \
				1526	('method', 'function', 'protocol'):
				1527	command = 'findex'
				1528	elif len(idxsi) >= 2 and idxsi[1] in \
				1529	('exception', 'object'):
				1530	command = 'vindex'
				1531	else:
				1532	print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
				1533	command = 'cindex'
				1534
				1535	if not cat_class:
				1536	cat_class = '('+string.join(idxsi)+')'
				1537
				1538	ch.chtype = chunk_type(CSLINE)
				1539	ch.data = command
				1540
				1541	length, newi = getnextarg(length, buf, pp, i)
				1542	arg = pp[i:newi]
				1543	del pp[i:newi]
				1544	length = length - (newi-i)
				1545
				1546	cat_arg = [chunk(PLAIN, ch.where, cat_class)]
				1547
				1548	# determine what should be set in roman, and
				1549	# what in tt-font
				1550	if command in regindices:
				1551
				1552	arg = [chunk(CSNAME, ch.where, 't'),
				1553	chunk(GROUP, ch.where, arg)]
				1554	else:
				1555	cat_arg = [chunk(CSNAME, ch.where, 'r'),
				1556	chunk(GROUP, ch.where, cat_arg)]
				1557
				1558	ingroupch = arg + \
				1559	[chunk(PLAIN, ch.where, ' ')] + \
				1560	cat_arg
				1561
				1562	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1563	length, i = length+1, i+1
				1564
				1565
				1566	elif s(buf, ch.data) == 'ldots':
				1567	# \ldots --> \dots{} --> @dots{}
				1568	ch.data = 'dots'
				1569	if i == length \
				1570	or pp[i].chtype != chunk_type(GROUP) \
				1571	or pp[i].data != []:
				1572	pp.insert(i, chunk(GROUP, ch.where, []))
				1573	i, length = i+1, length+1
				1574	elif s(buf, ch.data) in wordsselves:
				1575	# \UNIX --> UNIX
				1576	ch.chtype = chunk_type(PLAIN)
				1577	if i != length \
				1578	and pp[i].chtype == chunk_type(GROUP) \
				1579	and pp[i].data == []:
				1580	del pp[i]
				1581	length = length-1
				1582	elif s(buf, ch.data) in for_texi:
				1583	pass
				1584
				1585	elif s(buf, ch.data) == 'e':
				1586	# "\e" --> "\"
				1587	ch.data = '\\'
				1588	ch.chtype = chunk_type(PLAIN)
				1589	elif (s(buf, ch.data) == 'lineiii') or\
				1590	(s(buf, ch.data) == 'lineii'):
				1591	# This is the most tricky one
				1592	# \lineiii{a1}{a2}[{a3}] -->
				1593	# @item @<cts. of itemargmacro>{a1}
				1594	# a2 [ -- a3]
				1595	#
				1596	##print 'LINEIIIIII!!!!!!!'
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	1597	## wobj = Wobj()
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1598	## dumpit(buf, wobj.write, pp[i-1:i+5])
				1599	## print '--->' + wobj.data + '<----'
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1600	if not hist.inenv:
				1601	raise error, 'no environment for lineiii'
				1602	if (hist.inenv[0] != 'tableiii') and \
				1603	(hist.inenv[0] != 'tableii'):
				1604	raise error, \
				1605	'wrong command (' + \
				1606	s(buf, ch.data)+ \
				1607	') in wrong environment (' \
				1608	+ `hist.inenv[0]` + ')'
				1609	ch.chtype = chunk_type(CSLINE)
				1610	ch.data = 'item'
				1611	length, newi = getnextarg(length, buf, pp, i)
				1612	ingroupch = [chunk(CSNAME, 0,
				1613	hist.itemargmacro),
				1614	chunk(GROUP, 0, pp[i:newi])]
				1615	del pp[i:newi]
				1616	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1617	## print 'ITEM ARG: --->',
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	1618	## wobj = Wobj()
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1619	## dumpit(buf, wobj.write, ingroupch)
				1620	## print wobj.data, '<---'
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1621	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1622	grouppos = i
				1623	i, length = i+1, length+1
				1624	length, i = getnextarg(length, buf, pp, i)
				1625	length, newi = getnextarg(length, buf, pp, i)
				1626	if newi > i:
				1627	# we have a 3rd arg
				1628	pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
				1629	i = newi + 1
				1630	length = length + 1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1631	## pp[grouppos].data = pp[grouppos].data \
				1632	## + [chunk(PLAIN, ch.where, ' ')] \
				1633	## + pp[i:newi]
				1634	## del pp[i:newi]
				1635	## length = length - (newi-i)
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1636	if length != len(pp):
				1637	raise 'IN LINEIII IS THE ERR', `i`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1638
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1639	elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
				1640	#\xxxsection{A} ---->
				1641	# @node A, , ,
				1642	# @xxxsection A
				1643	## also: remove commas and quotes
				1644	ch.chtype = chunk_type(CSLINE)
				1645	length, newi = getnextarg(length, buf, pp, i)
				1646	afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
				1647	if afternodenamecmd < 0:
				1648	cp1 = crcopy(pp[i:newi])
				1649	pp[i:newi] = [
				1650	chunk(GROUP, ch.where,
				1651	pp[i:newi])]
				1652	length, newi = length - (newi-i) + 1, i+1
				1653	text = flattext(buf, cp1)
				1654	text = invent_node_names(text)
				1655	else:
				1656	length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
				1657	cp1 = crcopy(pp[afternodenamecmd:endarg])
				1658	del pp[newi:endarg]
				1659	length = length - (endarg-newi)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1660
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1661	pp[i:newi] = [
				1662	chunk(GROUP, ch.where,
				1663	pp[i:newi])]
				1664	length, newi = length - (newi-i) + 1, i + 1
				1665	text = flattext(buf, cp1)
				1666	if text[-1] == '.':
				1667	text = text[:-1]
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	1668	## print 'FLATTEXT:', `text`
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1669	if text in hist.nodenames:
				1670	print 'WARNING: node name ' + `text` + ' already used'
				1671	out.doublenodes.append(text)
				1672	else:
				1673	hist.nodenames.append(text)
				1674	text = rm_commas_etc(text)
				1675	pp[i-1:i-1] = [
				1676	chunk(CSLINE, ch.where, 'node'),
				1677	chunk(GROUP, ch.where, [
				1678	chunk(PLAIN, ch.where, text+', , ,')
				1679	])]
				1680	i, length = newi+2, length+2
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1681
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1682	elif s(buf,ch.data) == 'funcline':
				1683	# fold it to a very short environment
				1684	pp[i-1:i-1] = [
				1685	chunk(CSLINE, ch.where, 'end'),
				1686	chunk(GROUP, ch.where, [
				1687	chunk(PLAIN, ch.where, hist.command)])]
				1688	i, length = i+2, length+2
				1689	length, i = do_funcdesc(length, buf, pp, i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1690
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1691	elif s(buf,ch.data) == 'dataline':
				1692	pp[i-1:i-1] = [
				1693	chunk(CSLINE, ch.where, 'end'),
				1694	chunk(GROUP, ch.where, [
				1695	chunk(PLAIN, ch.where, hist.command)])]
				1696	i, length = i+2, length+2
				1697	length, i = do_datadesc(length, buf, pp, i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1698
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1699	elif s(buf,ch.data) == 'excline':
				1700	pp[i-1:i-1] = [
				1701	chunk(CSLINE, ch.where, 'end'),
				1702	chunk(GROUP, ch.where, [
				1703	chunk(PLAIN, ch.where, hist.command)])]
				1704	i, length = i+2, length+2
				1705	length, i = do_excdesc(length, buf, pp, i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1706
				1707
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1708	elif s(buf, ch.data) == 'index':
				1709	#\index{A} --->
				1710	# @cindex A
				1711	ch.chtype = chunk_type(CSLINE)
				1712	ch.data = 'cindex'
				1713	length, newi = getnextarg(length, buf, pp, i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1714
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1715	ingroupch = pp[i:newi]
				1716	del pp[i:newi]
				1717	length = length - (newi-i)
				1718	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1719	length, i = length+1, i+1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1720
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1721	elif s(buf, ch.data) == 'bifuncindex':
				1722	ch.chtype = chunk_type(CSLINE)
				1723	ch.data = 'findex'
				1724	length, newi = getnextarg(length, buf, pp, i)
				1725	ingroupch = pp[i:newi]
				1726	del pp[i:newi]
				1727	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1728
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1729	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1730	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1731	ingroupch.append(chunk(GROUP, ch.where, [
				1732	chunk(PLAIN, ch.where,
				1733	'(built-in function)')]))
				1734
				1735	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1736	length, i = length+1, i+1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1737
				1738
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1739	elif s(buf, ch.data) == 'obindex':
				1740	ch.chtype = chunk_type(CSLINE)
				1741	ch.data = 'findex'
				1742	length, newi = getnextarg(length, buf, pp, i)
				1743	ingroupch = pp[i:newi]
				1744	del pp[i:newi]
				1745	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1746
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1747	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1748	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1749	ingroupch.append(chunk(GROUP, ch.where, [
				1750	chunk(PLAIN, ch.where,
				1751	'(object)')]))
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1752
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1753	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1754	length, i = length+1, i+1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1755
				1756
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1757	elif s(buf, ch.data) == 'opindex':
				1758	ch.chtype = chunk_type(CSLINE)
				1759	ch.data = 'findex'
				1760	length, newi = getnextarg(length, buf, pp, i)
				1761	ingroupch = pp[i:newi]
				1762	del pp[i:newi]
				1763	length = length - (newi-i)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1764
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1765	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1766	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1767	ingroupch.append(chunk(GROUP, ch.where, [
				1768	chunk(PLAIN, ch.where,
				1769	'(operator)')]))
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1770
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1771	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1772	length, i = length+1, i+1
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1773
				1774
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1775	elif s(buf, ch.data) == 'bimodindex':
				1776	ch.chtype = chunk_type(CSLINE)
				1777	ch.data = 'pindex'
				1778	length, newi = getnextarg(length, buf, pp, i)
				1779	ingroupch = pp[i:newi]
				1780	del pp[i:newi]
				1781	length = length - (newi-i)
				1782
				1783	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1784	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1785	ingroupch.append(chunk(GROUP, ch.where, [
				1786	chunk(PLAIN, ch.where,
				1787	'(built-in)')]))
				1788
				1789	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1790	length, i = length+1, i+1
				1791
				1792	elif s(buf, ch.data) == 'sectcode':
				1793	ch.data = 'code'
				1794
				1795
				1796	elif s(buf, ch.data) == 'stmodindex':
				1797	ch.chtype = chunk_type(CSLINE)
				1798	# use the program index as module index
				1799	ch.data = 'pindex'
				1800	length, newi = getnextarg(length, buf, pp, i)
				1801	ingroupch = pp[i:newi]
				1802	del pp[i:newi]
				1803	length = length - (newi-i)
				1804
				1805	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1806	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1807	ingroupch.append(chunk(GROUP, ch.where, [
				1808	chunk(PLAIN, ch.where,
				1809	'(standard)')]))
				1810
				1811	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1812	length, i = length+1, i+1
				1813
				1814
				1815	elif s(buf, ch.data) == 'stindex':
				1816	# XXX must actually go to newindex st
				1817	wh = ch.where
				1818	ch.chtype = chunk_type(CSLINE)
				1819	ch.data = 'cindex'
				1820	length, newi = getnextarg(length, buf, pp, i)
				1821	ingroupch = [chunk(CSNAME, wh, 'code'),
				1822	chunk(GROUP, wh, pp[i:newi])]
				1823
				1824	del pp[i:newi]
				1825	length = length - (newi-i)
				1826
				1827	t = ingroupch[:]
				1828	t.append(chunk(PLAIN, wh, ' statement'))
				1829
				1830	pp.insert(i, chunk(GROUP, wh, t))
				1831	i, length = i+1, length+1
				1832
				1833	pp.insert(i, chunk(CSLINE, wh, 'cindex'))
				1834	i, length = i+1, length+1
				1835
				1836	t = ingroupch[:]
				1837	t.insert(0, chunk(PLAIN, wh, 'statement, '))
				1838
				1839	pp.insert(i, chunk(GROUP, wh, t))
				1840	i, length = i+1, length+1
				1841
				1842
				1843	elif s(buf, ch.data) == 'indexii':
				1844	#\indexii{A}{B} --->
				1845	# @cindex A B
				1846	# @cindex B, A
				1847	length, newi = getnextarg(length, buf, pp, i)
				1848	cp11 = pp[i:newi]
				1849	cp21 = crcopy(pp[i:newi])
				1850	del pp[i:newi]
				1851	length = length - (newi-i)
				1852	length, newi = getnextarg(length, buf, pp, i)
				1853	cp12 = pp[i:newi]
				1854	cp22 = crcopy(pp[i:newi])
				1855	del pp[i:newi]
				1856	length = length - (newi-i)
				1857
				1858	ch.chtype = chunk_type(CSLINE)
				1859	ch.data = 'cindex'
				1860	pp.insert(i, chunk(GROUP, ch.where, cp11 + [
				1861	chunk(PLAIN, ch.where, ' ')] + cp12))
				1862	i, length = i+1, length+1
				1863	pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
				1864	chunk(GROUP, ch.where, cp22 + [
				1865	chunk(PLAIN, ch.where, ', ')]+ cp21)]
				1866	i, length = i+2, length+2
				1867
				1868	elif s(buf, ch.data) == 'indexiii':
				1869	length, newi = getnextarg(length, buf, pp, i)
				1870	cp11 = pp[i:newi]
				1871	cp21 = crcopy(pp[i:newi])
				1872	cp31 = crcopy(pp[i:newi])
				1873	del pp[i:newi]
				1874	length = length - (newi-i)
				1875	length, newi = getnextarg(length, buf, pp, i)
				1876	cp12 = pp[i:newi]
				1877	cp22 = crcopy(pp[i:newi])
				1878	cp32 = crcopy(pp[i:newi])
				1879	del pp[i:newi]
				1880	length = length - (newi-i)
				1881	length, newi = getnextarg(length, buf, pp, i)
				1882	cp13 = pp[i:newi]
				1883	cp23 = crcopy(pp[i:newi])
				1884	cp33 = crcopy(pp[i:newi])
				1885	del pp[i:newi]
				1886	length = length - (newi-i)
				1887
				1888	ch.chtype = chunk_type(CSLINE)
				1889	ch.data = 'cindex'
				1890	pp.insert(i, chunk(GROUP, ch.where, cp11 + [
				1891	chunk(PLAIN, ch.where, ' ')] + cp12
				1892	+ [chunk(PLAIN, ch.where, ' ')]
				1893	+ cp13))
				1894	i, length = i+1, length+1
				1895	pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
				1896	chunk(GROUP, ch.where, cp22 + [
				1897	chunk(PLAIN, ch.where, ' ')]+ cp23
				1898	+ [chunk(PLAIN, ch.where, ', ')] +
				1899	cp21)]
				1900	i, length = i+2, length+2
				1901	pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
				1902	chunk(GROUP, ch.where, cp33 + [
				1903	chunk(PLAIN, ch.where, ', ')]+ cp31
				1904	+ [chunk(PLAIN, ch.where, ' ')] +
				1905	cp32)]
				1906	i, length = i+2, length+2
				1907
				1908
				1909	elif s(buf, ch.data) == 'indexiv':
				1910	length, newi = getnextarg(length, buf, pp, i)
				1911	cp11 = pp[i:newi]
				1912	cp21 = crcopy(pp[i:newi])
				1913	cp31 = crcopy(pp[i:newi])
				1914	cp41 = crcopy(pp[i:newi])
				1915	del pp[i:newi]
				1916	length = length - (newi-i)
				1917	length, newi = getnextarg(length, buf, pp, i)
				1918	cp12 = pp[i:newi]
				1919	cp22 = crcopy(pp[i:newi])
				1920	cp32 = crcopy(pp[i:newi])
				1921	cp42 = crcopy(pp[i:newi])
				1922	del pp[i:newi]
				1923	length = length - (newi-i)
				1924	length, newi = getnextarg(length, buf, pp, i)
				1925	cp13 = pp[i:newi]
				1926	cp23 = crcopy(pp[i:newi])
				1927	cp33 = crcopy(pp[i:newi])
				1928	cp43 = crcopy(pp[i:newi])
				1929	del pp[i:newi]
				1930	length = length - (newi-i)
				1931	length, newi = getnextarg(length, buf, pp, i)
				1932	cp14 = pp[i:newi]
				1933	cp24 = crcopy(pp[i:newi])
				1934	cp34 = crcopy(pp[i:newi])
				1935	cp44 = crcopy(pp[i:newi])
				1936	del pp[i:newi]
				1937	length = length - (newi-i)
				1938
				1939	ch.chtype = chunk_type(CSLINE)
				1940	ch.data = 'cindex'
				1941	ingroupch = cp11 + \
				1942	spacech + cp12 + \
				1943	spacech + cp13 + \
				1944	spacech + cp14
				1945	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1946	i, length = i+1, length+1
				1947	ingroupch = cp22 + \
				1948	spacech + cp23 + \
				1949	spacech + cp24 + \
				1950	commach + cp21
				1951	pp[i:i] = cindexch + [
				1952	chunk(GROUP, ch.where, ingroupch)]
				1953	i, length = i+2, length+2
				1954	ingroupch = cp33 + \
				1955	spacech + cp34 + \
				1956	commach + cp31 + \
				1957	spacech + cp32
				1958	pp[i:i] = cindexch + [
				1959	chunk(GROUP, ch.where, ingroupch)]
				1960	i, length = i+2, length+2
				1961	ingroupch = cp44 + \
				1962	commach + cp41 + \
				1963	spacech + cp42 + \
				1964	spacech + cp43
				1965	pp[i:i] = cindexch + [
				1966	chunk(GROUP, ch.where, ingroupch)]
				1967	i, length = i+2, length+2
				1968
				1969
				1970
				1971	else:
				1972	print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
				1973
				1974
				1975
# Matches the characters that dumpit() prefixes with '@' when it writes
# plain text: '@', '{' and '}'.
re_atsign = regex.compile('[@{}]')
# Matches a newline; dumpit() replaces each one found in the argument of
# a CSLINE command by a blank.
re_newline = regex.compile('\n')
				1978
				1979	def dumpit(buf, wm, pp):
				1980
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1981	global out
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1982
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	1983	i, length = 0, len(pp)
				1984
				1985	addspace = 0
				1986
				1987	while 1:
				1988	if len(pp) != length:
				1989	raise 'FATAL', 'inconsistent length'
				1990	if i == length:
				1991	break
				1992	ch = pp[i]
				1993	i = i + 1
				1994
				1995	if addspace:
				1996	dospace = 1
				1997	addspace = 0
				1998	else:
				1999	dospace = 0
				2000
				2001	if ch.chtype == chunk_type(CSNAME):
				2002	wm('@' + s(buf, ch.data))
				2003	if s(buf, ch.data) == 'node' and \
				2004	pp[i].chtype == chunk_type(PLAIN) and \
				2005	s(buf, pp[i].data) in out.doublenodes:
				2006	##XXX doesnt work yet??
				2007	wm(' ZZZ-' + zfill(`i`, 4))
				2008	if s(buf, ch.data)[0] in string.letters:
				2009	addspace = 1
				2010	elif ch.chtype == chunk_type(PLAIN):
				2011	if dospace and s(buf, ch.data) not in (' ', '\t'):
				2012	wm(' ')
				2013	text = s(buf, ch.data)
				2014	while 1:
				2015	pos = re_atsign.search(text)
				2016	if pos < 0:
				2017	break
				2018	wm(text[:pos] + '@' + text[pos])
				2019	text = text[pos+1:]
				2020	wm(text)
				2021	elif ch.chtype == chunk_type(GROUP):
				2022	wm('{')
				2023	dumpit(buf, wm, ch.data)
				2024	wm('}')
				2025	elif ch.chtype == chunk_type(DENDLINE):
				2026	wm('\n\n')
				2027	while i != length and pp[i].chtype in \
				2028	(chunk_type(DENDLINE), chunk_type(ENDLINE)):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2029	i = i + 1
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2030	elif ch.chtype == chunk_type(OTHER):
				2031	wm(s(buf, ch.data))
				2032	elif ch.chtype == chunk_type(ACTIVE):
				2033	wm(s(buf, ch.data))
				2034	elif ch.chtype == chunk_type(ENDLINE):
				2035	wm('\n')
				2036	elif ch.chtype == chunk_type(CSLINE):
				2037	if i >= 2 and pp[i-2].chtype not in \
				2038	(chunk_type(ENDLINE), chunk_type(DENDLINE)) \
				2039	and (pp[i-2].chtype != chunk_type(PLAIN)
				2040	or s(buf, pp[i-2].data)[-1] != '\n'):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2041
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2042	wm('\n')
				2043	wm('@' + s(buf, ch.data))
				2044	if i == length:
				2045	raise error, 'CSLINE expected another chunk'
				2046	if pp[i].chtype != chunk_type(GROUP):
				2047	raise error, 'CSLINE expected GROUP'
				2048	if type(pp[i].data) != ListType:
				2049	raise error, 'GROUP chould contain []-data'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2050
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2051	wobj = Wobj()
				2052	dumpit(buf, wobj.write, pp[i].data)
				2053	i = i + 1
				2054	text = wobj.data
				2055	del wobj
				2056	if text:
				2057	wm(' ')
				2058	while 1:
				2059	pos = re_newline.search(text)
				2060	if pos < 0:
				2061	break
				2062	print 'WARNING: found newline in csline arg'
				2063	wm(text[:pos] + ' ')
				2064	text = text[pos+1:]
				2065	wm(text)
				2066	if i >= length or \
				2067	pp[i].chtype not in (chunk_type(CSLINE),
				2068	chunk_type(ENDLINE), chunk_type(DENDLINE)) \
				2069	and (pp[i].chtype != chunk_type(PLAIN)
				2070	or s(buf, pp[i].data)[0] != '\n'):
				2071	wm('\n')
Guido van Rossum	49604d3	1996-09-10 22:19:51 +0000	[diff] [blame]	2072
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2073	elif ch.chtype == chunk_type(COMMENT):
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2074	## print 'COMMENT: previous chunk =', pp[i-2]
Guido van Rossum	557ed94	1995-03-28 13:33:45 +0000	[diff] [blame]	2075	## if pp[i-2].chtype == chunk_type(PLAIN):
				2076	## print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2077	if s(buf, ch.data) and \
				2078	regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
				2079	if i >= 2 and pp[i-2].chtype not in \
				2080	(chunk_type(ENDLINE), chunk_type(DENDLINE)) \
				2081	and not (pp[i-2].chtype == chunk_type(PLAIN)
				2082	and regex.match('\$.\\\|\n\$[ \t]\n$', s(buf, pp[i-2].data)) >= 0):
				2083	print 'ADDING NEWLINE'
				2084	wm('\n')
				2085	wm('@c ' + s(buf, ch.data))
				2086	elif ch.chtype == chunk_type(IGNORE):
				2087	pass
				2088	else:
				2089	try:
				2090	str = `s(buf, ch.data)`
				2091	except TypeError:
				2092	str = `ch.data`
				2093	if len(str) > 400:
				2094	str = str[:400] + '...'
				2095	print 'warning:', ch.chtype, 'not handled, data ' + str
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2096
				2097
				2098
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2099	def main():
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2100	outfile = None
				2101	headerfile = 'texipre.dat'
				2102	trailerfile = 'texipost.dat'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2103
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2104	try:
				2105	opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
				2106	except getopt.error:
				2107	args = []
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2108
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2109	if not args:
				2110	print 'usage: partparse [-o outfile] [-h headerfile]',
				2111	print '[-t trailerfile] file ...'
				2112	sys.exit(2)
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2113
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2114	for opt, arg in opts:
				2115	if opt == '-o': outfile = arg
				2116	if opt == '-h': headerfile = arg
				2117	if opt == '-t': trailerfile = arg
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2118
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2119	if not outfile:
				2120	root, ext = os.path.splitext(args[0])
				2121	outfile = root + '.texi'
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2122
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2123	if outfile in args:
				2124	print 'will not overwrite input file', outfile
				2125	sys.exit(2)
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2126
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2127	outf = open(outfile, 'w')
				2128	outf.write(open(headerfile, 'r').read())
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2129
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2130	for file in args:
				2131	if len(args) > 1: print '='20, file, '='20
				2132	buf = open(file, 'r').read()
				2133	w, pp = parseit(buf)
				2134	startchange()
				2135	changeit(buf, pp)
				2136	dumpit(buf, outf.write, pp)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2137
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2138	outf.write(open(trailerfile, 'r').read())
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2139
Guido van Rossum	5f18d6c	1996-09-10 22:34:20 +0000	[diff] [blame]	2140	outf.close()
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2141
# Run the converter only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()