Blame - Doc/partparse.py - platform/external/python/cpython3

blob: 6128bbfaf971a1829b634e4ac73f98319bfb4ffa [file] [log] [blame]

Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1	#
				2	# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
				3	# and generate texinfo source.
				4	#
				5	# This is not a good example of good programming practices. In fact, this
				6	# file could use a complete rewrite, in order to become faster, more
				7	# easily extensible and maintainable.
				8	#
				9	# However, I added some comments in a few places for the pitiful person who
				10	# would ever need to take a look into this file.
				11	#
				12	# Have I been clear enough??
				13	#
				14	# -jh
				15
				16
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	17	import sys, string, regex, getopt, os
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	18
				19	# Different parse modes for phase 1
				20	MODE_REGULAR = 0
				21	MODE_VERBATIM = 1
				22	MODE_CS_SCAN = 2
				23	MODE_COMMENT = 3
				24	MODE_MATH = 4
				25	MODE_DMATH = 5
				26	MODE_GOBBLEWHITE = 6
				27
				28	the_modes = MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \
				29	MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE
				30
				31	# Show the neighbourhood of the scanned buffer
				32	def epsilon(buf, where):
				33	wmt, wpt = where - 10, where + 10
				34	if wmt < 0:
				35	wmt = 0
				36	if wpt > len(buf):
				37	wpt = len(buf)
				38	return ' Context ' + `buf[wmt:where]` + '.' + `buf[where:wpt]` + '.'
				39
				40	# Should return the line number. never worked
				41	def lin():
				42	global lineno
				43	return ' Line ' + `lineno` + '.'
				44
				45	# Displays the recursion level.
				46	def lv(lvl):
				47	return ' Level ' + `lvl` + '.'
				48
				49	# Combine the three previous functions. Used often.
				50	def lle(lvl, buf, where):
				51	return lv(lvl) + lin() + epsilon(buf, where)
				52
				53
				54	# This class is only needed for _symbolic_ representation of the parse mode.
				55	class Mode:
				56	def init(self, arg):
				57	if arg not in the_modes:
				58	raise ValueError, 'mode not in the_modes'
				59	self.mode = arg
				60	return self
				61
				62	def __cmp__(self, other):
				63	if type(self) != type(other):
				64	other = mode(other)
				65	return cmp(self.mode, other.mode)
				66
				67	def __repr__(self):
				68	if self.mode == MODE_REGULAR:
				69	return 'MODE_REGULAR'
				70	elif self.mode == MODE_VERBATIM:
				71	return 'MODE_VERBATIM'
				72	elif self.mode == MODE_CS_SCAN:
				73	return 'MODE_CS_SCAN'
				74	elif self.mode == MODE_COMMENT:
				75	return 'MODE_COMMENT'
				76	elif self.mode == MODE_MATH:
				77	return 'MODE_MATH'
				78	elif self.mode == MODE_DMATH:
				79	return 'MODE_DMATH'
				80	elif self.mode == MODE_GOBBLEWHITE:
				81	return 'MODE_GOBBLEWHITE'
				82	else:
				83	raise ValueError, 'mode not in the_modes'
				84
				85	# just a wrapper around a class initialisation
				86	def mode(arg):
				87	return Mode().init(arg)
				88
				89
				90	# After phase 1, the text consists of chunks, with a certain type
				91	# this type will be assigned to the chtype member of the chunk
				92	# the where-field contains the file position where this is found
				93	# and the data field contains (1): a tuple describing start- end end
				94	# positions of the substring (can be used as slice for the buf-variable),
				95	# (2) just a string, mostly generated by the changeit routine,
				96	# or (3) a list, describing a (recursive) subgroup of chunks
				97	PLAIN = 0 # ASSUME PLAINTEXT, data = the text
				98	GROUP = 1 # GROUP ({}), data = [chunk, chunk,..]
				99	CSNAME = 2 # CONTROL SEQ TOKEN, data = the command
				100	COMMENT = 3 # data is the actual comment
				101	DMATH = 4 # DISPLAYMATH, data = [chunk, chunk,..]
				102	MATH = 5 # MATH, see DISPLAYMATH
				103	OTHER = 6 # CHAR WITH CATCODE OTHER, data = char
				104	ACTIVE = 7 # ACTIVE CHAR
				105	GOBBLEDWHITE = 8 # Gobbled LWSP, after CSNAME
				106	ENDLINE = 9 # END-OF-LINE, data = '\n'
				107	DENDLINE = 10 # DOUBLE EOL, data='\n', indicates \par
				108	ENV = 11 # LaTeX-environment
				109	# data =(envname,[ch,ch,ch,.])
				110	CSLINE = 12 # for texi: next chunk will be one group
				111	# of args. Will be set all on 1 line
				112	IGNORE = 13 # IGNORE this data
				113	ENDENV = 14 # TEMP END OF GROUP INDICATOR
				114	IF = 15 # IF-directive
				115	# data = (flag,negate,[ch, ch, ch,...])
				116	the_types = PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE, \
				117	GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF
				118
				119	# class, just to display symbolic name
				120	class ChunkType:
				121	def init(self, chunk_type):
				122	if chunk_type not in the_types:
				123	raise 'ValueError', 'chunk_type not in the_types'
				124	self.chunk_type = chunk_type
				125	return self
				126
				127	def __cmp__(self, other):
				128	if type(self) != type(other):
				129	other = chunk_type(other)
				130	return cmp(self.chunk_type, other.chunk_type)
				131
				132	def __repr__(self):
				133	if self.chunk_type == PLAIN:
				134	return 'PLAIN'
				135	elif self.chunk_type == GROUP:
				136	return 'GROUP'
				137	elif self.chunk_type == CSNAME:
				138	return 'CSNAME'
				139	elif self.chunk_type == COMMENT:
				140	return 'COMMENT'
				141	elif self.chunk_type == DMATH:
				142	return 'DMATH'
				143	elif self.chunk_type == MATH:
				144	return 'MATH'
				145	elif self.chunk_type == OTHER:
				146	return 'OTHER'
				147	elif self.chunk_type == ACTIVE:
				148	return 'ACTIVE'
				149	elif self.chunk_type == GOBBLEDWHITE:
				150	return 'GOBBLEDWHITE'
				151	elif self.chunk_type == DENDLINE:
				152	return 'DENDLINE'
				153	elif self.chunk_type == ENDLINE:
				154	return 'ENDLINE'
				155	elif self.chunk_type == ENV:
				156	return 'ENV'
				157	elif self.chunk_type == CSLINE:
				158	return 'CSLINE'
				159	elif self.chunk_type == IGNORE:
				160	return 'IGNORE'
				161	elif self.chunk_type == ENDENV:
				162	return 'ENDENV'
				163	elif self.chunk_type == IF:
				164	return 'IF'
				165	else:
				166	raise ValueError, 'chunk_type not in the_types'
				167
				168	# ...and the wrapper
				169	def chunk_type(type):
				170	return ChunkType().init(type)
				171
				172	# store a type object of the ChunkType-class-instance...
				173	chunk_type_type = type(chunk_type(0))
				174
				175	# this class contains a part of the parsed buffer
				176	class Chunk:
				177	def init(self, chtype, where, data):
				178	if type(chtype) != chunk_type_type:
				179	chtype = chunk_type(chtype)
				180	self.chtype = chtype
				181	if type(where) != type(0):
				182	raise TypeError, '\'where\' is not a number'
				183	self.where = where
				184	self.data = data
				185	##print 'CHUNK', self
				186	return self
				187
				188	def __repr__(self):
				189	return 'chunk' + `self.chtype, self.where, self.data`
				190
				191	# and the wrapper
				192	def chunk(chtype, where, data):
				193	return Chunk().init(chtype, where, data)
				194
				195
				196
				197	error = 'partparse.error'
				198
				199	#
				200	# TeX's catcodes...
				201	#
				202	CC_ESCAPE = 0
				203	CC_LBRACE = 1
				204	CC_RBRACE = 2
				205	CC_MATHSHIFT = 3
				206	CC_ALIGNMENT = 4
				207	CC_ENDLINE = 5
				208	CC_PARAMETER = 6
				209	CC_SUPERSCRIPT = 7
				210	CC_SUBSCRIPT = 8
				211	CC_IGNORE = 9
				212	CC_WHITE = 10
				213	CC_LETTER = 11
				214	CC_OTHER = 12
				215	CC_ACTIVE = 13
				216	CC_COMMENT = 14
				217	CC_INVALID = 15
				218
				219	# and the names
				220	cc_names = [\
				221	'CC_ESCAPE', \
				222	'CC_LBRACE', \
				223	'CC_RBRACE', \
				224	'CC_MATHSHIFT', \
				225	'CC_ALIGNMENT', \
				226	'CC_ENDLINE', \
				227	'CC_PARAMETER', \
				228	'CC_SUPERSCRIPT', \
				229	'CC_SUBSCRIPT', \
				230	'CC_IGNORE', \
				231	'CC_WHITE', \
				232	'CC_LETTER', \
				233	'CC_OTHER', \
				234	'CC_ACTIVE', \
				235	'CC_COMMENT', \
				236	'CC_INVALID', \
				237	]
				238
				239	# Show a list of catcode-name-symbols
				240	def pcl(codelist):
				241	result = ''
				242	for i in codelist:
				243	result = result + cc_names[i] + ', '
				244	return '[' + result[:-2] + ']'
				245
				246	# the name of the catcode (ACTIVE, OTHER, etc.)
				247	def pc(code):
				248	return cc_names[code]
				249
				250
				251	# Which catcodes make the parser stop parsing regular plaintext
				252	regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT, \
				253	CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT, \
				254	CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]
				255
				256	# same for scanning a control sequence name
				257	csname_scancodes = [CC_LETTER]
				258
				259	# same for gobbling LWSP
				260	white_scancodes = [CC_WHITE]
				261	##white_scancodes = [CC_WHITE, CC_ENDLINE]
				262
				263	# make a list of all catcode id's, except for catcode ``other''
				264	all_but_other_codes = range(16)
				265	del all_but_other_codes[CC_OTHER]
				266	##print all_but_other_codes
				267
				268	# when does a comment end
				269	comment_stopcodes = [CC_ENDLINE]
				270
				271	# gather all characters together, specified by a list of catcodes
				272	def code2string(cc, codelist):
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	273	##print 'code2string: codelist = ' + pcl(codelist),
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	274	result = ''
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	275	for category in codelist:
				276	if cc[category]:
				277	result = result + cc[category]
				278	##print 'result = ' + `result`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	279	return result
				280
				281	# automatically generate all characters of catcode other, being the
				282	# complement set in the ASCII range (128 characters)
				283	def make_other_codes(cc):
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	284	otherchars = range(256) # could be made 256, no problem
				285	for category in all_but_other_codes:
				286	if cc[category]:
				287	for c in cc[category]:
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	288	otherchars[ord(c)] = None
				289	result = ''
				290	for i in otherchars:
				291	if i != None:
				292	result = result + chr(i)
				293	return result
				294
				295	# catcode dump (which characters have which catcodes).
				296	def dump_cc(name, cc):
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	297	##print '\t' + name
				298	##print '=' * (8+len(name))
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	299	if len(cc) != 16:
				300	raise TypeError, 'cc not good cat class'
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	301	## for i in range(16):
				302	## print pc(i) + '\t' + `cc[i]`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	303
				304
				305	# In the beginning,....
				306	epoch_cc = [None] * 16
				307	##dump_cc('epoch_cc', epoch_cc)
				308
				309
				310	# INITEX
				311	initex_cc = epoch_cc[:]
				312	initex_cc[CC_ESCAPE] = '\\'
				313	initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
				314	'\n', '\0', ' '
				315	initex_cc[CC_LETTER] = string.uppercase + string.lowercase
				316	initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
				317	#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
				318	##dump_cc('initex_cc', initex_cc)
				319
				320
				321	# LPLAIN: LaTeX catcode setting (see lplain.tex)
				322	lplain_cc = initex_cc[:]
				323	lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
				324	lplain_cc[CC_MATHSHIFT] = '$'
				325	lplain_cc[CC_ALIGNMENT] = '&'
				326	lplain_cc[CC_PARAMETER] = '#'
				327	lplain_cc[CC_SUPERSCRIPT] = '^\x0B' # '^' and C-k
				328	lplain_cc[CC_SUBSCRIPT] = '_\x01' # '_' and C-a
				329	lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
				330	lplain_cc[CC_ACTIVE] = '~\x0C' # '~' and C-l
				331	lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
				332	##dump_cc('lplain_cc', lplain_cc)
				333
				334
				335	# Guido's LaTeX environment catcoded '_' as ``other''
				336	# my own purpose catlist
				337	my_cc = lplain_cc[:]
				338	my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
				339	my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_' # add it to OTHER list
				340	dump_cc('my_cc', my_cc)
				341
				342
				343
				344	# needed for un_re, my equivalent for regexp-quote in Emacs
				345	re_meaning = '\\[]^$'
				346
				347	def un_re(str):
				348	result = ''
				349	for i in str:
				350	if i in re_meaning:
				351	result = result + '\\'
				352	result = result + i
				353	return result
				354
				355	# NOTE the negate ('^') operator in some of the regexps below
				356	def make_rc_regular(cc):
				357	# problems here if '[]' are included!!
				358	return regex.compile('[' + code2string(cc, regular_stopcodes) + ']')
				359
				360	def make_rc_cs_scan(cc):
				361	return regex.compile('[^' + code2string(cc, csname_scancodes) + ']')
				362
				363	def make_rc_comment(cc):
				364	return regex.compile('[' + code2string(cc, comment_stopcodes) + ']')
				365
				366	def make_rc_endwhite(cc):
				367	return regex.compile('[^' + code2string(cc, white_scancodes) + ']')
				368
				369
				370
				371	# regular: normal mode:
				372	rc_regular = make_rc_regular(my_cc)
				373
				374	# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
				375	rc_cs_scan = make_rc_cs_scan(my_cc)
				376	rc_comment = make_rc_comment(my_cc)
				377	rc_endwhite = make_rc_endwhite(my_cc)
				378
				379
				380	# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
				381	# RECURSION-LEVEL will is incremented on entry.
				382	# result contains the list of chunks returned
				383	# together with this list, the buffer position is returned
				384
				385	# RECURSION-LEVEL will be set to zero again, when recursively a
				386	# {,D}MATH-mode scan has been enetered.
				387	# This has been done in order to better check for environment-mismatches
				388
				389	def parseit(buf, *rest):
				390	global lineno
				391
				392	if len(rest) == 3:
				393	parsemode, start, lvl = rest
				394	elif len(rest) == 2:
				395	parsemode, start, lvl = rest + (0, )
				396	elif len(rest) == 1:
				397	parsemode, start, lvl = rest + (0, 0)
				398	elif len(rest) == 0:
				399	parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
				400	else:
				401	raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
				402	result = []
				403	end = len(buf)
				404	if lvl == 0 and parsemode == mode(MODE_REGULAR):
				405	lineno = 1
				406	lvl = lvl + 1
				407
				408	##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
				409
				410	#
				411	# some of the more regular modes...
				412	#
				413
				414	if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
				415	cstate = []
				416	newpos = start
				417	curpmode = parsemode
				418	while 1:
				419	where = newpos
				420	#print '\tnew round: ' + epsilon(buf, where)
				421	if where == end:
				422	if lvl > 1 or curpmode != mode(MODE_REGULAR):
				423	# not the way we started...
				424	raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
				425	# the real ending of lvl-1 parse
				426	return end, result
				427
				428	pos = rc_regular.search(buf, where)
				429
				430	if pos < 0:
				431	pos = end
				432
				433	if pos != where:
				434	newpos, c = pos, chunk(PLAIN, where, (where, pos))
				435	result.append(c)
				436	continue
				437
				438
				439	#
				440	# ok, pos == where and pos != end
				441	#
				442	foundchar = buf[where]
				443	if foundchar in my_cc[CC_LBRACE]:
				444	# recursive subgroup parse...
				445	newpos, data = parseit(buf, curpmode, where+1, lvl)
				446	result.append(chunk(GROUP, where, data))
				447
				448	elif foundchar in my_cc[CC_RBRACE]:
				449	if lvl <= 1:
				450	raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
				451	if lvl == 1 and mode != mode(MODE_REGULAR):
				452	raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
				453	return where + 1, result
				454
				455	elif foundchar in my_cc[CC_ESCAPE]:
				456	#
				457	# call the routine that actually deals with
				458	# this problem. If do_ret is None, than
				459	# return the value of do_ret
				460	#
				461	# Note that handle_cs might call this routine
				462	# recursively again...
				463	#
				464	do_ret, newpos = handlecs(buf, where, \
				465	curpmode, lvl, result, end)
				466	if do_ret != None:
				467	return do_ret
				468
				469	elif foundchar in my_cc[CC_COMMENT]:
				470	newpos, data = parseit(buf, \
				471	mode(MODE_COMMENT), where+1, lvl)
				472	result.append(chunk(COMMENT, where, data))
				473
				474	elif foundchar in my_cc[CC_MATHSHIFT]:
				475	# note that recursive calls to math-mode
				476	# scanning are called with recursion-level 0
				477	# again, in order to check for bad mathend
				478	#
				479	if where + 1 != end and \
				480	buf[where + 1] in \
				481	my_cc[CC_MATHSHIFT]:
				482	#
				483	# double mathshift, e.g. '$$'
				484	#
				485	if curpmode == mode(MODE_REGULAR):
				486	newpos, data = parseit(buf, \
				487	mode(MODE_DMATH), \
				488	where+2, 0)
				489	result.append(chunk(DMATH, \
				490	where, data))
				491	elif curpmode == mode(MODE_MATH):
				492	raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
				493	elif lvl != 1:
				494	raise error, 'bad mathend.' + \
				495	lle(lvl, buf, where)
				496	else:
				497	return where + 2, result
				498	else:
				499	#
				500	# single math shift, e.g. '$'
				501	#
				502	if curpmode == mode(MODE_REGULAR):
				503	newpos, data = parseit(buf, \
				504	mode(MODE_MATH), \
				505	where+1, 0)
				506	result.append(chunk(MATH, \
				507	where, data))
				508	elif curpmode == mode(MODE_DMATH):
				509	raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
				510	elif lvl != 1:
				511	raise error, 'bad mathend.' + \
				512	lv(lvl, buf, where)
				513	else:
				514	return where + 1, result
				515
				516	elif foundchar in my_cc[CC_IGNORE]:
				517	print 'warning: ignored char', `foundchar`
				518	newpos = where + 1
				519
				520	elif foundchar in my_cc[CC_ACTIVE]:
				521	result.append(chunk(ACTIVE, where, foundchar))
				522	newpos = where + 1
				523
				524	elif foundchar in my_cc[CC_INVALID]:
				525	raise error, 'invalid char ' + `foundchar`
				526	newpos = where + 1
				527
				528	elif foundchar in my_cc[CC_ENDLINE]:
				529	#
				530	# after an end of line, eat the rest of
				531	# whitespace on the beginning of the next line
				532	# this is what LaTeX more or less does
				533	#
				534	# also, try to indicate double newlines (\par)
				535	#
				536	lineno = lineno + 1
				537	savedwhere = where
				538	newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
				539	if newpos != end and buf[newpos] in \
				540	my_cc[CC_ENDLINE]:
				541	result.append(chunk(DENDLINE, \
				542	savedwhere, foundchar))
				543	else:
				544	result.append(chunk(ENDLINE, \
				545	savedwhere, foundchar))
				546	else:
				547	result.append(chunk(OTHER, where, foundchar))
				548	newpos = where + 1
				549
				550	elif parsemode == mode(MODE_CS_SCAN):
				551	#
				552	# scan for a control sequence token. `\ape', `\nut' or `\%'
				553	#
				554	if start == end:
				555	raise EOFError, 'can\'t find end of csname'
				556	pos = rc_cs_scan.search(buf, start)
				557	if pos < 0:
				558	pos = end
				559	if pos == start:
				560	# first non-letter right where we started the search
				561	# ---> the control sequence name consists of one single
				562	# character. Also: don't eat white space...
				563	if buf[pos] in my_cc[CC_ENDLINE]:
				564	lineno = lineno + 1
				565	pos = pos + 1
				566	return pos, (start, pos)
				567	else:
				568	spos = pos
				569	if buf[pos] == '\n':
				570	lineno = lineno + 1
				571	spos = pos + 1
				572	pos2, dummy = parseit(buf, \
				573	mode(MODE_GOBBLEWHITE), spos, lvl)
				574	return pos2, (start, pos)
				575
				576	elif parsemode == mode(MODE_GOBBLEWHITE):
				577	if start == end:
				578	return start, ''
				579	pos = rc_endwhite.search(buf, start)
				580	if pos < 0:
				581	pos = start
				582	return pos, (start, pos)
				583
				584	elif parsemode == mode(MODE_COMMENT):
				585	pos = rc_comment.search(buf, start)
				586	lineno = lineno + 1
				587	if pos < 0:
				588	print 'no newline perhaps?'
				589	raise EOFError, 'can\'t find end of comment'
				590	pos = pos + 1
				591	pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
				592	return pos2, (start, pos)
				593
				594
				595	else:
				596	raise error, 'Unknown mode (' + `parsemode` + ')'
				597
				598
				599	#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
				600
				601	#boxcommands = 'mbox', 'fbox'
				602	#defcommands = 'def', 'newcommand'
				603
				604	endverbstr = '\\end{verbatim}'
				605
				606	re_endverb = regex.compile(un_re(endverbstr))
				607
				608	#
				609	# handlecs: helper function for parseit, for the special thing we might
				610	# wanna do after certain command control sequences
				611	# returns: None or return_data, newpos
				612	#
				613	# in the latter case, the calling function is instructed to immediately
				614	# return with the data in return_data
				615	#
				616	def handlecs(buf, where, curpmode, lvl, result, end):
				617	global lineno
				618
				619	# get the control sequence name...
				620	newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
				621	saveddata = data
				622
				623	if s(buf, data) in ('begin', 'end'):
				624	# skip the expected '{' and get the LaTeX-envname '}'
				625	newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
				626	if len(data) != 1:
				627	raise error, 'expected 1 chunk of data.' + \
				628	lle(lvl, buf, where)
				629
				630	# yucky, we've got an environment
				631	envname = s(buf, data[0].data)
				632	##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
				633	if s(buf, saveddata) == 'begin' and envname == 'verbatim':
				634	# verbatim deserves special treatment
				635	pos = re_endverb.search(buf, newpos)
				636	if pos < 0:
				637	raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
				638	result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
				639	newpos = pos + len(endverbstr)
				640
				641	elif s(buf, saveddata) == 'begin':
				642	# start parsing recursively... If that parse returns
				643	# from an '\end{...}', then should the last item of
				644	# the returned data be a string containing the ended
				645	# environment
				646	newpos, data = parseit(buf, curpmode, newpos, lvl)
				647	if not data or type(data[-1]) != type(''):
				648	raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
				649	retenv = data[-1]
				650	del data[-1]
				651	if retenv != envname:
				652	#[`retenv`, `envname`]
				653	raise error, 'environments do not match.' + \
				654	lle(lvl, buf, where) + \
				655	epsilon(buf, newpos)
				656	result.append(chunk(ENV, where, (retenv, data)))
				657	else:
				658	# 'end'... append the environment name, as just
				659	# pointed out, and order parsit to return...
				660	result.append(envname)
				661	##print 'POINT of return: ' + epsilon(buf, newpos)
				662	# the tuple will be returned by parseit
				663	return (newpos, result), newpos
				664
				665	# end of \begin ... \end handling
				666
				667	elif s(buf, data)[0:2] == 'if':
				668	# another scary monster: the 'if' directive
				669	flag = s(buf, data)[2:]
				670
				671	# recursively call parseit, just like environment above..
				672	# the last item of data should contain the if-termination
				673	# e.g., 'else' of 'fi'
				674	newpos, data = parseit(buf, curpmode, newpos, lvl)
				675	if not data or data[-1] not in ('else', 'fi'):
				676	raise error, 'wrong if... termination' + \
				677	lle(lvl, buf, where) + epsilon(buf, newpos)
				678
				679	ifterm = data[-1]
				680	del data[-1]
				681	# 0 means dont_negate flag
				682	result.append(chunk(IF, where, (flag, 0, data)))
				683	if ifterm == 'else':
				684	# do the whole thing again, there is only one way
				685	# to end this one, by 'fi'
				686	newpos, data = parseit(buf, curpmode, newpos, lvl)
				687	if not data or data[-1] not in ('fi', ):
				688	raise error, 'wrong if...else... termination' \
				689	+ lle(lvl, buf, where) \
				690	+ epsilon(buf, newpos)
				691
				692	ifterm = data[-1]
				693	del data[-1]
				694	result.append(chunk(IF, where, (flag, 1, data)))
				695	#done implicitely: return None, newpos
				696
				697	elif s(buf, data) in ('else', 'fi'):
				698	result.append(s(buf, data))
				699	# order calling party to return tuple
				700	return (newpos, result), newpos
				701
				702	# end of \if, \else, ... \fi handling
				703
				704	elif s(buf, saveddata) == 'verb':
				705	x2 = saveddata[1]
				706	result.append(chunk(CSNAME, where, data))
				707	if x2 == end:
				708	raise error, 'premature end of command.' + lle(lvl, buf, where)
				709	delimchar = buf[x2]
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	710	##print 'VERB: delimchar ' + `delimchar`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	711	pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
				712	if pos < 0:
				713	raise error, 'end of \'verb\' argument (' + \
				714	`delimchar` + ') not found.' + \
				715	lle(lvl, buf, where)
				716	result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
				717	newpos = pos + 1
				718	else:
				719	result.append(chunk(CSNAME, where, data))
				720	return None, newpos
				721
				722	# this is just a function to get the string value if the possible data-tuple
				723	def s(buf, data):
				724	if type(data) == type(''):
				725	return data
				726	if len(data) != 2 or not (type(data[0]) == type(data[1]) == type(0)):
				727	raise TypeError, 'expected tuple of 2 integers'
				728	x1, x2 = data
				729	return buf[x1:x2]
				730
				731
				732	##length, data1, i = getnextarg(length, buf, pp, i + 1)
				733
				734	# make a deep-copy of some chunks
				735	def crcopy(r):
				736	result = []
				737	for x in r:
				738	result.append(chunkcopy(x))
				739	return result
				740
				741
				742
				743	# copy a chunk, would better be a method of class Chunk...
				744	def chunkcopy(ch):
				745	if ch.chtype == chunk_type(GROUP):
				746	listc = ch.data[:]
				747	for i in range(len(listc)):
				748	listc[i] = chunkcopy(listc[i])
				749	return chunk(GROUP, ch.where, listc)
				750	else:
				751	return chunk(ch.chtype, ch.where, ch.data)
				752
				753
				754	# get next argument for TeX-macro, flatten a group (insert between)
				755	# or return Command Sequence token, or give back one character
				756	def getnextarg(length, buf, pp, item):
				757
				758	##wobj = Wobj().init()
				759	##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
				760	##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
				761
				762	while item < length and pp[item].chtype == chunk_type(ENDLINE):
				763	del pp[item]
				764	length = length - 1
				765	if item >= length:
				766	raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
				767	if pp[item].chtype == chunk_type(GROUP):
				768	newpp = pp[item].data
				769	del pp[item]
				770	length = length - 1
				771	changeit(buf, newpp)
				772	length = length + len(newpp)
				773	pp[item:item] = newpp
				774	item = item + len(newpp)
				775	if len(newpp) < 10:
				776	wobj = Wobj().init()
				777	dumpit(buf, wobj.write, newpp)
				778	##print 'GETNEXTARG: inserted ' + `wobj.data`
				779	return length, item
				780	elif pp[item].chtype == chunk_type(PLAIN):
				781	#grab one char
				782	print 'WARNING: grabbing one char'
				783	if len(s(buf, pp[item].data)) > 1:
				784	pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
				785	item, length = item+1, length+1
				786	pp[item].data = s(buf, pp[item].data)[1:]
				787	else:
				788	item = item+1
				789	return length, item
				790	else:
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	791	ch = pp[item]
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	792	try:
				793	str = `s(buf, ch.data)`
				794	except TypeError:
				795	str = `ch.data`
				796	if len(str) > 400:
				797	str = str[:400] + '...'
				798	print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
				799	return length, item
				800
				801
				802	# this one is needed to find the end of LaTeX's optional argument, like
				803	# item[...]
				804	re_endopt = regex.compile(']')
				805
				806	# get a LaTeX-optional argument, you know, the square braces '[' and ']'
				807	def getoptarg(length, buf, pp, item):
				808
				809	wobj = Wobj().init()
				810	dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
				811	##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
				812
				813	if item >= length or \
				814	pp[item].chtype != chunk_type(PLAIN) or \
				815	s(buf, pp[item].data)[0] != '[':
				816	return length, item
				817
				818	pp[item].data = s(buf, pp[item].data)[1:]
				819	if len(pp[item].data) == 0:
				820	del pp[item]
				821	length = length-1
				822
				823	while 1:
				824	if item == length:
				825	raise error, 'No end of optional arg found'
				826	if pp[item].chtype == chunk_type(PLAIN):
				827	text = s(buf, pp[item].data)
				828	pos = re_endopt.search(text)
				829	if pos >= 0:
				830	pp[item].data = text[:pos]
				831	if pos == 0:
				832	del pp[item]
				833	length = length-1
				834	else:
				835	item=item+1
				836	text = text[pos+1:]
				837
				838	while text and text[0] in ' \t':
				839	text = text[1:]
				840
				841	if text:
				842	pp.insert(item, chunk(PLAIN, 0, text))
				843	length = length + 1
				844	return length, item
				845
				846	item = item+1
				847
				848
				849	# Wobj just add write-requests to the ``data'' attribute
				850	class Wobj:
				851	def init(self):
				852	self.data = ''
				853	return self
				854	def write(self, data):
				855	self.data = self.data + data
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	856
# ignore these commands
ignoredcommands = ('bcode', 'ecode')

# map commands like these to themselves as plaintext
wordsselves = (
    'UNIX',
    'ABC',
    'C',
    'ASCII',
    'EOF',
    'LaTeX',
)

# \{ --> {, \} --> }, etc
themselves = ('{', '}', '.', '@', ' ', '\n') + wordsselves

# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')

# this is how I would show the difference between emph and strong
# a tuple is the (prefix, suffix) pair put around the argument,
# code 1 means: fold to uppercase
markcmds = {
    'code': ('', ''),
    'var': 1,
    'emph': ('_', '_'),
    'strong': ('', ''),
}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {
    'rm': 'r',
    'it': 'i',
    'em': 'emph',
    'bf': 'b',
    'tt': 't',
}

# transparent for these commands
for_texi = (
    'emph', 'var', 'strong', 'code',
    'kbd', 'key', 'dfn', 'samp',
    'file', 'r', 'i', 't',
)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	876
				877
# try to remove macros and return flat text
#
# buf: the buffer that chunk data is resolved against through s()
# pp:  list of chunks to flatten
#
# The work is done on the result of crcopy(pp) (presumably a recursive
# copy, so the caller's chunks stay untouched -- confirm against crcopy).
# Returns the string that dumpit() writes for the processed chunk list.
# Raises the string exception 'FATAL' when the length bookkeeping or the
# markcmds table is inconsistent.
def flattext(buf, pp):
    pp = crcopy(pp)
    ##print '---> FLATTEXT ' + `pp`
    wobj = Wobj().init()

    # ``length'' shadows len(pp); every insertion/deletion below has to
    # keep the two in step (checked at the top of each round)
    i, length = 0, len(pp)
    while 1:
        if len(pp) != length:
            raise 'FATAL', 'inconsistent length'
        if i >= length:
            break
        ch = pp[i]
        # from here on i points _after_ the current chunk
        i = i+1
        if ch.chtype == chunk_type(PLAIN):
            pass
        elif ch.chtype == chunk_type(CSNAME):
            if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
                # the command name itself is the text
                ch.chtype = chunk_type(PLAIN)
            elif s(buf, ch.data) == 'e':
                # \e --> a literal backslash
                ch.chtype = chunk_type(PLAIN)
                ch.data = '\\'
            elif len(s(buf, ch.data)) == 1 \
                      and s(buf, ch.data) in onlylatexspecial:
                ch.chtype = chunk_type(PLAIN)
                # if it is followed by an empty group,
                # remove that group, it was needed for
                # a true space
                if i < length \
                          and pp[i].chtype==chunk_type(GROUP) \
                          and len(pp[i].data) == 0:
                    del pp[i]
                    length = length-1

            elif s(buf, ch.data) in markcmds.keys():
                # flatten the argument of the markup command, remove
                # it from pp and let the command chunk carry the
                # (decorated) text instead
                # NOTE(review): ``str'' shadows the builtin in here
                length, newi = getnextarg(length, buf, pp, i)
                str = flattext(buf, pp[i:newi])
                del pp[i:newi]
                length = length - (newi - i)
                ch.chtype = chunk_type(PLAIN)
                markcmd = s(buf, ch.data)
                x = markcmds[markcmd]
                if type(x) == type(()):
                    # (prefix, suffix) pair
                    pre, after = x
                    str = pre+str+after
                elif x == 1:
                    str = string.upper(str)
                else:
                    raise 'FATAL', 'corrupt markcmds'
                ch.data = str
            else:
                # unknown command: drop it, and say so unless it is
                # one of the commands that are ignored on purpose
                if s(buf, ch.data) not in ignoredcommands:
                    print 'WARNING: deleting command ' + `s(buf, ch.data)`
                    print 'PP' + `pp[i-1]`
                del pp[i-1]
                i, length = i-1, length-1
        elif ch.chtype == chunk_type(GROUP):
            # let getnextarg() handle the group, then step back so
            # that this position is looked at again (presumably
            # getnextarg replaces the group by its contents --
            # confirm against getnextarg)
            length, newi = getnextarg(length, buf, pp, i-1)
            i = i-1
##          str = flattext(buf, crcopy(pp[i-1:newi]))
##          del pp[i:newi]
##          length = length - (newi - i)
##          ch.chtype = chunk_type(PLAIN)
##          ch.data = str
        else:
            pass

    dumpit(buf, wobj.write, pp)
    ##print 'FLATTEXT: RETURNING ' + `wobj.data`
    return wobj.data
				948
				949	# try to generate node names (a bit shorter than the chapter title)
				950	# note that the \nodename command (see elsewhere) overules these efforts
				951	def invent_node_names(text):
				952	words = string.split(text)
				953
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	954	##print 'WORDS ' + `words`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	955
				956	if len(words) == 2 \
				957	and string.lower(words[0]) == 'built-in' \
				958	and string.lower(words[1]) not in ('modules', 'functions'):
				959	return words[1]
				960	if len(words) == 3 and string.lower(words[1]) == 'module':
				961	return words[2]
				962	if len(words) == 3 and string.lower(words[1]) == 'object':
				963	return string.join(words[0:2])
				964	if len(words) > 4 and string.lower(string.join(words[-4:])) == \
				965	'methods and data attributes':
				966	return string.join(words[:2])
				967	return text
				968
# any single character that rm_commas_etc() strips from node names:
# comma, backquote, quote, at-sign or a brace
re_commas_etc = regex.compile('[,`\'@{}]')

# a (possibly empty) run of blanks and tabs; next_command_p() uses it to
# recognise PLAIN chunks that hold nothing else
re_whitespace = regex.compile('[ \t]*')
				972
				973
				974	##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
				975
				976	# look if the next non-white stuff is also a command, resulting in skipping
				977	# double endlines (DENDLINE) too, and thus omitting \par's
				978	# Sometimes this is too much, maybe consider DENDLINE's as stop
				979	def next_command_p(length, buf, pp, i, cmdname):
				980
				981	while 1:
				982	if i >= len(pp):
				983	break
				984	ch = pp[i]
				985	i = i+1
				986	if ch.chtype == chunk_type(ENDLINE):
				987	continue
				988	if ch.chtype == chunk_type(DENDLINE):
				989	continue
				990	if ch.chtype == chunk_type(PLAIN):
				991	if re_whitespace.search(s(buf, ch.data)) == 0 and \
				992	re_whitespace.match(s(buf, ch.data)) == len(s(buf, ch.data)):
				993	continue
				994	return -1
				995	if ch.chtype == chunk_type(CSNAME):
				996	if s(buf, ch.data) == cmdname:
				997	return i # _after_ the command
				998	return -1
				999	return -1
				1000
				1001
				1002	# things that are special to LaTeX, but not to texi..
				1003	onlylatexspecial = '_~^$#&%'
				1004
Guido van Rossum	23301a9	1993-05-24 14:19:37 +0000	[diff] [blame]	1005	class Struct: pass
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1006
				1007	hist = Struct()
				1008	out = Struct()
				1009
				1010	def startchange():
				1011	global hist, out
				1012
				1013	hist.inenv = []
				1014	hist.nodenames = []
				1015	hist.cindex = []
				1016	hist.inargs = 0
				1017	hist.enumeratenesting, hist.itemizenesting = 0, 0
				1018
				1019	out.doublenodes = []
				1020	out.doublecindeces = []
				1021
				1022
# ready-made one-element chunk lists: a single space, a comma followed by
# a space, and a `cindex' command line
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
# (indexed by the current itemize nesting depth)
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
enumeratesymbols = ['1', 'A', 'a']
				1032
				1033	##
				1034	## \begin{ {func,data,exc}desc }{name}...
				1035	## the resulting texi-code is dependent on the contents of indexsubitem
				1036	##
				1037
# indexsubitem: `['XXX', 'function']`
# funcdesc:
#   deffn {`idxsi`} NAME (FUNCARGS)

# indexsubitem: `['XXX', 'method']`
# funcdesc:
#   defmethod {`idxsi[0]`} NAME (FUNCARGS)

# indexsubitem: `['in', 'module', 'MODNAME']`
# datadesc:
#   defcv data {`idxsi[1:]`} NAME
# excdesc:
#   defcv exception {`idxsi[1:]`} NAME
# funcdesc:
#   deffn {function of `idxsi[1:]`} NAME (FUNCARGS)

# indexsubitem: `['OBJECT', 'attribute']`
# datadesc:
#   defcv attribute {`OBJECT`} NAME
				1057
				1058
				1059	## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
				1060	## or \funcline{NAME}{ARGS}
				1061	##
				1062	def do_funcdesc(length, buf, pp, i):
				1063	startpoint = i-1
				1064	ch = pp[startpoint]
				1065	wh = ch.where
				1066	length, newi = getnextarg(length, buf, pp, i)
				1067	funcname = chunk(GROUP, wh, pp[i:newi])
				1068	del pp[i:newi]
				1069	length = length - (newi-i)
				1070	save = hist.inargs
				1071	hist.inargs = 1
				1072	length, newi = getnextarg(length, buf, pp, i)
				1073	hist.inargs = save
				1074	del save
				1075	the_args = [chunk(PLAIN, wh, '()'[0])] + \
				1076	pp[i:newi] + \
				1077	[chunk(PLAIN, wh, '()'[1])]
				1078	del pp[i:newi]
				1079	length = length - (newi-i)
				1080
				1081	idxsi = hist.indexsubitem # words
				1082	command = ''
				1083	cat_class = ''
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1084	if idxsi and idxsi[-1] in ('method', 'protocol'):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1085	command = 'defmethod'
				1086	cat_class = string.join(idxsi[:-1])
				1087	elif len(idxsi) == 2 and idxsi[1] == 'function':
				1088	command = 'deffn'
				1089	cat_class = string.join(idxsi)
				1090	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
				1091	command = 'deffn'
				1092	cat_class = 'function of ' + string.join(idxsi[1:])
				1093
				1094	if not command:
				1095	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
				1096
				1097	ch.chtype = chunk_type(CSLINE)
				1098	ch.data = command
				1099
				1100	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
				1101	cslinearg.append(chunk(PLAIN, wh, ' '))
				1102	cslinearg.append(funcname)
				1103	cslinearg.append(chunk(PLAIN, wh, ' '))
				1104	l = len(cslinearg)
				1105	cslinearg[l:l] = the_args
				1106
				1107	pp.insert(i, chunk(GROUP, wh, cslinearg))
				1108	i, length = i+1, length+1
				1109	hist.command = command
				1110	return length, i
				1111
				1112
				1113	## this routine will be called on \begin{excdesc}{NAME}
				1114	## or \excline{NAME}
				1115	##
				1116	def do_excdesc(length, buf, pp, i):
				1117	startpoint = i-1
				1118	ch = pp[startpoint]
				1119	wh = ch.where
				1120	length, newi = getnextarg(length, buf, pp, i)
				1121	excname = chunk(GROUP, wh, pp[i:newi])
				1122	del pp[i:newi]
				1123	length = length - (newi-i)
				1124
				1125	idxsi = hist.indexsubitem # words
				1126	command = ''
				1127	cat_class = ''
				1128	class_class = ''
				1129	if len(idxsi) == 2 and idxsi[1] == 'exception':
				1130	command = 'defvr'
				1131	cat_class = string.join(idxsi)
				1132	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
				1133	command = 'defcv'
				1134	cat_class = 'exception'
				1135	class_class = string.join(idxsi[1:])
				1136	elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
				1137	command = 'defcv'
				1138	cat_class = 'exception'
				1139	class_class = string.join(idxsi[2:])
				1140
				1141
				1142	if not command:
				1143	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
				1144
				1145	ch.chtype = chunk_type(CSLINE)
				1146	ch.data = command
				1147
				1148	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
				1149	cslinearg.append(chunk(PLAIN, wh, ' '))
				1150	if class_class:
				1151	cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
				1152	cslinearg.append(chunk(PLAIN, wh, ' '))
				1153	cslinearg.append(excname)
				1154
				1155	pp.insert(i, chunk(GROUP, wh, cslinearg))
				1156	i, length = i+1, length+1
				1157	hist.command = command
				1158	return length, i
				1159
				1160	## same for datadesc or dataline...
				1161	def do_datadesc(length, buf, pp, i):
				1162	startpoint = i-1
				1163	ch = pp[startpoint]
				1164	wh = ch.where
				1165	length, newi = getnextarg(length, buf, pp, i)
				1166	dataname = chunk(GROUP, wh, pp[i:newi])
				1167	del pp[i:newi]
				1168	length = length - (newi-i)
				1169
				1170	idxsi = hist.indexsubitem # words
				1171	command = ''
				1172	cat_class = ''
				1173	class_class = ''
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1174	if idxsi[-1] in ('attribute', 'option'):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1175	command = 'defcv'
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1176	cat_class = idxsi[-1]
Sjoerd Mullender	1cfb6b8	1994-12-14 15:28:22 +0000	[diff] [blame]	1177	class_class = string.join(idxsi[:-1])
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1178	elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
				1179	command = 'defcv'
				1180	cat_class = 'data'
				1181	class_class = string.join(idxsi[1:])
				1182	elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
				1183	command = 'defcv'
				1184	cat_class = 'data'
				1185	class_class = string.join(idxsi[2:])
				1186
				1187
				1188	if not command:
				1189	raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
				1190
				1191	ch.chtype = chunk_type(CSLINE)
				1192	ch.data = command
				1193
				1194	cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
				1195	cslinearg.append(chunk(PLAIN, wh, ' '))
				1196	if class_class:
				1197	cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
				1198	cslinearg.append(chunk(PLAIN, wh, ' '))
				1199	cslinearg.append(dataname)
				1200
				1201	pp.insert(i, chunk(GROUP, wh, cslinearg))
				1202	i, length = i+1, length+1
				1203	hist.command = command
				1204	return length, i
				1205
				1206
# regular indices: those that are not set in tt font by default....
# (one-element tuple; index command names are tested for membership)
regindices = ('cindex', )
				1209
				1210	# remove illegal characters from node names
				1211	def rm_commas_etc(text):
				1212	result = ''
				1213	changed = 0
				1214	while 1:
				1215	pos = re_commas_etc.search(text)
				1216	if pos >= 0:
				1217	changed = 1
				1218	result = result + text[:pos]
				1219	text = text[pos+1:]
				1220	else:
				1221	result = result + text
				1222	break
				1223	if changed:
				1224	print 'Warning: nodename changhed to ' + `result`
				1225
				1226	return result
				1227
# boolean flags, looked up by name when an \if... chunk is processed;
# a true value keeps the conditional text (the reverse when negated)
flags = {'texi': 1}
				1230
				1231
				1232	##
				1233	## changeit: the actual routine, that changes the contents of the parsed
				1234	## chunks
				1235	##
				1236
				1237	def changeit(buf, pp):
				1238	global onlylatexspecial, hist, out
				1239
				1240	i, length = 0, len(pp)
				1241	while 1:
				1242	# sanity check: length should always equal len(pp)
				1243	if len(pp) != length:
				1244	raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
				1245	if i >= length:
				1246	break
				1247	ch = pp[i]
				1248	i = i + 1
				1249
				1250	if type(ch) == type(''):
				1251	#normally, only chunks are present in pp,
				1252	# but in some cases, some extra info
				1253	# has been inserted, e.g., the \end{...} clauses
				1254	raise 'FATAL', 'got string, probably too many ' + `end`
				1255
				1256	if ch.chtype == chunk_type(GROUP):
				1257	# check for {\em ...} constructs
				1258	if ch.data and \
				1259	ch.data[0].chtype == chunk_type(CSNAME) and \
				1260	s(buf, ch.data[0].data) in fontchanges.keys():
				1261	k = s(buf, ch.data[0].data)
				1262	del ch.data[0]
				1263	pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
				1264	length, i = length+1, i+1
				1265
				1266	# recursively parse the contents of the group
				1267	changeit(buf, ch.data)
				1268
				1269	elif ch.chtype == chunk_type(IF):
				1270	# \if...
				1271	flag, negate, data = ch.data
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	1272	##print 'IF: flag, negate = ' + `flag, negate`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1273	if flag not in flags.keys():
				1274	raise error, 'unknown flag ' + `flag`
				1275
				1276	value = flags[flag]
				1277	if negate:
				1278	value = (not value)
				1279	del pp[i-1]
				1280	length, i = length-1, i-1
				1281	if value:
				1282	pp[i:i] = data
				1283	length = length + len(data)
				1284
				1285
				1286	elif ch.chtype == chunk_type(ENV):
				1287	# \begin{...} ....
				1288	envname, data = ch.data
				1289
				1290	#push this environment name on stack
				1291	hist.inenv.insert(0, envname)
				1292
				1293	#append an endenv chunk after grouped data
				1294	data.append(chunk(ENDENV, ch.where, envname))
				1295	##[`data`]
				1296
				1297	#delete this object
				1298	del pp[i-1]
				1299	i, length = i-1, length-1
				1300
				1301	#insert found data
				1302	pp[i:i] = data
				1303	length = length + len(data)
				1304
				1305	if envname == 'verbatim':
				1306	pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
				1307	chunk(GROUP, ch.where, [])]
				1308	length, i = length+2, i+2
				1309
				1310	elif envname == 'itemize':
				1311	if hist.itemizenesting > len(itemizesymbols):
				1312	raise error, 'too deep itemize nesting'
				1313	ingroupch = [chunk(CSNAME, ch.where,\
				1314	itemizesymbols[hist.itemizenesting])]
				1315	hist.itemizenesting = hist.itemizenesting + 1
				1316	pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
				1317	chunk(GROUP, ch.where, ingroupch)]
				1318	length, i = length+2, i+2
				1319
				1320	elif envname == 'enumerate':
				1321	if hist.enumeratenesting > len(enumeratesymbols):
				1322	raise error, 'too deep enumerate nesting'
				1323	ingroupch = [chunk(PLAIN, ch.where,\
				1324	enumeratesymbols[hist.enumeratenesting])]
				1325	hist.enumeratenesting = hist.enumeratenesting + 1
				1326	pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
				1327	chunk(GROUP, ch.where, ingroupch)]
				1328	length, i = length+2, i+2
				1329
				1330	elif envname == 'description':
				1331	ingroupch = [chunk(CSNAME, ch.where, 'b')]
				1332	pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
				1333	chunk(GROUP, ch.where, ingroupch)]
				1334	length, i = length+2, i+2
				1335
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1336	elif (envname == 'tableiii') or \
				1337	(envname == 'tableii'):
				1338	if (envname == 'tableii'):
				1339	ltable = 2
				1340	else:
				1341	ltable = 3
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1342	wh = ch.where
				1343	newcode = []
				1344
				1345	#delete tabular format description
				1346	# e.g., {\|l\|c\|l\|}
				1347	length, newi = getnextarg(length, buf, pp, i)
				1348	del pp[i:newi]
				1349	length = length - (newi-i)
				1350
				1351	newcode.append(chunk(CSLINE, wh, 'table'))
				1352	ingroupch = [chunk(CSNAME, wh, 'asis')]
				1353	newcode.append(chunk(GROUP, wh, ingroupch))
				1354	newcode.append(chunk(CSLINE, wh, 'item'))
				1355
				1356	#get the name of macro for @item
				1357	# e.g., {code}
				1358	length, newi = getnextarg(length, buf, pp, i)
				1359
				1360	if newi-i != 1:
				1361	raise error, 'Sorry, expected 1 chunk argument'
				1362	if pp[i].chtype != chunk_type(PLAIN):
				1363	raise error, 'Sorry, expected plain text argument'
				1364	hist.itemargmacro = s(buf, pp[i].data)
				1365	del pp[i:newi]
				1366	length = length - (newi-i)
				1367
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1368	itembody = []
				1369	for count in range(ltable):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1370	length, newi = getnextarg(length, buf, pp, i)
				1371	emphgroup = [\
				1372	chunk(CSNAME, wh, 'emph'), \
				1373	chunk(GROUP, 0, pp[i:newi])]
				1374	del pp[i:newi]
				1375	length = length - (newi-i)
				1376	if count == 0:
				1377	itemarg = emphgroup
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1378	elif count == ltable-1:
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1379	itembody = itembody + \
				1380	[chunk(PLAIN, wh, ' --- ')] + \
				1381	emphgroup
				1382	else:
				1383	itembody = emphgroup
				1384	newcode.append(chunk(GROUP, wh, itemarg))
				1385	newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
				1386	pp[i:i] = newcode
				1387	l = len(newcode)
				1388	length, i = length+l, i+l
				1389	del newcode, l
				1390
				1391	if length != len(pp):
				1392	raise 'STILL, SOMETHING wrong', `i`
				1393
				1394
				1395	elif envname == 'funcdesc':
				1396	pp.insert(i, chunk(PLAIN, ch.where, ''))
				1397	i, length = i+1, length+1
				1398	length, i = do_funcdesc(length, buf, pp, i)
				1399
				1400	elif envname == 'excdesc':
				1401	pp.insert(i, chunk(PLAIN, ch.where, ''))
				1402	i, length = i+1, length+1
				1403	length, i = do_excdesc(length, buf, pp, i)
				1404
				1405	elif envname == 'datadesc':
				1406	pp.insert(i, chunk(PLAIN, ch.where, ''))
				1407	i, length = i+1, length+1
				1408	length, i = do_datadesc(length, buf, pp, i)
				1409
				1410	else:
				1411	print 'WARNING: don\'t know what to do with env ' + `envname`
				1412
				1413	elif ch.chtype == chunk_type(ENDENV):
				1414	envname = ch.data
				1415	if envname != hist.inenv[0]:
				1416	raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
				1417	del hist.inenv[0]
				1418	del pp[i-1]
				1419	i, length = i-1, length-1
				1420
				1421	if envname == 'verbatim':
				1422	pp[i:i] = [\
				1423	chunk(CSLINE, ch.where, 'end'), \
				1424	chunk(GROUP, ch.where, [\
				1425	chunk(PLAIN, ch.where, 'example')])]
				1426	i, length = i+2, length+2
				1427	elif envname == 'itemize':
				1428	hist.itemizenesting = hist.itemizenesting - 1
				1429	pp[i:i] = [\
				1430	chunk(CSLINE, ch.where, 'end'), \
				1431	chunk(GROUP, ch.where, [\
				1432	chunk(PLAIN, ch.where, 'itemize')])]
				1433	i, length = i+2, length+2
				1434	elif envname == 'enumerate':
				1435	hist.enumeratenesting = hist.enumeratenesting-1
				1436	pp[i:i] = [\
				1437	chunk(CSLINE, ch.where, 'end'), \
				1438	chunk(GROUP, ch.where, [\
				1439	chunk(PLAIN, ch.where, 'enumerate')])]
				1440	i, length = i+2, length+2
				1441	elif envname == 'description':
				1442	pp[i:i] = [\
				1443	chunk(CSLINE, ch.where, 'end'), \
				1444	chunk(GROUP, ch.where, [\
				1445	chunk(PLAIN, ch.where, 'table')])]
				1446	i, length = i+2, length+2
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1447	elif (envname == 'tableiii') or (envname == 'tableii'):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1448	pp[i:i] = [\
				1449	chunk(CSLINE, ch.where, 'end'), \
				1450	chunk(GROUP, ch.where, [\
				1451	chunk(PLAIN, ch.where, 'table')])]
				1452	i, length = i+2, length + 2
				1453	pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
				1454	i, length = i+1, length+1
				1455
				1456	elif envname in ('funcdesc', 'excdesc', 'datadesc'):
				1457	pp[i:i] = [\
				1458	chunk(CSLINE, ch.where, 'end'), \
				1459	chunk(GROUP, ch.where, [\
				1460	chunk(PLAIN, ch.where, hist.command)])]
				1461	i, length = i+2, length+2
				1462	else:
				1463	print 'WARNING: ending env ' + `envname` + 'has no actions'
				1464
				1465	elif ch.chtype == chunk_type(CSNAME):
				1466	# control name transformations
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1467	if s(buf, ch.data) == 'optional':
				1468	pp[i-1].chtype = chunk_type (PLAIN)
				1469	pp[i-1].data = '['
				1470	if (i < length) and \
				1471	(pp[i].chtype == chunk_type(GROUP)):
				1472	cp=pp[i].data
				1473	pp[i:i+1]=cp + [\
				1474	chunk(PLAIN, ch.where, ']')]
				1475	length = length+len(cp)
				1476	elif s(buf, ch.data) in ignoredcommands:
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1477	del pp[i-1]
				1478	i, length = i-1, length-1
				1479	elif s(buf, ch.data) == '@' and \
				1480	i != length and \
				1481	pp[i].chtype == chunk_type(PLAIN) and \
				1482	s(buf, pp[i].data)[0] == '.':
				1483	# \@. --> \. --> @.
				1484	ch.data = '.'
				1485	del pp[i]
				1486	length = length-1
				1487	elif s(buf, ch.data) == '\\':
				1488	# \\ --> \* --> @*
				1489	ch.data = '*'
				1490	elif len(s(buf, ch.data)) == 1 and \
				1491	s(buf, ch.data) in onlylatexspecial:
				1492	ch.chtype = chunk_type(PLAIN)
				1493	# check if such a command is followed by
				1494	# an empty group: e.g., `\%{}'. If so, remove
				1495	# this empty group too
				1496	if i < length and \
				1497	pp[i].chtype == chunk_type(GROUP) \
				1498	and len(pp[i].data) == 0:
				1499	del pp[i]
				1500	length = length-1
				1501
				1502	elif hist.inargs and s(buf, ch.data) in inargsselves:
				1503	# This is the special processing of the
				1504	# arguments of the \begin{funcdesc}... or
				1505	# \funcline... arguments
				1506	# \, --> , \[ --> [, \] --> ]
				1507	ch.chtype = chunk_type(PLAIN)
				1508
				1509	elif s(buf, ch.data) == 'renewcommand':
				1510	# \renewcommand{\indexsubitem}....
				1511	i, length = i-1, length-1
				1512	del pp[i]
				1513	length, newi = getnextarg(length, buf, pp, i)
				1514	if newi-i == 1 \
				1515	and i < length \
				1516	and pp[i].chtype == chunk_type(CSNAME) \
				1517	and s(buf, pp[i].data) == 'indexsubitem':
				1518	del pp[i:newi]
				1519	length = length - (newi-i)
				1520	length, newi = getnextarg(length, buf, pp, i)
				1521	text = flattext(buf, pp[i:newi])
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1522	if text[:1] != '(' or text[-1:] != ')':
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1523	raise error, 'expected indexsubitme enclosed in braces'
				1524	words = string.split(text[1:-1])
				1525	hist.indexsubitem = words
				1526	del text, words
				1527	else:
				1528	print 'WARNING: renewcommand with unsupported arg removed'
				1529	del pp[i:newi]
				1530	length = length - (newi-i)
				1531
				1532	elif s(buf, ch.data) == 'item':
				1533	ch.chtype = chunk_type(CSLINE)
				1534	length, newi = getoptarg(length, buf, pp, i)
				1535	ingroupch = pp[i:newi]
				1536	del pp[i:newi]
				1537	length = length - (newi-i)
				1538	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1539	i, length = i+1, length+1
				1540
				1541	elif s(buf, ch.data) == 'ttindex':
				1542	idxsi = hist.indexsubitem
				1543
				1544	cat_class = ''
				1545	if len(idxsi) >= 2 and idxsi[1] in \
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1546	('method', 'function', 'protocol'):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1547	command = 'findex'
				1548	elif len(idxsi) >= 2 and idxsi[1] in \
				1549	('exception', 'object'):
				1550	command = 'vindex'
				1551	else:
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	1552	print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1553	command = 'cindex'
				1554
				1555	if not cat_class:
				1556	cat_class = '('+string.join(idxsi)+')'
				1557
				1558	ch.chtype = chunk_type(CSLINE)
				1559	ch.data = command
				1560
				1561	length, newi = getnextarg(length, buf, pp, i)
				1562	arg = pp[i:newi]
				1563	del pp[i:newi]
				1564	length = length - (newi-i)
				1565
				1566	cat_arg = [chunk(PLAIN, ch.where, cat_class)]
				1567
				1568	# determine what should be set in roman, and
				1569	# what in tt-font
				1570	if command in regindices:
				1571
				1572	arg = [chunk(CSNAME, ch.where, 't'), \
				1573	chunk(GROUP, ch.where, arg)]
				1574	else:
				1575	cat_arg = [chunk(CSNAME, ch.where, 'r'), \
				1576	chunk(GROUP, ch.where, cat_arg)]
				1577
				1578	ingroupch = arg + \
				1579	[chunk(PLAIN, ch.where, ' ')] + \
				1580	cat_arg
				1581
				1582	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1583	length, i = length+1, i+1
				1584
				1585
				1586	elif s(buf, ch.data) == 'ldots':
				1587	# \ldots --> \dots{} --> @dots{}
				1588	ch.data = 'dots'
				1589	if i == length \
				1590	or pp[i].chtype != chunk_type(GROUP) \
				1591	or pp[i].data != []:
				1592	pp.insert(i, chunk(GROUP, ch.where, []))
				1593	i, length = i+1, length+1
				1594	elif s(buf, ch.data) in wordsselves:
				1595	# \UNIX --> UNIX
				1596	ch.chtype = chunk_type(PLAIN)
				1597	if i != length \
				1598	and pp[i].chtype == chunk_type(GROUP) \
				1599	and pp[i].data == []:
				1600	del pp[i]
				1601	length = length-1
				1602	elif s(buf, ch.data) in for_texi:
				1603	pass
				1604
				1605	elif s(buf, ch.data) == 'e':
				1606	# \e --> \
				1607	ch.data = '\\'
				1608	ch.chtype = chunk_type(PLAIN)
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1609	elif (s(buf, ch.data) == 'lineiii') or\
				1610	(s(buf, ch.data) == 'lineii'):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1611	# This is the most tricky one
				1612	# \lineiii{a1}{a2}[{a3}] -->
				1613	# @item @<cts. of itemargmacro>{a1}
				1614	# a2 [ -- a3]
				1615	#
				1616	##print 'LINEIIIIII!!!!!!!'
				1617	## wobj = Wobj().init()
				1618	## dumpit(buf, wobj.write, pp[i-1:i+5])
				1619	## print '--->' + wobj.data + '<----'
				1620	if not hist.inenv:
				1621	raise error, \
				1622	'no environment for lineiii'
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1623	if (hist.inenv[0] != 'tableiii') and\
				1624	(hist.inenv[0] != 'tableii'):
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1625	raise error, \
				1626	'wrong command (' + \
Guido van Rossum	b819bdf	1995-03-15 11:26:26 +0000	[diff] [blame]	1627	s(buf, ch.data)+ \
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1628	') in wrong environment (' \
				1629	+ `hist.inenv[0]` + ')'
				1630	ch.chtype = chunk_type(CSLINE)
				1631	ch.data = 'item'
				1632	length, newi = getnextarg(length, buf, pp, i)
				1633	ingroupch = [chunk(CSNAME, 0, \
				1634	hist.itemargmacro), \
				1635	chunk(GROUP, 0, pp[i:newi])]
				1636	del pp[i:newi]
				1637	length = length - (newi-i)
				1638	## print 'ITEM ARG: --->',
				1639	## wobj = Wobj().init()
				1640	## dumpit(buf, wobj.write, ingroupch)
				1641	## print wobj.data, '<---'
				1642	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1643	grouppos = i
				1644	i, length = i+1, length+1
				1645	length, i = getnextarg(length, buf, pp, i)
				1646	length, newi = getnextarg(length, buf, pp, i)
				1647	if newi > i:
				1648	# we have a 3rd arg
				1649	pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
				1650	i = newi + 1
				1651	length = length + 1
				1652	## pp[grouppos].data = pp[grouppos].data \
				1653	## + [chunk(PLAIN, ch.where, ' ')] \
				1654	## + pp[i:newi]
				1655	## del pp[i:newi]
				1656	## length = length - (newi-i)
				1657	if length != len(pp):
				1658	raise 'IN LINEIII IS THE ERR', `i`
				1659
				1660	elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
				1661	#\xxxsection{A} ---->
				1662	# @node A, , ,
				1663	# @xxxsection A
				1664	## also: remove commas and quotes
				1665	ch.chtype = chunk_type(CSLINE)
				1666	length, newi = getnextarg(length, buf, pp, i)
				1667	afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
				1668	if afternodenamecmd < 0:
				1669	cp1 = crcopy(pp[i:newi])
				1670	pp[i:newi] = [\
				1671	chunk(GROUP, ch.where, \
				1672	pp[i:newi])]
				1673	length, newi = length - (newi-i) + 1, \
				1674	i+1
				1675	text = flattext(buf, cp1)
				1676	text = invent_node_names(text)
				1677	else:
				1678	length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
				1679	cp1 = crcopy(pp[afternodenamecmd:endarg])
				1680	del pp[newi:endarg]
				1681	length = length - (endarg-newi)
				1682
				1683	pp[i:newi] = [\
				1684	chunk(GROUP, ch.where, \
				1685	pp[i:newi])]
				1686	length, newi = length - (newi-i) + 1, \
				1687	i + 1
				1688	text = flattext(buf, cp1)
				1689	if text[-1] == '.':
				1690	text = text[:-1]
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	1691	## print 'FLATTEXT:', `text`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1692	if text in hist.nodenames:
				1693	print 'WARNING: node name ' + `text` + ' already used'
				1694	out.doublenodes.append(text)
				1695	else:
				1696	hist.nodenames.append(text)
				1697	text = rm_commas_etc(text)
				1698	pp[i-1:i-1] = [\
				1699	chunk(CSLINE, ch.where, 'node'), \
				1700	chunk(GROUP, ch.where, [\
				1701	chunk(PLAIN, ch.where, text+', , ,')\
				1702	])]
				1703	i, length = newi+2, length+2
				1704
				1705	elif s(buf,ch.data) == 'funcline':
				1706	# fold it to a very short environment
				1707	pp[i-1:i-1] = [\
				1708	chunk(CSLINE, ch.where, 'end'), \
				1709	chunk(GROUP, ch.where, [\
				1710	chunk(PLAIN, ch.where, hist.command)])]
				1711	i, length = i+2, length+2
				1712	length, i = do_funcdesc(length, buf, pp, i)
				1713
				1714	elif s(buf,ch.data) == 'dataline':
				1715	pp[i-1:i-1] = [\
				1716	chunk(CSLINE, ch.where, 'end'), \
				1717	chunk(GROUP, ch.where, [\
				1718	chunk(PLAIN, ch.where, hist.command)])]
				1719	i, length = i+2, length+2
				1720	length, i = do_datadesc(length, buf, pp, i)
				1721
				1722	elif s(buf,ch.data) == 'excline':
				1723	pp[i-1:i-1] = [\
				1724	chunk(CSLINE, ch.where, 'end'), \
				1725	chunk(GROUP, ch.where, [\
				1726	chunk(PLAIN, ch.where, hist.command)])]
				1727	i, length = i+2, length+2
				1728	length, i = do_excdesc(length, buf, pp, i)
				1729
				1730
				1731	elif s(buf, ch.data) == 'index':
				1732	#\index{A} --->
				1733	# @cindex A
				1734	ch.chtype = chunk_type(CSLINE)
				1735	ch.data = 'cindex'
				1736	length, newi = getnextarg(length, buf, pp, i)
				1737
				1738	ingroupch = pp[i:newi]
				1739	del pp[i:newi]
				1740	length = length - (newi-i)
				1741	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1742	length, i = length+1, i+1
				1743
				1744	elif s(buf, ch.data) == 'bifuncindex':
				1745	ch.chtype = chunk_type(CSLINE)
				1746	ch.data = 'findex'
				1747	length, newi = getnextarg(length, buf, pp, i)
				1748	ingroupch = pp[i:newi]
				1749	del pp[i:newi]
				1750	length = length - (newi-i)
				1751
				1752	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1753	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1754	ingroupch.append(chunk(GROUP, ch.where, [\
				1755	chunk(PLAIN, ch.where, \
				1756	'(built-in function)')]))
				1757
				1758	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1759	length, i = length+1, i+1
				1760
				1761
Guido van Rossum	7760cde	1995-03-17 16:03:11 +0000	[diff] [blame]	1762	elif s(buf, ch.data) == 'obindex':
				1763	ch.chtype = chunk_type(CSLINE)
				1764	ch.data = 'findex'
				1765	length, newi = getnextarg(length, buf, pp, i)
				1766	ingroupch = pp[i:newi]
				1767	del pp[i:newi]
				1768	length = length - (newi-i)
				1769
				1770	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1771	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1772	ingroupch.append(chunk(GROUP, ch.where, [\
				1773	chunk(PLAIN, ch.where, \
				1774	'(object)')]))
				1775
				1776	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1777	length, i = length+1, i+1
				1778
				1779
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	1780	elif s(buf, ch.data) == 'opindex':
				1781	ch.chtype = chunk_type(CSLINE)
				1782	ch.data = 'findex'
				1783	length, newi = getnextarg(length, buf, pp, i)
				1784	ingroupch = pp[i:newi]
				1785	del pp[i:newi]
				1786	length = length - (newi-i)
				1787
				1788	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1789	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1790	ingroupch.append(chunk(GROUP, ch.where, [\
				1791	chunk(PLAIN, ch.where, \
				1792	'(operator)')]))
				1793
				1794	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1795	length, i = length+1, i+1
				1796
				1797
				1798	elif s(buf, ch.data) == 'bimodindex':
				1799	ch.chtype = chunk_type(CSLINE)
				1800	ch.data = 'pindex'
				1801	length, newi = getnextarg(length, buf, pp, i)
				1802	ingroupch = pp[i:newi]
				1803	del pp[i:newi]
				1804	length = length - (newi-i)
				1805
				1806	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1807	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1808	ingroupch.append(chunk(GROUP, ch.where, [\
				1809	chunk(PLAIN, ch.where, \
				1810	'(built-in)')]))
				1811
				1812	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1813	length, i = length+1, i+1
				1814
				1815	elif s(buf, ch.data) == 'sectcode':
				1816	ch.data = 'code'
				1817
				1818
				1819	elif s(buf, ch.data) == 'stmodindex':
				1820	ch.chtype = chunk_type(CSLINE)
				1821	# use the program index as module index
				1822	ch.data = 'pindex'
				1823	length, newi = getnextarg(length, buf, pp, i)
				1824	ingroupch = pp[i:newi]
				1825	del pp[i:newi]
				1826	length = length - (newi-i)
				1827
				1828	ingroupch.append(chunk(PLAIN, ch.where, ' '))
				1829	ingroupch.append(chunk(CSNAME, ch.where, 'r'))
				1830	ingroupch.append(chunk(GROUP, ch.where, [\
				1831	chunk(PLAIN, ch.where, \
				1832	'(standard)')]))
				1833
				1834	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1835	length, i = length+1, i+1
				1836
				1837
				1838	elif s(buf, ch.data) == 'stindex':
				1839	# XXX must actually go to newindex st
				1840	wh = ch.where
				1841	ch.chtype = chunk_type(CSLINE)
				1842	ch.data = 'cindex'
				1843	length, newi = getnextarg(length, buf, pp, i)
				1844	ingroupch = [chunk(CSNAME, wh, 'code'), \
				1845	chunk(GROUP, wh, pp[i:newi])]
				1846
				1847	del pp[i:newi]
				1848	length = length - (newi-i)
				1849
				1850	t = ingroupch[:]
				1851	t.append(chunk(PLAIN, wh, ' statement'))
				1852
				1853	pp.insert(i, chunk(GROUP, wh, t))
				1854	i, length = i+1, length+1
				1855
				1856	pp.insert(i, chunk(CSLINE, wh, 'cindex'))
				1857	i, length = i+1, length+1
				1858
				1859	t = ingroupch[:]
				1860	t.insert(0, chunk(PLAIN, wh, 'statement, '))
				1861
				1862	pp.insert(i, chunk(GROUP, wh, t))
				1863	i, length = i+1, length+1
				1864
				1865
				1866	elif s(buf, ch.data) == 'indexii':
				1867	#\indexii{A}{B} --->
				1868	# @cindex A B
				1869	# @cindex B, A
				1870	length, newi = getnextarg(length, buf, pp, i)
				1871	cp11 = pp[i:newi]
				1872	cp21 = crcopy(pp[i:newi])
				1873	del pp[i:newi]
				1874	length = length - (newi-i)
				1875	length, newi = getnextarg(length, buf, pp, i)
				1876	cp12 = pp[i:newi]
				1877	cp22 = crcopy(pp[i:newi])
				1878	del pp[i:newi]
				1879	length = length - (newi-i)
				1880
				1881	ch.chtype = chunk_type(CSLINE)
				1882	ch.data = 'cindex'
				1883	pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
				1884	chunk(PLAIN, ch.where, ' ')] + cp12))
				1885	i, length = i+1, length+1
				1886	pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
				1887	chunk(GROUP, ch.where, cp22 + [\
				1888	chunk(PLAIN, ch.where, ', ')]+ cp21)]
				1889	i, length = i+2, length+2
				1890
				1891	elif s(buf, ch.data) == 'indexiii':
				1892	length, newi = getnextarg(length, buf, pp, i)
				1893	cp11 = pp[i:newi]
				1894	cp21 = crcopy(pp[i:newi])
				1895	cp31 = crcopy(pp[i:newi])
				1896	del pp[i:newi]
				1897	length = length - (newi-i)
				1898	length, newi = getnextarg(length, buf, pp, i)
				1899	cp12 = pp[i:newi]
				1900	cp22 = crcopy(pp[i:newi])
				1901	cp32 = crcopy(pp[i:newi])
				1902	del pp[i:newi]
				1903	length = length - (newi-i)
				1904	length, newi = getnextarg(length, buf, pp, i)
				1905	cp13 = pp[i:newi]
				1906	cp23 = crcopy(pp[i:newi])
				1907	cp33 = crcopy(pp[i:newi])
				1908	del pp[i:newi]
				1909	length = length - (newi-i)
				1910
				1911	ch.chtype = chunk_type(CSLINE)
				1912	ch.data = 'cindex'
				1913	pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
				1914	chunk(PLAIN, ch.where, ' ')] + cp12 \
				1915	+ [chunk(PLAIN, ch.where, ' ')] \
				1916	+ cp13))
				1917	i, length = i+1, length+1
				1918	pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
				1919	chunk(GROUP, ch.where, cp22 + [\
				1920	chunk(PLAIN, ch.where, ' ')]+ cp23\
				1921	+ [chunk(PLAIN, ch.where, ', ')] +\
				1922	cp21)]
				1923	i, length = i+2, length+2
				1924	pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
				1925	chunk(GROUP, ch.where, cp33 + [\
				1926	chunk(PLAIN, ch.where, ', ')]+ cp31\
				1927	+ [chunk(PLAIN, ch.where, ' ')] +\
				1928	cp32)]
				1929	i, length = i+2, length+2
				1930
				1931
				1932	elif s(buf, ch.data) == 'indexiv':
				1933	length, newi = getnextarg(length, buf, pp, i)
				1934	cp11 = pp[i:newi]
				1935	cp21 = crcopy(pp[i:newi])
				1936	cp31 = crcopy(pp[i:newi])
				1937	cp41 = crcopy(pp[i:newi])
				1938	del pp[i:newi]
				1939	length = length - (newi-i)
				1940	length, newi = getnextarg(length, buf, pp, i)
				1941	cp12 = pp[i:newi]
				1942	cp22 = crcopy(pp[i:newi])
				1943	cp32 = crcopy(pp[i:newi])
				1944	cp42 = crcopy(pp[i:newi])
				1945	del pp[i:newi]
				1946	length = length - (newi-i)
				1947	length, newi = getnextarg(length, buf, pp, i)
				1948	cp13 = pp[i:newi]
				1949	cp23 = crcopy(pp[i:newi])
				1950	cp33 = crcopy(pp[i:newi])
				1951	cp43 = crcopy(pp[i:newi])
				1952	del pp[i:newi]
				1953	length = length - (newi-i)
				1954	length, newi = getnextarg(length, buf, pp, i)
				1955	cp14 = pp[i:newi]
				1956	cp24 = crcopy(pp[i:newi])
				1957	cp34 = crcopy(pp[i:newi])
				1958	cp44 = crcopy(pp[i:newi])
				1959	del pp[i:newi]
				1960	length = length - (newi-i)
				1961
				1962	ch.chtype = chunk_type(CSLINE)
				1963	ch.data = 'cindex'
				1964	ingroupch = cp11 + \
				1965	spacech + cp12 + \
				1966	spacech + cp13 + \
				1967	spacech + cp14
				1968	pp.insert(i, chunk(GROUP, ch.where, ingroupch))
				1969	i, length = i+1, length+1
				1970	ingroupch = cp22 + \
				1971	spacech + cp23 + \
				1972	spacech + cp24 + \
				1973	commach + cp21
				1974	pp[i:i] = cindexch + [\
				1975	chunk(GROUP, ch.where, ingroupch)]
				1976	i, length = i+2, length+2
				1977	ingroupch = cp33 + \
				1978	spacech + cp34 + \
				1979	commach + cp31 + \
				1980	spacech + cp32
				1981	pp[i:i] = cindexch + [\
				1982	chunk(GROUP, ch.where, ingroupch)]
				1983	i, length = i+2, length+2
				1984	ingroupch = cp44 + \
				1985	commach + cp41 + \
				1986	spacech + cp42 + \
				1987	spacech + cp43
				1988	pp[i:i] = cindexch + [\
				1989	chunk(GROUP, ch.where, ingroupch)]
				1990	i, length = i+2, length+2
				1991
				1992
				1993
				1994	else:
				1995	print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
				1996
				1997
				1998
				1999	re_atsign = regex.compile('[@{}]')
				2000	re_newline = regex.compile('\n')
				2001
				2002	def dumpit(buf, wm, pp):
				2003
				2004	global out
				2005
				2006	i, length = 0, len(pp)
				2007
				2008	addspace = 0
				2009
				2010	while 1:
				2011	if len(pp) != length:
				2012	raise 'FATAL', 'inconsistent length'
				2013	if i == length:
				2014	break
				2015	ch = pp[i]
				2016	i = i + 1
				2017
				2018	if addspace:
				2019	dospace = 1
				2020	addspace = 0
				2021	else:
				2022	dospace = 0
				2023
				2024	if ch.chtype == chunk_type(CSNAME):
				2025	wm('@' + s(buf, ch.data))
				2026	if s(buf, ch.data) == 'node' and \
				2027	pp[i].chtype == chunk_type(PLAIN) and \
				2028	s(buf, pp[i].data) in out.doublenodes:
				2029	##XXX doesnt work yet??
				2030	wm(' ZZZ-' + zfill(`i`, 4))
				2031	if s(buf, ch.data)[0] in string.letters:
				2032	addspace = 1
				2033	elif ch.chtype == chunk_type(PLAIN):
				2034	if dospace and s(buf, ch.data) not in (' ', '\t'):
				2035	wm(' ')
				2036	text = s(buf, ch.data)
				2037	while 1:
				2038	pos = re_atsign.search(text)
				2039	if pos < 0:
				2040	break
				2041	wm(text[:pos] + '@' + text[pos])
				2042	text = text[pos+1:]
				2043	wm(text)
				2044	elif ch.chtype == chunk_type(GROUP):
				2045	wm('{')
				2046	dumpit(buf, wm, ch.data)
				2047	wm('}')
				2048	elif ch.chtype == chunk_type(DENDLINE):
				2049	wm('\n\n')
				2050	while i != length and pp[i].chtype in \
				2051	(chunk_type(DENDLINE), chunk_type(ENDLINE)):
				2052	i = i + 1
				2053	elif ch.chtype == chunk_type(OTHER):
				2054	wm(s(buf, ch.data))
				2055	elif ch.chtype == chunk_type(ACTIVE):
				2056	wm(s(buf, ch.data))
				2057	elif ch.chtype == chunk_type(ENDLINE):
				2058	wm('\n')
				2059	elif ch.chtype == chunk_type(CSLINE):
				2060	if i >= 2 and pp[i-2].chtype not in \
				2061	(chunk_type(ENDLINE), chunk_type(DENDLINE)) \
				2062	and (pp[i-2].chtype != chunk_type(PLAIN) \
				2063	or s(buf, pp[i-2].data)[-1] != '\n'):
				2064
				2065	wm('\n')
				2066	wm('@' + s(buf, ch.data))
				2067	if i == length:
				2068	raise error, 'CSLINE expected another chunk'
				2069	if pp[i].chtype != chunk_type(GROUP):
				2070	raise error, 'CSLINE expected GROUP'
				2071	if type(pp[i].data) != type([]):
				2072	raise error, 'GROUP chould contain []-data'
				2073
				2074	wobj = Wobj().init()
				2075	dumpit(buf, wobj.write, pp[i].data)
				2076	i = i + 1
				2077	text = wobj.data
				2078	del wobj
				2079	if text:
				2080	wm(' ')
				2081	while 1:
				2082	pos = re_newline.search(text)
				2083	if pos < 0:
				2084	break
				2085	print 'WARNING: found newline in csline arg'
				2086	wm(text[:pos] + ' ')
				2087	text = text[pos+1:]
				2088	wm(text)
				2089	if i >= length or \
				2090	pp[i].chtype not in (chunk_type(CSLINE), \
				2091	chunk_type(ENDLINE), chunk_type(DENDLINE)) \
				2092	and (pp[i].chtype != chunk_type(PLAIN) \
				2093	or s(buf, pp[i].data)[0] != '\n'):
				2094	wm('\n')
				2095
				2096	elif ch.chtype == chunk_type(COMMENT):
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2097	## print 'COMMENT: previous chunk =', pp[i-2]
Guido van Rossum	557ed94	1995-03-28 13:33:45 +0000	[diff] [blame]	2098	## if pp[i-2].chtype == chunk_type(PLAIN):
				2099	## print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2100	if s(buf, ch.data) and \
				2101	regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
				2102	if i >= 2 and pp[i-2].chtype not in \
				2103	(chunk_type(ENDLINE), chunk_type(DENDLINE)) \
				2104	and not (pp[i-2].chtype == chunk_type(PLAIN) \
				2105	and regex.match('\$.\\\|\n\$[ \t]\n$', s(buf, pp[i-2].data)) >= 0):
				2106	print 'ADDING NEWLINE'
				2107	wm('\n')
				2108	wm('@c ' + s(buf, ch.data))
				2109	elif ch.chtype == chunk_type(IGNORE):
				2110	pass
				2111	else:
				2112	try:
				2113	str = `s(buf, ch.data)`
				2114	except TypeError:
				2115	str = `ch.data`
				2116	if len(str) > 400:
				2117	str = str[:400] + '...'
				2118	print 'warning:', ch.chtype, 'not handled, data ' + str
				2119
				2120
				2121
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2122	def main():
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2123	outfile = None
				2124	headerfile = 'texipre.dat'
				2125	trailerfile = 'texipost.dat'
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2126
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2127	try:
				2128	opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
				2129	except getopt.error:
				2130	args = []
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2131
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2132	if not args:
				2133	print 'usage: partparse [-o outfile] [-h headerfile]',
				2134	print '[-t trailerfile] file ...'
				2135	sys.exit(2)
				2136
				2137	for opt, arg in opts:
				2138	if opt == '-o': outfile = arg
				2139	if opt == '-h': headerfile = arg
				2140	if opt == '-t': trailerfile = arg
				2141
				2142	if not outfile:
				2143	root, ext = os.path.splitext(args[0])
				2144	outfile = root + '.texi'
				2145
				2146	if outfile in args:
				2147	print 'will not overwrite input file', outfile
				2148	sys.exit(2)
				2149
				2150	outf = open(outfile, 'w')
				2151	outf.write(open(headerfile, 'r').read())
				2152
				2153	for file in args:
				2154	if len(args) > 1: print '='20, file, '='20
				2155	buf = open(file, 'r').read()
				2156	w, pp = parseit(buf)
				2157	startchange()
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2158	changeit(buf, pp)
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2159	dumpit(buf, outf.write, pp)
				2160
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2161	outf.write(open(trailerfile, 'r').read())
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2162
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2163	outf.close()
Guido van Rossum	95cd2ef	1992-12-08 14:37:55 +0000	[diff] [blame]	2164
Guido van Rossum	7a2dba2	1993-11-05 14:45:11 +0000	[diff] [blame]	2165	main()