blob: 2f072bbdf1e1ff9c68d8057d270cd74b33a3665f [file] [log] [blame]
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001#
2# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
3# and generate texinfo source.
4#
# This is *not* a good example of good programming practices. In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and maintainable.
#
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
11#
12# Have I been clear enough??
13#
14# -jh
15
16
Guido van Rossum7a2dba21993-11-05 14:45:11 +000017import sys, string, regex, getopt, os
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000018
Guido van Rossum49604d31996-09-10 22:19:51 +000019from types import IntType, ListType, StringType, TupleType
20
# Different parse modes for phase 1.
# parseit() dispatches on these; Mode (below) wraps one of these integers
# so that it prints as its symbolic name.
MODE_REGULAR = 0
MODE_VERBATIM = 1
MODE_CS_SCAN = 2
MODE_COMMENT = 3
MODE_MATH = 4
MODE_DMATH = 5
MODE_GOBBLEWHITE = 6

# All legal mode values; Mode.__init__ rejects anything not listed here.
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
             MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000032
# Show the neighbourhood of the scanned buffer: up to ten characters on
# either side of position `where`, each side rendered with repr().
def epsilon(buf, where):
    lo = max(where - 10, 0)
    hi = min(where + 10, len(buf))
    before = repr(buf[lo:where])
    after = repr(buf[where:hi])
    return ' Context %s.%s.' % (before, after)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000041
# Format the module-level `lineno` counter for use in error messages.
# (Original author's remark: should return the line number; never worked.)
def lin():
    return ' Line %s.' % repr(lineno)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000046
# Displays the recursion level.
def lv(lvl):
    return ' Level %s.' % repr(lvl)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000050
# Combine the three previous functions: recursion level, line number and
# buffer context, in that order.  Used often as an error-message suffix.
def lle(lvl, buf, where):
    level_part = lv(lvl)
    line_part = lin()
    context_part = epsilon(buf, where)
    return level_part + line_part + context_part
54
55
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000056# This class is only needed for _symbolic_ representation of the parse mode.
57class Mode:
Guido van Rossum5f18d6c1996-09-10 22:34:20 +000058 def __init__(self, arg):
59 if arg not in the_modes:
60 raise ValueError, 'mode not in the_modes'
61 self.mode = arg
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000062
Guido van Rossum5f18d6c1996-09-10 22:34:20 +000063 def __cmp__(self, other):
64 if type(self) != type(other):
65 other = mode(other)
66 return cmp(self.mode, other.mode)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000067
Guido van Rossum5f18d6c1996-09-10 22:34:20 +000068 def __repr__(self):
69 if self.mode == MODE_REGULAR:
70 return 'MODE_REGULAR'
71 elif self.mode == MODE_VERBATIM:
72 return 'MODE_VERBATIM'
73 elif self.mode == MODE_CS_SCAN:
74 return 'MODE_CS_SCAN'
75 elif self.mode == MODE_COMMENT:
76 return 'MODE_COMMENT'
77 elif self.mode == MODE_MATH:
78 return 'MODE_MATH'
79 elif self.mode == MODE_DMATH:
80 return 'MODE_DMATH'
81 elif self.mode == MODE_GOBBLEWHITE:
82 return 'MODE_GOBBLEWHITE'
83 else:
84 raise ValueError, 'mode not in the_modes'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000085
86# just a wrapper around a class initialisation
Guido van Rossum49604d31996-09-10 22:19:51 +000087mode = Mode
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000088
89
# After phase 1, the text consists of chunks, each with a certain type.
# This type is assigned to the chtype member of the chunk,
# the where-field contains the buffer position where the chunk was found,
# and the data field contains either
# (1) a tuple describing start and end positions of the substring
#     (can be used as a slice for the buf-variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks
PLAIN = 0                       # ASSUME PLAINTEXT, data = the text
GROUP = 1                       # GROUP ({}), data = [chunk, chunk,..]
CSNAME = 2                      # CONTROL SEQ TOKEN, data = the command
COMMENT = 3                     # data is the actual comment
DMATH = 4                       # DISPLAYMATH, data = [chunk, chunk,..]
MATH = 5                        # MATH, see DISPLAYMATH
OTHER = 6                       # CHAR WITH CATCODE OTHER, data = char
ACTIVE = 7                      # ACTIVE CHAR
GOBBLEDWHITE = 8                # Gobbled LWSP, after CSNAME
ENDLINE = 9                     # END-OF-LINE, data = '\n'
DENDLINE = 10                   # DOUBLE EOL, data='\n', indicates \par
ENV = 11                        # LaTeX-environment
                                # data =(envname,[ch,ch,ch,.])
CSLINE = 12                     # for texi: next chunk will be one group
                                # of args. Will be set all on 1 line
IGNORE = 13                     # IGNORE this data
ENDENV = 14                     # TEMP END OF GROUP INDICATOR
IF = 15                         # IF-directive
                                # data = (flag,negate,[ch, ch, ch,...])

# All legal chunk type codes; ChunkType.__init__ rejects anything else.
the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
             GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV, IF)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000118
119# class, just to display symbolic name
120class ChunkType:
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000121 def __init__(self, chunk_type):
122 if chunk_type not in the_types:
123 raise ValueError, 'chunk_type not in the_types'
124 self.chunk_type = chunk_type
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000125
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000126 def __cmp__(self, other):
127 if type(self) != type(other):
128 other = chunk_type(other)
129 return cmp(self.chunk_type, other.chunk_type)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000130
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000131 def __repr__(self):
132 if self.chunk_type == PLAIN:
133 return 'PLAIN'
134 elif self.chunk_type == GROUP:
135 return 'GROUP'
136 elif self.chunk_type == CSNAME:
137 return 'CSNAME'
138 elif self.chunk_type == COMMENT:
139 return 'COMMENT'
140 elif self.chunk_type == DMATH:
141 return 'DMATH'
142 elif self.chunk_type == MATH:
143 return 'MATH'
144 elif self.chunk_type == OTHER:
145 return 'OTHER'
146 elif self.chunk_type == ACTIVE:
147 return 'ACTIVE'
148 elif self.chunk_type == GOBBLEDWHITE:
149 return 'GOBBLEDWHITE'
150 elif self.chunk_type == DENDLINE:
151 return 'DENDLINE'
152 elif self.chunk_type == ENDLINE:
153 return 'ENDLINE'
154 elif self.chunk_type == ENV:
155 return 'ENV'
156 elif self.chunk_type == CSLINE:
157 return 'CSLINE'
158 elif self.chunk_type == IGNORE:
159 return 'IGNORE'
160 elif self.chunk_type == ENDENV:
161 return 'ENDENV'
162 elif self.chunk_type == IF:
163 return 'IF'
164 else:
165 raise ValueError, 'chunk_type not in the_types'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000166
# ...and the wrapper: one ChunkType instance per type code is created up
# front and shared, so chunk_type() never constructs a new object.
_all_chunk_types = {}
for t in the_types:
    _all_chunk_types[t] = ChunkType(t)

# Return the shared ChunkType instance for type code t.
# A code that is not in the_types raises KeyError.
def chunk_type(t):
    return _all_chunk_types[t]

# store a type object of the ChunkType-class-instance...
# (Chunk.__init__ compares against it to decide whether to wrap its argument)
chunk_type_type = type(chunk_type(0))
Guido van Rossum49604d31996-09-10 22:19:51 +0000177
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000178# this class contains a part of the parsed buffer
179class Chunk:
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000180 def __init__(self, chtype, where, data):
181 if type(chtype) != chunk_type_type:
182 chtype = chunk_type(chtype)
183 self.chtype = chtype
184 if type(where) != IntType:
185 raise TypeError, "'where' is not a number"
186 self.where = where
187 self.data = data
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000188
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000189 def __repr__(self):
190 return 'chunk' + `self.chtype, self.where, self.data`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000191
192# and the wrapper
Guido van Rossum49604d31996-09-10 22:19:51 +0000193chunk = Chunk
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000194
195
# Exception raised for parse errors.  This is a string exception; the code
# in this file raises it as `raise error, message`.
error = 'partparse.error'
197
#
# TeX's catcodes...
# The values double as indices into a 16-element catcode table (see
# epoch_cc and friends) whose entries are the characters in that category.
#
CC_ESCAPE = 0
CC_LBRACE = 1
CC_RBRACE = 2
CC_MATHSHIFT = 3
CC_ALIGNMENT = 4
CC_ENDLINE = 5
CC_PARAMETER = 6
CC_SUPERSCRIPT = 7
CC_SUBSCRIPT = 8
CC_IGNORE = 9
CC_WHITE = 10
CC_LETTER = 11
CC_OTHER = 12
CC_ACTIVE = 13
CC_COMMENT = 14
CC_INVALID = 15

# and the names, indexed by catcode
cc_names = [
    'CC_ESCAPE',
    'CC_LBRACE',
    'CC_RBRACE',
    'CC_MATHSHIFT',
    'CC_ALIGNMENT',
    'CC_ENDLINE',
    'CC_PARAMETER',
    'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT',
    'CC_IGNORE',
    'CC_WHITE',
    'CC_LETTER',
    'CC_OTHER',
    'CC_ACTIVE',
    'CC_COMMENT',
    'CC_INVALID',
    ]
237
# Show a list of catcode-name-symbols, e.g. '[CC_ESCAPE, CC_LBRACE]'
# ('[]' for an empty list)
def pcl(codelist):
    names = [cc_names[code] for code in codelist]
    return '[' + ', '.join(names) + ']'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000244
# the name of the catcode (e.g. 'CC_ACTIVE', 'CC_OTHER'), looked up in
# cc_names; code must be a valid index into that list
def pc(code):
    return cc_names[code]
248
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000249
# Which catcodes make the parser stop parsing regular plaintext
regular_stopcodes = [CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
        CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
        CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE]

# same for scanning a control sequence name
# (used negated: scanning stops at the first character NOT in this category)
csname_scancodes = [CC_LETTER]

# same for gobbling LWSP (also used negated)
white_scancodes = [CC_WHITE]
##white_scancodes = [CC_WHITE, CC_ENDLINE]

# make a list of all catcode id's, except for catcode ``other''
# (relies on range() returning a list, so that an element can be deleted)
all_but_other_codes = range(16)
del all_but_other_codes[CC_OTHER]
##print all_but_other_codes

# when does a comment end
comment_stopcodes = [CC_ENDLINE]
269
# gather all characters together, specified by a list of catcodes:
# concatenates the non-empty entries of catcode table cc for the
# categories in codelist, in codelist order
def code2string(cc, codelist):
    pieces = [cc[category] for category in codelist if cc[category]]
    return ''.join(pieces)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000279
# automatically generate all characters of catcode other, being the
# complement set, within character codes 0..255, of the characters that
# cc assigns to any other category.  The result is ordered by character code.
def make_other_codes(cc):
    # everything already claimed by a category other than CC_OTHER
    claimed = ''
    for category in all_but_other_codes:
        if cc[category]:
            claimed = claimed + cc[category]
    leftovers = [chr(i) for i in range(256) if chr(i) not in claimed]
    return ''.join(leftovers)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000293
# catcode dump (which characters have which catcodes).
# The printing itself is disabled; all that is left is the sanity check
# that cc has exactly 16 entries (TypeError otherwise).
def dump_cc(name, cc):
    if len(cc) == 16:
        return
    raise TypeError('cc not good cat class')
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000302
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000303
# In the beginning,.... no character has any category.
# A catcode table is a 16-element list indexed by the CC_* constants; each
# entry is None or a string of the characters in that category.
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)


# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
        '\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist (this is the table the parser below actually uses)
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'       # add it to OTHER list
dump_cc('my_cc', my_cc)                       # only checks the table length
340
341
342
# needed for un_re, my equivalent for regexp-quote in Emacs:
# the characters that act as operators when they appear unescaped in a
# pattern.  '.', '*', '+' and '?' used to be missing, so e.g. un_re('.')
# yielded a pattern matching any character instead of a literal dot.
re_meaning = '\\[]^$.*+?'

# Return a copy of str in which every character listed in re_meaning is
# preceded by a backslash, so that the result can be compiled into a
# pattern that matches str literally.
def un_re(str):
    quoted = []
    for ch in str:
        if ch in re_meaning:
            quoted.append('\\')
        quoted.append(ch)
    return ''.join(quoted)
353
# NOTE the negate ('^') operator in *some* of the regexps below

# Compile a character class matching any character whose catcode (per
# table cc) stops the scanning of regular plaintext.
def make_rc_regular(cc):
    # problems here if '[]' are included!!
    stopchars = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stopchars + ']')
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000358
# Compile a negated character class: matches the first character that can
# NOT be part of a control sequence name (i.e. is not a letter per cc).
def make_rc_cs_scan(cc):
    namechars = code2string(cc, csname_scancodes)
    return regex.compile('[^' + namechars + ']')
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000361
# Compile a character class matching the characters that end a comment
# (the end-of-line characters of table cc).
def make_rc_comment(cc):
    stopchars = code2string(cc, comment_stopcodes)
    return regex.compile('[' + stopchars + ']')
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000364
# Compile a negated character class: matches the first character that is
# NOT white space per table cc.
def make_rc_endwhite(cc):
    whitechars = code2string(cc, white_scancodes)
    return regex.compile('[^' + whitechars + ']')
367
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000368
369
# The compiled scanners used by parseit, all built from my_cc.

# regular: normal mode: finds the next character that ends plain text
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
# (finds the first non-letter)
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the end-of-line that terminates a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the first non-white character
rc_endwhite = make_rc_endwhite(my_cc)
377
378
379# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
380# RECURSION-LEVEL will is incremented on entry.
381# result contains the list of chunks returned
382# together with this list, the buffer position is returned
383
384# RECURSION-LEVEL will be set to zero *again*, when recursively a
385# {,D}MATH-mode scan has been enetered.
386# This has been done in order to better check for environment-mismatches
387
388def parseit(buf, *rest):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000389 global lineno
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000390
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000391 if len(rest) == 3:
392 parsemode, start, lvl = rest
393 elif len(rest) == 2:
394 parsemode, start, lvl = rest + (0, )
395 elif len(rest) == 1:
396 parsemode, start, lvl = rest + (0, 0)
397 elif len(rest) == 0:
398 parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
399 else:
400 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
401 result = []
402 end = len(buf)
403 if lvl == 0 and parsemode == mode(MODE_REGULAR):
404 lineno = 1
405 lvl = lvl + 1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000406
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000407 ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000408
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000409 #
410 # some of the more regular modes...
411 #
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000412
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000413 if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
414 cstate = []
415 newpos = start
416 curpmode = parsemode
417 while 1:
418 where = newpos
419 #print '\tnew round: ' + epsilon(buf, where)
420 if where == end:
421 if lvl > 1 or curpmode != mode(MODE_REGULAR):
422 # not the way we started...
423 raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
424 # the real ending of lvl-1 parse
425 return end, result
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000426
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000427 pos = rc_regular.search(buf, where)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000428
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000429 if pos < 0:
430 pos = end
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000431
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000432 if pos != where:
433 newpos, c = pos, chunk(PLAIN, where, (where, pos))
434 result.append(c)
435 continue
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000436
437
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000438 #
439 # ok, pos == where and pos != end
440 #
441 foundchar = buf[where]
442 if foundchar in my_cc[CC_LBRACE]:
443 # recursive subgroup parse...
444 newpos, data = parseit(buf, curpmode, where+1, lvl)
445 result.append(chunk(GROUP, where, data))
446
447 elif foundchar in my_cc[CC_RBRACE]:
448 if lvl <= 1:
449 raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
450 if lvl == 1 and mode != mode(MODE_REGULAR):
451 raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
452 return where + 1, result
453
454 elif foundchar in my_cc[CC_ESCAPE]:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000455 #
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000456 # call the routine that actually deals with
457 # this problem. If do_ret is None, than
458 # return the value of do_ret
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000459 #
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000460 # Note that handle_cs might call this routine
461 # recursively again...
462 #
463 do_ret, newpos = handlecs(buf, where,
464 curpmode, lvl, result, end)
465 if do_ret != None:
466 return do_ret
467
468 elif foundchar in my_cc[CC_COMMENT]:
469 newpos, data = parseit(buf,
470 mode(MODE_COMMENT), where+1, lvl)
471 result.append(chunk(COMMENT, where, data))
472
473 elif foundchar in my_cc[CC_MATHSHIFT]:
474 # note that recursive calls to math-mode
475 # scanning are called with recursion-level 0
476 # again, in order to check for bad mathend
477 #
478 if where + 1 != end and buf[where + 1] in my_cc[CC_MATHSHIFT]:
479 #
480 # double mathshift, e.g. '$$'
481 #
482 if curpmode == mode(MODE_REGULAR):
483 newpos, data = parseit(buf,
484 mode(MODE_DMATH),
485 where+2, 0)
486 result.append(chunk(DMATH,
487 where, data))
488 elif curpmode == mode(MODE_MATH):
489 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
490 elif lvl != 1:
491 raise error, 'bad mathend.' + lle(lvl, buf, where)
492 else:
493 return where + 2, result
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000494 else:
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000495 #
496 # single math shift, e.g. '$'
497 #
498 if curpmode == mode(MODE_REGULAR):
499 newpos, data = parseit(buf,
500 mode(MODE_MATH),
501 where+1, 0)
502 result.append(chunk(MATH,
503 where, data))
504 elif curpmode == mode(MODE_DMATH):
505 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
506 elif lvl != 1:
507 raise error, 'bad mathend.' + lv(lvl, buf, where)
508 else:
509 return where + 1, result
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000510
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000511 elif foundchar in my_cc[CC_IGNORE]:
512 print 'warning: ignored char', `foundchar`
513 newpos = where + 1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000514
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000515 elif foundchar in my_cc[CC_ACTIVE]:
516 result.append(chunk(ACTIVE, where, foundchar))
517 newpos = where + 1
518
519 elif foundchar in my_cc[CC_INVALID]:
520 raise error, 'invalid char ' + `foundchar`
521 newpos = where + 1
522
523 elif foundchar in my_cc[CC_ENDLINE]:
524 #
525 # after an end of line, eat the rest of
526 # whitespace on the beginning of the next line
527 # this is what LaTeX more or less does
528 #
529 # also, try to indicate double newlines (\par)
530 #
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000531 lineno = lineno + 1
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000532 savedwhere = where
533 newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
534 if newpos != end and buf[newpos] in my_cc[CC_ENDLINE]:
535 result.append(chunk(DENDLINE, savedwhere, foundchar))
536 else:
537 result.append(chunk(ENDLINE, savedwhere, foundchar))
538 else:
539 result.append(chunk(OTHER, where, foundchar))
540 newpos = where + 1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000541
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000542 elif parsemode == mode(MODE_CS_SCAN):
543 #
544 # scan for a control sequence token. `\ape', `\nut' or `\%'
545 #
546 if start == end:
547 raise EOFError, 'can\'t find end of csname'
548 pos = rc_cs_scan.search(buf, start)
549 if pos < 0:
550 pos = end
551 if pos == start:
552 # first non-letter right where we started the search
553 # ---> the control sequence name consists of one single
554 # character. Also: don't eat white space...
555 if buf[pos] in my_cc[CC_ENDLINE]:
556 lineno = lineno + 1
557 pos = pos + 1
558 return pos, (start, pos)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000559 else:
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000560 spos = pos
561 if buf[pos] == '\n':
562 lineno = lineno + 1
563 spos = pos + 1
564 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), spos, lvl)
565 return pos2, (start, pos)
566
567 elif parsemode == mode(MODE_GOBBLEWHITE):
568 if start == end:
569 return start, ''
570 pos = rc_endwhite.search(buf, start)
571 if pos < 0:
572 pos = start
573 return pos, (start, pos)
574
575 elif parsemode == mode(MODE_COMMENT):
576 pos = rc_comment.search(buf, start)
577 lineno = lineno + 1
578 if pos < 0:
579 print 'no newline perhaps?'
580 raise EOFError, 'can\'t find end of comment'
581 pos = pos + 1
582 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
583 return pos2, (start, pos)
584
585
586 else:
587 raise error, 'Unknown mode (' + `parsemode` + ')'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000588
589
#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)

#boxcommands = 'mbox', 'fbox'
#defcommands = 'def', 'newcommand'

# The literal text that closes a verbatim environment...
endverbstr = '\\end{verbatim}'

# ...and a compiled pattern for it (quoted with un_re), which handlecs
# uses to find the end of a verbatim environment
re_endverb = regex.compile(un_re(endverbstr))
598
599#
600# handlecs: helper function for parseit, for the special thing we might
601# wanna do after certain command control sequences
602# returns: None or return_data, newpos
603#
604# in the latter case, the calling function is instructed to immediately
605# return with the data in return_data
606#
607def handlecs(buf, where, curpmode, lvl, result, end):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000608 global lineno
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000609
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000610 # get the control sequence name...
611 newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
612 saveddata = data
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000613
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000614 if s(buf, data) in ('begin', 'end'):
615 # skip the expected '{' and get the LaTeX-envname '}'
616 newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
617 if len(data) != 1:
618 raise error, 'expected 1 chunk of data.' + \
619 lle(lvl, buf, where)
Guido van Rossum49604d31996-09-10 22:19:51 +0000620
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000621 # yucky, we've got an environment
622 envname = s(buf, data[0].data)
623 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
624 if s(buf, saveddata) == 'begin' and envname == 'verbatim':
625 # verbatim deserves special treatment
626 pos = re_endverb.search(buf, newpos)
627 if pos < 0:
628 raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
629 result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
630 newpos = pos + len(endverbstr)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000631
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000632 elif s(buf, saveddata) == 'begin':
633 # start parsing recursively... If that parse returns
634 # from an '\end{...}', then should the last item of
635 # the returned data be a string containing the ended
636 # environment
637 newpos, data = parseit(buf, curpmode, newpos, lvl)
638 if not data or type(data[-1]) is not StringType:
639 raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
640 retenv = data[-1]
641 del data[-1]
642 if retenv != envname:
643 #[`retenv`, `envname`]
644 raise error, 'environments do not match.' + \
645 lle(lvl, buf, where) + \
646 epsilon(buf, newpos)
647 result.append(chunk(ENV, where, (retenv, data)))
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000648 else:
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000649 # 'end'... append the environment name, as just
650 # pointed out, and order parsit to return...
651 result.append(envname)
652 ##print 'POINT of return: ' + epsilon(buf, newpos)
653 # the tuple will be returned by parseit
654 return (newpos, result), newpos
655
656 # end of \begin ... \end handling
657
658 elif s(buf, data)[0:2] == 'if':
659 # another scary monster: the 'if' directive
660 flag = s(buf, data)[2:]
661
662 # recursively call parseit, just like environment above..
663 # the last item of data should contain the if-termination
664 # e.g., 'else' of 'fi'
665 newpos, data = parseit(buf, curpmode, newpos, lvl)
666 if not data or data[-1] not in ('else', 'fi'):
667 raise error, 'wrong if... termination' + \
668 lle(lvl, buf, where) + epsilon(buf, newpos)
669
670 ifterm = data[-1]
671 del data[-1]
672 # 0 means dont_negate flag
673 result.append(chunk(IF, where, (flag, 0, data)))
674 if ifterm == 'else':
675 # do the whole thing again, there is only one way
676 # to end this one, by 'fi'
677 newpos, data = parseit(buf, curpmode, newpos, lvl)
678 if not data or data[-1] not in ('fi', ):
679 raise error, 'wrong if...else... termination' \
680 + lle(lvl, buf, where) \
681 + epsilon(buf, newpos)
682
683 ifterm = data[-1]
684 del data[-1]
685 result.append(chunk(IF, where, (flag, 1, data)))
686 #done implicitely: return None, newpos
687
688 elif s(buf, data) in ('else', 'fi'):
689 result.append(s(buf, data))
690 # order calling party to return tuple
691 return (newpos, result), newpos
692
693 # end of \if, \else, ... \fi handling
694
695 elif s(buf, saveddata) == 'verb':
696 x2 = saveddata[1]
697 result.append(chunk(CSNAME, where, data))
698 if x2 == end:
699 raise error, 'premature end of command.' + lle(lvl, buf, where)
700 delimchar = buf[x2]
701 ##print 'VERB: delimchar ' + `delimchar`
702 pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
703 if pos < 0:
704 raise error, 'end of \'verb\' argument (' + \
705 `delimchar` + ') not found.' + \
706 lle(lvl, buf, where)
707 result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
708 newpos = pos + 1
709 else:
710 result.append(chunk(CSNAME, where, data))
711 return None, newpos
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000712
# Get the string value of a chunk's data field: a string is returned as
# is, a tuple of two integers (x1, x2) selects the slice buf[x1:x2].
# Anything else raises TypeError.
def s(buf, data):
    if type(data) is type(''):
        return data
    if len(data) == 2 and type(data[0]) is type(data[1]) is type(0):
        x1, x2 = data
        return buf[x1:x2]
    raise TypeError('expected tuple of 2 integers')
Guido van Rossum49604d31996-09-10 22:19:51 +0000721
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000722
723##length, data1, i = getnextarg(length, buf, pp, i + 1)
724
# make a deep-copy of some chunks: a new list holding chunkcopy() of
# every element of r
def crcopy(r):
    return [chunkcopy(ch) for ch in r]
Guido van Rossum49604d31996-09-10 22:19:51 +0000728
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000729
# copy a chunk, would better be a method of class Chunk...
# Only GROUP chunks are copied recursively; for every other type the new
# chunk shares its data with the original.
def chunkcopy(ch):
    if ch.chtype != chunk_type(GROUP):
        return chunk(ch.chtype, ch.where, ch.data)
    members = [chunkcopy(member) for member in ch.data]
    return chunk(GROUP, ch.where, members)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000736
737
738# get next argument for TeX-macro, flatten a group (insert between)
739# or return Command Sequence token, or give back one character
740def getnextarg(length, buf, pp, item):
741
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000742 ##wobj = Wobj()
743 ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
744 ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000745
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000746 while item < length and pp[item].chtype == chunk_type(ENDLINE):
747 del pp[item]
748 length = length - 1
749 if item >= length:
750 raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
751 if pp[item].chtype == chunk_type(GROUP):
752 newpp = pp[item].data
753 del pp[item]
754 length = length - 1
755 changeit(buf, newpp)
756 length = length + len(newpp)
757 pp[item:item] = newpp
758 item = item + len(newpp)
759 if len(newpp) < 10:
760 wobj = Wobj()
761 dumpit(buf, wobj.write, newpp)
762 ##print 'GETNEXTARG: inserted ' + `wobj.data`
763 return length, item
764 elif pp[item].chtype == chunk_type(PLAIN):
765 #grab one char
766 print 'WARNING: grabbing one char'
767 if len(s(buf, pp[item].data)) > 1:
768 pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
769 item, length = item+1, length+1
770 pp[item].data = s(buf, pp[item].data)[1:]
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000771 else:
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000772 item = item+1
773 return length, item
774 else:
775 ch = pp[item]
776 try:
777 str = `s(buf, ch.data)`
778 except TypeError:
779 str = `ch.data`
780 if len(str) > 400:
781 str = str[:400] + '...'
782 print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
783 return length, item
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000784
785
786# this one is needed to find the end of LaTeX's optional argument, like
787# item[...]
788re_endopt = regex.compile(']')
789
790# get a LaTeX-optional argument, you know, the square braces '[' and ']'
791def getoptarg(length, buf, pp, item):
792
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000793 wobj = Wobj()
794 dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
795 ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000796
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000797 if item >= length or \
798 pp[item].chtype != chunk_type(PLAIN) or \
799 s(buf, pp[item].data)[0] != '[':
800 return length, item
801
802 pp[item].data = s(buf, pp[item].data)[1:]
803 if len(pp[item].data) == 0:
804 del pp[item]
805 length = length-1
806
807 while 1:
808 if item == length:
809 raise error, 'No end of optional arg found'
810 if pp[item].chtype == chunk_type(PLAIN):
811 text = s(buf, pp[item].data)
812 pos = re_endopt.search(text)
813 if pos >= 0:
814 pp[item].data = text[:pos]
815 if pos == 0:
816 del pp[item]
817 length = length-1
818 else:
819 item=item+1
820 text = text[pos+1:]
821
822 while text and text[0] in ' \t':
823 text = text[1:]
824
825 if text:
826 pp.insert(item, chunk(PLAIN, 0, text))
827 length = length + 1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000828 return length, item
829
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000830 item = item+1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000831
832
# Wobj: minimal write-only "file" object.  Every write() request is
# appended to the ``data'' attribute, so the text can be picked up
# afterwards (it is handed to dumpit() as output function).
class Wobj:
    # text collected so far; instances start out with this class-level
    # empty string and get their own attribute on the first write()
    data = ''

    def write(self, data):
        self.data = self.data + data
Guido van Rossumb819bdf1995-03-15 11:26:26 +0000839
# commands that are simply ignored
ignoredcommands = ('bcode', 'ecode')

# commands like these map to themselves as plain text
wordsselves = (
    'UNIX',
    'ABC',
    'C',
    'ASCII',
    'EOF',
    'LaTeX')

# \{ --> {, \} --> }, etc
themselves = (
    '{',
    '}',
    '.',
    '@',
    ' ',
    '\n') + wordsselves

# these map to themselves as well, but only inside function arguments
# (see the argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')

# how markup commands are rendered in flat text: either a (prefix,
# suffix) pair to put around the argument, or the code 1, which means:
# fold the argument to uppercase
markcmds = {
    'code': ('', ''),
    'var': 1,
    'emph': ('_', '_'),
    'strong': ('*', '*')}

# recognise the pattern {\FONTCHANGE-CMD TEXT} and turn it into
# \MAPPED-FC-CMD{TEXT}
fontchanges = {
    'rm': 'r',
    'it': 'i',
    'em': 'emph',
    'bf': 'b',
    'tt': 't'}

# commands that are passed through unchanged
for_texi = (
    'emph', 'var', 'strong', 'code',
    'kbd', 'key', 'dfn', 'samp',
    'file', 'r', 'i', 't')
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000859
860
861# try to remove macros and return flat text
862def flattext(buf, pp):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000863 pp = crcopy(pp)
864 ##print '---> FLATTEXT ' + `pp`
865 wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000866
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000867 i, length = 0, len(pp)
868 while 1:
869 if len(pp) != length:
870 raise 'FATAL', 'inconsistent length'
871 if i >= length:
872 break
873 ch = pp[i]
874 i = i+1
875 if ch.chtype == chunk_type(PLAIN):
876 pass
877 elif ch.chtype == chunk_type(CSNAME):
878 if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
879 ch.chtype = chunk_type(PLAIN)
880 elif s(buf, ch.data) == 'e':
881 ch.chtype = chunk_type(PLAIN)
882 ch.data = '\\'
883 elif len(s(buf, ch.data)) == 1 \
884 and s(buf, ch.data) in onlylatexspecial:
885 ch.chtype = chunk_type(PLAIN)
886 # if it is followed by an empty group,
887 # remove that group, it was needed for
888 # a true space
889 if i < length \
890 and pp[i].chtype==chunk_type(GROUP) \
891 and len(pp[i].data) == 0:
892 del pp[i]
893 length = length-1
894
895 elif s(buf, ch.data) in markcmds.keys():
896 length, newi = getnextarg(length, buf, pp, i)
897 str = flattext(buf, pp[i:newi])
898 del pp[i:newi]
899 length = length - (newi - i)
900 ch.chtype = chunk_type(PLAIN)
901 markcmd = s(buf, ch.data)
902 x = markcmds[markcmd]
903 if type(x) == TupleType:
904 pre, after = x
905 str = pre+str+after
906 elif x == 1:
907 str = string.upper(str)
908 else:
909 raise 'FATAL', 'corrupt markcmds'
910 ch.data = str
911 else:
912 if s(buf, ch.data) not in ignoredcommands:
913 print 'WARNING: deleting command ' + `s(buf, ch.data)`
914 print 'PP' + `pp[i-1]`
915 del pp[i-1]
916 i, length = i-1, length-1
917 elif ch.chtype == chunk_type(GROUP):
918 length, newi = getnextarg(length, buf, pp, i-1)
919 i = i-1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000920## str = flattext(buf, crcopy(pp[i-1:newi]))
921## del pp[i:newi]
922## length = length - (newi - i)
923## ch.chtype = chunk_type(PLAIN)
924## ch.data = str
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000925 else:
926 pass
927
928 dumpit(buf, wobj.write, pp)
929 ##print 'FLATTEXT: RETURNING ' + `wobj.data`
930 return wobj.data
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000931
932# try to generate node names (a bit shorter than the chapter title)
933# note that the \nodename command (see elsewhere) overules these efforts
934def invent_node_names(text):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000935 words = string.split(text)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000936
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000937 ##print 'WORDS ' + `words`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000938
Guido van Rossum5f18d6c1996-09-10 22:34:20 +0000939 if len(words) == 2 \
940 and string.lower(words[0]) == 'built-in' \
941 and string.lower(words[1]) not in ('modules', 'functions'):
942 return words[1]
943 if len(words) == 3 and string.lower(words[1]) == 'module':
944 return words[2]
945 if len(words) == 3 and string.lower(words[1]) == 'object':
946 return string.join(words[0:2])
947 if len(words) > 4 and string.lower(string.join(words[-4:])) == \
948 'methods and data attributes':
949 return string.join(words[:2])
950 return text
951
# the characters that rm_commas_etc() strips from node names
re_commas_etc = regex.compile("[,`'@{}]")

# a (possibly empty) run of blanks and tabs; next_command_p() uses it to
# recognise chunks that hold nothing but whitespace
re_whitespace = regex.compile('[ \t]*')
955
956
957##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
958
# next_command_p: look whether the next non-white stuff at or after
# pp[i] is the command `cmdname'.
#
# ENDLINE and DENDLINE chunks are skipped, and so are PLAIN chunks that
# consist of blanks and tabs only; double endlines (\par's) therefore do
# not stop the search.  Sometimes this is too much, maybe DENDLINE's
# should be considered as a stop.
#
#   length  -- not used; the scan is bounded by len(pp)
#   buf     -- source buffer, handed to s() to obtain a chunk's text
#   pp      -- list of chunks (not modified)
#   i       -- index to start looking at
#   cmdname -- name of the command to look for
#
# Returns the index just _after_ the command chunk, or -1 if something
# else comes first or the end of pp is reached.
def next_command_p(length, buf, pp, i, cmdname):

    while i < len(pp):
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type(ENDLINE):
            continue
        if ch.chtype == chunk_type(DENDLINE):
            continue
        if ch.chtype == chunk_type(PLAIN):
            text = s(buf, ch.data)
            # whitespace only: the blank/tab run starts at position 0
            # and covers the whole text
            if re_whitespace.search(text) == 0 and \
               re_whitespace.match(text) == len(text):
                continue
            return -1
        if ch.chtype == chunk_type(CSNAME):
            if s(buf, ch.data) == cmdname:
                return i # _after_ the command
            return -1
        return -1

    # Nothing but skippable chunks up to the end of pp.  Say "not found"
    # explicitly: callers test the result with `< 0', which must not
    # depend on how None happens to compare with a number.
    return -1
983
984
# characters that are special to LaTeX, but not to texi
onlylatexspecial = '_~^$#&%'

# Struct: empty class, instances serve as plain attribute holders
class Struct:
    pass

# hist holds the state that is kept while the chunks are being changed,
# out collects the double names that were met; startchange() fills in
# their initial attributes
hist = Struct()
out = Struct()
992
# startchange: (re)initialise the bookkeeping in `hist' and `out'.
# Attributes not listed here (e.g. hist.indexsubitem, hist.command) are
# not touched.
def startchange():
    global hist, out

    # flag and counters
    hist.inargs = 0                 # non-zero while function arguments are fetched
    hist.itemizenesting = 0         # current nesting depth of itemize
    hist.enumeratenesting = 0       # current nesting depth of enumerate

    # lists
    hist.inenv = []                 # open environments, innermost first
    hist.nodenames = []             # node names used so far
    hist.cindex = []

    out.doublecindeces = []
    out.doublenodes = []            # node names that were used more than once
1004
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001005
# ready-made one-element chunk lists
spacech = [
    chunk(PLAIN, 0, ' ')]
commach = [
    chunk(PLAIN, 0, ', ')]
cindexch = [
    chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize, indexed by nesting depth
itemizesymbols = [
    'bullet',
    'minus',
    'dots']

# the same for enumerate
enumeratesymbols = [
    '1',
    'A',
    'a']
1015
##
## \begin{ {func,data,exc}desc }{name}...
## the resulting texi code depends on the contents of indexsubitem
##

# indexsubitem: ['XXX', 'function']
# funcdesc:
#     deffn {`idxsi`} NAME (FUNCARGS)

# indexsubitem: ['XXX', 'method']
# funcdesc:
#     defmethod {`idxsi[0]`} NAME (FUNCARGS)

# indexsubitem: ['in', 'module', 'MODNAME']
# datadesc:
#     defcv data {`idxsi[1:]`} NAME
# excdesc:
#     defcv exception {`idxsi[1:]`} NAME
# funcdesc:
#     deffn {function of `idxsi[1:]`} NAME (FUNCARGS)

# indexsubitem: ['OBJECT', 'attribute']
# datadesc:
#     defcv attribute {`OBJECT`} NAME
1041
1042## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1043## or \funcline{NAME}{ARGS}
1044##
1045def do_funcdesc(length, buf, pp, i):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001046 startpoint = i-1
1047 ch = pp[startpoint]
1048 wh = ch.where
1049 length, newi = getnextarg(length, buf, pp, i)
1050 funcname = chunk(GROUP, wh, pp[i:newi])
1051 del pp[i:newi]
1052 length = length - (newi-i)
1053 save = hist.inargs
1054 hist.inargs = 1
1055 length, newi = getnextarg(length, buf, pp, i)
1056 hist.inargs = save
1057 del save
1058 the_args = [chunk(PLAIN, wh, '()'[0])] + pp[i:newi] + \
1059 [chunk(PLAIN, wh, '()'[1])]
1060 del pp[i:newi]
1061 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001062
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001063 idxsi = hist.indexsubitem # words
1064 command = ''
1065 cat_class = ''
1066 if idxsi and idxsi[-1] in ('method', 'protocol'):
1067 command = 'defmethod'
1068 cat_class = string.join(idxsi[:-1])
1069 elif len(idxsi) == 2 and idxsi[1] == 'function':
1070 command = 'deffn'
1071 cat_class = string.join(idxsi)
1072 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1073 command = 'deffn'
1074 cat_class = 'function of ' + string.join(idxsi[1:])
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001075
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001076 if not command:
1077 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001078
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001079 ch.chtype = chunk_type(CSLINE)
1080 ch.data = command
1081
1082 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1083 cslinearg.append(chunk(PLAIN, wh, ' '))
1084 cslinearg.append(funcname)
1085 cslinearg.append(chunk(PLAIN, wh, ' '))
1086 l = len(cslinearg)
1087 cslinearg[l:l] = the_args
1088
1089 pp.insert(i, chunk(GROUP, wh, cslinearg))
1090 i, length = i+1, length+1
1091 hist.command = command
1092 return length, i
1093
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001094
1095## this routine will be called on \begin{excdesc}{NAME}
1096## or \excline{NAME}
1097##
1098def do_excdesc(length, buf, pp, i):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001099 startpoint = i-1
1100 ch = pp[startpoint]
1101 wh = ch.where
1102 length, newi = getnextarg(length, buf, pp, i)
1103 excname = chunk(GROUP, wh, pp[i:newi])
1104 del pp[i:newi]
1105 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001106
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001107 idxsi = hist.indexsubitem # words
1108 command = ''
1109 cat_class = ''
1110 class_class = ''
1111 if len(idxsi) == 2 and idxsi[1] == 'exception':
1112 command = 'defvr'
1113 cat_class = string.join(idxsi)
1114 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1115 command = 'defcv'
1116 cat_class = 'exception'
1117 class_class = string.join(idxsi[1:])
1118 elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
1119 command = 'defcv'
1120 cat_class = 'exception'
1121 class_class = string.join(idxsi[2:])
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001122
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001123
1124 if not command:
1125 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1126
1127 ch.chtype = chunk_type(CSLINE)
1128 ch.data = command
1129
1130 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1131 cslinearg.append(chunk(PLAIN, wh, ' '))
1132 if class_class:
1133 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001134 cslinearg.append(chunk(PLAIN, wh, ' '))
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001135 cslinearg.append(excname)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001136
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001137 pp.insert(i, chunk(GROUP, wh, cslinearg))
1138 i, length = i+1, length+1
1139 hist.command = command
1140 return length, i
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001141
1142## same for datadesc or dataline...
1143def do_datadesc(length, buf, pp, i):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001144 startpoint = i-1
1145 ch = pp[startpoint]
1146 wh = ch.where
1147 length, newi = getnextarg(length, buf, pp, i)
1148 dataname = chunk(GROUP, wh, pp[i:newi])
1149 del pp[i:newi]
1150 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001151
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001152 idxsi = hist.indexsubitem # words
1153 command = ''
1154 cat_class = ''
1155 class_class = ''
1156 if idxsi[-1] in ('attribute', 'option'):
1157 command = 'defcv'
1158 cat_class = idxsi[-1]
1159 class_class = string.join(idxsi[:-1])
1160 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1161 command = 'defcv'
1162 cat_class = 'data'
1163 class_class = string.join(idxsi[1:])
1164 elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
1165 command = 'defcv'
1166 cat_class = 'data'
1167 class_class = string.join(idxsi[2:])
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001168
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001169
1170 if not command:
1171 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1172
1173 ch.chtype = chunk_type(CSLINE)
1174 ch.data = command
1175
1176 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1177 cslinearg.append(chunk(PLAIN, wh, ' '))
1178 if class_class:
1179 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001180 cslinearg.append(chunk(PLAIN, wh, ' '))
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001181 cslinearg.append(dataname)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001182
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001183 pp.insert(i, chunk(GROUP, wh, cslinearg))
1184 i, length = i+1, length+1
1185 hist.command = command
1186 return length, i
1187
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001188
# regular indices: the index commands whose entries are not set in tt
# font by default
regindices = ('cindex',)
1191
1192# remove illegal characters from node names
1193def rm_commas_etc(text):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001194 result = ''
1195 changed = 0
1196 while 1:
1197 pos = re_commas_etc.search(text)
1198 if pos >= 0:
1199 changed = 1
1200 result = result + text[:pos]
1201 text = text[pos+1:]
1202 else:
1203 result = result + text
1204 break
1205 if changed:
1206 print 'Warning: nodename changhed to ' + `result`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001207
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001208 return result
1209
# boolean flags, looked up by name when an \if... chunk is processed;
# a flag that is not listed here is reported as an error
flags = {
    'texi': 1}
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001212
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001213
1214##
1215## changeit: the actual routine, that changes the contents of the parsed
1216## chunks
1217##
1218
1219def changeit(buf, pp):
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001220 global onlylatexspecial, hist, out
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001221
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001222 i, length = 0, len(pp)
1223 while 1:
1224 # sanity check: length should always equal len(pp)
1225 if len(pp) != length:
1226 raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
1227 if i >= length:
1228 break
1229 ch = pp[i]
1230 i = i + 1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001231
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001232 if type(ch) is StringType:
1233 #normally, only chunks are present in pp,
1234 # but in some cases, some extra info
1235 # has been inserted, e.g., the \end{...} clauses
1236 raise 'FATAL', 'got string, probably too many ' + `end`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001237
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001238 if ch.chtype == chunk_type(GROUP):
1239 # check for {\em ...} constructs
1240 if ch.data and \
1241 ch.data[0].chtype == chunk_type(CSNAME) and \
1242 s(buf, ch.data[0].data) in fontchanges.keys():
1243 k = s(buf, ch.data[0].data)
1244 del ch.data[0]
1245 pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
1246 length, i = length+1, i+1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001247
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001248 # recursively parse the contents of the group
1249 changeit(buf, ch.data)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001250
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001251 elif ch.chtype == chunk_type(IF):
1252 # \if...
1253 flag, negate, data = ch.data
1254 ##print 'IF: flag, negate = ' + `flag, negate`
1255 if flag not in flags.keys():
1256 raise error, 'unknown flag ' + `flag`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001257
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001258 value = flags[flag]
1259 if negate:
1260 value = (not value)
1261 del pp[i-1]
1262 length, i = length-1, i-1
1263 if value:
1264 pp[i:i] = data
1265 length = length + len(data)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001266
1267
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001268 elif ch.chtype == chunk_type(ENV):
1269 # \begin{...} ....
1270 envname, data = ch.data
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001271
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001272 #push this environment name on stack
1273 hist.inenv.insert(0, envname)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001274
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001275 #append an endenv chunk after grouped data
1276 data.append(chunk(ENDENV, ch.where, envname))
1277 ##[`data`]
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001278
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001279 #delete this object
1280 del pp[i-1]
1281 i, length = i-1, length-1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001282
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001283 #insert found data
1284 pp[i:i] = data
1285 length = length + len(data)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001286
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001287 if envname == 'verbatim':
1288 pp[i:i] = [chunk(CSLINE, ch.where, 'example'),
1289 chunk(GROUP, ch.where, [])]
1290 length, i = length+2, i+2
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001291
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001292 elif envname == 'itemize':
1293 if hist.itemizenesting > len(itemizesymbols):
1294 raise error, 'too deep itemize nesting'
1295 ingroupch = [chunk(CSNAME, ch.where,
1296 itemizesymbols[hist.itemizenesting])]
1297 hist.itemizenesting = hist.itemizenesting + 1
1298 pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),
1299 chunk(GROUP, ch.where, ingroupch)]
1300 length, i = length+2, i+2
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001301
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001302 elif envname == 'enumerate':
1303 if hist.enumeratenesting > len(enumeratesymbols):
1304 raise error, 'too deep enumerate nesting'
1305 ingroupch = [chunk(PLAIN, ch.where,
1306 enumeratesymbols[hist.enumeratenesting])]
1307 hist.enumeratenesting = hist.enumeratenesting + 1
1308 pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),
1309 chunk(GROUP, ch.where, ingroupch)]
1310 length, i = length+2, i+2
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001311
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001312 elif envname == 'description':
1313 ingroupch = [chunk(CSNAME, ch.where, 'b')]
1314 pp[i:i] = [chunk(CSLINE, ch.where, 'table'),
1315 chunk(GROUP, ch.where, ingroupch)]
1316 length, i = length+2, i+2
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001317
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001318 elif (envname == 'tableiii') or (envname == 'tableii'):
1319 if (envname == 'tableii'):
1320 ltable = 2
1321 else:
1322 ltable = 3
1323 wh = ch.where
1324 newcode = []
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001325
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001326 #delete tabular format description
1327 # e.g., {|l|c|l|}
1328 length, newi = getnextarg(length, buf, pp, i)
1329 del pp[i:newi]
1330 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001331
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001332 newcode.append(chunk(CSLINE, wh, 'table'))
1333 ingroupch = [chunk(CSNAME, wh, 'asis')]
1334 newcode.append(chunk(GROUP, wh, ingroupch))
1335 newcode.append(chunk(CSLINE, wh, 'item'))
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001336
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001337 #get the name of macro for @item
1338 # e.g., {code}
1339 length, newi = getnextarg(length, buf, pp, i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001340
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001341 if newi-i != 1:
1342 raise error, 'Sorry, expected 1 chunk argument'
1343 if pp[i].chtype != chunk_type(PLAIN):
1344 raise error, 'Sorry, expected plain text argument'
1345 hist.itemargmacro = s(buf, pp[i].data)
1346 del pp[i:newi]
1347 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001348
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001349 itembody = []
1350 for count in range(ltable):
1351 length, newi = getnextarg(length, buf, pp, i)
1352 emphgroup = [
1353 chunk(CSNAME, wh, 'emph'),
1354 chunk(GROUP, 0, pp[i:newi])]
1355 del pp[i:newi]
1356 length = length - (newi-i)
1357 if count == 0:
1358 itemarg = emphgroup
1359 elif count == ltable-1:
1360 itembody = itembody + \
1361 [chunk(PLAIN, wh, ' --- ')] + emphgroup
1362 else:
1363 itembody = emphgroup
1364 newcode.append(chunk(GROUP, wh, itemarg))
1365 newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
1366 pp[i:i] = newcode
1367 l = len(newcode)
1368 length, i = length+l, i+l
1369 del newcode, l
1370
1371 if length != len(pp):
1372 raise 'STILL, SOMETHING wrong', `i`
1373
1374
1375 elif envname == 'funcdesc':
1376 pp.insert(i, chunk(PLAIN, ch.where, ''))
1377 i, length = i+1, length+1
1378 length, i = do_funcdesc(length, buf, pp, i)
1379
1380 elif envname == 'excdesc':
1381 pp.insert(i, chunk(PLAIN, ch.where, ''))
1382 i, length = i+1, length+1
1383 length, i = do_excdesc(length, buf, pp, i)
1384
1385 elif envname == 'datadesc':
1386 pp.insert(i, chunk(PLAIN, ch.where, ''))
1387 i, length = i+1, length+1
1388 length, i = do_datadesc(length, buf, pp, i)
1389
1390 else:
1391 print 'WARNING: don\'t know what to do with env ' + `envname`
1392
1393 elif ch.chtype == chunk_type(ENDENV):
1394 envname = ch.data
1395 if envname != hist.inenv[0]:
1396 raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
1397 del hist.inenv[0]
1398 del pp[i-1]
1399 i, length = i-1, length-1
1400
1401 if envname == 'verbatim':
1402 pp[i:i] = [
1403 chunk(CSLINE, ch.where, 'end'),
1404 chunk(GROUP, ch.where, [
1405 chunk(PLAIN, ch.where, 'example')])]
1406 i, length = i+2, length+2
1407 elif envname == 'itemize':
1408 hist.itemizenesting = hist.itemizenesting - 1
1409 pp[i:i] = [
1410 chunk(CSLINE, ch.where, 'end'),
1411 chunk(GROUP, ch.where, [
1412 chunk(PLAIN, ch.where, 'itemize')])]
1413 i, length = i+2, length+2
1414 elif envname == 'enumerate':
1415 hist.enumeratenesting = hist.enumeratenesting-1
1416 pp[i:i] = [
1417 chunk(CSLINE, ch.where, 'end'),
1418 chunk(GROUP, ch.where, [
1419 chunk(PLAIN, ch.where, 'enumerate')])]
1420 i, length = i+2, length+2
1421 elif envname == 'description':
1422 pp[i:i] = [
1423 chunk(CSLINE, ch.where, 'end'),
1424 chunk(GROUP, ch.where, [
1425 chunk(PLAIN, ch.where, 'table')])]
1426 i, length = i+2, length+2
1427 elif (envname == 'tableiii') or (envname == 'tableii'):
1428 pp[i:i] = [
1429 chunk(CSLINE, ch.where, 'end'),
1430 chunk(GROUP, ch.where, [
1431 chunk(PLAIN, ch.where, 'table')])]
1432 i, length = i+2, length + 2
1433 pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
1434 i, length = i+1, length+1
1435
1436 elif envname in ('funcdesc', 'excdesc', 'datadesc'):
1437 pp[i:i] = [
1438 chunk(CSLINE, ch.where, 'end'),
1439 chunk(GROUP, ch.where, [
1440 chunk(PLAIN, ch.where, hist.command)])]
1441 i, length = i+2, length+2
1442 else:
1443 print 'WARNING: ending env ' + `envname` + 'has no actions'
1444
1445 elif ch.chtype == chunk_type(CSNAME):
1446 # control name transformations
1447 if s(buf, ch.data) == 'optional':
1448 pp[i-1].chtype = chunk_type (PLAIN)
1449 pp[i-1].data = '['
1450 if (i < length) and \
1451 (pp[i].chtype == chunk_type(GROUP)):
1452 cp=pp[i].data
1453 pp[i:i+1]=cp + [
1454 chunk(PLAIN, ch.where, ']')]
1455 length = length+len(cp)
1456 elif s(buf, ch.data) in ignoredcommands:
1457 del pp[i-1]
1458 i, length = i-1, length-1
1459 elif s(buf, ch.data) == '@' and \
1460 i != length and \
1461 pp[i].chtype == chunk_type(PLAIN) and \
1462 s(buf, pp[i].data)[0] == '.':
1463 # \@. --> \. --> @.
1464 ch.data = '.'
1465 del pp[i]
1466 length = length-1
1467 elif s(buf, ch.data) == '\\':
1468 # \\ --> \* --> @*
1469 ch.data = '*'
1470 elif len(s(buf, ch.data)) == 1 and \
1471 s(buf, ch.data) in onlylatexspecial:
1472 ch.chtype = chunk_type(PLAIN)
1473 # check if such a command is followed by
1474 # an empty group: e.g., `\%{}'. If so, remove
1475 # this empty group too
1476 if i < length and \
1477 pp[i].chtype == chunk_type(GROUP) \
1478 and len(pp[i].data) == 0:
1479 del pp[i]
1480 length = length-1
1481
1482 elif hist.inargs and s(buf, ch.data) in inargsselves:
1483 # This is the special processing of the
1484 # arguments of the \begin{funcdesc}... or
1485 # \funcline... arguments
1486 # \, --> , \[ --> [, \] --> ]
1487 ch.chtype = chunk_type(PLAIN)
1488
1489 elif s(buf, ch.data) == 'renewcommand':
1490 # \renewcommand{\indexsubitem}....
1491 i, length = i-1, length-1
1492 del pp[i]
1493 length, newi = getnextarg(length, buf, pp, i)
1494 if newi-i == 1 \
1495 and i < length \
1496 and pp[i].chtype == chunk_type(CSNAME) \
1497 and s(buf, pp[i].data) == 'indexsubitem':
1498 del pp[i:newi]
1499 length = length - (newi-i)
1500 length, newi = getnextarg(length, buf, pp, i)
1501 text = flattext(buf, pp[i:newi])
1502 if text[:1] != '(' or text[-1:] != ')':
1503 raise error, 'expected indexsubitme enclosed in braces'
1504 words = string.split(text[1:-1])
1505 hist.indexsubitem = words
1506 del text, words
1507 else:
1508 print 'WARNING: renewcommand with unsupported arg removed'
1509 del pp[i:newi]
1510 length = length - (newi-i)
1511
1512 elif s(buf, ch.data) == 'item':
1513 ch.chtype = chunk_type(CSLINE)
1514 length, newi = getoptarg(length, buf, pp, i)
1515 ingroupch = pp[i:newi]
1516 del pp[i:newi]
1517 length = length - (newi-i)
1518 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1519 i, length = i+1, length+1
1520
1521 elif s(buf, ch.data) == 'ttindex':
1522 idxsi = hist.indexsubitem
1523
1524 cat_class = ''
1525 if len(idxsi) >= 2 and idxsi[1] in \
1526 ('method', 'function', 'protocol'):
1527 command = 'findex'
1528 elif len(idxsi) >= 2 and idxsi[1] in \
1529 ('exception', 'object'):
1530 command = 'vindex'
1531 else:
1532 print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
1533 command = 'cindex'
1534
1535 if not cat_class:
1536 cat_class = '('+string.join(idxsi)+')'
1537
1538 ch.chtype = chunk_type(CSLINE)
1539 ch.data = command
1540
1541 length, newi = getnextarg(length, buf, pp, i)
1542 arg = pp[i:newi]
1543 del pp[i:newi]
1544 length = length - (newi-i)
1545
1546 cat_arg = [chunk(PLAIN, ch.where, cat_class)]
1547
1548 # determine what should be set in roman, and
1549 # what in tt-font
1550 if command in regindices:
1551
1552 arg = [chunk(CSNAME, ch.where, 't'),
1553 chunk(GROUP, ch.where, arg)]
1554 else:
1555 cat_arg = [chunk(CSNAME, ch.where, 'r'),
1556 chunk(GROUP, ch.where, cat_arg)]
1557
1558 ingroupch = arg + \
1559 [chunk(PLAIN, ch.where, ' ')] + \
1560 cat_arg
1561
1562 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1563 length, i = length+1, i+1
1564
1565
1566 elif s(buf, ch.data) == 'ldots':
1567 # \ldots --> \dots{} --> @dots{}
1568 ch.data = 'dots'
1569 if i == length \
1570 or pp[i].chtype != chunk_type(GROUP) \
1571 or pp[i].data != []:
1572 pp.insert(i, chunk(GROUP, ch.where, []))
1573 i, length = i+1, length+1
1574 elif s(buf, ch.data) in wordsselves:
1575 # \UNIX --> UNIX
1576 ch.chtype = chunk_type(PLAIN)
1577 if i != length \
1578 and pp[i].chtype == chunk_type(GROUP) \
1579 and pp[i].data == []:
1580 del pp[i]
1581 length = length-1
1582 elif s(buf, ch.data) in for_texi:
1583 pass
1584
1585 elif s(buf, ch.data) == 'e':
1586 # "\e" --> "\"
1587 ch.data = '\\'
1588 ch.chtype = chunk_type(PLAIN)
1589 elif (s(buf, ch.data) == 'lineiii') or\
1590 (s(buf, ch.data) == 'lineii'):
1591 # This is the most tricky one
1592 # \lineiii{a1}{a2}[{a3}] -->
1593 # @item @<cts. of itemargmacro>{a1}
1594 # a2 [ -- a3]
1595 #
1596 ##print 'LINEIIIIII!!!!!!!'
Guido van Rossum49604d31996-09-10 22:19:51 +00001597## wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001598## dumpit(buf, wobj.write, pp[i-1:i+5])
1599## print '--->' + wobj.data + '<----'
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001600 if not hist.inenv:
1601 raise error, 'no environment for lineiii'
1602 if (hist.inenv[0] != 'tableiii') and \
1603 (hist.inenv[0] != 'tableii'):
1604 raise error, \
1605 'wrong command (' + \
1606 s(buf, ch.data)+ \
1607 ') in wrong environment (' \
1608 + `hist.inenv[0]` + ')'
1609 ch.chtype = chunk_type(CSLINE)
1610 ch.data = 'item'
1611 length, newi = getnextarg(length, buf, pp, i)
1612 ingroupch = [chunk(CSNAME, 0,
1613 hist.itemargmacro),
1614 chunk(GROUP, 0, pp[i:newi])]
1615 del pp[i:newi]
1616 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001617## print 'ITEM ARG: --->',
Guido van Rossum49604d31996-09-10 22:19:51 +00001618## wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001619## dumpit(buf, wobj.write, ingroupch)
1620## print wobj.data, '<---'
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001621 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1622 grouppos = i
1623 i, length = i+1, length+1
1624 length, i = getnextarg(length, buf, pp, i)
1625 length, newi = getnextarg(length, buf, pp, i)
1626 if newi > i:
1627 # we have a 3rd arg
1628 pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
1629 i = newi + 1
1630 length = length + 1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001631## pp[grouppos].data = pp[grouppos].data \
1632## + [chunk(PLAIN, ch.where, ' ')] \
1633## + pp[i:newi]
1634## del pp[i:newi]
1635## length = length - (newi-i)
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001636 if length != len(pp):
1637 raise 'IN LINEIII IS THE ERR', `i`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001638
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001639 elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
1640 #\xxxsection{A} ---->
1641 # @node A, , ,
1642 # @xxxsection A
1643 ## also: remove commas and quotes
1644 ch.chtype = chunk_type(CSLINE)
1645 length, newi = getnextarg(length, buf, pp, i)
1646 afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
1647 if afternodenamecmd < 0:
1648 cp1 = crcopy(pp[i:newi])
1649 pp[i:newi] = [
1650 chunk(GROUP, ch.where,
1651 pp[i:newi])]
1652 length, newi = length - (newi-i) + 1, i+1
1653 text = flattext(buf, cp1)
1654 text = invent_node_names(text)
1655 else:
1656 length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
1657 cp1 = crcopy(pp[afternodenamecmd:endarg])
1658 del pp[newi:endarg]
1659 length = length - (endarg-newi)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001660
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001661 pp[i:newi] = [
1662 chunk(GROUP, ch.where,
1663 pp[i:newi])]
1664 length, newi = length - (newi-i) + 1, i + 1
1665 text = flattext(buf, cp1)
1666 if text[-1] == '.':
1667 text = text[:-1]
Guido van Rossum7a2dba21993-11-05 14:45:11 +00001668## print 'FLATTEXT:', `text`
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001669 if text in hist.nodenames:
1670 print 'WARNING: node name ' + `text` + ' already used'
1671 out.doublenodes.append(text)
1672 else:
1673 hist.nodenames.append(text)
1674 text = rm_commas_etc(text)
1675 pp[i-1:i-1] = [
1676 chunk(CSLINE, ch.where, 'node'),
1677 chunk(GROUP, ch.where, [
1678 chunk(PLAIN, ch.where, text+', , ,')
1679 ])]
1680 i, length = newi+2, length+2
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001681
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001682 elif s(buf,ch.data) == 'funcline':
1683 # fold it to a very short environment
1684 pp[i-1:i-1] = [
1685 chunk(CSLINE, ch.where, 'end'),
1686 chunk(GROUP, ch.where, [
1687 chunk(PLAIN, ch.where, hist.command)])]
1688 i, length = i+2, length+2
1689 length, i = do_funcdesc(length, buf, pp, i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001690
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001691 elif s(buf,ch.data) == 'dataline':
1692 pp[i-1:i-1] = [
1693 chunk(CSLINE, ch.where, 'end'),
1694 chunk(GROUP, ch.where, [
1695 chunk(PLAIN, ch.where, hist.command)])]
1696 i, length = i+2, length+2
1697 length, i = do_datadesc(length, buf, pp, i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001698
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001699 elif s(buf,ch.data) == 'excline':
1700 pp[i-1:i-1] = [
1701 chunk(CSLINE, ch.where, 'end'),
1702 chunk(GROUP, ch.where, [
1703 chunk(PLAIN, ch.where, hist.command)])]
1704 i, length = i+2, length+2
1705 length, i = do_excdesc(length, buf, pp, i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001706
1707
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001708 elif s(buf, ch.data) == 'index':
1709 #\index{A} --->
1710 # @cindex A
1711 ch.chtype = chunk_type(CSLINE)
1712 ch.data = 'cindex'
1713 length, newi = getnextarg(length, buf, pp, i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001714
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001715 ingroupch = pp[i:newi]
1716 del pp[i:newi]
1717 length = length - (newi-i)
1718 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1719 length, i = length+1, i+1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001720
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001721 elif s(buf, ch.data) == 'bifuncindex':
1722 ch.chtype = chunk_type(CSLINE)
1723 ch.data = 'findex'
1724 length, newi = getnextarg(length, buf, pp, i)
1725 ingroupch = pp[i:newi]
1726 del pp[i:newi]
1727 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001728
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001729 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1730 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1731 ingroupch.append(chunk(GROUP, ch.where, [
1732 chunk(PLAIN, ch.where,
1733 '(built-in function)')]))
1734
1735 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1736 length, i = length+1, i+1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001737
1738
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001739 elif s(buf, ch.data) == 'obindex':
1740 ch.chtype = chunk_type(CSLINE)
1741 ch.data = 'findex'
1742 length, newi = getnextarg(length, buf, pp, i)
1743 ingroupch = pp[i:newi]
1744 del pp[i:newi]
1745 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001746
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001747 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1748 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1749 ingroupch.append(chunk(GROUP, ch.where, [
1750 chunk(PLAIN, ch.where,
1751 '(object)')]))
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001752
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001753 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1754 length, i = length+1, i+1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001755
1756
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001757 elif s(buf, ch.data) == 'opindex':
1758 ch.chtype = chunk_type(CSLINE)
1759 ch.data = 'findex'
1760 length, newi = getnextarg(length, buf, pp, i)
1761 ingroupch = pp[i:newi]
1762 del pp[i:newi]
1763 length = length - (newi-i)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001764
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001765 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1766 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1767 ingroupch.append(chunk(GROUP, ch.where, [
1768 chunk(PLAIN, ch.where,
1769 '(operator)')]))
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001770
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001771 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1772 length, i = length+1, i+1
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001773
1774
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001775 elif s(buf, ch.data) == 'bimodindex':
1776 ch.chtype = chunk_type(CSLINE)
1777 ch.data = 'pindex'
1778 length, newi = getnextarg(length, buf, pp, i)
1779 ingroupch = pp[i:newi]
1780 del pp[i:newi]
1781 length = length - (newi-i)
1782
1783 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1784 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1785 ingroupch.append(chunk(GROUP, ch.where, [
1786 chunk(PLAIN, ch.where,
1787 '(built-in)')]))
1788
1789 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1790 length, i = length+1, i+1
1791
1792 elif s(buf, ch.data) == 'sectcode':
1793 ch.data = 'code'
1794
1795
1796 elif s(buf, ch.data) == 'stmodindex':
1797 ch.chtype = chunk_type(CSLINE)
1798 # use the program index as module index
1799 ch.data = 'pindex'
1800 length, newi = getnextarg(length, buf, pp, i)
1801 ingroupch = pp[i:newi]
1802 del pp[i:newi]
1803 length = length - (newi-i)
1804
1805 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1806 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1807 ingroupch.append(chunk(GROUP, ch.where, [
1808 chunk(PLAIN, ch.where,
1809 '(standard)')]))
1810
1811 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1812 length, i = length+1, i+1
1813
1814
1815 elif s(buf, ch.data) == 'stindex':
1816 # XXX must actually go to newindex st
1817 wh = ch.where
1818 ch.chtype = chunk_type(CSLINE)
1819 ch.data = 'cindex'
1820 length, newi = getnextarg(length, buf, pp, i)
1821 ingroupch = [chunk(CSNAME, wh, 'code'),
1822 chunk(GROUP, wh, pp[i:newi])]
1823
1824 del pp[i:newi]
1825 length = length - (newi-i)
1826
1827 t = ingroupch[:]
1828 t.append(chunk(PLAIN, wh, ' statement'))
1829
1830 pp.insert(i, chunk(GROUP, wh, t))
1831 i, length = i+1, length+1
1832
1833 pp.insert(i, chunk(CSLINE, wh, 'cindex'))
1834 i, length = i+1, length+1
1835
1836 t = ingroupch[:]
1837 t.insert(0, chunk(PLAIN, wh, 'statement, '))
1838
1839 pp.insert(i, chunk(GROUP, wh, t))
1840 i, length = i+1, length+1
1841
1842
1843 elif s(buf, ch.data) == 'indexii':
1844 #\indexii{A}{B} --->
1845 # @cindex A B
1846 # @cindex B, A
1847 length, newi = getnextarg(length, buf, pp, i)
1848 cp11 = pp[i:newi]
1849 cp21 = crcopy(pp[i:newi])
1850 del pp[i:newi]
1851 length = length - (newi-i)
1852 length, newi = getnextarg(length, buf, pp, i)
1853 cp12 = pp[i:newi]
1854 cp22 = crcopy(pp[i:newi])
1855 del pp[i:newi]
1856 length = length - (newi-i)
1857
1858 ch.chtype = chunk_type(CSLINE)
1859 ch.data = 'cindex'
1860 pp.insert(i, chunk(GROUP, ch.where, cp11 + [
1861 chunk(PLAIN, ch.where, ' ')] + cp12))
1862 i, length = i+1, length+1
1863 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
1864 chunk(GROUP, ch.where, cp22 + [
1865 chunk(PLAIN, ch.where, ', ')]+ cp21)]
1866 i, length = i+2, length+2
1867
1868 elif s(buf, ch.data) == 'indexiii':
1869 length, newi = getnextarg(length, buf, pp, i)
1870 cp11 = pp[i:newi]
1871 cp21 = crcopy(pp[i:newi])
1872 cp31 = crcopy(pp[i:newi])
1873 del pp[i:newi]
1874 length = length - (newi-i)
1875 length, newi = getnextarg(length, buf, pp, i)
1876 cp12 = pp[i:newi]
1877 cp22 = crcopy(pp[i:newi])
1878 cp32 = crcopy(pp[i:newi])
1879 del pp[i:newi]
1880 length = length - (newi-i)
1881 length, newi = getnextarg(length, buf, pp, i)
1882 cp13 = pp[i:newi]
1883 cp23 = crcopy(pp[i:newi])
1884 cp33 = crcopy(pp[i:newi])
1885 del pp[i:newi]
1886 length = length - (newi-i)
1887
1888 ch.chtype = chunk_type(CSLINE)
1889 ch.data = 'cindex'
1890 pp.insert(i, chunk(GROUP, ch.where, cp11 + [
1891 chunk(PLAIN, ch.where, ' ')] + cp12
1892 + [chunk(PLAIN, ch.where, ' ')]
1893 + cp13))
1894 i, length = i+1, length+1
1895 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
1896 chunk(GROUP, ch.where, cp22 + [
1897 chunk(PLAIN, ch.where, ' ')]+ cp23
1898 + [chunk(PLAIN, ch.where, ', ')] +
1899 cp21)]
1900 i, length = i+2, length+2
1901 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'),
1902 chunk(GROUP, ch.where, cp33 + [
1903 chunk(PLAIN, ch.where, ', ')]+ cp31
1904 + [chunk(PLAIN, ch.where, ' ')] +
1905 cp32)]
1906 i, length = i+2, length+2
1907
1908
1909 elif s(buf, ch.data) == 'indexiv':
1910 length, newi = getnextarg(length, buf, pp, i)
1911 cp11 = pp[i:newi]
1912 cp21 = crcopy(pp[i:newi])
1913 cp31 = crcopy(pp[i:newi])
1914 cp41 = crcopy(pp[i:newi])
1915 del pp[i:newi]
1916 length = length - (newi-i)
1917 length, newi = getnextarg(length, buf, pp, i)
1918 cp12 = pp[i:newi]
1919 cp22 = crcopy(pp[i:newi])
1920 cp32 = crcopy(pp[i:newi])
1921 cp42 = crcopy(pp[i:newi])
1922 del pp[i:newi]
1923 length = length - (newi-i)
1924 length, newi = getnextarg(length, buf, pp, i)
1925 cp13 = pp[i:newi]
1926 cp23 = crcopy(pp[i:newi])
1927 cp33 = crcopy(pp[i:newi])
1928 cp43 = crcopy(pp[i:newi])
1929 del pp[i:newi]
1930 length = length - (newi-i)
1931 length, newi = getnextarg(length, buf, pp, i)
1932 cp14 = pp[i:newi]
1933 cp24 = crcopy(pp[i:newi])
1934 cp34 = crcopy(pp[i:newi])
1935 cp44 = crcopy(pp[i:newi])
1936 del pp[i:newi]
1937 length = length - (newi-i)
1938
1939 ch.chtype = chunk_type(CSLINE)
1940 ch.data = 'cindex'
1941 ingroupch = cp11 + \
1942 spacech + cp12 + \
1943 spacech + cp13 + \
1944 spacech + cp14
1945 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1946 i, length = i+1, length+1
1947 ingroupch = cp22 + \
1948 spacech + cp23 + \
1949 spacech + cp24 + \
1950 commach + cp21
1951 pp[i:i] = cindexch + [
1952 chunk(GROUP, ch.where, ingroupch)]
1953 i, length = i+2, length+2
1954 ingroupch = cp33 + \
1955 spacech + cp34 + \
1956 commach + cp31 + \
1957 spacech + cp32
1958 pp[i:i] = cindexch + [
1959 chunk(GROUP, ch.where, ingroupch)]
1960 i, length = i+2, length+2
1961 ingroupch = cp44 + \
1962 commach + cp41 + \
1963 spacech + cp42 + \
1964 spacech + cp43
1965 pp[i:i] = cindexch + [
1966 chunk(GROUP, ch.where, ingroupch)]
1967 i, length = i+2, length+2
1968
1969
1970
1971 else:
1972 print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
1973
1974
1975
# Matches the characters '@', '{' and '}'; dumpit() prefixes each match
# found in PLAIN text with an '@' before writing it out.
re_atsign = regex.compile('[@{}]')
# Matches a newline; dumpit() uses it to replace newlines inside the
# argument of a CSLINE command with spaces.
re_newline = regex.compile('\n')
1978
1979def dumpit(buf, wm, pp):
1980
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001981 global out
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001982
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00001983 i, length = 0, len(pp)
1984
1985 addspace = 0
1986
1987 while 1:
1988 if len(pp) != length:
1989 raise 'FATAL', 'inconsistent length'
1990 if i == length:
1991 break
1992 ch = pp[i]
1993 i = i + 1
1994
1995 if addspace:
1996 dospace = 1
1997 addspace = 0
1998 else:
1999 dospace = 0
2000
2001 if ch.chtype == chunk_type(CSNAME):
2002 wm('@' + s(buf, ch.data))
2003 if s(buf, ch.data) == 'node' and \
2004 pp[i].chtype == chunk_type(PLAIN) and \
2005 s(buf, pp[i].data) in out.doublenodes:
2006 ##XXX doesnt work yet??
2007 wm(' ZZZ-' + zfill(`i`, 4))
2008 if s(buf, ch.data)[0] in string.letters:
2009 addspace = 1
2010 elif ch.chtype == chunk_type(PLAIN):
2011 if dospace and s(buf, ch.data) not in (' ', '\t'):
2012 wm(' ')
2013 text = s(buf, ch.data)
2014 while 1:
2015 pos = re_atsign.search(text)
2016 if pos < 0:
2017 break
2018 wm(text[:pos] + '@' + text[pos])
2019 text = text[pos+1:]
2020 wm(text)
2021 elif ch.chtype == chunk_type(GROUP):
2022 wm('{')
2023 dumpit(buf, wm, ch.data)
2024 wm('}')
2025 elif ch.chtype == chunk_type(DENDLINE):
2026 wm('\n\n')
2027 while i != length and pp[i].chtype in \
2028 (chunk_type(DENDLINE), chunk_type(ENDLINE)):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002029 i = i + 1
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002030 elif ch.chtype == chunk_type(OTHER):
2031 wm(s(buf, ch.data))
2032 elif ch.chtype == chunk_type(ACTIVE):
2033 wm(s(buf, ch.data))
2034 elif ch.chtype == chunk_type(ENDLINE):
2035 wm('\n')
2036 elif ch.chtype == chunk_type(CSLINE):
2037 if i >= 2 and pp[i-2].chtype not in \
2038 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2039 and (pp[i-2].chtype != chunk_type(PLAIN)
2040 or s(buf, pp[i-2].data)[-1] != '\n'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002041
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002042 wm('\n')
2043 wm('@' + s(buf, ch.data))
2044 if i == length:
2045 raise error, 'CSLINE expected another chunk'
2046 if pp[i].chtype != chunk_type(GROUP):
2047 raise error, 'CSLINE expected GROUP'
2048 if type(pp[i].data) != ListType:
2049 raise error, 'GROUP chould contain []-data'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002050
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002051 wobj = Wobj()
2052 dumpit(buf, wobj.write, pp[i].data)
2053 i = i + 1
2054 text = wobj.data
2055 del wobj
2056 if text:
2057 wm(' ')
2058 while 1:
2059 pos = re_newline.search(text)
2060 if pos < 0:
2061 break
2062 print 'WARNING: found newline in csline arg'
2063 wm(text[:pos] + ' ')
2064 text = text[pos+1:]
2065 wm(text)
2066 if i >= length or \
2067 pp[i].chtype not in (chunk_type(CSLINE),
2068 chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2069 and (pp[i].chtype != chunk_type(PLAIN)
2070 or s(buf, pp[i].data)[0] != '\n'):
2071 wm('\n')
Guido van Rossum49604d31996-09-10 22:19:51 +00002072
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002073 elif ch.chtype == chunk_type(COMMENT):
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002074## print 'COMMENT: previous chunk =', pp[i-2]
Guido van Rossum557ed941995-03-28 13:33:45 +00002075## if pp[i-2].chtype == chunk_type(PLAIN):
2076## print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002077 if s(buf, ch.data) and \
2078 regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
2079 if i >= 2 and pp[i-2].chtype not in \
2080 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2081 and not (pp[i-2].chtype == chunk_type(PLAIN)
2082 and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
2083 print 'ADDING NEWLINE'
2084 wm('\n')
2085 wm('@c ' + s(buf, ch.data))
2086 elif ch.chtype == chunk_type(IGNORE):
2087 pass
2088 else:
2089 try:
2090 str = `s(buf, ch.data)`
2091 except TypeError:
2092 str = `ch.data`
2093 if len(str) > 400:
2094 str = str[:400] + '...'
2095 print 'warning:', ch.chtype, 'not handled, data ' + str
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002096
2097
2098
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002099def main():
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002100 outfile = None
2101 headerfile = 'texipre.dat'
2102 trailerfile = 'texipost.dat'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002103
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002104 try:
2105 opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
2106 except getopt.error:
2107 args = []
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002108
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002109 if not args:
2110 print 'usage: partparse [-o outfile] [-h headerfile]',
2111 print '[-t trailerfile] file ...'
2112 sys.exit(2)
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002113
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002114 for opt, arg in opts:
2115 if opt == '-o': outfile = arg
2116 if opt == '-h': headerfile = arg
2117 if opt == '-t': trailerfile = arg
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002118
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002119 if not outfile:
2120 root, ext = os.path.splitext(args[0])
2121 outfile = root + '.texi'
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002122
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002123 if outfile in args:
2124 print 'will not overwrite input file', outfile
2125 sys.exit(2)
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002126
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002127 outf = open(outfile, 'w')
2128 outf.write(open(headerfile, 'r').read())
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002129
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002130 for file in args:
2131 if len(args) > 1: print '='*20, file, '='*20
2132 buf = open(file, 'r').read()
2133 w, pp = parseit(buf)
2134 startchange()
2135 changeit(buf, pp)
2136 dumpit(buf, outf.write, pp)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002137
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002138 outf.write(open(trailerfile, 'r').read())
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002139
Guido van Rossum5f18d6c1996-09-10 22:34:20 +00002140 outf.close()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002141
# Run the conversion only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()