blob: db53a01c4f6b57c3b892acaa7c2bbbbb59b8334b [file] [log] [blame]
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001#
2# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
3# and generate texinfo source.
4#
# This is *not* a good example of good programming practices. In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and more maintainable.
#
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
11#
12# Have I been clear enough??
13#
14# -jh
15
16
Guido van Rossum7a2dba21993-11-05 14:45:11 +000017import sys, string, regex, getopt, os
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000018
Guido van Rossum49604d31996-09-10 22:19:51 +000019from types import IntType, ListType, StringType, TupleType
20
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000021# Different parse modes for phase 1
MODE_REGULAR = 0        # ordinary text
MODE_VERBATIM = 1
MODE_CS_SCAN = 2        # scanning a control sequence name
MODE_COMMENT = 3        # scanning a comment up to its end of line
MODE_MATH = 4           # inside $...$
MODE_DMATH = 5          # inside $$...$$
MODE_GOBBLEWHITE = 6    # skipping white space

# Every valid raw mode value, in numeric order.
the_modes = (
    MODE_REGULAR,
    MODE_VERBATIM,
    MODE_CS_SCAN,
    MODE_COMMENT,
    MODE_MATH,
    MODE_DMATH,
    MODE_GOBBLEWHITE,
)
32
33# Show the neighbourhood of the scanned buffer
def epsilon(buf, where):
    """Return a ' Context ...' fragment showing the text around *where*.

    Up to ten characters of *buf* on each side of position *where* are
    quoted (via repr), clipped to the bounds of the buffer.
    """
    lo = max(where - 10, 0)
    hi = min(where + 10, len(buf))
    return ' Context %s.%s.' % (repr(buf[lo:where]), repr(buf[where:hi]))
41
42# Should return the line number. never worked
def lin():
    """Return a ' Line N.' fragment built from the module-level 'lineno'.

    The original author's remark on this helper is that the line number
    "never worked", so treat the value as approximate.
    """
    return ' Line %s.' % repr(lineno)
46
47# Displays the recursion level.
def lv(lvl):
    """Return a ' Level N.' fragment showing recursion level *lvl*."""
    return ' Level %s.' % repr(lvl)
50
51# Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    """Return the level, line and context fragments joined, in that order."""
    pieces = (lv(lvl), lin(), epsilon(buf, where))
    return pieces[0] + pieces[1] + pieces[2]
54
55
56# This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
    """Wrapper around a raw parse-mode number, so that it prints by name."""

    def __init__(self, arg):
        """Store raw mode *arg*; raise ValueError unless it is in the_modes."""
        if arg not in the_modes:
            raise ValueError('mode not in the_modes')
        self.mode = arg

    def __cmp__(self, other):
        """Order by raw mode value.

        An operand of a different type is first wrapped through 'mode',
        which also validates it.
        """
        if type(self) != type(other):
            other = mode(other)
        return cmp(self.mode, other.mode)

    def __repr__(self):
        """Return the symbolic constant name of the stored mode."""
        names = (
            (MODE_REGULAR, 'MODE_REGULAR'),
            (MODE_VERBATIM, 'MODE_VERBATIM'),
            (MODE_CS_SCAN, 'MODE_CS_SCAN'),
            (MODE_COMMENT, 'MODE_COMMENT'),
            (MODE_MATH, 'MODE_MATH'),
            (MODE_DMATH, 'MODE_DMATH'),
            (MODE_GOBBLEWHITE, 'MODE_GOBBLEWHITE'),
        )
        for value, name in names:
            if self.mode == value:
                return name
        raise ValueError('mode not in the_modes')


# 'mode' is the spelling the rest of the file uses to build a Mode.
mode = Mode
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000088
89
# After phase 1, the text consists of chunks, each with a certain type.
# This type is assigned to the chtype member of the chunk,
# the where-field contains the file position where the chunk was found,
# and the data field contains (1) a tuple describing the start and end
# positions of the substring (can be used as a slice of the buf-variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks.
PLAIN = 0           # assume plaintext, data = the text
GROUP = 1           # group ({}), data = [chunk, chunk, ...]
CSNAME = 2          # control sequence token, data = the command
COMMENT = 3         # data is the actual comment
DMATH = 4           # displaymath, data = [chunk, chunk, ...]
MATH = 5            # math, see DMATH
OTHER = 6           # char with catcode other, data = char
ACTIVE = 7          # active char
GOBBLEDWHITE = 8    # gobbled LWSP, after CSNAME
ENDLINE = 9         # end-of-line, data = '\n'
DENDLINE = 10       # double EOL, data = '\n', indicates \par
ENV = 11            # LaTeX environment, data = (envname, [ch, ch, ...])
CSLINE = 12         # for texi: next chunk will be one group of args,
                    # all of which will be set on one line
IGNORE = 13         # ignore this data
ENDENV = 14         # temporary end-of-group indicator
IF = 15             # if-directive, data = (flag, negate, [ch, ch, ...])

# Every valid raw chunk type, in numeric order.
the_types = (
    PLAIN, GROUP, CSNAME, COMMENT,
    DMATH, MATH, OTHER, ACTIVE,
    GOBBLEDWHITE, ENDLINE, DENDLINE, ENV,
    CSLINE, IGNORE, ENDENV, IF,
)
118
119# class, just to display symbolic name
class ChunkType:
    """Wrapper around a raw chunk-type number, so that it prints by name."""

    def __init__(self, chunk_type):
        """Store *chunk_type*; raise ValueError unless it is in the_types."""
        if chunk_type not in the_types:
            raise ValueError('chunk_type not in the_types')
        self.chunk_type = chunk_type

    def __cmp__(self, other):
        """Order by raw chunk-type value.

        An operand of a different type is first looked up through the
        module-level chunk_type() function.
        """
        if type(self) != type(other):
            other = chunk_type(other)
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        """Return the symbolic constant name of the stored chunk type."""
        names = (
            (PLAIN, 'PLAIN'),
            (GROUP, 'GROUP'),
            (CSNAME, 'CSNAME'),
            (COMMENT, 'COMMENT'),
            (DMATH, 'DMATH'),
            (MATH, 'MATH'),
            (OTHER, 'OTHER'),
            (ACTIVE, 'ACTIVE'),
            (GOBBLEDWHITE, 'GOBBLEDWHITE'),
            (DENDLINE, 'DENDLINE'),
            (ENDLINE, 'ENDLINE'),
            (ENV, 'ENV'),
            (CSLINE, 'CSLINE'),
            (IGNORE, 'IGNORE'),
            (ENDENV, 'ENDENV'),
            (IF, 'IF'),
        )
        for value, name in names:
            if self.chunk_type == value:
                return name
        raise ValueError('chunk_type not in the_types')
166
167# ...and the wrapper
# One shared ChunkType instance per raw type, built once at import time.
_all_chunk_types = {}
for t in the_types:
    _all_chunk_types[t] = ChunkType(t)


def chunk_type(t):
    """Return the shared ChunkType instance registered for raw type *t*."""
    return _all_chunk_types[t]


# The type object of a ChunkType instance; Chunk.__init__ compares against it.
chunk_type_type = type(chunk_type(PLAIN))
Guido van Rossum49604d31996-09-10 22:19:51 +0000177
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000178# this class contains a part of the parsed buffer
class Chunk:
    """One piece of the parsed buffer: a type, a position and its data."""

    def __init__(self, chtype, where, data):
        """Build a chunk.

        chtype -- a ChunkType instance, or a raw type that is converted
                  through chunk_type()
        where  -- buffer position of the chunk; must be a plain integer,
                  otherwise TypeError is raised
        data   -- payload, stored as given
        """
        if type(chtype) != chunk_type_type:
            chtype = chunk_type(chtype)
        self.chtype = chtype
        if type(where) != IntType:
            raise TypeError('\'where\' is not a number')
        self.where = where
        self.data = data

    def __repr__(self):
        """Return 'chunk' followed by the (chtype, where, data) tuple."""
        return 'chunk' + repr((self.chtype, self.where, self.data))


# 'chunk' is the spelling the rest of the file uses to build a Chunk.
chunk = Chunk
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000194
195
# String exception raised for parse errors throughout this file.
error = 'partparse.error'

#
# TeX's category codes
#
CC_ESCAPE = 0
CC_LBRACE = 1
CC_RBRACE = 2
CC_MATHSHIFT = 3
CC_ALIGNMENT = 4
CC_ENDLINE = 5
CC_PARAMETER = 6
CC_SUPERSCRIPT = 7
CC_SUBSCRIPT = 8
CC_IGNORE = 9
CC_WHITE = 10
CC_LETTER = 11
CC_OTHER = 12
CC_ACTIVE = 13
CC_COMMENT = 14
CC_INVALID = 15

# Printable name of each catcode, indexed by the catcode value.
cc_names = [
    'CC_ESCAPE',
    'CC_LBRACE',
    'CC_RBRACE',
    'CC_MATHSHIFT',
    'CC_ALIGNMENT',
    'CC_ENDLINE',
    'CC_PARAMETER',
    'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT',
    'CC_IGNORE',
    'CC_WHITE',
    'CC_LETTER',
    'CC_OTHER',
    'CC_ACTIVE',
    'CC_COMMENT',
    'CC_INVALID',
]
237
238# Show a list of catcode-name-symbols
def pcl(codelist):
    """Return the catcode names for *codelist* as a '[A, B, ...]' string."""
    names = []
    for code in codelist:
        names.append(cc_names[code])
    return '[' + ', '.join(names) + ']'
244
245# the name of the catcode (ACTIVE, OTHER, etc.)
def pc(code):
    """Return the symbolic name of catcode *code* (an index into cc_names)."""
    name = cc_names[code]
    return name
248
249
250# Which catcodes make the parser stop parsing regular plaintext
regular_stopcodes = [
    CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
    CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
    CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE,
]

# Catcodes that may appear in a (multi-letter) control sequence name.
csname_scancodes = [CC_LETTER]

# Catcodes that are skipped when gobbling white space.  CC_ENDLINE is
# deliberately not included.
white_scancodes = [CC_WHITE]

# All sixteen catcode ids except the one for catcode ``other''.
all_but_other_codes = list(range(16))
all_but_other_codes.remove(CC_OTHER)

# Catcodes that end a comment.
comment_stopcodes = [CC_ENDLINE]
269
270# gather all characters together, specified by a list of catcodes
def code2string(cc, codelist):
    """Concatenate the character strings of the catcodes in *codelist*.

    cc       -- catcode table: a sequence indexed by catcode whose entries
                are strings of characters (or a false value when unset)
    codelist -- catcodes to collect, in the order they should appear

    Unset (false) entries contribute nothing.
    """
    parts = []
    for category in codelist:
        chars = cc[category]
        if chars:
            parts.append(chars)
    return ''.join(parts)
279
# automatically generate all characters of catcode other, being the
# complement set in the 8-bit range (256 characters)
def make_other_codes(cc):
    """Return the characters of catcode ``other'' for catcode table *cc*.

    These are all characters with codes 0..255 that are not claimed by any
    category listed in all_but_other_codes, in ascending code order.
    """
    claimed = [0] * 256
    for category in all_but_other_codes:
        chars = cc[category]
        if chars:
            for c in chars:
                claimed[ord(c)] = 1
    leftover = []
    for code in range(256):
        if not claimed[code]:
            leftover.append(chr(code))
    return ''.join(leftover)
293
294# catcode dump (which characters have which catcodes).
def dump_cc(name, cc):
    """Sanity-check catcode table *cc*.

    The printing this routine once did is disabled; what remains is the
    check that the table has exactly 16 entries, else TypeError is raised.
    *name* is only a label and is not used by the check.
    """
    entries = len(cc)
    if entries == 16:
        return
    raise TypeError('cc not good cat class')
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000302
303
304# In the beginning,....
epoch_cc = [None] * 16


# INITEX: the catcodes TeX starts out with
initex_cc = list(epoch_cc)
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE] = '\n'
initex_cc[CC_IGNORE] = '\0'
initex_cc[CC_WHITE] = ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT] = '%'
initex_cc[CC_INVALID] = '\x7F'
# initex_cc[CC_OTHER] is left unset; it is not needed.


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = list(initex_cc)
lplain_cc[CC_LBRACE] = '{'
lplain_cc[CC_RBRACE] = '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
# must come last: ``other'' is whatever the categories above left over
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)


# The table actually used by the parser.  Guido's LaTeX environment
# catcoded '_' as ``other'', so move it from SUBSCRIPT to OTHER.
my_cc = list(lplain_cc)
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:]
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'
dump_cc('my_cc', my_cc)
340
341
342
343# needed for un_re, my equivalent for regexp-quote in Emacs
re_meaning = '\\[]^$'

# The repetition and any-character operators are special as well in the
# Emacs-style syntax used by the regex module, so un_re() must quote them
# too.  Without this, a pattern built from e.g. '.' matches any character.
_re_quoted = re_meaning + '.*+?'


def un_re(str):
    """Return *str* with every regexp-special character backslash-quoted.

    This is the counterpart of Emacs' regexp-quote: the result, compiled
    as a regular expression, is meant to match *str* literally.
    """
    quoted = []
    for c in str:
        if c in _re_quoted:
            quoted.append('\\')
        quoted.append(c)
    return ''.join(quoted)
353
354# NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    """Compile a pattern matching any character that stops regular text.

    The stop characters of catcode table *cc* are pasted unquoted into a
    bracket expression, so this breaks if '[' or ']' is among them.
    """
    stopchars = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stopchars + ']')
358
def make_rc_cs_scan(cc):
    """Compile a pattern matching the first character that is NOT part of
    a control sequence name (note the negated bracket expression)."""
    namechars = code2string(cc, csname_scancodes)
    return regex.compile('[^' + namechars + ']')
361
def make_rc_comment(cc):
    """Compile a pattern matching any character that ends a comment."""
    stopchars = code2string(cc, comment_stopcodes)
    return regex.compile('[' + stopchars + ']')
364
def make_rc_endwhite(cc):
    """Compile a pattern matching the first character that is NOT white
    space (note the negated bracket expression)."""
    whitechars = code2string(cc, white_scancodes)
    return regex.compile('[^' + whitechars + ']')
367
368
369
370# regular: normal mode:
# The compiled scanners used by parseit, all built from my_cc.
# regular: finds the next character that interrupts plain text
rc_regular = make_rc_regular(my_cc)
# cs_scan: finds the end of a command sequence name such as
# `newlength' or `mbox' (or of a one-character one: `;', `,' or `$')
rc_cs_scan = make_rc_cs_scan(my_cc)
# comment: finds the end of a comment
rc_comment = make_rc_comment(my_cc)
# endwhite: finds the end of a run of white space
rc_endwhite = make_rc_endwhite(my_cc)
377
378
# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
# RECURSION-LEVEL is incremented on entry.
# result contains the list of chunks returned;
# together with this list, the buffer position is returned.

# RECURSION-LEVEL will be set to zero *again* when a {,D}MATH-mode scan
# is entered recursively.
# This has been done in order to better check for environment mismatches.
386# This has been done in order to better check for environment-mismatches
387
388def parseit(buf, *rest):
389 global lineno
390
391 if len(rest) == 3:
392 parsemode, start, lvl = rest
393 elif len(rest) == 2:
394 parsemode, start, lvl = rest + (0, )
395 elif len(rest) == 1:
396 parsemode, start, lvl = rest + (0, 0)
397 elif len(rest) == 0:
398 parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
399 else:
400 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
401 result = []
402 end = len(buf)
403 if lvl == 0 and parsemode == mode(MODE_REGULAR):
404 lineno = 1
405 lvl = lvl + 1
406
407 ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
408
409 #
410 # some of the more regular modes...
411 #
412
413 if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
414 cstate = []
415 newpos = start
416 curpmode = parsemode
417 while 1:
418 where = newpos
419 #print '\tnew round: ' + epsilon(buf, where)
420 if where == end:
421 if lvl > 1 or curpmode != mode(MODE_REGULAR):
422 # not the way we started...
423 raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
424 # the real ending of lvl-1 parse
425 return end, result
426
427 pos = rc_regular.search(buf, where)
428
429 if pos < 0:
430 pos = end
431
432 if pos != where:
433 newpos, c = pos, chunk(PLAIN, where, (where, pos))
434 result.append(c)
435 continue
436
437
438 #
439 # ok, pos == where and pos != end
440 #
441 foundchar = buf[where]
442 if foundchar in my_cc[CC_LBRACE]:
443 # recursive subgroup parse...
444 newpos, data = parseit(buf, curpmode, where+1, lvl)
445 result.append(chunk(GROUP, where, data))
446
447 elif foundchar in my_cc[CC_RBRACE]:
448 if lvl <= 1:
449 raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
450 if lvl == 1 and mode != mode(MODE_REGULAR):
451 raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
452 return where + 1, result
453
454 elif foundchar in my_cc[CC_ESCAPE]:
455 #
456 # call the routine that actually deals with
457 # this problem. If do_ret is None, than
458 # return the value of do_ret
459 #
460 # Note that handle_cs might call this routine
461 # recursively again...
462 #
463 do_ret, newpos = handlecs(buf, where, \
464 curpmode, lvl, result, end)
465 if do_ret != None:
466 return do_ret
467
468 elif foundchar in my_cc[CC_COMMENT]:
469 newpos, data = parseit(buf, \
470 mode(MODE_COMMENT), where+1, lvl)
471 result.append(chunk(COMMENT, where, data))
472
473 elif foundchar in my_cc[CC_MATHSHIFT]:
474 # note that recursive calls to math-mode
475 # scanning are called with recursion-level 0
476 # again, in order to check for bad mathend
477 #
478 if where + 1 != end and \
479 buf[where + 1] in \
480 my_cc[CC_MATHSHIFT]:
481 #
482 # double mathshift, e.g. '$$'
483 #
484 if curpmode == mode(MODE_REGULAR):
485 newpos, data = parseit(buf, \
486 mode(MODE_DMATH), \
487 where+2, 0)
488 result.append(chunk(DMATH, \
489 where, data))
490 elif curpmode == mode(MODE_MATH):
491 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
492 elif lvl != 1:
493 raise error, 'bad mathend.' + \
494 lle(lvl, buf, where)
495 else:
496 return where + 2, result
497 else:
498 #
499 # single math shift, e.g. '$'
500 #
501 if curpmode == mode(MODE_REGULAR):
502 newpos, data = parseit(buf, \
503 mode(MODE_MATH), \
504 where+1, 0)
505 result.append(chunk(MATH, \
506 where, data))
507 elif curpmode == mode(MODE_DMATH):
508 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
509 elif lvl != 1:
510 raise error, 'bad mathend.' + \
511 lv(lvl, buf, where)
512 else:
513 return where + 1, result
514
515 elif foundchar in my_cc[CC_IGNORE]:
516 print 'warning: ignored char', `foundchar`
517 newpos = where + 1
518
519 elif foundchar in my_cc[CC_ACTIVE]:
520 result.append(chunk(ACTIVE, where, foundchar))
521 newpos = where + 1
522
523 elif foundchar in my_cc[CC_INVALID]:
524 raise error, 'invalid char ' + `foundchar`
525 newpos = where + 1
526
527 elif foundchar in my_cc[CC_ENDLINE]:
528 #
529 # after an end of line, eat the rest of
530 # whitespace on the beginning of the next line
531 # this is what LaTeX more or less does
532 #
533 # also, try to indicate double newlines (\par)
534 #
535 lineno = lineno + 1
536 savedwhere = where
537 newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
538 if newpos != end and buf[newpos] in \
539 my_cc[CC_ENDLINE]:
540 result.append(chunk(DENDLINE, \
541 savedwhere, foundchar))
542 else:
543 result.append(chunk(ENDLINE, \
544 savedwhere, foundchar))
545 else:
546 result.append(chunk(OTHER, where, foundchar))
547 newpos = where + 1
548
549 elif parsemode == mode(MODE_CS_SCAN):
550 #
551 # scan for a control sequence token. `\ape', `\nut' or `\%'
552 #
553 if start == end:
554 raise EOFError, 'can\'t find end of csname'
555 pos = rc_cs_scan.search(buf, start)
556 if pos < 0:
557 pos = end
558 if pos == start:
559 # first non-letter right where we started the search
560 # ---> the control sequence name consists of one single
561 # character. Also: don't eat white space...
562 if buf[pos] in my_cc[CC_ENDLINE]:
563 lineno = lineno + 1
564 pos = pos + 1
565 return pos, (start, pos)
566 else:
567 spos = pos
568 if buf[pos] == '\n':
569 lineno = lineno + 1
570 spos = pos + 1
571 pos2, dummy = parseit(buf, \
572 mode(MODE_GOBBLEWHITE), spos, lvl)
573 return pos2, (start, pos)
574
575 elif parsemode == mode(MODE_GOBBLEWHITE):
576 if start == end:
577 return start, ''
578 pos = rc_endwhite.search(buf, start)
579 if pos < 0:
580 pos = start
581 return pos, (start, pos)
582
583 elif parsemode == mode(MODE_COMMENT):
584 pos = rc_comment.search(buf, start)
585 lineno = lineno + 1
586 if pos < 0:
587 print 'no newline perhaps?'
588 raise EOFError, 'can\'t find end of comment'
589 pos = pos + 1
590 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
591 return pos2, (start, pos)
592
593
594 else:
595 raise error, 'Unknown mode (' + `parsemode` + ')'
596
597
598#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
599
600#boxcommands = 'mbox', 'fbox'
601#defcommands = 'def', 'newcommand'
602
603endverbstr = '\\end{verbatim}'
604
605re_endverb = regex.compile(un_re(endverbstr))
606
607#
608# handlecs: helper function for parseit, for the special thing we might
609# wanna do after certain command control sequences
610# returns: None or return_data, newpos
611#
612# in the latter case, the calling function is instructed to immediately
613# return with the data in return_data
614#
615def handlecs(buf, where, curpmode, lvl, result, end):
616 global lineno
617
618 # get the control sequence name...
619 newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
620 saveddata = data
621
622 if s(buf, data) in ('begin', 'end'):
623 # skip the expected '{' and get the LaTeX-envname '}'
624 newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
625 if len(data) != 1:
626 raise error, 'expected 1 chunk of data.' + \
627 lle(lvl, buf, where)
628
629 # yucky, we've got an environment
630 envname = s(buf, data[0].data)
631 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
632 if s(buf, saveddata) == 'begin' and envname == 'verbatim':
633 # verbatim deserves special treatment
634 pos = re_endverb.search(buf, newpos)
635 if pos < 0:
636 raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
637 result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
638 newpos = pos + len(endverbstr)
Guido van Rossum49604d31996-09-10 22:19:51 +0000639
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000640 elif s(buf, saveddata) == 'begin':
641 # start parsing recursively... If that parse returns
642 # from an '\end{...}', then should the last item of
643 # the returned data be a string containing the ended
644 # environment
645 newpos, data = parseit(buf, curpmode, newpos, lvl)
Guido van Rossum49604d31996-09-10 22:19:51 +0000646 if not data or type(data[-1]) is not StringType:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000647 raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
648 retenv = data[-1]
649 del data[-1]
650 if retenv != envname:
651 #[`retenv`, `envname`]
652 raise error, 'environments do not match.' + \
653 lle(lvl, buf, where) + \
654 epsilon(buf, newpos)
655 result.append(chunk(ENV, where, (retenv, data)))
656 else:
657 # 'end'... append the environment name, as just
658 # pointed out, and order parsit to return...
659 result.append(envname)
660 ##print 'POINT of return: ' + epsilon(buf, newpos)
661 # the tuple will be returned by parseit
662 return (newpos, result), newpos
663
664 # end of \begin ... \end handling
665
666 elif s(buf, data)[0:2] == 'if':
667 # another scary monster: the 'if' directive
668 flag = s(buf, data)[2:]
669
670 # recursively call parseit, just like environment above..
671 # the last item of data should contain the if-termination
672 # e.g., 'else' of 'fi'
673 newpos, data = parseit(buf, curpmode, newpos, lvl)
674 if not data or data[-1] not in ('else', 'fi'):
675 raise error, 'wrong if... termination' + \
676 lle(lvl, buf, where) + epsilon(buf, newpos)
677
678 ifterm = data[-1]
679 del data[-1]
680 # 0 means dont_negate flag
681 result.append(chunk(IF, where, (flag, 0, data)))
682 if ifterm == 'else':
683 # do the whole thing again, there is only one way
684 # to end this one, by 'fi'
685 newpos, data = parseit(buf, curpmode, newpos, lvl)
686 if not data or data[-1] not in ('fi', ):
687 raise error, 'wrong if...else... termination' \
688 + lle(lvl, buf, where) \
689 + epsilon(buf, newpos)
690
691 ifterm = data[-1]
692 del data[-1]
693 result.append(chunk(IF, where, (flag, 1, data)))
694 #done implicitely: return None, newpos
695
696 elif s(buf, data) in ('else', 'fi'):
697 result.append(s(buf, data))
698 # order calling party to return tuple
699 return (newpos, result), newpos
700
701 # end of \if, \else, ... \fi handling
702
703 elif s(buf, saveddata) == 'verb':
704 x2 = saveddata[1]
705 result.append(chunk(CSNAME, where, data))
706 if x2 == end:
707 raise error, 'premature end of command.' + lle(lvl, buf, where)
708 delimchar = buf[x2]
Guido van Rossum7a2dba21993-11-05 14:45:11 +0000709 ##print 'VERB: delimchar ' + `delimchar`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000710 pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
711 if pos < 0:
712 raise error, 'end of \'verb\' argument (' + \
713 `delimchar` + ') not found.' + \
714 lle(lvl, buf, where)
715 result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
716 newpos = pos + 1
717 else:
718 result.append(chunk(CSNAME, where, data))
719 return None, newpos
720
721# this is just a function to get the string value if the possible data-tuple
def s(buf, data):
    """Return the string value of chunk *data*.

    A string is returned as is.  Otherwise *data* must be a pair of plain
    integers (start, end), and the slice buf[start:end] is returned.
    Anything else raises TypeError.
    """
    if type(data) is type(''):
        return data
    if len(data) == 2 and type(data[0]) is type(data[1]) is type(0):
        first, last = data
        return buf[first:last]
    raise TypeError('expected tuple of 2 integers')
Guido van Rossum49604d31996-09-10 22:19:51 +0000729
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000730
731##length, data1, i = getnextarg(length, buf, pp, i + 1)
732
733# make a deep-copy of some chunks
def crcopy(r):
    """Return a new list holding a chunkcopy() of every chunk in *r*."""
    copies = []
    for ch in r:
        copies.append(chunkcopy(ch))
    return copies
736
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000737
738# copy a chunk, would better be a method of class Chunk...
def chunkcopy(ch):
    """Return a copy of chunk *ch*.

    The chunks inside a GROUP are copied recursively.  For every other
    chunk type the new chunk shares its data with the original.
    """
    if ch.chtype != chunk_type(GROUP):
        return chunk(ch.chtype, ch.where, ch.data)
    members = []
    for sub in ch.data:
        members.append(chunkcopy(sub))
    return chunk(GROUP, ch.where, members)
744
745
746# get next argument for TeX-macro, flatten a group (insert between)
747# or return Command Sequence token, or give back one character
748def getnextarg(length, buf, pp, item):
749
Guido van Rossum49604d31996-09-10 22:19:51 +0000750 ##wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000751 ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
752 ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
753
754 while item < length and pp[item].chtype == chunk_type(ENDLINE):
755 del pp[item]
756 length = length - 1
757 if item >= length:
758 raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
759 if pp[item].chtype == chunk_type(GROUP):
760 newpp = pp[item].data
761 del pp[item]
762 length = length - 1
763 changeit(buf, newpp)
764 length = length + len(newpp)
765 pp[item:item] = newpp
766 item = item + len(newpp)
767 if len(newpp) < 10:
Guido van Rossum49604d31996-09-10 22:19:51 +0000768 wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000769 dumpit(buf, wobj.write, newpp)
770 ##print 'GETNEXTARG: inserted ' + `wobj.data`
771 return length, item
772 elif pp[item].chtype == chunk_type(PLAIN):
773 #grab one char
774 print 'WARNING: grabbing one char'
775 if len(s(buf, pp[item].data)) > 1:
776 pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
777 item, length = item+1, length+1
778 pp[item].data = s(buf, pp[item].data)[1:]
779 else:
780 item = item+1
781 return length, item
782 else:
Guido van Rossumb819bdf1995-03-15 11:26:26 +0000783 ch = pp[item]
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000784 try:
785 str = `s(buf, ch.data)`
786 except TypeError:
787 str = `ch.data`
788 if len(str) > 400:
789 str = str[:400] + '...'
790 print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
791 return length, item
792
793
794# this one is needed to find the end of LaTeX's optional argument, like
795# item[...]
796re_endopt = regex.compile(']')
797
798# get a LaTeX-optional argument, you know, the square braces '[' and ']'
799def getoptarg(length, buf, pp, item):
800
Guido van Rossum49604d31996-09-10 22:19:51 +0000801 wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000802 dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
803 ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
804
805 if item >= length or \
806 pp[item].chtype != chunk_type(PLAIN) or \
807 s(buf, pp[item].data)[0] != '[':
808 return length, item
809
810 pp[item].data = s(buf, pp[item].data)[1:]
811 if len(pp[item].data) == 0:
812 del pp[item]
813 length = length-1
814
815 while 1:
816 if item == length:
817 raise error, 'No end of optional arg found'
818 if pp[item].chtype == chunk_type(PLAIN):
819 text = s(buf, pp[item].data)
820 pos = re_endopt.search(text)
821 if pos >= 0:
822 pp[item].data = text[:pos]
823 if pos == 0:
824 del pp[item]
825 length = length-1
826 else:
827 item=item+1
828 text = text[pos+1:]
829
830 while text and text[0] in ' \t':
831 text = text[1:]
832
833 if text:
834 pp.insert(item, chunk(PLAIN, 0, text))
835 length = length + 1
836 return length, item
837
838 item = item+1
839
840
841# Wobj just add write-requests to the ``data'' attribute
class Wobj:
    """Minimal write-only object: collects everything written in 'data'."""

    # text written so far; instances start out with this empty default
    data = ''

    def write(self, data):
        """Append the string *data* to what has been written so far."""
        collected = self.data
        self.data = collected + data
Guido van Rossumb819bdf1995-03-15 11:26:26 +0000847
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000848# ignore these commands
ignoredcommands = ('bcode', 'ecode')

# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX')

# \{ --> {, \} --> }, etc
themselves = ('{', '}', '.', '@', ' ', '\n') + wordsselves

# these ones also map to themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')

# How marked-up text is shown in flat text: a (prefix, suffix) pair to
# wrap the text in, or the code 1, meaning: fold to uppercase.
markcmds = {
    'code': ('', ''),
    'var': 1,
    'emph': ('_', '_'),
    'strong': ('*', '*'),
}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {
    'rm': 'r',
    'it': 'i',
    'em': 'emph',
    'bf': 'b',
    'tt': 't',
}

# transparent for these commands
for_texi = (
    'emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp',
    'file', 'r', 'i', 't',
)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000867
868
869# try to remove macros and return flat text
870def flattext(buf, pp):
871 pp = crcopy(pp)
Guido van Rossum7a2dba21993-11-05 14:45:11 +0000872 ##print '---> FLATTEXT ' + `pp`
Guido van Rossum49604d31996-09-10 22:19:51 +0000873 wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000874
875 i, length = 0, len(pp)
876 while 1:
877 if len(pp) != length:
878 raise 'FATAL', 'inconsistent length'
879 if i >= length:
880 break
881 ch = pp[i]
882 i = i+1
883 if ch.chtype == chunk_type(PLAIN):
884 pass
885 elif ch.chtype == chunk_type(CSNAME):
886 if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
887 ch.chtype = chunk_type(PLAIN)
888 elif s(buf, ch.data) == 'e':
889 ch.chtype = chunk_type(PLAIN)
890 ch.data = '\\'
891 elif len(s(buf, ch.data)) == 1 \
892 and s(buf, ch.data) in onlylatexspecial:
893 ch.chtype = chunk_type(PLAIN)
894 # if it is followed by an empty group,
895 # remove that group, it was needed for
896 # a true space
897 if i < length \
898 and pp[i].chtype==chunk_type(GROUP) \
899 and len(pp[i].data) == 0:
900 del pp[i]
901 length = length-1
902
903 elif s(buf, ch.data) in markcmds.keys():
904 length, newi = getnextarg(length, buf, pp, i)
905 str = flattext(buf, pp[i:newi])
906 del pp[i:newi]
907 length = length - (newi - i)
908 ch.chtype = chunk_type(PLAIN)
909 markcmd = s(buf, ch.data)
910 x = markcmds[markcmd]
Guido van Rossum49604d31996-09-10 22:19:51 +0000911 if type(x) == TupleType:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000912 pre, after = x
913 str = pre+str+after
914 elif x == 1:
915 str = string.upper(str)
916 else:
917 raise 'FATAL', 'corrupt markcmds'
918 ch.data = str
919 else:
920 if s(buf, ch.data) not in ignoredcommands:
921 print 'WARNING: deleting command ' + `s(buf, ch.data)`
922 print 'PP' + `pp[i-1]`
923 del pp[i-1]
924 i, length = i-1, length-1
925 elif ch.chtype == chunk_type(GROUP):
926 length, newi = getnextarg(length, buf, pp, i-1)
927 i = i-1
928## str = flattext(buf, crcopy(pp[i-1:newi]))
929## del pp[i:newi]
930## length = length - (newi - i)
931## ch.chtype = chunk_type(PLAIN)
932## ch.data = str
933 else:
934 pass
935
936 dumpit(buf, wobj.write, pp)
Guido van Rossum7a2dba21993-11-05 14:45:11 +0000937 ##print 'FLATTEXT: RETURNING ' + `wobj.data`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000938 return wobj.data
939
940# try to generate node names (a bit shorter than the chapter title)
941# note that the \nodename command (see elsewhere) overules these efforts
942def invent_node_names(text):
943 words = string.split(text)
944
Guido van Rossum7a2dba21993-11-05 14:45:11 +0000945 ##print 'WORDS ' + `words`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000946
947 if len(words) == 2 \
948 and string.lower(words[0]) == 'built-in' \
949 and string.lower(words[1]) not in ('modules', 'functions'):
950 return words[1]
951 if len(words) == 3 and string.lower(words[1]) == 'module':
952 return words[2]
953 if len(words) == 3 and string.lower(words[1]) == 'object':
954 return string.join(words[0:2])
955 if len(words) > 4 and string.lower(string.join(words[-4:])) == \
956 'methods and data attributes':
957 return string.join(words[:2])
958 return text
959
# Matches one of the characters that rm_commas_etc() strips from node
# names: comma, backquote, quote, at-sign and both braces.
re_commas_etc = regex.compile('[,`\'@{}]')

# Matches a (possibly empty) run of blanks and tabs; used by
# next_command_p() to recognise whitespace-only text.
re_whitespace = regex.compile('[ \t]*')
963
964
965##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
966
# look if the next non-white stuff is also a command, resulting in skipping
# double endlines (DENDLINE) too, and thus omitting \par's
# Sometimes this is too much, maybe consider DENDLINE's as stop
#
# Scanning starts at pp[i].  ENDLINE and DENDLINE chunks and PLAIN chunks
# holding only blanks/tabs are skipped.  If the first other chunk is a
# CSNAME whose name equals cmdname, the index just _after_ it is returned;
# in every other case, including running off the end of pp, the result
# is -1.  The `length' argument is not used.
#
# NOTE(review): indentation was reconstructed for this routine; the final
# in-loop `return -1' is assumed to be the fall-through for any other
# chunk type -- confirm against the pristine file.
def next_command_p(length, buf, pp, i, cmdname):
    while i < len(pp):
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type(ENDLINE):
            continue
        if ch.chtype == chunk_type(DENDLINE):
            continue
        if ch.chtype == chunk_type(PLAIN):
            text = s(buf, ch.data)
            # whitespace-only: the blank run starts at 0 and covers it all
            if re_whitespace.search(text) == 0 and \
               re_whitespace.match(text) == len(text):
                continue
            return -1
        if ch.chtype == chunk_type(CSNAME):
            if s(buf, ch.data) == cmdname:
                return i # _after_ the command
            return -1
        return -1
    # Nothing but skippable chunks up to the end of pp.  This used to fall
    # off the end and return None, which callers testing `< 0' only
    # accepted by accident.
    return -1
991
992
# things that are special to LaTeX, but not to texi..
onlylatexspecial = '_~^$#&%'

# Empty class; its instances serve as plain attribute containers.
class Struct: pass

# Conversion state; startchange() (re)initialises its attributes.
hist = Struct()
# Holds the doublenodes / doublecindeces lists set up by startchange().
out = Struct()
1000
# Reset the module-level `hist' and `out' records to their initial state:
# empty lists for the environment stack, the node names, the cindex
# entries and the duplicate registers, and zero for the inargs flag and
# both nesting counters.
def startchange():
    global hist, out

    # every attribute gets its own fresh list
    for attr in ('inenv', 'nodenames', 'cindex'):
        setattr(hist, attr, [])
    hist.inargs = 0
    hist.enumeratenesting = 0
    hist.itemizenesting = 0

    for attr in ('doublenodes', 'doublecindeces'):
        setattr(out, attr, [])
1012
1013
# Ready-made one-element chunk lists; changeit() concatenates them into
# the index entries it builds for \indexiv.
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
# (indexed by hist.itemizenesting in changeit)
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
# (indexed by hist.enumeratenesting in changeit)
enumeratesymbols = ['1', 'A', 'a']
1023
##
## \begin{ {func,data,exc}desc }{name}...
## the resulting texi-code is dependent on the contents of indexsubitem
##

# indexsubitem: `['XXX', 'function']`
# funcdesc:
#     deffn {`idxsi`} NAME (FUNCARGS)

# indexsubitem: `['XXX', 'method']`
# funcdesc:
#     defmethod {`idxsi[0]`} NAME (FUNCARGS)

# indexsubitem: `['in', 'module', 'MODNAME']`
# datadesc:
#     defcv data {`idxsi[1:]`} NAME
# excdesc:
#     defcv exception {`idxsi[1:]`} NAME
# funcdesc:
#     deffn {function of `idxsi[1:]`} NAME (FUNCARGS)

# indexsubitem: `['OBJECT', 'attribute']`
# datadesc:
#     defcv attribute {`OBJECT`} NAME
1048
1049
1050## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1051## or \funcline{NAME}{ARGS}
1052##
1053def do_funcdesc(length, buf, pp, i):
1054 startpoint = i-1
1055 ch = pp[startpoint]
1056 wh = ch.where
1057 length, newi = getnextarg(length, buf, pp, i)
1058 funcname = chunk(GROUP, wh, pp[i:newi])
1059 del pp[i:newi]
1060 length = length - (newi-i)
1061 save = hist.inargs
1062 hist.inargs = 1
1063 length, newi = getnextarg(length, buf, pp, i)
1064 hist.inargs = save
1065 del save
1066 the_args = [chunk(PLAIN, wh, '()'[0])] + \
1067 pp[i:newi] + \
1068 [chunk(PLAIN, wh, '()'[1])]
1069 del pp[i:newi]
1070 length = length - (newi-i)
1071
1072 idxsi = hist.indexsubitem # words
1073 command = ''
1074 cat_class = ''
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001075 if idxsi and idxsi[-1] in ('method', 'protocol'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001076 command = 'defmethod'
1077 cat_class = string.join(idxsi[:-1])
1078 elif len(idxsi) == 2 and idxsi[1] == 'function':
1079 command = 'deffn'
1080 cat_class = string.join(idxsi)
1081 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1082 command = 'deffn'
1083 cat_class = 'function of ' + string.join(idxsi[1:])
1084
1085 if not command:
1086 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1087
1088 ch.chtype = chunk_type(CSLINE)
1089 ch.data = command
1090
1091 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1092 cslinearg.append(chunk(PLAIN, wh, ' '))
1093 cslinearg.append(funcname)
1094 cslinearg.append(chunk(PLAIN, wh, ' '))
1095 l = len(cslinearg)
1096 cslinearg[l:l] = the_args
1097
1098 pp.insert(i, chunk(GROUP, wh, cslinearg))
1099 i, length = i+1, length+1
1100 hist.command = command
1101 return length, i
1102
1103
1104## this routine will be called on \begin{excdesc}{NAME}
1105## or \excline{NAME}
1106##
1107def do_excdesc(length, buf, pp, i):
1108 startpoint = i-1
1109 ch = pp[startpoint]
1110 wh = ch.where
1111 length, newi = getnextarg(length, buf, pp, i)
1112 excname = chunk(GROUP, wh, pp[i:newi])
1113 del pp[i:newi]
1114 length = length - (newi-i)
1115
1116 idxsi = hist.indexsubitem # words
1117 command = ''
1118 cat_class = ''
1119 class_class = ''
1120 if len(idxsi) == 2 and idxsi[1] == 'exception':
1121 command = 'defvr'
1122 cat_class = string.join(idxsi)
1123 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1124 command = 'defcv'
1125 cat_class = 'exception'
1126 class_class = string.join(idxsi[1:])
1127 elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
1128 command = 'defcv'
1129 cat_class = 'exception'
1130 class_class = string.join(idxsi[2:])
1131
1132
1133 if not command:
1134 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1135
1136 ch.chtype = chunk_type(CSLINE)
1137 ch.data = command
1138
1139 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1140 cslinearg.append(chunk(PLAIN, wh, ' '))
1141 if class_class:
1142 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1143 cslinearg.append(chunk(PLAIN, wh, ' '))
1144 cslinearg.append(excname)
1145
1146 pp.insert(i, chunk(GROUP, wh, cslinearg))
1147 i, length = i+1, length+1
1148 hist.command = command
1149 return length, i
1150
1151## same for datadesc or dataline...
1152def do_datadesc(length, buf, pp, i):
1153 startpoint = i-1
1154 ch = pp[startpoint]
1155 wh = ch.where
1156 length, newi = getnextarg(length, buf, pp, i)
1157 dataname = chunk(GROUP, wh, pp[i:newi])
1158 del pp[i:newi]
1159 length = length - (newi-i)
1160
1161 idxsi = hist.indexsubitem # words
1162 command = ''
1163 cat_class = ''
1164 class_class = ''
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001165 if idxsi[-1] in ('attribute', 'option'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001166 command = 'defcv'
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001167 cat_class = idxsi[-1]
Sjoerd Mullender1cfb6b81994-12-14 15:28:22 +00001168 class_class = string.join(idxsi[:-1])
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001169 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1170 command = 'defcv'
1171 cat_class = 'data'
1172 class_class = string.join(idxsi[1:])
1173 elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
1174 command = 'defcv'
1175 cat_class = 'data'
1176 class_class = string.join(idxsi[2:])
1177
1178
1179 if not command:
1180 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1181
1182 ch.chtype = chunk_type(CSLINE)
1183 ch.data = command
1184
1185 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1186 cslinearg.append(chunk(PLAIN, wh, ' '))
1187 if class_class:
1188 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1189 cslinearg.append(chunk(PLAIN, wh, ' '))
1190 cslinearg.append(dataname)
1191
1192 pp.insert(i, chunk(GROUP, wh, cslinearg))
1193 i, length = i+1, length+1
1194 hist.command = command
1195 return length, i
1196
1197
# regular indices: those that are not set in tt font by default....
# (consulted by the \ttindex handling in changeit to decide which part of
# the entry gets an explicit font command)
regindices = ('cindex', )
1200
1201# remove illegal characters from node names
1202def rm_commas_etc(text):
1203 result = ''
1204 changed = 0
1205 while 1:
1206 pos = re_commas_etc.search(text)
1207 if pos >= 0:
1208 changed = 1
1209 result = result + text[:pos]
1210 text = text[pos+1:]
1211 else:
1212 result = result + text
1213 break
1214 if changed:
1215 print 'Warning: nodename changhed to ' + `result`
1216
1217 return result
1218
# boolean flags
# changeit() looks a flag up by name when it meets an IF chunk and raises
# an error for a name that is not a key here.
flags = {'texi': 1}
1221
1222
1223##
1224## changeit: the actual routine, that changes the contents of the parsed
1225## chunks
1226##
1227
1228def changeit(buf, pp):
1229 global onlylatexspecial, hist, out
1230
1231 i, length = 0, len(pp)
1232 while 1:
1233 # sanity check: length should always equal len(pp)
1234 if len(pp) != length:
1235 raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
1236 if i >= length:
1237 break
1238 ch = pp[i]
1239 i = i + 1
1240
Guido van Rossum49604d31996-09-10 22:19:51 +00001241 if type(ch) is StringType:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001242 #normally, only chunks are present in pp,
1243 # but in some cases, some extra info
1244 # has been inserted, e.g., the \end{...} clauses
1245 raise 'FATAL', 'got string, probably too many ' + `end`
1246
1247 if ch.chtype == chunk_type(GROUP):
1248 # check for {\em ...} constructs
1249 if ch.data and \
1250 ch.data[0].chtype == chunk_type(CSNAME) and \
1251 s(buf, ch.data[0].data) in fontchanges.keys():
1252 k = s(buf, ch.data[0].data)
1253 del ch.data[0]
1254 pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
1255 length, i = length+1, i+1
1256
1257 # recursively parse the contents of the group
1258 changeit(buf, ch.data)
1259
1260 elif ch.chtype == chunk_type(IF):
1261 # \if...
1262 flag, negate, data = ch.data
Guido van Rossum7a2dba21993-11-05 14:45:11 +00001263 ##print 'IF: flag, negate = ' + `flag, negate`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001264 if flag not in flags.keys():
1265 raise error, 'unknown flag ' + `flag`
1266
1267 value = flags[flag]
1268 if negate:
1269 value = (not value)
1270 del pp[i-1]
1271 length, i = length-1, i-1
1272 if value:
1273 pp[i:i] = data
1274 length = length + len(data)
1275
1276
1277 elif ch.chtype == chunk_type(ENV):
1278 # \begin{...} ....
1279 envname, data = ch.data
1280
1281 #push this environment name on stack
1282 hist.inenv.insert(0, envname)
1283
1284 #append an endenv chunk after grouped data
1285 data.append(chunk(ENDENV, ch.where, envname))
1286 ##[`data`]
1287
1288 #delete this object
1289 del pp[i-1]
1290 i, length = i-1, length-1
1291
1292 #insert found data
1293 pp[i:i] = data
1294 length = length + len(data)
1295
1296 if envname == 'verbatim':
1297 pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
1298 chunk(GROUP, ch.where, [])]
1299 length, i = length+2, i+2
1300
1301 elif envname == 'itemize':
1302 if hist.itemizenesting > len(itemizesymbols):
1303 raise error, 'too deep itemize nesting'
1304 ingroupch = [chunk(CSNAME, ch.where,\
1305 itemizesymbols[hist.itemizenesting])]
1306 hist.itemizenesting = hist.itemizenesting + 1
1307 pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
1308 chunk(GROUP, ch.where, ingroupch)]
1309 length, i = length+2, i+2
1310
1311 elif envname == 'enumerate':
1312 if hist.enumeratenesting > len(enumeratesymbols):
1313 raise error, 'too deep enumerate nesting'
1314 ingroupch = [chunk(PLAIN, ch.where,\
1315 enumeratesymbols[hist.enumeratenesting])]
1316 hist.enumeratenesting = hist.enumeratenesting + 1
1317 pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
1318 chunk(GROUP, ch.where, ingroupch)]
1319 length, i = length+2, i+2
1320
1321 elif envname == 'description':
1322 ingroupch = [chunk(CSNAME, ch.where, 'b')]
1323 pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
1324 chunk(GROUP, ch.where, ingroupch)]
1325 length, i = length+2, i+2
1326
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001327 elif (envname == 'tableiii') or \
1328 (envname == 'tableii'):
1329 if (envname == 'tableii'):
1330 ltable = 2
1331 else:
1332 ltable = 3
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001333 wh = ch.where
1334 newcode = []
1335
1336 #delete tabular format description
1337 # e.g., {|l|c|l|}
1338 length, newi = getnextarg(length, buf, pp, i)
1339 del pp[i:newi]
1340 length = length - (newi-i)
1341
1342 newcode.append(chunk(CSLINE, wh, 'table'))
1343 ingroupch = [chunk(CSNAME, wh, 'asis')]
1344 newcode.append(chunk(GROUP, wh, ingroupch))
1345 newcode.append(chunk(CSLINE, wh, 'item'))
1346
1347 #get the name of macro for @item
1348 # e.g., {code}
1349 length, newi = getnextarg(length, buf, pp, i)
1350
1351 if newi-i != 1:
1352 raise error, 'Sorry, expected 1 chunk argument'
1353 if pp[i].chtype != chunk_type(PLAIN):
1354 raise error, 'Sorry, expected plain text argument'
1355 hist.itemargmacro = s(buf, pp[i].data)
1356 del pp[i:newi]
1357 length = length - (newi-i)
1358
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001359 itembody = []
1360 for count in range(ltable):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001361 length, newi = getnextarg(length, buf, pp, i)
1362 emphgroup = [\
1363 chunk(CSNAME, wh, 'emph'), \
1364 chunk(GROUP, 0, pp[i:newi])]
1365 del pp[i:newi]
1366 length = length - (newi-i)
1367 if count == 0:
1368 itemarg = emphgroup
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001369 elif count == ltable-1:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001370 itembody = itembody + \
1371 [chunk(PLAIN, wh, ' --- ')] + \
1372 emphgroup
1373 else:
1374 itembody = emphgroup
1375 newcode.append(chunk(GROUP, wh, itemarg))
1376 newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
1377 pp[i:i] = newcode
1378 l = len(newcode)
1379 length, i = length+l, i+l
1380 del newcode, l
1381
1382 if length != len(pp):
1383 raise 'STILL, SOMETHING wrong', `i`
1384
1385
1386 elif envname == 'funcdesc':
1387 pp.insert(i, chunk(PLAIN, ch.where, ''))
1388 i, length = i+1, length+1
1389 length, i = do_funcdesc(length, buf, pp, i)
1390
1391 elif envname == 'excdesc':
1392 pp.insert(i, chunk(PLAIN, ch.where, ''))
1393 i, length = i+1, length+1
1394 length, i = do_excdesc(length, buf, pp, i)
1395
1396 elif envname == 'datadesc':
1397 pp.insert(i, chunk(PLAIN, ch.where, ''))
1398 i, length = i+1, length+1
1399 length, i = do_datadesc(length, buf, pp, i)
1400
1401 else:
1402 print 'WARNING: don\'t know what to do with env ' + `envname`
1403
1404 elif ch.chtype == chunk_type(ENDENV):
1405 envname = ch.data
1406 if envname != hist.inenv[0]:
1407 raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
1408 del hist.inenv[0]
1409 del pp[i-1]
1410 i, length = i-1, length-1
1411
1412 if envname == 'verbatim':
1413 pp[i:i] = [\
1414 chunk(CSLINE, ch.where, 'end'), \
1415 chunk(GROUP, ch.where, [\
1416 chunk(PLAIN, ch.where, 'example')])]
1417 i, length = i+2, length+2
1418 elif envname == 'itemize':
1419 hist.itemizenesting = hist.itemizenesting - 1
1420 pp[i:i] = [\
1421 chunk(CSLINE, ch.where, 'end'), \
1422 chunk(GROUP, ch.where, [\
1423 chunk(PLAIN, ch.where, 'itemize')])]
1424 i, length = i+2, length+2
1425 elif envname == 'enumerate':
1426 hist.enumeratenesting = hist.enumeratenesting-1
1427 pp[i:i] = [\
1428 chunk(CSLINE, ch.where, 'end'), \
1429 chunk(GROUP, ch.where, [\
1430 chunk(PLAIN, ch.where, 'enumerate')])]
1431 i, length = i+2, length+2
1432 elif envname == 'description':
1433 pp[i:i] = [\
1434 chunk(CSLINE, ch.where, 'end'), \
1435 chunk(GROUP, ch.where, [\
1436 chunk(PLAIN, ch.where, 'table')])]
1437 i, length = i+2, length+2
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001438 elif (envname == 'tableiii') or (envname == 'tableii'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001439 pp[i:i] = [\
1440 chunk(CSLINE, ch.where, 'end'), \
1441 chunk(GROUP, ch.where, [\
1442 chunk(PLAIN, ch.where, 'table')])]
1443 i, length = i+2, length + 2
1444 pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
1445 i, length = i+1, length+1
1446
1447 elif envname in ('funcdesc', 'excdesc', 'datadesc'):
1448 pp[i:i] = [\
1449 chunk(CSLINE, ch.where, 'end'), \
1450 chunk(GROUP, ch.where, [\
1451 chunk(PLAIN, ch.where, hist.command)])]
1452 i, length = i+2, length+2
1453 else:
1454 print 'WARNING: ending env ' + `envname` + 'has no actions'
1455
1456 elif ch.chtype == chunk_type(CSNAME):
1457 # control name transformations
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001458 if s(buf, ch.data) == 'optional':
1459 pp[i-1].chtype = chunk_type (PLAIN)
1460 pp[i-1].data = '['
1461 if (i < length) and \
1462 (pp[i].chtype == chunk_type(GROUP)):
1463 cp=pp[i].data
1464 pp[i:i+1]=cp + [\
1465 chunk(PLAIN, ch.where, ']')]
1466 length = length+len(cp)
1467 elif s(buf, ch.data) in ignoredcommands:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001468 del pp[i-1]
1469 i, length = i-1, length-1
1470 elif s(buf, ch.data) == '@' and \
1471 i != length and \
1472 pp[i].chtype == chunk_type(PLAIN) and \
1473 s(buf, pp[i].data)[0] == '.':
1474 # \@. --> \. --> @.
1475 ch.data = '.'
1476 del pp[i]
1477 length = length-1
1478 elif s(buf, ch.data) == '\\':
1479 # \\ --> \* --> @*
1480 ch.data = '*'
1481 elif len(s(buf, ch.data)) == 1 and \
1482 s(buf, ch.data) in onlylatexspecial:
1483 ch.chtype = chunk_type(PLAIN)
1484 # check if such a command is followed by
1485 # an empty group: e.g., `\%{}'. If so, remove
1486 # this empty group too
1487 if i < length and \
1488 pp[i].chtype == chunk_type(GROUP) \
1489 and len(pp[i].data) == 0:
1490 del pp[i]
1491 length = length-1
1492
1493 elif hist.inargs and s(buf, ch.data) in inargsselves:
1494 # This is the special processing of the
1495 # arguments of the \begin{funcdesc}... or
1496 # \funcline... arguments
1497 # \, --> , \[ --> [, \] --> ]
1498 ch.chtype = chunk_type(PLAIN)
1499
1500 elif s(buf, ch.data) == 'renewcommand':
1501 # \renewcommand{\indexsubitem}....
1502 i, length = i-1, length-1
1503 del pp[i]
1504 length, newi = getnextarg(length, buf, pp, i)
1505 if newi-i == 1 \
1506 and i < length \
1507 and pp[i].chtype == chunk_type(CSNAME) \
1508 and s(buf, pp[i].data) == 'indexsubitem':
1509 del pp[i:newi]
1510 length = length - (newi-i)
1511 length, newi = getnextarg(length, buf, pp, i)
1512 text = flattext(buf, pp[i:newi])
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001513 if text[:1] != '(' or text[-1:] != ')':
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001514 raise error, 'expected indexsubitme enclosed in braces'
1515 words = string.split(text[1:-1])
1516 hist.indexsubitem = words
1517 del text, words
1518 else:
1519 print 'WARNING: renewcommand with unsupported arg removed'
1520 del pp[i:newi]
1521 length = length - (newi-i)
1522
1523 elif s(buf, ch.data) == 'item':
1524 ch.chtype = chunk_type(CSLINE)
1525 length, newi = getoptarg(length, buf, pp, i)
1526 ingroupch = pp[i:newi]
1527 del pp[i:newi]
1528 length = length - (newi-i)
1529 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1530 i, length = i+1, length+1
1531
1532 elif s(buf, ch.data) == 'ttindex':
1533 idxsi = hist.indexsubitem
1534
1535 cat_class = ''
1536 if len(idxsi) >= 2 and idxsi[1] in \
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001537 ('method', 'function', 'protocol'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001538 command = 'findex'
1539 elif len(idxsi) >= 2 and idxsi[1] in \
1540 ('exception', 'object'):
1541 command = 'vindex'
1542 else:
Guido van Rossum7a2dba21993-11-05 14:45:11 +00001543 print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001544 command = 'cindex'
1545
1546 if not cat_class:
1547 cat_class = '('+string.join(idxsi)+')'
1548
1549 ch.chtype = chunk_type(CSLINE)
1550 ch.data = command
1551
1552 length, newi = getnextarg(length, buf, pp, i)
1553 arg = pp[i:newi]
1554 del pp[i:newi]
1555 length = length - (newi-i)
1556
1557 cat_arg = [chunk(PLAIN, ch.where, cat_class)]
1558
1559 # determine what should be set in roman, and
1560 # what in tt-font
1561 if command in regindices:
1562
1563 arg = [chunk(CSNAME, ch.where, 't'), \
1564 chunk(GROUP, ch.where, arg)]
1565 else:
1566 cat_arg = [chunk(CSNAME, ch.where, 'r'), \
1567 chunk(GROUP, ch.where, cat_arg)]
1568
1569 ingroupch = arg + \
1570 [chunk(PLAIN, ch.where, ' ')] + \
1571 cat_arg
1572
1573 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1574 length, i = length+1, i+1
1575
1576
1577 elif s(buf, ch.data) == 'ldots':
1578 # \ldots --> \dots{} --> @dots{}
1579 ch.data = 'dots'
1580 if i == length \
1581 or pp[i].chtype != chunk_type(GROUP) \
1582 or pp[i].data != []:
1583 pp.insert(i, chunk(GROUP, ch.where, []))
1584 i, length = i+1, length+1
1585 elif s(buf, ch.data) in wordsselves:
1586 # \UNIX --> UNIX
1587 ch.chtype = chunk_type(PLAIN)
1588 if i != length \
1589 and pp[i].chtype == chunk_type(GROUP) \
1590 and pp[i].data == []:
1591 del pp[i]
1592 length = length-1
1593 elif s(buf, ch.data) in for_texi:
1594 pass
1595
1596 elif s(buf, ch.data) == 'e':
Guido van Rossum49604d31996-09-10 22:19:51 +00001597 # "\e" --> "\"
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001598 ch.data = '\\'
1599 ch.chtype = chunk_type(PLAIN)
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001600 elif (s(buf, ch.data) == 'lineiii') or\
1601 (s(buf, ch.data) == 'lineii'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001602 # This is the most tricky one
1603 # \lineiii{a1}{a2}[{a3}] -->
1604 # @item @<cts. of itemargmacro>{a1}
1605 # a2 [ -- a3]
1606 #
1607 ##print 'LINEIIIIII!!!!!!!'
Guido van Rossum49604d31996-09-10 22:19:51 +00001608## wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001609## dumpit(buf, wobj.write, pp[i-1:i+5])
1610## print '--->' + wobj.data + '<----'
1611 if not hist.inenv:
1612 raise error, \
1613 'no environment for lineiii'
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001614 if (hist.inenv[0] != 'tableiii') and\
1615 (hist.inenv[0] != 'tableii'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001616 raise error, \
1617 'wrong command (' + \
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001618 s(buf, ch.data)+ \
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001619 ') in wrong environment (' \
1620 + `hist.inenv[0]` + ')'
1621 ch.chtype = chunk_type(CSLINE)
1622 ch.data = 'item'
1623 length, newi = getnextarg(length, buf, pp, i)
1624 ingroupch = [chunk(CSNAME, 0, \
1625 hist.itemargmacro), \
1626 chunk(GROUP, 0, pp[i:newi])]
1627 del pp[i:newi]
1628 length = length - (newi-i)
1629## print 'ITEM ARG: --->',
Guido van Rossum49604d31996-09-10 22:19:51 +00001630## wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001631## dumpit(buf, wobj.write, ingroupch)
1632## print wobj.data, '<---'
1633 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1634 grouppos = i
1635 i, length = i+1, length+1
1636 length, i = getnextarg(length, buf, pp, i)
1637 length, newi = getnextarg(length, buf, pp, i)
1638 if newi > i:
1639 # we have a 3rd arg
1640 pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
1641 i = newi + 1
1642 length = length + 1
1643## pp[grouppos].data = pp[grouppos].data \
1644## + [chunk(PLAIN, ch.where, ' ')] \
1645## + pp[i:newi]
1646## del pp[i:newi]
1647## length = length - (newi-i)
1648 if length != len(pp):
1649 raise 'IN LINEIII IS THE ERR', `i`
1650
1651 elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
1652 #\xxxsection{A} ---->
1653 # @node A, , ,
1654 # @xxxsection A
1655 ## also: remove commas and quotes
1656 ch.chtype = chunk_type(CSLINE)
1657 length, newi = getnextarg(length, buf, pp, i)
1658 afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
1659 if afternodenamecmd < 0:
1660 cp1 = crcopy(pp[i:newi])
1661 pp[i:newi] = [\
1662 chunk(GROUP, ch.where, \
1663 pp[i:newi])]
1664 length, newi = length - (newi-i) + 1, \
1665 i+1
1666 text = flattext(buf, cp1)
1667 text = invent_node_names(text)
1668 else:
1669 length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
1670 cp1 = crcopy(pp[afternodenamecmd:endarg])
1671 del pp[newi:endarg]
1672 length = length - (endarg-newi)
1673
1674 pp[i:newi] = [\
1675 chunk(GROUP, ch.where, \
1676 pp[i:newi])]
1677 length, newi = length - (newi-i) + 1, \
1678 i + 1
1679 text = flattext(buf, cp1)
1680 if text[-1] == '.':
1681 text = text[:-1]
Guido van Rossum7a2dba21993-11-05 14:45:11 +00001682## print 'FLATTEXT:', `text`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001683 if text in hist.nodenames:
1684 print 'WARNING: node name ' + `text` + ' already used'
1685 out.doublenodes.append(text)
1686 else:
1687 hist.nodenames.append(text)
1688 text = rm_commas_etc(text)
1689 pp[i-1:i-1] = [\
1690 chunk(CSLINE, ch.where, 'node'), \
1691 chunk(GROUP, ch.where, [\
1692 chunk(PLAIN, ch.where, text+', , ,')\
1693 ])]
1694 i, length = newi+2, length+2
1695
1696 elif s(buf,ch.data) == 'funcline':
1697 # fold it to a very short environment
1698 pp[i-1:i-1] = [\
1699 chunk(CSLINE, ch.where, 'end'), \
1700 chunk(GROUP, ch.where, [\
1701 chunk(PLAIN, ch.where, hist.command)])]
1702 i, length = i+2, length+2
1703 length, i = do_funcdesc(length, buf, pp, i)
1704
1705 elif s(buf,ch.data) == 'dataline':
1706 pp[i-1:i-1] = [\
1707 chunk(CSLINE, ch.where, 'end'), \
1708 chunk(GROUP, ch.where, [\
1709 chunk(PLAIN, ch.where, hist.command)])]
1710 i, length = i+2, length+2
1711 length, i = do_datadesc(length, buf, pp, i)
1712
1713 elif s(buf,ch.data) == 'excline':
1714 pp[i-1:i-1] = [\
1715 chunk(CSLINE, ch.where, 'end'), \
1716 chunk(GROUP, ch.where, [\
1717 chunk(PLAIN, ch.where, hist.command)])]
1718 i, length = i+2, length+2
1719 length, i = do_excdesc(length, buf, pp, i)
1720
1721
1722 elif s(buf, ch.data) == 'index':
1723 #\index{A} --->
1724 # @cindex A
1725 ch.chtype = chunk_type(CSLINE)
1726 ch.data = 'cindex'
1727 length, newi = getnextarg(length, buf, pp, i)
1728
1729 ingroupch = pp[i:newi]
1730 del pp[i:newi]
1731 length = length - (newi-i)
1732 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1733 length, i = length+1, i+1
1734
1735 elif s(buf, ch.data) == 'bifuncindex':
1736 ch.chtype = chunk_type(CSLINE)
1737 ch.data = 'findex'
1738 length, newi = getnextarg(length, buf, pp, i)
1739 ingroupch = pp[i:newi]
1740 del pp[i:newi]
1741 length = length - (newi-i)
1742
1743 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1744 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1745 ingroupch.append(chunk(GROUP, ch.where, [\
1746 chunk(PLAIN, ch.where, \
1747 '(built-in function)')]))
1748
1749 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1750 length, i = length+1, i+1
1751
1752
Guido van Rossum7760cde1995-03-17 16:03:11 +00001753 elif s(buf, ch.data) == 'obindex':
1754 ch.chtype = chunk_type(CSLINE)
1755 ch.data = 'findex'
1756 length, newi = getnextarg(length, buf, pp, i)
1757 ingroupch = pp[i:newi]
1758 del pp[i:newi]
1759 length = length - (newi-i)
1760
1761 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1762 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1763 ingroupch.append(chunk(GROUP, ch.where, [\
1764 chunk(PLAIN, ch.where, \
1765 '(object)')]))
1766
1767 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1768 length, i = length+1, i+1
1769
1770
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001771 elif s(buf, ch.data) == 'opindex':
1772 ch.chtype = chunk_type(CSLINE)
1773 ch.data = 'findex'
1774 length, newi = getnextarg(length, buf, pp, i)
1775 ingroupch = pp[i:newi]
1776 del pp[i:newi]
1777 length = length - (newi-i)
1778
1779 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1780 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1781 ingroupch.append(chunk(GROUP, ch.where, [\
1782 chunk(PLAIN, ch.where, \
1783 '(operator)')]))
1784
1785 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1786 length, i = length+1, i+1
1787
1788
1789 elif s(buf, ch.data) == 'bimodindex':
1790 ch.chtype = chunk_type(CSLINE)
1791 ch.data = 'pindex'
1792 length, newi = getnextarg(length, buf, pp, i)
1793 ingroupch = pp[i:newi]
1794 del pp[i:newi]
1795 length = length - (newi-i)
1796
1797 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1798 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1799 ingroupch.append(chunk(GROUP, ch.where, [\
1800 chunk(PLAIN, ch.where, \
1801 '(built-in)')]))
1802
1803 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1804 length, i = length+1, i+1
1805
1806 elif s(buf, ch.data) == 'sectcode':
1807 ch.data = 'code'
1808
1809
1810 elif s(buf, ch.data) == 'stmodindex':
1811 ch.chtype = chunk_type(CSLINE)
1812 # use the program index as module index
1813 ch.data = 'pindex'
1814 length, newi = getnextarg(length, buf, pp, i)
1815 ingroupch = pp[i:newi]
1816 del pp[i:newi]
1817 length = length - (newi-i)
1818
1819 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1820 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1821 ingroupch.append(chunk(GROUP, ch.where, [\
1822 chunk(PLAIN, ch.where, \
1823 '(standard)')]))
1824
1825 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1826 length, i = length+1, i+1
1827
1828
1829 elif s(buf, ch.data) == 'stindex':
1830 # XXX must actually go to newindex st
1831 wh = ch.where
1832 ch.chtype = chunk_type(CSLINE)
1833 ch.data = 'cindex'
1834 length, newi = getnextarg(length, buf, pp, i)
1835 ingroupch = [chunk(CSNAME, wh, 'code'), \
1836 chunk(GROUP, wh, pp[i:newi])]
1837
1838 del pp[i:newi]
1839 length = length - (newi-i)
1840
1841 t = ingroupch[:]
1842 t.append(chunk(PLAIN, wh, ' statement'))
1843
1844 pp.insert(i, chunk(GROUP, wh, t))
1845 i, length = i+1, length+1
1846
1847 pp.insert(i, chunk(CSLINE, wh, 'cindex'))
1848 i, length = i+1, length+1
1849
1850 t = ingroupch[:]
1851 t.insert(0, chunk(PLAIN, wh, 'statement, '))
1852
1853 pp.insert(i, chunk(GROUP, wh, t))
1854 i, length = i+1, length+1
1855
1856
1857 elif s(buf, ch.data) == 'indexii':
1858 #\indexii{A}{B} --->
1859 # @cindex A B
1860 # @cindex B, A
1861 length, newi = getnextarg(length, buf, pp, i)
1862 cp11 = pp[i:newi]
1863 cp21 = crcopy(pp[i:newi])
1864 del pp[i:newi]
1865 length = length - (newi-i)
1866 length, newi = getnextarg(length, buf, pp, i)
1867 cp12 = pp[i:newi]
1868 cp22 = crcopy(pp[i:newi])
1869 del pp[i:newi]
1870 length = length - (newi-i)
1871
1872 ch.chtype = chunk_type(CSLINE)
1873 ch.data = 'cindex'
1874 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1875 chunk(PLAIN, ch.where, ' ')] + cp12))
1876 i, length = i+1, length+1
1877 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1878 chunk(GROUP, ch.where, cp22 + [\
1879 chunk(PLAIN, ch.where, ', ')]+ cp21)]
1880 i, length = i+2, length+2
1881
1882 elif s(buf, ch.data) == 'indexiii':
1883 length, newi = getnextarg(length, buf, pp, i)
1884 cp11 = pp[i:newi]
1885 cp21 = crcopy(pp[i:newi])
1886 cp31 = crcopy(pp[i:newi])
1887 del pp[i:newi]
1888 length = length - (newi-i)
1889 length, newi = getnextarg(length, buf, pp, i)
1890 cp12 = pp[i:newi]
1891 cp22 = crcopy(pp[i:newi])
1892 cp32 = crcopy(pp[i:newi])
1893 del pp[i:newi]
1894 length = length - (newi-i)
1895 length, newi = getnextarg(length, buf, pp, i)
1896 cp13 = pp[i:newi]
1897 cp23 = crcopy(pp[i:newi])
1898 cp33 = crcopy(pp[i:newi])
1899 del pp[i:newi]
1900 length = length - (newi-i)
1901
1902 ch.chtype = chunk_type(CSLINE)
1903 ch.data = 'cindex'
1904 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1905 chunk(PLAIN, ch.where, ' ')] + cp12 \
1906 + [chunk(PLAIN, ch.where, ' ')] \
1907 + cp13))
1908 i, length = i+1, length+1
1909 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1910 chunk(GROUP, ch.where, cp22 + [\
1911 chunk(PLAIN, ch.where, ' ')]+ cp23\
1912 + [chunk(PLAIN, ch.where, ', ')] +\
1913 cp21)]
1914 i, length = i+2, length+2
1915 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1916 chunk(GROUP, ch.where, cp33 + [\
1917 chunk(PLAIN, ch.where, ', ')]+ cp31\
1918 + [chunk(PLAIN, ch.where, ' ')] +\
1919 cp32)]
1920 i, length = i+2, length+2
1921
1922
1923 elif s(buf, ch.data) == 'indexiv':
1924 length, newi = getnextarg(length, buf, pp, i)
1925 cp11 = pp[i:newi]
1926 cp21 = crcopy(pp[i:newi])
1927 cp31 = crcopy(pp[i:newi])
1928 cp41 = crcopy(pp[i:newi])
1929 del pp[i:newi]
1930 length = length - (newi-i)
1931 length, newi = getnextarg(length, buf, pp, i)
1932 cp12 = pp[i:newi]
1933 cp22 = crcopy(pp[i:newi])
1934 cp32 = crcopy(pp[i:newi])
1935 cp42 = crcopy(pp[i:newi])
1936 del pp[i:newi]
1937 length = length - (newi-i)
1938 length, newi = getnextarg(length, buf, pp, i)
1939 cp13 = pp[i:newi]
1940 cp23 = crcopy(pp[i:newi])
1941 cp33 = crcopy(pp[i:newi])
1942 cp43 = crcopy(pp[i:newi])
1943 del pp[i:newi]
1944 length = length - (newi-i)
1945 length, newi = getnextarg(length, buf, pp, i)
1946 cp14 = pp[i:newi]
1947 cp24 = crcopy(pp[i:newi])
1948 cp34 = crcopy(pp[i:newi])
1949 cp44 = crcopy(pp[i:newi])
1950 del pp[i:newi]
1951 length = length - (newi-i)
1952
1953 ch.chtype = chunk_type(CSLINE)
1954 ch.data = 'cindex'
1955 ingroupch = cp11 + \
1956 spacech + cp12 + \
1957 spacech + cp13 + \
1958 spacech + cp14
1959 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1960 i, length = i+1, length+1
1961 ingroupch = cp22 + \
1962 spacech + cp23 + \
1963 spacech + cp24 + \
1964 commach + cp21
1965 pp[i:i] = cindexch + [\
1966 chunk(GROUP, ch.where, ingroupch)]
1967 i, length = i+2, length+2
1968 ingroupch = cp33 + \
1969 spacech + cp34 + \
1970 commach + cp31 + \
1971 spacech + cp32
1972 pp[i:i] = cindexch + [\
1973 chunk(GROUP, ch.where, ingroupch)]
1974 i, length = i+2, length+2
1975 ingroupch = cp44 + \
1976 commach + cp41 + \
1977 spacech + cp42 + \
1978 spacech + cp43
1979 pp[i:i] = cindexch + [\
1980 chunk(GROUP, ch.where, ingroupch)]
1981 i, length = i+2, length+2
1982
1983
1984
1985 else:
1986 print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
1987
1988
1989
1990re_atsign = regex.compile('[@{}]')
1991re_newline = regex.compile('\n')
1992
1993def dumpit(buf, wm, pp):
1994
1995 global out
1996
1997 i, length = 0, len(pp)
1998
1999 addspace = 0
2000
2001 while 1:
2002 if len(pp) != length:
2003 raise 'FATAL', 'inconsistent length'
2004 if i == length:
2005 break
2006 ch = pp[i]
2007 i = i + 1
2008
2009 if addspace:
2010 dospace = 1
2011 addspace = 0
2012 else:
2013 dospace = 0
2014
2015 if ch.chtype == chunk_type(CSNAME):
2016 wm('@' + s(buf, ch.data))
2017 if s(buf, ch.data) == 'node' and \
2018 pp[i].chtype == chunk_type(PLAIN) and \
2019 s(buf, pp[i].data) in out.doublenodes:
2020 ##XXX doesnt work yet??
2021 wm(' ZZZ-' + zfill(`i`, 4))
2022 if s(buf, ch.data)[0] in string.letters:
2023 addspace = 1
2024 elif ch.chtype == chunk_type(PLAIN):
2025 if dospace and s(buf, ch.data) not in (' ', '\t'):
2026 wm(' ')
2027 text = s(buf, ch.data)
2028 while 1:
2029 pos = re_atsign.search(text)
2030 if pos < 0:
2031 break
2032 wm(text[:pos] + '@' + text[pos])
2033 text = text[pos+1:]
2034 wm(text)
2035 elif ch.chtype == chunk_type(GROUP):
2036 wm('{')
2037 dumpit(buf, wm, ch.data)
2038 wm('}')
2039 elif ch.chtype == chunk_type(DENDLINE):
2040 wm('\n\n')
2041 while i != length and pp[i].chtype in \
2042 (chunk_type(DENDLINE), chunk_type(ENDLINE)):
2043 i = i + 1
2044 elif ch.chtype == chunk_type(OTHER):
2045 wm(s(buf, ch.data))
2046 elif ch.chtype == chunk_type(ACTIVE):
2047 wm(s(buf, ch.data))
2048 elif ch.chtype == chunk_type(ENDLINE):
2049 wm('\n')
2050 elif ch.chtype == chunk_type(CSLINE):
2051 if i >= 2 and pp[i-2].chtype not in \
2052 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2053 and (pp[i-2].chtype != chunk_type(PLAIN) \
2054 or s(buf, pp[i-2].data)[-1] != '\n'):
Guido van Rossum49604d31996-09-10 22:19:51 +00002055
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002056 wm('\n')
2057 wm('@' + s(buf, ch.data))
2058 if i == length:
2059 raise error, 'CSLINE expected another chunk'
2060 if pp[i].chtype != chunk_type(GROUP):
2061 raise error, 'CSLINE expected GROUP'
Guido van Rossum49604d31996-09-10 22:19:51 +00002062 if type(pp[i].data) != ListType:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002063 raise error, 'GROUP chould contain []-data'
Guido van Rossum49604d31996-09-10 22:19:51 +00002064
2065 wobj = Wobj()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002066 dumpit(buf, wobj.write, pp[i].data)
2067 i = i + 1
2068 text = wobj.data
2069 del wobj
2070 if text:
2071 wm(' ')
2072 while 1:
2073 pos = re_newline.search(text)
2074 if pos < 0:
2075 break
2076 print 'WARNING: found newline in csline arg'
2077 wm(text[:pos] + ' ')
2078 text = text[pos+1:]
2079 wm(text)
2080 if i >= length or \
2081 pp[i].chtype not in (chunk_type(CSLINE), \
2082 chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2083 and (pp[i].chtype != chunk_type(PLAIN) \
2084 or s(buf, pp[i].data)[0] != '\n'):
2085 wm('\n')
2086
2087 elif ch.chtype == chunk_type(COMMENT):
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002088## print 'COMMENT: previous chunk =', pp[i-2]
Guido van Rossum557ed941995-03-28 13:33:45 +00002089## if pp[i-2].chtype == chunk_type(PLAIN):
2090## print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002091 if s(buf, ch.data) and \
2092 regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
2093 if i >= 2 and pp[i-2].chtype not in \
2094 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2095 and not (pp[i-2].chtype == chunk_type(PLAIN) \
2096 and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
2097 print 'ADDING NEWLINE'
2098 wm('\n')
2099 wm('@c ' + s(buf, ch.data))
2100 elif ch.chtype == chunk_type(IGNORE):
2101 pass
2102 else:
2103 try:
2104 str = `s(buf, ch.data)`
2105 except TypeError:
2106 str = `ch.data`
2107 if len(str) > 400:
2108 str = str[:400] + '...'
2109 print 'warning:', ch.chtype, 'not handled, data ' + str
2110
2111
2112
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002113def main():
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002114 outfile = None
2115 headerfile = 'texipre.dat'
2116 trailerfile = 'texipost.dat'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002117
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002118 try:
2119 opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
2120 except getopt.error:
2121 args = []
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002122
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002123 if not args:
2124 print 'usage: partparse [-o outfile] [-h headerfile]',
2125 print '[-t trailerfile] file ...'
2126 sys.exit(2)
2127
2128 for opt, arg in opts:
2129 if opt == '-o': outfile = arg
2130 if opt == '-h': headerfile = arg
2131 if opt == '-t': trailerfile = arg
2132
2133 if not outfile:
2134 root, ext = os.path.splitext(args[0])
2135 outfile = root + '.texi'
2136
2137 if outfile in args:
2138 print 'will not overwrite input file', outfile
2139 sys.exit(2)
2140
2141 outf = open(outfile, 'w')
2142 outf.write(open(headerfile, 'r').read())
2143
2144 for file in args:
2145 if len(args) > 1: print '='*20, file, '='*20
2146 buf = open(file, 'r').read()
2147 w, pp = parseit(buf)
2148 startchange()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002149 changeit(buf, pp)
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002150 dumpit(buf, outf.write, pp)
2151
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002152 outf.write(open(trailerfile, 'r').read())
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002153
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002154 outf.close()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002155
# Run the converter only when this file is executed as a script, so that
# it can also be imported without side effects.
if __name__ == '__main__':
    main()