blob: 6128bbfaf971a1829b634e4ac73f98319bfb4ffa [file] [log] [blame]
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001#
2# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
3# and generate texinfo source.
4#
5# This is *not* a good example of good programming practices. In fact, this
6# file could use a complete rewrite, in order to become faster, more
7# easily extensible and maintainable.
8#
9# However, I added some comments in a few places for the pitiful person who
10# would ever need to take a look into this file.
11#
12# Have I been clear enough??
13#
14# -jh
15
16
Guido van Rossum7a2dba21993-11-05 14:45:11 +000017import sys, string, regex, getopt, os
Guido van Rossum95cd2ef1992-12-08 14:37:55 +000018
19# Different parse modes for phase 1
MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT, \
    MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE = range(7)

# every valid parse mode, in numeric order
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
             MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
30
31# Show the neighbourhood of the scanned buffer
def epsilon(buf, where):
    """Describe the neighbourhood of position *where* in *buf*.

    Returns ' Context <before>.<after>.' where <before> is the repr of
    up to 10 characters preceding *where* and <after> the repr of up to
    10 characters starting at *where*.  Used to build error messages.
    """
    # clamp the 10-character window to the buffer boundaries
    window_start = max(where - 10, 0)
    window_end = min(where + 10, len(buf))
    return ' Context ' + repr(buf[window_start:where]) + '.' \
           + repr(buf[where:window_end]) + '.'
39
40# Should return the line number. never worked
def lin():
    """Return ' Line <n>.' using the module-level line counter lineno.

    lineno is only read here, so no global declaration is needed; it
    must have been set before this is called.
    """
    return ' Line ' + repr(lineno) + '.'
44
45# Displays the recursion level.
def lv(lvl):
    """Return ' Level <lvl>.', a printable form of the recursion level."""
    return ' Level ' + repr(lvl) + '.'
48
49# Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    """Return the level, line and context descriptions joined together."""
    level_part = lv(lvl)
    line_part = lin()
    context_part = epsilon(buf, where)
    return level_part + line_part + context_part
52
53
54# This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
    """Symbolic wrapper around one of the MODE_* parse-mode constants.

    Instances compare (via __cmp__) by their numeric mode, and a plain
    number on the other side is wrapped with mode() first.
    """

    # printable name of every valid mode value
    _names = {
        MODE_REGULAR: 'MODE_REGULAR',
        MODE_VERBATIM: 'MODE_VERBATIM',
        MODE_CS_SCAN: 'MODE_CS_SCAN',
        MODE_COMMENT: 'MODE_COMMENT',
        MODE_MATH: 'MODE_MATH',
        MODE_DMATH: 'MODE_DMATH',
        MODE_GOBBLEWHITE: 'MODE_GOBBLEWHITE',
    }

    def init(self, arg):
        """Store *arg* as the mode and return self.

        Raises ValueError when *arg* is not one of the_modes.
        """
        if arg not in the_modes:
            raise ValueError('mode not in the_modes')
        self.mode = arg
        return self

    def __cmp__(self, other):
        # allow comparison against a bare MODE_* number
        if type(self) != type(other):
            other = mode(other)
        return cmp(self.mode, other.mode)

    def __repr__(self):
        """Return the symbolic name; ValueError for an unknown mode."""
        try:
            return self._names[self.mode]
        except KeyError:
            raise ValueError('mode not in the_modes')
84
85# just a wrapper around a class initialisation
def mode(arg):
    """Create a Mode instance initialised with the mode number *arg*."""
    instance = Mode()
    return instance.init(arg)
88
89
90# After phase 1, the text consists of chunks, with a certain type
91# this type will be assigned to the chtype member of the chunk
92# the where-field contains the file position where this is found
93# and the data field contains (1): a tuple describing start and end
94# positions of the substring (can be used as slice for the buf-variable),
95# (2) just a string, mostly generated by the changeit routine,
96# or (3) a list, describing a (recursive) subgroup of chunks
# every valid chunk type, in numeric order
the_types = tuple(range(16))

(PLAIN,             # ASSUME PLAINTEXT, data = the text
 GROUP,             # GROUP ({}), data = [chunk, chunk,..]
 CSNAME,            # CONTROL SEQ TOKEN, data = the command
 COMMENT,           # data is the actual comment
 DMATH,             # DISPLAYMATH, data = [chunk, chunk,..]
 MATH,              # MATH, see DISPLAYMATH
 OTHER,             # CHAR WITH CATCODE OTHER, data = char
 ACTIVE,            # ACTIVE CHAR
 GOBBLEDWHITE,      # Gobbled LWSP, after CSNAME
 ENDLINE,           # END-OF-LINE, data = '\n'
 DENDLINE,          # DOUBLE EOL, data='\n', indicates \par
 ENV,               # LaTeX-environment
                    # data =(envname,[ch,ch,ch,.])
 CSLINE,            # for texi: next chunk will be one group
                    # of args. Will be set all on 1 line
 IGNORE,            # IGNORE this data
 ENDENV,            # TEMP END OF GROUP INDICATOR
 IF                 # IF-directive
                    # data = (flag,negate,[ch, ch, ch,...])
 ) = the_types
118
119# class, just to display symbolic name
class ChunkType:
    """Symbolic wrapper around one of the chunk-type constants.

    Instances compare (via __cmp__) by their numeric type, and a plain
    number on the other side is wrapped with chunk_type() first.
    """

    # printable name of every valid chunk type
    _names = {
        PLAIN: 'PLAIN',
        GROUP: 'GROUP',
        CSNAME: 'CSNAME',
        COMMENT: 'COMMENT',
        DMATH: 'DMATH',
        MATH: 'MATH',
        OTHER: 'OTHER',
        ACTIVE: 'ACTIVE',
        GOBBLEDWHITE: 'GOBBLEDWHITE',
        DENDLINE: 'DENDLINE',
        ENDLINE: 'ENDLINE',
        ENV: 'ENV',
        CSLINE: 'CSLINE',
        IGNORE: 'IGNORE',
        ENDENV: 'ENDENV',
        IF: 'IF',
    }

    def init(self, chunk_type):
        """Store *chunk_type* and return self.

        Raises ValueError when *chunk_type* is not one of the_types.
        """
        if chunk_type not in the_types:
            # the real ValueError class, not the string 'ValueError',
            # so that this matches __repr__ and can be caught by callers
            raise ValueError('chunk_type not in the_types')
        self.chunk_type = chunk_type
        return self

    def __cmp__(self, other):
        # allow comparison against a bare chunk-type number; inside this
        # method chunk_type resolves to the module-level wrapper function
        if type(self) != type(other):
            other = chunk_type(other)
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        """Return the symbolic name; ValueError for an unknown type."""
        try:
            return self._names[self.chunk_type]
        except KeyError:
            raise ValueError('chunk_type not in the_types')
167
168# ...and the wrapper
def chunk_type(type):
    """Create a ChunkType instance for the chunk-type number *type*."""
    wrapper = ChunkType()
    return wrapper.init(type)

# the type object of a ChunkType instance; Chunk.init compares against
# it to decide whether its chtype argument still needs wrapping
chunk_type_type = type(chunk_type(0))
174
175# this class contains a part of the parsed buffer
class Chunk:
    """One piece of the parsed buffer: a type, a position and its data."""

    def init(self, chtype, where, data):
        """Fill in the chunk and return self.

        chtype -- a ChunkType instance or a chunk-type number (which is
                  wrapped with chunk_type())
        where  -- integer position associated with the chunk
        data   -- the payload, stored unchanged

        Raises TypeError when *where* is not a plain integer.
        """
        if type(chtype) != chunk_type_type:
            chtype = chunk_type(chtype)
        self.chtype = chtype
        if type(where) != type(0):
            raise TypeError("'where' is not a number")
        self.where = where
        self.data = data
        return self

    def __repr__(self):
        return 'chunk' + repr((self.chtype, self.where, self.data))
190
191# and the wrapper
def chunk(chtype, where, data):
    """Create a Chunk instance initialised with the given fields."""
    fresh = Chunk()
    return fresh.init(chtype, where, data)
194
195
196
# exception object raised (as "raise error, message") by the parsing
# and conversion routines in this file when the input cannot be handled
error = 'partparse.error'
198
199#
200# TeX's catcodes...
201#
(CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
 CC_ALIGNMENT, CC_ENDLINE, CC_PARAMETER, CC_SUPERSCRIPT,
 CC_SUBSCRIPT, CC_IGNORE, CC_WHITE, CC_LETTER,
 CC_OTHER, CC_ACTIVE, CC_COMMENT, CC_INVALID) = range(16)

# and the names, indexed by catcode
cc_names = [
    'CC_ESCAPE',
    'CC_LBRACE',
    'CC_RBRACE',
    'CC_MATHSHIFT',
    'CC_ALIGNMENT',
    'CC_ENDLINE',
    'CC_PARAMETER',
    'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT',
    'CC_IGNORE',
    'CC_WHITE',
    'CC_LETTER',
    'CC_OTHER',
    'CC_ACTIVE',
    'CC_COMMENT',
    'CC_INVALID'
    ]
238
239# Show a list of catcode-name-symbols
def pcl(codelist):
    """Return '[NAME, NAME, ...]' for a list of catcode numbers.

    An empty *codelist* yields '[]'.
    """
    names = [cc_names[code] for code in codelist]
    return '[' + ', '.join(names) + ']'
245
246# the name of the catcode (ACTIVE, OTHER, etc.)
def pc(code):
    """Return the symbolic name of catcode number *code*."""
    name = cc_names[code]
    return name
249
250
251# Which catcodes make the parser stop parsing regular plaintext
regular_stopcodes = [
    CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
    CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
    CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE
    ]

# same for scanning a control sequence name
csname_scancodes = [CC_LETTER]

# same for gobbling LWSP (CC_ENDLINE is deliberately not included)
white_scancodes = [CC_WHITE]

# make a list of all catcode id's, except for catcode ``other''
all_but_other_codes = [code for code in range(16) if code != CC_OTHER]

# when does a comment end
comment_stopcodes = [CC_ENDLINE]
270
271# gather all characters together, specified by a list of catcodes
def code2string(cc, codelist):
    """Concatenate the characters of the catcodes named in *codelist*.

    cc       -- catcode table: a sequence indexed by catcode whose
                entries are strings of characters (or None/empty)
    codelist -- catcode numbers to collect, in order

    Categories whose entry is None or empty contribute nothing.
    """
    pieces = [cc[category] for category in codelist if cc[category]]
    return ''.join(pieces)
280
281# automatically generate all characters of catcode other, being the
282# complement set in the ASCII range (128 characters)
def make_other_codes(cc):
    """Return the characters that belong to catcode ``other''.

    These are all characters with codes 0..255 that do not occur in any
    category of *cc* listed in all_but_other_codes, in ascending order.
    """
    taken = {}
    for category in all_but_other_codes:
        if cc[category]:
            for c in cc[category]:
                taken[c] = 1
    remaining = [chr(code) for code in range(256) if chr(code) not in taken]
    return ''.join(remaining)
294
295# catcode dump (which characters have which catcodes).
def dump_cc(name, cc):
    """Sanity-check a catcode table.

    The actual dump output is disabled; what remains is the check that
    *cc* has exactly 16 entries.  *name* is not used by the check.

    Raises TypeError when *cc* does not have 16 entries.
    """
    if len(cc) != 16:
        raise TypeError('cc not good cat class')
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000303
304
305# In the beginning,....
# empty table: one slot per catcode, no characters assigned yet
epoch_cc = [None] * 16
##dump_cc('epoch_cc', epoch_cc)


# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE], initex_cc[CC_IGNORE], initex_cc[CC_WHITE] = \
                                    '\n', '\0', ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT], initex_cc[CC_INVALID] = '%', '\x7F'
#initex_cc[CC_OTHER] = make_other_codes(initex_cc) I don't need them, anyway
##dump_cc('initex_cc', initex_cc)


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE], lplain_cc[CC_RBRACE] = '{', '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
# must come last: ``other'' is the complement of all categories set above
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)
##dump_cc('lplain_cc', lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other''
# my own purpose catlist
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:] # remove '_' here
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'       # add it to OTHER list
# checks that my_cc has 16 entries
dump_cc('my_cc', my_cc)
341
342
343
344# needed for un_re, my equivalent for regexp-quote in Emacs
# characters that un_re() protects with a backslash
# NOTE(review): other regex metacharacters such as '.' and '*' are not in
# this set, so they pass through unquoted -- confirm whether the regex
# syntax in use treats them as special before relying on un_re for them
re_meaning = '\\[]^$'

def un_re(str):
    """Return *str* with every character of re_meaning backslash-quoted.

    All other characters are copied unchanged; '' yields ''.
    """
    quoted = []
    for char in str:
        if char in re_meaning:
            quoted.append('\\')
        quoted.append(char)
    return ''.join(quoted)
354
355# NOTE the negate ('^') operator in *some* of the regexps below
def _make_rc_charclass(cc, codelist, negated):
    # compile a one-character class holding the characters that *cc*
    # assigns to the catcodes in *codelist*; negated selects '[^...]'
    members = code2string(cc, codelist)
    if negated:
        return regex.compile('[^' + members + ']')
    return regex.compile('[' + members + ']')

def make_rc_regular(cc):
    """Compile the class of characters that stop regular-text scanning."""
    # problems here if '[]' are included!!
    return _make_rc_charclass(cc, regular_stopcodes, 0)

def make_rc_cs_scan(cc):
    """Compile the (negated) class matching the first non-csname char."""
    return _make_rc_charclass(cc, csname_scancodes, 1)

def make_rc_comment(cc):
    """Compile the class of characters that end a comment."""
    return _make_rc_charclass(cc, comment_stopcodes, 0)

def make_rc_endwhite(cc):
    """Compile the (negated) class matching the first non-white char."""
    return _make_rc_charclass(cc, white_scancodes, 1)



# regular: normal mode:
rc_regular = make_rc_regular(my_cc)

# scan: scan a command sequence e.g. `newlength' or `mbox' or `;', `,' or `$'
rc_cs_scan = make_rc_cs_scan(my_cc)
rc_comment = make_rc_comment(my_cc)
rc_endwhite = make_rc_endwhite(my_cc)
378
379
380# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
381# RECURSION-LEVEL is incremented on entry.
382# result contains the list of chunks returned
383# together with this list, the buffer position is returned
384
385# RECURSION-LEVEL will be set to zero *again*, when recursively a
386# {,D}MATH-mode scan has been entered.
387# This has been done in order to better check for environment-mismatches
388
389def parseit(buf, *rest):
390 global lineno
391
392 if len(rest) == 3:
393 parsemode, start, lvl = rest
394 elif len(rest) == 2:
395 parsemode, start, lvl = rest + (0, )
396 elif len(rest) == 1:
397 parsemode, start, lvl = rest + (0, 0)
398 elif len(rest) == 0:
399 parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
400 else:
401 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
402 result = []
403 end = len(buf)
404 if lvl == 0 and parsemode == mode(MODE_REGULAR):
405 lineno = 1
406 lvl = lvl + 1
407
408 ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
409
410 #
411 # some of the more regular modes...
412 #
413
414 if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
415 cstate = []
416 newpos = start
417 curpmode = parsemode
418 while 1:
419 where = newpos
420 #print '\tnew round: ' + epsilon(buf, where)
421 if where == end:
422 if lvl > 1 or curpmode != mode(MODE_REGULAR):
423 # not the way we started...
424 raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
425 # the real ending of lvl-1 parse
426 return end, result
427
428 pos = rc_regular.search(buf, where)
429
430 if pos < 0:
431 pos = end
432
433 if pos != where:
434 newpos, c = pos, chunk(PLAIN, where, (where, pos))
435 result.append(c)
436 continue
437
438
439 #
440 # ok, pos == where and pos != end
441 #
442 foundchar = buf[where]
443 if foundchar in my_cc[CC_LBRACE]:
444 # recursive subgroup parse...
445 newpos, data = parseit(buf, curpmode, where+1, lvl)
446 result.append(chunk(GROUP, where, data))
447
448 elif foundchar in my_cc[CC_RBRACE]:
449 if lvl <= 1:
450 raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
451 if lvl == 1 and mode != mode(MODE_REGULAR):
452 raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
453 return where + 1, result
454
455 elif foundchar in my_cc[CC_ESCAPE]:
456 #
457 # call the routine that actually deals with
458 # this problem. If do_ret is None, than
459 # return the value of do_ret
460 #
461 # Note that handle_cs might call this routine
462 # recursively again...
463 #
464 do_ret, newpos = handlecs(buf, where, \
465 curpmode, lvl, result, end)
466 if do_ret != None:
467 return do_ret
468
469 elif foundchar in my_cc[CC_COMMENT]:
470 newpos, data = parseit(buf, \
471 mode(MODE_COMMENT), where+1, lvl)
472 result.append(chunk(COMMENT, where, data))
473
474 elif foundchar in my_cc[CC_MATHSHIFT]:
475 # note that recursive calls to math-mode
476 # scanning are called with recursion-level 0
477 # again, in order to check for bad mathend
478 #
479 if where + 1 != end and \
480 buf[where + 1] in \
481 my_cc[CC_MATHSHIFT]:
482 #
483 # double mathshift, e.g. '$$'
484 #
485 if curpmode == mode(MODE_REGULAR):
486 newpos, data = parseit(buf, \
487 mode(MODE_DMATH), \
488 where+2, 0)
489 result.append(chunk(DMATH, \
490 where, data))
491 elif curpmode == mode(MODE_MATH):
492 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
493 elif lvl != 1:
494 raise error, 'bad mathend.' + \
495 lle(lvl, buf, where)
496 else:
497 return where + 2, result
498 else:
499 #
500 # single math shift, e.g. '$'
501 #
502 if curpmode == mode(MODE_REGULAR):
503 newpos, data = parseit(buf, \
504 mode(MODE_MATH), \
505 where+1, 0)
506 result.append(chunk(MATH, \
507 where, data))
508 elif curpmode == mode(MODE_DMATH):
509 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
510 elif lvl != 1:
511 raise error, 'bad mathend.' + \
512 lv(lvl, buf, where)
513 else:
514 return where + 1, result
515
516 elif foundchar in my_cc[CC_IGNORE]:
517 print 'warning: ignored char', `foundchar`
518 newpos = where + 1
519
520 elif foundchar in my_cc[CC_ACTIVE]:
521 result.append(chunk(ACTIVE, where, foundchar))
522 newpos = where + 1
523
524 elif foundchar in my_cc[CC_INVALID]:
525 raise error, 'invalid char ' + `foundchar`
526 newpos = where + 1
527
528 elif foundchar in my_cc[CC_ENDLINE]:
529 #
530 # after an end of line, eat the rest of
531 # whitespace on the beginning of the next line
532 # this is what LaTeX more or less does
533 #
534 # also, try to indicate double newlines (\par)
535 #
536 lineno = lineno + 1
537 savedwhere = where
538 newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
539 if newpos != end and buf[newpos] in \
540 my_cc[CC_ENDLINE]:
541 result.append(chunk(DENDLINE, \
542 savedwhere, foundchar))
543 else:
544 result.append(chunk(ENDLINE, \
545 savedwhere, foundchar))
546 else:
547 result.append(chunk(OTHER, where, foundchar))
548 newpos = where + 1
549
550 elif parsemode == mode(MODE_CS_SCAN):
551 #
552 # scan for a control sequence token. `\ape', `\nut' or `\%'
553 #
554 if start == end:
555 raise EOFError, 'can\'t find end of csname'
556 pos = rc_cs_scan.search(buf, start)
557 if pos < 0:
558 pos = end
559 if pos == start:
560 # first non-letter right where we started the search
561 # ---> the control sequence name consists of one single
562 # character. Also: don't eat white space...
563 if buf[pos] in my_cc[CC_ENDLINE]:
564 lineno = lineno + 1
565 pos = pos + 1
566 return pos, (start, pos)
567 else:
568 spos = pos
569 if buf[pos] == '\n':
570 lineno = lineno + 1
571 spos = pos + 1
572 pos2, dummy = parseit(buf, \
573 mode(MODE_GOBBLEWHITE), spos, lvl)
574 return pos2, (start, pos)
575
576 elif parsemode == mode(MODE_GOBBLEWHITE):
577 if start == end:
578 return start, ''
579 pos = rc_endwhite.search(buf, start)
580 if pos < 0:
581 pos = start
582 return pos, (start, pos)
583
584 elif parsemode == mode(MODE_COMMENT):
585 pos = rc_comment.search(buf, start)
586 lineno = lineno + 1
587 if pos < 0:
588 print 'no newline perhaps?'
589 raise EOFError, 'can\'t find end of comment'
590 pos = pos + 1
591 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
592 return pos2, (start, pos)
593
594
595 else:
596 raise error, 'Unknown mode (' + `parsemode` + ')'
597
598
599#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
600
601#boxcommands = 'mbox', 'fbox'
602#defcommands = 'def', 'newcommand'
603
# literal text that terminates a verbatim environment
endverbstr = '\\end{verbatim}'

# compiled pattern for that text (quoted with un_re)
re_endverb = regex.compile(un_re(endverbstr))
607
608#
609# handlecs: helper function for parseit, for the special thing we might
610# wanna do after certain command control sequences
611# returns: None or return_data, newpos
612#
613# in the latter case, the calling function is instructed to immediately
614# return with the data in return_data
615#
def handlecs(buf, where, curpmode, lvl, result, end):
    """Handle the control sequence whose escape character is at *where*.

    buf      -- the buffer being parsed
    where    -- position of the escape character
    curpmode -- parse mode of the calling parseit, reused for the
                recursive parses of environment and \\if bodies
    lvl      -- recursion level, passed on to parseit
    result   -- the caller's chunk list; new items are appended to it
    end      -- len(buf)

    Returns (do_ret, newpos).  do_ret is None in the normal case; for
    \\end{...}, \\else and \\fi it is the tuple (newpos, result) that
    the calling parseit must return immediately.  In those cases the
    last item appended to result is a plain string (the environment
    name, 'else' or 'fi') rather than a chunk.

    Raises error on malformed or mismatched constructs.
    """
    global lineno

    # get the control sequence name...
    newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
    saveddata = data

    if s(buf, data) in ('begin', 'end'):
        # skip the expected '{' and get the LaTeX-envname '}'
        newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
        if len(data) != 1:
            raise error, 'expected 1 chunk of data.' + \
                      lle(lvl, buf, where)

        # yucky, we've got an environment
        envname = s(buf, data[0].data)
        ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
        if s(buf, saveddata) == 'begin' and envname == 'verbatim':
            # verbatim deserves special treatment
            pos = re_endverb.search(buf, newpos)
            if pos < 0:
                raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
            result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
            newpos = pos + len(endverbstr)

        elif s(buf, saveddata) == 'begin':
            # start parsing recursively... If that parse returns
            # from an '\end{...}', then should the last item of
            # the returned data be a string containing the ended
            # environment
            newpos, data = parseit(buf, curpmode, newpos, lvl)
            if not data or type(data[-1]) != type(''):
                raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
            retenv = data[-1]
            del data[-1]
            if retenv != envname:
                #[`retenv`, `envname`]
                raise error, 'environments do not match.' + \
                          lle(lvl, buf, where) + \
                          epsilon(buf, newpos)
            result.append(chunk(ENV, where, (retenv, data)))
        else:
            # 'end'... append the environment name, as just
            # pointed out, and order parsit to return...
            result.append(envname)
            ##print 'POINT of return: ' + epsilon(buf, newpos)
            # the tuple will be returned by parseit
            return (newpos, result), newpos

    # end of \begin ... \end handling

    elif s(buf, data)[0:2] == 'if':
        # another scary monster: the 'if' directive
        flag = s(buf, data)[2:]

        # recursively call parseit, just like environment above..
        # the last item of data should contain the if-termination
        # e.g., 'else' of 'fi'
        newpos, data = parseit(buf, curpmode, newpos, lvl)
        if not data or data[-1] not in ('else', 'fi'):
            raise error, 'wrong if... termination' + \
                      lle(lvl, buf, where) + epsilon(buf, newpos)

        ifterm = data[-1]
        del data[-1]
        # 0 means dont_negate flag
        result.append(chunk(IF, where, (flag, 0, data)))
        if ifterm == 'else':
            # do the whole thing again, there is only one way
            # to end this one, by 'fi'
            newpos, data = parseit(buf, curpmode, newpos, lvl)
            if not data or data[-1] not in ('fi', ):
                raise error, 'wrong if...else... termination' \
                          + lle(lvl, buf, where) \
                          + epsilon(buf, newpos)

            ifterm = data[-1]
            del data[-1]
            # 1: the else-branch is stored with the negate flag set
            result.append(chunk(IF, where, (flag, 1, data)))
        #done implicitely: return None, newpos

    elif s(buf, data) in ('else', 'fi'):
        result.append(s(buf, data))
        # order calling party to return tuple
        return (newpos, result), newpos

    # end of \if, \else, ... \fi handling

    elif s(buf, saveddata) == 'verb':
        # x2 is the end of the scanned name, i.e. where the delimiter
        # character of the \verb argument is expected
        x2 = saveddata[1]
        result.append(chunk(CSNAME, where, data))
        if x2 == end:
            raise error, 'premature end of command.' + lle(lvl, buf, where)
        delimchar = buf[x2]
        ##print 'VERB: delimchar ' + `delimchar`
        pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
        if pos < 0:
            raise error, 'end of \'verb\' argument (' + \
                      `delimchar` + ') not found.' + \
                      lle(lvl, buf, where)
        # the text between the delimiters becomes a one-chunk group
        result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
        newpos = pos + 1
    else:
        result.append(chunk(CSNAME, where, data))
    return None, newpos
721
722# this is just a function to get the string value if the possible data-tuple
def s(buf, data):
    """Return the string value of a chunk's data field.

    data -- either a string, which is returned unchanged, or a
            sequence of two integers (x1, x2), for which buf[x1:x2]
            is returned

    Raises TypeError when *data* is neither of those.
    """
    if type(data) == type(''):
        return data
    is_int_pair = len(data) == 2 \
                  and type(data[0]) == type(data[1]) == type(0)
    if not is_int_pair:
        raise TypeError('expected tuple of 2 integers')
    x1, x2 = data
    return buf[x1:x2]
730
731
732##length, data1, i = getnextarg(length, buf, pp, i + 1)
733
734# make a deep-copy of some chunks
def crcopy(r):
    """Return a new list holding a chunkcopy() of every chunk in *r*."""
    return [chunkcopy(item) for item in r]
740
741
742
743# copy a chunk, would better be a method of class Chunk...
def chunkcopy(ch):
    """Return a copy of chunk *ch*.

    The member list of a GROUP chunk is copied recursively; for every
    other chunk type the new chunk shares the original's data object.
    """
    if ch.chtype == chunk_type(GROUP):
        members = [chunkcopy(member) for member in ch.data]
        return chunk(GROUP, ch.where, members)
    return chunk(ch.chtype, ch.where, ch.data)
752
753
754# get next argument for TeX-macro, flatten a group (insert between)
755# or return Command Sequence token, or give back one character
def getnextarg(length, buf, pp, item):
    """Isolate the next macro argument in chunk list *pp*, in place.

    length -- current number of chunks in pp (kept in step with the
              edits made here)
    buf    -- the parsed buffer
    pp     -- list of chunks, modified in place
    item   -- index where the argument is expected

    Returns (length, item): the updated length and the index just past
    the argument.  ENDLINE chunks at *item* are deleted first.  A GROUP
    is replaced by its (converted) members; from a PLAIN chunk a single
    character is split off; any other chunk type is reported and left
    alone, with *item* returned unchanged.

    Raises error when there is no chunk left at *item*.
    """

    ##wobj = Wobj().init()
    ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    while item < length and pp[item].chtype == chunk_type(ENDLINE):
        del pp[item]
        length = length - 1
    if item >= length:
        raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
    if pp[item].chtype == chunk_type(GROUP):
        # flatten the group: convert its members and splice them into
        # pp at the position the group occupied
        newpp = pp[item].data
        del pp[item]
        length = length - 1
        changeit(buf, newpp)
        length = length + len(newpp)
        pp[item:item] = newpp
        item = item + len(newpp)
        if len(newpp) < 10:
            # only feeds the disabled debug print below
            wobj = Wobj().init()
            dumpit(buf, wobj.write, newpp)
            ##print 'GETNEXTARG: inserted ' + `wobj.data`
        return length, item
    elif pp[item].chtype == chunk_type(PLAIN):
        #grab one char
        print 'WARNING: grabbing one char'
        if len(s(buf, pp[item].data)) > 1:
            # split: a new chunk with the first character, followed by
            # the original chunk holding the remainder
            pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
            item, length = item+1, length+1
            pp[item].data = s(buf, pp[item].data)[1:]
        else:
            item = item+1
        return length, item
    else:
        ch = pp[item]
        try:
            str = `s(buf, ch.data)`
        except TypeError:
            # data is not a string or slice tuple (e.g. a chunk list)
            str = `ch.data`
        if len(str) > 400:
            str = str[:400] + '...'
        print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
        return length, item
800
801
802# this one is needed to find the end of LaTeX's optional argument, like
803# item[...]
re_endopt = regex.compile(']')

# get a LaTeX-optional argument, you know, the square braces '[' and ']'
def getoptarg(length, buf, pp, item):
    """Isolate a LaTeX optional argument ([...]) in *pp*, in place.

    length -- current number of chunks in pp
    buf    -- the parsed buffer
    pp     -- list of chunks, modified in place
    item   -- index where the optional argument may start

    Returns (length, item).  When pp[item] is not a PLAIN chunk whose
    text starts with '[', nothing is changed and *item* is returned as
    given.  Otherwise the brackets are removed and the returned item is
    the index just past the argument's chunks; text following the ']'
    (minus leading blanks and tabs) becomes a new PLAIN chunk there.

    Raises error when no closing ']' is found.
    """

    # NOTE(review): this dump only feeds the disabled debug print below
    wobj = Wobj().init()
    dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
    ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'

    if item >= length or \
              pp[item].chtype != chunk_type(PLAIN) or \
              s(buf, pp[item].data)[0] != '[':
        return length, item

    # strip the opening bracket; drop the chunk if nothing is left
    pp[item].data = s(buf, pp[item].data)[1:]
    if len(pp[item].data) == 0:
        del pp[item]
        length = length-1

    while 1:
        if item == length:
            raise error, 'No end of optional arg found'
        if pp[item].chtype == chunk_type(PLAIN):
            text = s(buf, pp[item].data)
            pos = re_endopt.search(text)
            if pos >= 0:
                # keep the text before ']' as the end of the argument
                pp[item].data = text[:pos]
                if pos == 0:
                    del pp[item]
                    length = length-1
                else:
                    item=item+1
                text = text[pos+1:]

                while text and text[0] in ' \t':
                    text = text[1:]

                if text:
                    pp.insert(item, chunk(PLAIN, 0, text))
                    length = length + 1
                return length, item

        item = item+1
847
848
849# Wobj just add write-requests to the ``data'' attribute
class Wobj:
    """Minimal write-only sink: collects everything written in .data."""

    def init(self):
        """Start with an empty buffer and return self."""
        self.data = ''
        return self

    def write(self, data):
        """Append *data* to the collected text."""
        self.data += data
Guido van Rossumb819bdf1995-03-15 11:26:26 +0000856
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000857# ignore these commands
ignoredcommands = ('bcode', 'ecode')

# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF', 'LaTeX')

# \{ --> {,  \} --> }, etc
themselves = ('{', '}', '.', '@', ' ', '\n') + wordsselves

# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')

# this is how *I* would show the difference between emph and strong
#  a (prefix, suffix) pair wraps the argument;
#  code 1 means: fold to uppercase
markcmds = {
    'code': ('', ''),
    'var': 1,
    'emph': ('_', '_'),
    'strong': ('*', '*')
    }

# recognise patter {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {
    'rm': 'r',
    'it': 'i',
    'em': 'emph',
    'bf': 'b',
    'tt': 't'
    }

# transparent for these commands
for_texi = (
    'emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp',
    'file', 'r', 'i', 't'
    )
Guido van Rossum95cd2ef1992-12-08 14:37:55 +0000876
877
878# try to remove macros and return flat text
def flattext(buf, pp):
    """Return the chunks in *pp* rendered as flat text, macros removed.

    Works on a crcopy() of *pp*, so the caller's list is not modified.
    CSNAME chunks are either turned into PLAIN text (self-mapping
    commands, 'e', LaTeX-only specials, the markcmds commands applied
    to their argument) or deleted; GROUP chunks are flattened through
    getnextarg.  The resulting list is written out with dumpit.

    Raises 'FATAL' when the length bookkeeping or markcmds is corrupt.
    """
    pp = crcopy(pp)
    ##print '---> FLATTEXT ' + `pp`
    wobj = Wobj().init()

    # length mirrors len(pp); it is adjusted by hand on every edit and
    # verified at the top of each round
    i, length = 0, len(pp)
    while 1:
        if len(pp) != length:
            raise 'FATAL', 'inconsistent length'
        if i >= length:
            break
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type(PLAIN):
            pass
        elif ch.chtype == chunk_type(CSNAME):
            if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
                ch.chtype = chunk_type(PLAIN)
            elif s(buf, ch.data) == 'e':
                # \e becomes a literal backslash
                ch.chtype = chunk_type(PLAIN)
                ch.data = '\\'
            elif len(s(buf, ch.data)) == 1 \
                      and s(buf, ch.data) in onlylatexspecial:
                ch.chtype = chunk_type(PLAIN)
                # if it is followed by an empty group,
                # remove that group, it was needed for
                # a true space
                if i < length \
                          and pp[i].chtype==chunk_type(GROUP) \
                          and len(pp[i].data) == 0:
                    del pp[i]
                    length = length-1

            elif s(buf, ch.data) in markcmds.keys():
                # replace the command and its argument by the marked-up
                # flat text of the argument
                length, newi = getnextarg(length, buf, pp, i)
                str = flattext(buf, pp[i:newi])
                del pp[i:newi]
                length = length - (newi - i)
                ch.chtype = chunk_type(PLAIN)
                markcmd = s(buf, ch.data)
                x = markcmds[markcmd]
                if type(x) == type(()):
                    pre, after = x
                    str = pre+str+after
                elif x == 1:
                    str = string.upper(str)
                else:
                    raise 'FATAL', 'corrupt markcmds'
                ch.data = str
            else:
                if s(buf, ch.data) not in ignoredcommands:
                    print 'WARNING: deleting command ' + `s(buf, ch.data)`
                    print 'PP' + `pp[i-1]`
                # drop the command chunk and revisit the same index
                del pp[i-1]
                i, length = i-1, length-1
        elif ch.chtype == chunk_type(GROUP):
            # flatten the group in place, then rescan from its position
            length, newi = getnextarg(length, buf, pp, i-1)
            i = i-1
##            str = flattext(buf, crcopy(pp[i-1:newi]))
##            del pp[i:newi]
##            length = length - (newi - i)
##            ch.chtype = chunk_type(PLAIN)
##            ch.data = str
        else:
            pass

    dumpit(buf, wobj.write, pp)
    ##print 'FLATTEXT: RETURNING ' + `wobj.data`
    return wobj.data
948
949# try to generate node names (a bit shorter than the chapter title)
950# note that the \nodename command (see elsewhere) overules these efforts
def invent_node_names(text):
    """Derive a (usually shorter) node name from a section title.

    Recognised title shapes, compared case-insensitively:
      'Built-in X'      -> 'X'  (unless X is 'modules' or 'functions')
      'A Module X'      -> 'X'
      'A Object X'      -> 'A Object'
      '... Methods and Data Attributes' (more than four words)
                        -> the first two words
    Any other title is returned unchanged.
    """
    words = text.split()
    lowered = [word.lower() for word in words]

    ##print 'WORDS ' + `words`

    if len(words) == 2 \
       and lowered[0] == 'built-in' \
       and lowered[1] not in ('modules', 'functions'):
        return words[1]
    if len(words) == 3 and lowered[1] == 'module':
        return words[2]
    if len(words) == 3 and lowered[1] == 'object':
        return ' '.join(words[0:2])
    if len(words) > 4 \
       and lowered[-4:] == ['methods', 'and', 'data', 'attributes']:
        return ' '.join(words[:2])
    return text
968
# matches one of the characters , ` ' @ { }
re_commas_etc = regex.compile('[,`\'@{}]')

# matches a (possibly empty) run of blanks and tabs;
# next_command_p uses it to recognise all-blank text
re_whitespace = regex.compile('[ \t]*')
972
973
974##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
975
976# look if the next non-white stuff is also a command, resulting in skipping
977# double endlines (DENDLINE) too, and thus omitting \par's
978# Sometimes this is too much, maybe consider DENDLINE's as stop
def next_command_p(length, buf, pp, i, cmdname):
    """Check whether the next non-white chunk at or after pp[i] is the
    command *cmdname*.

    ENDLINE and DENDLINE chunks and PLAIN chunks consisting only of
    blanks and tabs are skipped.  Returns the index just after the
    command chunk when it is found first, -1 otherwise.  *length* is
    not used.
    """
    for index in range(i, len(pp)):
        ch = pp[index]
        if ch.chtype == chunk_type(ENDLINE):
            continue
        if ch.chtype == chunk_type(DENDLINE):
            continue
        if ch.chtype == chunk_type(PLAIN):
            text = s(buf, ch.data)
            if re_whitespace.search(text) == 0 and \
               re_whitespace.match(text) == len(text):
                continue
            return -1
        if ch.chtype == chunk_type(CSNAME) and s(buf, ch.data) == cmdname:
            return index + 1    # _after_ the command
        return -1
    return -1
1000
1001
1002# things that are special to LaTeX, but not to texi..
1003onlylatexspecial = '_~^$#&%'
1004
# Empty class; instances are used as plain attribute containers.
class Struct: pass

# State shared by the conversion routines.  The fields of both objects
# are (re)set by startchange(); further fields are added while converting.
hist = Struct()
out = Struct()
1009
# (Re)initialise the conversion state kept in `hist' and `out'.
#
# hist.inenv             stack of open environment names, innermost first
# hist.nodenames         node names handed out so far
# hist.inargs            non-zero while \funcdesc-style arguments are parsed
# hist.*nesting          current depth of enumerate / itemize environments
# hist.indexsubitem      words of the last \renewcommand{\indexsubitem};
#                        starts out empty so that a description seen before
#                        any \renewcommand is reported through the normal
#                        error path instead of failing on a missing (or,
#                        on a second run, stale) attribute
# out.doublenodes        node names that were used more than once
def startchange():
    global hist, out

    hist.inenv = []
    hist.nodenames = []
    hist.cindex = []
    hist.inargs = 0
    hist.enumeratenesting, hist.itemizenesting = 0, 0
    hist.indexsubitem = []

    out.doublenodes = []
    out.doublecindeces = []
1021
1022
# ready-made one-element chunk lists; changeit() splices them into the
# index entries it builds for \indexiv
spacech = [chunk(PLAIN, 0, ' ')]
commach = [chunk(PLAIN, 0, ', ')]
cindexch = [chunk(CSLINE, 0, 'cindex')]

# the standard variation in symbols for itemize
# (indexed by the current itemize nesting depth)
itemizesymbols = ['bullet', 'minus', 'dots']

# same for enumerate
enumeratesymbols = ['1', 'A', 'a']
1032
1033##
1034## \begin{ {func,data,exc}desc }{name}...
1035## the resulting texi-code is dependent on the contents of indexsubitem
1036##
1037
# indexsubitem: `['XXX', 'function']'
# funcdesc:
#   deffn {`idxsi`} NAME (FUNCARGS)

# indexsubitem: `['XXX', 'method']'
# funcdesc:
#   defmethod {`idxsi[0]`} NAME (FUNCARGS)

# indexsubitem: `['in', 'module', 'MODNAME']'
# datadesc:
#   defcv data {`idxsi[1:]`} NAME
# excdesc:
#   defcv exception {`idxsi[1:]`} NAME
# funcdesc:
#   deffn {function of `idxsi[1:]`} NAME (FUNCARGS)

# indexsubitem: `['OBJECT', 'attribute']'
# datadesc:
#   defcv attribute {`OBJECT`} NAME
1057
1058
1059## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1060## or \funcline{NAME}{ARGS}
1061##
1062def do_funcdesc(length, buf, pp, i):
1063 startpoint = i-1
1064 ch = pp[startpoint]
1065 wh = ch.where
1066 length, newi = getnextarg(length, buf, pp, i)
1067 funcname = chunk(GROUP, wh, pp[i:newi])
1068 del pp[i:newi]
1069 length = length - (newi-i)
1070 save = hist.inargs
1071 hist.inargs = 1
1072 length, newi = getnextarg(length, buf, pp, i)
1073 hist.inargs = save
1074 del save
1075 the_args = [chunk(PLAIN, wh, '()'[0])] + \
1076 pp[i:newi] + \
1077 [chunk(PLAIN, wh, '()'[1])]
1078 del pp[i:newi]
1079 length = length - (newi-i)
1080
1081 idxsi = hist.indexsubitem # words
1082 command = ''
1083 cat_class = ''
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001084 if idxsi and idxsi[-1] in ('method', 'protocol'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001085 command = 'defmethod'
1086 cat_class = string.join(idxsi[:-1])
1087 elif len(idxsi) == 2 and idxsi[1] == 'function':
1088 command = 'deffn'
1089 cat_class = string.join(idxsi)
1090 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1091 command = 'deffn'
1092 cat_class = 'function of ' + string.join(idxsi[1:])
1093
1094 if not command:
1095 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1096
1097 ch.chtype = chunk_type(CSLINE)
1098 ch.data = command
1099
1100 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1101 cslinearg.append(chunk(PLAIN, wh, ' '))
1102 cslinearg.append(funcname)
1103 cslinearg.append(chunk(PLAIN, wh, ' '))
1104 l = len(cslinearg)
1105 cslinearg[l:l] = the_args
1106
1107 pp.insert(i, chunk(GROUP, wh, cslinearg))
1108 i, length = i+1, length+1
1109 hist.command = command
1110 return length, i
1111
1112
1113## this routine will be called on \begin{excdesc}{NAME}
1114## or \excline{NAME}
1115##
1116def do_excdesc(length, buf, pp, i):
1117 startpoint = i-1
1118 ch = pp[startpoint]
1119 wh = ch.where
1120 length, newi = getnextarg(length, buf, pp, i)
1121 excname = chunk(GROUP, wh, pp[i:newi])
1122 del pp[i:newi]
1123 length = length - (newi-i)
1124
1125 idxsi = hist.indexsubitem # words
1126 command = ''
1127 cat_class = ''
1128 class_class = ''
1129 if len(idxsi) == 2 and idxsi[1] == 'exception':
1130 command = 'defvr'
1131 cat_class = string.join(idxsi)
1132 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1133 command = 'defcv'
1134 cat_class = 'exception'
1135 class_class = string.join(idxsi[1:])
1136 elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
1137 command = 'defcv'
1138 cat_class = 'exception'
1139 class_class = string.join(idxsi[2:])
1140
1141
1142 if not command:
1143 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1144
1145 ch.chtype = chunk_type(CSLINE)
1146 ch.data = command
1147
1148 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1149 cslinearg.append(chunk(PLAIN, wh, ' '))
1150 if class_class:
1151 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1152 cslinearg.append(chunk(PLAIN, wh, ' '))
1153 cslinearg.append(excname)
1154
1155 pp.insert(i, chunk(GROUP, wh, cslinearg))
1156 i, length = i+1, length+1
1157 hist.command = command
1158 return length, i
1159
1160## same for datadesc or dataline...
1161def do_datadesc(length, buf, pp, i):
1162 startpoint = i-1
1163 ch = pp[startpoint]
1164 wh = ch.where
1165 length, newi = getnextarg(length, buf, pp, i)
1166 dataname = chunk(GROUP, wh, pp[i:newi])
1167 del pp[i:newi]
1168 length = length - (newi-i)
1169
1170 idxsi = hist.indexsubitem # words
1171 command = ''
1172 cat_class = ''
1173 class_class = ''
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001174 if idxsi[-1] in ('attribute', 'option'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001175 command = 'defcv'
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001176 cat_class = idxsi[-1]
Sjoerd Mullender1cfb6b81994-12-14 15:28:22 +00001177 class_class = string.join(idxsi[:-1])
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001178 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1179 command = 'defcv'
1180 cat_class = 'data'
1181 class_class = string.join(idxsi[1:])
1182 elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
1183 command = 'defcv'
1184 cat_class = 'data'
1185 class_class = string.join(idxsi[2:])
1186
1187
1188 if not command:
1189 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1190
1191 ch.chtype = chunk_type(CSLINE)
1192 ch.data = command
1193
1194 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1195 cslinearg.append(chunk(PLAIN, wh, ' '))
1196 if class_class:
1197 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1198 cslinearg.append(chunk(PLAIN, wh, ' '))
1199 cslinearg.append(dataname)
1200
1201 pp.insert(i, chunk(GROUP, wh, cslinearg))
1202 i, length = i+1, length+1
1203 hist.command = command
1204 return length, i
1205
1206
# regular indices: those that are not set in tt font by default....
# (the \ttindex handling in changeit() checks this to decide which part
# of the index entry gets an explicit font command)
regindices = ('cindex', )
1209
# remove illegal characters from node names
#
# Returns `text' without any comma, backquote, apostrophe, `@', `{' or
# `}' (the same set the re_commas_etc pattern describes).  A warning
# showing the new name is printed whenever something was removed.
def rm_commas_etc(text):
    illegal = ',`\'@{}'
    result = ''
    for c in text:
        if c not in illegal:
            result = result + c
    if result != text:
        print('Warning: nodename changed to ' + repr(result))

    return result
1227
# boolean flags
# (looked up by name when changeit() meets an IF chunk; a flag name that
# is not a key of this dictionary makes changeit() raise `error')
flags = {'texi': 1}
1230
1231
1232##
1233## changeit: the actual routine, that changes the contents of the parsed
1234## chunks
1235##
1236
1237def changeit(buf, pp):
1238 global onlylatexspecial, hist, out
1239
1240 i, length = 0, len(pp)
1241 while 1:
1242 # sanity check: length should always equal len(pp)
1243 if len(pp) != length:
1244 raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
1245 if i >= length:
1246 break
1247 ch = pp[i]
1248 i = i + 1
1249
1250 if type(ch) == type(''):
1251 #normally, only chunks are present in pp,
1252 # but in some cases, some extra info
1253 # has been inserted, e.g., the \end{...} clauses
1254 raise 'FATAL', 'got string, probably too many ' + `end`
1255
1256 if ch.chtype == chunk_type(GROUP):
1257 # check for {\em ...} constructs
1258 if ch.data and \
1259 ch.data[0].chtype == chunk_type(CSNAME) and \
1260 s(buf, ch.data[0].data) in fontchanges.keys():
1261 k = s(buf, ch.data[0].data)
1262 del ch.data[0]
1263 pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
1264 length, i = length+1, i+1
1265
1266 # recursively parse the contents of the group
1267 changeit(buf, ch.data)
1268
1269 elif ch.chtype == chunk_type(IF):
1270 # \if...
1271 flag, negate, data = ch.data
Guido van Rossum7a2dba21993-11-05 14:45:11 +00001272 ##print 'IF: flag, negate = ' + `flag, negate`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001273 if flag not in flags.keys():
1274 raise error, 'unknown flag ' + `flag`
1275
1276 value = flags[flag]
1277 if negate:
1278 value = (not value)
1279 del pp[i-1]
1280 length, i = length-1, i-1
1281 if value:
1282 pp[i:i] = data
1283 length = length + len(data)
1284
1285
1286 elif ch.chtype == chunk_type(ENV):
1287 # \begin{...} ....
1288 envname, data = ch.data
1289
1290 #push this environment name on stack
1291 hist.inenv.insert(0, envname)
1292
1293 #append an endenv chunk after grouped data
1294 data.append(chunk(ENDENV, ch.where, envname))
1295 ##[`data`]
1296
1297 #delete this object
1298 del pp[i-1]
1299 i, length = i-1, length-1
1300
1301 #insert found data
1302 pp[i:i] = data
1303 length = length + len(data)
1304
1305 if envname == 'verbatim':
1306 pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
1307 chunk(GROUP, ch.where, [])]
1308 length, i = length+2, i+2
1309
1310 elif envname == 'itemize':
1311 if hist.itemizenesting > len(itemizesymbols):
1312 raise error, 'too deep itemize nesting'
1313 ingroupch = [chunk(CSNAME, ch.where,\
1314 itemizesymbols[hist.itemizenesting])]
1315 hist.itemizenesting = hist.itemizenesting + 1
1316 pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
1317 chunk(GROUP, ch.where, ingroupch)]
1318 length, i = length+2, i+2
1319
1320 elif envname == 'enumerate':
1321 if hist.enumeratenesting > len(enumeratesymbols):
1322 raise error, 'too deep enumerate nesting'
1323 ingroupch = [chunk(PLAIN, ch.where,\
1324 enumeratesymbols[hist.enumeratenesting])]
1325 hist.enumeratenesting = hist.enumeratenesting + 1
1326 pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
1327 chunk(GROUP, ch.where, ingroupch)]
1328 length, i = length+2, i+2
1329
1330 elif envname == 'description':
1331 ingroupch = [chunk(CSNAME, ch.where, 'b')]
1332 pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
1333 chunk(GROUP, ch.where, ingroupch)]
1334 length, i = length+2, i+2
1335
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001336 elif (envname == 'tableiii') or \
1337 (envname == 'tableii'):
1338 if (envname == 'tableii'):
1339 ltable = 2
1340 else:
1341 ltable = 3
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001342 wh = ch.where
1343 newcode = []
1344
1345 #delete tabular format description
1346 # e.g., {|l|c|l|}
1347 length, newi = getnextarg(length, buf, pp, i)
1348 del pp[i:newi]
1349 length = length - (newi-i)
1350
1351 newcode.append(chunk(CSLINE, wh, 'table'))
1352 ingroupch = [chunk(CSNAME, wh, 'asis')]
1353 newcode.append(chunk(GROUP, wh, ingroupch))
1354 newcode.append(chunk(CSLINE, wh, 'item'))
1355
1356 #get the name of macro for @item
1357 # e.g., {code}
1358 length, newi = getnextarg(length, buf, pp, i)
1359
1360 if newi-i != 1:
1361 raise error, 'Sorry, expected 1 chunk argument'
1362 if pp[i].chtype != chunk_type(PLAIN):
1363 raise error, 'Sorry, expected plain text argument'
1364 hist.itemargmacro = s(buf, pp[i].data)
1365 del pp[i:newi]
1366 length = length - (newi-i)
1367
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001368 itembody = []
1369 for count in range(ltable):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001370 length, newi = getnextarg(length, buf, pp, i)
1371 emphgroup = [\
1372 chunk(CSNAME, wh, 'emph'), \
1373 chunk(GROUP, 0, pp[i:newi])]
1374 del pp[i:newi]
1375 length = length - (newi-i)
1376 if count == 0:
1377 itemarg = emphgroup
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001378 elif count == ltable-1:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001379 itembody = itembody + \
1380 [chunk(PLAIN, wh, ' --- ')] + \
1381 emphgroup
1382 else:
1383 itembody = emphgroup
1384 newcode.append(chunk(GROUP, wh, itemarg))
1385 newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
1386 pp[i:i] = newcode
1387 l = len(newcode)
1388 length, i = length+l, i+l
1389 del newcode, l
1390
1391 if length != len(pp):
1392 raise 'STILL, SOMETHING wrong', `i`
1393
1394
1395 elif envname == 'funcdesc':
1396 pp.insert(i, chunk(PLAIN, ch.where, ''))
1397 i, length = i+1, length+1
1398 length, i = do_funcdesc(length, buf, pp, i)
1399
1400 elif envname == 'excdesc':
1401 pp.insert(i, chunk(PLAIN, ch.where, ''))
1402 i, length = i+1, length+1
1403 length, i = do_excdesc(length, buf, pp, i)
1404
1405 elif envname == 'datadesc':
1406 pp.insert(i, chunk(PLAIN, ch.where, ''))
1407 i, length = i+1, length+1
1408 length, i = do_datadesc(length, buf, pp, i)
1409
1410 else:
1411 print 'WARNING: don\'t know what to do with env ' + `envname`
1412
1413 elif ch.chtype == chunk_type(ENDENV):
1414 envname = ch.data
1415 if envname != hist.inenv[0]:
1416 raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
1417 del hist.inenv[0]
1418 del pp[i-1]
1419 i, length = i-1, length-1
1420
1421 if envname == 'verbatim':
1422 pp[i:i] = [\
1423 chunk(CSLINE, ch.where, 'end'), \
1424 chunk(GROUP, ch.where, [\
1425 chunk(PLAIN, ch.where, 'example')])]
1426 i, length = i+2, length+2
1427 elif envname == 'itemize':
1428 hist.itemizenesting = hist.itemizenesting - 1
1429 pp[i:i] = [\
1430 chunk(CSLINE, ch.where, 'end'), \
1431 chunk(GROUP, ch.where, [\
1432 chunk(PLAIN, ch.where, 'itemize')])]
1433 i, length = i+2, length+2
1434 elif envname == 'enumerate':
1435 hist.enumeratenesting = hist.enumeratenesting-1
1436 pp[i:i] = [\
1437 chunk(CSLINE, ch.where, 'end'), \
1438 chunk(GROUP, ch.where, [\
1439 chunk(PLAIN, ch.where, 'enumerate')])]
1440 i, length = i+2, length+2
1441 elif envname == 'description':
1442 pp[i:i] = [\
1443 chunk(CSLINE, ch.where, 'end'), \
1444 chunk(GROUP, ch.where, [\
1445 chunk(PLAIN, ch.where, 'table')])]
1446 i, length = i+2, length+2
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001447 elif (envname == 'tableiii') or (envname == 'tableii'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001448 pp[i:i] = [\
1449 chunk(CSLINE, ch.where, 'end'), \
1450 chunk(GROUP, ch.where, [\
1451 chunk(PLAIN, ch.where, 'table')])]
1452 i, length = i+2, length + 2
1453 pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
1454 i, length = i+1, length+1
1455
1456 elif envname in ('funcdesc', 'excdesc', 'datadesc'):
1457 pp[i:i] = [\
1458 chunk(CSLINE, ch.where, 'end'), \
1459 chunk(GROUP, ch.where, [\
1460 chunk(PLAIN, ch.where, hist.command)])]
1461 i, length = i+2, length+2
1462 else:
1463 print 'WARNING: ending env ' + `envname` + 'has no actions'
1464
1465 elif ch.chtype == chunk_type(CSNAME):
1466 # control name transformations
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001467 if s(buf, ch.data) == 'optional':
1468 pp[i-1].chtype = chunk_type (PLAIN)
1469 pp[i-1].data = '['
1470 if (i < length) and \
1471 (pp[i].chtype == chunk_type(GROUP)):
1472 cp=pp[i].data
1473 pp[i:i+1]=cp + [\
1474 chunk(PLAIN, ch.where, ']')]
1475 length = length+len(cp)
1476 elif s(buf, ch.data) in ignoredcommands:
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001477 del pp[i-1]
1478 i, length = i-1, length-1
1479 elif s(buf, ch.data) == '@' and \
1480 i != length and \
1481 pp[i].chtype == chunk_type(PLAIN) and \
1482 s(buf, pp[i].data)[0] == '.':
1483 # \@. --> \. --> @.
1484 ch.data = '.'
1485 del pp[i]
1486 length = length-1
1487 elif s(buf, ch.data) == '\\':
1488 # \\ --> \* --> @*
1489 ch.data = '*'
1490 elif len(s(buf, ch.data)) == 1 and \
1491 s(buf, ch.data) in onlylatexspecial:
1492 ch.chtype = chunk_type(PLAIN)
1493 # check if such a command is followed by
1494 # an empty group: e.g., `\%{}'. If so, remove
1495 # this empty group too
1496 if i < length and \
1497 pp[i].chtype == chunk_type(GROUP) \
1498 and len(pp[i].data) == 0:
1499 del pp[i]
1500 length = length-1
1501
1502 elif hist.inargs and s(buf, ch.data) in inargsselves:
1503 # This is the special processing of the
1504 # arguments of the \begin{funcdesc}... or
1505 # \funcline... arguments
1506 # \, --> , \[ --> [, \] --> ]
1507 ch.chtype = chunk_type(PLAIN)
1508
1509 elif s(buf, ch.data) == 'renewcommand':
1510 # \renewcommand{\indexsubitem}....
1511 i, length = i-1, length-1
1512 del pp[i]
1513 length, newi = getnextarg(length, buf, pp, i)
1514 if newi-i == 1 \
1515 and i < length \
1516 and pp[i].chtype == chunk_type(CSNAME) \
1517 and s(buf, pp[i].data) == 'indexsubitem':
1518 del pp[i:newi]
1519 length = length - (newi-i)
1520 length, newi = getnextarg(length, buf, pp, i)
1521 text = flattext(buf, pp[i:newi])
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001522 if text[:1] != '(' or text[-1:] != ')':
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001523 raise error, 'expected indexsubitme enclosed in braces'
1524 words = string.split(text[1:-1])
1525 hist.indexsubitem = words
1526 del text, words
1527 else:
1528 print 'WARNING: renewcommand with unsupported arg removed'
1529 del pp[i:newi]
1530 length = length - (newi-i)
1531
1532 elif s(buf, ch.data) == 'item':
1533 ch.chtype = chunk_type(CSLINE)
1534 length, newi = getoptarg(length, buf, pp, i)
1535 ingroupch = pp[i:newi]
1536 del pp[i:newi]
1537 length = length - (newi-i)
1538 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1539 i, length = i+1, length+1
1540
1541 elif s(buf, ch.data) == 'ttindex':
1542 idxsi = hist.indexsubitem
1543
1544 cat_class = ''
1545 if len(idxsi) >= 2 and idxsi[1] in \
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001546 ('method', 'function', 'protocol'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001547 command = 'findex'
1548 elif len(idxsi) >= 2 and idxsi[1] in \
1549 ('exception', 'object'):
1550 command = 'vindex'
1551 else:
Guido van Rossum7a2dba21993-11-05 14:45:11 +00001552 print 'WARNING: can\'t categorize ' + `idxsi` + ' for \'ttindex\' command'
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001553 command = 'cindex'
1554
1555 if not cat_class:
1556 cat_class = '('+string.join(idxsi)+')'
1557
1558 ch.chtype = chunk_type(CSLINE)
1559 ch.data = command
1560
1561 length, newi = getnextarg(length, buf, pp, i)
1562 arg = pp[i:newi]
1563 del pp[i:newi]
1564 length = length - (newi-i)
1565
1566 cat_arg = [chunk(PLAIN, ch.where, cat_class)]
1567
1568 # determine what should be set in roman, and
1569 # what in tt-font
1570 if command in regindices:
1571
1572 arg = [chunk(CSNAME, ch.where, 't'), \
1573 chunk(GROUP, ch.where, arg)]
1574 else:
1575 cat_arg = [chunk(CSNAME, ch.where, 'r'), \
1576 chunk(GROUP, ch.where, cat_arg)]
1577
1578 ingroupch = arg + \
1579 [chunk(PLAIN, ch.where, ' ')] + \
1580 cat_arg
1581
1582 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1583 length, i = length+1, i+1
1584
1585
1586 elif s(buf, ch.data) == 'ldots':
1587 # \ldots --> \dots{} --> @dots{}
1588 ch.data = 'dots'
1589 if i == length \
1590 or pp[i].chtype != chunk_type(GROUP) \
1591 or pp[i].data != []:
1592 pp.insert(i, chunk(GROUP, ch.where, []))
1593 i, length = i+1, length+1
1594 elif s(buf, ch.data) in wordsselves:
1595 # \UNIX --> UNIX
1596 ch.chtype = chunk_type(PLAIN)
1597 if i != length \
1598 and pp[i].chtype == chunk_type(GROUP) \
1599 and pp[i].data == []:
1600 del pp[i]
1601 length = length-1
1602 elif s(buf, ch.data) in for_texi:
1603 pass
1604
1605 elif s(buf, ch.data) == 'e':
1606 # \e --> \
1607 ch.data = '\\'
1608 ch.chtype = chunk_type(PLAIN)
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001609 elif (s(buf, ch.data) == 'lineiii') or\
1610 (s(buf, ch.data) == 'lineii'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001611 # This is the most tricky one
1612 # \lineiii{a1}{a2}[{a3}] -->
1613 # @item @<cts. of itemargmacro>{a1}
1614 # a2 [ -- a3]
1615 #
1616 ##print 'LINEIIIIII!!!!!!!'
1617## wobj = Wobj().init()
1618## dumpit(buf, wobj.write, pp[i-1:i+5])
1619## print '--->' + wobj.data + '<----'
1620 if not hist.inenv:
1621 raise error, \
1622 'no environment for lineiii'
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001623 if (hist.inenv[0] != 'tableiii') and\
1624 (hist.inenv[0] != 'tableii'):
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001625 raise error, \
1626 'wrong command (' + \
Guido van Rossumb819bdf1995-03-15 11:26:26 +00001627 s(buf, ch.data)+ \
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001628 ') in wrong environment (' \
1629 + `hist.inenv[0]` + ')'
1630 ch.chtype = chunk_type(CSLINE)
1631 ch.data = 'item'
1632 length, newi = getnextarg(length, buf, pp, i)
1633 ingroupch = [chunk(CSNAME, 0, \
1634 hist.itemargmacro), \
1635 chunk(GROUP, 0, pp[i:newi])]
1636 del pp[i:newi]
1637 length = length - (newi-i)
1638## print 'ITEM ARG: --->',
1639## wobj = Wobj().init()
1640## dumpit(buf, wobj.write, ingroupch)
1641## print wobj.data, '<---'
1642 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1643 grouppos = i
1644 i, length = i+1, length+1
1645 length, i = getnextarg(length, buf, pp, i)
1646 length, newi = getnextarg(length, buf, pp, i)
1647 if newi > i:
1648 # we have a 3rd arg
1649 pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
1650 i = newi + 1
1651 length = length + 1
1652## pp[grouppos].data = pp[grouppos].data \
1653## + [chunk(PLAIN, ch.where, ' ')] \
1654## + pp[i:newi]
1655## del pp[i:newi]
1656## length = length - (newi-i)
1657 if length != len(pp):
1658 raise 'IN LINEIII IS THE ERR', `i`
1659
1660 elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
1661 #\xxxsection{A} ---->
1662 # @node A, , ,
1663 # @xxxsection A
1664 ## also: remove commas and quotes
1665 ch.chtype = chunk_type(CSLINE)
1666 length, newi = getnextarg(length, buf, pp, i)
1667 afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
1668 if afternodenamecmd < 0:
1669 cp1 = crcopy(pp[i:newi])
1670 pp[i:newi] = [\
1671 chunk(GROUP, ch.where, \
1672 pp[i:newi])]
1673 length, newi = length - (newi-i) + 1, \
1674 i+1
1675 text = flattext(buf, cp1)
1676 text = invent_node_names(text)
1677 else:
1678 length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
1679 cp1 = crcopy(pp[afternodenamecmd:endarg])
1680 del pp[newi:endarg]
1681 length = length - (endarg-newi)
1682
1683 pp[i:newi] = [\
1684 chunk(GROUP, ch.where, \
1685 pp[i:newi])]
1686 length, newi = length - (newi-i) + 1, \
1687 i + 1
1688 text = flattext(buf, cp1)
1689 if text[-1] == '.':
1690 text = text[:-1]
Guido van Rossum7a2dba21993-11-05 14:45:11 +00001691## print 'FLATTEXT:', `text`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001692 if text in hist.nodenames:
1693 print 'WARNING: node name ' + `text` + ' already used'
1694 out.doublenodes.append(text)
1695 else:
1696 hist.nodenames.append(text)
1697 text = rm_commas_etc(text)
1698 pp[i-1:i-1] = [\
1699 chunk(CSLINE, ch.where, 'node'), \
1700 chunk(GROUP, ch.where, [\
1701 chunk(PLAIN, ch.where, text+', , ,')\
1702 ])]
1703 i, length = newi+2, length+2
1704
1705 elif s(buf,ch.data) == 'funcline':
1706 # fold it to a very short environment
1707 pp[i-1:i-1] = [\
1708 chunk(CSLINE, ch.where, 'end'), \
1709 chunk(GROUP, ch.where, [\
1710 chunk(PLAIN, ch.where, hist.command)])]
1711 i, length = i+2, length+2
1712 length, i = do_funcdesc(length, buf, pp, i)
1713
1714 elif s(buf,ch.data) == 'dataline':
1715 pp[i-1:i-1] = [\
1716 chunk(CSLINE, ch.where, 'end'), \
1717 chunk(GROUP, ch.where, [\
1718 chunk(PLAIN, ch.where, hist.command)])]
1719 i, length = i+2, length+2
1720 length, i = do_datadesc(length, buf, pp, i)
1721
1722 elif s(buf,ch.data) == 'excline':
1723 pp[i-1:i-1] = [\
1724 chunk(CSLINE, ch.where, 'end'), \
1725 chunk(GROUP, ch.where, [\
1726 chunk(PLAIN, ch.where, hist.command)])]
1727 i, length = i+2, length+2
1728 length, i = do_excdesc(length, buf, pp, i)
1729
1730
1731 elif s(buf, ch.data) == 'index':
1732 #\index{A} --->
1733 # @cindex A
1734 ch.chtype = chunk_type(CSLINE)
1735 ch.data = 'cindex'
1736 length, newi = getnextarg(length, buf, pp, i)
1737
1738 ingroupch = pp[i:newi]
1739 del pp[i:newi]
1740 length = length - (newi-i)
1741 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1742 length, i = length+1, i+1
1743
1744 elif s(buf, ch.data) == 'bifuncindex':
1745 ch.chtype = chunk_type(CSLINE)
1746 ch.data = 'findex'
1747 length, newi = getnextarg(length, buf, pp, i)
1748 ingroupch = pp[i:newi]
1749 del pp[i:newi]
1750 length = length - (newi-i)
1751
1752 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1753 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1754 ingroupch.append(chunk(GROUP, ch.where, [\
1755 chunk(PLAIN, ch.where, \
1756 '(built-in function)')]))
1757
1758 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1759 length, i = length+1, i+1
1760
1761
Guido van Rossum7760cde1995-03-17 16:03:11 +00001762 elif s(buf, ch.data) == 'obindex':
1763 ch.chtype = chunk_type(CSLINE)
1764 ch.data = 'findex'
1765 length, newi = getnextarg(length, buf, pp, i)
1766 ingroupch = pp[i:newi]
1767 del pp[i:newi]
1768 length = length - (newi-i)
1769
1770 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1771 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1772 ingroupch.append(chunk(GROUP, ch.where, [\
1773 chunk(PLAIN, ch.where, \
1774 '(object)')]))
1775
1776 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1777 length, i = length+1, i+1
1778
1779
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001780 elif s(buf, ch.data) == 'opindex':
1781 ch.chtype = chunk_type(CSLINE)
1782 ch.data = 'findex'
1783 length, newi = getnextarg(length, buf, pp, i)
1784 ingroupch = pp[i:newi]
1785 del pp[i:newi]
1786 length = length - (newi-i)
1787
1788 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1789 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1790 ingroupch.append(chunk(GROUP, ch.where, [\
1791 chunk(PLAIN, ch.where, \
1792 '(operator)')]))
1793
1794 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1795 length, i = length+1, i+1
1796
1797
1798 elif s(buf, ch.data) == 'bimodindex':
1799 ch.chtype = chunk_type(CSLINE)
1800 ch.data = 'pindex'
1801 length, newi = getnextarg(length, buf, pp, i)
1802 ingroupch = pp[i:newi]
1803 del pp[i:newi]
1804 length = length - (newi-i)
1805
1806 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1807 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1808 ingroupch.append(chunk(GROUP, ch.where, [\
1809 chunk(PLAIN, ch.where, \
1810 '(built-in)')]))
1811
1812 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1813 length, i = length+1, i+1
1814
1815 elif s(buf, ch.data) == 'sectcode':
1816 ch.data = 'code'
1817
1818
1819 elif s(buf, ch.data) == 'stmodindex':
1820 ch.chtype = chunk_type(CSLINE)
1821 # use the program index as module index
1822 ch.data = 'pindex'
1823 length, newi = getnextarg(length, buf, pp, i)
1824 ingroupch = pp[i:newi]
1825 del pp[i:newi]
1826 length = length - (newi-i)
1827
1828 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1829 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1830 ingroupch.append(chunk(GROUP, ch.where, [\
1831 chunk(PLAIN, ch.where, \
1832 '(standard)')]))
1833
1834 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1835 length, i = length+1, i+1
1836
1837
1838 elif s(buf, ch.data) == 'stindex':
1839 # XXX must actually go to newindex st
1840 wh = ch.where
1841 ch.chtype = chunk_type(CSLINE)
1842 ch.data = 'cindex'
1843 length, newi = getnextarg(length, buf, pp, i)
1844 ingroupch = [chunk(CSNAME, wh, 'code'), \
1845 chunk(GROUP, wh, pp[i:newi])]
1846
1847 del pp[i:newi]
1848 length = length - (newi-i)
1849
1850 t = ingroupch[:]
1851 t.append(chunk(PLAIN, wh, ' statement'))
1852
1853 pp.insert(i, chunk(GROUP, wh, t))
1854 i, length = i+1, length+1
1855
1856 pp.insert(i, chunk(CSLINE, wh, 'cindex'))
1857 i, length = i+1, length+1
1858
1859 t = ingroupch[:]
1860 t.insert(0, chunk(PLAIN, wh, 'statement, '))
1861
1862 pp.insert(i, chunk(GROUP, wh, t))
1863 i, length = i+1, length+1
1864
1865
1866 elif s(buf, ch.data) == 'indexii':
1867 #\indexii{A}{B} --->
1868 # @cindex A B
1869 # @cindex B, A
1870 length, newi = getnextarg(length, buf, pp, i)
1871 cp11 = pp[i:newi]
1872 cp21 = crcopy(pp[i:newi])
1873 del pp[i:newi]
1874 length = length - (newi-i)
1875 length, newi = getnextarg(length, buf, pp, i)
1876 cp12 = pp[i:newi]
1877 cp22 = crcopy(pp[i:newi])
1878 del pp[i:newi]
1879 length = length - (newi-i)
1880
1881 ch.chtype = chunk_type(CSLINE)
1882 ch.data = 'cindex'
1883 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1884 chunk(PLAIN, ch.where, ' ')] + cp12))
1885 i, length = i+1, length+1
1886 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1887 chunk(GROUP, ch.where, cp22 + [\
1888 chunk(PLAIN, ch.where, ', ')]+ cp21)]
1889 i, length = i+2, length+2
1890
1891 elif s(buf, ch.data) == 'indexiii':
1892 length, newi = getnextarg(length, buf, pp, i)
1893 cp11 = pp[i:newi]
1894 cp21 = crcopy(pp[i:newi])
1895 cp31 = crcopy(pp[i:newi])
1896 del pp[i:newi]
1897 length = length - (newi-i)
1898 length, newi = getnextarg(length, buf, pp, i)
1899 cp12 = pp[i:newi]
1900 cp22 = crcopy(pp[i:newi])
1901 cp32 = crcopy(pp[i:newi])
1902 del pp[i:newi]
1903 length = length - (newi-i)
1904 length, newi = getnextarg(length, buf, pp, i)
1905 cp13 = pp[i:newi]
1906 cp23 = crcopy(pp[i:newi])
1907 cp33 = crcopy(pp[i:newi])
1908 del pp[i:newi]
1909 length = length - (newi-i)
1910
1911 ch.chtype = chunk_type(CSLINE)
1912 ch.data = 'cindex'
1913 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1914 chunk(PLAIN, ch.where, ' ')] + cp12 \
1915 + [chunk(PLAIN, ch.where, ' ')] \
1916 + cp13))
1917 i, length = i+1, length+1
1918 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1919 chunk(GROUP, ch.where, cp22 + [\
1920 chunk(PLAIN, ch.where, ' ')]+ cp23\
1921 + [chunk(PLAIN, ch.where, ', ')] +\
1922 cp21)]
1923 i, length = i+2, length+2
1924 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1925 chunk(GROUP, ch.where, cp33 + [\
1926 chunk(PLAIN, ch.where, ', ')]+ cp31\
1927 + [chunk(PLAIN, ch.where, ' ')] +\
1928 cp32)]
1929 i, length = i+2, length+2
1930
1931
1932 elif s(buf, ch.data) == 'indexiv':
1933 length, newi = getnextarg(length, buf, pp, i)
1934 cp11 = pp[i:newi]
1935 cp21 = crcopy(pp[i:newi])
1936 cp31 = crcopy(pp[i:newi])
1937 cp41 = crcopy(pp[i:newi])
1938 del pp[i:newi]
1939 length = length - (newi-i)
1940 length, newi = getnextarg(length, buf, pp, i)
1941 cp12 = pp[i:newi]
1942 cp22 = crcopy(pp[i:newi])
1943 cp32 = crcopy(pp[i:newi])
1944 cp42 = crcopy(pp[i:newi])
1945 del pp[i:newi]
1946 length = length - (newi-i)
1947 length, newi = getnextarg(length, buf, pp, i)
1948 cp13 = pp[i:newi]
1949 cp23 = crcopy(pp[i:newi])
1950 cp33 = crcopy(pp[i:newi])
1951 cp43 = crcopy(pp[i:newi])
1952 del pp[i:newi]
1953 length = length - (newi-i)
1954 length, newi = getnextarg(length, buf, pp, i)
1955 cp14 = pp[i:newi]
1956 cp24 = crcopy(pp[i:newi])
1957 cp34 = crcopy(pp[i:newi])
1958 cp44 = crcopy(pp[i:newi])
1959 del pp[i:newi]
1960 length = length - (newi-i)
1961
1962 ch.chtype = chunk_type(CSLINE)
1963 ch.data = 'cindex'
1964 ingroupch = cp11 + \
1965 spacech + cp12 + \
1966 spacech + cp13 + \
1967 spacech + cp14
1968 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1969 i, length = i+1, length+1
1970 ingroupch = cp22 + \
1971 spacech + cp23 + \
1972 spacech + cp24 + \
1973 commach + cp21
1974 pp[i:i] = cindexch + [\
1975 chunk(GROUP, ch.where, ingroupch)]
1976 i, length = i+2, length+2
1977 ingroupch = cp33 + \
1978 spacech + cp34 + \
1979 commach + cp31 + \
1980 spacech + cp32
1981 pp[i:i] = cindexch + [\
1982 chunk(GROUP, ch.where, ingroupch)]
1983 i, length = i+2, length+2
1984 ingroupch = cp44 + \
1985 commach + cp41 + \
1986 spacech + cp42 + \
1987 spacech + cp43
1988 pp[i:i] = cindexch + [\
1989 chunk(GROUP, ch.where, ingroupch)]
1990 i, length = i+2, length+2
1991
1992
1993
1994 else:
1995 print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
1996
1997
1998
# the characters that dumpit() prefixes with `@' when it writes PLAIN text
re_atsign = regex.compile('[@{}]')
# dumpit() replaces every newline inside a CSLINE argument by a blank
re_newline = regex.compile('\n')
2001
2002def dumpit(buf, wm, pp):
2003
2004 global out
2005
2006 i, length = 0, len(pp)
2007
2008 addspace = 0
2009
2010 while 1:
2011 if len(pp) != length:
2012 raise 'FATAL', 'inconsistent length'
2013 if i == length:
2014 break
2015 ch = pp[i]
2016 i = i + 1
2017
2018 if addspace:
2019 dospace = 1
2020 addspace = 0
2021 else:
2022 dospace = 0
2023
2024 if ch.chtype == chunk_type(CSNAME):
2025 wm('@' + s(buf, ch.data))
2026 if s(buf, ch.data) == 'node' and \
2027 pp[i].chtype == chunk_type(PLAIN) and \
2028 s(buf, pp[i].data) in out.doublenodes:
2029 ##XXX doesnt work yet??
2030 wm(' ZZZ-' + zfill(`i`, 4))
2031 if s(buf, ch.data)[0] in string.letters:
2032 addspace = 1
2033 elif ch.chtype == chunk_type(PLAIN):
2034 if dospace and s(buf, ch.data) not in (' ', '\t'):
2035 wm(' ')
2036 text = s(buf, ch.data)
2037 while 1:
2038 pos = re_atsign.search(text)
2039 if pos < 0:
2040 break
2041 wm(text[:pos] + '@' + text[pos])
2042 text = text[pos+1:]
2043 wm(text)
2044 elif ch.chtype == chunk_type(GROUP):
2045 wm('{')
2046 dumpit(buf, wm, ch.data)
2047 wm('}')
2048 elif ch.chtype == chunk_type(DENDLINE):
2049 wm('\n\n')
2050 while i != length and pp[i].chtype in \
2051 (chunk_type(DENDLINE), chunk_type(ENDLINE)):
2052 i = i + 1
2053 elif ch.chtype == chunk_type(OTHER):
2054 wm(s(buf, ch.data))
2055 elif ch.chtype == chunk_type(ACTIVE):
2056 wm(s(buf, ch.data))
2057 elif ch.chtype == chunk_type(ENDLINE):
2058 wm('\n')
2059 elif ch.chtype == chunk_type(CSLINE):
2060 if i >= 2 and pp[i-2].chtype not in \
2061 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2062 and (pp[i-2].chtype != chunk_type(PLAIN) \
2063 or s(buf, pp[i-2].data)[-1] != '\n'):
2064
2065 wm('\n')
2066 wm('@' + s(buf, ch.data))
2067 if i == length:
2068 raise error, 'CSLINE expected another chunk'
2069 if pp[i].chtype != chunk_type(GROUP):
2070 raise error, 'CSLINE expected GROUP'
2071 if type(pp[i].data) != type([]):
2072 raise error, 'GROUP chould contain []-data'
2073
2074 wobj = Wobj().init()
2075 dumpit(buf, wobj.write, pp[i].data)
2076 i = i + 1
2077 text = wobj.data
2078 del wobj
2079 if text:
2080 wm(' ')
2081 while 1:
2082 pos = re_newline.search(text)
2083 if pos < 0:
2084 break
2085 print 'WARNING: found newline in csline arg'
2086 wm(text[:pos] + ' ')
2087 text = text[pos+1:]
2088 wm(text)
2089 if i >= length or \
2090 pp[i].chtype not in (chunk_type(CSLINE), \
2091 chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2092 and (pp[i].chtype != chunk_type(PLAIN) \
2093 or s(buf, pp[i].data)[0] != '\n'):
2094 wm('\n')
2095
2096 elif ch.chtype == chunk_type(COMMENT):
Guido van Rossum7a2dba21993-11-05 14:45:11 +00002097## print 'COMMENT: previous chunk =', pp[i-2]
Guido van Rossum557ed941995-03-28 13:33:45 +00002098## if pp[i-2].chtype == chunk_type(PLAIN):
2099## print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002100 if s(buf, ch.data) and \
2101 regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
2102 if i >= 2 and pp[i-2].chtype not in \
2103 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2104 and not (pp[i-2].chtype == chunk_type(PLAIN) \
2105 and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
2106 print 'ADDING NEWLINE'
2107 wm('\n')
2108 wm('@c ' + s(buf, ch.data))
2109 elif ch.chtype == chunk_type(IGNORE):
2110 pass
2111 else:
2112 try:
2113 str = `s(buf, ch.data)`
2114 except TypeError:
2115 str = `ch.data`
2116 if len(str) > 400:
2117 str = str[:400] + '...'
2118 print 'warning:', ch.chtype, 'not handled, data ' + str
2119
2120
2121
# Return the complete contents of the named file, closing it afterwards.
def _read_file(filename):
    fp = open(filename, 'r')
    try:
        return fp.read()
    finally:
        fp.close()


# Command line driver.
#
# usage: partparse [-o outfile] [-h headerfile] [-t trailerfile] file ...
#
# Writes the header file, the texinfo produced for every input file (in
# the order given) and the trailer file to the output file.  Without -o
# the output name is the first input name with its extension replaced by
# '.texi'.  Exits with status 2 on a usage error, or when the output file
# is also named as an input file.
def main():
    outfile = None
    headerfile = 'texipre.dat'
    trailerfile = 'texipost.dat'

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'o:h:t:')
    except getopt.error:
        # Treated like a missing file argument: usage message below.
        opts, args = [], []

    if not args:
        print('usage: partparse [-o outfile] [-h headerfile]' + ' ' + \
              '[-t trailerfile] file ...')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-o':
            outfile = arg
        elif opt == '-h':
            headerfile = arg
        elif opt == '-t':
            trailerfile = arg

    if not outfile:
        root, ext = os.path.splitext(args[0])
        outfile = root + '.texi'

    if outfile in args:
        print('will not overwrite input file ' + outfile)
        sys.exit(2)

    outf = open(outfile, 'w')
    try:
        outf.write(_read_file(headerfile))

        for filename in args:
            if len(args) > 1:
                print('=' * 20 + ' ' + filename + ' ' + '=' * 20)
            buf = _read_file(filename)
            # Only the chunk list is needed; w is not used any further.
            w, pp = parseit(buf)
            startchange()
            changeit(buf, pp)
            dumpit(buf, outf.write, pp)

        outf.write(_read_file(trailerfile))
    finally:
        # Close (and so flush) the output even if a conversion step fails.
        outf.close()
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00002164
# Run the converter as soon as this file is executed.
# NOTE(review): the call is unconditional (no __name__ guard), so importing
# this file as a module also runs main() on sys.argv.
main()