blob: 83b58bd89d7da1942f910845e78fd30b12371c70 [file] [log] [blame]
Guido van Rossum95cd2ef1992-12-08 14:37:55 +00001#
# partparse.py: parse a by-Guido-written-and-by-Jan-Hein-edited LaTeX file,
# and generate texinfo source.
#
# This is *not* a good example of good programming practices.  In fact, this
# file could use a complete rewrite, in order to become faster, more
# easily extensible and maintainable.
#
# However, I added some comments in a few places for the pitiful person who
# would ever need to take a look into this file.
#
# Have I been clear enough??
#
# -jh
15
16
17import sys, string, regex
18
19# Different parse modes for phase 1
# Parse modes for phase 1, numbered consecutively from zero.
(MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
 MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE) = 0, 1, 2, 3, 4, 5, 6

# Every valid parse mode, in numeric order.
the_modes = (MODE_REGULAR, MODE_VERBATIM, MODE_CS_SCAN, MODE_COMMENT,
             MODE_MATH, MODE_DMATH, MODE_GOBBLEWHITE)
30
31# Show the neighbourhood of the scanned buffer
def epsilon(buf, where):
    """Describe the neighbourhood of position where in buf.

    Returns ' Context <before>.<after>.' where the two parts are the
    repr() of up to ten characters before and up to ten characters from
    where onwards.
    """
    before = buf[max(where - 10, 0):where]
    after = buf[where:min(where + 10, len(buf))]
    return ' Context ' + repr(before) + '.' + repr(after) + '.'
39
40# Should return the line number. never worked
def lin():
    """Return ' Line <n>.' for the module-level line counter lineno.

    The original author's note says the line number "never worked"; the
    value is reported as-is.
    """
    # Reading a global needs no 'global' declaration.
    current = lineno
    return ' Line ' + repr(current) + '.'
44
45# Displays the recursion level.
def lv(lvl):
    """Return ' Level <lvl>.' for use in diagnostics."""
    shown = repr(lvl)
    return ' Level ' + shown + '.'
48
49# Combine the three previous functions. Used often.
def lle(lvl, buf, where):
    """Return the level, line and context descriptions joined together."""
    level_part = lv(lvl)
    line_part = lin()
    context_part = epsilon(buf, where)
    return level_part + line_part + context_part
52
53
54# This class is only needed for _symbolic_ representation of the parse mode.
class Mode:
    """Symbolic wrapper around one of the MODE_* parse mode numbers."""

    def init(self, arg):
        """Store arg as the mode and return self.

        Raises ValueError when arg is not one of the_modes.
        """
        if arg in the_modes:
            self.mode = arg
            return self
        raise ValueError('mode not in the_modes')

    def __cmp__(self, other):
        """Compare by mode number.

        An operand whose type() differs from ours is first converted
        with mode().
        """
        if type(self) != type(other):
            other = mode(other)
        return cmp(self.mode, other.mode)

    def __repr__(self):
        """Return the MODE_* name; ValueError for an unknown number."""
        names = (
            (MODE_REGULAR, 'MODE_REGULAR'),
            (MODE_VERBATIM, 'MODE_VERBATIM'),
            (MODE_CS_SCAN, 'MODE_CS_SCAN'),
            (MODE_COMMENT, 'MODE_COMMENT'),
            (MODE_MATH, 'MODE_MATH'),
            (MODE_DMATH, 'MODE_DMATH'),
            (MODE_GOBBLEWHITE, 'MODE_GOBBLEWHITE'),
        )
        for number, name in names:
            if self.mode == number:
                return name
        raise ValueError('mode not in the_modes')
84
85# just a wrapper around a class initialisation
def mode(arg):
    """Create a Mode for arg (Mode.init raises ValueError if invalid)."""
    instance = Mode()
    return instance.init(arg)
88
89
# After phase 1, the text consists of chunks, each with a certain type.
# This type is assigned to the chtype member of the chunk,
# the where field contains the file position where the chunk was found,
# and the data field contains either (1) a tuple describing the start and end
# positions of the substring (usable as a slice of the buf variable),
# (2) just a string, mostly generated by the changeit routine,
# or (3) a list, describing a (recursive) subgroup of chunks.
# Chunk types and the shape of the data each one carries:
#   PLAIN         assume plaintext, data = the text
#   GROUP         group ({}), data = [chunk, chunk,..]
#   CSNAME        control sequence token, data = the command
#   COMMENT       data is the actual comment
#   DMATH         displaymath, data = [chunk, chunk,..]
#   MATH          math, see DMATH
#   OTHER         char with catcode other, data = char
#   ACTIVE        active char
#   GOBBLEDWHITE  gobbled LWSP, after CSNAME
#   ENDLINE       end-of-line, data = '\n'
#   DENDLINE      double EOL, data = '\n', indicates \par
#   ENV           LaTeX environment, data = (envname, [ch, ch, ch,..])
#   CSLINE        for texi: next chunk will be one group of args,
#                 which will all be set on one line
#   IGNORE        ignore this data
#   ENDENV        temporary end-of-group indicator
#   IF            if-directive, data = (flag, negate, [ch, ch, ch,...])
(PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
 GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV,
 IF) = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

the_types = (PLAIN, GROUP, CSNAME, COMMENT, DMATH, MATH, OTHER, ACTIVE,
             GOBBLEDWHITE, ENDLINE, DENDLINE, ENV, CSLINE, IGNORE, ENDENV,
             IF)
118
119# class, just to display symbolic name
class ChunkType:
    """Symbolic wrapper around one of the chunk type numbers in the_types."""

    def init(self, chunk_type):
        """Store chunk_type and return self.

        Raises ValueError when chunk_type is not one of the_types.  (This
        used to raise the string literal 'ValueError', which a handler
        for the ValueError exception does not catch.)
        """
        if chunk_type not in the_types:
            raise ValueError('chunk_type not in the_types')
        self.chunk_type = chunk_type
        return self

    def __cmp__(self, other):
        """Compare by type number.

        An operand whose type() differs from ours is first converted
        with chunk_type().
        """
        if type(self) != type(other):
            other = chunk_type(other)
        return cmp(self.chunk_type, other.chunk_type)

    def __repr__(self):
        """Return the symbolic name; ValueError for an unknown number."""
        names = (
            (PLAIN, 'PLAIN'),
            (GROUP, 'GROUP'),
            (CSNAME, 'CSNAME'),
            (COMMENT, 'COMMENT'),
            (DMATH, 'DMATH'),
            (MATH, 'MATH'),
            (OTHER, 'OTHER'),
            (ACTIVE, 'ACTIVE'),
            (GOBBLEDWHITE, 'GOBBLEDWHITE'),
            (DENDLINE, 'DENDLINE'),
            (ENDLINE, 'ENDLINE'),
            (ENV, 'ENV'),
            (CSLINE, 'CSLINE'),
            (IGNORE, 'IGNORE'),
            (ENDENV, 'ENDENV'),
            (IF, 'IF'),
        )
        for number, name in names:
            if self.chunk_type == number:
                return name
        raise ValueError('chunk_type not in the_types')
167
168# ...and the wrapper
def chunk_type(type):
    """Create a ChunkType for the given chunk type number."""
    wrapper = ChunkType()
    return wrapper.init(type)

# The type object of a ChunkType instance, kept for type comparisons.
chunk_type_type = type(chunk_type(0))
174
175# this class contains a part of the parsed buffer
class Chunk:
    """One piece of the parsed buffer: a type, a position and its data."""

    def init(self, chtype, where, data):
        """Fill in the chunk and return self.

        chtype is converted with chunk_type() unless its type() already
        equals chunk_type_type.  Raises TypeError when where is not a
        plain integer.
        """
        if type(chtype) != chunk_type_type:
            chtype = chunk_type(chtype)
        self.chtype = chtype
        # the position check comes after chtype is stored, as before
        if type(where) != type(0):
            raise TypeError("'where' is not a number")
        self.where, self.data = where, data
        return self

    def __repr__(self):
        """Return 'chunk' followed by the (chtype, where, data) tuple."""
        fields = (self.chtype, self.where, self.data)
        return 'chunk' + repr(fields)
190
191# and the wrapper
def chunk(chtype, where, data):
    """Create a Chunk; see Chunk.init for the argument checks."""
    fresh = Chunk()
    return fresh.init(chtype, where, data)
194
195
196
# The (string) exception raised for errors in the LaTeX input.
error = 'partparse.error'

#
# TeX's catcodes...
#
(CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
 CC_ALIGNMENT, CC_ENDLINE, CC_PARAMETER, CC_SUPERSCRIPT,
 CC_SUBSCRIPT, CC_IGNORE, CC_WHITE, CC_LETTER,
 CC_OTHER, CC_ACTIVE, CC_COMMENT, CC_INVALID) = \
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15

# and the names, indexed by catcode
cc_names = [
    'CC_ESCAPE', 'CC_LBRACE', 'CC_RBRACE', 'CC_MATHSHIFT',
    'CC_ALIGNMENT', 'CC_ENDLINE', 'CC_PARAMETER', 'CC_SUPERSCRIPT',
    'CC_SUBSCRIPT', 'CC_IGNORE', 'CC_WHITE', 'CC_LETTER',
    'CC_OTHER', 'CC_ACTIVE', 'CC_COMMENT', 'CC_INVALID'
]
238
239# Show a list of catcode-name-symbols
def pcl(codelist):
    """Return the catcode names for codelist as '[NAME, NAME, ...]'."""
    names = []
    for code in codelist:
        names.append(cc_names[code])
    return '[' + ', '.join(names) + ']'
245
246# the name of the catcode (ACTIVE, OTHER, etc.)
def pc(code):
    """Return the symbolic name of one catcode."""
    name = cc_names[code]
    return name
249
250
251# Which catcodes make the parser stop parsing regular plaintext
# Catcodes that make the parser stop scanning regular plaintext.  The
# order is kept because it fixes the order of characters in the regexp
# built from this list.
regular_stopcodes = [
    CC_ESCAPE, CC_LBRACE, CC_RBRACE, CC_MATHSHIFT,
    CC_ALIGNMENT, CC_PARAMETER, CC_SUPERSCRIPT, CC_SUBSCRIPT,
    CC_IGNORE, CC_ACTIVE, CC_COMMENT, CC_INVALID, CC_ENDLINE
]

# Catcodes that may appear in a (multi-letter) control sequence name.
csname_scancodes = [CC_LETTER]

# Catcodes that are gobbled as white space.
# (CC_ENDLINE was once part of this list as well.)
white_scancodes = [CC_WHITE]

# All catcode ids except the one for catcode ``other''.
all_but_other_codes = list(range(CC_OTHER)) + list(range(CC_OTHER + 1, 16))

# Catcodes that end a comment.
comment_stopcodes = [CC_ENDLINE]
270
271# gather all characters together, specified by a list of catcodes
272def code2string(cc, codelist):
273 print 'code2string: codelist = ' + pcl(codelist),
274 result = ''
275 for catagory in codelist:
276 if cc[catagory]:
277 result = result + cc[catagory]
278 print 'result = ' + `result`
279 return result
280
281# automatically generate all characters of catcode other, being the
282# complement set in the ASCII range (128 characters)
def make_other_codes(cc):
    """Return the ASCII characters (0..127) that cc gives no catcode.

    Every character listed under a catcode in all_but_other_codes is
    struck out; what remains is returned in ascending order.  A listed
    character with a code of 128 or more raises IndexError.
    """
    unclaimed = [1] * 128   # could be made 256, no problem
    for category in all_but_other_codes:
        chars = cc[category]
        if chars:
            for c in chars:
                unclaimed[ord(c)] = 0
    result = ''
    for code in range(128):
        if unclaimed[code]:
            result = result + chr(code)
    return result
294
295# catcode dump (which characters have which catcodes).
def dump_cc(name, cc):
    """Print a catcode table: a title, a rule, then one line per catcode.

    Raises TypeError (after the title has been printed) when cc does not
    have exactly 16 entries.
    """
    print('\t' + name)
    print('=' * (8+len(name)))
    if len(cc) != 16:
        raise TypeError('cc not good cat class')
    for code in range(16):
        print(pc(code) + '\t' + repr(cc[code]))
303
304
305# In the beginning,....
# In the beginning, no catcode has any characters.
epoch_cc = [None] * 16


# INITEX
initex_cc = epoch_cc[:]
initex_cc[CC_ESCAPE] = '\\'
initex_cc[CC_ENDLINE] = '\n'
initex_cc[CC_IGNORE] = '\0'
initex_cc[CC_WHITE] = ' '
initex_cc[CC_LETTER] = string.uppercase + string.lowercase
initex_cc[CC_COMMENT] = '%'
initex_cc[CC_INVALID] = '\x7F'
# CC_OTHER is not filled in for this table; it is not needed.


# LPLAIN: LaTeX catcode setting (see lplain.tex)
lplain_cc = initex_cc[:]
lplain_cc[CC_LBRACE] = '{'
lplain_cc[CC_RBRACE] = '}'
lplain_cc[CC_MATHSHIFT] = '$'
lplain_cc[CC_ALIGNMENT] = '&'
lplain_cc[CC_PARAMETER] = '#'
lplain_cc[CC_SUPERSCRIPT] = '^\x0B'     # '^' and C-k
lplain_cc[CC_SUBSCRIPT] = '_\x01'       # '_' and C-a
lplain_cc[CC_WHITE] = lplain_cc[CC_WHITE] + '\t'
lplain_cc[CC_ACTIVE] = '~\x0C'          # '~' and C-l
lplain_cc[CC_OTHER] = make_other_codes(lplain_cc)


# Guido's LaTeX environment catcoded '_' as ``other'', so the table
# actually used moves '_' from the subscript list to the other list.
my_cc = lplain_cc[:]
my_cc[CC_SUBSCRIPT] = my_cc[CC_SUBSCRIPT][1:]
my_cc[CC_OTHER] = my_cc[CC_OTHER] + '_'
dump_cc('my_cc', my_cc)
341
342
343
344# needed for un_re, my equivalent for regexp-quote in Emacs
# Characters that un_re protects with a backslash.
re_meaning = '\\[]^$'

def un_re(str):
    """Return str with every character of re_meaning backslash-quoted.

    This is the file's equivalent of regexp-quote in Emacs.
    """
    quoted = ''
    for ch in str:
        if ch in re_meaning:
            ch = '\\' + ch
        quoted = quoted + ch
    return quoted
354
355# NOTE the negate ('^') operator in *some* of the regexps below
def make_rc_regular(cc):
    """Compile a regexp matching any character that ends regular text.

    The characters are put in the bracket expression unquoted, so there
    are problems here if '[]' are included!!
    """
    stoppers = code2string(cc, regular_stopcodes)
    return regex.compile('[' + stoppers + ']')
359
def make_rc_cs_scan(cc):
    """Compile a regexp matching the first character that is NOT one of
    the control sequence name characters (note the negating '^')."""
    letters = code2string(cc, csname_scancodes)
    return regex.compile('[^' + letters + ']')
362
def make_rc_comment(cc):
    """Compile a regexp matching any character that ends a comment."""
    enders = code2string(cc, comment_stopcodes)
    return regex.compile('[' + enders + ']')
365
def make_rc_endwhite(cc):
    """Compile a regexp matching the first character that is NOT white
    space (note the negating '^')."""
    blanks = code2string(cc, white_scancodes)
    return regex.compile('[^' + blanks + ']')
368
369
370
371# regular: normal mode:
# The compiled scanners for the catcode table in use, built in the same
# order as before (each build prints a progress line):
#   rc_regular   normal mode
#   rc_cs_scan   scan a command sequence, e.g. `newlength' or `mbox'
#                or `;', `,' or `$'
#   rc_comment   end of a comment
#   rc_endwhite  end of a run of white space
rc_regular, rc_cs_scan = make_rc_regular(my_cc), make_rc_cs_scan(my_cc)
rc_comment, rc_endwhite = make_rc_comment(my_cc), make_rc_endwhite(my_cc)
378
379
# parseit (BUF, PARSEMODE=mode(MODE_REGULAR), START=0, RECURSION-LEVEL=0)
# RECURSION-LEVEL is incremented on entry.
# result contains the list of chunks returned;
# together with this list, the buffer position is returned.

# RECURSION-LEVEL will be set to zero *again* when a {,D}MATH-mode scan
# is entered recursively.
# This has been done in order to better check for environment mismatches.
388
389def parseit(buf, *rest):
390 global lineno
391
392 if len(rest) == 3:
393 parsemode, start, lvl = rest
394 elif len(rest) == 2:
395 parsemode, start, lvl = rest + (0, )
396 elif len(rest) == 1:
397 parsemode, start, lvl = rest + (0, 0)
398 elif len(rest) == 0:
399 parsemode, start, lvl = mode(MODE_REGULAR), 0, 0
400 else:
401 raise TypeError, 'usage: parseit(buf[, parsemode[, start[, level]]])'
402 result = []
403 end = len(buf)
404 if lvl == 0 and parsemode == mode(MODE_REGULAR):
405 lineno = 1
406 lvl = lvl + 1
407
408 ##print 'parseit(' + epsilon(buf, start) + ', ' + `parsemode` + ', ' + `start` + ', ' + `lvl` + ')'
409
410 #
411 # some of the more regular modes...
412 #
413
414 if parsemode in (mode(MODE_REGULAR), mode(MODE_DMATH), mode(MODE_MATH)):
415 cstate = []
416 newpos = start
417 curpmode = parsemode
418 while 1:
419 where = newpos
420 #print '\tnew round: ' + epsilon(buf, where)
421 if where == end:
422 if lvl > 1 or curpmode != mode(MODE_REGULAR):
423 # not the way we started...
424 raise EOFError, 'premature end of file.' + lle(lvl, buf, where)
425 # the real ending of lvl-1 parse
426 return end, result
427
428 pos = rc_regular.search(buf, where)
429
430 if pos < 0:
431 pos = end
432
433 if pos != where:
434 newpos, c = pos, chunk(PLAIN, where, (where, pos))
435 result.append(c)
436 continue
437
438
439 #
440 # ok, pos == where and pos != end
441 #
442 foundchar = buf[where]
443 if foundchar in my_cc[CC_LBRACE]:
444 # recursive subgroup parse...
445 newpos, data = parseit(buf, curpmode, where+1, lvl)
446 result.append(chunk(GROUP, where, data))
447
448 elif foundchar in my_cc[CC_RBRACE]:
449 if lvl <= 1:
450 raise error, 'ENDGROUP while in base level.' + lle(lvl, buf, where)
451 if lvl == 1 and mode != mode(MODE_REGULAR):
452 raise error, 'endgroup while in math mode. +lin() + epsilon(buf, where)'
453 return where + 1, result
454
455 elif foundchar in my_cc[CC_ESCAPE]:
456 #
457 # call the routine that actually deals with
458 # this problem. If do_ret is None, than
459 # return the value of do_ret
460 #
461 # Note that handle_cs might call this routine
462 # recursively again...
463 #
464 do_ret, newpos = handlecs(buf, where, \
465 curpmode, lvl, result, end)
466 if do_ret != None:
467 return do_ret
468
469 elif foundchar in my_cc[CC_COMMENT]:
470 newpos, data = parseit(buf, \
471 mode(MODE_COMMENT), where+1, lvl)
472 result.append(chunk(COMMENT, where, data))
473
474 elif foundchar in my_cc[CC_MATHSHIFT]:
475 # note that recursive calls to math-mode
476 # scanning are called with recursion-level 0
477 # again, in order to check for bad mathend
478 #
479 if where + 1 != end and \
480 buf[where + 1] in \
481 my_cc[CC_MATHSHIFT]:
482 #
483 # double mathshift, e.g. '$$'
484 #
485 if curpmode == mode(MODE_REGULAR):
486 newpos, data = parseit(buf, \
487 mode(MODE_DMATH), \
488 where+2, 0)
489 result.append(chunk(DMATH, \
490 where, data))
491 elif curpmode == mode(MODE_MATH):
492 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
493 elif lvl != 1:
494 raise error, 'bad mathend.' + \
495 lle(lvl, buf, where)
496 else:
497 return where + 2, result
498 else:
499 #
500 # single math shift, e.g. '$'
501 #
502 if curpmode == mode(MODE_REGULAR):
503 newpos, data = parseit(buf, \
504 mode(MODE_MATH), \
505 where+1, 0)
506 result.append(chunk(MATH, \
507 where, data))
508 elif curpmode == mode(MODE_DMATH):
509 raise error, 'wrong math delimiiter' + lin() + epsilon(buf, where)
510 elif lvl != 1:
511 raise error, 'bad mathend.' + \
512 lv(lvl, buf, where)
513 else:
514 return where + 1, result
515
516 elif foundchar in my_cc[CC_IGNORE]:
517 print 'warning: ignored char', `foundchar`
518 newpos = where + 1
519
520 elif foundchar in my_cc[CC_ACTIVE]:
521 result.append(chunk(ACTIVE, where, foundchar))
522 newpos = where + 1
523
524 elif foundchar in my_cc[CC_INVALID]:
525 raise error, 'invalid char ' + `foundchar`
526 newpos = where + 1
527
528 elif foundchar in my_cc[CC_ENDLINE]:
529 #
530 # after an end of line, eat the rest of
531 # whitespace on the beginning of the next line
532 # this is what LaTeX more or less does
533 #
534 # also, try to indicate double newlines (\par)
535 #
536 lineno = lineno + 1
537 savedwhere = where
538 newpos, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), where + 1, lvl)
539 if newpos != end and buf[newpos] in \
540 my_cc[CC_ENDLINE]:
541 result.append(chunk(DENDLINE, \
542 savedwhere, foundchar))
543 else:
544 result.append(chunk(ENDLINE, \
545 savedwhere, foundchar))
546 else:
547 result.append(chunk(OTHER, where, foundchar))
548 newpos = where + 1
549
550 elif parsemode == mode(MODE_CS_SCAN):
551 #
552 # scan for a control sequence token. `\ape', `\nut' or `\%'
553 #
554 if start == end:
555 raise EOFError, 'can\'t find end of csname'
556 pos = rc_cs_scan.search(buf, start)
557 if pos < 0:
558 pos = end
559 if pos == start:
560 # first non-letter right where we started the search
561 # ---> the control sequence name consists of one single
562 # character. Also: don't eat white space...
563 if buf[pos] in my_cc[CC_ENDLINE]:
564 lineno = lineno + 1
565 pos = pos + 1
566 return pos, (start, pos)
567 else:
568 spos = pos
569 if buf[pos] == '\n':
570 lineno = lineno + 1
571 spos = pos + 1
572 pos2, dummy = parseit(buf, \
573 mode(MODE_GOBBLEWHITE), spos, lvl)
574 return pos2, (start, pos)
575
576 elif parsemode == mode(MODE_GOBBLEWHITE):
577 if start == end:
578 return start, ''
579 pos = rc_endwhite.search(buf, start)
580 if pos < 0:
581 pos = start
582 return pos, (start, pos)
583
584 elif parsemode == mode(MODE_COMMENT):
585 pos = rc_comment.search(buf, start)
586 lineno = lineno + 1
587 if pos < 0:
588 print 'no newline perhaps?'
589 raise EOFError, 'can\'t find end of comment'
590 pos = pos + 1
591 pos2, dummy = parseit(buf, mode(MODE_GOBBLEWHITE), pos, lvl)
592 return pos2, (start, pos)
593
594
595 else:
596 raise error, 'Unknown mode (' + `parsemode` + ')'
597
598
599#moreresult = cswitch(buf[x1:x2], buf, newpos, parsemode, lvl)
600
601#boxcommands = 'mbox', 'fbox'
602#defcommands = 'def', 'newcommand'
603
604endverbstr = '\\end{verbatim}'
605
606re_endverb = regex.compile(un_re(endverbstr))
607
608#
609# handlecs: helper function for parseit, for the special thing we might
610# wanna do after certain command control sequences
611# returns: None or return_data, newpos
612#
613# in the latter case, the calling function is instructed to immediately
614# return with the data in return_data
615#
616def handlecs(buf, where, curpmode, lvl, result, end):
617 global lineno
618
619 # get the control sequence name...
620 newpos, data = parseit(buf, mode(MODE_CS_SCAN), where+1, lvl)
621 saveddata = data
622
623 if s(buf, data) in ('begin', 'end'):
624 # skip the expected '{' and get the LaTeX-envname '}'
625 newpos, data = parseit(buf, mode(MODE_REGULAR), newpos+1, lvl)
626 if len(data) != 1:
627 raise error, 'expected 1 chunk of data.' + \
628 lle(lvl, buf, where)
629
630 # yucky, we've got an environment
631 envname = s(buf, data[0].data)
632 ##print 'FOUND ' + s(buf, saveddata) + '. Name ' + `envname` + '.' + lv(lvl)
633 if s(buf, saveddata) == 'begin' and envname == 'verbatim':
634 # verbatim deserves special treatment
635 pos = re_endverb.search(buf, newpos)
636 if pos < 0:
637 raise error, `endverbstr` + ' not found.' + lle(lvl, buf, where)
638 result.append(chunk(ENV, where, (envname, [chunk(PLAIN, newpos, (newpos, pos))])))
639 newpos = pos + len(endverbstr)
640
641 elif s(buf, saveddata) == 'begin':
642 # start parsing recursively... If that parse returns
643 # from an '\end{...}', then should the last item of
644 # the returned data be a string containing the ended
645 # environment
646 newpos, data = parseit(buf, curpmode, newpos, lvl)
647 if not data or type(data[-1]) != type(''):
648 raise error, 'missing \'end\'' + lle(lvl, buf, where) + epsilon(buf, newpos)
649 retenv = data[-1]
650 del data[-1]
651 if retenv != envname:
652 #[`retenv`, `envname`]
653 raise error, 'environments do not match.' + \
654 lle(lvl, buf, where) + \
655 epsilon(buf, newpos)
656 result.append(chunk(ENV, where, (retenv, data)))
657 else:
658 # 'end'... append the environment name, as just
659 # pointed out, and order parsit to return...
660 result.append(envname)
661 ##print 'POINT of return: ' + epsilon(buf, newpos)
662 # the tuple will be returned by parseit
663 return (newpos, result), newpos
664
665 # end of \begin ... \end handling
666
667 elif s(buf, data)[0:2] == 'if':
668 # another scary monster: the 'if' directive
669 flag = s(buf, data)[2:]
670
671 # recursively call parseit, just like environment above..
672 # the last item of data should contain the if-termination
673 # e.g., 'else' of 'fi'
674 newpos, data = parseit(buf, curpmode, newpos, lvl)
675 if not data or data[-1] not in ('else', 'fi'):
676 raise error, 'wrong if... termination' + \
677 lle(lvl, buf, where) + epsilon(buf, newpos)
678
679 ifterm = data[-1]
680 del data[-1]
681 # 0 means dont_negate flag
682 result.append(chunk(IF, where, (flag, 0, data)))
683 if ifterm == 'else':
684 # do the whole thing again, there is only one way
685 # to end this one, by 'fi'
686 newpos, data = parseit(buf, curpmode, newpos, lvl)
687 if not data or data[-1] not in ('fi', ):
688 raise error, 'wrong if...else... termination' \
689 + lle(lvl, buf, where) \
690 + epsilon(buf, newpos)
691
692 ifterm = data[-1]
693 del data[-1]
694 result.append(chunk(IF, where, (flag, 1, data)))
695 #done implicitely: return None, newpos
696
697 elif s(buf, data) in ('else', 'fi'):
698 result.append(s(buf, data))
699 # order calling party to return tuple
700 return (newpos, result), newpos
701
702 # end of \if, \else, ... \fi handling
703
704 elif s(buf, saveddata) == 'verb':
705 x2 = saveddata[1]
706 result.append(chunk(CSNAME, where, data))
707 if x2 == end:
708 raise error, 'premature end of command.' + lle(lvl, buf, where)
709 delimchar = buf[x2]
710 print 'VERB: delimchar ' + `delimchar`
711 pos = regex.compile(un_re(delimchar)).search(buf, x2 + 1)
712 if pos < 0:
713 raise error, 'end of \'verb\' argument (' + \
714 `delimchar` + ') not found.' + \
715 lle(lvl, buf, where)
716 result.append(chunk(GROUP, x2, [chunk(PLAIN, x2+1, (x2+1, pos))]))
717 newpos = pos + 1
718 else:
719 result.append(chunk(CSNAME, where, data))
720 return None, newpos
721
722# this is just a function to get the string value if the possible data-tuple
def s(buf, data):
    """Return the string value of a chunk's data.

    A string is returned unchanged; a pair of two plain integers
    (x1, x2) selects buf[x1:x2].  Anything else raises TypeError.
    """
    if type(data) == type(''):
        return data
    if len(data) == 2:
        first, last = data
        if type(first) == type(0) and type(last) == type(0):
            return buf[first:last]
    raise TypeError('expected tuple of 2 integers')
730
731
732##length, data1, i = getnextarg(length, buf, pp, i + 1)
733
734# make a deep-copy of some chunks
def crcopy(r):
    """Return a new list holding a chunkcopy() of every chunk in r."""
    copies = []
    append = copies.append
    for original in r:
        append(chunkcopy(original))
    return copies
740
741
742
743# copy a chunk, would better be a method of class Chunk...
def chunkcopy(ch):
    """Return a copy of chunk ch.

    The members of a GROUP chunk are copied recursively.  For every
    other chunk type the new chunk refers to the same data object.
    """
    if ch.chtype != chunk_type(GROUP):
        return chunk(ch.chtype, ch.where, ch.data)
    members = ch.data[:]
    position = 0
    for member in ch.data:
        members[position] = chunkcopy(member)
        position = position + 1
    return chunk(GROUP, ch.where, members)
752
753
754# get next argument for TeX-macro, flatten a group (insert between)
755# or return Command Sequence token, or give back one character
756def getnextarg(length, buf, pp, item):
757
758 ##wobj = Wobj().init()
759 ##dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
760 ##print 'GETNEXTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
761
762 while item < length and pp[item].chtype == chunk_type(ENDLINE):
763 del pp[item]
764 length = length - 1
765 if item >= length:
766 raise error, 'no next arg.' + epsilon(buf, pp[-1].where)
767 if pp[item].chtype == chunk_type(GROUP):
768 newpp = pp[item].data
769 del pp[item]
770 length = length - 1
771 changeit(buf, newpp)
772 length = length + len(newpp)
773 pp[item:item] = newpp
774 item = item + len(newpp)
775 if len(newpp) < 10:
776 wobj = Wobj().init()
777 dumpit(buf, wobj.write, newpp)
778 ##print 'GETNEXTARG: inserted ' + `wobj.data`
779 return length, item
780 elif pp[item].chtype == chunk_type(PLAIN):
781 #grab one char
782 print 'WARNING: grabbing one char'
783 if len(s(buf, pp[item].data)) > 1:
784 pp.insert(item, chunk(PLAIN, pp[item].where, s(buf, pp[item].data)[:1]))
785 item, length = item+1, length+1
786 pp[item].data = s(buf, pp[item].data)[1:]
787 else:
788 item = item+1
789 return length, item
790 else:
791 try:
792 str = `s(buf, ch.data)`
793 except TypeError:
794 str = `ch.data`
795 if len(str) > 400:
796 str = str[:400] + '...'
797 print 'GETNEXTARG:', ch.chtype, 'not handled, data ' + str
798 return length, item
799
800
801# this one is needed to find the end of LaTeX's optional argument, like
802# item[...]
803re_endopt = regex.compile(']')
804
805# get a LaTeX-optional argument, you know, the square braces '[' and ']'
806def getoptarg(length, buf, pp, item):
807
808 wobj = Wobj().init()
809 dumpit(buf, wobj.write, pp[item:min(length, item + 5)])
810 ##print 'GETOPTARG, (len, item) =', `length, item` + ' ---> ' + wobj.data + ' <---'
811
812 if item >= length or \
813 pp[item].chtype != chunk_type(PLAIN) or \
814 s(buf, pp[item].data)[0] != '[':
815 return length, item
816
817 pp[item].data = s(buf, pp[item].data)[1:]
818 if len(pp[item].data) == 0:
819 del pp[item]
820 length = length-1
821
822 while 1:
823 if item == length:
824 raise error, 'No end of optional arg found'
825 if pp[item].chtype == chunk_type(PLAIN):
826 text = s(buf, pp[item].data)
827 pos = re_endopt.search(text)
828 if pos >= 0:
829 pp[item].data = text[:pos]
830 if pos == 0:
831 del pp[item]
832 length = length-1
833 else:
834 item=item+1
835 text = text[pos+1:]
836
837 while text and text[0] in ' \t':
838 text = text[1:]
839
840 if text:
841 pp.insert(item, chunk(PLAIN, 0, text))
842 length = length + 1
843 return length, item
844
845 item = item+1
846
847
848# Wobj just add write-requests to the ``data'' attribute
class Wobj:
    """Write-only sink: every write() is appended to the data attribute."""

    def init(self):
        """Start with empty data and return self."""
        self.data = ''
        return self

    def write(self, data):
        """Append data to what has been written so far."""
        collected = self.data
        self.data = collected + data
855
856# ignore these commands
# ignore these commands
ignoredcommands = ('bcode', 'ecode')

# map commands like these to themselves as plaintext
wordsselves = ('UNIX', 'ABC', 'C', 'ASCII', 'EOF')

# \{ --> {, \} --> }, etc
themselves = ('{', '}', '.', '@') + wordsselves

# these ones also themselves (see argargs macro in myformat.sty)
inargsselves = (',', '[', ']', '(', ')')

# this is how *I* would show the difference between emph and strong
# a (prefix, suffix) tuple wraps the text; code 1 means: fold to uppercase
markcmds = {
    'code': ('', ''),
    'var': 1,
    'emph': ('_', '_'),
    'strong': ('*', '*')
}

# recognise pattern {\FONTCHANGE-CMD TEXT} to \MAPPED-FC-CMD{TEXT}
fontchanges = {
    'rm': 'r',
    'it': 'i',
    'em': 'emph',
    'bf': 'b',
    'tt': 't'
}

# transparent for these commands
for_texi = ('emph', 'var', 'strong', 'code', 'kbd', 'key', 'dfn', 'samp',
            'r', 'i', 't')
875
876
877# try to remove macros and return flat text
878def flattext(buf, pp):
879 pp = crcopy(pp)
880 print '---> FLATTEXT ' + `pp`
881 wobj = Wobj().init()
882
883 i, length = 0, len(pp)
884 while 1:
885 if len(pp) != length:
886 raise 'FATAL', 'inconsistent length'
887 if i >= length:
888 break
889 ch = pp[i]
890 i = i+1
891 if ch.chtype == chunk_type(PLAIN):
892 pass
893 elif ch.chtype == chunk_type(CSNAME):
894 if s(buf, ch.data) in themselves or hist.inargs and s(buf, ch.data) in inargsselves:
895 ch.chtype = chunk_type(PLAIN)
896 elif s(buf, ch.data) == 'e':
897 ch.chtype = chunk_type(PLAIN)
898 ch.data = '\\'
899 elif len(s(buf, ch.data)) == 1 \
900 and s(buf, ch.data) in onlylatexspecial:
901 ch.chtype = chunk_type(PLAIN)
902 # if it is followed by an empty group,
903 # remove that group, it was needed for
904 # a true space
905 if i < length \
906 and pp[i].chtype==chunk_type(GROUP) \
907 and len(pp[i].data) == 0:
908 del pp[i]
909 length = length-1
910
911 elif s(buf, ch.data) in markcmds.keys():
912 length, newi = getnextarg(length, buf, pp, i)
913 str = flattext(buf, pp[i:newi])
914 del pp[i:newi]
915 length = length - (newi - i)
916 ch.chtype = chunk_type(PLAIN)
917 markcmd = s(buf, ch.data)
918 x = markcmds[markcmd]
919 if type(x) == type(()):
920 pre, after = x
921 str = pre+str+after
922 elif x == 1:
923 str = string.upper(str)
924 else:
925 raise 'FATAL', 'corrupt markcmds'
926 ch.data = str
927 else:
928 if s(buf, ch.data) not in ignoredcommands:
929 print 'WARNING: deleting command ' + `s(buf, ch.data)`
930 print 'PP' + `pp[i-1]`
931 del pp[i-1]
932 i, length = i-1, length-1
933 elif ch.chtype == chunk_type(GROUP):
934 length, newi = getnextarg(length, buf, pp, i-1)
935 i = i-1
936## str = flattext(buf, crcopy(pp[i-1:newi]))
937## del pp[i:newi]
938## length = length - (newi - i)
939## ch.chtype = chunk_type(PLAIN)
940## ch.data = str
941 else:
942 pass
943
944 dumpit(buf, wobj.write, pp)
945 print 'FLATTEXT: RETURNING ' + `wobj.data`
946 return wobj.data
947
948# try to generate node names (a bit shorter than the chapter title)
949# note that the \nodename command (see elsewhere) overules these efforts
def invent_node_names(text):
    """Try to derive a node name somewhat shorter than the title text.

    Recognised title shapes (matched case-insensitively):
      'Built-in X'  -> 'X', unless X is 'modules' or 'functions'
      'A module B'  -> 'B'
      'A object B'  -> 'A object'
      'A B ... methods and data attributes' -> 'A B'
    Any other text is returned unchanged.  The words found are printed
    on standard output.
    """
    words = text.split()

    print('WORDS ' + repr(words))

    count = len(words)
    if count == 2:
        if words[0].lower() == 'built-in' \
              and words[1].lower() not in ('modules', 'functions'):
            return words[1]
    elif count == 3:
        middle = words[1].lower()
        if middle == 'module':
            return words[2]
        if middle == 'object':
            return ' '.join(words[0:2])
    elif count > 4:
        tail = ' '.join(words[-4:]).lower()
        if tail == 'methods and data attributes':
            return ' '.join(words[:2])
    return text
967
# Matches one of the characters , ` ' @ { }
re_commas_etc = regex.compile("[,`'@{}]")

# Matches a (possibly empty) run of blanks and tabs.
re_whitespace = regex.compile('[ \t]*')
971
972
973##nodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
974
975# look if the next non-white stuff is also a command, resulting in skipping
976# double endlines (DENDLINE) too, and thus omitting \par's
977# Sometimes this is too much, maybe consider DENDLINE's as stop
def next_command_p(length, buf, pp, i, cmdname):
    """Tell whether the next non-white chunk from pp[i] on is \\cmdname.

    ENDLINE and DENDLINE chunks and PLAIN chunks consisting only of
    blanks and tabs are skipped.  Returns the index just after the
    command when it is found, and -1 otherwise.  The length argument is
    not used; len(pp) is what bounds the scan.
    """
    while i < len(pp):
        ch = pp[i]
        i = i+1
        if ch.chtype == chunk_type(ENDLINE) \
              or ch.chtype == chunk_type(DENDLINE):
            continue
        if ch.chtype == chunk_type(PLAIN):
            text = s(buf, ch.data)
            # white only: the run of blanks at the start covers it all
            if re_whitespace.search(text) == 0 and \
                  re_whitespace.match(text) == len(text):
                continue
            return -1
        if ch.chtype == chunk_type(CSNAME) and s(buf, ch.data) == cmdname:
            return i    # _after_ the command
        return -1
    return -1
998 return -1
999
1000
1001# things that are special to LaTeX, but not to texi..
# things that are special to LaTeX, but not to texi..
onlylatexspecial = '_~^$#&%'

class Struct:
    """Empty class whose instances serve as attribute containers."""
    pass

# Two containers: hist and out.  startchange() fills in their attributes.
hist = Struct()
out = Struct()
1008
def startchange():
    """Reset the attributes of hist and out to their starting values."""
    # Only attributes are assigned, so no 'global' declaration is needed.
    hist.inenv = []
    hist.nodenames = []
    hist.cindex = []
    hist.inargs = 0
    hist.enumeratenesting = 0
    hist.itemizenesting = 0

    out.doublenodes = []
    out.doublecindeces = []
1020
1021
# Canned one-chunk lists, all at position 0.
cindexch = [chunk(CSLINE, 0, 'cindex')]
commach = [chunk(PLAIN, 0, ', ')]
spacech = [chunk(PLAIN, 0, ' ')]

# the standard variation in symbols for itemize
itemizesymbols = [
    'bullet',
    'minus',
    'dots'
]

# same for enumerate
enumeratesymbols = [
    '1',
    'A',
    'a'
]
1031
1032##
1033## \begin{ {func,data,exc}desc }{name}...
1034## the resulting texi-code is dependent on the contents of indexsubitem
1035##
1036
1037# indexsubitem: `['XXX', 'function']
1038# funcdesc:
1039# deffn {`idxsi`} NAME (FUNCARGS)
1040
1041# indexsubitem: `['XXX', 'method']`
1042# funcdesc:
1043# defmethod {`idxsi[0]`} NAME (FUNCARGS)
1044
1045# indexsubitem: `['in', 'module', 'MODNAME']'
1046# datadesc:
1047# defcv data {`idxsi[1:]`} NAME
1048# excdesc:
1049# defcv exception {`idxsi[1:]`} NAME
1050# funcdesc:
1051# deffn {function of `idxsi[1:]`} NAME (FUNCARGS)
1052
1053# indexsubitem: `['OBJECT', 'attribute']'
1054# datadesc
1055# defcv attribute {`OBJECT`} NAME
1056
1057
1058## this routine will be called on \begin{funcdesc}{NAME}{ARGS}
1059## or \funcline{NAME}{ARGS}
1060##
1061def do_funcdesc(length, buf, pp, i):
1062 startpoint = i-1
1063 ch = pp[startpoint]
1064 wh = ch.where
1065 length, newi = getnextarg(length, buf, pp, i)
1066 funcname = chunk(GROUP, wh, pp[i:newi])
1067 del pp[i:newi]
1068 length = length - (newi-i)
1069 save = hist.inargs
1070 hist.inargs = 1
1071 length, newi = getnextarg(length, buf, pp, i)
1072 hist.inargs = save
1073 del save
1074 the_args = [chunk(PLAIN, wh, '()'[0])] + \
1075 pp[i:newi] + \
1076 [chunk(PLAIN, wh, '()'[1])]
1077 del pp[i:newi]
1078 length = length - (newi-i)
1079
1080 idxsi = hist.indexsubitem # words
1081 command = ''
1082 cat_class = ''
1083 if idxsi and idxsi[-1] == 'method':
1084 command = 'defmethod'
1085 cat_class = string.join(idxsi[:-1])
1086 elif len(idxsi) == 2 and idxsi[1] == 'function':
1087 command = 'deffn'
1088 cat_class = string.join(idxsi)
1089 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1090 command = 'deffn'
1091 cat_class = 'function of ' + string.join(idxsi[1:])
1092
1093 if not command:
1094 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1095
1096 ch.chtype = chunk_type(CSLINE)
1097 ch.data = command
1098
1099 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1100 cslinearg.append(chunk(PLAIN, wh, ' '))
1101 cslinearg.append(funcname)
1102 cslinearg.append(chunk(PLAIN, wh, ' '))
1103 l = len(cslinearg)
1104 cslinearg[l:l] = the_args
1105
1106 pp.insert(i, chunk(GROUP, wh, cslinearg))
1107 i, length = i+1, length+1
1108 hist.command = command
1109 return length, i
1110
1111
1112## this routine will be called on \begin{excdesc}{NAME}
1113## or \excline{NAME}
1114##
1115def do_excdesc(length, buf, pp, i):
1116 startpoint = i-1
1117 ch = pp[startpoint]
1118 wh = ch.where
1119 length, newi = getnextarg(length, buf, pp, i)
1120 excname = chunk(GROUP, wh, pp[i:newi])
1121 del pp[i:newi]
1122 length = length - (newi-i)
1123
1124 idxsi = hist.indexsubitem # words
1125 command = ''
1126 cat_class = ''
1127 class_class = ''
1128 if len(idxsi) == 2 and idxsi[1] == 'exception':
1129 command = 'defvr'
1130 cat_class = string.join(idxsi)
1131 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1132 command = 'defcv'
1133 cat_class = 'exception'
1134 class_class = string.join(idxsi[1:])
1135 elif len(idxsi) == 4 and idxsi[:3] == ['exception', 'in', 'module']:
1136 command = 'defcv'
1137 cat_class = 'exception'
1138 class_class = string.join(idxsi[2:])
1139
1140
1141 if not command:
1142 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1143
1144 ch.chtype = chunk_type(CSLINE)
1145 ch.data = command
1146
1147 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1148 cslinearg.append(chunk(PLAIN, wh, ' '))
1149 if class_class:
1150 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1151 cslinearg.append(chunk(PLAIN, wh, ' '))
1152 cslinearg.append(excname)
1153
1154 pp.insert(i, chunk(GROUP, wh, cslinearg))
1155 i, length = i+1, length+1
1156 hist.command = command
1157 return length, i
1158
1159## same for datadesc or dataline...
1160def do_datadesc(length, buf, pp, i):
1161 startpoint = i-1
1162 ch = pp[startpoint]
1163 wh = ch.where
1164 length, newi = getnextarg(length, buf, pp, i)
1165 dataname = chunk(GROUP, wh, pp[i:newi])
1166 del pp[i:newi]
1167 length = length - (newi-i)
1168
1169 idxsi = hist.indexsubitem # words
1170 command = ''
1171 cat_class = ''
1172 class_class = ''
1173 if len(idxsi) == 2 and idxsi[1] == 'attribute':
1174 command = 'defcv'
1175 cat_class = 'attribute'
1176 class_class = idxsi[0]
1177 elif len(idxsi) == 3 and idxsi[:2] == ['in', 'module']:
1178 command = 'defcv'
1179 cat_class = 'data'
1180 class_class = string.join(idxsi[1:])
1181 elif len(idxsi) == 4 and idxsi[:3] == ['data', 'in', 'module']:
1182 command = 'defcv'
1183 cat_class = 'data'
1184 class_class = string.join(idxsi[2:])
1185
1186
1187 if not command:
1188 raise error, 'don\'t know what to do with indexsubitem ' + `idxsi`
1189
1190 ch.chtype = chunk_type(CSLINE)
1191 ch.data = command
1192
1193 cslinearg = [chunk(GROUP, wh, [chunk(PLAIN, wh, cat_class)])]
1194 cslinearg.append(chunk(PLAIN, wh, ' '))
1195 if class_class:
1196 cslinearg.append(chunk(GROUP, wh, [chunk(PLAIN, wh, class_class)]))
1197 cslinearg.append(chunk(PLAIN, wh, ' '))
1198 cslinearg.append(dataname)
1199
1200 pp.insert(i, chunk(GROUP, wh, cslinearg))
1201 i, length = i+1, length+1
1202 hist.command = command
1203 return length, i
1204
1205
# regular indices: those that are not set in tt font by default.
# For a \ttindex that maps to one of these, changeit() wraps the indexed
# name in @t{...}; for every other index command it wraps the category
# text in @r{...} instead.
regindices = ('cindex', )
1208
1209# remove illegal characters from node names
1210def rm_commas_etc(text):
1211 result = ''
1212 changed = 0
1213 while 1:
1214 pos = re_commas_etc.search(text)
1215 if pos >= 0:
1216 changed = 1
1217 result = result + text[:pos]
1218 text = text[pos+1:]
1219 else:
1220 result = result + text
1221 break
1222 if changed:
1223 print 'Warning: nodename changhed to ' + `result`
1224
1225 return result
1226
# boolean flags, by name: consulted by changeit() when it meets an IF
# chunk (the chunk's contents are kept only when the flag's value, after
# the optional negation, is true; an unknown flag name is an error)
flags = {'texi': 1}
1229
1230
1231##
1232## changeit: the actual routine, that changes the contents of the parsed
1233## chunks
1234##
1235
1236def changeit(buf, pp):
1237 global onlylatexspecial, hist, out
1238
1239 i, length = 0, len(pp)
1240 while 1:
1241 # sanity check: length should always equal len(pp)
1242 if len(pp) != length:
1243 raise 'FATAL', 'inconsistent length. thought ' + `length` + ', but should really be ' + `len(pp)`
1244 if i >= length:
1245 break
1246 ch = pp[i]
1247 i = i + 1
1248
1249 if type(ch) == type(''):
1250 #normally, only chunks are present in pp,
1251 # but in some cases, some extra info
1252 # has been inserted, e.g., the \end{...} clauses
1253 raise 'FATAL', 'got string, probably too many ' + `end`
1254
1255 if ch.chtype == chunk_type(GROUP):
1256 # check for {\em ...} constructs
1257 if ch.data and \
1258 ch.data[0].chtype == chunk_type(CSNAME) and \
1259 s(buf, ch.data[0].data) in fontchanges.keys():
1260 k = s(buf, ch.data[0].data)
1261 del ch.data[0]
1262 pp.insert(i-1, chunk(CSNAME, ch.where, fontchanges[k]))
1263 length, i = length+1, i+1
1264
1265 # recursively parse the contents of the group
1266 changeit(buf, ch.data)
1267
1268 elif ch.chtype == chunk_type(IF):
1269 # \if...
1270 flag, negate, data = ch.data
1271 print 'IF: flag, negate = ' + `flag, negate`
1272 if flag not in flags.keys():
1273 raise error, 'unknown flag ' + `flag`
1274
1275 value = flags[flag]
1276 if negate:
1277 value = (not value)
1278 del pp[i-1]
1279 length, i = length-1, i-1
1280 if value:
1281 pp[i:i] = data
1282 length = length + len(data)
1283
1284
1285 elif ch.chtype == chunk_type(ENV):
1286 # \begin{...} ....
1287 envname, data = ch.data
1288
1289 #push this environment name on stack
1290 hist.inenv.insert(0, envname)
1291
1292 #append an endenv chunk after grouped data
1293 data.append(chunk(ENDENV, ch.where, envname))
1294 ##[`data`]
1295
1296 #delete this object
1297 del pp[i-1]
1298 i, length = i-1, length-1
1299
1300 #insert found data
1301 pp[i:i] = data
1302 length = length + len(data)
1303
1304 if envname == 'verbatim':
1305 pp[i:i] = [chunk(CSLINE, ch.where, 'example'), \
1306 chunk(GROUP, ch.where, [])]
1307 length, i = length+2, i+2
1308
1309 elif envname == 'itemize':
1310 if hist.itemizenesting > len(itemizesymbols):
1311 raise error, 'too deep itemize nesting'
1312 ingroupch = [chunk(CSNAME, ch.where,\
1313 itemizesymbols[hist.itemizenesting])]
1314 hist.itemizenesting = hist.itemizenesting + 1
1315 pp[i:i] = [chunk(CSLINE, ch.where, 'itemize'),\
1316 chunk(GROUP, ch.where, ingroupch)]
1317 length, i = length+2, i+2
1318
1319 elif envname == 'enumerate':
1320 if hist.enumeratenesting > len(enumeratesymbols):
1321 raise error, 'too deep enumerate nesting'
1322 ingroupch = [chunk(PLAIN, ch.where,\
1323 enumeratesymbols[hist.enumeratenesting])]
1324 hist.enumeratenesting = hist.enumeratenesting + 1
1325 pp[i:i] = [chunk(CSLINE, ch.where, 'enumerate'),\
1326 chunk(GROUP, ch.where, ingroupch)]
1327 length, i = length+2, i+2
1328
1329 elif envname == 'description':
1330 ingroupch = [chunk(CSNAME, ch.where, 'b')]
1331 pp[i:i] = [chunk(CSLINE, ch.where, 'table'), \
1332 chunk(GROUP, ch.where, ingroupch)]
1333 length, i = length+2, i+2
1334
1335 elif envname == 'tableiii':
1336 wh = ch.where
1337 newcode = []
1338
1339 #delete tabular format description
1340 # e.g., {|l|c|l|}
1341 length, newi = getnextarg(length, buf, pp, i)
1342 del pp[i:newi]
1343 length = length - (newi-i)
1344
1345 newcode.append(chunk(CSLINE, wh, 'table'))
1346 ingroupch = [chunk(CSNAME, wh, 'asis')]
1347 newcode.append(chunk(GROUP, wh, ingroupch))
1348 newcode.append(chunk(CSLINE, wh, 'item'))
1349
1350 #get the name of macro for @item
1351 # e.g., {code}
1352 length, newi = getnextarg(length, buf, pp, i)
1353
1354 if newi-i != 1:
1355 raise error, 'Sorry, expected 1 chunk argument'
1356 if pp[i].chtype != chunk_type(PLAIN):
1357 raise error, 'Sorry, expected plain text argument'
1358 hist.itemargmacro = s(buf, pp[i].data)
1359 del pp[i:newi]
1360 length = length - (newi-i)
1361
1362 for count in range(3):
1363 length, newi = getnextarg(length, buf, pp, i)
1364 emphgroup = [\
1365 chunk(CSNAME, wh, 'emph'), \
1366 chunk(GROUP, 0, pp[i:newi])]
1367 del pp[i:newi]
1368 length = length - (newi-i)
1369 if count == 0:
1370 itemarg = emphgroup
1371 elif count == 2:
1372 itembody = itembody + \
1373 [chunk(PLAIN, wh, ' --- ')] + \
1374 emphgroup
1375 else:
1376 itembody = emphgroup
1377 newcode.append(chunk(GROUP, wh, itemarg))
1378 newcode = newcode + itembody + [chunk(DENDLINE, wh, '\n')]
1379 pp[i:i] = newcode
1380 l = len(newcode)
1381 length, i = length+l, i+l
1382 del newcode, l
1383
1384 if length != len(pp):
1385 raise 'STILL, SOMETHING wrong', `i`
1386
1387
1388 elif envname == 'funcdesc':
1389 pp.insert(i, chunk(PLAIN, ch.where, ''))
1390 i, length = i+1, length+1
1391 length, i = do_funcdesc(length, buf, pp, i)
1392
1393 elif envname == 'excdesc':
1394 pp.insert(i, chunk(PLAIN, ch.where, ''))
1395 i, length = i+1, length+1
1396 length, i = do_excdesc(length, buf, pp, i)
1397
1398 elif envname == 'datadesc':
1399 pp.insert(i, chunk(PLAIN, ch.where, ''))
1400 i, length = i+1, length+1
1401 length, i = do_datadesc(length, buf, pp, i)
1402
1403 else:
1404 print 'WARNING: don\'t know what to do with env ' + `envname`
1405
1406 elif ch.chtype == chunk_type(ENDENV):
1407 envname = ch.data
1408 if envname != hist.inenv[0]:
1409 raise error, '\'end\' does not match. Name ' + `envname` + ', expected ' + `hist.inenv[0]`
1410 del hist.inenv[0]
1411 del pp[i-1]
1412 i, length = i-1, length-1
1413
1414 if envname == 'verbatim':
1415 pp[i:i] = [\
1416 chunk(CSLINE, ch.where, 'end'), \
1417 chunk(GROUP, ch.where, [\
1418 chunk(PLAIN, ch.where, 'example')])]
1419 i, length = i+2, length+2
1420 elif envname == 'itemize':
1421 hist.itemizenesting = hist.itemizenesting - 1
1422 pp[i:i] = [\
1423 chunk(CSLINE, ch.where, 'end'), \
1424 chunk(GROUP, ch.where, [\
1425 chunk(PLAIN, ch.where, 'itemize')])]
1426 i, length = i+2, length+2
1427 elif envname == 'enumerate':
1428 hist.enumeratenesting = hist.enumeratenesting-1
1429 pp[i:i] = [\
1430 chunk(CSLINE, ch.where, 'end'), \
1431 chunk(GROUP, ch.where, [\
1432 chunk(PLAIN, ch.where, 'enumerate')])]
1433 i, length = i+2, length+2
1434 elif envname == 'description':
1435 pp[i:i] = [\
1436 chunk(CSLINE, ch.where, 'end'), \
1437 chunk(GROUP, ch.where, [\
1438 chunk(PLAIN, ch.where, 'table')])]
1439 i, length = i+2, length+2
1440 elif envname == 'tableiii':
1441 pp[i:i] = [\
1442 chunk(CSLINE, ch.where, 'end'), \
1443 chunk(GROUP, ch.where, [\
1444 chunk(PLAIN, ch.where, 'table')])]
1445 i, length = i+2, length + 2
1446 pp.insert(i, chunk(DENDLINE, ch.where, '\n'))
1447 i, length = i+1, length+1
1448
1449 elif envname in ('funcdesc', 'excdesc', 'datadesc'):
1450 pp[i:i] = [\
1451 chunk(CSLINE, ch.where, 'end'), \
1452 chunk(GROUP, ch.where, [\
1453 chunk(PLAIN, ch.where, hist.command)])]
1454 i, length = i+2, length+2
1455 else:
1456 print 'WARNING: ending env ' + `envname` + 'has no actions'
1457
1458 elif ch.chtype == chunk_type(CSNAME):
1459 # control name transformations
1460 if s(buf, ch.data) in ignoredcommands:
1461 del pp[i-1]
1462 i, length = i-1, length-1
1463 elif s(buf, ch.data) == '@' and \
1464 i != length and \
1465 pp[i].chtype == chunk_type(PLAIN) and \
1466 s(buf, pp[i].data)[0] == '.':
1467 # \@. --> \. --> @.
1468 ch.data = '.'
1469 del pp[i]
1470 length = length-1
1471 elif s(buf, ch.data) == '\\':
1472 # \\ --> \* --> @*
1473 ch.data = '*'
1474 elif len(s(buf, ch.data)) == 1 and \
1475 s(buf, ch.data) in onlylatexspecial:
1476 ch.chtype = chunk_type(PLAIN)
1477 # check if such a command is followed by
1478 # an empty group: e.g., `\%{}'. If so, remove
1479 # this empty group too
1480 if i < length and \
1481 pp[i].chtype == chunk_type(GROUP) \
1482 and len(pp[i].data) == 0:
1483 del pp[i]
1484 length = length-1
1485
1486 elif hist.inargs and s(buf, ch.data) in inargsselves:
1487 # This is the special processing of the
1488 # arguments of the \begin{funcdesc}... or
1489 # \funcline... arguments
1490 # \, --> , \[ --> [, \] --> ]
1491 ch.chtype = chunk_type(PLAIN)
1492
1493 elif s(buf, ch.data) == 'renewcommand':
1494 # \renewcommand{\indexsubitem}....
1495 i, length = i-1, length-1
1496 del pp[i]
1497 length, newi = getnextarg(length, buf, pp, i)
1498 if newi-i == 1 \
1499 and i < length \
1500 and pp[i].chtype == chunk_type(CSNAME) \
1501 and s(buf, pp[i].data) == 'indexsubitem':
1502 del pp[i:newi]
1503 length = length - (newi-i)
1504 length, newi = getnextarg(length, buf, pp, i)
1505 text = flattext(buf, pp[i:newi])
1506 if text[0] != '(' or text[-1] != ')':
1507 raise error, 'expected indexsubitme enclosed in braces'
1508 words = string.split(text[1:-1])
1509 hist.indexsubitem = words
1510 del text, words
1511 else:
1512 print 'WARNING: renewcommand with unsupported arg removed'
1513 del pp[i:newi]
1514 length = length - (newi-i)
1515
1516 elif s(buf, ch.data) == 'item':
1517 ch.chtype = chunk_type(CSLINE)
1518 length, newi = getoptarg(length, buf, pp, i)
1519 ingroupch = pp[i:newi]
1520 del pp[i:newi]
1521 length = length - (newi-i)
1522 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1523 i, length = i+1, length+1
1524
1525 elif s(buf, ch.data) == 'ttindex':
1526 idxsi = hist.indexsubitem
1527
1528 cat_class = ''
1529 if len(idxsi) >= 2 and idxsi[1] in \
1530 ('method', 'function'):
1531 command = 'findex'
1532 elif len(idxsi) >= 2 and idxsi[1] in \
1533 ('exception', 'object'):
1534 command = 'vindex'
1535 else:
1536 print 'WARNING: can\'t catagorize ' + `idxsi` + ' for \'ttindex\' command'
1537 command = 'cindex'
1538
1539 if not cat_class:
1540 cat_class = '('+string.join(idxsi)+')'
1541
1542 ch.chtype = chunk_type(CSLINE)
1543 ch.data = command
1544
1545 length, newi = getnextarg(length, buf, pp, i)
1546 arg = pp[i:newi]
1547 del pp[i:newi]
1548 length = length - (newi-i)
1549
1550 cat_arg = [chunk(PLAIN, ch.where, cat_class)]
1551
1552 # determine what should be set in roman, and
1553 # what in tt-font
1554 if command in regindices:
1555
1556 arg = [chunk(CSNAME, ch.where, 't'), \
1557 chunk(GROUP, ch.where, arg)]
1558 else:
1559 cat_arg = [chunk(CSNAME, ch.where, 'r'), \
1560 chunk(GROUP, ch.where, cat_arg)]
1561
1562 ingroupch = arg + \
1563 [chunk(PLAIN, ch.where, ' ')] + \
1564 cat_arg
1565
1566 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1567 length, i = length+1, i+1
1568
1569
1570 elif s(buf, ch.data) == 'ldots':
1571 # \ldots --> \dots{} --> @dots{}
1572 ch.data = 'dots'
1573 if i == length \
1574 or pp[i].chtype != chunk_type(GROUP) \
1575 or pp[i].data != []:
1576 pp.insert(i, chunk(GROUP, ch.where, []))
1577 i, length = i+1, length+1
1578 elif s(buf, ch.data) in wordsselves:
1579 # \UNIX --> UNIX
1580 ch.chtype = chunk_type(PLAIN)
1581 if i != length \
1582 and pp[i].chtype == chunk_type(GROUP) \
1583 and pp[i].data == []:
1584 del pp[i]
1585 length = length-1
1586 elif s(buf, ch.data) in for_texi:
1587 pass
1588
1589 elif s(buf, ch.data) == 'e':
1590 # \e --> \
1591 ch.data = '\\'
1592 ch.chtype = chunk_type(PLAIN)
1593 elif s(buf, ch.data) == 'lineiii':
1594 # This is the most tricky one
1595 # \lineiii{a1}{a2}[{a3}] -->
1596 # @item @<cts. of itemargmacro>{a1}
1597 # a2 [ -- a3]
1598 #
1599 ##print 'LINEIIIIII!!!!!!!'
1600## wobj = Wobj().init()
1601## dumpit(buf, wobj.write, pp[i-1:i+5])
1602## print '--->' + wobj.data + '<----'
1603 if not hist.inenv:
1604 raise error, \
1605 'no environment for lineiii'
1606 if hist.inenv[0] != 'tableiii':
1607 raise error, \
1608 'wrong command (' + \
1609 `'lineiii'` + \
1610 ') in wrong environment (' \
1611 + `hist.inenv[0]` + ')'
1612 ch.chtype = chunk_type(CSLINE)
1613 ch.data = 'item'
1614 length, newi = getnextarg(length, buf, pp, i)
1615 ingroupch = [chunk(CSNAME, 0, \
1616 hist.itemargmacro), \
1617 chunk(GROUP, 0, pp[i:newi])]
1618 del pp[i:newi]
1619 length = length - (newi-i)
1620## print 'ITEM ARG: --->',
1621## wobj = Wobj().init()
1622## dumpit(buf, wobj.write, ingroupch)
1623## print wobj.data, '<---'
1624 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1625 grouppos = i
1626 i, length = i+1, length+1
1627 length, i = getnextarg(length, buf, pp, i)
1628 length, newi = getnextarg(length, buf, pp, i)
1629 if newi > i:
1630 # we have a 3rd arg
1631 pp.insert(i, chunk(PLAIN, ch.where, ' --- '))
1632 i = newi + 1
1633 length = length + 1
1634## pp[grouppos].data = pp[grouppos].data \
1635## + [chunk(PLAIN, ch.where, ' ')] \
1636## + pp[i:newi]
1637## del pp[i:newi]
1638## length = length - (newi-i)
1639 if length != len(pp):
1640 raise 'IN LINEIII IS THE ERR', `i`
1641
1642 elif s(buf, ch.data) in ('chapter', 'section', 'subsection', 'subsubsection'):
1643 #\xxxsection{A} ---->
1644 # @node A, , ,
1645 # @xxxsection A
1646 ## also: remove commas and quotes
1647 ch.chtype = chunk_type(CSLINE)
1648 length, newi = getnextarg(length, buf, pp, i)
1649 afternodenamecmd = next_command_p(length, buf, pp, newi, 'nodename')
1650 if afternodenamecmd < 0:
1651 cp1 = crcopy(pp[i:newi])
1652 pp[i:newi] = [\
1653 chunk(GROUP, ch.where, \
1654 pp[i:newi])]
1655 length, newi = length - (newi-i) + 1, \
1656 i+1
1657 text = flattext(buf, cp1)
1658 text = invent_node_names(text)
1659 else:
1660 length, endarg = getnextarg(length, buf, pp, afternodenamecmd)
1661 cp1 = crcopy(pp[afternodenamecmd:endarg])
1662 del pp[newi:endarg]
1663 length = length - (endarg-newi)
1664
1665 pp[i:newi] = [\
1666 chunk(GROUP, ch.where, \
1667 pp[i:newi])]
1668 length, newi = length - (newi-i) + 1, \
1669 i + 1
1670 text = flattext(buf, cp1)
1671 if text[-1] == '.':
1672 text = text[:-1]
1673 print 'FLATTEXT:', `text`
1674 if text in hist.nodenames:
1675 print 'WARNING: node name ' + `text` + ' already used'
1676 out.doublenodes.append(text)
1677 else:
1678 hist.nodenames.append(text)
1679 text = rm_commas_etc(text)
1680 pp[i-1:i-1] = [\
1681 chunk(CSLINE, ch.where, 'node'), \
1682 chunk(GROUP, ch.where, [\
1683 chunk(PLAIN, ch.where, text+', , ,')\
1684 ])]
1685 i, length = newi+2, length+2
1686
1687 elif s(buf,ch.data) == 'funcline':
1688 # fold it to a very short environment
1689 pp[i-1:i-1] = [\
1690 chunk(CSLINE, ch.where, 'end'), \
1691 chunk(GROUP, ch.where, [\
1692 chunk(PLAIN, ch.where, hist.command)])]
1693 i, length = i+2, length+2
1694 length, i = do_funcdesc(length, buf, pp, i)
1695
1696 elif s(buf,ch.data) == 'dataline':
1697 pp[i-1:i-1] = [\
1698 chunk(CSLINE, ch.where, 'end'), \
1699 chunk(GROUP, ch.where, [\
1700 chunk(PLAIN, ch.where, hist.command)])]
1701 i, length = i+2, length+2
1702 length, i = do_datadesc(length, buf, pp, i)
1703
1704 elif s(buf,ch.data) == 'excline':
1705 pp[i-1:i-1] = [\
1706 chunk(CSLINE, ch.where, 'end'), \
1707 chunk(GROUP, ch.where, [\
1708 chunk(PLAIN, ch.where, hist.command)])]
1709 i, length = i+2, length+2
1710 length, i = do_excdesc(length, buf, pp, i)
1711
1712
1713 elif s(buf, ch.data) == 'index':
1714 #\index{A} --->
1715 # @cindex A
1716 ch.chtype = chunk_type(CSLINE)
1717 ch.data = 'cindex'
1718 length, newi = getnextarg(length, buf, pp, i)
1719
1720 ingroupch = pp[i:newi]
1721 del pp[i:newi]
1722 length = length - (newi-i)
1723 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1724 length, i = length+1, i+1
1725
1726 elif s(buf, ch.data) == 'bifuncindex':
1727 ch.chtype = chunk_type(CSLINE)
1728 ch.data = 'findex'
1729 length, newi = getnextarg(length, buf, pp, i)
1730 ingroupch = pp[i:newi]
1731 del pp[i:newi]
1732 length = length - (newi-i)
1733
1734 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1735 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1736 ingroupch.append(chunk(GROUP, ch.where, [\
1737 chunk(PLAIN, ch.where, \
1738 '(built-in function)')]))
1739
1740 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1741 length, i = length+1, i+1
1742
1743
1744 elif s(buf, ch.data) == 'opindex':
1745 ch.chtype = chunk_type(CSLINE)
1746 ch.data = 'findex'
1747 length, newi = getnextarg(length, buf, pp, i)
1748 ingroupch = pp[i:newi]
1749 del pp[i:newi]
1750 length = length - (newi-i)
1751
1752 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1753 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1754 ingroupch.append(chunk(GROUP, ch.where, [\
1755 chunk(PLAIN, ch.where, \
1756 '(operator)')]))
1757
1758 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1759 length, i = length+1, i+1
1760
1761
1762 elif s(buf, ch.data) == 'bimodindex':
1763 ch.chtype = chunk_type(CSLINE)
1764 ch.data = 'pindex'
1765 length, newi = getnextarg(length, buf, pp, i)
1766 ingroupch = pp[i:newi]
1767 del pp[i:newi]
1768 length = length - (newi-i)
1769
1770 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1771 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1772 ingroupch.append(chunk(GROUP, ch.where, [\
1773 chunk(PLAIN, ch.where, \
1774 '(built-in)')]))
1775
1776 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1777 length, i = length+1, i+1
1778
1779 elif s(buf, ch.data) == 'sectcode':
1780 ch.data = 'code'
1781
1782
1783 elif s(buf, ch.data) == 'stmodindex':
1784 ch.chtype = chunk_type(CSLINE)
1785 # use the program index as module index
1786 ch.data = 'pindex'
1787 length, newi = getnextarg(length, buf, pp, i)
1788 ingroupch = pp[i:newi]
1789 del pp[i:newi]
1790 length = length - (newi-i)
1791
1792 ingroupch.append(chunk(PLAIN, ch.where, ' '))
1793 ingroupch.append(chunk(CSNAME, ch.where, 'r'))
1794 ingroupch.append(chunk(GROUP, ch.where, [\
1795 chunk(PLAIN, ch.where, \
1796 '(standard)')]))
1797
1798 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1799 length, i = length+1, i+1
1800
1801
1802 elif s(buf, ch.data) == 'stindex':
1803 # XXX must actually go to newindex st
1804 wh = ch.where
1805 ch.chtype = chunk_type(CSLINE)
1806 ch.data = 'cindex'
1807 length, newi = getnextarg(length, buf, pp, i)
1808 ingroupch = [chunk(CSNAME, wh, 'code'), \
1809 chunk(GROUP, wh, pp[i:newi])]
1810
1811 del pp[i:newi]
1812 length = length - (newi-i)
1813
1814 t = ingroupch[:]
1815 t.append(chunk(PLAIN, wh, ' statement'))
1816
1817 pp.insert(i, chunk(GROUP, wh, t))
1818 i, length = i+1, length+1
1819
1820 pp.insert(i, chunk(CSLINE, wh, 'cindex'))
1821 i, length = i+1, length+1
1822
1823 t = ingroupch[:]
1824 t.insert(0, chunk(PLAIN, wh, 'statement, '))
1825
1826 pp.insert(i, chunk(GROUP, wh, t))
1827 i, length = i+1, length+1
1828
1829
1830 elif s(buf, ch.data) == 'indexii':
1831 #\indexii{A}{B} --->
1832 # @cindex A B
1833 # @cindex B, A
1834 length, newi = getnextarg(length, buf, pp, i)
1835 cp11 = pp[i:newi]
1836 cp21 = crcopy(pp[i:newi])
1837 del pp[i:newi]
1838 length = length - (newi-i)
1839 length, newi = getnextarg(length, buf, pp, i)
1840 cp12 = pp[i:newi]
1841 cp22 = crcopy(pp[i:newi])
1842 del pp[i:newi]
1843 length = length - (newi-i)
1844
1845 ch.chtype = chunk_type(CSLINE)
1846 ch.data = 'cindex'
1847 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1848 chunk(PLAIN, ch.where, ' ')] + cp12))
1849 i, length = i+1, length+1
1850 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1851 chunk(GROUP, ch.where, cp22 + [\
1852 chunk(PLAIN, ch.where, ', ')]+ cp21)]
1853 i, length = i+2, length+2
1854
1855 elif s(buf, ch.data) == 'indexiii':
1856 length, newi = getnextarg(length, buf, pp, i)
1857 cp11 = pp[i:newi]
1858 cp21 = crcopy(pp[i:newi])
1859 cp31 = crcopy(pp[i:newi])
1860 del pp[i:newi]
1861 length = length - (newi-i)
1862 length, newi = getnextarg(length, buf, pp, i)
1863 cp12 = pp[i:newi]
1864 cp22 = crcopy(pp[i:newi])
1865 cp32 = crcopy(pp[i:newi])
1866 del pp[i:newi]
1867 length = length - (newi-i)
1868 length, newi = getnextarg(length, buf, pp, i)
1869 cp13 = pp[i:newi]
1870 cp23 = crcopy(pp[i:newi])
1871 cp33 = crcopy(pp[i:newi])
1872 del pp[i:newi]
1873 length = length - (newi-i)
1874
1875 ch.chtype = chunk_type(CSLINE)
1876 ch.data = 'cindex'
1877 pp.insert(i, chunk(GROUP, ch.where, cp11 + [\
1878 chunk(PLAIN, ch.where, ' ')] + cp12 \
1879 + [chunk(PLAIN, ch.where, ' ')] \
1880 + cp13))
1881 i, length = i+1, length+1
1882 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1883 chunk(GROUP, ch.where, cp22 + [\
1884 chunk(PLAIN, ch.where, ' ')]+ cp23\
1885 + [chunk(PLAIN, ch.where, ', ')] +\
1886 cp21)]
1887 i, length = i+2, length+2
1888 pp[i:i] = [chunk(CSLINE, ch.where, 'cindex'), \
1889 chunk(GROUP, ch.where, cp33 + [\
1890 chunk(PLAIN, ch.where, ', ')]+ cp31\
1891 + [chunk(PLAIN, ch.where, ' ')] +\
1892 cp32)]
1893 i, length = i+2, length+2
1894
1895
1896 elif s(buf, ch.data) == 'indexiv':
1897 length, newi = getnextarg(length, buf, pp, i)
1898 cp11 = pp[i:newi]
1899 cp21 = crcopy(pp[i:newi])
1900 cp31 = crcopy(pp[i:newi])
1901 cp41 = crcopy(pp[i:newi])
1902 del pp[i:newi]
1903 length = length - (newi-i)
1904 length, newi = getnextarg(length, buf, pp, i)
1905 cp12 = pp[i:newi]
1906 cp22 = crcopy(pp[i:newi])
1907 cp32 = crcopy(pp[i:newi])
1908 cp42 = crcopy(pp[i:newi])
1909 del pp[i:newi]
1910 length = length - (newi-i)
1911 length, newi = getnextarg(length, buf, pp, i)
1912 cp13 = pp[i:newi]
1913 cp23 = crcopy(pp[i:newi])
1914 cp33 = crcopy(pp[i:newi])
1915 cp43 = crcopy(pp[i:newi])
1916 del pp[i:newi]
1917 length = length - (newi-i)
1918 length, newi = getnextarg(length, buf, pp, i)
1919 cp14 = pp[i:newi]
1920 cp24 = crcopy(pp[i:newi])
1921 cp34 = crcopy(pp[i:newi])
1922 cp44 = crcopy(pp[i:newi])
1923 del pp[i:newi]
1924 length = length - (newi-i)
1925
1926 ch.chtype = chunk_type(CSLINE)
1927 ch.data = 'cindex'
1928 ingroupch = cp11 + \
1929 spacech + cp12 + \
1930 spacech + cp13 + \
1931 spacech + cp14
1932 pp.insert(i, chunk(GROUP, ch.where, ingroupch))
1933 i, length = i+1, length+1
1934 ingroupch = cp22 + \
1935 spacech + cp23 + \
1936 spacech + cp24 + \
1937 commach + cp21
1938 pp[i:i] = cindexch + [\
1939 chunk(GROUP, ch.where, ingroupch)]
1940 i, length = i+2, length+2
1941 ingroupch = cp33 + \
1942 spacech + cp34 + \
1943 commach + cp31 + \
1944 spacech + cp32
1945 pp[i:i] = cindexch + [\
1946 chunk(GROUP, ch.where, ingroupch)]
1947 i, length = i+2, length+2
1948 ingroupch = cp44 + \
1949 commach + cp41 + \
1950 spacech + cp42 + \
1951 spacech + cp43
1952 pp[i:i] = cindexch + [\
1953 chunk(GROUP, ch.where, ingroupch)]
1954 i, length = i+2, length+2
1955
1956
1957
1958 else:
1959 print 'don\'t know what to do with keyword ' + `s(buf, ch.data)`
1960
1961
1962
# Patterns used by dumpit():
# re_atsign  -- the characters that get an `@' put in front of them when
#               plain text is written out
# re_newline -- newlines, which are replaced by a space inside the
#               argument of a CSLINE command
re_atsign = regex.compile('[@{}]')
re_newline = regex.compile('\n')
1965
1966def dumpit(buf, wm, pp):
1967
1968 global out
1969
1970 i, length = 0, len(pp)
1971
1972 addspace = 0
1973
1974 while 1:
1975 if len(pp) != length:
1976 raise 'FATAL', 'inconsistent length'
1977 if i == length:
1978 break
1979 ch = pp[i]
1980 i = i + 1
1981
1982 if addspace:
1983 dospace = 1
1984 addspace = 0
1985 else:
1986 dospace = 0
1987
1988 if ch.chtype == chunk_type(CSNAME):
1989 wm('@' + s(buf, ch.data))
1990 if s(buf, ch.data) == 'node' and \
1991 pp[i].chtype == chunk_type(PLAIN) and \
1992 s(buf, pp[i].data) in out.doublenodes:
1993 ##XXX doesnt work yet??
1994 wm(' ZZZ-' + zfill(`i`, 4))
1995 if s(buf, ch.data)[0] in string.letters:
1996 addspace = 1
1997 elif ch.chtype == chunk_type(PLAIN):
1998 if dospace and s(buf, ch.data) not in (' ', '\t'):
1999 wm(' ')
2000 text = s(buf, ch.data)
2001 while 1:
2002 pos = re_atsign.search(text)
2003 if pos < 0:
2004 break
2005 wm(text[:pos] + '@' + text[pos])
2006 text = text[pos+1:]
2007 wm(text)
2008 elif ch.chtype == chunk_type(GROUP):
2009 wm('{')
2010 dumpit(buf, wm, ch.data)
2011 wm('}')
2012 elif ch.chtype == chunk_type(DENDLINE):
2013 wm('\n\n')
2014 while i != length and pp[i].chtype in \
2015 (chunk_type(DENDLINE), chunk_type(ENDLINE)):
2016 i = i + 1
2017 elif ch.chtype == chunk_type(OTHER):
2018 wm(s(buf, ch.data))
2019 elif ch.chtype == chunk_type(ACTIVE):
2020 wm(s(buf, ch.data))
2021 elif ch.chtype == chunk_type(ENDLINE):
2022 wm('\n')
2023 elif ch.chtype == chunk_type(CSLINE):
2024 if i >= 2 and pp[i-2].chtype not in \
2025 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2026 and (pp[i-2].chtype != chunk_type(PLAIN) \
2027 or s(buf, pp[i-2].data)[-1] != '\n'):
2028
2029 wm('\n')
2030 wm('@' + s(buf, ch.data))
2031 if i == length:
2032 raise error, 'CSLINE expected another chunk'
2033 if pp[i].chtype != chunk_type(GROUP):
2034 raise error, 'CSLINE expected GROUP'
2035 if type(pp[i].data) != type([]):
2036 raise error, 'GROUP chould contain []-data'
2037
2038 wobj = Wobj().init()
2039 dumpit(buf, wobj.write, pp[i].data)
2040 i = i + 1
2041 text = wobj.data
2042 del wobj
2043 if text:
2044 wm(' ')
2045 while 1:
2046 pos = re_newline.search(text)
2047 if pos < 0:
2048 break
2049 print 'WARNING: found newline in csline arg'
2050 wm(text[:pos] + ' ')
2051 text = text[pos+1:]
2052 wm(text)
2053 if i >= length or \
2054 pp[i].chtype not in (chunk_type(CSLINE), \
2055 chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2056 and (pp[i].chtype != chunk_type(PLAIN) \
2057 or s(buf, pp[i].data)[0] != '\n'):
2058 wm('\n')
2059
2060 elif ch.chtype == chunk_type(COMMENT):
2061 print 'COMMENT: previous chunk =', pp[i-2]
2062 if pp[i-2].chtype == chunk_type(PLAIN):
2063 print 'PLAINTEXT =', `s(buf, pp[i-2].data)`
2064 if s(buf, ch.data) and \
2065 regex.match('^[ \t]*$', s(buf, ch.data)) < 0:
2066 if i >= 2 and pp[i-2].chtype not in \
2067 (chunk_type(ENDLINE), chunk_type(DENDLINE)) \
2068 and not (pp[i-2].chtype == chunk_type(PLAIN) \
2069 and regex.match('\\(.\\|\n\\)*[ \t]*\n$', s(buf, pp[i-2].data)) >= 0):
2070 print 'ADDING NEWLINE'
2071 wm('\n')
2072 wm('@c ' + s(buf, ch.data))
2073 elif ch.chtype == chunk_type(IGNORE):
2074 pass
2075 else:
2076 try:
2077 str = `s(buf, ch.data)`
2078 except TypeError:
2079 str = `ch.data`
2080 if len(str) > 400:
2081 str = str[:400] + '...'
2082 print 'warning:', ch.chtype, 'not handled, data ' + str
2083
2084
2085
2086from posix import popen
2087
def main():
    # Command-line driver.
    #
    # Converts the LaTeX file named by sys.argv[1], followed by any
    # further files named on the command line, and writes the result to
    # '@out.texi'.  The contents of 'texipre.dat' are copied in front of
    # the converted text and those of 'texipost.dat' after it.
    buf = open(sys.argv[1], 'r').read()
    restargs = sys.argv[2:]

    # the first file is parsed and transformed before the output file is
    # opened
    w, pp = parseit(buf)
    startchange()
    changeit(buf, pp)

    outf = open('@out.texi', 'w')

    # preamble, copied line by line
    preamble = open('texipre.dat', 'r')
    l = preamble.readline()
    while l:
        outf.write(l)
        l = preamble.readline()
    preamble.close()

    dumpit(buf, outf.write, pp)

    # the remaining files, each with startchange() called anew
    for name in restargs:
        del buf, pp
        buf = open(name, 'r').read()
        w, pp = parseit(buf)
        startchange()
        changeit(buf, pp)
        dumpit(buf, outf.write, pp)

    # postamble, copied line by line
    postamble = open('texipost.dat', 'r')
    l = postamble.readline()
    while l:
        outf.write(l)
        l = postamble.readline()
    postamble.close()

    outf.close()
2136
2137