blob: 636459a3154b004b73b1477d8d64d3103f869c13 [file] [log] [blame]
Fred Drake361ee651998-03-06 21:29:00 +00001#! /usr/bin/env python
2
3"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
4
5The output file has an extension of '.bkm' instead of '.out', since hyperref
Fred Drake8c1e1141998-10-07 14:12:20 +00006already uses that extension.
Fred Drake361ee651998-03-06 21:29:00 +00007"""
8
Fred Drake473a90e1998-03-07 15:34:50 +00009import getopt
Fred Drake361ee651998-03-06 21:29:00 +000010import os
11import re
12import string
13import sys
14
15
# Each entry is a tuple of:
#
#   Section type, Section #, Title String, Page #, List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.
Fred Drake361ee651998-03-06 21:29:00 +000021
22cline_re = r"""^
23\\contentsline\ \{([a-z]*)} # type of section in $1
24\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
25(.*)} # title string
Fred Drake77878412000-10-07 12:50:05 +000026\{(\d+)}$""" # page number
Fred Drake361ee651998-03-06 21:29:00 +000027
28cline_rx = re.compile(cline_re, re.VERBOSE)
29
30OUTER_TO_INNER = -1
31
32_transition_map = {
33 ('chapter', 'section'): OUTER_TO_INNER,
34 ('section', 'subsection'): OUTER_TO_INNER,
35 ('subsection', 'subsubsection'): OUTER_TO_INNER,
36 ('subsubsection', 'subsection'): 1,
37 ('subsection', 'section'): 1,
38 ('section', 'chapter'): 1,
39 ('subsection', 'chapter'): 2,
40 ('subsubsection', 'section'): 2,
41 ('subsubsection', 'chapter'): 3,
42 }
43
Fred Drake8c1e1141998-10-07 14:12:20 +000044INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")
45
46
def parse_toc(fp, bigpart=None):
    """Parse a LaTeX .toc file into a nested table of contents.

    fp is read one line at a time with readline() until it returns an
    empty string.  bigpart names the sectioning level expected for the
    top-level entries; when it is None or empty, 'chapter' is used.

    Returns the list of top-level entries.  Each entry is a tuple of
    (section type, section number or None, cleaned title, page number as
    an int, list of sub-entries).

    Lines that do not match cline_rx are echoed to stderr, prefixed with
    their line number.  Entries whose type is not in INCLUDED_LEVELS are
    dropped.  A change of level that _transition_map does not list raises
    KeyError.
    """
    toc = top = []
    # stack[0] is the list currently being filled; its ancestors follow.
    stack = [toc]
    level = bigpart or 'chapter'
    lineno = 0
    while True:
        line = fp.readline()
        if not line:
            break
        lineno = lineno + 1
        m = cline_rx.match(line)
        if m is None:
            # Report the unparsed line along with where it was found.
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        title = clean_title(title)
        entry = (stype, snum, title, int(pageno), [])
        if stype == level:
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        direction = _transition_map[(level, stype)]
        if direction == OUTER_TO_INNER:
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            # Climb back out by `direction` nesting levels.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top
82
83
# Macros that are handled together with their first braced argument.
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
# A backslash followed by letters, plus the whitespace that follows it.
title_rx = re.compile(r"\\([a-zA-Z])+\s+")


def clean_title(title):
    """Return title with LaTeX markup stripped down to plain text.

    The steps, in order:

    * every raisebox macro and its first braced argument is removed;
    * every hackscore macro and its braced argument becomes a
      backslash-underscore pair;
    * every remaining macro that is followed by whitespace is removed
      together with that whitespace, except text starting with
      textunderscore, which is kept;
    * all '{' and '}' characters are removed.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while True:
        m = title_rx.search(title, pos)
        if m is None:
            break
        start = m.start()
        if title[start:start + 15] == "\\textunderscore":
            # Keep this macro; step past its backslash so the scan advances.
            pos = start + 1
        else:
            title = title[:start] + title[m.end():]
            # Re-scan from the same offset: the text that slid into this
            # position may itself begin with another macro.
            pos = start
    # str.replace works the same on Python 2 and 3, unlike the old
    # string.maketrans / two-argument translate combination.
    return title.replace("{", "").replace("}", "")
Fred Drake361ee651998-03-06 21:29:00 +0000104
105
def write_toc(toc, fp):
    """Write every top-level entry of toc, and its sub-entries, to fp.

    toc is a list of entry tuples as accepted by write_toc_entry(); each
    one is written starting at layer 0.
    """
    top_layer = 0
    for item in toc:
        write_toc_entry(item, fp, top_layer)
Fred Drake361ee651998-03-06 21:29:00 +0000109
def write_toc_entry(entry, fp, layer):
    """Write one pdfoutline line for entry to fp, then recurse into its children.

    entry is a tuple of (section type, section number, title, page number,
    list of sub-entries).  The line gets a "count -N" field when the entry
    has N sub-entries, and the section number is prepended to the title
    when it is non-empty.  layer is the nesting depth; it is only passed
    on, incremented, to the recursive calls.
    """
    stype, snum, title, pageno, children = entry
    pieces = ["\\pdfoutline goto name{page%03d}" % pageno]
    if children:
        pieces.append("count -%d" % len(children))
    label = title
    if snum:
        label = "%s %s" % (snum, title)
    pieces.append("{%s}\n" % label)
    fp.write(" ".join(pieces))
    for child in children:
        write_toc_entry(child, fp, layer + 1)
Fred Drake361ee651998-03-06 21:29:00 +0000121
122
def process(ifn, ofn, bigpart=None):
    """Convert the .toc file named ifn into the bookmark file named ofn.

    bigpart is passed through to parse_toc().  The input is parsed
    completely before the output file is opened, so a failure while
    parsing does not truncate an existing ofn.  Both files are closed
    before returning, including when an exception is raised.
    """
    with open(ifn) as infile:
        toc = parse_toc(infile, bigpart)
    with open(ofn, "w") as outfile:
        write_toc(toc, outfile)
126
127
def _print_usage():
    """Write a one-line usage summary to stderr."""
    prog = os.path.basename(sys.argv[0])
    sys.stderr.write("usage: %s [-c bigpart] file[.toc] ...\n" % prog)


def main():
    """Command-line entry point: convert each named .toc file to a .bkm file.

    Recognizes one option, -c, whose argument is passed to process() as
    bigpart; the first option given is the one used.  Each remaining
    argument is a file name: a missing extension defaults to '.toc', and
    the output goes to the same base name with the extension '.bkm'.

    Exits with status 2, after printing a usage line to stderr, when an
    option is invalid or no file names are given.
    """
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        _print_usage()
        sys.exit(2)
    if opts:
        bigpart = opts[0][1]
    if not args:
        _print_usage()
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)


if __name__ == "__main__":
    main()