blob: ab669ba95955a13a2b13da86f2e835185776bd02 [file] [log] [blame]
Fred Drake361ee651998-03-06 21:29:00 +00001#! /usr/bin/env python
2
3"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
4
5The output file has an extension of '.bkm' instead of '.out', since hyperref
Fred Drake8c1e1141998-10-07 14:12:20 +00006already uses that extension.
Fred Drake361ee651998-03-06 21:29:00 +00007"""
8
Fred Drake473a90e1998-03-07 15:34:50 +00009import getopt
Fred Drake361ee651998-03-06 21:29:00 +000010import os
11import re
12import string
13import sys
14
15
# Each entry is a tuple of:
#
#   Section type, Section #, Title String, Page #, List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.
Fred Drake361ee651998-03-06 21:29:00 +000021
# Pattern for a single \contentsline entry of a .toc file.  It is written
# for verbose mode, so unescaped whitespace and the trailing "#" remarks are
# not part of what is matched.
cline_re = r"""^
\\contentsline\ \{([a-z]*)} # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
(.*)} # title string
\{(\d+)}$""" # page number

# Compiled form; groups are (type, number or None, title, page).
cline_rx = re.compile(cline_re, flags=re.X)
29
# Marker stored in _transition_map for a step one level deeper; every other
# value in the map is the positive number of levels to climb back out.
OUTER_TO_INNER = -1

# Sectioning levels that take part in the outline, outermost first.
INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")


def _build_transition_map(levels):
    """Return the {(current level, new level): direction} table for levels.

    A step from a level to the one directly below it maps to OUTER_TO_INNER;
    a step from a level to any level above it maps to how many levels apart
    the two are.  No other pair is put in the table.
    """
    table = {}
    for depth in range(1, len(levels)):
        inner = levels[depth]
        table[(levels[depth - 1], inner)] = OUTER_TO_INNER
        for outer_depth in range(depth):
            table[(inner, levels[outer_depth])] = depth - outer_depth
    return table


_transition_map = _build_transition_map(INCLUDED_LEVELS)
45
46
class BadSectionNesting(Exception):
    """Error for a change of section level that is not supported.

    Instances carry the level that was current (level), the level that was
    encountered (newsection), and where it was seen (path and lineno).
    """

    def __init__(self, level, newsection, path, lineno):
        self.level, self.newsection = level, newsection
        self.path, self.lineno = path, lineno

    def __str__(self):
        details = (self.level, self.newsection, self.path, self.lineno)
        return "illegal transition from %s to %s at %s (line %s)" % details
59
60
def parse_toc(fp, bigpart=None):
    """Parse a LaTeX .toc file into a nested list of outline entries.

    fp is an open file-like object providing readline().  bigpart names the
    sectioning level the file is expected to start at; it defaults to
    'chapter'.

    Returns a list of (type, number, title, page, sub-entries) tuples, where
    sub-entries is a list of the same shape.  An entry whose type is neither
    the current level nor listed in INCLUDED_LEVELS is skipped.  Lines that
    do not look like \\contentsline entries are echoed to stderr, prefixed
    with their line number.

    Raises BadSectionNesting when an entry cannot be attached to the outline
    built so far.
    """
    toc = top = []
    # stack[0] is the list currently being appended to; the lists of the
    # enclosing levels follow it, outermost last.
    stack = [toc]
    level = bigpart or 'chapter'
    # Not every file-like object has a name (e.g. in-memory buffers).
    path = getattr(fp, "name", "<input>")
    lineno = 0
    while True:
        line = fp.readline()
        if not line:
            break
        lineno = lineno + 1
        m = cline_rx.match(line)
        if not m:
            # Report the unparsed line together with its line number.
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype != level:
            if stype not in INCLUDED_LEVELS:
                # we don't want paragraphs & subparagraphs
                continue
            try:
                direction = _transition_map[(level, stype)]
            except KeyError:
                raise BadSectionNesting(level, stype, path, lineno)
            if direction == OUTER_TO_INNER:
                if not toc:
                    # A deeper entry with no parent to hang it on.
                    raise BadSectionNesting(level, stype, path, lineno)
                # Descend into the sub-entry list of the latest entry.
                toc = toc[-1][-1]
                stack.insert(0, toc)
            else:
                if direction >= len(stack):
                    # Asked to climb above the outermost list.
                    raise BadSectionNesting(level, stype, path, lineno)
                del stack[:direction]
                toc = stack[0]
            level = stype
        toc.append(entry)
    return top
99
100
# \hackscore with its braced argument; clean_title() rewrites it to \_.
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
# \raisebox with its first braced argument; clean_title() drops it.
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
# A backslash, one or more letters, and the whitespace that follows them.
title_rx = re.compile(r"\\([a-zA-Z])+\s+")
# No longer used by clean_title(); the name is kept so that anything still
# referring to it keeps working.  string.maketrans only exists on Python 2.
title_trans = string.maketrans("", "") if hasattr(string, "maketrans") else None


def clean_title(title):
    """Return title with TeX markup reduced to plain text.

    \\raisebox{...} is removed, \\hackscore{...} becomes \\_, every macro
    that is followed by whitespace is removed together with that whitespace
    (except \\textunderscore, which is left alone), and finally all braces
    are stripped.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while True:
        m = title_rx.search(title, pos)
        if not m:
            break
        start = m.start()
        if title.startswith("\\textunderscore", start):
            # Kept as is; carry on after it.
            pos = m.end()
        else:
            title = title[:start] + title[m.end():]
            # Rescan from the cut: another macro may now begin exactly here.
            pos = start
    # Plain replace() works on every Python version, unlike the
    # two-argument form of str.translate().
    return title.replace("{", "").replace("}", "")
Fred Drake361ee651998-03-06 21:29:00 +0000121
122
def write_toc(toc, fp):
    """Write every top-level entry of toc, and all below it, to fp."""
    for top_level_entry in toc:
        # Top-level entries are at layer 0.
        write_toc_entry(top_level_entry, fp, 0)
Fred Drake361ee651998-03-06 21:29:00 +0000126
def write_toc_entry(entry, fp, layer):
    """Write the \\pdfoutline line for entry, then those of its sub-entries.

    entry is a (type, number, title, page, sub-entries) tuple.  The line
    targets the destination named "pageNNN" (page number zero-padded to
    three digits); when there are sub-entries, a "count" with the negated
    number of direct sub-entries is included.  The section number, if any,
    is put in front of the title.  layer is the nesting depth; it is only
    passed on (plus one) to the recursive calls.
    """
    _stype, number, heading, page, children = entry
    pieces = ["\\pdfoutline goto name{page%03d}" % page]
    if children:
        pieces.append("count -%d" % len(children))
    label = heading
    if number:
        label = "%s %s" % (number, heading)
    pieces.append("{%s}\n" % label)
    fp.write(" ".join(pieces))
    for child in children:
        write_toc_entry(child, fp, layer + 1)
Fred Drake361ee651998-03-06 21:29:00 +0000138
139
def process(ifn, ofn, bigpart=None):
    """Read the .toc file ifn and write its outline to the file ofn.

    bigpart is handed to parse_toc() unchanged.  The input is parsed
    completely before the output file is opened, so a parse error leaves
    any existing output file untouched.  Both files are closed on return,
    which also guarantees that the output has been flushed.
    """
    with open(ifn) as infile:
        toc = parse_toc(infile, bigpart)
    with open(ofn, "w") as outfile:
        write_toc(toc, outfile)
143
144
def usage():
    """Write a one-line usage summary to stderr."""
    program = os.path.basename(sys.argv[0])
    sys.stderr.write("usage: %s [-c level] file[.toc] ...\n" % program)


def main():
    """Command-line entry point.

    Accepts an optional "-c level" option, whose value is passed on as the
    bigpart argument of process(), followed by one or more file names.  A
    name without an extension gets ".toc" appended; the output goes to the
    same base name with the extension ".bkm".  Exits with status 2 after
    printing a usage message when the options are invalid or no file is
    given.
    """
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        usage()
        sys.exit(2)
    if opts:
        bigpart = opts[0][1]
    if not args:
        usage()
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)


if __name__ == "__main__":
    main()