#! /usr/bin/env python
#  -*- Python -*-
"""usage: %(program)s [options...] file ...

Options specifying formats to build:
    --html              HyperText Markup Language (default)
    --pdf               Portable Document Format
    --ps                PostScript
    --dvi               'DeVice Independent' format from TeX
    --text              ASCII text (requires lynx)

    More than one output format may be specified, or --all.

HTML options:
    --address, -a       Specify an address for page footers.
    --link              Specify the number of levels to include on each page.
    --split, -s         Specify a section level for page splitting, default: %(max_split_depth)s.
    --iconserver, -i    Specify location of icons (default: ../).
    --image-type        Specify the image type to use in HTML output;
                        values: gif (default), png.
    --numeric           Don't rename the HTML files; just keep node#.html for
                        the filenames.
    --style             Specify the CSS file to use for the output (filename,
                        not a URL).
    --up-link           URL to a parent document.
    --up-title          Title of a parent document.

Other options:
    --a4                Format for A4 paper.
    --letter            Format for US letter paper (the default).
    --help, -H          Show this text.
    --logging, -l       Log stdout and stderr to a file (*.how).
    --debugging, -D     Echo commands as they are executed.
    --keep, -k          Keep temporary files around.
    --quiet, -q         Do not print command output to stdout.
                        (stderr is also lost, sorry; see *.how for errors)
"""
38
39import getopt
40import glob
41import os
42import re
43import shutil
44import string
45import sys
46import tempfile
47
48
# Directory taken from the first sys.path entry; the support files below are
# all located relative to it.
MYDIR = os.path.abspath(sys.path[0])

# Support files shipped next to this script.
ISTFILE = os.path.join(MYDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(MYDIR, "perl", "l2hinit.perl")

# External programs, looked up on PATH when a command is run.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
64
65
def usage(options):
    """Print the usage message to stdout.

    The module docstring serves as a %-format template; *options* supplies
    the substitution values through key lookup.
    """
    text = __doc__ % options
    print(text)
68
def error(options, message, err=2):
    """Report a fatal error together with the usage text, then exit.

    *message* is printed first, followed by a blank line and the usage
    message.  Everything goes to stderr.  The process then terminates with
    exit status *err* (default 2); the status used to be hard-coded to 2,
    which made the parameter meaningless.
    """
    # Rebind stdout so the prints below *and* the one inside usage() all end
    # up on stderr; the process exits right after, so this is never undone.
    sys.stdout = sys.stderr
    print(message)
    print("")
    usage(options)
    sys.exit(err)
75
76
class Options:
    """Build settings collected from the command line.

    The class attributes are the defaults; parse() overrides them on the
    instance.  Instances also answer ``options[name]`` so that the usage
    text can be filled in with ``__doc__ % options``.
    """
    program = os.path.basename(sys.argv[0])
    #
    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = None
    image_type = "gif"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(MYDIR, "html", "style.css")
    about_file = os.path.join(MYDIR, "html", "about.dat")
    up_link = None
    up_title = None
    #
    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def _int_arg(self, opt, arg):
        """Convert the argument of *opt* to an int.

        A malformed number is reported as getopt.error so that it takes the
        same usage-message path as any other command-line mistake instead of
        surfacing as a bare ValueError traceback.
        """
        try:
            return int(arg)
        except ValueError:
            raise getopt.error("option %s requires an integer argument, "
                               "not %r" % (opt, arg))

    def parse(self, args):
        """Apply the command-line options in *args* to this object.

        Returns the remaining non-option arguments.  Raises getopt.error
        for unknown options, missing arguments and malformed integers.
        ``--help``/``-H`` prints the usage text and exits.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is declared in the getopt spec and in the usage text,
                # so it must be honoured here as well.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = self._int_arg(opt, arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = self._int_arg(opt, arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = self._int_arg(opt, arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                self.builddir = arg
            elif opt == "--paper":
                self.paper = arg
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy into a
        # list so add_format() keeps working and the class-level tuple is
        # never shared with an instance:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories.  Splitting an
        # unset/empty TEXINPUTS yields [''], i.e. one empty entry, so no
        # separate fallback is needed.
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        self.base_texinputs = [
            os.path.join(MYDIR, "paper-" + self.paper),
            os.path.join(MYDIR, "texinputs"),
            ] + texinputs
206
207
class Job:
    """Build the requested output formats for one LaTeX document.

    A Job is created from an Options object and the path of a .tex file.
    build() drives the external tools; every command, its output and all
    messages are appended to the transcript file "<doc>.how".
    """

    # Number of LaTeX passes run so far for this document.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare the transcript and the latex2html init file for *path*."""
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.log_filename = self.doc + ".how"
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        if os.path.exists(self.doc + ".l2h"):
            # An existing <doc>.l2h must not be overwritten, so generate the
            # init file elsewhere.  mkstemp() creates the file atomically,
            # unlike the race-prone mktemp().
            fd, self.l2h_aux_init_file = tempfile.mkstemp()
            os.close(fd)
        else:
            self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Build every format listed in the options, then clean up."""
        self.setup_texinputs()
        formats = self.options.formats
        # directory the HTML output is generated in (honours --dir):
        html_dir = self.options.builddir or self.doc
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(html_dir)
            if self.options.icon_server == ".":
                pattern = os.path.join(MYDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    # The icons belong next to the generated pages, which is
                    # html_dir and not necessarily self.doc.
                    new_fn = os.path.join(html_dir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            if "html" in formats:
                # reuse the pages built above, wherever they were put
                tempdir = html_dir
                need_html = 0
            else:
                tempdir = self.doc
                need_html = 1
            if self.options.max_split_depth != 1:
                # lynx needs a single page, so regenerate unsplit HTML in a
                # scratch directory
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Export TEXINPUTS: '.', the document directory, then the base set."""
        texinputs = [self.filedir] + list(self.options.base_texinputs)
        os.environ["TEXINPUTS"] = os.pathsep.join(['.'] + texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Run the first LaTeX pass with placeholder index files in place."""
        if binary is None:
            binary = LATEX_BINARY
        new_index("%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run *binary* (latex or pdflatex) until the document is complete."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            #
            # let the doctype-specific handler do some intermediate work:
            #
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Drop the duplicated last line from each <doc>*.syn file."""
        for path in glob.glob(self.doc + "*.syn"):
            uniqify_module_table(path)

    def build_ps(self):
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir=None, max_split_depth=None):
        """Run latex2html into *builddir* and post-process the result.

        *builddir* defaults to the document name and *max_split_depth* to
        the configured option.  Exits with status 1 if <doc>.tex cannot be
        found on TEXINPUTS.
        """
        if builddir is None:
            builddir = self.doc
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))        # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            if self.options.numeric:
                self._copy_about_page(builddir)
            else:
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)

    def _copy_about_page(self, builddir):
        """Copy the node page labelled 'about' in labels.pl to about.html."""
        target = " = q/about/;\n"
        fp = open(os.path.join(builddir, "labels.pl"))
        try:
            while 1:
                line = fp.readline()
                if not line:
                    # no 'about' label: nothing to copy
                    return
                if line.endswith(target):
                    break
            # the node file name is on the line following the label
            line = fp.readline()
        finally:
            fp.close()
        m = re.search(r"\|(node\d+\.[a-z]+)\|", line)
        if m is None:
            self.warning("Could not determine the node file for the "
                         "'about' label in labels.pl.")
            return
        shutil.copyfile(os.path.join(builddir, m.group(1)),
                        os.path.join(builddir, "about.html"))

    def build_text(self, tempdir=None):
        """Dump <tempdir>/index.html to <doc>.txt with lynx."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the initial LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    @staticmethod
    def _read_file(path):
        """Return the contents of *path*, closing the file afterwards."""
        fp = open(path)
        try:
            return fp.read()
        finally:
            fp.close()

    def write_l2h_aux_init_file(self):
        """Generate the Perl init file handed to latex2html via -init_file."""
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            fp.write(self._read_file(L2H_INIT_FILE))
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                fp.write(self._read_file(filename))
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove intermediate files; missing files are ignored."""
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            for path in glob.glob(os.path.join(self.doc, spec)):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        # The transcript and the init file are kept only on request.  Use the
        # forgiving unlink so a file that is already gone cannot abort an
        # otherwise successful build.
        if not self.options.logging:
            safe_unlink(self.log_filename)
        if not self.options.debugging:
            safe_unlink(self.l2h_aux_init_file)

    @staticmethod
    def _exit_code(status):
        """Translate an os.system() result into a non-zero exit code.

        Where os.system() returns a wait status, the child's exit code lives
        in the high byte.  Passing such a value straight to sys.exit() lets
        the OS truncate it to the low 8 bits, so status 256 (child exited
        with 1) would make this script exit with 0.
        """
        if hasattr(os, "WIFEXITED"):
            if os.WIFEXITED(status):
                code = os.WEXITSTATUS(status)
            elif os.WIFSIGNALED(status):
                # shell convention for "killed by signal N"
                code = 128 + os.WTERMSIG(status)
            else:
                code = 1
        else:
            code = status
        if not 0 < code < 256:
            code = 1
        return code

    def run(self, command):
        """Run *command* through the shell, appending its output to the log.

        On failure the relevant part of the transcript is echoed to stderr
        and the script exits with a non-zero status.
        """
        self.message(command)
        rc = os.system("(%s) </dev/null >>%s 2>&1"
                       % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(self._exit_code(rc))

    def message(self, msg):
        """Log a '+++ ' progress line; echo it to stdout unless quiet."""
        msg = "+++ " + msg
        if not self.options.quiet:
            print(msg)
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a '*** ' warning line to stderr and to the log."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append *msg* verbatim to the transcript file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()
487
488
def get_run_transcript(filename):
    """Return lines from the transcript file for the most recent run() call.

    The result starts at the last line beginning with "+++ " and runs to the
    end of the file; if there is no such line, every line is returned.
    """
    fp = open(filename)
    lines = fp.readlines()
    fp.close()
    start = 0
    # scan backwards for the most recent command marker
    index = len(lines) - 1
    while index >= 0:
        if lines[index].startswith("+++ "):
            start = index
            break
        index = index - 1
    return lines[start:]
502
503
def safe_unlink(path):
    """Remove *path*, silently ignoring any OS-level failure to do so."""
    try:
        os.unlink(path)
    except OSError:
        # os.error is the same class as OSError; removal is best-effort
        return
510
511
def split_pathname(path):
    """Split *path* into (absolute directory, document name).

    The path is resolved against the current directory and normalized; a
    trailing ".tex" is removed from the final component.
    """
    suffix = ".tex"
    absolute = os.path.normpath(os.path.join(os.getcwd(), path))
    dirname, basename = os.path.split(absolute)
    if basename.endswith(suffix):
        basename = basename[:-len(suffix)]
    return dirname, basename
517 return dirname, basename
518
519
# Matches a \documentclass line (with optional [...] arguments) and captures
# the class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")
def get_doctype(path):
    """Return the document class named in the file at *path*, or None.

    Only a \\documentclass command at the very start of a line is
    recognized; the first such line wins.
    """
    found = None
    fp = open(path)
    for line in fp:
        match = _doctype_rx.match(line)
        if match is not None:
            found = match.group(1)
            break
    fp.close()
    return found
533 return doctype
534
535
def main():
    """Parse the command line and build each named document.

    With no file arguments, a single *.tex file in the current directory is
    used; zero or several candidates are reported as errors.
    """
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error:
        # fetch the exception instance in a way every interpreter accepts
        error(options, sys.exc_info()[1])
    if not args:
        # attempt to locate single .tex file in current directory:
        candidates = glob.glob("*.tex")
        if not candidates:
            error(options, "No file to process.")
        if len(candidates) > 1:
            error(options, "Could not deduce which files should be processed.")
        args = candidates
    #
    # parameters are processed, let's go!
    #
    for path in args:
        job = Job(options, path)
        job.build()
554
555
def l2hoption(fp, option, value):
    """Write a Perl assignment ``$option = "value";`` to *fp*.

    Nothing is written when *value* is false (None, '', 0).  The value is
    converted with str() and escaped for a double-quoted Perl string.
    """
    if not value:
        return
    escaped = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, escaped))
559
560
# Characters that need a backslash inside a double-quoted Perl string; any
# character not listed here is copied through unchanged.
_to_perl = {
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
    }

def string_to_perl(s):
    """Return *s* with '@', '$' and '"' backslash-escaped for Perl.

    All other characters pass through untouched.  The lookup falls back to
    the character itself, so NUL and characters beyond chr(255) no longer
    yield None and break the join.
    """
    return ''.join([_to_perl.get(c, c) for c in s])
570
571
def check_for_bibtex(filename):
    """Return true if the file contains a \\bibdata{ command."""
    fp = open(filename)
    contents = fp.read()
    fp.close()
    return r"\bibdata{" in contents
577
def uniqify_module_table(filename):
    """Drop the last line of *filename* if it repeats the line before it.

    Only the final pair of lines is compared.  The file is rewritten only
    when a line was actually removed, and both file objects are closed
    explicitly so the result does not depend on garbage collection to
    flush the data.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
        fp = open(filename, "w")
        try:
            fp.writelines(lines)
        finally:
            fp.close()
584
585
def new_index(filename, label="genindex"):
    """Write a placeholder index file containing an empty theindex
    environment that carries *label*."""
    # NOTE: in a raw string the backslash-newline after the opening quotes
    # is not a line continuation; both characters are written to the file.
    template = r"""\
\begin{theindex}
\label{%s}
\end{theindex}
"""
    fp = open(filename, "w")
    fp.write(template % label)
    fp.close()
594
595
# Run the build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()