| #! /usr/bin/env python |
| # -*- Python -*- |
| """usage: %(program)s [options...] file ... |
| |
| Options specifying formats to build: |
| --html HyperText Markup Language (default) |
| --pdf Portable Document Format |
| --ps PostScript |
| --dvi 'DeVice Independent' format from TeX |
| --text ASCII text (requires lynx) |
| |
| More than one output format may be specified, or --all. |
| |
| HTML options: |
| --address, -a Specify an address for page footers. |
| --dir Specify the directory for HTML output. |
| --link Specify the number of levels to include on each page. |
| --split, -s Specify a section level for page splitting, default: %(max_split_depth)s. |
| --iconserver, -i Specify location of icons (default: ./). |
| --image-type Specify the image type to use in HTML output; |
| values: gif, png (default). |
| --numeric Don't rename the HTML files; just keep node#.html for |
| the filenames. |
| --style Specify the CSS file to use for the output (filename, |
| not a URL). |
| --up-link URL to a parent document. |
| --up-title Title of a parent document. |
| --favicon Icon to display in the browser's location bar. |
| |
| Other options: |
| --a4 Format for A4 paper. |
| --letter Format for US letter paper (the default). |
| --help, -H Show this text. |
| --logging, -l Log stdout and stderr to a file (*.how). |
| --debugging, -D Echo commands as they are executed. |
| --keep, -k Keep temporary files around. |
| --quiet, -q Do not print command output to stdout. |
| (stderr is also lost, sorry; see *.how for errors) |
| """ |
| |
| import getopt |
| import glob |
| import os |
| import re |
| import shutil |
| import sys |
| |
| |
# Directory taken from the first sys.path entry, and its parent directory;
# every support file below is located relative to one of these two.
MYDIR = os.path.abspath(sys.path[0])
TOPDIR = os.path.split(MYDIR)[0]

# Support files handed to the external tools.
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")   # run through perl
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")  # makeindex -s
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs; each is resolved through the shell's PATH
# when a command line is executed.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
| |
| |
def usage(options, file):
    """Write the usage text to *file*.

    The module docstring is the template; *options* is used as the mapping
    for its ``%(name)s`` placeholders.
    """
    text = __doc__ % options
    file.write(text)
    file.write("\n")
| |
def error(options, message, err=2):
    """Report a fatal command-line problem and terminate the program.

    Writes *message*, a blank line and the usage text to stderr, then exits
    with status *err* (2 unless the caller asks for something else).
    """
    sys.stderr.write("%s\n\n" % (message,))
    usage(options, sys.stderr)
    # Honour the caller-supplied status instead of a hard-coded 2.
    sys.exit(err)
| |
| |
class Options:
    """Build settings gathered from the command line.

    Defaults are class attributes; parse() overrides them on the instance.
    Instances also answer ``options[name]`` so that they can be used as the
    mapping when the usage message is %-formatted.
    """

    program = os.path.basename(sys.argv[0])
    #
    address = ''
    builddir = None
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = "."
    image_type = "png"
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    runs = 0
    numeric = 0
    global_module_index = None
    style_file = os.path.join(TOPDIR, "html", "style.css")
    about_file = os.path.join(TOPDIR, "html", "about.dat")
    up_link = None
    up_title = None
    favicon = None
    #
    # 'dvips_safe' is a weird option.  It is used mostly to make
    # LaTeX2HTML not try to be too smart about protecting the user
    # from a bad version of dvips -- some versions would core dump if
    # the path to the source DVI contained a dot, and it's apparently
    # difficult to determine if the version available has that bug.
    # This option gets set when PostScript output is requested
    # (because we're going to run dvips regardless, and we'll either
    # know it succeeds before LaTeX2HTML is run, or we'll have
    # detected the failure and bailed), or the user asserts that it's
    # safe from the command line.
    #
    # So, why does LaTeX2HTML think it appropriate to protect the user
    # from a dvips that's only potentially going to core dump?  Only
    # because they want to avoid doing a lot of work just to have to
    # bail later with no useful intermediates.  Unfortunately, they
    # bail *before* they know whether dvips will be needed at all.
    # I've gone around the bush a few times with the LaTeX2HTML
    # developers over whether this is appropriate behavior, and they
    # don't seem interested in changing their position.
    #
    dvips_safe = 0
    #
    DEFAULT_FORMATS = ("html",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        # Per-instance lists, so that separate Options objects never share
        # mutable state.
        self.formats = []
        self.l2h_init_files = []

    def __getitem__(self, key):
        """Return attribute *key*; raise KeyError if there is none."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line arguments in *args* to this object.

        Returns the remaining non-option arguments.  getopt.error from
        getopt.getopt() propagates to the caller; -H/--help prints the
        usage text to stdout and exits.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkqr:",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "letter", "l2h-init=",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet", "runs=", "image-type=",
                                    "about=", "numeric", "style=", "paper=",
                                    "up-link=", "up-title=", "dir=",
                                    "global-module-index=", "dvips-safe",
                                    "favicon="]
                                   + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
                self.dvips_safe = "ps" in self.formats
            elif opt in ("-H", "--help"):
                usage(self, sys.stdout)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is accepted by the getopt spec above and advertised
                # in the usage text, so it must be handled here as well.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            elif opt in ("-r", "--runs"):
                self.runs = int(arg)
            elif opt == "--image-type":
                self.image_type = arg
            elif opt == "--about":
                # always make this absolute:
                self.about_file = os.path.normpath(
                    os.path.abspath(arg))
            elif opt == "--numeric":
                self.numeric = 1
            elif opt == "--style":
                self.style_file = os.path.abspath(arg)
            elif opt == "--l2h-init":
                self.l2h_init_files.append(os.path.abspath(arg))
            elif opt == "--favicon":
                self.favicon = arg
            elif opt == "--up-link":
                self.up_link = arg
            elif opt == "--up-title":
                self.up_title = arg
            elif opt == "--global-module-index":
                self.global_module_index = arg
            elif opt == "--dir":
                if os.sep == "\\":
                    # accept forward slashes where the separator is "\"
                    arg = re.sub("/", "\\\\", arg)
                self.builddir = os.path.expanduser(arg)
            elif opt == "--paper":
                self.paper = arg
            elif opt == "--dvips-safe":
                self.dvips_safe = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            if format == "ps":
                # assume this is safe since we're going to run it anyway
                self.dvips_safe = 1
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy into a
        # list so add_format() keeps working and the class-level tuple is
        # never shared:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories; str.split()
        # always yields at least one element, so an unset or empty
        # TEXINPUTS gives [''] here:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        mydirs = [os.path.join(TOPDIR, "paper-" + self.paper),
                  os.path.join(TOPDIR, "texinputs"),
                  ]
        if '' in texinputs:
            # our directories go just in front of the first empty entry
            i = texinputs.index('')
            texinputs[i:i] = mydirs
        else:
            texinputs += mydirs
        self.base_texinputs = texinputs
        if self.builddir:
            self.builddir = os.path.abspath(self.builddir)
| |
| |
class Job:
    """Build the requested output formats for a single LaTeX document.

    A Job is created from the shared options object and the path of one
    document; build() then runs the external tools for every format listed
    in options.formats and, unless temporaries are kept, cleans up.
    """

    # Number of LaTeX passes run so far for this document.
    latex_runs = 0

    def __init__(self, options, path):
        """Prepare the log file and the LaTeX2HTML init file for *path*."""
        self.options = options
        self.doctype = get_doctype(path)
        self.filedir, self.doc = split_pathname(path)
        self.builddir = os.path.abspath(options.builddir or self.doc)
        if ("html" in options.formats or "text" in options.formats):
            if not os.path.exists(self.builddir):
                os.mkdir(self.builddir)
            self.log_filename = os.path.join(self.builddir, self.doc + ".how")
        else:
            self.log_filename = os.path.abspath(self.doc + ".how")
        # start every job with a fresh transcript
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # keep a single "~" backup of any existing init file
        l2hconf = self.doc + ".l2h"
        if os.path.exists(l2hconf):
            if os.path.exists(l2hconf + "~"):
                os.unlink(l2hconf + "~")
            os.rename(l2hconf, l2hconf + "~")
        self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every format in options.formats, then clean up."""
        self.setup_texinputs()
        formats = self.options.formats
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.builddir)
            if self.options.icon_server == ".":
                # icons are served from the output directory itself, so
                # copy them there
                pattern = os.path.join(TOPDIR, "html", "icons",
                                       "*." + self.options.image_type)
                imgs = glob.glob(pattern)
                if not imgs:
                    self.warning(
                        "Could not locate support images of type %s."
                        % repr(self.options.image_type))
                for fn in imgs:
                    new_fn = os.path.join(self.builddir, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # the text dump reads one index.html, so a single-page HTML
                # build in a scratch directory is needed
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Export TEXINPUTS with the document's directory searched first."""
        texinputs = [self.filedir] + self.options.base_texinputs
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)
        self.message("TEXINPUTS=" + os.environ["TEXINPUTS"])

    def build_aux(self, binary=None):
        """Run the first LaTeX pass, starting from placeholder index files."""
        if binary is None:
            binary = LATEX_BINARY
        new_index("%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.latex_runs = 1

    def build_dvi(self):
        """Run the LaTeX passes with the latex binary."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Run the LaTeX passes with the pdflatex binary."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run *binary* often enough to settle indexes and bibliography."""
        self.require_temps(binary=binary)
        if self.latex_runs < 2:
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
            use_indfix = 0
            if os.path.isfile(self.doc + ".idx"):
                use_indfix = 1
                # call to Doc/tools/fix_hack omitted; doesn't appear necessary
                self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
                import indfix
                indfix.process(self.doc + ".ind")
            if self.use_bibtex:
                self.run("%s %s" % (BIBTEX_BINARY, self.doc))
            self.process_synopsis_files()
            self.run("%s %s" % (binary, self.doc))
            self.latex_runs = self.latex_runs + 1
            if os.path.isfile("mod%s.idx" % self.doc):
                self.run("%s -s %s mod%s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
            if use_indfix:
                # indfix was imported above whenever use_indfix was set
                self.run("%s -s %s %s.idx"
                         % (MAKEINDEX_BINARY, ISTFILE, self.doc))
                indfix.process(self.doc + ".ind")
            self.process_synopsis_files()
        #
        # and now finish it off:
        #
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            if self.doctype == "manual":
                bigpart = "chapter"
            else:
                bigpart = "section"
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", bigpart)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))
        self.latex_runs = self.latex_runs + 1

    def process_synopsis_files(self):
        """Drop a duplicated final line from each <doc>*.syn file."""
        synopsis_files = glob.glob(self.doc + "*.syn")
        for path in synopsis_files:
            uniqify_module_table(path)

    def build_ps(self):
        """Convert the DVI file to PostScript with dvips."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir, max_split_depth=None):
        """Run LaTeX2HTML into *builddir* and post-process its output.

        *max_split_depth* defaults to the value in the options.  Exits with
        status 1 when the .tex source is not found on TEXINPUTS.
        """
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            self.warning("Could not locate %s.tex; aborting." % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        if not os.path.isdir(builddir):
            os.mkdir(builddir)
        else:
            # discard pages left over from an earlier build
            for fname in glob.glob(os.path.join(builddir, "*.html")):
                os.unlink(fname)
        args = [LATEX2HTML_BINARY,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))  # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        shutil.copyfile(os.path.join(builddir, self.doc + ".html"),
                        os.path.join(builddir, "index.html"))
        if max_split_depth != 1:
            label_file = os.path.join(builddir, "labels.pl")
            fp = open(label_file)
            try:
                about_node = None
                target = " = q/about/;\n"
                x = len(target)
                while 1:
                    line = fp.readline()
                    if not line:
                        break
                    if line[-x:] == target:
                        # the node file name is on the line after the label
                        line = fp.readline()
                        m = re.search(r"\|(node\d+\.[a-z]+)\|", line)
                        if m:
                            about_node = m.group(1)
                            shutil.copyfile(
                                os.path.join(builddir, about_node),
                                os.path.join(builddir, "about.html"))
                        else:
                            # unexpected layout: skip about.html rather than
                            # die with an AttributeError
                            self.warning(
                                "Could not find the node for 'about' in %s."
                                % label_file)
                        break
            finally:
                fp.close()
            if not self.options.numeric:
                # the script is given *.html relative to builddir, so run it
                # from there and always come back
                pwd = os.getcwd()
                try:
                    os.chdir(builddir)
                    self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
                finally:
                    os.chdir(pwd)
        # These files need to be cleaned up here since builddir there
        # can be more than one, so we clean each of them.
        if self.options.discard_temps:
            for fn in ("images.tex", "images.log", "images.aux"):
                safe_unlink(os.path.join(builddir, fn))

    def build_text(self, tempdir=None):
        """Dump <tempdir>/index.html to <doc>.txt using lynx."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Make sure the first LaTeX pass has been run."""
        if not self.latex_runs:
            self.build_aux(binary=binary)

    def _read_file(self, path):
        """Return the contents of *path*, closing the file afterwards."""
        fp = open(path)
        try:
            return fp.read()
        finally:
            fp.close()

    def write_l2h_aux_init_file(self):
        """Generate the per-document init file handed to LaTeX2HTML.

        It consists of the stock init file, any extra init files named in
        the options, and one Perl assignment per option that is set.
        """
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            d = string_to_perl(os.path.dirname(L2H_INIT_FILE))
            fp.write("package main;\n"
                     "push (@INC, '%s');\n"
                     "$mydir = '%s';\n"
                     % (d, d))
            fp.write(self._read_file(L2H_INIT_FILE))
            for filename in options.l2h_init_files:
                fp.write("\n# initialization code incorporated from:\n# ")
                fp.write(filename)
                fp.write("\n")
                fp.write(self._read_file(filename))
            fp.write("\n"
                     "# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     "$NO_AUTO_LINK = 1;\n"
                     )
            l2hoption(fp, "ABOUT_FILE", options.about_file)
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "IMAGE_TYPE", options.image_type)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            l2hoption(fp, "EXTERNAL_UP_LINK", options.up_link)
            l2hoption(fp, "EXTERNAL_UP_TITLE", options.up_title)
            l2hoption(fp, "FAVORITES_ICON", options.favicon)
            l2hoption(fp, "GLOBAL_MODULE_INDEX", options.global_module_index)
            l2hoption(fp, "DVIPS_SAFE", options.dvips_safe)
            # a Perl file loaded by require must end with a true value
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove intermediate files left behind by the tools."""
        # NOTE(review): name-mangled attribute that is never read in this
        # class; kept as-is.
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        for path in glob.glob(self.doc + "*.syn"):
            safe_unlink(path)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            pattern = os.path.join(self.doc, spec)
            for path in glob.glob(pattern):
                safe_unlink(path)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run *command* through the shell; abort the program if it fails.

        Outside Windows the command's output is appended to the transcript
        file.  On failure the tail of the transcript is echoed to stderr and
        the process exits with the command's exit status (1 if unknown).
        """
        self.message(command)
        if sys.platform.startswith("win"):
            rc = os.system(command)
        else:
            rc = os.system("(%s) </dev/null >>%s 2>&1"
                           % (command, self.log_filename))
        if rc:
            self.warning(
                "Session transcript and error messages are in %s."
                % self.log_filename)
            result = 1
            if hasattr(os, "WIFEXITED"):
                if os.WIFEXITED(rc):
                    result = os.WEXITSTATUS(rc)
                    self.warning("Exited with status %s." % result)
                else:
                    # a child that did not exit normally was terminated by
                    # a signal; WTERMSIG (not WSTOPSIG) extracts its number
                    self.warning("Killed by signal %s." % os.WTERMSIG(rc))
            else:
                self.warning("Return code: %s" % rc)
            sys.stderr.write("The relevant lines from the transcript are:\n")
            sys.stderr.write("-" * 72 + "\n")
            sys.stderr.writelines(get_run_transcript(self.log_filename))
            sys.exit(result)

    def message(self, msg):
        """Log a "+++ " progress line; also print it unless quiet is set."""
        msg = "+++ " + msg
        if not self.options.quiet:
            sys.stdout.write(msg + "\n")
        self.log(msg + "\n")

    def warning(self, msg):
        """Write a "*** " warning line to stderr and to the transcript."""
        msg = "*** %s\n" % msg
        sys.stderr.write(msg)
        self.log(msg)

    def log(self, msg):
        """Append *msg* verbatim to the transcript file."""
        fp = open(self.log_filename, "a")
        try:
            fp.write(msg)
        finally:
            fp.close()
| |
| |
def get_run_transcript(filename):
    """Return the transcript lines belonging to the latest run() call.

    The result starts at the last line beginning with "+++ " and runs to
    the end of the file; without such a marker every line is returned.
    """
    fp = open(filename)
    lines = fp.readlines()
    fp.close()
    # Walk backwards to the most recent command marker.
    start = len(lines)
    while start > 0:
        start -= 1
        if lines[start][:4] == "+++ ":
            break
    return lines[start:]
| |
| |
def safe_unlink(path):
    """Delete *path*, silently ignoring any OS-level failure to do so."""
    try:
        os.remove(path)
    except OSError:
        return
| |
| |
def split_pathname(path):
    """Return (absolute directory, document name) for *path*.

    A trailing ".tex" is dropped from the document name.
    """
    directory, name = os.path.split(os.path.abspath(path))
    if name.endswith(".tex"):
        name = name[:-len(".tex")]
    return directory, name
| |
| |
# Matches a \documentclass line (optional [...] argument) and captures the
# class name.
_doctype_rx = re.compile(r"\\documentclass(?:\[[^]]*\])?{([a-zA-Z]*)}")
def get_doctype(path):
    """Return the document class named in the file at *path*, or None.

    Only the first line that starts with a \\documentclass declaration is
    considered.
    """
    found = None
    source = open(path)
    for text in source:
        hit = _doctype_rx.match(text)
        if hit is not None:
            found = hit.group(1)
            break
    source.close()
    return found
| |
| |
def main():
    """Parse the command line and build each document named on it.

    When no document is named, a single *.tex file in the current directory
    is used; zero or several candidates are reported as an error.
    """
    options = Options()
    try:
        args = options.parse(sys.argv[1:])
    except getopt.error:
        # fetch the exception instance in a way every Python version accepts
        error(options, sys.exc_info()[1])
    if not args:
        # attempt to locate single .tex file in current directory:
        candidates = glob.glob("*.tex")
        if not candidates:
            error(options, "No file to process.")
        if len(candidates) > 1:
            error(options, "Could not deduce which files should be processed.")
        args = candidates
    #
    # parameters are processed, let's go!
    #
    for path in args:
        job = Job(options, path)
        job.build()
| |
| |
def l2hoption(fp, option, value):
    """Write ``$option = "value";`` to *fp* as a Perl assignment.

    Nothing is written when *value* is false (None, 0, empty string).
    """
    if not value:
        return
    escaped = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, escaped))
| |
| |
# Characters that need a backslash in front of them in the Perl string
# literals this script generates; anything not listed is copied unchanged.
_to_perl = {
    "@": "\\@",
    "$": "\\$",
    '"': '\\"',
}

def string_to_perl(s):
    """Return *s* with ``@``, ``$`` and ``"`` backslash-escaped for Perl.

    Every other character is passed through untouched, including NUL and
    characters above 255, which previously caused a TypeError because the
    lookup table had no entry for them.
    """
    return ''.join([_to_perl.get(c, c) for c in s])
| |
| |
def check_for_bibtex(filename):
    r"""Return true if the file *filename* contains a ``\bibdata{`` entry."""
    aux = open(filename)
    contents = aux.read()
    aux.close()
    return r"\bibdata{" in contents
| |
def uniqify_module_table(filename):
    """Rewrite *filename* without its last line if that line is a repeat.

    Only the final two lines are compared.  The file is rewritten in either
    case, and both handles are closed explicitly so the data reaches disk
    without relying on garbage collection.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
    fp = open(filename, "w")
    try:
        fp.writelines(lines)
    finally:
        fp.close()
| |
| |
def new_index(filename, label="genindex"):
    """Create *filename* holding an empty ``theindex`` environment.

    *label* is inserted as the argument of the \\label command.
    """
    out = open(filename, "w")
    # Raw string: the backslash ending the first line is written literally.
    skeleton = r"""\
\begin{theindex}
\label{%s}
\end{theindex}
"""
    out.write(skeleton % label)
    out.close()
| |
| |
# Start the build only when this file is executed as a script; importing it
# as a module just defines the names above.
if __name__ == "__main__":
    main()