| Fred Drake | 8b88093 | 1999-03-03 20:24:30 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python | 
|  | 2 | #  -*- Python -*- | 
|  | 3 | """usage: %(program)s [options...] file ... | 
|  | 4 |  | 
|  | 5 | Options specifying formats to build: | 
|  | 6 | --html		HyperText Markup Language | 
|  | 7 | --pdf		Portable Document Format (default) | 
|  | 8 | --ps		PostScript | 
|  | 9 | --dvi		'DeVice Independent' format from TeX | 
|  | 10 | --text		ASCII text (requires lynx) | 
|  | 11 |  | 
|  | 12 | More than one output format may be specified, or --all. | 
|  | 13 |  | 
|  | 14 | HTML options: | 
|  | 15 | --address, -a	Specify an address for page footers. | 
|  | 16 | --link		Specify the number of levels to include on each page. | 
|  | 17 | --split, -s		Specify a section level for page splitting, default: %(max_split_depth)s. | 
|  | 18 | --iconserver, -i	Specify location of icons (default: ../). | 
|  | 19 |  | 
|  | 20 | Other options: | 
|  | 21 | --a4		Format for A4 paper. | 
|  | 22 | --letter		Format for US letter paper (the default). | 
|  | 23 | --help, -H		Show this text. | 
|  | 24 | --logging, -l	Log stdout and stderr to a file (*.how). | 
|  | 25 | --debugging, -D	Echo commands as they are executed. | 
|  | 26 | --keep, -k		Keep temporary files around. | 
|  | 27 | --quiet, -q		Do not print command output to stdout. | 
|  | 28 | (stderr is also lost,  sorry; see *.how for errors) | 
|  | 29 | """ | 
|  | 30 |  | 
|  | 31 | import getopt | 
|  | 32 | import glob | 
|  | 33 | import os | 
|  | 34 | import shutil | 
|  | 35 | import string | 
|  | 36 | import sys | 
|  | 37 | import tempfile | 
|  | 38 |  | 
|  | 39 |  | 
# Directory derived from sys.path[0], resolved against the current working
# directory (presumably the directory holding this script -- confirm), and
# its parent directory.
MYDIR = os.path.normpath(os.path.join(os.getcwd(), sys.path[0]))
TOPDIR = os.path.normpath(os.path.join(MYDIR, os.pardir))

# Support files located relative to the directories above:
# ISTFILE is handed to makeindex via -s, NODE2LABEL_SCRIPT is run with perl
# over the generated *.html files, and L2H_INIT_FILE is passed to latex2html
# as an -init_file.
ISTFILE = os.path.join(TOPDIR, "texinputs", "python.ist")
NODE2LABEL_SCRIPT = os.path.join(MYDIR, "node2label.pl")
L2H_INIT_FILE = os.path.join(TOPDIR, "perl", "l2hinit.perl")

# Names of the external programs; they are interpolated into shell command
# lines, so they are looked up through the shell's normal search path.
BIBTEX_BINARY = "bibtex"
DVIPS_BINARY = "dvips"
LATEX_BINARY = "latex"
LATEX2HTML_BINARY = "latex2html"
LYNX_BINARY = "lynx"
MAKEINDEX_BINARY = "makeindex"
PDFLATEX_BINARY = "pdflatex"
PERL_BINARY = "perl"
PYTHON_BINARY = "python"
|  | 56 |  | 
|  | 57 |  | 
def usage(options):
    """Write the usage message to standard output.

    The module docstring is the message template; `options` supplies the
    values for its %(name)s fields through mapping-style lookup.
    """
    text = __doc__ % options
    sys.stdout.write(text + "\n")
|  | 60 |  | 
def error(options, message, err=2):
    """Report a command-line error on stderr and terminate the process.

    Writes `message`, a blank line and the usage text to standard error,
    then exits with status `err` (2 by default).  Previously the exit status
    was hard-coded and the `err` argument had no effect.
    """
    # usage() writes to sys.stdout; redirect it so the whole report goes to
    # stderr.  The process exits immediately afterwards, so the redirection
    # is never undone.
    sys.stdout = sys.stderr
    sys.stdout.write("%s\n\n" % (message,))
    usage(options)
    sys.exit(err)
|  | 67 |  | 
|  | 68 |  | 
class Options:
    """Build settings, populated from defaults and the command line.

    The class attributes are the defaults; parse() overrides them on the
    instance.  Instances also support ``options[name]`` lookup so that they
    can serve as the mapping when the usage message is %-formatted.
    """

    program = os.path.basename(sys.argv[0])
    #
    address = ''
    debugging = 0
    discard_temps = 1
    have_temps = 0
    icon_server = None
    logging = 0
    max_link_depth = 3
    max_split_depth = 6
    paper = "letter"
    quiet = 0
    style_file = os.path.join(TOPDIR, "html", "style.css")
    #
    DEFAULT_FORMATS = ("pdf",)
    ALL_FORMATS = ("dvi", "html", "pdf", "ps", "text")

    def __init__(self):
        self.config_files = []
        self.formats = []

    def __getitem__(self, key):
        """Return the attribute named `key`; raise KeyError if there is none."""
        # This is used when formatting the usage message.
        try:
            return getattr(self, key)
        except AttributeError:
            raise KeyError(key)

    def parse(self, args):
        """Apply the command-line arguments in `args` to this object.

        `args` is the argument list without the program name.  Returns the
        remaining non-option arguments.  getopt.error propagates for
        unrecognized options, and int() raises ValueError when --link or
        --split is given a non-numeric value.  --help prints the usage text
        and exits.
        """
        opts, args = getopt.getopt(args, "Hi:a:s:lDkq",
                                   ["all", "postscript", "help", "iconserver=",
                                    "address=", "a4", "l2h-config=", "letter",
                                    "link=", "split=", "logging", "debugging",
                                    "keep", "quiet"] + list(self.ALL_FORMATS))
        for opt, arg in opts:
            if opt == "--all":
                self.formats = list(self.ALL_FORMATS)
            elif opt in ("-H", "--help"):
                usage(self)
                sys.exit()
            elif opt in ("-i", "--iconserver"):
                # "-i" is accepted by the getopt spec above and advertised in
                # the usage text, so it must be handled here as well.
                self.icon_server = arg
            elif opt in ("-a", "--address"):
                self.address = arg
            elif opt == "--a4":
                self.paper = "a4"
            elif opt == "--letter":
                self.paper = "letter"
            elif opt == "--l2h-config":
                self.config_files.append(arg)
            elif opt == "--link":
                self.max_link_depth = int(arg)
            elif opt in ("-s", "--split"):
                self.max_split_depth = int(arg)
            elif opt in ("-l", "--logging"):
                self.logging = self.logging + 1
            elif opt in ("-D", "--debugging"):
                self.debugging = self.debugging + 1
            elif opt in ("-k", "--keep"):
                self.discard_temps = 0
            elif opt in ("-q", "--quiet"):
                self.quiet = 1
            #
            # Format specifiers:
            #
            elif opt[2:] in self.ALL_FORMATS:
                self.add_format(opt[2:])
            elif opt == "--postscript":
                # synonym for --ps
                self.add_format("ps")
        self.initialize()
        #
        # return the args to allow the caller access:
        #
        return args

    def add_format(self, format):
        """Add a format to the formats list if not present."""
        if format not in self.formats:
            self.formats.append(format)

    def initialize(self):
        """Complete initialization.  This is needed if parse() isn't used."""
        # add the default format if no formats were specified; copy it so
        # that self.formats is always a list that add_format() can extend:
        if not self.formats:
            self.formats = list(self.DEFAULT_FORMATS)
        # determine the base set of texinputs directories; splitting on an
        # explicit separator always yields at least [''], so an unset
        # TEXINPUTS contributes a single empty entry:
        texinputs = os.environ.get("TEXINPUTS", "").split(os.pathsep)
        self.base_texinputs = [
            os.path.join(TOPDIR, "paper-" + self.paper),
            os.path.join(TOPDIR, "texinputs"),
            ] + texinputs
|  | 164 |  | 
|  | 165 |  | 
class Job:
    """Build the requested output formats for one document.

    A job is created from the shared options object and the path of the
    document; build() then runs the external tools.  Every command is
    appended, together with its output, to the transcript file
    "<doc>.how".
    """

    def __init__(self, options, path):
        self.options = options
        self.filedir, self.doc = split_pathname(path)
        # Keep the transcript name absolute: build_html() runs a command
        # after changing directory, and a relative name would scatter the
        # transcript over two files.
        self.log_filename = os.path.abspath(self.doc + ".how")
        if os.path.exists(self.log_filename):
            os.unlink(self.log_filename)
        # Do not clobber an existing <doc>.l2h; use a scratch name instead.
        if os.path.exists(self.doc + ".l2h"):
            self.l2h_aux_init_file = tempfile.mktemp()
        else:
            self.l2h_aux_init_file = self.doc + ".l2h"
        self.write_l2h_aux_init_file()

    def build(self):
        """Produce every format listed in options.formats, then clean up."""
        self.setup_texinputs()
        formats = self.options.formats
        # PostScript is generated from the DVI file, so "ps" implies a DVI
        # build as well.
        if "dvi" in formats or "ps" in formats:
            self.build_dvi()
        if "pdf" in formats:
            self.build_pdf()
        if "ps" in formats:
            self.build_ps()
        if "html" in formats:
            self.require_temps()
            self.build_html(self.doc)
            if self.options.icon_server == ".":
                # icons are served from the output directory itself
                pattern = os.path.join(TOPDIR, "html", "icons", "*.gif")
                for fn in glob.glob(pattern):
                    new_fn = os.path.join(self.doc, os.path.basename(fn))
                    shutil.copyfile(fn, new_fn)
        if "text" in formats:
            self.require_temps()
            tempdir = self.doc
            need_html = "html" not in formats
            if self.options.max_split_depth != 1:
                # The text dump needs HTML built with a split depth of 1;
                # override the setting and build into a scratch directory.
                fp = open(self.l2h_aux_init_file, "a")
                try:
                    fp.write("# re-hack this file for --text:\n")
                    l2hoption(fp, "MAX_SPLIT_DEPTH", "1")
                    fp.write("1;\n")
                finally:
                    fp.close()
                tempdir = self.doc + "-temp-html"
                need_html = 1
            if need_html:
                self.build_html(tempdir, max_split_depth=1)
            self.build_text(tempdir)
        if self.options.discard_temps:
            self.cleanup()

    def setup_texinputs(self):
        """Set TEXINPUTS to the document directory plus the base entries."""
        texinputs = [self.filedir] + list(self.options.base_texinputs)
        os.environ["TEXINPUTS"] = os.pathsep.join(texinputs)

    __have_temps = 0
    def build_aux(self, binary=None):
        """Run the first LaTeX pass, which creates the auxiliary files.

        `binary` is the LaTeX program to run; LATEX_BINARY when omitted.
        Placeholder index files are written before the run.
        """
        if binary is None:
            binary = LATEX_BINARY
        new_index(   "%s.ind" % self.doc, "genindex")
        new_index("mod%s.ind" % self.doc, "modindex")
        self.run("%s %s" % (binary, self.doc))
        self.use_bibtex = check_for_bibtex(self.doc + ".aux")
        self.__have_temps = 1

    def build_dvi(self):
        """Build the DVI file using LATEX_BINARY."""
        self.use_latex(LATEX_BINARY)

    def build_pdf(self):
        """Build the PDF file using PDFLATEX_BINARY."""
        self.use_latex(PDFLATEX_BINARY)

    def use_latex(self, binary):
        """Run the remaining passes of `binary`, makeindex and bibtex."""
        self.require_temps(binary=binary)
        if os.path.isfile("mod%s.idx" % self.doc):
            self.run("%s mod%s.idx" % (MAKEINDEX_BINARY, self.doc))
        if os.path.isfile(self.doc + ".idx"):
            # call to Doc/tools/fix_hack omitted; doesn't appear necessary
            self.run("%s %s.idx" % (MAKEINDEX_BINARY, self.doc))
            import indfix
            indfix.process(self.doc + ".ind")
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        synopsis_file = self.doc + ".syn"
        if os.path.isfile(synopsis_file):
            # impose uniq requirement on last line....
            uniqify_module_table(synopsis_file)
        self.run("%s %s" % (binary, self.doc))
        if os.path.isfile("mod%s.idx" % self.doc):
            self.run("%s -s %s mod%s.idx"
                     % (MAKEINDEX_BINARY, ISTFILE, self.doc))
        if os.path.isfile(self.doc + ".idx"):
            self.run("%s -s %s %s.idx" % (MAKEINDEX_BINARY, ISTFILE, self.doc))
        if os.path.isfile(self.doc + ".toc") and binary == PDFLATEX_BINARY:
            import toc2bkm
            toc2bkm.process(self.doc + ".toc", self.doc + ".bkm", "section")
        if os.path.isfile(synopsis_file):
            # impose uniq requirement on last line....
            uniqify_module_table(synopsis_file)
        if self.use_bibtex:
            self.run("%s %s" % (BIBTEX_BINARY, self.doc))
        self.run("%s %s" % (binary, self.doc))

    def build_ps(self):
        """Convert the DVI file to PostScript with dvips."""
        self.run("%s -N0 -o %s.ps %s" % (DVIPS_BINARY, self.doc, self.doc))

    def build_html(self, builddir=None, max_split_depth=None):
        """Run latex2html and post-process its output.

        `builddir` is the output directory (the document name when omitted)
        and `max_split_depth` defaults to the configured option.  Exits with
        status 1 when "<doc>.tex" is not found in any TEXINPUTS directory.
        """
        if builddir is None:
            builddir = self.doc
        if max_split_depth is None:
            max_split_depth = self.options.max_split_depth
        texfile = None
        for p in os.environ["TEXINPUTS"].split(os.pathsep):
            fn = os.path.join(p, self.doc + ".tex")
            if os.path.isfile(fn):
                texfile = fn
                break
        if not texfile:
            sys.stderr.write("Could not locate %s.tex; aborting.\n" % self.doc)
            sys.exit(1)
        # remove leading ./ (or equiv.); might avoid problems w/ dvips
        if texfile[:2] == os.curdir + os.sep:
            texfile = texfile[2:]
        # build the command line and run LaTeX2HTML:
        args = [LATEX2HTML_BINARY,
                "-init_file", L2H_INIT_FILE,
                "-init_file", self.l2h_aux_init_file,
                "-dir", builddir,
                texfile
                ]
        self.run(" ".join(args))     # XXX need quoting!
        # ... postprocess
        shutil.copyfile(self.options.style_file,
                        os.path.join(builddir, self.doc + ".css"))
        if max_split_depth != 1:
            pwd = os.getcwd()
            try:
                os.chdir(builddir)
                self.run("%s %s *.html" % (PERL_BINARY, NODE2LABEL_SCRIPT))
            finally:
                os.chdir(pwd)

    def build_text(self, tempdir=None):
        """Dump index.html from `tempdir` to "<doc>.txt" using lynx."""
        if tempdir is None:
            tempdir = self.doc
        indexfile = os.path.join(tempdir, "index.html")
        self.run("%s -nolist -dump %s >%s.txt"
                 % (LYNX_BINARY, indexfile, self.doc))

    def require_temps(self, binary=None):
        """Run build_aux() unless the auxiliary files already exist."""
        if not self.__have_temps:
            self.build_aux(binary=binary)

    def write_l2h_aux_init_file(self):
        """Write the per-job latex2html init file.

        The file holds the contents of every --l2h-config file followed by
        the settings taken from the options object.
        """
        options = self.options
        fp = open(self.l2h_aux_init_file, "w")
        try:
            fp.write("# auxillary init file for latex2html\n"
                     "# generated by mkhowto\n"
                     )
            for fn in options.config_files:
                config = open(fn)
                try:
                    fp.write(config.read())
                finally:
                    config.close()
                fp.write("\n"
                         "\n"
                         'print "\nInitializing from file: %s\";\n\n'
                         % string_to_perl(fn))
            l2hoption(fp, "ICONSERVER", options.icon_server)
            l2hoption(fp, "ADDRESS", options.address)
            l2hoption(fp, "MAX_LINK_DEPTH", options.max_link_depth)
            l2hoption(fp, "MAX_SPLIT_DEPTH", options.max_split_depth)
            fp.write("1;\n")
        finally:
            fp.close()

    def cleanup(self):
        """Remove intermediate files left behind by the build."""
        self.__have_temps = 0
        for pattern in ("%s.aux", "%s.log", "%s.out", "%s.toc", "%s.bkm",
                        "%s.idx", "%s.ilg", "%s.ind", "%s.syn", "%s.pla",
                        "%s.bbl", "%s.blg",
                        "mod%s.idx", "mod%s.ind", "mod%s.ilg",
                        ):
            safe_unlink(pattern % self.doc)
        for spec in ("IMG*", "*.pl", "WARNINGS", "index.dat", "modindex.dat"):
            pattern = os.path.join(self.doc, spec)
            # explicit loop: a lazy map() would never perform the removals
            for fn in glob.glob(pattern):
                safe_unlink(fn)
        if "dvi" not in self.options.formats:
            safe_unlink(self.doc + ".dvi")
        if os.path.isdir(self.doc + "-temp-html"):
            shutil.rmtree(self.doc + "-temp-html", ignore_errors=1)
        if not self.options.logging:
            os.unlink(self.log_filename)
        if not self.options.debugging:
            os.unlink(self.l2h_aux_init_file)

    def run(self, command):
        """Run `command` through the shell, logging it and its output.

        The command line and everything it writes to stdout/stderr are
        appended to the transcript file.  If the command fails, a pointer to
        the transcript is written to stderr and the process exits with the
        command's exit status (1 if it was killed by a signal).
        """
        if not self.options.quiet:
            sys.stdout.write("+++ %s\n" % command)
        fp = open(self.log_filename, "a")
        try:
            fp.write("+++ %s\n" % command)
        finally:
            fp.close()
        rc = os.system('(%s) >>"%s" 2>&1' % (command, self.log_filename))
        if rc:
            sys.stderr.write(
                "Session transcript and error messages are in %s.\n"
                % self.log_filename)
            # Where the wait-status helpers exist, os.system() returns an
            # encoded wait status; passing it to sys.exit() unchanged can
            # wrap around to 0 (e.g. 256 for exit code 1), so decode it.
            if hasattr(os, "WIFEXITED"):
                if os.WIFEXITED(rc):
                    status = os.WEXITSTATUS(rc)
                else:
                    status = 1
            else:
                status = rc
            sys.exit(status)
|  | 366 |  | 
|  | 367 |  | 
def safe_unlink(path):
    """Remove the file at `path`, ignoring any OS-level failure.

    A missing file, a permission problem, etc. are silently skipped; the
    function always returns None.
    """
    try:
        os.remove(path)
    except OSError:
        return
|  | 373 |  | 
|  | 374 |  | 
def split_pathname(pathname):
    """Split `pathname` into (directory, document name).

    The path is made absolute relative to the current directory and
    normalized first.  A trailing ".tex" is dropped from the final
    component.
    """
    absolute = os.path.normpath(os.path.join(os.getcwd(), pathname))
    directory, docname = os.path.split(absolute)
    if docname.endswith(".tex"):
        docname = docname[:-len(".tex")]
    return directory, docname
|  | 381 |  | 
|  | 382 |  | 
def main():
    """Parse the command line and build every document it names.

    When no document is named, a single "*.tex" file in the current
    directory is used; zero or several candidates are reported as errors.
    """
    options = Options()
    try:
        paths = options.parse(sys.argv[1:])
    except getopt.error:
        error(options, sys.exc_info()[1])
    if not paths:
        # attempt to locate single .tex file in current directory:
        paths = glob.glob("*.tex")
        if not paths:
            error(options, "No file to process.")
        if len(paths) > 1:
            error(options, "Could not deduce which files should be processed.")
    #
    # parameters are processed, let's go!
    #
    for path in paths:
        job = Job(options, path)
        job.build()
|  | 401 |  | 
|  | 402 |  | 
def l2hoption(fp, option, value):
    """Write a Perl assignment ``$option = "value";`` to `fp`.

    Nothing is written when `value` is false (None, '', 0, ...).  The value
    is converted with str() and escaped for a Perl double-quoted string.
    """
    if not value:
        return
    perl_value = string_to_perl(str(value))
    fp.write('$%s = "%s";\n' % (option, perl_value))
|  | 406 |  | 
|  | 407 |  | 
# Translation table for string_to_perl(): every character with code 1-255
# maps to itself, except the three that are special inside a Perl
# double-quoted string, which gain a backslash.
_to_perl = {}
for c in map(chr, range(1, 256)):
    _to_perl[c] = c
_to_perl["@"] = "\\@"
_to_perl["$"] = "\\$"
_to_perl['"'] = '\\"'

def string_to_perl(s):
    """Return `s` escaped for use inside a Perl double-quoted string.

    '@', '$' and '"' are prefixed with a backslash; every other character
    is copied unchanged.  Characters missing from the table (NUL, or code
    points above 255) are passed through as well instead of raising
    TypeError.
    """
    # NOTE(review): backslashes are copied through unescaped -- confirm that
    # this is intended for the strings passed in.
    return "".join([_to_perl.get(ch, ch) for ch in s])
|  | 417 |  | 
|  | 418 |  | 
def check_for_bibtex(filename):
    """Return true if the file `filename` contains the text ``\\bibdata{``."""
    aux = open(filename)
    contents = aux.read()
    aux.close()
    return contents.find(r"\bibdata{") >= 0
|  | 424 |  | 
def uniqify_module_table(filename):
    """Drop the last line of `filename` if it repeats the line before it.

    The file is rewritten in place.  Only the final two lines are compared;
    duplicates elsewhere in the file are left alone.  Both file handles are
    closed explicitly so that the rewritten contents are flushed to disk
    before the caller runs the next external command.
    """
    fp = open(filename)
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    if len(lines) > 1 and lines[-1] == lines[-2]:
        del lines[-1]
    fp = open(filename, "w")
    try:
        fp.writelines(lines)
    finally:
        fp.close()
|  | 431 |  | 
|  | 432 |  | 
def new_index(filename, label="genindex"):
    """Create `filename` holding an empty LaTeX index labelled `label`.

    Any existing file of that name is overwritten.
    """
    template = ("\\\n"
                "\\begin{theindex}\n"
                "\\label{%s}\n"
                "\\end{theindex}\n")
    out = open(filename, "w")
    out.write(template % label)
    out.close()
|  | 441 |  | 
|  | 442 |  | 
# Run the build only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()