Raymond Hettinger | 71e0033 | 2003-05-10 03:30:13 +0000 | [diff] [blame^] | 1 | """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents. |
| 2 | |
| 3 | Written by Raymond D. Hettinger <python at rcn.com> |
| 4 | Copyright (c) 2003 Python Software Foundation. All rights reserved. |
| 5 | |
| 6 | Designed to catch common markup errors including: |
| 7 | * Unbalanced or mismatched parentheses, brackets, and braces. |
| 8 | * Unbalanced or mismatched \begin and \end blocks. |
| 9 | * Misspelled or invalid LaTeX commands. |
| 10 | * Use of forward slashes instead of backslashes for commands. |
| 11 | |
| 12 | Command line usage: |
| 13 | python texcheck.py [-h] [-m] [-f] [-d] [-v] [-s lineno] [-k keyword] foobar.tex |
| 14 | |
| 15 | Options: |
| 16 | -m: Munge parentheses and brackets. [0,n) would normally mismatch. |
| 17 | -k keyword: Keyword is a valid LaTeX command. Do not include the backslash. |
| 18 | -f: Forward-slash warnings suppressed. |
| 19 | -d: Delimiter check only (useful for non-LaTeX files). |
| 20 | -h: Help |
| 21 | -s lineno: Start at lineno (useful for skipping complex sections). |
| 22 | -v: Verbose. Shows current delimiter and unclosed delimiters. |
| 23 | """ |
| 24 | |
| 25 | # Todo: |
| 26 | # Add tableiii/lineiii cross-checking |
| 27 | # Add braces matching |
| 28 | |
| 29 | import re |
| 30 | import sets |
| 31 | import sys |
| 32 | import getopt |
| 33 | from itertools import izip, count, islice |
| 34 | |
# Whitespace-separated list of the LaTeX commands (each with its leading
# backslash) that are accepted as valid.  checkit() splits this string to
# seed its set of known commands.
cmdstr = r"""
\section \module \declaremodule \modulesynopsis \moduleauthor
\sectionauthor \versionadded \code \class \method \begin
\optional \var \ref \end \subsection \lineiii \hline \label
\indexii \textrm \ldots \keyword \stindex \index \item \note
\withsubitem \ttindex \footnote \citetitle \samp \opindex
\noindent \exception \strong \dfn \ctype \obindex \character
\indexiii \function \bifuncindex \refmodule \refbimodindex
\subsubsection \nodename \member \chapter \emph \ASCII \UNIX
\regexp \program \production \token \productioncont \term
\grammartoken \lineii \seemodule \file \EOF \documentclass
\usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
\tableofcontents \kbd \programopt \envvar \refstmodindex
\cfunction \constant \NULL \moreargs \cfuncline \cdata
\textasciicircum \n \ABC \setindexsubitem \versionchanged
\deprecated \seetext \newcommand \POSIX \pep \warning \rfc
\verbatiminput \methodline \textgreater \seetitle \lineiv
\funclineni \ulink \manpage \funcline \dataline \unspecified
\textbackslash \mimetype \mailheader \seepep \textunderscore
\longprogramopt \infinity \plusminus \shortversion \version
\refmodindex \seerfc \makeindex \makemodindex \renewcommand
\indexname \appendix
"""
| 58 | |
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    The most recent entry is popped off *openers* and compared with the
    closer that was just encountered.

    Arguments:
        c_lineno -- line number on which the closing delimiter was found
        c_symbol -- the closing delimiter (a punctuation character or an
                    environment name)
        openers  -- list used as a stack of (lineno, symbol) tuples for the
                    delimiters still open; it is modified in place
        pairmap  -- mapping from a closing punctuation character to the
                    opening characters that may legally precede it

    Returns None when the closer matches.  Raises Exception with a
    descriptive message when the stack is empty or the popped opener is
    not an acceptable partner for c_symbol.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        msg = "Delimiter mismatch.  On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
        raise Exception(msg)
    # A closer that is not a key of pairmap (an environment name) is only
    # matched by an opener with exactly the same symbol.
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    msg = "Opener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
    raise Exception(msg)
| 69 | |
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (for example an open file).

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -f          forward slash warnings to be skipped
        -d          delimiters only checking
        -v          verbose listing on delimiters
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Warnings and a closing summary are printed to standard output.  A
    closing delimiter that does not pair with the most recent opener makes
    matchclose() raise Exception.  Otherwise the function returns 0.
    """

    texcmd = re.compile(r'\\[A-Za-z]+')

    validcmds = set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    openers = []                            # Stack of pending open delimiters

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')

    startline = int(opts.get('-s', '1'))

    # Option lookups do not change during the scan, so do them once.
    warn_slash = '-f' not in opts
    check_cmds = '-d' not in opts
    verbose = '-v' in opts

    lineno = 0      # stays 0 when there is nothing to scan
    for lineno, line in enumerate(islice(source, startline - 1, None), startline):
        line = line.rstrip()

        if warn_slash and '/' in line:
            # Warn whenever forward slashes encountered
            print('Warning, forward slash on line %d: %s' % (lineno, line))

        if check_cmds:
            # Validate commands.  A name introduced by \newcommand{...} is
            # accepted from this line onward.
            nc = line.find(r'\newcommand')
            if nc != -1:
                start = line.find('{', nc)
                end = line.find('}', start)
                # Only register the name when both braces are on this line;
                # otherwise the slice would be unrelated text.
                if start != -1 and end != -1:
                    validcmds.add(line[start + 1:end])
            for cmd in texcmd.findall(line):
                if cmd not in validcmds:
                    # cmd already carries its leading backslash
                    print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check balancing of open/close markers (parens, brackets, etc)
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # No newline here: the opener stack is appended below.
                sys.stdout.write('%d | %s %s %s' % (lineno, begend, name, punct))
            if begend == 'begin' and check_cmds:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and check_cmds:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                sys.stdout.write('  -->  %s\n' % (openers,))

    # Separate names are used here so that lineno still holds the last
    # scanned line for the summary below.
    for o_lineno, o_symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (o_symbol, o_lineno))
    print('Done checking %d lines.' % (lineno,))
    return 0
| 141 | |
def main(args=None):
    """Command line entry point.

    Args is the list of command line arguments without the program name;
    when it is None, sys.argv[1:] is used.

    Returns the exit status:
        0 -- help was shown, or the file was checked
        1 -- no file name was given
        2 -- the file could not be opened

    getopt.getopt() raises its usual error for unrecognized options, and
    exceptions raised by checkit() propagate to the caller.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mfdhs:v")
    opts = dict(optitems)
    if '-h' in opts or args == []:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # -k may be given several times, so collect its values from the raw
    # option pairs; the dict keeps only one value per option.
    morecmds = [v for k, v in optitems if k == '-k']

    try:
        f = open(arglist[0])
    except IOError:
        print('Cannot open file %s.' % arglist[0])
        return 2

    # Close the file even when checkit() raises on a delimiter mismatch.
    try:
        return checkit(f, opts, morecmds)
    finally:
        f.close()
| 164 | |
# When run as a script, hand the value returned by main() to sys.exit().
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
| 167 | |