Raymond Hettinger | 71e0033 | 2003-05-10 03:30:13 +0000 | [diff] [blame] | 1 | """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents. |
| 2 | |
| 3 | Written by Raymond D. Hettinger <python at rcn.com> |
| 4 | Copyright (c) 2003 Python Software Foundation. All rights reserved. |
| 5 | |
| 6 | Designed to catch common markup errors including: |
| 7 | * Unbalanced or mismatched parentheses, brackets, and braces. |
| 8 | * Unbalanced or mismatched \begin and \end blocks. |
| 9 | * Misspelled or invalid LaTeX commands. |
| 10 | * Use of forward slashes instead of backslashes for commands. |
| 11 | |
| 12 | Command line usage: |
| 13 | python texcheck.py [-h] [-m] [-f] [-d] [-v] [-s lineno] [-k keyword] foobar.tex |
| 14 | |
| 15 | Options: |
| 16 | -m: Munge parentheses and brackets. [0,n) would normally mismatch. |
| 17 | -k keyword: Keyword is a valid LaTeX command. Do not include the backslash. |
| 18 | -f: Forward-slash warnings suppressed. |
| 19 | -d: Delimiter check only (useful for non-LaTeX files). |
| 20 | -h: Help |
| 21 | -s lineno: Start at lineno (useful for skipping complex sections). |
| 22 | -v: Verbose. Shows current delimiter and unclosed delimiters. |
| 23 | """ |
| 24 | |
Raymond Hettinger | 71e0033 | 2003-05-10 03:30:13 +0000 | [diff] [blame] | 25 | import re |
| 26 | import sets |
| 27 | import sys |
| 28 | import getopt |
| 29 | from itertools import izip, count, islice |
| 30 | |
# Whitespace-separated list of the LaTeX commands (each with its leading
# backslash) that are accepted as valid.  checkit() splits this string to
# seed its set of known commands, so layout inside the literal is free-form.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
"""
| 55 | |
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    c_lineno -- line number on which the closing delimiter was found
    c_symbol -- the closing delimiter: a punctuation character or the
                name given to an end-of-environment command
    openers  -- stack (list) of (lineno, symbol) pairs for delimiters
                that are still open; the top entry is popped, even when
                it turns out not to match
    pairmap  -- maps a closing punctuation character to the opener(s)
                allowed to precede it; a closer that is not a key (an
                environment name) must be matched by an identical opener

    Returns None when the delimiters pair up.  Raises Exception with a
    descriptive message when the stack is empty or the top opener is not
    an acceptable partner for c_symbol.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        msg = "Delimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
        # Call form of raise: accepted by every Python version, unlike the
        # old "raise Exception, msg" statement form.
        raise Exception(msg)
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    msg = "Opener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
    raise Exception(msg)
| 66 | |
def checkit(source, opts, morecmds=None):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (for example an open file).

    Opts is a mapping of options to option values if any:
        -m          munge parentheses and brackets (either may close the other)
        -f          forward slash warnings to be skipped
        -d          delimiters only checking
        -v          verbose listing on delimiters
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Warnings are printed to standard output and 0 is returned.  A closing
    delimiter that does not pair with the most recent opener aborts the
    scan with the Exception raised by matchclose().
    """
    if morecmds is None:
        morecmds = ()

    texcmd = re.compile(r'\\[A-Za-z]+')

    validcmds = sets.Set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    openers = []                            # Stack of pending open delimiters

    if '-m' in opts:
        pairmap = {']':'[(', ')':'(['}      # Munged openers
    else:
        pairmap = {']':'[', ')':'('}        # Normal opener for a given closer
    openpunct = sets.Set('([')              # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    startline = int(opts.get('-s', '1'))
    lineno = 0      # stays 0 when there are no lines to scan

    for lineno, line in izip(count(startline), islice(source, startline-1, None)):
        line = line.rstrip()

        if '-f' not in opts and '/' in line:
            # Warn whenever forward slashes encountered
            print('Warning, forward slash on line %d: %s' % (lineno, line))

        if '-d' not in opts:
            # Validate commands.  A \newcommand{\name} definition makes
            # \name valid from this line onward.
            nc = line.find(r'\newcommand')
            if nc != -1:
                start = line.find('{', nc)
                if start != -1:
                    end = line.find('}', start)
                    # Only register the name when both braces are present;
                    # otherwise the -1 from find() would slice out garbage.
                    if end != -1:
                        validcmds.add(line[start+1:end])
            for cmd in texcmd.findall(line):
                if cmd not in validcmds:
                    # cmd already carries its leading backslash.
                    print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check balancing of open/close markers (parens, brackets, etc)
        for begend, name, punct in delimiters.findall(line):
            if '-v' in opts:
                # No newline yet: the opener stack is appended below.
                sys.stdout.write('%s | %s %s %s' % (lineno, begend, name, punct))
            if begend == 'begin' and '-d' not in opts:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and '-d' not in opts:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if '-v' in opts:
                sys.stdout.write('  -->  %s\n' % (openers,))

        # Check table levels: the roman-numeral suffix of a \lineXX entry
        # must equal the suffix of the table environment it appears in.
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

    # Report leftovers with their own loop variable so that lineno still
    # holds the last line scanned for the summary below.
    for open_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, open_lineno))
    print('Done checking %d lines.' % (lineno,))
    return 0
| 155 | |
def main(args=None):
    """Command line entry point.

    Args is the list of command line arguments (without the program
    name); when None, sys.argv[1:] is used.

    Returns the process exit status:
        0  help was shown, or the check ran to completion
        1  no file name was given
        2  the file could not be opened

    getopt.GetoptError from an unrecognized option and the Exception
    raised by checkit() on a delimiter mismatch are not caught here.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mfdhs:v")
    opts = dict(optitems)
    if '-h' in opts or args==[]:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # -k may be repeated; dict() keeps only the last value, so collect
    # every keyword from the raw option list instead.
    morecmds = [v for k,v in optitems if k=='-k']

    try:
        f = open(arglist[0])
    except IOError:
        print('Cannot open file %s.' % arglist[0])
        return 2

    # Close the file whether the check finishes or raises.
    try:
        return checkit(f, opts, morecmds)
    finally:
        f.close()
| 178 | |
# Run the checker only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
| 181 | |