| """ TeXcheck.py -- rough syntax checking on Python style LaTeX documents. |
| |
| Written by Raymond D. Hettinger <python at rcn.com> |
| Copyright (c) 2003 Python Software Foundation. All rights reserved. |
| |
| Designed to catch common markup errors including: |
| * Unbalanced or mismatched parentheses, brackets, and braces. |
| * Unbalanced or mismatched \\begin and \\end blocks. |
| * Misspelled or invalid LaTeX commands. |
| * Use of forward slashes instead of backslashes for commands. |
| * Table line size mismatches. |
| |
| Sample command line usage: |
| python texcheck.py -k chapterheading -m lib/librandomtex *.tex |
| |
| Options: |
| -m: Munge parentheses and brackets. [0,n) would normally mismatch. |
| -k keyword: Keyword is a valid LaTeX command. Do not include the backslash. |
| -d: Delimiter check only (useful for non-LaTeX files). |
| -h: Help |
| -s lineno: Start at lineno (useful for skipping complex sections). |
| -v: Verbose. Trace the matching of \\begin and \\end blocks. |
| """ |
| |
| import re |
| import sys |
| import getopt |
| from itertools import izip, count, islice |
| import glob |
| |
# Whitespace-separated list of the LaTeX commands that checkit() accepts as
# valid.  The string is split on whitespace and turned into a set, so the
# layout and ordering below carry no meaning.  Additional commands can be
# supplied at run time with the -k option or are picked up from \newcommand
# lines in the scanned document.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
    \LaTeX \copyright \memberline \backslash \pi \centerline
    \caption \vspace \textwidth \menuselection \textless
    \makevar \csimplemacro \bfcode \sub \release
    \email \kwindex \refexmodindex \filenq \e
    \exindex \linev \newsgroup \verbatim \setshortversion
    \author \authoraddress \paragraph \subparagraph \cmemberline
    \textbar \C \seelink
"""
| |
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that a closing delimiter matches the most recent opener.

    c_lineno and c_symbol describe the closing delimiter just encountered.
    openers is the stack (a list) of pending ``(lineno, symbol)`` openers;
    its newest entry is popped.  pairmap maps a closing punctuation
    character to the opener(s) allowed to precede it; a closer that is not
    a key of pairmap (such as an environment name) must be identical to
    the popped opener.

    A diagnostic is printed when the stack is empty or when the popped
    opener does not pair with the closer.  Always returns None.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        # Nothing is pending, so this closer has no partner at all.
        print("\nDelimiter mismatch. On line %d, encountered closing '%s' "
              "without corresponding open" % (c_lineno, c_symbol))
        return

    acceptable = pairmap.get(c_symbol, [c_symbol])
    if o_symbol not in acceptable:
        print("\nOpener '%s' on line %d was not closed before encountering "
              "'%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno))
| |
def checkit(source, opts, morecmds=None):
    """Check the LaTeX formatting in a sequence of lines.

    source is an iterable of text lines (for example an open file).

    opts is a mapping of options to option values, if any:
        -m          munge parentheses and brackets (either opener may be
                    closed by either closer)
        -d          delimiters only checking
        -v          verbose trace of delimiter matching
        -s lineno   line number at which to start the scan (default is 1)

    morecmds is an optional sequence of LaTeX commands (without
    backslashes) that are to be considered valid in the scan.

    All findings are printed to standard output.  Always returns 0.
    """

    texcmd = re.compile(r'\\[A-Za-z]+')
    falsetexcmd = re.compile(r'\/([A-Za-z]+)')  # Mismarked with forward slash

    validcmds = set(cmdstr.split())
    for cmd in morecmds or ():
        validcmds.add('\\' + cmd)

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')
    # The class must be A-Za-z: an A-z range would also admit the
    # punctuation characters lying between 'Z' and 'a' ([ \ ] ^ _ `).
    doubledwords = re.compile(r'(\b[A-Za-z]+\b) \b\1\b')
    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')

    openers = []        # Stack of pending open delimiters
    bracestack = []     # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    verbose = '-v' in opts
    delimsonly = '-d' in opts
    startline = int(opts.get('-s', '1'))
    lastline = 0        # Number of the last line scanned (0 if none)

    for offset, line in enumerate(islice(source, startline - 1, None)):
        lineno = lastline = startline + offset
        line = line.rstrip()

        # Check balancing of open/close parentheses, brackets, and
        # begin/end blocks
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # No newline here: the stack state printed below (and any
                # mismatch report in between) continues this trace entry.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and not delimsonly:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and not delimsonly:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opener, closer in braces.findall(line):
            if opener == '{':
                bracestack.append(lineno)
            if closer == '}':
                try:
                    bracestack.pop()
                except IndexError:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))

        # Optionally, skip LaTeX specific checks
        if delimsonly:
            continue

        # Warn whenever forward slashes are encountered with a LaTeX command.
        # Lines mentioning urls or /rfc822 are skipped to avoid false
        # positives.
        if not ('822' in line or '.html' in line):
            for cmd in falsetexcmd.findall(line):
                if '\\' + cmd in validcmds:
                    print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd))

        # Check for markup requiring {} for correct spacing
        for cmd in spacingmarkup.findall(line):
            print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno))

        # Validate commands.  A \newcommand line registers the command it
        # defines (the text of the first {...} group) as valid.
        nc = line.find(r'\newcommand')
        if nc != -1:
            start = line.find('{', nc)
            end = line.find('}', start)
            # Only register when a complete {...} group is present.
            if start != -1 and end != -1:
                validcmds.add(line[start + 1:end])
        for cmd in texcmd.findall(line):
            if cmd not in validcmds:
                print(r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

        # Style guide warnings
        if 'e.g.' in line or 'i.e.' in line:
            print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,))

        for dw in doubledwords.findall(line):
            print(r'Doubled word warning. "%s" on line %d' % (dw, lineno))

    # Anything still on a stack at end of input was never closed.
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    for b_lineno in bracestack:
        print("Unmatched { on line %d" % (b_lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0
| |
def main(args=None):
    """Command-line entry point; returns a process exit status.

    args is the argument list without the program name; when it is None,
    sys.argv[1:] is used.  File arguments containing '*' or '?' are
    expanded with glob.

    Returns 0 after printing the help text, 1 when there is no file to
    check (none given, or no wildcard matched anything), 2 as soon as a
    file cannot be opened, and otherwise the largest status returned by
    checkit() over all files.  Option parsing errors raised by
    getopt.getopt() are not caught here.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a fresh list.  Splicing glob results into
    # arglist while iterating over it would skip the argument that follows
    # a pattern with no matches.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)

    if not filenames:
        # Every argument was a wildcard that matched nothing.
        print('No files match the given file specification')
        return 1

    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)
| |
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())