blob: c27659627a477899afdbee33b91fdd5dcd80e4d9 [file] [log] [blame]
r""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.

   Written by Raymond D. Hettinger <python at rcn.com>
   Copyright (c) 2003 Python Software Foundation.  All rights reserved.

Designed to catch common markup errors including:
* Unbalanced or mismatched parentheses, brackets, and braces.
* Unbalanced or mismatched \begin and \end blocks.
* Misspelled or invalid LaTeX commands.
* Use of forward slashes instead of backslashes for commands.
* Table line size mismatches (only \lineii used in a tableii).

Command line usage:
    python texcheck.py [-h] [-m] [-f] [-d] [-v] [-s lineno] [-k keyword] foobar.tex

Options:
    -m          Munge parentheses and brackets.  [0,n) would normally mismatch.
    -k keyword: Keyword is a valid LaTeX command.  Do not include the backslash.
    -f:         Forward-slash warnings suppressed.
    -d:         Delimiter check only (useful for non-LaTeX files).
    -h:         Help
    -s lineno:  Start at lineno (useful for skipping complex sections).
    -v:         Verbose.  Shows current delimiter and unclosed delimiters.
"""
25
Raymond Hettinger71e00332003-05-10 03:30:13 +000026import re
27import sets
28import sys
29import getopt
30from itertools import izip, count, islice
31
# Whitespace-separated list of the LaTeX commands accepted as valid by
# checkit().  Kept as a raw string so the backslashes are literal; it is
# split on whitespace and turned into a set at check time, so the layout
# below is purely for readability.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
    \LaTeX \copyright \memberline \backslash \pi \centerline
    \caption \vspace \textwidth \menuselection \textless
    \makevar \csimplemacro \bfcode \sub \release
    \email \kwindex \refexmodindex \filenq \e
    \exindex \linev \newsgroup \verbatim \setshortversion
"""
61
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    C_lineno and c_symbol are the line number and text of the closing
    delimiter just encountered.  Openers is the stack (a list) of pending
    (lineno, symbol) pairs; its last entry is popped by this call, whether
    or not it turns out to match.  Pairmap maps a closing symbol to a
    container of the opening symbols it may close; a closer that is not a
    key in pairmap (such as an environment name) only matches an
    identical opener.

    Returns None on a match.  Raises Exception with a descriptive message
    when the stack is empty or when the popped opener does not match.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        msg = "Delimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception(msg)

    # Closers without a pairmap entry fall back to matching themselves.
    acceptable = pairmap.get(c_symbol, [c_symbol])
    if o_symbol in acceptable:
        return

    msg = "Opener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
    raise Exception(msg)
72
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (an open file works).

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -f          forward slash warnings to be skipped
        -d          delimiters only checking
        -v          verbose listing on delimiters
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Warnings and the final summary are printed to standard output and
    the function returns 0.  A closing delimiter that does not match the
    most recent opener aborts the scan with the Exception raised by
    matchclose().
    """

    texcmd = re.compile(r'\\[A-Za-z]+')

    validcmds = set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')

    openers = []                            # Stack of pending open delimiters
    bracestack = []                         # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    startline = int(opts.get('-s', '1'))
    if startline < 1:
        # islice() rejects a negative start, so treat 0 or less as line 1.
        startline = 1
    lineno = 0                              # Stays 0 if nothing is scanned

    for offset, line in enumerate(islice(source, startline - 1, None)):
        lineno = startline + offset
        line = line.rstrip()

        if '/' in line and '-f' not in opts and '-d' not in opts:
            # Warn whenever forward slashes encountered
            print('Warning, forward slash on line %d: %s' % (lineno, line))

        if '-d' not in opts:
            # Validate commands
            nc = line.find(r'\newcommand')
            if nc != -1:
                # Register the newly defined command, but only when the
                # {name} group is actually present on this line.
                start = line.find('{', nc)
                if start != -1:
                    end = line.find('}', start)
                    if end != -1:
                        validcmds.add(line[start + 1:end])
            for cmd in texcmd.findall(line):
                if cmd not in validcmds:
                    # cmd already carries its leading backslash.
                    print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check balancing of open/close parenthesis and brackets
        for begend, name, punct in delimiters.findall(line):
            if '-v' in opts:
                # First half of the verbose line; the stack is appended
                # below once this delimiter has been processed.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and '-d' not in opts:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and '-d' not in opts:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if '-v' in opts:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opener, closer in braces.findall(line):
            if opener == '{':
                bracestack.append(lineno)
            if closer == '}':
                try:
                    bracestack.pop()
                except IndexError:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))
            if '-v' in opts:
                print(' -->  %s' % (bracestack,))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

    # Remember the last scanned line before lineno is reused below.
    lastline = lineno
    for lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, lineno))
    for lineno in bracestack:
        print("Unmatched { on line %d" % (lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0
178
def main(args=None):
    """Run the checker from the command line and return an exit status.

    Args is the list of command line arguments without the program name;
    when it is None, sys.argv[1:] is used.

    Returns 0 after printing the help text or after a completed check,
    1 when no file name was given, and 2 when the file cannot be opened.
    An unrecognized option propagates getopt.GetoptError, and a delimiter
    mismatch propagates the exception raised during the check.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mfdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # -k may be repeated, so collect from the item list, not the dict.
    morecmds = [v for k, v in optitems if k == '-k']

    try:
        f = open(arglist[0])
    except IOError:
        print('Cannot open file %s.' % arglist[0])
        return 2

    # Close the file even if the check aborts on a delimiter mismatch.
    try:
        return checkit(f, opts, morecmds)
    finally:
        f.close()
201
if __name__ == "__main__":
    # Script entry point: run the checker and pass its status to the shell.
    status = main()
    sys.exit(status)
204