blob: bc31c38a809873c46b644f298eda37b5e4d414e1 [file] [log] [blame]
Raymond Hettinger71e00332003-05-10 03:30:13 +00001""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.
2
3 Written by Raymond D. Hettinger <python at rcn.com>
4 Copyright (c) 2003 Python Software Foundation. All rights reserved.
5
6Designed to catch common markup errors including:
7* Unbalanced or mismatched parentheses, brackets, and braces.
8* Unbalanced or mismatched \begin and \end blocks.
9* Misspelled or invalid LaTeX commands.
10* Use of forward slashes instead of backslashes for commands.
Raymond Hettinger4f0c6b22003-05-10 09:04:37 +000011* Table line size mismatches (only \lineii used in a tableii).
Raymond Hettinger71e00332003-05-10 03:30:13 +000012
13Command line usage:
14 python texcheck.py [-h] [-m] [-d] [-v] [-s lineno] [-k keyword] foobar.tex
15
16Options:
Raymond Hettinger62aa9942003-05-12 23:33:28 +000017 -m Munge parenthesis and brackets. [0,n) would normally mismatch.
18 -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
Raymond Hettinger71e00332003-05-10 03:30:13 +000019 -d: Delimiter check only (useful for non-LaTeX files).
20 -h: Help
21 -s lineno: Start at lineno (useful for skipping complex sections).
22 -v: Verbose. Shows current delimiter and unclosed delimiters.
23"""
24
Raymond Hettinger71e00332003-05-10 03:30:13 +000025import re
26import sets
27import sys
28import getopt
29from itertools import izip, count, islice
30
# Whitespace-separated list of the LaTeX commands (each including its leading
# backslash) that the checker accepts as valid.  The string is only ever
# consumed through cmdstr.split(), so layout and repeated entries are harmless.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
    \LaTeX \copyright \memberline \backslash \pi \centerline
    \caption \vspace \textwidth \menuselection \textless
    \makevar \csimplemacro \menuselection \bfcode \sub \release
    \email \kwindex \refexmodindex \filenq \e \menuselection
    \exindex \linev \newsgroup \verbatim \setshortversion
"""
60
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that a closing delimiter matches the most recent opening delimiter.

    c_lineno -- line number on which the closer was found
    c_symbol -- the closer: a punctuation character or an environment name
    openers  -- stack (list) of (lineno, symbol) tuples for pending openers;
                the top entry is popped by this call
    pairmap  -- maps a closing punctuation character to the opener(s) it may
                close; symbols absent from the map must match themselves

    Returns None when the popped opener is acceptable for the closer.
    Raises Exception with a descriptive message when the stack is empty or
    when the popped opener does not match.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        msg = "Delimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
        # Call form of raise: same behavior as "raise Exception, msg" on
        # Python 2, and also valid syntax on Python 3.
        raise Exception(msg)
    # Symbols not in pairmap (e.g. \begin/\end names) must equal the opener.
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    msg = "Opener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
    raise Exception(msg)
71
72def checkit(source, opts, morecmds=[]):
Raymond Hettinger0fd525f2003-05-10 07:41:55 +000073 """Check the LaTeX formatting in a sequence of lines.
Raymond Hettinger71e00332003-05-10 03:30:13 +000074
75 Opts is a mapping of options to option values if any:
76 -m munge parenthesis and brackets
Raymond Hettinger71e00332003-05-10 03:30:13 +000077 -d delimiters only checking
Raymond Hettinger071b0bc2003-05-14 18:15:55 +000078 -v verbose listing of delimiters
Raymond Hettinger71e00332003-05-10 03:30:13 +000079 -s lineno: linenumber to start scan (default is 1).
80
Raymond Hettinger0fd525f2003-05-10 07:41:55 +000081 Morecmds is a sequence of LaTeX commands (without backslashes) that
Raymond Hettinger71e00332003-05-10 03:30:13 +000082 are to be considered valid in the scan.
83 """
84
85 texcmd = re.compile(r'\\[A-Za-z]+')
Raymond Hettinger071b0bc2003-05-14 18:15:55 +000086 falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash
Raymond Hettinger71e00332003-05-10 03:30:13 +000087
88 validcmds = sets.Set(cmdstr.split())
89 for cmd in morecmds:
90 validcmds.add('\\' + cmd)
91
Raymond Hettinger71e00332003-05-10 03:30:13 +000092 if '-m' in opts:
93 pairmap = {']':'[(', ')':'(['} # Munged openers
94 else:
95 pairmap = {']':'[', ')':'('} # Normal opener for a given closer
96 openpunct = sets.Set('([') # Set of valid openers
97
98 delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
Raymond Hettinger62aa9942003-05-12 23:33:28 +000099 braces = re.compile(r'({)|(})')
100
101 openers = [] # Stack of pending open delimiters
102 bracestack = [] # Stack of pending open braces
Raymond Hettinger71e00332003-05-10 03:30:13 +0000103
Raymond Hettinger0fd525f2003-05-10 07:41:55 +0000104 tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
105 tableline = re.compile(r'\\line([iv]+){')
106 tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
107 tablelevel = ''
108 tablestartline = 0
109
Raymond Hettinger71e00332003-05-10 03:30:13 +0000110 startline = int(opts.get('-s', '1'))
111 lineno = 0
112
113 for lineno, line in izip(count(startline), islice(source, startline-1, None)):
114 line = line.rstrip()
115
Raymond Hettinger071b0bc2003-05-14 18:15:55 +0000116 if '/' in line and '-d' not in opts:
117 # Warn whenever forward slashes encountered with a LaTeX command
118 for cmd in falsetexcmd.findall(line):
119 if '\\' + cmd in validcmds:
120 print 'Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd)
Raymond Hettinger71e00332003-05-10 03:30:13 +0000121
122 if '-d' not in opts:
123 # Validate commands
124 nc = line.find(r'\newcommand')
125 if nc != -1:
126 start = line.find('{', nc)
127 end = line.find('}', start)
128 validcmds.add(line[start+1:end])
129 for cmd in texcmd.findall(line):
130 if cmd not in validcmds:
131 print r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd)
132
Raymond Hettinger62aa9942003-05-12 23:33:28 +0000133 # Check balancing of open/close parenthesis and brackets
Raymond Hettinger71e00332003-05-10 03:30:13 +0000134 for begend, name, punct in delimiters.findall(line):
135 if '-v' in opts:
136 print lineno, '|', begend, name, punct,
137 if begend == 'begin' and '-d' not in opts:
138 openers.append((lineno, name))
139 elif punct in openpunct:
140 openers.append((lineno, punct))
141 elif begend == 'end' and '-d' not in opts:
142 matchclose(lineno, name, openers, pairmap)
143 elif punct in pairmap:
144 matchclose(lineno, punct, openers, pairmap)
145 if '-v' in opts:
146 print ' --> ', openers
147
Raymond Hettinger62aa9942003-05-12 23:33:28 +0000148 # Balance opening and closing braces
149 for open, close in braces.findall(line):
150 if open == '{':
151 bracestack.append(lineno)
152 if close == '}':
153 try:
154 bracestack.pop()
155 except IndexError:
156 print r'Warning, unmatched } on line %s.' % (lineno,)
157 if '-v' in opts:
158 print ' --> ', bracestack
159
Raymond Hettinger4f0c6b22003-05-10 09:04:37 +0000160 # Check table levels (make sure lineii only inside tableii)
Raymond Hettinger0fd525f2003-05-10 07:41:55 +0000161 m = tablestart.search(line)
162 if m:
163 tablelevel = m.group(1)
164 tablestartline = lineno
165 m = tableline.search(line)
166 if m and m.group(1) != tablelevel:
167 print r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline)
168 if tableend.search(line):
169 tablelevel = ''
170
Raymond Hettinger4f0c6b22003-05-10 09:04:37 +0000171 lastline = lineno
Raymond Hettinger71e00332003-05-10 03:30:13 +0000172 for lineno, symbol in openers:
Raymond Hettinger0fd525f2003-05-10 07:41:55 +0000173 print "Unmatched open delimiter '%s' on line %d" % (symbol, lineno)
Raymond Hettinger62aa9942003-05-12 23:33:28 +0000174 for lineno in bracestack:
175 print "Unmatched { on line %d" % (lineno,)
Raymond Hettinger4f0c6b22003-05-10 09:04:37 +0000176 print 'Done checking %d lines.' % (lastline,)
Raymond Hettinger71e00332003-05-10 03:30:13 +0000177 return 0
178
def main(args=None):
    """Command line entry point.

    args -- list of command line arguments; defaults to sys.argv[1:].

    Returns the process exit status:
        0  help was shown (-h or no arguments), or the check completed
        1  no file name was given
        2  the file could not be opened
    Errors from getopt or from the check itself propagate as exceptions.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or args == []:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # -k may be repeated, so collect every value rather than using opts.
    morecmds = [v for k, v in optitems if k == '-k']

    try:
        f = open(arglist[0])
    except IOError:
        print('Cannot open file %s.' % arglist[0])
        return 2

    # Make sure the file is closed even if the check raises.
    try:
        return checkit(f, opts, morecmds)
    finally:
        f.close()
201
# Run the checker when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
204