blob: ed3f42d2b6533d763a4ef3815b42c29e824c613e [file] [log] [blame]
r""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.

 Written by Raymond D. Hettinger <python at rcn.com>
 Copyright (c) 2003 Python Software Foundation. All rights reserved.

Designed to catch common markup errors including:
* Unbalanced or mismatched parentheses, brackets, and braces.
* Unbalanced or mismatched \begin and \end blocks.
* Misspelled or invalid LaTeX commands.
* Use of forward slashes instead of backslashes for commands.

Command line usage:
 python texcheck.py [-h] [-m] [-f] [-d] [-v] [-k keyword] [-s lineno] foobar.tex

Options:
 -m: Munge parentheses and brackets. [0,n) would normally mismatch.
 -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
 -f: Forward-slash warnings suppressed.
 -d: Delimiter check only (useful for non-LaTeX files).
 -h: Help
 -s lineno: Start at lineno (useful for skipping complex sections).
 -v: Verbose. Shows current delimiter and unclosed delimiters.
"""
24
Raymond Hettinger71e00332003-05-10 03:30:13 +000025import re
26import sets
27import sys
28import getopt
29from itertools import izip, count, islice
30
# Whitespace-separated table of LaTeX commands accepted as valid.  The
# string is only ever split on whitespace, so the line layout is free-form.
# It is a raw string because entries such as \n and \textrm would otherwise
# be read as escape sequences.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
"""
55
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    c_lineno, c_symbol -- line number and text of the closing delimiter
        (a punctuation character, or the environment name of an end marker).
    openers -- stack (list) of (lineno, symbol) pairs for the delimiters
        still open; the most recent entry is popped by this call.
    pairmap -- maps a closing punctuation character to the collection of
        openers it may pair with.  A symbol that is not a key in pairmap
        must be closed by an identical opener.

    Returns None when the delimiters pair up.  Raises Exception with a
    descriptive message when the stack is empty or the popped opener does
    not pair with c_symbol.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        msg = "Delimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
        raise Exception(msg)

    # Symbols without a pairmap entry (environment names) only match themselves.
    acceptable = pairmap.get(c_symbol, [c_symbol])
    if o_symbol not in acceptable:
        msg = "Opener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
        raise Exception(msg)
66
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (for example an open file).

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -f          forward slash warnings to be skipped
        -d          delimiters only checking
        -v          verbose listing on delimiters
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Warnings and the closing summary are printed to standard output and
    0 is returned.  A closing delimiter that does not pair with the most
    recent opener ends the scan with the Exception raised by matchclose().
    """
    delimiters_only = '-d' in opts
    verbose = '-v' in opts
    warn_slashes = '-f' not in opts

    texcmd = re.compile(r'\\[A-Za-z]+')

    # The command table is consulted only when commands are validated, so a
    # delimiter-only run does not need to build it.
    validcmds = set()
    if not delimiters_only:
        validcmds.update(cmdstr.split())
        validcmds.update(['\\' + cmd for cmd in morecmds])

    openers = []                            # Stack of pending open delimiters

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    startline = int(opts.get('-s', '1'))
    lineno = 0      # stays 0 when there is nothing to scan

    # Number the lines from startline so messages refer to real line numbers.
    for lineno, line in enumerate(islice(source, startline - 1, None), startline):
        line = line.rstrip()

        if warn_slashes and '/' in line:
            # Warn whenever forward slashes encountered
            print('Warning, forward slash on line %d: %s' % (lineno, line))

        if not delimiters_only:
            # Validate commands
            nc = line.find(r'\newcommand')
            if nc != -1:
                # Register the newly defined command; skip the line when the
                # braces around its name are not both present.
                start = line.find('{', nc)
                end = line.find('}', start)
                if start != -1 and end != -1:
                    validcmds.add(line[start+1:end])
            for cmd in texcmd.findall(line):
                if cmd not in validcmds:
                    # NOTE(review): cmd already starts with a backslash, so
                    # this message shows two of them; left as is.
                    print(r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd))

        # Check balancing of open/close markers (parens, brackets, etc)
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # No newline yet: the stack is appended to this same output
                # line once the delimiter has been processed.
                sys.stdout.write('%d | %s %s %s' % (lineno, begend, name, punct))
            if begend == 'begin' and not delimiters_only:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and not delimiters_only:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                sys.stdout.write(' %s %s\n' % (' --> ', openers))

        # Check table levels (make sure lineii only inside lineiii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

    # Remember where the scan stopped; the report loop below uses its own
    # variable so the summary is not overwritten by an opener's line number.
    lastline = lineno
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    print('Done checking %d lines.' % (lastline,))
    return 0
155
def main(args=None):
    """Command line entry point.

    args is the list of command line arguments without the program name;
    when None, sys.argv[1:] is used.

    Returns the exit status: 0 after printing the help text (requested
    with -h or when no arguments are given) or after a completed check,
    1 when no file name was supplied, and 2 when the file cannot be opened.
    Errors raised by getopt for unknown options are not caught here.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mfdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # -k may be given several times; dict() keeps only the last value, so
    # collect every keyword from the raw option list.
    morecmds = [v for k, v in optitems if k == '-k']

    try:
        f = open(arglist[0])
    except IOError:
        print('Cannot open file %s.' % arglist[0])
        return 2

    # Close the file even when the check stops with an exception.
    try:
        return checkit(f, opts, morecmds)
    finally:
        f.close()
178
# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
181