blob: 5264b24176842a3c175858540369d20e64b84ee8 [file] [log] [blame]
r""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.

   Written by Raymond D. Hettinger <python at rcn.com>
   Copyright (c) 2003 Python Software Foundation.  All rights reserved.

Designed to catch common markup errors including:
* Unbalanced or mismatched parentheses, brackets, and braces.
* Unbalanced or mismatched \begin and \end blocks.
* Misspelled or invalid LaTeX commands.
* Use of forward slashes instead of backslashes for commands.
* Table line size mismatches (only \lineii used in a tableii).

Command line usage:
    python texcheck.py [-h] [-m] [-f] [-d] [-v] [-k keyword] [-s lineno] foobar.tex

Options:
    -m          Munge parentheses and brackets.  [0,n) would normally mismatch.
    -k keyword: Keyword is a valid LaTeX command.  Do not include the backslash.
    -f:         Forward-slash warnings suppressed.
    -d:         Delimiter check only (useful for non-LaTeX files).
    -h:         Help
    -s lineno:  Start at lineno (useful for skipping complex sections).
    -v:         Verbose.  Shows current delimiter and unclosed delimiters.
"""
25
Raymond Hettinger71e00332003-05-10 03:30:13 +000026import re
27import sets
28import sys
29import getopt
30from itertools import izip, count, islice
31
# Whitespace-separated list of the LaTeX commands accepted as valid.
# checkit() splits this string into a set of command names (each with its
# leading backslash) and extends that set with any -k keywords and with
# names introduced by \newcommand in the scanned document.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
"""
56
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    c_lineno and c_symbol are the line number and text of the closer just
    seen.  openers is the stack (a list) of pending (lineno, symbol)
    openers; its top entry is popped by this call.  pairmap maps a closing
    punctuation mark to a string of the openers it may pair with; a closer
    that is not a key of pairmap (an environment name from an end block)
    must equal the opener exactly.

    Returns None when the pair matches.  Raises Exception with a
    descriptive message when the stack is empty or the popped opener is
    not an acceptable partner for c_symbol.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        # Nothing is pending, so this closer has no partner at all.
        msg = "Delimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
        raise Exception(msg)
    # Closers absent from pairmap fall back to requiring an identical name.
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    msg = "Opener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
    raise Exception(msg)
67
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is any iterable of lines (main() passes an open file).

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -f          forward slash warnings to be skipped
        -d          delimiters only checking
        -v          verbose listing on delimiters
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Warnings and a closing summary are printed to standard output.  A
    closer that does not pair with the most recent opener makes
    matchclose() raise Exception, which ends the scan early.  Returns 0
    when the scan runs to completion.
    """

    texcmd = re.compile(r'\\[A-Za-z]+')

    validcmds = sets.Set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    openers = []                            # Stack of pending open delimiters

    if '-m' in opts:
        pairmap = {']':'[(', ')':'(['}      # Munged openers
    else:
        pairmap = {']':'[', ')':'('}        # Normal opener for a given closer
    openpunct = sets.Set('([')              # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    startline = int(opts.get('-s', '1'))
    lineno = 0      # Stays 0 if there is nothing to scan from startline on

    for lineno, line in izip(count(startline), islice(source, startline-1, None)):
        line = line.rstrip()

        if '-f' not in opts and '/' in line:
            # Warn whenever forward slashes encountered
            print('Warning, forward slash on line %d: %s' % (lineno, line))

        if '-d' not in opts:
            # Validate commands
            nc = line.find(r'\newcommand')
            if nc != -1:
                # Register the newly defined command, but only when its
                # braced name is complete on this line; otherwise the
                # slice would add a meaningless fragment.
                start = line.find('{', nc)
                end = line.find('}', start)
                if start != -1 and end != -1:
                    validcmds.add(line[start+1:end])
            for cmd in texcmd.findall(line):
                if cmd not in validcmds:
                    # cmd already carries its leading backslash.
                    print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check balancing of open/close markers (parens, brackets, etc)
        for begend, name, punct in delimiters.findall(line):
            if '-v' in opts:
                # Written before matching so the offending delimiter is
                # visible even if matchclose() raises.  No newline yet:
                # the stack listing below completes the line.
                sys.stdout.write('%s | %s %s %s' % (lineno, begend, name, punct))
            if begend == 'begin' and '-d' not in opts:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and '-d' not in opts:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if '-v' in opts:
                sys.stdout.write('  -->  %s\n' % (openers,))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        # Outside any table tablelevel is '', so every table line found
        # there is reported as well.
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

    lastline = lineno
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    print('Done checking %d lines.' % (lastline,))
    return 0
157
def main(args=None):
    """Command line entry point.

    args is the list of command line arguments without the program name;
    when None, sys.argv[1:] is used.  Options are parsed with getopt using
    the spec "k:mfdhs:v"; the first remaining argument is the file to
    check and every -k value is passed on as an extra valid command.

    Returns 0 after printing the help text (-h given, or an empty
    argument list), 1 when no file was named, 2 when the file cannot be
    opened, and otherwise the result of checkit().
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mfdhs:v")
    opts = dict(optitems)
    if '-h' in opts or args==[]:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # -k may be repeated, so collect from the pair list rather than from
    # the opts dict, which keeps only one value per option.
    morecmds = [v for k,v in optitems if k=='-k']

    try:
        f = open(arglist[0])
    except IOError:
        print('Cannot open file %s.' % arglist[0])
        return 2

    # Close the file even if the check stops early with an exception.
    try:
        return checkit(f, opts, morecmds)
    finally:
        f.close()
180
# Script entry point: run the checker and hand main()'s return value to
# the interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
183