# Web-view provenance: blob 96acfc1ec8dff1020b47b102db4eb0d919aa1e13;
# blame: Raymond Hettinger, 71e0033, 2003-05-10 03:30:13 +0000.
r""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.

   Written by Raymond D. Hettinger <python at rcn.com>
   Copyright (c) 2003 Python Software Foundation.  All rights reserved.

Designed to catch common markup errors including:
* Unbalanced or mismatched parentheses, brackets, and braces.
* Unbalanced or mismatched \begin and \end blocks.
* Misspelled or invalid LaTeX commands.
* Use of forward slashes instead of backslashes for commands.

Command line usage:
    python texcheck.py [-h] [-m] [-f] [-d] [-v] [-s lineno] [-k keyword] foobar.tex

Options:
    -m:         Munge parentheses and brackets.  [0,n) would normally mismatch.
    -k keyword: Keyword is a valid LaTeX command.  Do not include the backslash.
    -f:         Forward-slash warnings suppressed.
    -d:         Delimiter check only (useful for non-LaTeX files).
    -h:         Help
    -s lineno:  Start at lineno (useful for skipping complex sections).
    -v:         Verbose.  Shows current delimiter and unclosed delimiters.
"""
24
# Todo:
#   Add tableiii/lineiii cross-checking
#   Add braces matching
28
import getopt
import re
import sys
from itertools import count, islice

# `sets` and `itertools.izip` exist only on Python 2.  Keep both names bound
# on Python 3 so that importing this module does not fail there.
try:
    import sets
except ImportError:
    sets = None
try:
    from itertools import izip
except ImportError:
    izip = zip
34
# Whitespace-separated list of the LaTeX commands accepted as valid.
# Consumers call cmdstr.split(), so every token must be a backslash command;
# the layout (line breaks, indentation) is irrelevant.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix
"""
58
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that a closing delimiter matches the most recent opener.

    c_lineno and c_symbol describe the closing delimiter just encountered.
    openers is the stack (a list) of pending (lineno, symbol) openers; its
    top entry is popped by this call.  pairmap maps a closing punctuation
    mark to the opener(s) it may close; a closer that is not in pairmap
    (an \\end block name) must equal the opener's symbol exactly.

    Returns None when the delimiters pair up.  Raises Exception with a
    descriptive message when the stack is empty or the top opener does
    not match.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        msg = "Delimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)
        raise Exception(msg)
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    msg = "Opener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)
    raise Exception(msg)
69
def checkit(source, opts, morecmds=None):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (for example an open file).

    Opts is a mapping of options to option values if any:
        -m          munge parentheses and brackets
        -f          forward slash warnings to be skipped
        -d          delimiters only checking
        -v          verbose listing on delimiters
        -s lineno:  line number to start scan (default is 1; smaller
                    values are treated as 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Warnings, unmatched openers and a final summary are printed to
    standard output.  Returns 0 when the scan completes.  A closing
    delimiter that does not match its opener propagates the Exception
    raised by matchclose().
    """
    delims_only = '-d' in opts
    verbose = '-v' in opts
    warn_slashes = '-f' not in opts

    if not delims_only:
        # The command table is only needed when commands are validated.
        texcmd = re.compile(r'\\[A-Za-z]+')
        validcmds = set(cmdstr.split())
        for cmd in morecmds or ():
            validcmds.add('\\' + cmd)

    openers = []                            # Stack of pending open delimiters

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')

    # Line numbers are 1-based; clamp so islice() never sees a negative start.
    startline = max(1, int(opts.get('-s', '1')))
    lastline = 0                            # Last line number actually read
    lineno = startline - 1

    for line in islice(source, startline - 1, None):
        lineno += 1
        lastline = lineno
        line = line.rstrip()

        if warn_slashes and '/' in line:
            # Warn whenever forward slashes encountered
            print('Warning, forward slash on line %d: %s' % (lineno, line))

        if not delims_only:
            # Validate commands
            nc = line.find(r'\newcommand')
            if nc != -1:
                # Register the newly defined command, but only when the
                # {name} argument is actually present on this line.
                start = line.find('{', nc)
                if start != -1:
                    end = line.find('}', start)
                    if end != -1:
                        validcmds.add(line[start + 1:end])
            for cmd in texcmd.findall(line):
                if cmd not in validcmds:
                    # cmd already carries its leading backslash.
                    print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check balancing of open/close markers (parens, brackets, etc)
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # Written without a newline so the stack state printed
                # below lands on the same output line.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and not delims_only:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and not delims_only:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                sys.stdout.write(' -->  %s\n' % (openers,))

    # Use separate names here so the summary below still reports the last
    # line scanned rather than the line of the last unmatched opener.
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    print('Done checking %d lines.' % (lastline,))
    return 0
141
def main(args=None):
    """Command line entry point: parse options and check one file.

    args is the argument list without the program name; when None,
    sys.argv[1:] is used.

    Returns an exit status: 0 after printing help or after a completed
    check, 1 when no file name was given, 2 when the file cannot be
    opened.  Option errors raised by getopt and delimiter mismatches
    raised during the check propagate to the caller.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mfdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # -k may be given several times; dict(optitems) keeps only the last
    # one, so collect them all from the raw option list.
    morecmds = [v for k, v in optitems if k == '-k']

    try:
        f = open(arglist[0])
    except IOError:
        print('Cannot open file %s.' % arglist[0])
        return 2

    # Close the file even when the check raises on a delimiter mismatch.
    try:
        return checkit(f, opts, morecmds)
    finally:
        f.close()
164
# Run the checker when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == '__main__':
    sys.exit(main())
167