#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Check for stylistic and formal issues in .rst and .py
# files included in the documentation.
#
# 01/2009, Georg Brandl

# TODO: - wrong versions in versionadded/changed
#       - wrong markup after versionchanged directive
11
Georg Brandl45f53372009-01-03 21:15:20 +000012import os
13import re
14import sys
15import getopt
Julien Palardb9735422020-12-18 10:48:08 +010016from string import ascii_letters
Georg Brandl45f53372009-01-03 21:15:20 +000017from os.path import join, splitext, abspath, exists
18from collections import defaultdict
19
# Directive names that ship with docutils.
_docutils_directives = [
    'admonition', 'attention', 'caution', 'class', 'compound', 'container',
    'contents', 'csv-table', 'danger', 'date', 'default-role', 'epigraph',
    'error', 'figure', 'footer', 'header', 'highlights', 'hint', 'image',
    'important', 'include', 'line-block', 'list-table', 'meta', 'note',
    'parsed-literal', 'pull-quote', 'raw', 'replace',
    'restructuredtext-test-directive', 'role', 'rubric', 'sectnum', 'sidebar',
    'table', 'target-notes', 'tip', 'title', 'topic', 'unicode', 'warning',
]

# Sphinx and Python docs custom directive names.  Every entry ends up as
# one alternative of a regular expression, which is why
# 'deprecated(?!-removed)' can carry a lookahead.
_sphinx_directives = [
    'acks', 'attribute', 'autoattribute', 'autoclass', 'autodata',
    'autoexception', 'autofunction', 'automethod', 'automodule',
    'availability', 'centered', 'cfunction', 'class', 'classmethod', 'cmacro',
    'cmdoption', 'cmember', 'code-block', 'confval', 'cssclass', 'ctype',
    'currentmodule', 'cvar', 'data', 'decorator', 'decoratormethod',
    'deprecated-removed', 'deprecated(?!-removed)', 'describe', 'directive',
    'doctest', 'envvar', 'event', 'exception', 'function', 'glossary',
    'highlight', 'highlightlang', 'impl-detail', 'index', 'literalinclude',
    'method', 'miscnews', 'module', 'moduleauthor', 'opcode', 'pdbcommand',
    'productionlist', 'program', 'role', 'sectionauthor', 'seealso',
    'sourcecode', 'staticmethod', 'tabularcolumns', 'testcode', 'testoutput',
    'testsetup', 'toctree', 'todo', 'todolist', 'versionadded',
    'versionchanged',
]

# All known directive names, docutils ones first.
directives = _docutils_directives + _sphinx_directives

# One regex group that matches any known directive name.
all_directives = '(%s)' % '|'.join(directives)

# '.. name' followed by something other than the '::' a directive requires
# (and not preceded by a third dot).
seems_directive_re = re.compile(r'(?<!\.)\.\. %s([^a-z:]|:(?!:))' % all_directives)

# Text in single backquotes, delimited by spaces or the line boundaries.
default_role_re = re.compile(r'(^| )`\w([^`]*?\w)?`($| )')

# Fragments that look like reST markup; searched for in built HTML lines.
leaked_markup_re = re.compile(r'[a-z]::\s|`|\.\.\s*\w+:')
Georg Brandl45f53372009-01-03 21:15:20 +000049
50
# Registry: file suffix (such as '.rst') -> list of checker functions.
checkers = {}

# Attributes set on every checker function, with their default values.
checker_props = {'severity': 1, 'falsepositives': False}


def checker(*suffixes, **kwds):
    """Return a decorator that registers a function as a checker.

    The decorated function is appended to the ``checkers`` list of every
    suffix in *suffixes*.  Each property named in ``checker_props`` is set
    as an attribute on the function, taking its value from *kwds* when
    given there and from the default otherwise.  The function itself is
    returned unchanged.
    """
    def register(func):
        for name, default in checker_props.items():
            setattr(func, name, kwds.get(name, default))
        for ext in suffixes:
            if ext not in checkers:
                checkers[ext] = []
            checkers[ext].append(func)
        return func
    return register
65
66
@checker('.py', severity=4)
def check_syntax(fn, lines):
    """Check Python examples for valid syntax.

    *lines* are joined into one source string and compiled under the file
    name *fn*.  Yields ``(line_number, message)`` tuples: one with line 0
    when the source contains carriage returns (except on Windows), and one
    when the source does not compile.
    """
    code = ''.join(lines)
    if '\r' in code:
        if os.name != 'nt':
            yield 0, '\\r in code file'
        # Compile without the carriage returns either way.
        code = code.replace('\r', '')
    try:
        compile(code, fn, 'exec')
    except (SyntaxError, ValueError) as err:
        # compile() is documented to raise ValueError instead of
        # SyntaxError for source containing null bytes on some Python
        # versions.  The line number may be missing or None; report 0 in
        # that case so the result can still be formatted as an integer.
        yield getattr(err, 'lineno', None) or 0, 'not compilable: %s' % err
79
80
@checker('.rst', severity=2)
def check_suspicious_constructs(fn, lines):
    """Check for suspicious reST constructs.

    Yields ``(line_number, message)`` for lines that look like a directive
    written without its double colon, and for uses of the default role
    (single backquotes).  The default-role test is switched off from a
    ``.. productionlist::`` line until the next blank line.
    """
    in_production = False
    for number, text in enumerate(lines, start=1):
        if seems_directive_re.search(text):
            yield number, 'comment seems to be intended as a directive'
        if '.. productionlist::' in text:
            in_production = True
            continue
        if in_production:
            # A blank line ends the production list.
            if not text.strip():
                in_production = False
        elif default_role_re.search(text):
            yield number, 'default role used'
94
95
@checker('.py', '.rst')
def check_whitespace(fn, lines):
    """Check for carriage returns, tabs and trailing whitespace.

    Yields one ``(line_number, message)`` tuple per issue found; a single
    line can produce several.
    """
    for lno, line in enumerate(lines, 1):
        if '\r' in line:
            yield lno, '\\r in line'
        if '\t' in line:
            yield lno, 'OMG TABS!!!1'
        # Drop the line terminator only if there is one: the last line of a
        # file may lack it, and blindly cutting the final character would
        # then hide real trailing whitespace or invent a false positive.
        text = line[:-1] if line.endswith('\n') else line
        if text.rstrip(' \t') != text:
            yield lno, 'trailing whitespace'
Georg Brandld5097882009-01-03 21:30:40 +0000106
107
@checker('.rst', severity=0)
def check_line_length(fn, lines):
    """Check for line length; this checker is not run by default.

    Yields ``(line_number, "line too long")`` for every line longer than
    81 characters (the count includes the line terminator, if any), except
    for tables, lines containing a link, and function signatures.
    """
    for lno, line in enumerate(lines, 1):
        if len(line) <= 81:
            continue
        stripped = line.lstrip()
        # don't complain about tables, links and function signatures
        # (startswith() is safe on a line that is nothing but whitespace,
        # where indexing the first character would raise IndexError)
        if stripped.startswith(('+', '|')):
            continue
        if 'http://' in line or 'https://' in line:
            continue
        if stripped.startswith(('.. function',
                                '.. method',
                                '.. cfunction')):
            continue
        yield lno, "line too long"
120
121
@checker('.html', severity=2, falsepositives=True)
def check_leaked_markup(fn, lines):
    """Check HTML files for leaked reST markup; this only works if
    the HTML files have been built.

    Yields ``(line_number, message)`` for each line matched by
    ``leaked_markup_re``; the message includes the repr of the line.
    """
    for number, text in enumerate(lines, start=1):
        if leaked_markup_re.search(text) is None:
            continue
        yield number, 'possibly leaked markup: %r' % text
130
131
def hide_literal_blocks(lines):
    """Tool to remove literal blocks from given lines.

    A line ending in ``::`` starts a block; the blank and space-indented
    lines after it are replaced by empty lines, so line numbers are still
    meaningful.  The first other line ends the block and is passed through.
    """
    hiding = False
    for text in lines:
        if text.endswith("::\n"):
            # The introducing line itself is always passed through.
            hiding = True
        elif hiding and (text == "\n" or text.startswith(" ")):
            text = "\n"
        else:
            hiding = False
        yield text
148
149
def type_of_explicit_markup(line):
    """Classify a line that begins with ``.. ``.

    The patterns are tried in order against the start of *line* and the
    name of the first match is returned: 'directive', 'footnote',
    'citation', 'target' or 'substitution_definition'.  A line matching
    none of them is a 'comment'.
    """
    recognizers = (
        (fr'\.\. {all_directives}::', 'directive'),
        (r'\.\. \[[0-9]+\] ', 'footnote'),
        (r'\.\. \[[^\]]+\] ', 'citation'),
        (r'\.\. _.*[^_]: ', 'target'),
        (r'\.\. \|[^\|]*\| ', 'substitution_definition'),
    )
    for pattern, kind in recognizers:
        if re.match(pattern, line):
            return kind
    return 'comment'
162
163
def hide_comments(lines):
    """Tool to remove comments from given lines.

    It yields empty lines in place of comments, so line numbers are
    still meaningful.  Two forms are hidden: the blank and space-indented
    lines following a line that is exactly ``..``, and any single line
    starting with ``.. `` that type_of_explicit_markup() calls a comment.
    """
    inside = False
    for text in lines:
        if text == "..\n":
            # The '..' marker line itself is passed through unchanged.
            inside = True
        elif inside and (text == "\n" or text[:1] == " "):
            text = "\n"
        else:
            inside = False
        if text.startswith(".. ") and type_of_explicit_markup(text) == 'comment':
            text = "\n"
        yield text
182
183
184
@checker(".rst", severity=2)
def check_missing_surrogate_space_on_plural(fn, lines):
    r"""Check for missing 'backslash-space' between a code sample and a letter.

    Good: ``Point``\ s
    Bad: ``Point``s

    Literal blocks and comments are blanked out before checking.  Yields
    ``(line_number, message)`` for each closing double backquote that is
    directly followed by an ASCII letter.
    """
    in_code_sample = False
    check_next_one = False
    for lno, line in enumerate(hide_comments(hide_literal_blocks(lines)), 1):
        tokens = line.split("``")
        last = len(tokens) - 1
        for token_no, token in enumerate(tokens):
            if check_next_one:
                # The text after a closing marker can be empty: a final line
                # ending in the marker without a newline, or two markers in
                # a row.  Guard before looking at its first character.
                if token and token[0] in ascii_letters:
                    yield lno, f"Missing backslash-space between code sample and {token!r}."
                check_next_one = False
            if token_no == last:
                # No marker follows the last token, so the state is kept;
                # in_code_sample carries over to the next line.
                continue
            if in_code_sample:
                check_next_one = True
            in_code_sample = not in_code_sample
206
def main(argv):
    """Run the registered checkers over a file or directory tree.

    *argv* is the full argument vector, program name first.  Every file
    below the given path (default: the current directory) whose suffix has
    registered checkers is read as UTF-8 and passed to each checker whose
    severity is at least the requested one; problems are printed as they
    are found, followed by a summary per severity.

    Returns 2 for a usage error or a nonexistent path, 1 if any problem
    was counted, and 0 otherwise.
    """
    usage = '''\
Usage: %s [-v] [-f] [-s sev] [-i path]* [path]

Options:  -v       verbose (print all checked file names)
          -f       enable checkers that yield many false positives
          -s sev   only show problems with severity >= sev
          -i path  ignore subdir or file path
''' % argv[0]
    try:
        gopts, args = getopt.getopt(argv[1:], 'vfs:i:')
    except getopt.GetoptError:
        print(usage)
        return 2

    verbose = False
    severity = 1
    ignore = []
    falsepos = False
    for opt, val in gopts:
        if opt == '-v':
            verbose = True
        elif opt == '-f':
            falsepos = True
        elif opt == '-s':
            try:
                severity = int(val)
            except ValueError:
                # A non-numeric severity is a usage error, not a crash.
                print(usage)
                return 2
        elif opt == '-i':
            ignore.append(abspath(val))

    if len(args) > 1:
        print(usage)
        return 2
    path = args[0] if args else '.'

    if not exists(path):
        print('Error: path %s does not exist' % path)
        return 2

    # Number of problems found, keyed by severity.
    count = defaultdict(int)

    for root, dirs, files in os.walk(path):
        # ignore subdirs in ignore list
        if abspath(root) in ignore:
            # Emptying dirs in place stops os.walk from descending further.
            del dirs[:]
            continue

        for fn in files:
            fn = join(root, fn)
            if fn[:2] == './':
                fn = fn[2:]

            # ignore files in ignore list
            if abspath(fn) in ignore:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print('Checking %s...' % fn)

            try:
                with open(fn, 'r', encoding='utf-8') as f:
                    lines = list(f)
            except OSError as err:
                print('%s: cannot open: %s' % (fn, err))
                count[4] += 1
                continue
            except UnicodeDecodeError as err:
                # A file that is not valid UTF-8 is reported like an
                # unreadable one instead of aborting the whole run.
                print('%s: cannot decode: %s' % (fn, err))
                count[4] += 1
                continue

            for check in checkerlist:
                if check.falsepositives and not falsepos:
                    continue
                csev = check.severity
                if csev >= severity:
                    for lno, msg in check(fn, lines):
                        print('[%d] %s:%d: %s' % (csev, fn, lno, msg))
                        count[csev] += 1
        if verbose:
            print()
    if not count:
        if severity > 1:
            print('No problems with severity >= %d found.' % severity)
        else:
            print('No problems found.')
    else:
        for sev in sorted(count):
            number = count[sev]
            print('%d problem%s with severity %d found.' %
                  (number, 's' if number > 1 else '', sev))
    return int(bool(count))
302
303
if __name__ == '__main__':
    # Run as a script: main()'s return value becomes the exit status.
    raise SystemExit(main(sys.argv))