Blame - clang/docs/tools/dump_ast_matchers.py - toolchain/llvm-project

2012-08-27 18:49:12 +0000

[diff] [blame]

1

#!/usr/bin/env python

2

# A tool to parse ASTMatchers.h and update the documentation in

3

# ../LibASTMatchersReference.html automatically. Run from the

4

# directory in which this file is located to update the docs.

import collections

import re

import urllib2

MATCHERS_FILE = '../../include/clang/ASTMatchers/ASTMatchers.h'

11

12

# Each matcher is documented in one row of the form:

13

# result | name | argA

14

# The subsequent row contains the documentation and is hidden by default,

15

# becoming visible via javascript when the user clicks the matcher name.

16

TD_TEMPLATE="""

Manuel Klimek

8bad947

2012-09-07 13:10:32 +0000

[diff] [blame]

17

<tr><td>%(result)s</td><td class="name" onclick="toggle('%(id)s')"><a name="%(id)sAnchor">%(name)s</a></td><td>%(args)s</td></tr>

Manuel Klimek

2012-08-27 18:49:12 +0000

[diff] [blame]

18

<tr><td colspan="4" class="doc" id="%(id)s"><pre>%(comment)s</pre></td></tr>

19

"""

20

21

# We categorize the matchers into these three categories in the reference:

22

node_matchers = {}

23

narrowing_matchers = {}

24

traversal_matchers = {}

25

26

# We output multiple rows per matcher if the matcher can be used on multiple

27

# node types. Thus, we need a new id per row to control the documentation

28

# pop-up. ids[name] keeps track of those ids.

29

ids = collections.defaultdict(int)

30

31

# Cache for doxygen urls we have already verified.

doxygen_probes = {}

def esc(text):
    """Escape any html in the given text.

    The characters '&', '<' and '>' are replaced by their html entities.
    Afterwards every escaped 'Matcher<Name>' occurrence is turned into a link
    to the doxygen page of clang::Name, provided that page can be fetched.
    Probe results are remembered in the module-level 'doxygen_probes' cache so
    that each url is requested at most once per run.

    Args:
      text: the raw (unescaped) text.

    Returns:
      The escaped text, with doxygen links inserted where the probe succeeded.
    """
    # '&' has to be handled first; otherwise the ampersands of the entities
    # produced for '<' and '>' would be escaped a second time.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')

    def link_if_exists(m):
        """Return the linked form of the match if its doxygen page exists."""
        name = m.group(1)
        url = 'https://clang.llvm.org/doxygen/classclang_1_1%s.html' % name
        if url not in doxygen_probes:
            try:
                print('Probing %s...' % url)
                # Only reachability matters; release the connection at once.
                urllib2.urlopen(url).close()
                doxygen_probes[url] = True
            except Exception:
                # Any failure to fetch the page means "do not link". Unlike a
                # bare 'except:', this lets KeyboardInterrupt/SystemExit
                # propagate instead of being cached as a failed probe.
                doxygen_probes[url] = False
        if doxygen_probes[url]:
            return r'Matcher&lt;<a href="%s">%s</a>&gt;' % (url, name)
        else:
            return m.group(0)

    # The text is already escaped at this point, so the angle brackets around
    # the node type appear as entities. Excluding '&' from the captured name
    # stops the match at the closing '&gt;'; excluding '*' skips 'Matcher<*>'.
    text = re.sub(
        r'Matcher&lt;([^\*&]+)&gt;', link_if_exists, text)
    return text

56

57

def extract_result_types(comment):
    """Pull the 'Usable as' node types out of a matcher comment.

    Matcher comments may end with an annotation naming the nodes the matcher
    can be applied to, in one of the forms:
      Usable as: Any Matcher
      Usable as: Matcher<T1>[, Matcher<T2>[, ...]]

    The Matcher<...> entries are peeled off the end of the comment one at a
    time, so the resulting list is in reverse order of appearance.

    Returns:
      ['*'] for 'Any Matcher'; the list of type names (last one first) when
      the comment ends in a 'Usable as:' list; an empty list when the comment
      ends in a bare 'Usable as:'; None when no specification can be parsed.
    """
    if re.search(r'Usable as: Any Matcher[\s\n]*$', comment, re.S):
        return ['*']

    trailing_matcher = re.compile(r'^(.*)Matcher<([^>]+)>\s*,?[\s\n]*$', re.S)
    collected = []
    remaining = comment
    found = trailing_matcher.match(remaining)
    while found:
        # Group 1 is everything in front of the last Matcher<...>, group 2 is
        # the type named inside it.
        remaining, node_type = found.groups()
        collected.append(node_type)
        found = trailing_matcher.match(remaining)

    # Only accept the collected types if what is left ends in the marker.
    if re.search(r'Usable as:\s*$', remaining):
        return collected
    return None

81

82

def strip_doxygen(comment):
    r"""Return the comment with its backslash-prefixed doxygen commands removed.

    Three passes are applied in order:
      1. lines holding nothing but a doxygen keyword are dropped entirely;
      2. '\see' is rewritten to 'See also:';
      3. every remaining command is removed together with the whitespace
         that follows it.
    """
    # Pass 1: a line that is only a keyword disappears, newline included.
    keyword_only_line = re.compile(r'^\\[^\s]+\n', re.M)
    stripped = keyword_only_line.sub(r'', comment)

    # Pass 2: keep the cross reference readable.
    # FIXME: it would be better to turn this into a link to the target instead.
    stripped = re.sub(r'\\see', r'See also:', stripped)

    # Pass 3: drop whatever commands are left, plus their trailing whitespace.
    return re.sub(r'\\[^\s]+\s+', r'', stripped)

94

95

def unify_arguments(args):
    """Normalise an argument list by dropping details readers do not need.

    The rewrites run in the listed order, each on the result of the previous
    one: the 'internal::' qualifier is removed, 'extern const X &' loses its
    prefix and reference marker, remaining '&' become spaces, and the template
    placeholders 'M' / 'M<digit>' are spelled out as 'Matcher<*>'.
    """
    rewrites = (
        (r'internal::', r''),
        (r'extern const\s+(.*)&', r'\1 '),
        (r'&', r' '),
        (r'(^|\s)M\d?(\s)', r'\1Matcher<*>\2'),
    )
    unified = args
    for pattern, replacement in rewrites:
        unified = re.sub(pattern, replacement, unified)
    return unified

102

103

def add_matcher(result_type, name, args, comment, is_dyncast=False):
    """Render one matcher as html rows and file it under its category.

    Args:
      result_type: node type the matcher yields; shown as Matcher<result_type>.
      name: the matcher's name. The 'id' matcher is skipped.
      args: raw argument list; normalised with unify_arguments before use.
      comment: raw doxygen comment used as the documentation text.
      is_dyncast: when true the matcher is stored as a node matcher.

    The rendered rows end up in node_matchers, narrowing_matchers or
    traversal_matchers; ids[name] is advanced so every row has its own id.
    """
    if name == 'id':
        # FIXME: Figure out whether we want to support the 'id' matcher.
        return

    sequence = ids[name]
    ids[name] = sequence + 1
    row_id = '%s%d' % (name, sequence)

    unified_args = unify_arguments(args)

    fields = {}
    fields['result'] = esc('Matcher<%s>' % result_type)
    fields['name'] = name
    fields['args'] = esc(unified_args)
    fields['comment'] = esc(strip_doxygen(comment))
    fields['id'] = row_id
    rows = TD_TEMPLATE % fields

    if is_dyncast:
        node_matchers[result_type + name] = rows
        return

    # Use a heuristic to figure out whether a matcher is a narrowing or
    # traversal matcher. By default, matchers that take other matchers as
    # arguments (and are not node matchers) do traversal. We specifically
    # exclude known narrowing matchers that also take other matchers as
    # arguments.
    takes_matcher = 'Matcher<' in unified_args
    known_narrowing = name in ['allOf', 'anyOf', 'anything', 'unless']
    if takes_matcher and not known_narrowing:
        category = traversal_matchers
    else:
        category = narrowing_matchers
    category[result_type + name + esc(unified_args)] = rows

Manuel Klimek

2012-08-27 18:49:12 +0000

[diff] [blame]

130

131

def act_on_decl(declaration, comment, allowed_types):
    """Parse the matcher out of the given declaration and comment.

    The declaration is tried against a series of known shapes (variadic node
    matchers, type matchers, the AST_*MATCHER* macro families, argument
    adapting / variadic function / variadic operator matchers and finally
    free-standing matcher functions). The first shape that matches registers
    the matcher through add_matcher and ends the call.

    Args:
      declaration: the accumulated declaration text; blank input is ignored.
      comment: the documentation comment preceding the declaration.
      allowed_types: if set, a list of node types the matcher can match on,
        as extracted from the static type asserts in the matcher definition.

    Raises:
      Exception: when the documented result types disagree with the declared
        ones, when a macro has an unsupported arity suffix, or when an
        AST_MATCHER without explicit result type has no allowed types.
    """
    if declaration.strip():
        # Node matchers are defined by writing:
        #   VariadicDynCastAllOfMatcher<ResultType, ArgumentType> name;
        m = re.match(r""".*Variadic(?:DynCast)?AllOfMatcher\s*<
                         \s*([^\s,]+)\s*(?:,
                         \s*([^\s>]+)\s*)?>
                         \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
        if m:
            result, inner, name = m.groups()
            if not inner:
                # Single-argument form: argument type equals result type.
                inner = result
            add_matcher(result, name, 'Matcher<%s>...' % inner,
                        comment, is_dyncast=True)
            return

        # Special case of type matchers:
        #   AstTypeMatcher<ArgumentType> name
        m = re.match(r""".*AstTypeMatcher\s*<
                         \s*([^\s>]+)\s*>
                         \s*([^\s;]+)\s*;\s*$""", declaration, flags=re.X)
        if m:
            inner, name = m.groups()
            add_matcher('Type', name, 'Matcher<%s>...' % inner,
                        comment, is_dyncast=True)
            # FIXME: re-enable once we have implemented casting on the TypeLoc
            # hierarchy.
            # add_matcher('TypeLoc', '%sLoc' % name, 'Matcher<%sLoc>...' % inner,
            #             comment, is_dyncast=True)
            return

        # Parse the various matcher definition macros.
        # The parentheses of AST_POLYMORPHIC_SUPPORTED_TYPES(...) are literal
        # and therefore backslash-escaped; a '$' there would anchor at the end
        # of the input and the macro could never match. The pattern is a raw
        # string so that the regex escapes reach the re module untouched.
        m = re.match(r""".*AST_TYPE(LOC)?_TRAVERSE_MATCHER(?:_DECL)?\(
                         \s*([^\s,]+\s*),
                         \s*(?:[^\s,]+\s*),
                         \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                         \)\s*;\s*$""", declaration, flags=re.X)
        if m:
            loc, name, results = m.groups()[0:3]
            result_types = [r.strip() for r in results.split(',')]

            # The 'Usable as' annotation, when present, has to agree with the
            # types listed in the macro (order does not matter).
            comment_result_types = extract_result_types(comment)
            if (comment_result_types and
                    sorted(result_types) != sorted(comment_result_types)):
                raise Exception('Inconsistent documentation for: %s' % name)
            for result_type in result_types:
                add_matcher(result_type, name, 'Matcher<Type>', comment)
                # if loc:
                #   add_matcher('%sLoc' % result_type, '%sLoc' % name, 'Matcher<TypeLoc>',
                #               comment)
            return

        # Same literal-parenthesis fix as above for the supported-types list.
        m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                         \s*([^\s,]+)\s*,
                         \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\)
                         (?:,\s*([^\s,]+)\s*
                            ,\s*([^\s,]+)\s*)?
                         (?:,\s*([^\s,]+)\s*
                            ,\s*([^\s,]+)\s*)?
                         (?:,\s*\d+\s*)?
                         \)\s*{\s*$""", declaration, flags=re.X)

        if m:
            p, n, name, results = m.groups()[0:4]
            args = m.groups()[4:]
            result_types = [r.strip() for r in results.split(',')]
            if allowed_types and allowed_types != result_types:
                raise Exception('Inconsistent documentation for: %s' % name)
            # Only the plain and the two-parameter macro variants are known.
            if n not in ['', '2']:
                raise Exception('Cannot parse "%s"' % declaration)
            # The captured groups come as (type, name) pairs; absent optional
            # pairs are None and are skipped.
            args = ', '.join('%s %s' % (args[i], args[i+1])
                             for i in range(0, len(args), 2) if args[i])
            for result_type in result_types:
                add_matcher(result_type, name, args, comment)
            return

        m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
                         (?:\s*([^\s,]+)\s*,)?
                         \s*([^\s,]+)\s*
                         (?:,\s*([^\s,]+)\s*
                            ,\s*([^\s,]+)\s*)?
                         (?:,\s*([^\s,]+)\s*
                            ,\s*([^\s,]+)\s*)?
                         (?:,\s*\d+\s*)?
                         \)\s*{\s*$""", declaration, flags=re.X)
        if m:
            p, n, result, name = m.groups()[0:4]
            args = m.groups()[4:]
            if n not in ['', '2']:
                raise Exception('Cannot parse "%s"' % declaration)
            args = ', '.join('%s %s' % (args[i], args[i+1])
                             for i in range(0, len(args), 2) if args[i])
            add_matcher(result, name, args, comment)
            return

        m = re.match(r"""^\s*AST_MATCHER(_P)?(.?)(?:_OVERLOAD)?\(
                         (?:\s*([^\s,]+)\s*,)?
                         \s*([^\s,]+)\s*
                         (?:,\s*([^,]+)\s*
                            ,\s*([^\s,]+)\s*)?
                         (?:,\s*([^\s,]+)\s*
                            ,\s*([^\s,]+)\s*)?
                         (?:,\s*\d+\s*)?
                         \)\s*{""", declaration, flags=re.X)
        if m:
            p, n, result, name = m.groups()[0:4]
            args = m.groups()[4:]
            if not result:
                # Without an explicit result type the static asserts found in
                # the matcher body are the only source of result types.
                if not allowed_types:
                    raise Exception('Did not find allowed result types for: %s' % name)
                result_types = allowed_types
            else:
                result_types = [result]
            if n not in ['', '2']:
                raise Exception('Cannot parse "%s"' % declaration)
            args = ', '.join('%s %s' % (args[i], args[i+1])
                             for i in range(0, len(args), 2) if args[i])
            for result_type in result_types:
                add_matcher(result_type, name, args, comment)
            return

        # Parse ArgumentAdapting matchers.
        m = re.match(
            r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*
                ([a-zA-Z]*);$""",
            declaration, flags=re.X)
        if m:
            name = m.groups()[0]
            add_matcher('*', name, 'Matcher<*>', comment)
            return

        # Parse Variadic functions.
        m = re.match(
            r"""^.*internal::VariadicFunction\s*<\s*([^,]+),\s*([^,]+),\s*[^>]+>\s*
                ([a-zA-Z]*);$""",
            declaration, flags=re.X)
        if m:
            result, arg, name = m.groups()[:3]
            add_matcher(result, name, '%s, ..., %s' % (arg, arg), comment)
            return

        # Parse Variadic operator matchers.
        m = re.match(
            r"""^.*VariadicOperatorMatcherFunc\s*<\s*([^,]+),\s*([^\s]+)\s*>\s*
                ([a-zA-Z]*);$""",
            declaration, flags=re.X)
        if m:
            min_args, max_args, name = m.groups()[:3]
            if max_args == '1':
                add_matcher('*', name, 'Matcher<*>', comment)
                return
            elif max_args == 'std::numeric_limits<unsigned>::max()':
                add_matcher('*', name, 'Matcher<*>, ..., Matcher<*>', comment)
                return
            # Any other upper bound falls through to the generic parsing below.

        # Parse free standing matcher functions, like:
        #   Matcher<ResultType> Name(Matcher<ArgumentType> InnerMatcher) {
        m = re.match(r"""^\s*(.*)\s+
                         ([^\s\(]+)\s*\(
                         (.*)
                         \)\s*{""", declaration, re.X)
        if m:
            result, name, args = m.groups()
            args = ', '.join(p.strip() for p in args.split(','))
            m = re.match(r'.*\s+internal::(Bindable)?Matcher<([^>]+)>$', result)
            if m:
                result_types = [m.group(2)]
            else:
                # The return type does not name the node type; fall back to
                # the 'Usable as' annotation in the comment.
                result_types = extract_result_types(comment)
            if not result_types:
                if not comment:
                    # Only overloads don't have their own doxygen comments; ignore those.
                    print('Ignoring "%s"' % name)
                else:
                    print('Cannot determine result type for "%s"' % name)
            else:
                for result_type in result_types:
                    add_matcher(result_type, name, args, comment)
        else:
            print('*** Unparsable: "' + declaration + '" ***')

Manuel Klimek

2012-08-27 18:49:12 +0000

[diff] [blame]

318

319

def sort_table(matcher_type, matcher_map):
    """Returns the html rows of the given row map, ordered by key.

    Every row is followed by a newline and the whole table is preceded by
    one. 'matcher_type' is handed to the format dict under 'type'; the
    template as written only consumes 'table'.
    """
    rows = ''.join(matcher_map[key] + '\n' for key in sorted(matcher_map))
    template = ('\n' +
                '%(table)s' +
                '')
    substitutions = {
        'type': matcher_type,
        'table': rows,
    }
    return template % substitutions

# Parse the ast matchers.
# We alternate between two modes:
# body = True: We parse the definition of a matcher. We need
#   to parse the full definition before adding a matcher, as the
#   definition might contain static asserts that specify the result
#   type.
# body = False: We parse the comments and declaration of the matcher.
comment = ''
declaration = ''
allowed_types = []
body = False
# Read the header through a context manager so its file handle is closed
# deterministically instead of whenever the garbage collector gets to it.
with open(MATCHERS_FILE) as matchers_header:
    header_lines = matchers_header.read().splitlines()
for line in header_lines:
    if body:
        # A closing brace in the first column ends the matcher definition.
        if line.strip() and line[0] == '}':
            if declaration:
                act_on_decl(declaration, comment, allowed_types)
                comment = ''
                declaration = ''
                allowed_types = []
            body = False
        else:
            # Collect node types named by static asserts inside the body.
            m = re.search(r'is_base_of<([^,]+), NodeType>', line)
            if m and m.group(1):
                allowed_types += [m.group(1)]
        continue
    if line.strip() and line.lstrip()[0] == '/':
        # Comment line: strip the leading slashes and one optional space.
        comment += re.sub(r'^/+\s?', '', line) + '\n'
    else:
        declaration += ' ' + line
        # A declaration is complete at a blank line, at a ';', or at an
        # opening '{' that is not part of an '= {' initializer.
        if ((not line.strip()) or
                line.rstrip()[-1] == ';' or
                (line.rstrip()[-1] == '{' and line.rstrip()[-3:] != '= {')):
            if line.strip() and line.rstrip()[-1] == '{':
                # Defer acting on the declaration until its body was scanned.
                body = True
            else:
                act_on_decl(declaration, comment, allowed_types)
                comment = ''
                declaration = ''
                allowed_types = []

node_matcher_table = sort_table('DECL', node_matchers)
narrowing_matcher_table = sort_table('NARROWING', narrowing_matchers)
traversal_matcher_table = sort_table('TRAVERSAL', traversal_matchers)

with open('../LibASTMatchersReference.html') as reference_file:
    reference = reference_file.read()
# NOTE(review): the three patterns below are empty. An empty pattern matches
# at every position, so each call inserts the table between all characters of
# the document. Presumably the patterns were meant to select the existing
# table section of the reference file - confirm the intended markers against
# LibASTMatchersReference.html before relying on this output.
reference = re.sub(r'',
                   node_matcher_table, reference, flags=re.S)
reference = re.sub(r'',
                   narrowing_matcher_table, reference, flags=re.S)
reference = re.sub(r'',
                   traversal_matcher_table, reference, flags=re.S)

Manuel Klimek

2012-08-27 18:49:12 +0000

[diff] [blame]

382

Benjamin Kramer

611d33a

2015-11-20 07:46:19 +0000

[diff] [blame]

383

with open('../LibASTMatchersReference.html', 'wb') as output:

Manuel Klimek