blob: b71eac6e2f404d4d67dcf21550ee36ed64b27e7e [file] [log] [blame]
David Brazdilee690a32014-12-01 17:04:16 +00001#!/usr/bin/env python3
2#
3# Copyright (C) 2014 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17
18# Checker is a testing tool which compiles a given test file and compares the
19# state of the control-flow graph before and after each optimization pass
20# against a set of assertions specified alongside the tests.
21#
22# Tests are written in Java, turned into DEX and compiled with the Optimizing
David Brazdil9a6f20e2014-12-19 11:17:21 +000023# compiler. "Check lines" are assertions formatted as comments of the Java file.
24# They begin with prefix 'CHECK' followed by a pattern that the engine attempts
25# to match in the compiler-generated output.
David Brazdilee690a32014-12-01 17:04:16 +000026#
27# Assertions are tested in groups which correspond to the individual compiler
28# passes. Each group of check lines therefore must start with a 'CHECK-START'
29# header which specifies the output group it should be tested against. The group
30# name must exactly match one of the groups recognized in the output (they can
31# be listed with the '--list-groups' command-line flag).
32#
David Brazdil9a6f20e2014-12-19 11:17:21 +000033# Matching of check lines is carried out in the order of appearance in the
34# source file. There are three types of check lines:
#  - CHECK:     Must match an output line which appears in the output group
#               later than lines matched against any preceding checks. Output
#               lines must therefore match the check lines in the same order.
#               These are referred to as "in-order" checks in the code.
#  - CHECK-DAG: Must match an output line which appears in the output group
#               later than lines matched against any preceding in-order checks.
#               In other words, the order of output lines does not matter
#               between consecutive DAG checks.
#  - CHECK-NOT: Must not match any output line which appears in the output group
#               later than lines matched against any preceding checks and
#               earlier than lines matched against any subsequent checks.
#               Surrounding non-negative checks (or boundaries of the group)
#               therefore create a scope within which the assertion is verified.
48#
49# Check-line patterns are treated as plain text rather than regular expressions
David Brazdilee690a32014-12-01 17:04:16 +000050# but are whitespace agnostic.
51#
52# Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If
53# curly brackets need to be used inside the body of the regex, they need to be
54# enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse
55# the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'.
56#
57# Regex patterns can be named and referenced later. A new variable is defined
58# with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are
59# only valid within the scope of the defining group. Within a group they cannot
60# be redefined or used undefined.
61#
62# Example:
63# The following assertions can be placed in a Java source file:
64#
65# // CHECK-START: int MyClass.MyMethod() constant_folding (after)
66# // CHECK: [[ID:i[0-9]+]] IntConstant {{11|22}}
67# // CHECK: Return [ [[ID]] ]
68#
69# The engine will attempt to match the check lines against the output of the
70# group named on the first line. Together they verify that the CFG after
71# constant folding returns an integer constant with value either 11 or 22.
72#
73
74import argparse
75import os
76import re
77import shutil
78import sys
79import tempfile
David Brazdilee690a32014-12-01 17:04:16 +000080
class Logger(object):
  """Console logger with verbosity filtering and optional ANSI coloring."""

  class Level(object):
    """Message levels; a message is printed when its level does not exceed
       Logger.Verbosity."""
    NoOutput, Error, Info = range(3)

  class Color(object):
    """Colors supported by the logger."""
    Default, Blue, Gray, Purple, Red = range(5)

    # Escape sequences of all non-default colors.
    _ESCAPE_CODES = (
      (Blue, '\033[94m'),
      (Gray, '\033[37m'),
      (Purple, '\033[95m'),
      (Red, '\033[91m'),
    )

    # Returns the escape sequence which switches the terminal to the given
    # color, or an empty string if 'out' is not attached to a terminal. Any
    # unrecognized color resets the terminal to its default color.
    @staticmethod
    def terminalCode(color, out=sys.stdout):
      if not out.isatty():
        return ''
      for candidate, code in Logger.Color._ESCAPE_CODES:
        if color == candidate:
          return code
      return '\033[0m'

  Verbosity = Level.Info

  # Prints 'text' to 'out' in the given color, provided that 'level' passes the
  # current verbosity setting. The stream is flushed after every message.
  @staticmethod
  def log(text, level=Level.Info, color=Color.Default, newLine=True, out=sys.stdout):
    if not level <= Logger.Verbosity:
      return

    message = Logger.Color.terminalCode(color, out) + text
    message += Logger.Color.terminalCode(Logger.Color.Default, out)
    print(message, end="\n" if newLine else "", flush=True, file=out)

  # Reports an error on the standard error stream, prefixed with the optional
  # "file:line: " location, and terminates the process with exit code 1.
  @staticmethod
  def fail(msg, file=None, line=-1):
    pieces = []
    if file:
      pieces.append(file + ":")
    if line > 0:
      pieces.append(str(line) + ":")
    if pieces:
      pieces.append(" ")
    location = "".join(pieces)

    Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
    Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr)
    Logger.log(msg, Logger.Level.Error, out=sys.stderr)
    sys.exit(1)

  # Announces that the test of the given name is about to be run.
  @staticmethod
  def startTest(name):
    Logger.log("TEST ", color=Logger.Color.Purple, newLine=False)
    Logger.log(name + "... ", newLine=False)

  # Marks the most recently started test as passed.
  @staticmethod
  def testPassed():
    Logger.log("PASS", color=Logger.Color.Blue)

  # Marks the most recently started test as failed and aborts with the given
  # error message (see 'fail').
  @staticmethod
  def testFailed(msg, file=None, line=-1):
    Logger.log("FAIL", color=Logger.Color.Red)
    Logger.fail(msg, file, line)
144
class CommonEqualityMixin:
  """Mixin which compares objects by comparing their instance fields."""

  def __eq__(self, other):
    # Objects of an unrelated type are never considered equal.
    if not isinstance(other, self.__class__):
      return False
    return self.__dict__ == other.__dict__

  def __ne__(self, other):
    return not self.__eq__(other)

  def __repr__(self):
    return "<{}: {}>".format(type(self).__name__, str(self.__dict__))
156
157
class CheckElement(CommonEqualityMixin):
  """One parsed component of a check line."""

  class Variant(object):
    """Kinds of elements a check line can consist of."""
    Text, Pattern, VarRef, VarDef = range(4)

  # Building blocks of the regular expressions which locate the '{{...}}' and
  # '[[...]]' constructs inside a check line.
  rStartOptional = r"("
  rEndOptional = r")?"

  rName = r"([a-zA-Z][a-zA-Z0-9]*)"
  rRegex = r"(.+?)"
  rPatternStartSym = r"(\{\{)"
  rPatternEndSym = r"(\}\})"
  rVariableStartSym = r"(\[\[)"
  rVariableEndSym = r"(\]\])"
  rVariableSeparator = r"(:)"

  # '{{regex}}'
  regexPattern = "".join([rPatternStartSym, rRegex, rPatternEndSym])
  # '[[name]]' or '[[name:regex]]'
  regexVariable = "".join([
    rVariableStartSym,
    rName,
    rStartOptional, rVariableSeparator, rRegex, rEndOptional,
    rVariableEndSym,
  ])

  def __init__(self, variant, name, pattern):
    self.variant = variant
    self.name = name
    self.pattern = pattern

  # Creates an element matching the given text literally.
  @staticmethod
  def parseText(text):
    escaped = re.escape(text)
    return CheckElement(CheckElement.Variant.Text, None, escaped)

  # Creates a regex element from a string of the form '{{regex}}'.
  @staticmethod
  def parsePattern(patternElem):
    body = patternElem[2:-2]
    return CheckElement(CheckElement.Variant.Pattern, None, body)

  # Creates a variable element from a string of the form '[[name]]' (reference)
  # or '[[name:regex]]' (definition).
  @staticmethod
  def parseVariable(varElem):
    head, separator, tail = varElem.partition(":")
    if separator:
      # Variable definition: the name precedes the first colon, the regex
      # body follows it.
      return CheckElement(CheckElement.Variant.VarDef, head[2:], tail[:-2])
    # Variable reference: everything between the brackets is the name.
    return CheckElement(CheckElement.Variant.VarRef, varElem[2:-2], None)
207
class CheckLine(CommonEqualityMixin):
  """Representation of a single assertion in the check file formed of one or
     more regex elements. Matching against an output line is successful only
     if all regex elements can be matched in the given order."""

  class Variant(object):
    """Supported types of assertions."""
    InOrder, DAG, Not = range(3)

  # Parses 'content' into a list of check elements. Fails (terminating the
  # process through Logger.fail) if the line is empty or if a CHECK-NOT line
  # attempts to define a variable. 'fileName' and 'lineNo' are only used for
  # error reporting.
  def __init__(self, content, variant=Variant.InOrder, fileName=None, lineNo=-1):
    self.fileName = fileName
    self.lineNo = lineNo
    self.content = content.strip()

    self.variant = variant
    self.lineParts = self.__parse(self.content)
    if not self.lineParts:
      Logger.fail("Empty check line", self.fileName, self.lineNo)

    if self.variant == CheckLine.Variant.Not:
      for elem in self.lineParts:
        if elem.variant == CheckElement.Variant.VarDef:
          Logger.fail("CHECK-NOT lines cannot define variables", self.fileName, self.lineNo)

  # Two check lines are equal if they are of the same variant and consist of
  # the same elements. The location in the source file is ignored.
  def __eq__(self, other):
    return (isinstance(other, self.__class__) and
            self.variant == other.variant and
            self.lineParts == other.lineParts)

  # Returns True if the given Match object was at the beginning of the line.
  def __isMatchAtStart(self, match):
    return (match is not None) and (match.start() == 0)

  # Takes in a list of Match objects and returns the minimal start point among
  # them. If there aren't any successful matches it returns the length of
  # the searched string.
  def __firstMatch(self, matches, string):
    return min(len(string) if m is None else m.start() for m in matches)

  # This method parses the content of a check line stripped of the initial
  # comment symbol and the CHECK keyword.
  def __parse(self, line):
    lineParts = []
    # Loop as long as there is something to parse.
    while line:
      # Search for the nearest occurrence of the special markers.
      matchWhitespace = re.search(r"\s+", line)
      matchPattern = re.search(CheckElement.regexPattern, line)
      matchVariable = re.search(CheckElement.regexVariable, line)

      # If one of the above was identified at the current position, extract them
      # from the line, parse them and add to the list of line parts.
      if self.__isMatchAtStart(matchWhitespace):
        # We want to be whitespace-agnostic so whenever a check line contains
        # a whitespace, we add a regex pattern for an arbitrary non-zero number
        # of whitespaces.
        line = line[matchWhitespace.end():]
        lineParts.append(CheckElement.parsePattern(r"{{\s+}}"))
      elif self.__isMatchAtStart(matchPattern):
        pattern = line[0:matchPattern.end()]
        line = line[matchPattern.end():]
        lineParts.append(CheckElement.parsePattern(pattern))
      elif self.__isMatchAtStart(matchVariable):
        var = line[0:matchVariable.end()]
        line = line[matchVariable.end():]
        lineParts.append(CheckElement.parseVariable(var))
      else:
        # If we're not currently looking at a special marker, this is a plain
        # text match all the way until the first special marker (or the end
        # of the line).
        firstMatch = self.__firstMatch([ matchWhitespace, matchPattern, matchVariable ], line)
        text = line[0:firstMatch]
        line = line[firstMatch:]
        lineParts.append(CheckElement.parseText(text))
    return lineParts

  # Returns the regex pattern to be matched in the output line. Variable
  # references are substituted with their current values provided in the
  # 'varState' argument.
  # The test is failed if a referenced variable is undefined.
  def __generatePattern(self, linePart, varState):
    if linePart.variant == CheckElement.Variant.VarRef:
      try:
        return re.escape(varState[linePart.name])
      except KeyError:
        Logger.testFailed("Use of undefined variable \"" + linePart.name + "\"",
                          self.fileName, self.lineNo)
    else:
      return linePart.pattern

  # Attempts to match the check line against a line from the output file with
  # the given initial variable values. It returns the new variable state if
  # successful and None otherwise.
  def match(self, outputLine, initialVarState):
    initialSearchFrom = 0
    initialPattern = self.__generatePattern(self.lineParts[0], initialVarState)

    # The search position is bounded by the length of the line. Without the
    # bound, a first element which can match the empty string (e.g. '{{a*}}')
    # would keep "matching" the empty tail of the line forever whenever the
    # remaining elements fail to match.
    while initialSearchFrom <= len(outputLine):
      # Search for the first element on the regex parts list. This will mark
      # the point on the line from which we will attempt to match the rest of
      # the check pattern. If this iteration produces only a partial match,
      # the next iteration will start searching further in the output.
      firstMatch = re.search(initialPattern, outputLine[initialSearchFrom:])
      if firstMatch is None:
        return None
      matchStart = initialSearchFrom + firstMatch.start()
      initialSearchFrom += firstMatch.start() + 1

      # Do the full matching on a shadow copy of the variable state. If the
      # matching fails half-way, we will not need to revert the state.
      varState = dict(initialVarState)

      # Now try to parse all of the parts of the check line in the right order.
      # Variable values are updated on-the-fly, meaning that a variable can
      # be referenced immediately after its definition.
      fullyMatched = True
      for part in self.lineParts:
        pattern = self.__generatePattern(part, varState)
        match = re.match(pattern, outputLine[matchStart:])
        if match is None:
          fullyMatched = False
          break
        matchEnd = matchStart + match.end()
        if part.variant == CheckElement.Variant.VarDef:
          if part.name in varState:
            Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"",
                              self.fileName, self.lineNo)
          varState[part.name] = outputLine[matchStart:matchEnd]
        matchStart = matchEnd

      # Return the new variable state if all parts were successfully matched.
      # Otherwise loop and try to find another start point on the same line.
      if fullyMatched:
        return varState

    # Every possible start point on the line has been tried.
    return None
342
343
class CheckGroup(CommonEqualityMixin):
  """A named set of check lines which is verified against the output group
     carrying the same name."""

  def __init__(self, name, lines, fileName=None, lineNo=-1):
    self.fileName = fileName
    self.lineNo = lineNo

    # Both the name and at least one check line are mandatory.
    if not name:
      Logger.fail("Check group does not have a name", self.fileName, self.lineNo)
    if not lines:
      Logger.fail("Check group does not have a body", self.fileName, self.lineNo)

    self.name = name
    self.lines = lines

  # Groups are equal if they share the name and the check lines. The location
  # in the source file is ignored.
  def __eq__(self, other):
    if not isinstance(other, self.__class__):
      return False
    return self.name == other.name and self.lines == other.lines

  # Returns the first item of a sequence and a sequence of the remaining items.
  def __headAndTail(self, items):
    return items[0], items[1:]

  # Cuts the list of check lines in front of the first line whose variant
  # differs from the given one and returns both parts.
  def __splitByVariant(self, lines, variant):
    boundary = len(lines)
    for index, line in enumerate(lines):
      if line.variant != variant:
        boundary = index
        break
    return lines[:boundary], lines[boundary:]

  # Extracts the first sequence of check lines which are independent of each
  # other's match location, i.e. either consecutive DAG lines or a single
  # InOrder line. Any Not lines preceding this sequence are also extracted.
  # Returns a triple (Not lines, independent lines, remaining lines).
  def __nextIndependentChecks(self, checkLines):
    negatives, rest = self.__splitByVariant(checkLines, CheckLine.Variant.Not)
    if not rest:
      return negatives, [], []

    first, others = self.__headAndTail(rest)
    if first.variant == CheckLine.Variant.InOrder:
      return negatives, [first], others

    assert first.variant == CheckLine.Variant.DAG
    dagRun, rest = self.__splitByVariant(rest, CheckLine.Variant.DAG)
    return negatives, dagRun, rest

  # Looks for the first output line which the check line matches. On success
  # returns the number of that line and the updated variable state, otherwise
  # returns -1 and None. Output lines are numbered from 'startLineNo'; lines
  # whose number is listed in 'lineFilter' are skipped.
  def __findFirstMatch(self, checkLine, outputLines, startLineNo, lineFilter, varState):
    for candidateLineNo, outputLine in enumerate(outputLines, startLineNo):
      if candidateLineNo in lineFilter:
        continue
      updatedState = checkLine.match(outputLine, varState)
      if updatedState is not None:
        return candidateLineNo, updatedState
    return -1, None

  # Matches the given positive check lines against the output in order of
  # appearance. Variable state is propagated but the scope of the search remains
  # the same for all checks. Each output line can only be matched once.
  # Returns a tuple of: the output lines preceding all of the matched lines (to
  # be tested against Not checks), the output lines following all of the
  # matched lines, the line number of the first remaining line and the
  # resulting variable state. The test is failed if a check line cannot be
  # matched.
  def __matchIndependentChecks(self, checkLines, outputLines, startLineNo, varState):
    # With no checks to match, the entire output is skipped over.
    if not checkLines:
      return outputLines, [], startLineNo + len(outputLines), varState

    # Numbers of the output lines which have already been claimed.
    usedLineNos = []

    # Claim the first unused output line matching each of the check lines.
    for check in checkLines:
      hitLineNo, varState = \
        self.__findFirstMatch(check, outputLines, startLineNo, usedLineNos, varState)
      if varState is None:
        Logger.testFailed("Could not match check line \"" + check.content + "\" " +
                          "starting from output line " + str(startLineNo),
                          self.fileName, check.lineNo)
      usedLineNos.append(hitLineNo)

    # Split the output around the span covered by the matched lines.
    firstHit = min(usedLineNos)
    lastHit = max(usedLineNos)
    before = outputLines[:firstHit - startLineNo]
    after = outputLines[lastHit - startLineNo + 1:]
    return before, after, lastHit + 1, varState

  # Makes sure that the given check lines do not match any of the given output
  # lines. Variable state does not change.
  def __matchNotLines(self, checkLines, outputLines, startLineNo, varState):
    for check in checkLines:
      assert check.variant == CheckLine.Variant.Not
      hitLineNo, hitState = \
        self.__findFirstMatch(check, outputLines, startLineNo, [], varState)
      if hitState is not None:
        Logger.testFailed("CHECK-NOT line \"" + check.content + "\" matches output line " + \
                          str(hitLineNo), self.fileName, check.lineNo)

  # Matches the check lines in this group against an output group. It is
  # responsible for running the checks in the right order and scope, and
  # for propagating the variable state between the check lines.
  def match(self, outputGroup):
    varState = {}
    pending = self.lines
    outputLines = outputGroup.body
    startLineNo = outputGroup.lineNo

    while pending:
      # Take the next batch of location-independent checks together with the
      # Not checks which precede it.
      negatives, batch, pending = self.__nextIndependentChecks(pending)

      # Match the batch first; this determines the scope of the Not checks.
      scopeLines, outputLines, nextStartLineNo, nextVarState = \
        self.__matchIndependentChecks(batch, outputLines, startLineNo, varState)

      # The Not checks are run against the output lines lying between the
      # previous batch (or the start of the output) and the current one, using
      # the variable state from before the current batch was matched.
      self.__matchNotLines(negatives, scopeLines, startLineNo, varState)

      # Advance to the remaining output.
      startLineNo = nextStartLineNo
      varState = nextVarState
David Brazdilee690a32014-12-01 17:04:16 +0000474
class OutputGroup(CommonEqualityMixin):
  """A named section of the test output; the check group of the same name is
     matched against it."""

  def __init__(self, name, body, fileName=None, lineNo=-1):
    # Both the name and a non-empty body are mandatory. 'fileName' is only
    # used for error reporting and is not stored.
    for missing, message in ((not name, "Output group does not have a name"),
                             (not body, "Output group does not have a body")):
      if missing:
        Logger.fail(message, fileName, lineNo)

    self.name = name
    self.body = body
    self.lineNo = lineNo

  # Groups are equal if they share the name and the body. The line number is
  # ignored.
  def __eq__(self, other):
    if not isinstance(other, self.__class__):
      return False
    return self.name == other.name and self.body == other.body
David Brazdilee690a32014-12-01 17:04:16 +0000493
494
class FileSplitMixin(object):
  """Mixin for text files which are cut into named groups of lines before
     being parsed. The child class supplies '_processLine', '_processGroup'
     and '_exceptionLineOutsideGroup'."""

  # Reads the stream line by line, collects the lines into groups as directed
  # by the child class and returns the list of processed groups.
  def _parseStream(self, stream):
    collected = []
    openGroup = None

    for lineNo, rawLine in enumerate(stream, 1):
      line = rawLine.strip()
      # Blank lines carry no information but still count towards line numbers.
      if not line:
        continue

      # The child class decides what becomes of the line: it may return the
      # content to be stored (possibly modified, or None to drop the line)
      # and/or the name of a new group which the line opens.
      content, groupName = self._processLine(line, lineNo)
      if groupName is not None:
        openGroup = (groupName, [], lineNo)
        collected.append(openGroup)
      if content is None:
        continue
      if openGroup is None:
        self._exceptionLineOutsideGroup(line, lineNo)
      else:
        openGroup[1].append(content)

    # Only once the entire stream has been read are the groups handed over to
    # the child class for final processing.
    return [self._processGroup(name, lines, startLineNo)
            for name, lines, startLineNo in collected]
David Brazdilee690a32014-12-01 17:04:16 +0000526
527
class CheckFile(FileSplitMixin):
  """Collection of check groups extracted from the input test file."""

  # Parses the check lines found in 'checkStream'. 'prefix' is the keyword
  # which introduces check lines (it is matched literally) and 'fileName' is
  # only used for error reporting.
  def __init__(self, prefix, checkStream, fileName=None):
    self.fileName = fileName
    self.prefix = prefix
    self.groups = self._parseStream(checkStream)

  # Attempts to parse a check line. The regex searches for a comment symbol
  # followed by the CHECK keyword, given attribute and a colon at the very
  # beginning of the line. Whitespaces are ignored.
  # Returns the stripped remainder of the line, or None if the line does not
  # start with the given prefix.
  def _extractLine(self, prefix, line):
    rIgnoreWhitespace = r"\s*"
    rCommentSymbols = [r"//", r"#"]
    # The prefix is plain text supplied from outside of this class, so it is
    # escaped to keep any regex metacharacters in it from altering the pattern.
    regexPrefix = rIgnoreWhitespace + \
                  r"(" + r"|".join(rCommentSymbols) + r")" + \
                  rIgnoreWhitespace + \
                  re.escape(prefix) + r":"

    # The 'match' function succeeds only if the pattern is matched at the
    # beginning of the line.
    match = re.match(regexPrefix, line)
    if match is not None:
      return line[match.end():].strip()
    else:
      return None

  # This function is invoked on each line of the check file and returns a pair
  # which instructs the parser how the line should be handled. If the line is to
  # be included in the current check group, it is returned in the first value
  # as a (content, variant, line number) triple. If the line starts a new check
  # group, the name of the group is returned in the second value.
  def _processLine(self, line, lineNo):
    # Lines beginning with 'CHECK-START' start a new check group.
    startLine = self._extractLine(self.prefix + "-START", line)
    if startLine is not None:
      return None, startLine

    # Lines starting only with 'CHECK' are matched in order.
    plainLine = self._extractLine(self.prefix, line)
    if plainLine is not None:
      return (plainLine, CheckLine.Variant.InOrder, lineNo), None

    # 'CHECK-DAG' lines are no-order assertions.
    dagLine = self._extractLine(self.prefix + "-DAG", line)
    if dagLine is not None:
      return (dagLine, CheckLine.Variant.DAG, lineNo), None

    # 'CHECK-NOT' lines are no-order negative assertions.
    notLine = self._extractLine(self.prefix + "-NOT", line)
    if notLine is not None:
      return (notLine, CheckLine.Variant.Not, lineNo), None

    # Other lines are ignored.
    return None, None

  # Invoked by the parser when a check line appears before any group header.
  def _exceptionLineOutsideGroup(self, line, lineNo):
    Logger.fail("Check line not inside a group", self.fileName, lineNo)

  # Constructs a check group from the parser-collected check lines.
  def _processGroup(self, name, lines, lineNo):
    checkLines = [CheckLine(content, variant, self.fileName, checkLineNo)
                  for content, variant, checkLineNo in lines]
    return CheckGroup(name, checkLines, self.fileName, lineNo)

  # Matches every check group against the output group of the same name. Fails
  # if the output does not contain a group of that name.
  def match(self, outputFile):
    for checkGroup in self.groups:
      # TODO: Currently does not handle multiple occurrences of the same group
      #       name, e.g. when a pass is run multiple times. It will always try to
      #       match a check group against the first output group of the same name.
      outputGroup = outputFile.findGroup(checkGroup.name)
      if outputGroup is None:
        Logger.fail("Group \"" + checkGroup.name + "\" not found in the output",
                    self.fileName, checkGroup.lineNo)
      Logger.startTest(checkGroup.name)
      checkGroup.match(outputGroup)
      Logger.testPassed()
David Brazdilee690a32014-12-01 17:04:16 +0000604
605
class OutputFile(FileSplitMixin):
  """Representation of the output generated by the test and split into groups
     within which the checks are performed.

     C1visualizer format is parsed with a state machine which differentiates
     between the 'compilation' and 'cfg' blocks. The former marks the beginning
     of a method. It is parsed for the method's name but otherwise ignored. Each
     subsequent CFG block represents one stage of the compilation pipeline and
     is parsed into an output group named "<method name> <pass name>".
  """

  # States of the line-by-line parser. 'StartingCfgBlock' is the transient
  # state between a 'begin_cfg' line and the 'name "<pass>"' line following it.
  class ParsingState:
    OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)

  # Parses 'outputStream' into 'self.groups'. 'fileName' is only used when
  # reporting errors and constructing groups.
  def __init__(self, outputStream, fileName=None):
    self.fileName = fileName

    # Initialize the state machine
    self.lastMethodName = None
    self.state = OutputFile.ParsingState.OutsideBlock
    # '_parseStream' is not defined in this class; presumably it is inherited
    # from FileSplitMixin and drives '_processLine'/'_processGroup'.
    self.groups = self._parseStream(outputStream)

  # This function is invoked on each line of the output file and returns a pair
  # which instructs the parser how the line should be handled. If the line is to
  # be included in the current group, it is returned in the first value. If the
  # line starts a new output group, the name of the group is returned in the
  # second value.
  # NOTE(review): the branches ending in Logger.fail have no explicit return;
  # they rely on Logger.fail not returning normally - confirm.
  def _processLine(self, line, lineNo):
    if self.state == OutputFile.ParsingState.StartingCfgBlock:
      # Previous line started a new 'cfg' block which means that this one must
      # contain the name of the pass (this is enforced by C1visualizer).
      # Raw string: '\s' is a regex escape, not a valid string escape.
      passNameMatch = re.match(r'name\s+"([^"]+)"', line)
      if passNameMatch:
        # Extract the pass name, prepend it with the name of the method and
        # return as the beginning of a new group.
        self.state = OutputFile.ParsingState.InsideCfgBlock
        return (None, self.lastMethodName + " " + passNameMatch.group(1))
      else:
        Logger.fail("Expected output group name", self.fileName, lineNo)

    elif self.state == OutputFile.ParsingState.InsideCfgBlock:
      if line == "end_cfg":
        self.state = OutputFile.ParsingState.OutsideBlock
        return (None, None)
      else:
        return (line, None)

    elif self.state == OutputFile.ParsingState.InsideCompilationBlock:
      # Search for the method's name. Format: method "<name>"
      methodMatch = re.match(r'method\s+"([^"]*)"', line)
      if methodMatch:
        methodName = methodMatch.group(1).strip()
        if not methodName:
          Logger.fail("Empty method name in output", self.fileName, lineNo)
        self.lastMethodName = methodName
      elif line == "end_compilation":
        self.state = OutputFile.ParsingState.OutsideBlock
      # Lines of a 'compilation' block never belong to an output group.
      return (None, None)

    else:
      assert self.state == OutputFile.ParsingState.OutsideBlock
      if line == "begin_cfg":
        # The line starts a new group but we'll wait until the next line from
        # which we can extract the name of the pass.
        if self.lastMethodName is None:
          Logger.fail("Expected method header", self.fileName, lineNo)
        self.state = OutputFile.ParsingState.StartingCfgBlock
        return (None, None)
      elif line == "begin_compilation":
        self.state = OutputFile.ParsingState.InsideCompilationBlock
        return (None, None)
      else:
        Logger.fail("Output line not inside a group", self.fileName, lineNo)

  # Constructs an output group from the parser-collected output lines. The
  # group is created with line number 'lineNo + 1'.
  def _processGroup(self, name, lines, lineNo):
    return OutputGroup(name, lines, self.fileName, lineNo + 1)

  # Returns the first group whose name equals 'name', or None if there is no
  # such group.
  def findGroup(self, name):
    for group in self.groups:
      if group.name == name:
        return group
    return None
687
688
# Builds the command-line interface of the tool and returns the namespace
# produced by parsing the process arguments.
def ParseArguments():
  # Each entry is (names/flags, keyword options) and is registered in order.
  argumentSpecs = [
    (("tested_file",),
     dict(help="text file the checks should be verified against")),
    (("source_path",),
     dict(nargs="?", help="path to file/folder with checking annotations")),
    (("--check-prefix",),
     dict(dest="check_prefix", default="CHECK", metavar="PREFIX",
          help="prefix of checks in the test files (default: CHECK)")),
    (("--list-groups",),
     dict(dest="list_groups", action="store_true",
          help="print a list of all groups found in the tested file")),
    (("--dump-group",),
     dict(dest="dump_group", metavar="GROUP",
          help="print the contents of an output group")),
    (("-q", "--quiet"),
     dict(action="store_true", help="print only errors")),
  ]

  argParser = argparse.ArgumentParser()
  for flags, options in argumentSpecs:
    argParser.add_argument(*flags, **options)
  return argParser.parse_args()
704
705
# Parses the output file 'outputFilename' and logs the name of every output
# group found in it, one per line.
def ListGroups(outputFilename):
  # The context manager closes the file handle even if parsing fails; the
  # groups are consumed while the stream is still open.
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
    for group in outputFile.groups:
      Logger.log(group.name)
David Brazdilee690a32014-12-01 17:04:16 +0000710
711
# Parses the output file 'outputFilename' and logs the body of the group named
# 'groupName', each line prefixed with its line number. Reports a failure if
# no such group exists.
def DumpGroup(outputFilename, groupName):
  # The context manager closes the file handle on every exit path.
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream)
    group = outputFile.findGroup(groupName)
    if not group:
      Logger.fail("Group \"" + groupName + "\" not found in the output")
      return

    # Pad the "<lineNo>:" prefix to a common width derived from the largest
    # line number, plus two characters for the colon and a separating space.
    firstLineNo = group.lineNo
    lenLineNo = len(str(firstLineNo + len(group.body))) + 2
    for lineNo, line in enumerate(group.body, start=firstLineNo):
      Logger.log((str(lineNo) + ":").ljust(lenLineNo) + line)
David Brazdilee690a32014-12-01 17:04:16 +0000724
725
# Resolves 'path' to the list of files that should be scanned for check
# annotations. A path to a regular file yields a single-element list. A path to
# a directory is walked recursively and every file with the '.java' extension
# is collected. An empty or non-existent path is reported as a failure.
def FindCheckFiles(path):
  if not path:
    Logger.fail("No source path provided")
    return None

  if os.path.isfile(path):
    return [ path ]

  if os.path.isdir(path):
    return [ os.path.join(dirPath, fileName)
             for dirPath, _, fileNames in os.walk(path)
             for fileName in fileNames
             if os.path.splitext(fileName)[1] == ".java" ]

  Logger.fail("Source path \"" + path + "\" not found")
  return None
David Brazdil2e15cd22014-12-31 17:28:38 +0000743
David Brazdil32beaff2015-01-15 01:32:23 +0000744
# Parses the output file 'outputFilename' and matches it against the checks
# found in every file returned by FindCheckFiles(checkPath). 'checkPrefix' is
# forwarded to CheckFile. Only the base names of the files are passed on to the
# parsers.
def RunChecks(checkPrefix, checkPath, outputFilename):
  outputBaseName = os.path.basename(outputFilename)

  # Context managers guarantee that both the output file and every check file
  # are closed, including when a check fails part-way through.
  with open(outputFilename, "r") as outputStream:
    outputFile = OutputFile(outputStream, outputBaseName)

    for checkFilename in FindCheckFiles(checkPath):
      checkBaseName = os.path.basename(checkFilename)
      with open(checkFilename, "r") as checkStream:
        checkFile = CheckFile(checkPrefix, checkStream, checkBaseName)
        checkFile.match(outputFile)
David Brazdilee690a32014-12-01 17:04:16 +0000753
754
if __name__ == "__main__":
  args = ParseArguments()

  # '-q/--quiet': print only errors.
  if args.quiet:
    Logger.Verbosity = Logger.Level.Error

  # Exactly one mode runs per invocation: listing the groups takes precedence
  # over dumping a single group, and running the checks is the default.
  testedFile = args.tested_file
  if args.list_groups:
    ListGroups(testedFile)
  elif args.dump_group:
    DumpGroup(testedFile, args.dump_group)
  else:
    RunChecks(args.check_prefix, args.source_path, testedFile)