blob: 7a145a2a5346cc806848813566998bdee854d370 [file] [log] [blame]
Tor Norbye3a2425a2013-11-04 10:16:08 -08001"""Code parsing for Coverage."""
2
Tor Norbye2e5965e2014-07-25 12:24:15 -07003import dis, re, sys, token, tokenize
Tor Norbye3a2425a2013-11-04 10:16:08 -08004
5from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
Tor Norbye2e5965e2014-07-25 12:24:15 -07006from coverage.backward import open_source, range # pylint: disable=W0622
7from coverage.backward import reversed # pylint: disable=W0622
8from coverage.backward import bytes_to_ints
Tor Norbye3a2425a2013-11-04 10:16:08 -08009from coverage.bytecode import ByteCodes, CodeObjects
10from coverage.misc import nice_pair, expensive, join_regex
11from coverage.misc import CoverageException, NoSource, NotPython
12
13
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read.  Excluded lines are those that match
        `exclude`, a regex.

        Raises `NoSource` if `filename` is given but cannot be read.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()
            except IOError:
                # Python-2.x-compatible way to get the exception instance.
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                    )

        # Scrap the BOM if it exists.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        self.exclude = exclude

        # Debug switch: when true, _raw_parse prints each token it sees.
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of class definitions.
        self.classdefs = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = \
                            ByteParser(text=self.text, filename=self.filename)
        return self._byte_parser
    # Read-only property so the ByteParser is only built when first needed.
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`.  The entire line needn't match, just a
        part of it.

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                # Line numbers are 1-based; enumerate is 0-based.
                matches.add(i+1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated: `excluded`, `docstrings`,
        `classdefs`, `multiline`, and `statement_starts`.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True

        tokgen = generate_tokens(self.text)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.) This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.docstrings.update(range(slineno, elineno+1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, *ignores):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in any of the sequences in `ignores`.

        Returns a set of the first lines.

        """
        ignore = set()
        for ign in ignores:
            ignore.update(ign)
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return lset

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a set of executable line numbers, and 2) a set of
        excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        Raises `NotPython` if the source can't be tokenized.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                    (self.filename, msg, lineno)
                )

        excluded_lines = self.first_lines(self.excluded)
        lines = self.first_lines(
            self.statement_starts,
            excluded_lines,
            self.docstrings
        )

        return lines, excluded_lines

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    # NOTE(review): `expensive` (from coverage.misc) presumably caches the
    # result of the first call -- confirm in that module.
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report -1 as a line number
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Class definitions have one extra exit, so remove one for each:
        for l in self.classdefs:
            # Ensure key is there: classdefs can include excluded lines.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
274
275
276## Opcodes that guide the ByteParser.
277
278def _opcode(name):
Tor Norbye2e5965e2014-07-25 12:24:15 -0700279 """Return the opcode by name from the dis module."""
280 return dis.opmap[name]
Tor Norbye3a2425a2013-11-04 10:16:08 -0800281
def _opcode_set(*names):
    """Return a set of opcodes by the names in `names`.

    Names that this version of Python doesn't define are silently skipped.

    """
    found = set()
    for name in names:
        if name in dis.opmap:
            found.add(dis.opmap[name])
    return found
291
# Opcodes that leave the code object.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
    )

# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
# with unconditional jump instructions, we neatly deal with jumps to jumps
# properly.
OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set(
    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
    )

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = OPS_PUSH_BLOCK

# Individual opcodes we need below.  Unlike _opcode_set, these lookups raise
# KeyError on a Python version that doesn't define the opcode.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
# NOTE(review): 10 appears to be the index of the exception-match comparison
# in dis.cmp_op -- confirm against the targeted Python versions.
COMPARE_EXCEPTION = 10  # just have to get this const from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
327
328
class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    def __init__(self, code=None, text=None, filename=None):
        """Create a parser from a `code` object, source `text`, or `filename`.

        One source of code is required: either a pre-compiled `code` object
        (optionally accompanied by its `text`), the source `text` itself, or
        a `filename` to read the source from.

        Raises `NotPython` if the source can't be compiled, and
        `CoverageException` if the Python implementation doesn't expose the
        code-object attributes the analysis needs.

        """
        if code:
            self.code = code
            self.text = text
        else:
            if not text:
                assert filename, "If no code or text, need a filename"
                sourcef = open_source(filename)
                try:
                    text = sourcef.read()
                finally:
                    sourcef.close()
            self.text = text

            try:
                # Python 2.3 and 2.4 don't like partial last lines, so be sure
                # the text ends nicely for them.
                self.code = compile(text + '\n', filename, "exec")
            except SyntaxError:
                _, synerr, _ = sys.exc_info()
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
                        (filename, synerr.msg, synerr.lineno)
                    )

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
            if not hasattr(self.code, attr):
                raise CoverageException(
                    "This implementation of Python doesn't support code "
                    "analysis.\n"
                    "Run coverage.py under CPython for this command."
                    )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        The iteration includes `self` as its first value.

        """
        children = CodeObjects(self.code)
        return [ByteParser(code=c, text=self.text) for c in children]

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...

        Only byte offsets that correspond to line numbers are included in the
        results.

        """
        # Adapted from dis.py in the standard library.
        # co_lnotab is pairs of bytes: (byte-offset increment, line increment).
        byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
        line_increments = bytes_to_ints(self.code.co_lnotab[1::2])

        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                # A non-zero byte increment closes out the current line entry.
                if line_num != last_line_num:
                    yield (byte_num, line_num)
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line_num:
            yield (byte_num, line_num)

    def _find_statements(self):
        """Find the statements in `self.code`.

        Produce a sequence of line numbers that start statements.  Recurses
        into all code objects reachable from `self.code`.

        """
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                yield l

    def _block_stack_repr(self, block_stack):
        """Get a string version of `block_stack`, for debugging."""
        blocks = ", ".join(
            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
            )
        return "[" + blocks + "]"

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """
        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None

        # A dict mapping byte offsets of line starts to the line numbers.
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some op codes are followed by branches that should be ignored.  This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        # Get a set of all of the jump-to points.
        jump_to = set()
        bytecodes = list(ByteCodes(self.code.co_code))
        for bc in bytecodes:
            if bc.jump_to >= 0:
                jump_to.add(bc.jump_to)

        chunk_lineno = 0

        # Walk the byte codes building chunks.
        for bc in bytecodes:
            # Maybe have to start a new chunk
            start_new_chunk = False
            first_chunk = False
            if bc.offset in bytes_lines_map:
                # Start a new chunk for each source line number.
                start_new_chunk = True
                chunk_lineno = bytes_lines_map[bc.offset]
                first_chunk = True
            elif bc.offset in jump_to:
                # To make chunks have a single entrance, we have to make a new
                # chunk when we get to a place some bytecode jumps to.
                start_new_chunk = True
            elif bc.op in OPS_CHUNK_BEGIN:
                # Jumps deserve their own unnumbered chunk.  This fixes
                # problems with jumps to jumps getting confused.
                start_new_chunk = True

            if not chunk or start_new_chunk:
                if chunk:
                    # Falling into the new chunk is an exit from the old one.
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.
                chunk.exits.add(-self.code.co_firstlineno)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for block in reversed(block_stack):
                    if block[0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block[1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause.  We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code. Every Python code object seems
            # to end with a return, and a "return None" is inserted if there
            # isn't an explicit return in the source.
            if ult and penult:
                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                    if self.code.co_consts[penult.arg] is None:
                        # This is "return None", but is it dummy?  A real line
                        # would be a last chunk all by itself.
                        if chunks[-1].byte != penult.offset:
                            ex = -self.code.co_firstlineno
                            # Split the last chunk
                            last_chunk = chunks[-1]
                            last_chunk.exits.remove(ex)
                            last_chunk.exits.add(penult.offset)
                            chunk = Chunk(
                                penult.offset, last_chunk.line, False
                            )
                            chunk.exits.add(ex)
                            chunks.append(chunk)

            # Give all the chunks a length.
            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        #self.validate_chunks(chunks)
        return chunks

    def validate_chunks(self, chunks):
        """Validate the rule that chunks have a single entrance."""
        # starts is the entrances to the chunks
        starts = set([ch.byte for ch in chunks])
        for ch in chunks:
            assert all([(ex in starts or ex < 0) for ex in ch.exits])

    def _arcs(self):
        """Find the executable arcs in the code.

        Yields pairs: (from,to).  From and to are integer line numbers.  If
        from is < 0, then the arc is an entrance into the code object.  If to
        is < 0, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # There's always an entrance at the first chunk.
        yield (-1, byte_chunks[0].line)

        # Traverse from the first chunk in each line, and yield arcs where
        # the trace function will be invoked.
        for chunk in chunks:
            if not chunk.first:
                continue

            chunks_considered = set()
            chunks_to_consider = [chunk]
            while chunks_to_consider:
                # Get the chunk we're considering, and make sure we don't
                # consider it again
                this_chunk = chunks_to_consider.pop()
                chunks_considered.add(this_chunk)

                # For each exit, add the line number if the trace function
                # would be triggered, or add the chunk to those being
                # considered if not.
                for ex in this_chunk.exits:
                    if ex < 0:
                        yield (chunk.line, ex)
                    else:
                        next_chunk = byte_chunks[ex]
                        if next_chunk in chunks_considered:
                            continue

                        # The trace function is invoked if visiting the first
                        # bytecode in a line, or if the transition is a
                        # backward jump.
                        backward_jump = next_chunk.byte < this_chunk.byte
                        if next_chunk.first or backward_jump:
                            if next_chunk.line != chunk.line:
                                yield (chunk.line, next_chunk.line)
                        else:
                            chunks_to_consider.append(next_chunk)

    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs
635
636
class Chunk(object):
    """A run of byte codes with exactly one entrance.

    Byte code is divided into chunks so that each chunk is entered only at
    its first instruction.  This is nearly the CS notion of a `basic block`_,
    except that a chunk is allowed any number of exits, and "basic block" is
    a more cumbersome term.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    `byte` is the offset of the chunk's first byte code, `line` is the source
    line number containing this chunk, and `first` is true if this is the
    first chunk in the source line.

    An exit < 0 means the chunk can leave the code (return).  The exit is
    the negative of the starting line number of the code block.

    """
    def __init__(self, byte, line, first):
        # Where the chunk starts and which source line it belongs to.
        self.byte = byte
        self.line = line
        self.first = first
        # Filled in after construction: byte length and the set of exit
        # destinations (byte offsets, or negative line numbers for returns).
        self.length = 0
        self.exits = set()

    def __repr__(self):
        # First-on-line chunks get a "!" marker in the debug form.
        bang = self.first and "!" or ""
        return "<%d+%d @%d%s %r>" % (
            self.byte, self.length, self.line, bang, list(self.exits)
            )
Tor Norbye3a2425a2013-11-04 10:16:08 -0800673
Tor Norbye3a2425a2013-11-04 10:16:08 -0800674
class CachedTokenizer(object):
    """A one-element cache around tokenize.generate_tokens.

    When reporting, coverage.py tokenizes files twice, once to find the
    structure of the file, and once to syntax-color it.  Tokenizing is
    expensive, and easily cached.

    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
    actually tokenize twice.

    """
    def __init__(self):
        # The most recent text tokenized, and its token list.
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for `tokenize.generate_tokens`."""
        # Cache hit: hand back the tokens from the previous call.
        if text == self.last_text:
            return self.last_tokens
        # Cache miss: remember the text, then tokenize it eagerly.
        self.last_text = text
        readline = StringIO(text).readline
        self.last_tokens = list(tokenize.generate_tokens(readline))
        return self.last_tokens
Tor Norbye3a2425a2013-11-04 10:16:08 -0800698
# Create our generate_tokens cache as a callable replacement function.
# This is a module-level singleton, so the one-element cache is shared by
# every caller in this module.
generate_tokens = CachedTokenizer().generate_tokens