Blame - python/helpers/coverage/parser.py - platform/tools/idea

blob: 7a145a2a5346cc806848813566998bdee854d370 [file] [log] [blame]

Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	1	"""Code parsing for Coverage."""
				2
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	3	import dis, re, sys, token, tokenize
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	4
				5	from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	6	from coverage.backward import open_source, range # pylint: disable=W0622
				7	from coverage.backward import reversed # pylint: disable=W0622
				8	from coverage.backward import bytes_to_ints
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	9	from coverage.bytecode import ByteCodes, CodeObjects
				10	from coverage.misc import nice_pair, expensive, join_regex
				11	from coverage.misc import CoverageException, NoSource, NotPython
				12
				13
				14	class CodeParser(object):
				15	"""Parse code to find executable lines, excluded lines, etc."""
				16
				17	def __init__(self, text=None, filename=None, exclude=None):
				18	"""
				19	Source can be provided as `text`, the text itself, or `filename`, from
				20	which the text will be read. Excluded lines are those that match
				21	`exclude`, a regex.
				22
				23	"""
				24	assert text or filename, "CodeParser needs either text or filename"
				25	self.filename = filename or "<code>"
				26	self.text = text
				27	if not self.text:
				28	try:
				29	sourcef = open_source(self.filename)
				30	try:
				31	self.text = sourcef.read()
				32	finally:
				33	sourcef.close()
				34	except IOError:
				35	_, err, _ = sys.exc_info()
				36	raise NoSource(
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	37	"No source for code: '%s': %s" % (self.filename, err)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	38	)
				39
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	40	# Scrap the BOM if it exists.
				41	if self.text and ord(self.text[0]) == 0xfeff:
				42	self.text = self.text[1:]
				43
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	44	self.exclude = exclude
				45
				46	self.show_tokens = False
				47
				48	# The text lines of the parsed code.
				49	self.lines = self.text.split('\n')
				50
				51	# The line numbers of excluded lines of code.
				52	self.excluded = set()
				53
				54	# The line numbers of docstring lines.
				55	self.docstrings = set()
				56
				57	# The line numbers of class definitions.
				58	self.classdefs = set()
				59
				60	# A dict mapping line numbers to (lo,hi) for multi-line statements.
				61	self.multiline = {}
				62
				63	# The line numbers that start statements.
				64	self.statement_starts = set()
				65
				66	# Lazily-created ByteParser
				67	self._byte_parser = None
				68
				69	def _get_byte_parser(self):
				70	"""Create a ByteParser on demand."""
				71	if not self._byte_parser:
				72	self._byte_parser = \
				73	ByteParser(text=self.text, filename=self.filename)
				74	return self._byte_parser
				75	byte_parser = property(_get_byte_parser)
				76
				77	def lines_matching(self, *regexes):
				78	"""Find the lines matching one of a list of regexes.
				79
				80	Returns a set of line numbers, the lines that contain a match for one
				81	of the regexes in `regexes`. The entire line needn't match, just a
				82	part of it.
				83
				84	"""
				85	regex_c = re.compile(join_regex(regexes))
				86	matches = set()
				87	for i, ltext in enumerate(self.lines):
				88	if regex_c.search(ltext):
				89	matches.add(i+1)
				90	return matches
				91
				92	def _raw_parse(self):
				93	"""Parse the source to find the interesting facts about its lines.
				94
				95	A handful of member fields are updated.
				96
				97	"""
				98	# Find lines which match an exclusion pattern.
				99	if self.exclude:
				100	self.excluded = self.lines_matching(self.exclude)
				101
				102	# Tokenize, to find excluded suites, to find docstrings, and to find
				103	# multi-line statements.
				104	indent = 0
				105	exclude_indent = 0
				106	excluding = False
				107	prev_toktype = token.INDENT
				108	first_line = None
				109	empty = True
				110
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	111	tokgen = generate_tokens(self.text)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	112	for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	113	if self.show_tokens: # pragma: not covered
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	114	print("%10s %5s %-20r %r" % (
				115	tokenize.tok_name.get(toktype, toktype),
				116	nice_pair((slineno, elineno)), ttext, ltext
				117	))
				118	if toktype == token.INDENT:
				119	indent += 1
				120	elif toktype == token.DEDENT:
				121	indent -= 1
				122	elif toktype == token.NAME and ttext == 'class':
				123	# Class definitions look like branches in the byte code, so
				124	# we need to exclude them. The simplest way is to note the
				125	# lines with the 'class' keyword.
				126	self.classdefs.add(slineno)
				127	elif toktype == token.OP and ttext == ':':
				128	if not excluding and elineno in self.excluded:
				129	# Start excluding a suite. We trigger off of the colon
				130	# token so that the #pragma comment will be recognized on
				131	# the same line as the colon.
				132	exclude_indent = indent
				133	excluding = True
				134	elif toktype == token.STRING and prev_toktype == token.INDENT:
				135	# Strings that are first on an indented line are docstrings.
				136	# (a trick from trace.py in the stdlib.) This works for
				137	# 99.9999% of cases. For the rest (!) see:
				138	# http://stackoverflow.com/questions/1769332/x/1769794#1769794
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	139	self.docstrings.update(range(slineno, elineno+1))
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	140	elif toktype == token.NEWLINE:
				141	if first_line is not None and elineno != first_line:
				142	# We're at the end of a line, and we've ended on a
				143	# different line than the first line of the statement,
				144	# so record a multi-line range.
				145	rng = (first_line, elineno)
				146	for l in range(first_line, elineno+1):
				147	self.multiline[l] = rng
				148	first_line = None
				149
				150	if ttext.strip() and toktype != tokenize.COMMENT:
				151	# A non-whitespace token.
				152	empty = False
				153	if first_line is None:
				154	# The token is not whitespace, and is the first in a
				155	# statement.
				156	first_line = slineno
				157	# Check whether to end an excluded suite.
				158	if excluding and indent <= exclude_indent:
				159	excluding = False
				160	if excluding:
				161	self.excluded.add(elineno)
				162
				163	prev_toktype = toktype
				164
				165	# Find the starts of the executable statements.
				166	if not empty:
				167	self.statement_starts.update(self.byte_parser._find_statements())
				168
				169	def first_line(self, line):
				170	"""Return the first line number of the statement including `line`."""
				171	rng = self.multiline.get(line)
				172	if rng:
				173	first_line = rng[0]
				174	else:
				175	first_line = line
				176	return first_line
				177
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	178	def first_lines(self, lines, *ignores):
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	179	"""Map the line numbers in `lines` to the correct first line of the
				180	statement.
				181
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	182	Skip any line mentioned in any of the sequences in `ignores`.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	183
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	184	Returns a set of the first lines.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	185
				186	"""
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	187	ignore = set()
				188	for ign in ignores:
				189	ignore.update(ign)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	190	lset = set()
				191	for l in lines:
				192	if l in ignore:
				193	continue
				194	new_l = self.first_line(l)
				195	if new_l not in ignore:
				196	lset.add(new_l)
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	197	return lset
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	198
				199	def parse_source(self):
				200	"""Parse source text to find executable lines, excluded lines, etc.
				201
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	202	Return values are 1) a set of executable line numbers, and 2) a set of
				203	excluded line numbers.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	204
				205	Reported line numbers are normalized to the first line of multi-line
				206	statements.
				207
				208	"""
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	209	try:
				210	self._raw_parse()
				211	except (tokenize.TokenError, IndentationError):
				212	_, tokerr, _ = sys.exc_info()
				213	msg, lineno = tokerr.args
				214	raise NotPython(
				215	"Couldn't parse '%s' as Python source: '%s' at %s" %
				216	(self.filename, msg, lineno)
				217	)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	218
				219	excluded_lines = self.first_lines(self.excluded)
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	220	lines = self.first_lines(
				221	self.statement_starts,
				222	excluded_lines,
				223	self.docstrings
				224	)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	225
				226	return lines, excluded_lines
				227
				228	def arcs(self):
				229	"""Get information about the arcs available in the code.
				230
				231	Returns a sorted list of line number pairs. Line numbers have been
				232	normalized to the first line of multiline statements.
				233
				234	"""
				235	all_arcs = []
				236	for l1, l2 in self.byte_parser._all_arcs():
				237	fl1 = self.first_line(l1)
				238	fl2 = self.first_line(l2)
				239	if fl1 != fl2:
				240	all_arcs.append((fl1, fl2))
				241	return sorted(all_arcs)
				242	arcs = expensive(arcs)
				243
				244	def exit_counts(self):
				245	"""Get a mapping from line numbers to count of exits from that line.
				246
				247	Excluded lines are excluded.
				248
				249	"""
				250	excluded_lines = self.first_lines(self.excluded)
				251	exit_counts = {}
				252	for l1, l2 in self.arcs():
				253	if l1 < 0:
				254	# Don't ever report -1 as a line number
				255	continue
				256	if l1 in excluded_lines:
				257	# Don't report excluded lines as line numbers.
				258	continue
				259	if l2 in excluded_lines:
				260	# Arcs to excluded lines shouldn't count.
				261	continue
				262	if l1 not in exit_counts:
				263	exit_counts[l1] = 0
				264	exit_counts[l1] += 1
				265
				266	# Class definitions have one extra exit, so remove one for each:
				267	for l in self.classdefs:
				268	# Ensure key is there: classdefs can include excluded lines.
				269	if l in exit_counts:
				270	exit_counts[l] -= 1
				271
				272	return exit_counts
				273	exit_counts = expensive(exit_counts)
				274
				275
				276	## Opcodes that guide the ByteParser.
				277
				278	def _opcode(name):
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	279	"""Return the opcode by name from the dis module."""
				280	return dis.opmap[name]
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	281
				282	def _opcode_set(*names):
				283	"""Return a set of opcodes by the names in `names`."""
				284	s = set()
				285	for name in names:
				286	try:
				287	s.add(_opcode(name))
				288	except KeyError:
				289	pass
				290	return s
				291
				292	# Opcodes that leave the code object.
				293	OPS_CODE_END = _opcode_set('RETURN_VALUE')
				294
				295	# Opcodes that unconditionally end the code chunk.
				296	OPS_CHUNK_END = _opcode_set(
				297	'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
				298	'BREAK_LOOP', 'CONTINUE_LOOP',
				299	)
				300
				301	# Opcodes that unconditionally begin a new code chunk. By starting new chunks
				302	# with unconditional jump instructions, we neatly deal with jumps to jumps
				303	# properly.
				304	OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
				305
				306	# Opcodes that push a block on the block stack.
				307	OPS_PUSH_BLOCK = _opcode_set(
				308	'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
				309	)
				310
				311	# Block types for exception handling.
				312	OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
				313
				314	# Opcodes that pop a block from the block stack.
				315	OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
				316
				317	# Opcodes that have a jump destination, but aren't really a jump.
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	318	OPS_NO_JUMP = OPS_PUSH_BLOCK
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	319
				320	# Individual opcodes we need below.
				321	OP_BREAK_LOOP = _opcode('BREAK_LOOP')
				322	OP_END_FINALLY = _opcode('END_FINALLY')
				323	OP_COMPARE_OP = _opcode('COMPARE_OP')
				324	COMPARE_EXCEPTION = 10 # just have to get this const from the code.
				325	OP_LOAD_CONST = _opcode('LOAD_CONST')
				326	OP_RETURN_VALUE = _opcode('RETURN_VALUE')
				327
				328
				329	class ByteParser(object):
				330	"""Parse byte codes to understand the structure of code."""
				331
				332	def __init__(self, code=None, text=None, filename=None):
				333	if code:
				334	self.code = code
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	335	self.text = text
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	336	else:
				337	if not text:
				338	assert filename, "If no code or text, need a filename"
				339	sourcef = open_source(filename)
				340	try:
				341	text = sourcef.read()
				342	finally:
				343	sourcef.close()
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	344	self.text = text
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	345
				346	try:
				347	# Python 2.3 and 2.4 don't like partial last lines, so be sure
				348	# the text ends nicely for them.
				349	self.code = compile(text + '\n', filename, "exec")
				350	except SyntaxError:
				351	_, synerr, _ = sys.exc_info()
				352	raise NotPython(
				353	"Couldn't parse '%s' as Python source: '%s' at line %d" %
				354	(filename, synerr.msg, synerr.lineno)
				355	)
				356
				357	# Alternative Python implementations don't always provide all the
				358	# attributes on code objects that we need to do the analysis.
				359	for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
				360	if not hasattr(self.code, attr):
				361	raise CoverageException(
				362	"This implementation of Python doesn't support code "
				363	"analysis.\n"
				364	"Run coverage.py under CPython for this command."
				365	)
				366
				367	def child_parsers(self):
				368	"""Iterate over all the code objects nested within this one.
				369
				370	The iteration includes `self` as its first value.
				371
				372	"""
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	373	children = CodeObjects(self.code)
				374	return [ByteParser(code=c, text=self.text) for c in children]
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	375
				376	def _bytes_lines(self):
				377	"""Map byte offsets to line numbers in `code`.
				378
				379	Uses co_lnotab described in Python/compile.c to map byte offsets to
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	380	line numbers. Produces a sequence: (b0, l0), (b1, l1), ...
				381
				382	Only byte offsets that correspond to line numbers are included in the
				383	results.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	384
				385	"""
				386	# Adapted from dis.py in the standard library.
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	387	byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
				388	line_increments = bytes_to_ints(self.code.co_lnotab[1::2])
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	389
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	390	last_line_num = None
				391	line_num = self.code.co_firstlineno
				392	byte_num = 0
				393	for byte_incr, line_incr in zip(byte_increments, line_increments):
				394	if byte_incr:
				395	if line_num != last_line_num:
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	396	yield (byte_num, line_num)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	397	last_line_num = line_num
				398	byte_num += byte_incr
				399	line_num += line_incr
				400	if line_num != last_line_num:
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	401	yield (byte_num, line_num)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	402
				403	def _find_statements(self):
				404	"""Find the statements in `self.code`.
				405
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	406	Produce a sequence of line numbers that start statements. Recurses
				407	into all code objects reachable from `self.code`.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	408
				409	"""
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	410	for bp in self.child_parsers():
				411	# Get all of the lineno information from this code.
				412	for _, l in bp._bytes_lines():
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	413	yield l
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	414
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	415	def _block_stack_repr(self, block_stack):
				416	"""Get a string version of `block_stack`, for debugging."""
				417	blocks = ", ".join(
				418	["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
				419	)
				420	return "[" + blocks + "]"
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	421
				422	def _split_into_chunks(self):
				423	"""Split the code object into a list of `Chunk` objects.
				424
				425	Each chunk is only entered at its first instruction, though there can
				426	be many exits from a chunk.
				427
				428	Returns a list of `Chunk` objects.
				429
				430	"""
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	431	# The list of chunks so far, and the one we're working on.
				432	chunks = []
				433	chunk = None
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	434
				435	# A dict mapping byte offsets of line starts to the line numbers.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	436	bytes_lines_map = dict(self._bytes_lines())
				437
				438	# The block stack: loops and try blocks get pushed here for the
				439	# implicit jumps that can occur.
				440	# Each entry is a tuple: (block type, destination)
				441	block_stack = []
				442
				443	# Some op codes are followed by branches that should be ignored. This
				444	# is a count of how many ignores are left.
				445	ignore_branch = 0
				446
				447	# We have to handle the last two bytecodes specially.
				448	ult = penult = None
				449
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	450	# Get a set of all of the jump-to points.
				451	jump_to = set()
				452	bytecodes = list(ByteCodes(self.code.co_code))
				453	for bc in bytecodes:
				454	if bc.jump_to >= 0:
				455	jump_to.add(bc.jump_to)
				456
				457	chunk_lineno = 0
				458
				459	# Walk the byte codes building chunks.
				460	for bc in bytecodes:
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	461	# Maybe have to start a new chunk
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	462	start_new_chunk = False
				463	first_chunk = False
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	464	if bc.offset in bytes_lines_map:
				465	# Start a new chunk for each source line number.
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	466	start_new_chunk = True
				467	chunk_lineno = bytes_lines_map[bc.offset]
				468	first_chunk = True
				469	elif bc.offset in jump_to:
				470	# To make chunks have a single entrance, we have to make a new
				471	# chunk when we get to a place some bytecode jumps to.
				472	start_new_chunk = True
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	473	elif bc.op in OPS_CHUNK_BEGIN:
				474	# Jumps deserve their own unnumbered chunk. This fixes
				475	# problems with jumps to jumps getting confused.
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	476	start_new_chunk = True
				477
				478	if not chunk or start_new_chunk:
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	479	if chunk:
				480	chunk.exits.add(bc.offset)
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	481	chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	482	chunks.append(chunk)
				483
				484	# Look at the opcode
				485	if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
				486	if ignore_branch:
				487	# Someone earlier wanted us to ignore this branch.
				488	ignore_branch -= 1
				489	else:
				490	# The opcode has a jump, it's an exit for this chunk.
				491	chunk.exits.add(bc.jump_to)
				492
				493	if bc.op in OPS_CODE_END:
				494	# The opcode can exit the code object.
				495	chunk.exits.add(-self.code.co_firstlineno)
				496	if bc.op in OPS_PUSH_BLOCK:
				497	# The opcode adds a block to the block_stack.
				498	block_stack.append((bc.op, bc.jump_to))
				499	if bc.op in OPS_POP_BLOCK:
				500	# The opcode pops a block from the block stack.
				501	block_stack.pop()
				502	if bc.op in OPS_CHUNK_END:
				503	# This opcode forces the end of the chunk.
				504	if bc.op == OP_BREAK_LOOP:
				505	# A break is implicit: jump where the top of the
				506	# block_stack points.
				507	chunk.exits.add(block_stack[-1][1])
				508	chunk = None
				509	if bc.op == OP_END_FINALLY:
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	510	# For the finally clause we need to find the closest exception
				511	# block, and use its jump target as an exit.
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	512	for block in reversed(block_stack):
				513	if block[0] in OPS_EXCEPT_BLOCKS:
				514	chunk.exits.add(block[1])
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	515	break
				516	if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
				517	# This is an except clause. We want to overlook the next
				518	# branch, so that except's don't count as branches.
				519	ignore_branch += 1
				520
				521	penult = ult
				522	ult = bc
				523
				524	if chunks:
				525	# The last two bytecodes could be a dummy "return None" that
				526	# shouldn't be counted as real code. Every Python code object seems
				527	# to end with a return, and a "return None" is inserted if there
				528	# isn't an explicit return in the source.
				529	if ult and penult:
				530	if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
				531	if self.code.co_consts[penult.arg] is None:
				532	# This is "return None", but is it dummy? A real line
				533	# would be a last chunk all by itself.
				534	if chunks[-1].byte != penult.offset:
				535	ex = -self.code.co_firstlineno
				536	# Split the last chunk
				537	last_chunk = chunks[-1]
				538	last_chunk.exits.remove(ex)
				539	last_chunk.exits.add(penult.offset)
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	540	chunk = Chunk(
				541	penult.offset, last_chunk.line, False
				542	)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	543	chunk.exits.add(ex)
				544	chunks.append(chunk)
				545
				546	# Give all the chunks a length.
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	547	chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	548	for i in range(len(chunks)-1):
				549	chunks[i].length = chunks[i+1].byte - chunks[i].byte
				550
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	551	#self.validate_chunks(chunks)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	552	return chunks
				553
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	554	def validate_chunks(self, chunks):
				555	"""Validate the rule that chunks have a single entrance."""
				556	# starts is the entrances to the chunks
				557	starts = set([ch.byte for ch in chunks])
				558	for ch in chunks:
				559	assert all([(ex in starts or ex < 0) for ex in ch.exits])
				560
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	561	def _arcs(self):
				562	"""Find the executable arcs in the code.
				563
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	564	Yields pairs: (from,to). From and to are integer line numbers. If
				565	from is < 0, then the arc is an entrance into the code object. If to
				566	is < 0, the arc is an exit from the code object.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	567
				568	"""
				569	chunks = self._split_into_chunks()
				570
				571	# A map from byte offsets to chunks jumped into.
				572	byte_chunks = dict([(c.byte, c) for c in chunks])
				573
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	574	# There's always an entrance at the first chunk.
				575	yield (-1, byte_chunks[0].line)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	576
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	577	# Traverse from the first chunk in each line, and yield arcs where
				578	# the trace function will be invoked.
				579	for chunk in chunks:
				580	if not chunk.first:
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	581	continue
				582
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	583	chunks_considered = set()
				584	chunks_to_consider = [chunk]
				585	while chunks_to_consider:
				586	# Get the chunk we're considering, and make sure we don't
				587	# consider it again
				588	this_chunk = chunks_to_consider.pop()
				589	chunks_considered.add(this_chunk)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	590
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	591	# For each exit, add the line number if the trace function
				592	# would be triggered, or add the chunk to those being
				593	# considered if not.
				594	for ex in this_chunk.exits:
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	595	if ex < 0:
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	596	yield (chunk.line, ex)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	597	else:
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	598	next_chunk = byte_chunks[ex]
				599	if next_chunk in chunks_considered:
				600	continue
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	601
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	602	# The trace function is invoked if visiting the first
				603	# bytecode in a line, or if the transition is a
				604	# backward jump.
				605	backward_jump = next_chunk.byte < this_chunk.byte
				606	if next_chunk.first or backward_jump:
				607	if next_chunk.line != chunk.line:
				608	yield (chunk.line, next_chunk.line)
				609	else:
				610	chunks_to_consider.append(next_chunk)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	611
				612	def _all_chunks(self):
				613	"""Returns a list of `Chunk` objects for this code and its children.
				614
				615	See `_split_into_chunks` for details.
				616
				617	"""
				618	chunks = []
				619	for bp in self.child_parsers():
				620	chunks.extend(bp._split_into_chunks())
				621
				622	return chunks
				623
				624	def _all_arcs(self):
				625	"""Get the set of all arcs in this code object and its children.
				626
				627	See `_arcs` for details.
				628
				629	"""
				630	arcs = set()
				631	for bp in self.child_parsers():
				632	arcs.update(bp._arcs())
				633
				634	return arcs
				635
				636
				637	class Chunk(object):
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	638	"""A sequence of byte codes with a single entrance.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	639
				640	To analyze byte code, we have to divide it into chunks, sequences of byte
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	641	codes such that each chunk has only one entrance, the first instruction in
				642	the block.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	643
				644	This is almost the CS concept of `basic block`_, except that we're willing
				645	to have many exits from a chunk, and "basic block" is a more cumbersome
				646	term.
				647
				648	.. _basic block: http://en.wikipedia.org/wiki/Basic_block
				649
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	650	`line` is the source line number containing this chunk.
				651
				652	`first` is true if this is the first chunk in the source line.
				653
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	654	An exit < 0 means the chunk can leave the code (return). The exit is
				655	the negative of the starting line number of the code block.
				656
				657	"""
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	658	def __init__(self, byte, line, first):
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	659	self.byte = byte
				660	self.line = line
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	661	self.first = first
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	662	self.length = 0
				663	self.exits = set()
				664
				665	def __repr__(self):
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	666	if self.first:
				667	bang = "!"
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	668	else:
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	669	bang = ""
				670	return "<%d+%d @%d%s %r>" % (
				671	self.byte, self.length, self.line, bang, list(self.exits)
				672	)
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	673
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	674
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	675	class CachedTokenizer(object):
				676	"""A one-element cache around tokenize.generate_tokens.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	677
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	678	When reporting, coverage.py tokenizes files twice, once to find the
				679	structure of the file, and once to syntax-color it. Tokenizing is
				680	expensive, and easily cached.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	681
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	682	This is a one-element cache so that our twice-in-a-row tokenizing doesn't
				683	actually tokenize twice.
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	684
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	685	"""
				686	def __init__(self):
				687	self.last_text = None
				688	self.last_tokens = None
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	689
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	690	def generate_tokens(self, text):
				691	"""A stand-in for `tokenize.generate_tokens`."""
				692	if text != self.last_text:
				693	self.last_text = text
				694	self.last_tokens = list(
				695	tokenize.generate_tokens(StringIO(text).readline)
				696	)
				697	return self.last_tokens
Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame]	698
Tor Norbye	2e5965e	2014-07-25 12:24:15 -0700	[diff] [blame^]	699	# Create our generate_tokens cache as a callable replacement function.
				700	generate_tokens = CachedTokenizer().generate_tokens