| #!/usr/bin/env python |
| # |
| # deadlock_detector Detects potential deadlocks (lock order inversions) |
| # on a running process. For Linux, uses BCC, eBPF. |
| # |
| # USAGE: deadlock_detector.py [-h] [--binary BINARY] [--dump-graph DUMP_GRAPH] |
| # [--verbose] [--lock-symbols LOCK_SYMBOLS] |
| # [--unlock-symbols UNLOCK_SYMBOLS] |
| # pid |
| # |
| # This traces pthread mutex lock and unlock calls to build a directed graph |
| # representing the mutex wait graph: |
| # |
| # - Nodes in the graph represent mutexes. |
| # - Edge (A, B) exists if there exists some thread T where lock(A) was called |
| # and lock(B) was called before unlock(A) was called. |
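#
# For example, if thread T1 runs lock(A); lock(B); unlock(B); unlock(A), and
# thread T2 runs lock(B); lock(A), the graph contains both edge (A, B) and
# edge (B, A), forming a cycle that indicates a potential lock order
# inversion.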
| # |
| # If the program finds a potential lock order inversion, the program will dump |
| # the cycle of mutexes and the stack traces where each mutex was acquired, and |
| # then exit. |
| # |
| # This program can only find potential deadlocks that occur while the program |
| # is tracing the process. It cannot find deadlocks that may have occurred |
| # before the program was attached to the process. |
| # |
| # Since this traces all mutex lock and unlock events and all thread creation |
| # events on the traced process, the overhead of this bpf program can be very |
| # high if the process has many threads and mutexes. You should only run this on |
| # a process where the slowdown is acceptable. |
| # |
| # Note: This tool does not work for shared mutexes or recursive mutexes. |
| # |
# For shared (read-write) mutexes, a deadlock requires a cycle in the wait
# graph where at least one of the mutexes in the cycle is acquired for
# exclusive (write) ownership.
| # |
| # For recursive mutexes, lock() is called multiple times on the same mutex. |
| # However, there is no way to determine if a mutex is a recursive mutex |
| # after the mutex has been created. As a result, this tool will not find |
| # potential deadlocks that involve only one mutex. |
| # |
| # Copyright 2017 Facebook, Inc. |
| # Licensed under the Apache License, Version 2.0 (the "License") |
| # |
| # 01-Feb-2017 Kenny Yu Created this. |
| |
| from __future__ import ( |
| absolute_import, division, unicode_literals, print_function |
| ) |
| from bcc import BPF |
| from collections import defaultdict |
| import argparse |
| import json |
| import os |
| import subprocess |
| import sys |
| import time |
| |
| |
| class DiGraph(object): |
| ''' |
| Adapted from networkx: http://networkx.github.io/ |
| Represents a directed graph. Edges can store (key, value) attributes. |
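
    Example (illustrative):
        graph = DiGraph()
        graph.add_edge('a', 'b', weight=1)
        graph.neighbors('a')        # {'b'}
        graph.attributes('a', 'b')  # {'weight': 1}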
| ''' |
| |
| def __init__(self): |
| # Map of node -> set of nodes |
| self.adjacency_map = {} |
        # Map of (node1, node2) -> dict of attribute name -> attribute value.
        # These attributes are not copied in subgraph().
| self.attributes_map = {} |
| |
| def neighbors(self, node): |
| return self.adjacency_map.get(node, set()) |
| |
| def edges(self): |
| edges = [] |
| for node, neighbors in self.adjacency_map.items(): |
| for neighbor in neighbors: |
| edges.append((node, neighbor)) |
| return edges |
| |
| def nodes(self): |
| return self.adjacency_map.keys() |
| |
| def attributes(self, node1, node2): |
| return self.attributes_map[(node1, node2)] |
| |
| def add_edge(self, node1, node2, **kwargs): |
| if node1 not in self.adjacency_map: |
| self.adjacency_map[node1] = set() |
| if node2 not in self.adjacency_map: |
| self.adjacency_map[node2] = set() |
| self.adjacency_map[node1].add(node2) |
| self.attributes_map[(node1, node2)] = kwargs |
| |
| def remove_node(self, node): |
| self.adjacency_map.pop(node, None) |
        for neighbors in self.adjacency_map.values():
| neighbors.discard(node) |
| |
| def subgraph(self, nodes): |
| graph = DiGraph() |
| for node in nodes: |
| for neighbor in self.neighbors(node): |
| if neighbor in nodes: |
| graph.add_edge(node, neighbor) |
| return graph |
| |
| def node_link_data(self): |
| ''' |
| Returns the graph as a dictionary in a format that can be |
| serialized. |
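
        For example, for a graph with a single edge ('a', 'b') carrying
        attribute x=1, this returns roughly:
            {'directed': True, 'multigraph': False, 'graph': {},
             'nodes': [{'id': 'a'}, {'id': 'b'}],
             'links': [{'x': 1, 'source': 0, 'target': 1}]}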
| ''' |
| data = { |
| 'directed': True, |
| 'multigraph': False, |
| 'graph': {}, |
| 'links': [], |
| 'nodes': [], |
| } |
| |
| # Do one pass to build a map of node -> position in nodes |
| node_to_number = {} |
| for node in self.adjacency_map.keys(): |
| node_to_number[node] = len(data['nodes']) |
| data['nodes'].append({'id': node}) |
| |
| # Do another pass to build the link information |
| for node, neighbors in self.adjacency_map.items(): |
| for neighbor in neighbors: |
| link = self.attributes_map[(node, neighbor)].copy() |
| link['source'] = node_to_number[node] |
| link['target'] = node_to_number[neighbor] |
| data['links'].append(link) |
| return data |
| |
| |
| def strongly_connected_components(G): |
| ''' |
| Adapted from networkx: http://networkx.github.io/ |
| Parameters |
| ---------- |
| G : DiGraph |
| Returns |
| ------- |
| comp : generator of sets |
| A generator of sets of nodes, one for each strongly connected |
| component of G. |
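
    Example (illustrative): for a graph with edges (1, 2), (2, 1), and
    (2, 3), the strongly connected components are {1, 2} and {3}.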
| ''' |
| preorder = {} |
| lowlink = {} |
| scc_found = {} |
| scc_queue = [] |
| i = 0 # Preorder counter |
| for source in G.nodes(): |
| if source not in scc_found: |
| queue = [source] |
| while queue: |
| v = queue[-1] |
| if v not in preorder: |
| i = i + 1 |
| preorder[v] = i |
| done = 1 |
| v_nbrs = G.neighbors(v) |
| for w in v_nbrs: |
| if w not in preorder: |
| queue.append(w) |
| done = 0 |
| break |
| if done == 1: |
| lowlink[v] = preorder[v] |
| for w in v_nbrs: |
| if w not in scc_found: |
| if preorder[w] > preorder[v]: |
| lowlink[v] = min([lowlink[v], lowlink[w]]) |
| else: |
| lowlink[v] = min([lowlink[v], preorder[w]]) |
| queue.pop() |
| if lowlink[v] == preorder[v]: |
| scc_found[v] = True |
| scc = {v} |
| while ( |
| scc_queue and preorder[scc_queue[-1]] > preorder[v] |
| ): |
| k = scc_queue.pop() |
| scc_found[k] = True |
| scc.add(k) |
| yield scc |
| else: |
| scc_queue.append(v) |
| |
| |
| def simple_cycles(G): |
| ''' |
| Adapted from networkx: http://networkx.github.io/ |
| Parameters |
| ---------- |
| G : DiGraph |
| Returns |
| ------- |
| cycle_generator: generator |
| A generator that produces elementary cycles of the graph. |
| Each cycle is represented by a list of nodes along the cycle. |
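
    Example (illustrative): for a graph with edges (1, 2), (2, 3), (3, 1),
    and (2, 1), the elementary cycles are [1, 2] and [1, 2, 3] (each possibly
    rotated or listed in a different order).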
| ''' |
| |
| def _unblock(thisnode, blocked, B): |
| stack = set([thisnode]) |
| while stack: |
| node = stack.pop() |
| if node in blocked: |
| blocked.remove(node) |
| stack.update(B[node]) |
| B[node].clear() |
| |
    # Johnson's algorithm requires some ordering of the nodes.
    # We assign the arbitrary ordering given by the strongly connected
    # components. There is no need to track the ordering, as each node is
    # removed once it is processed. We save a copy of the actual graph so
    # we can mutate it here; we only take the edges because we do not want
    # to copy edge and node attributes.
| subG = G.subgraph(G.nodes()) |
| sccs = list(strongly_connected_components(subG)) |
| while sccs: |
| scc = sccs.pop() |
| # order of scc determines ordering of nodes |
| startnode = scc.pop() |
| # Processing node runs 'circuit' routine from recursive version |
| path = [startnode] |
| blocked = set() # vertex: blocked from search? |
| closed = set() # nodes involved in a cycle |
| blocked.add(startnode) |
| B = defaultdict(set) # graph portions that yield no elementary circuit |
| stack = [(startnode, list(subG.neighbors(startnode)))] |
| while stack: |
| thisnode, nbrs = stack[-1] |
| if nbrs: |
| nextnode = nbrs.pop() |
| if nextnode == startnode: |
| yield path[:] |
| closed.update(path) |
| elif nextnode not in blocked: |
| path.append(nextnode) |
| stack.append((nextnode, list(subG.neighbors(nextnode)))) |
| closed.discard(nextnode) |
| blocked.add(nextnode) |
| continue |
| # done with nextnode... look for more neighbors |
| if not nbrs: # no more nbrs |
| if thisnode in closed: |
| _unblock(thisnode, blocked, B) |
| else: |
| for nbr in subG.neighbors(thisnode): |
| if thisnode not in B[nbr]: |
| B[nbr].add(thisnode) |
| stack.pop() |
| path.pop() |
| # done processing this node |
| subG.remove_node(startnode) |
| H = subG.subgraph(scc) # make smaller to avoid work in SCC routine |
| sccs.extend(list(strongly_connected_components(H))) |
| |
| |
| def find_cycle(graph): |
| ''' |
| Looks for a cycle in the graph. If found, returns the first cycle. |
| If nodes a1, a2, ..., an are in a cycle, then this returns: |
| [(a1,a2), (a2,a3), ... (an-1,an), (an, a1)] |
| Otherwise returns an empty list. |
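
    Example (illustrative): for a graph whose only cycle is 1 -> 2 -> 3 -> 1,
    this returns [(1, 2), (2, 3), (3, 1)] (possibly starting at a different
    node of the cycle).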
| ''' |
| cycles = list(simple_cycles(graph)) |
| if cycles: |
| nodes = cycles[0] |
| nodes.append(nodes[0]) |
| edges = [] |
| prev = nodes[0] |
| for node in nodes[1:]: |
| edges.append((prev, node)) |
| prev = node |
| return edges |
| else: |
| return [] |
| |
| |
| def print_cycle(binary, graph, edges, thread_info, print_stack_trace_fn): |
| ''' |
| Prints the cycle in the mutex graph in the following format: |
| |
| Potential Deadlock Detected! |
| |
| Cycle in lock order graph: M0 => M1 => M2 => M0 |
| |
| for (m, n) in cycle: |
| Mutex n acquired here while holding Mutex m in thread T: |
| [ stack trace ] |
| |
| Mutex m previously acquired by thread T here: |
| [ stack trace ] |
| |
| for T in all threads: |
| Thread T was created here: |
| [ stack trace ] |
| ''' |
| |
| # List of mutexes in the cycle, first and last repeated |
| nodes_in_order = [] |
| # Map mutex address -> readable alias |
| node_addr_to_name = {} |
| for counter, (m, n) in enumerate(edges): |
| nodes_in_order.append(m) |
| # For global or static variables, try to symbolize the mutex address. |
| symbol = symbolize_with_objdump(binary, m) |
| if symbol: |
| symbol += ' ' |
| node_addr_to_name[m] = 'Mutex M%d (%s0x%016x)' % (counter, symbol, m) |
| nodes_in_order.append(nodes_in_order[0]) |
| |
| print('----------------\nPotential Deadlock Detected!\n') |
| print( |
| 'Cycle in lock order graph: %s\n' % |
| (' => '.join([node_addr_to_name[n] for n in nodes_in_order])) |
| ) |
| |
| # Set of threads involved in the lock inversion |
| thread_pids = set() |
| |
| # For each edge in the cycle, print where the two mutexes were held |
| for (m, n) in edges: |
| thread_pid = graph.attributes(m, n)['thread_pid'] |
| thread_comm = graph.attributes(m, n)['thread_comm'] |
| first_mutex_stack_id = graph.attributes(m, n)['first_mutex_stack_id'] |
| second_mutex_stack_id = graph.attributes(m, n)['second_mutex_stack_id'] |
| thread_pids.add(thread_pid) |
| print( |
| '%s acquired here while holding %s in Thread %d (%s):' % ( |
| node_addr_to_name[n], node_addr_to_name[m], thread_pid, |
| thread_comm |
| ) |
| ) |
| print_stack_trace_fn(second_mutex_stack_id) |
| print('') |
| print( |
| '%s previously acquired by the same Thread %d (%s) here:' % |
| (node_addr_to_name[m], thread_pid, thread_comm) |
| ) |
| print_stack_trace_fn(first_mutex_stack_id) |
| print('') |
| |
| # Print where the threads were created, if available |
| for thread_pid in thread_pids: |
| parent_pid, stack_id, parent_comm = thread_info.get( |
| thread_pid, (None, None, None) |
| ) |
| if parent_pid: |
| print( |
| 'Thread %d created by Thread %d (%s) here: ' % |
| (thread_pid, parent_pid, parent_comm) |
| ) |
| print_stack_trace_fn(stack_id) |
| else: |
| print( |
| 'Could not find stack trace where Thread %d was created' % |
| thread_pid |
| ) |
| print('') |
| |
| |
| def symbolize_with_objdump(binary, addr): |
| ''' |
| Searches the binary for the address using objdump. Returns the symbol if |
| it is found, otherwise returns empty string. |
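
    For example, if the symbol table printed by objdump contains a line
    roughly like
        00000000006020c0 g     O .bss   0000000000000028  my_global_mutex
    for the given address, this returns 'my_global_mutex'.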
| ''' |
| try: |
| command = ( |
| 'objdump -tT %s | grep %x | awk {\'print $NF\'} | c++filt' % |
| (binary, addr) |
| ) |
| output = subprocess.check_output(command, shell=True) |
| return output.decode('utf-8').strip() |
| except subprocess.CalledProcessError: |
| return '' |
| |
| |
| def strlist(s): |
| '''Given a comma-separated string, returns a list of substrings''' |
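    # e.g. strlist('lock_a,lock_b') -> ['lock_a', 'lock_b']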
| return s.strip().split(',') |
| |
| |
| def main(): |
| examples = '''Examples: |
| deadlock_detector 181 # Analyze PID 181 |
| |
| deadlock_detector 181 --binary /lib/x86_64-linux-gnu/libpthread.so.0 |
| # Analyze PID 181 and locks from this binary. |
| # If tracing a process that is running from |
| # a dynamically-linked binary, this argument |
| # is required and should be the path to the |
| # pthread library. |
| |
| deadlock_detector 181 --verbose |
| # Analyze PID 181 and print statistics about |
| # the mutex wait graph. |
| |
| deadlock_detector 181 --lock-symbols my_mutex_lock1,my_mutex_lock2 \\ |
| --unlock-symbols my_mutex_unlock1,my_mutex_unlock2 |
| # Analyze PID 181 and trace custom mutex |
| # symbols instead of pthread mutexes. |
| |
| deadlock_detector 181 --dump-graph graph.json |
| # Analyze PID 181 and dump the mutex wait |
| # graph to graph.json. |
| ''' |
| parser = argparse.ArgumentParser( |
| description=( |
| 'Detect potential deadlocks (lock inversions) in a running binary.' |
| '\nMust be run as root.' |
| ), |
| formatter_class=argparse.RawDescriptionHelpFormatter, |
| epilog=examples, |
| ) |
| parser.add_argument('pid', type=int, help='Pid to trace') |
| # Binaries with `:` in the path will fail to attach uprobes on kernels |
| # running without this patch: https://lkml.org/lkml/2017/1/13/585. |
| # Symlinks to the binary without `:` in the path can get around this issue. |
| parser.add_argument( |
| '--binary', |
| type=str, |
| default='', |
| help='If set, trace the mutexes from the binary at this path. ' |
| 'For statically-linked binaries, this argument is not required. ' |
| 'For dynamically-linked binaries, this argument is required and ' |
| 'should be the path of the pthread library the binary is using. ' |
| 'Example: /lib/x86_64-linux-gnu/libpthread.so.0', |
| ) |
| parser.add_argument( |
| '--dump-graph', |
| type=str, |
| default='', |
| help='If set, this will dump the mutex graph to the specified file.', |
| ) |
| parser.add_argument( |
| '--verbose', |
| action='store_true', |
| help='Print statistics about the mutex wait graph.', |
| ) |
| parser.add_argument( |
| '--lock-symbols', |
| type=strlist, |
| default=['pthread_mutex_lock'], |
| help='Comma-separated list of lock symbols to trace. Default is ' |
| 'pthread_mutex_lock. These symbols cannot be inlined in the binary.', |
| ) |
| parser.add_argument( |
| '--unlock-symbols', |
| type=strlist, |
| default=['pthread_mutex_unlock'], |
| help='Comma-separated list of unlock symbols to trace. Default is ' |
| 'pthread_mutex_unlock. These symbols cannot be inlined in the binary.', |
| ) |
| args = parser.parse_args() |
| if not args.binary: |
| try: |
| args.binary = os.readlink('/proc/%d/exe' % args.pid) |
| except OSError as e: |
| print('%s. Is the process (pid=%d) running?' % (str(e), args.pid)) |
| sys.exit(1) |
| |
| bpf = BPF(src_file='deadlock_detector.c') |
| |
| # Trace where threads are created |
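    # Note: if the kernel does not expose the syscall entry point as
    # 'sys_clone' (newer kernels prefix it per-architecture), resolving the
    # name via bpf.get_syscall_fnname('clone') may be needed instead.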
| bpf.attach_kretprobe(event='sys_clone', fn_name='trace_clone') |
| |
    # We must attach the probe on unlock() first; otherwise, in the window
    # after the lock() probe has been attached but before the unlock() probe
    # has, a thread could acquire and release mutexes without the release
    # events being traced, resulting in noisy reports.
| for symbol in args.unlock_symbols: |
| try: |
| bpf.attach_uprobe( |
| name=args.binary, |
| sym=symbol, |
| fn_name='trace_mutex_release', |
| pid=args.pid, |
| ) |
| except Exception as e: |
| print('%s. Failed to attach to symbol: %s' % (str(e), symbol)) |
| sys.exit(1) |
| for symbol in args.lock_symbols: |
| try: |
| bpf.attach_uprobe( |
| name=args.binary, |
| sym=symbol, |
| fn_name='trace_mutex_acquire', |
| pid=args.pid, |
| ) |
| except Exception as e: |
| print('%s. Failed to attach to symbol: %s' % (str(e), symbol)) |
| sys.exit(1) |
| |
| def print_stack_trace(stack_id): |
| '''Closure that prints the symbolized stack trace.''' |
| for addr in bpf.get_table('stack_traces').walk(stack_id): |
| line = bpf.sym(addr, args.pid) |
| # Try to symbolize with objdump if we cannot with bpf. |
| if line == '[unknown]': |
| symbol = symbolize_with_objdump(args.binary, addr) |
| if symbol: |
| line = symbol |
| print('@ %016x %s' % (addr, line)) |
| |
| print('Tracing... Hit Ctrl-C to end.') |
| while True: |
| try: |
| # Map of child thread pid -> parent info |
| thread_info = { |
| child.value: (parent.parent_pid, parent.stack_id, parent.comm) |
| for child, parent in bpf.get_table('thread_to_parent').items() |
| } |
| |
| # Mutex wait directed graph. Nodes are mutexes. Edge (A,B) exists |
| # if there exists some thread T where lock(A) was called and |
| # lock(B) was called before unlock(A) was called. |
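            # The 'edges' table is populated by deadlock_detector.c; each key
            # holds the pair of mutex addresses, and each leaf holds the
            # thread pid and comm plus the stack ids where each mutex was
            # acquired.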
| graph = DiGraph() |
| for key, leaf in bpf.get_table('edges').items(): |
| graph.add_edge( |
| key.mutex1, |
| key.mutex2, |
| thread_pid=leaf.thread_pid, |
| thread_comm=leaf.comm.decode('utf-8'), |
| first_mutex_stack_id=leaf.mutex1_stack_id, |
| second_mutex_stack_id=leaf.mutex2_stack_id, |
| ) |
| if args.verbose: |
| print( |
| 'Mutexes: %d, Edges: %d' % |
| (len(graph.nodes()), len(graph.edges())) |
| ) |
| if args.dump_graph: |
| with open(args.dump_graph, 'w') as f: |
| data = graph.node_link_data() |
| f.write(json.dumps(data, indent=2)) |
| |
| cycle = find_cycle(graph) |
| if cycle: |
| print_cycle( |
| args.binary, graph, cycle, thread_info, print_stack_trace |
| ) |
| sys.exit(1) |
| |
| time.sleep(1) |
| except KeyboardInterrupt: |
| break |
| |
| |
| if __name__ == '__main__': |
| main() |