Blame - lldb/scripts/analyze-project-deps.py - toolchain/llvm-project

2017-03-06 17:41:00 +0000

[diff] [blame]

3

import argparse

Zachary Turner

2017-03-21 22:46:46 +0000

[diff] [blame]

4

import itertools

Zachary Turner

2017-03-06 17:40:36 +0000

[diff] [blame]

5

import os

6

import re

Zachary Turner

2017-03-21 22:46:46 +0000

[diff] [blame]

7

import sys

Zachary Turner

2017-03-22 18:23:14 +0000

[diff] [blame]

8

from collections import defaultdict

Zachary Turner

2017-03-06 17:40:36 +0000

[diff] [blame]

9

10

from use_lldb_suite import lldb_root

11

Zachary Turner

2017-03-06 17:41:00 +0000

[diff] [blame]

12

# Command-line interface.  Both switches are plain on/off flags that default
# to off.
parser = argparse.ArgumentParser(
    description='Analyze LLDB project #include dependencies.')
parser.add_argument('--show-counts', default=False, action='store_true',
                    help='When true, show the number of dependencies from each subproject')
# The two adjacent literals are concatenated by the parser, so the separating
# space has to live inside one of them.
parser.add_argument('--discover-cycles', default=False, action='store_true',
                    help='When true, find and display all project dependency cycles. Note, '
                         'this option is very slow')

args = parser.parse_args()

# Roots of the two trees that are walked for #include directives.
src_dir = os.path.join(lldb_root, "source")
inc_dir = os.path.join(lldb_root, "include")

# Maps a normalized directory name to a dict of
# {included directory name: number of #include lines that reference it}.
src_map = {}

# Matches a quoted #include whose path starts with lldb, Plugins or clang.
# Group 1 is the directory portion of the included path, up to and including
# the last '/'.
include_regex = re.compile('#include \"((lldb|Plugins|clang)(.*/)+).*\"')

28

Zachary Turner

2017-03-20 23:54:26 +0000

[diff] [blame]

29

def is_sublist(small, big):
    """Return True when the items of `small` appear in `big` in the same order.

    The items do not have to be adjacent in `big`; this is an ordered
    subsequence test.  An empty `small` is always contained.
    """
    remaining = iter(big)
    for wanted in small:
        # `in` on an iterator consumes it up to and including the first match,
        # so every later item is only searched for after the earlier one.
        if wanted not in remaining:
            return False
    return True

32

Zachary Turner

2017-03-06 17:41:00 +0000

[diff] [blame]

33

def normalize_host(str):
    """Map a directory name onto the canonical name used as a dependency key.

    * Anything starting with "lldb/Host" collapses to "lldb/Host".
    * Names starting with "Plugins" are given an "lldb/" prefix.
    * Names starting with "lldb/../../source" have every occurrence of that
      text replaced by "lldb".
    * Every other name is returned unchanged.
    """
    host_root = "lldb/Host"
    source_alias = "lldb/../../source"

    if str.startswith(host_root):
        normalized = host_root
    elif str.startswith("Plugins"):
        normalized = "lldb/" + str
    elif str.startswith(source_alias):
        normalized = str.replace(source_alias, "lldb")
    else:
        normalized = str
    return normalized

Zachary Turner

2017-03-06 17:40:36 +0000

[diff] [blame]

41

42

def scan_deps(this_dir, file):
    """Count the directories that `file` includes and record them for `this_dir`.

    `this_dir` is normalized with normalize_host() and used as the key into
    the module-level `src_map`.  Counts are accumulated into the existing
    entry for that key when there is one, so calling this once per file
    builds up a per-directory total.  A directory is only added to `src_map`
    once at least one dependency has been found for it.

    Includes that resolve to `this_dir` itself are ignored.
    """
    this_dir = normalize_host(this_dir)
    # Reuse the dict already stored for this directory so that counts from
    # several files accumulate in place.
    deps = src_map.get(this_dir, {})

    with open(file) as f:
        # Iterate the file lazily; there is no need to hold every line at once.
        for line in f:
            m = include_regex.match(line)
            if m is None:
                continue
            # Group 1 is the directory part of the include, with a trailing '/'.
            relative = m.group(1).rstrip("/")
            if relative == this_dir:
                continue
            relative = normalize_host(relative)
            # Normalization can fold the include back onto this directory.
            if relative == this_dir:
                continue
            deps[relative] = deps.get(relative, 0) + 1

    if deps and this_dir not in src_map:
        src_map[this_dir] = deps

Zachary Turner

2017-03-06 17:40:36 +0000

[diff] [blame]

64

65

# Scan every header under the include root.  Each directory is keyed by its
# path relative to that root, with backslashes turned into forward slashes.
for (base, dirs, files) in os.walk(inc_dir):
    relative = os.path.relpath(base, inc_dir)
    relative = relative.replace("\\", "/")
    for inc in files:
        if os.path.splitext(inc)[1] != ".h":
            continue
        inc_path = os.path.join(base, inc)
        scan_deps(relative, inc_path)

# Scan the implementation tree.  Directories are keyed as "lldb/<relative
# path>" so that they share a namespace with the include tree's keys.
for (base, dirs, files) in os.walk(src_dir):
    relative = os.path.relpath(base, src_dir)
    norm_base_path = os.path.normpath(os.path.join("lldb", relative))
    norm_base_path = norm_base_path.replace("\\", "/")
    for src in files:
        if os.path.splitext(src)[1] not in (".cpp", ".h", ".mm"):
            continue
        src_path = os.path.join(base, src)
        scan_deps(norm_base_path, src_path)

84

Zachary Turner

2017-03-20 23:54:26 +0000

[diff] [blame]

85

def is_existing_cycle(path, cycles):
    """Return True if any known cycle is contained in some rotation of `path`.

    `path` is a list of nodes and `cycles` is a list of such lists.
    Containment is tested with is_sublist(), i.e. the known cycle's nodes must
    appear in the rotated path in the same order.  `path` itself is not
    modified.
    """
    # If we have a cycle like A -> B -> C (with an implicit -> A at the end)
    # then we don't just want to check for an occurrence of A -> B -> C in the
    # list of known cycles, but every possible rotation of A -> B -> C.  For
    # example, if we previously encountered B -> C -> A (with an implicit -> B
    # at the end), then A -> B -> C is also a cycle.  This is an important
    # optimization which reduces the search space by multiple orders of
    # magnitude.
    rotation = path
    for _ in range(len(path)):
        if any(is_sublist(known, rotation) for known in cycles):
            return True
        # Rotate right by one; this builds a new list each time.
        rotation = [rotation[-1]] + rotation[0:-1]
    return False

98

99

def expand(path_queue, path_lengths, cycles, src_map):
    """Search the dependency graph for cycles, appending new ones to `cycles`.

    path_queue   -- list of paths (each a list of nodes) still to be extended;
                    consumed from the front and extended at the back in place.
    path_lengths -- list kept parallel to `path_queue` with each path's length.
    cycles       -- list that receives every newly discovered cycle, stored
                    without the repeated closing node.
    src_map      -- maps a node to a dict whose keys are its dependencies.

    Both queues are empty when the function returns.
    """
    # We do a breadth first search, to make sure we visit all paths in order
    # of ascending length.  This is an important optimization to make sure that
    # short cycles are discovered first, which will allow us to discard longer
    # cycles which grow the search space exponentially the longer they get.
    while path_queue:
        cur_path = path_queue.pop(0)
        # Pop the matching length before deciding whether to skip the path so
        # the two queues always stay in step.
        next_len = path_lengths.pop(0) + 1
        if is_existing_cycle(cur_path, cycles):
            continue

        last_component = cur_path[-1]

        # A node that is depended upon may have no entry of its own (it has no
        # recorded dependencies); treat it as a dead end rather than failing.
        for item in src_map.get(last_component, ()):
            if item.startswith("clang"):
                continue

            if item in cur_path:
                # This is a cycle.  Minimize it and then check if the result is
                # already in the list of cycles.  Insert it (or not) and then
                # move on to the next dependency.
                new_index = cur_path.index(item)
                cycle = cur_path[new_index:]
                if not is_existing_cycle(cycle, cycles):
                    cycles.append(cycle)
                continue

            path_lengths.append(next_len)
            path_queue.append(cur_path + [item])

# State for the optional cycle search: one single-node starting path per
# directory that has recorded dependencies, plus a parallel list of lengths.
cycles = []
path_queue = [[x] for x in src_map]
path_lens = [1] * len(path_queue)

# Report each directory, in name order, followed by what it depends on.
items = sorted(src_map.items(), key=lambda entry: entry[0])

for (path, deps) in items:
    print(path + ":")
    sorted_deps = list(deps.items())
    if args.show_counts:
        # Order by ascending include count and show the count.
        sorted_deps.sort(key=lambda dep: dep[1])
        for dep in sorted_deps:
            print("\t{} [{}]".format(dep[0], dep[1]))
    else:
        # Order by dependency name.
        sorted_deps.sort(key=lambda dep: dep[0])
        for dep in sorted_deps:
            print("\t{}".format(dep[0]))

Zachary Turner

2017-03-20 23:54:26 +0000

[diff] [blame]

149

Zachary Turner

2017-03-21 22:46:46 +0000

[diff] [blame]

150

def iter_cycles(cycles):
    """Yield `(total, smallest, edges)` for every cycle in `cycles`.

    Each cycle is a list of nodes without the repeated closing node.  `edges`
    is a list of `(source, count, target)` tuples, one per hop including the
    hop from the last node back to the first, where `count` is looked up in
    the module-level `src_map` as `src_map[source][target]`.  `total` is the
    sum of the counts and `smallest` is the lowest single count.

    The lists in `cycles` are left unmodified.
    """
    for cycle in cycles:
        # Close the loop on a copy so the caller's list keeps its shape.
        closed = list(cycle) + [cycle[0]]
        edges = [(x, src_map[x][y], y)
                 for (x, y) in zip(closed[:-1], closed[1:])]
        counts = [count for (_, count, _) in edges]
        yield (sum(counts), min(counts), edges)

162

Zachary Turner

2017-03-20 23:54:26 +0000

[diff] [blame]

163

if args.discover_cycles:
    print("Analyzing cycles...")

    expand(path_queue, path_lens, cycles, src_map)

    # Cycles are stored without their closing node, hence the +1.  Floor
    # division keeps the result an integer, and the guard avoids dividing by
    # zero when the search finds nothing.
    if cycles:
        average = sum(len(x) + 1 for x in cycles) // len(cycles)
    else:
        average = 0

    print("Found {} cycles. Average cycle length = {}.".format(len(cycles), average))

    # Hand over copies so that the lists in `cycles` stay free of a closing
    # node whatever iter_cycles does with its input.
    counted = list(iter_cycles([list(c) for c in cycles]))
    if args.show_counts:
        # Cheapest cycle to break (fewest total includes) first.
        counted.sort(key=lambda entry: entry[0])
        for (total, smallest, cycle) in counted:
            sys.stdout.write("{} deps to break: ".format(total))
            sys.stdout.write(cycle[0][0])
            for (first, count, last) in cycle:
                sys.stdout.write(" [{}->] {}".format(count, last))
            sys.stdout.write("\n")
    else:
        for cycle in cycles:
            # Show the closing hop without modifying the stored cycle.
            print(" -> ".join(cycle + [cycle[0]]))

    print("Analyzing islands...")
    islands = []
    outgoing_counts = defaultdict(int)
    incoming_counts = defaultdict(int)
    for (total, smallest, cycle) in counted:
        for (first, count, last) in cycle:
            outgoing_counts[first] += count
            incoming_counts[last] += count
    # Merge cycles that share at least one node into a single island.
    for cycle in cycles:
        this_cycle = set(cycle)
        disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
        overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
        islands = disjoints + [set.union(this_cycle, *overlaps)]
    print("Found {} disjoint cycle islands...".format(len(islands)))
    for island in islands:
        print("Island ({} elements)".format(len(island)))
        ranked = [(node, incoming_counts[node], outgoing_counts[node])
                  for node in island]
        # Least-connected nodes (incoming plus outgoing) first.
        ranked.sort(key=lambda entry: entry[1] + entry[2])
        for (node, inc, outg) in ranked:
            print(" {} [{} in, {} out]".format(node, inc, outg))

Zachary Turner