#!/usr/bin/env python2.7
#
# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

8import re
9import subprocess
10import sys
11
# Tool names and file paths, in the order they may be overridden on the
# command line:
#   build_stages.py [clang [objdump [ccache [stages [stages_lowp
#                   [generated [generated_win]]]]]]]
_defaults = [
    'clang-5.0',
    'gobjdump',
    'ccache',
    'src/jumper/SkJumper_stages.cpp',
    'src/jumper/SkJumper_stages_lowp.cpp',
    'src/jumper/SkJumper_generated.S',
    'src/jumper/SkJumper_generated_win.S',
]

# Take whatever positional arguments were given (at most seven) and fill the
# remaining slots from the defaults.
_overrides = sys.argv[1:1 + len(_defaults)]
(clang, objdump, ccache, stages, stages_lowp,
 generated, generated_win) = _overrides + _defaults[len(_overrides):]

# From here on `clang` is the argv prefix of every compile command: the
# compiler is run through ccache and always treats its input as C++.
clang = [ccache, clang, '-x', 'c++']
29
30
# Flags shared by every compile.
cflags = ['-std=c++11', '-Os', '-DJUMPER_IS_OFFLINE',
          '-momit-leaf-frame-pointer', '-ffp-contract=fast',
          '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']

# Platform modifiers.
x86 = ['-m32']
win = ['-DWIN', '-mno-red-zone']

# Instruction-set specializations.
sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
sse41 = ['-msse4.1']
avx = ['-mavx']
hsw = ['-mavx2', '-mfma', '-mf16c']
skx = ['-march=skylake-avx512']

# One entry per object file: (extra flags, source file, output object).
# The entries are compiled strictly in the order listed.
_compile_jobs = [
    (sse2,              stages,      'sse2.o'),
    (sse2 + win,        stages,      'win_sse2.o'),
    (sse2 + x86,        stages,      'x86_sse2.o'),
    (sse2 + win + x86,  stages,      'win_x86_sse2.o'),

    (sse2,              stages_lowp, 'lowp_sse2.o'),
    (sse2 + win,        stages_lowp, 'win_lowp_sse2.o'),
    (sse2 + x86,        stages_lowp, 'x86_lowp_sse2.o'),
    (sse2 + win + x86,  stages_lowp, 'win_x86_lowp_sse2.o'),

    (sse41,             stages,      'sse41.o'),
    (sse41 + win,       stages,      'win_sse41.o'),

    (sse41,             stages_lowp, 'lowp_sse41.o'),
    (sse41 + win,       stages_lowp, 'win_lowp_sse41.o'),

    (avx,               stages,      'avx.o'),
    (avx + win,         stages,      'win_avx.o'),

    (hsw,               stages,      'hsw.o'),
    (hsw + win,         stages,      'win_hsw.o'),

    (hsw,               stages_lowp, 'lowp_hsw.o'),
    (hsw + win,         stages_lowp, 'win_lowp_hsw.o'),

    (skx,               stages,      'skx.o'),
]

for _extra_flags, _source, _object in _compile_jobs:
    # check_call raises CalledProcessError on the first failing compile,
    # which aborts the script.
    subprocess.check_call(clang + cflags + _extra_flags +
                          ['-c', _source] +
                          ['-o', _object])
106
# Merge x86-64 object files to deduplicate constants.
# (No other platform has more than one specialization.)
# Each entry is (merged output, input objects); `ld -r` produces a
# relocatable object from the inputs.
_merge_jobs = [
    ('merged.o',
     ['skx.o', 'hsw.o', 'avx.o', 'sse41.o', 'sse2.o',
      'lowp_hsw.o', 'lowp_sse41.o', 'lowp_sse2.o']),
    ('win_merged.o',
     ['win_hsw.o', 'win_avx.o', 'win_sse41.o', 'win_sse2.o',
      'win_lowp_hsw.o', 'win_lowp_sse41.o', 'win_lowp_sse2.o']),
    ('x86_merged.o',
     ['x86_sse2.o',
      'x86_lowp_sse2.o']),
    ('win_x86_merged.o',
     ['win_x86_sse2.o',
      'win_x86_lowp_sse2.o']),
]

for _merged, _inputs in _merge_jobs:
    subprocess.check_call(['ld', '-r', '-o', _merged] + _inputs)
122
def parse_object_file(dot_o, directive, target=None):
    """Disassemble an object file and write it to sys.stdout as data directives.

    Runs objdump on dot_o and re-emits every disassembled line as raw data
    using `directive`, keeping the mnemonic and operands as a trailing
    comment.  Symbol lines become an alignment directive (for .literal* and
    .const symbols) or an exported label (for stage functions, whose names
    must start with 'sk_').

    Args:
      dot_o: path of the object file.  If the path contains 'win', the
          PUBLIC / ' LABEL PROC' / ';' / 'ALIGN ' spellings are emitted;
          otherwise .globl / HIDDEN / FUNCTION() / '//' / BALIGN<n> are used.
          NOTE(review): this is a substring test, so any path that happens
          to contain 'win' selects the first set.
      directive: data directive written before the bytes of each line, e.g.
          '.byte' or 'DB'.  '.long' selects a plain `objdump -d` and keeps
          each word as 0x-prefixed hex; any other value restricts the dump
          to .text plus the literal/const sections and prints each byte in
          decimal.
      target: optional value forwarded to objdump as --target.

    Raises:
      AssertionError: if the object has a .rodata section, or defines a
          symbol that is neither a literal/const section nor an sk_ stage.
      ValueError: if a disassembly line has no byte column at all.
      subprocess.CalledProcessError: if objdump exits with a failure status.
    """
    def emit(text=''):
        # Same effect as a Python 2 `print text`: goes to whatever
        # sys.stdout currently is, followed by a newline.
        sys.stdout.write(text + '\n')

    if 'win' in dot_o:
        globl, hidden, label, comment, align = \
            'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '
    else:
        globl, hidden, label, comment, align = \
            '.globl', 'HIDDEN', ':', '// ', 'BALIGN'

    cmd = [objdump]
    if target:
        cmd += ['--target', target]

    # Look for sections we know we can't handle.
    # universal_newlines=True makes check_output return text on every
    # interpreter, so the substring test and split('\n') below are valid.
    section_headers = subprocess.check_output(cmd + ['-h', dot_o],
                                              universal_newlines=True)
    for snippet in ['.rodata']:
        if snippet in section_headers:
            message = 'Found %s in section.' % snippet
            sys.stderr.write(message + '\n')
            # Raised explicitly rather than with `assert`, so the check is
            # not stripped when the script runs under `python -O`.
            raise AssertionError(message)

    if directive == '.long':
        disassemble = ['-d', dot_o]

        def dehex(h):
            return '0x' + h
    else:
        # x86-64... as long as we're using %rip-relative addressing,
        # literal sections should be fine to just dump in with .text.
        disassemble = ['-d',     # DO NOT USE -D.
                       '-z',     # Print zero bytes instead of ...
                       '--insn-width=11',
                       '-j', '.text',
                       '-j', '.literal4',
                       '-j', '.literal8',
                       '-j', '.literal16',
                       '-j', '.const',
                       dot_o]

        def dehex(h):
            return str(int(h, 16))

    # Ok.  Let's disassemble.
    listing = subprocess.check_output(cmd + disassemble,
                                      universal_newlines=True)
    for line in listing.split('\n'):
        line = line.strip()

        if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
            continue

        # E.g. 00000000000003a4 <_load_f16>:
        m = re.match(r'[0-9a-f]+ <_?(.*)>:', line)
        if m:
            emit()
            sym = m.group(1)
            if sym.startswith('.literal'):  # .literal4, .literal16, etc
                emit(sym.replace('.literal', align))
            elif sym.startswith('.const'):  # 32-byte constants
                emit(align + '32')
            elif not sym.startswith('sk_'):
                message = "build_stages.py can't handle '%s' (yet?)." % sym
                sys.stderr.write(message + '\n')
                raise AssertionError(message)
            else:  # a stage function
                if hidden:
                    emit(hidden + ' _' + sym)
                emit(globl + ' _' + sym)
                if 'win' not in dot_o:
                    emit('FUNCTION(_' + sym + ')')
                emit('_' + sym + label)
            continue

        # Remaining lines are tab-separated: address, bytes, then the
        # mnemonic and (optionally, in a fourth column) its operands.
        columns = line.split('\t')
        if len(columns) < 2:
            raise ValueError(
                "build_stages.py can't parse objdump line %r" % line)

        code = columns[1].strip()
        hexed = ','.join(dehex(x) for x in code.split(' '))

        if len(columns) == 2:
            # Bytes with no mnemonic: objdump wraps an instruction longer
            # than --insn-width onto such a continuation line.  The bytes
            # still belong in the output, there is just nothing to comment.
            emit(' ' + directive + ' ' + hexed)
            continue

        if len(columns) >= 4:
            inst, args = columns[2], columns[3]
        elif ' ' in columns[2]:
            inst, args = columns[2].split(' ', 1)
        else:
            inst, args = columns[2], ''
        inst, args = inst.strip(), args.strip()

        emit(' ' + directive + ' ' + hexed + ' '*(36-len(hexed)) +
             comment + inst + (' '*(14-len(inst)) + args if args else ''))
201
# Write the assembly for non-Windows targets to `generated`.
#
# parse_object_file() writes to sys.stdout, so sys.stdout is pointed at the
# output file while it runs.  The file is closed (and therefore flushed) and
# the previous sys.stdout restored as soon as this section is done, even if
# objdump or the translation fails part-way through.
_saved_stdout = sys.stdout
with open(generated, 'w') as _out:
    sys.stdout = _out
    try:
        # Header comment, followed by one blank line.
        _out.write('''# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# This file is generated semi-automatically with this command:
# $ src/jumper/build_stages.py
''' + '\n')

        # Preprocessor macros that hide the Mach-O vs. other-assembler
        # spelling of the HIDDEN / FUNCTION / BALIGN<n> directives which
        # parse_object_file() emits.
        for _line in [
                '#if defined(__MACH__)',
                ' #define HIDDEN .private_extern',
                ' #define FUNCTION(name)',
                ' #define BALIGN4 .align 2',
                ' #define BALIGN8 .align 3',
                ' #define BALIGN16 .align 4',
                ' #define BALIGN32 .align 5',
                '#else',
                ' .section .note.GNU-stack,"",%progbits',
                ' #define HIDDEN .hidden',
                ' #define FUNCTION(name) .type name,%function',
                ' #define BALIGN4 .balign 4',
                ' #define BALIGN8 .balign 8',
                ' #define BALIGN16 .balign 16',
                ' #define BALIGN32 .balign 32',
                '#endif',
                '.text',
                '#if defined(__x86_64__)',
                'BALIGN32']:
            _out.write(_line + '\n')
        parse_object_file('merged.o', '.byte')

        _out.write('#elif defined(__i386__)' + '\n')
        _out.write('BALIGN32' + '\n')
        parse_object_file('x86_merged.o', '.byte')

        _out.write('#endif' + '\n')
    finally:
        sys.stdout = _saved_stdout
239
# Write the assembly for Windows targets to `generated_win`.
#
# parse_object_file() writes to sys.stdout, so sys.stdout is pointed at the
# output file while it runs.  The file is closed (and therefore flushed) and
# the previous sys.stdout restored when this section is done, even if objdump
# or the translation fails part-way through.
_saved_stdout_win = sys.stdout
with open(generated_win, 'w') as _out_win:
    sys.stdout = _out_win
    try:
        # Header comment, followed by one blank line.
        _out_win.write('''; Copyright 2017 Google Inc.
;
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

; This file is generated semi-automatically with this command:
; $ src/jumper/build_stages.py
''' + '\n')

        # The IFDEF RAX branch carries the code built without -m32
        # (win_merged.o); the ELSE branch carries the -m32 build
        # (win_x86_merged.o).
        for _line in [
                'IFDEF RAX',
                "_text32 SEGMENT ALIGN(32) 'CODE'",
                'ALIGN 32']:
            _out_win.write(_line + '\n')
        parse_object_file('win_merged.o', 'DB')

        for _line in [
                'ELSE',
                '.MODEL FLAT,C',
                "_text32 SEGMENT ALIGN(32) 'CODE'",
                'ALIGN 32']:
            _out_win.write(_line + '\n')
        parse_object_file('win_x86_merged.o', 'DB')

        _out_win.write('ENDIF' + '\n')
        _out_win.write('END' + '\n')
    finally:
        sys.stdout = _saved_stdout_win