Mike Klein | 5cc94cc | 2018-03-07 17:04:18 +0000 | [diff] [blame^] | 1 | #!/usr/bin/env python2.7 |
| 2 | # |
| 3 | # Copyright 2017 Google Inc. |
| 4 | # |
| 5 | # Use of this source code is governed by a BSD-style license that can be |
| 6 | # found in the LICENSE file. |
| 7 | |
| 8 | import re |
| 9 | import subprocess |
| 10 | import sys |
| 11 | |
def _arg(position, fallback):
    """Return sys.argv[position] when it was supplied, otherwise fallback."""
    if len(sys.argv) > position:
        return sys.argv[position]
    return fallback


# Tool names and file paths.  Each one has a built-in default and can be
# overridden by the command-line argument at the position shown.
clang = _arg(1, 'clang-5.0')
objdump = _arg(2, 'gobjdump')
ccache = _arg(3, 'ccache')
stages = _arg(4, 'src/jumper/SkJumper_stages.cpp')
stages_lowp = _arg(5, 'src/jumper/SkJumper_stages_lowp.cpp')
generated = _arg(6, 'src/jumper/SkJumper_generated.S')
generated_win = _arg(7, 'src/jumper/SkJumper_generated_win.S')

# From here on `clang` is the argv prefix shared by every compile: the
# compiler is run through ccache and told to treat its input as C++.
clang = [ccache, clang, '-x', 'c++']
| 29 | |
| 30 | |
# Flags passed to every compile.
cflags = ['-std=c++11', '-Os', '-DJUMPER_IS_OFFLINE',
          '-momit-leaf-frame-pointer', '-ffp-contract=fast',
          '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']

# Modifier flag sets, appended after the instruction-set flags.
x86 = ['-m32']
win = ['-DWIN', '-mno-red-zone']

# Instruction-set flag sets.
sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
sse41 = ['-msse4.1']
avx = ['-mavx']
hsw = ['-mavx2', '-mfma', '-mf16c']
skx = ['-march=skylake-avx512']

# (object-name prefix, extra flags) for each build variant, and
# (object-name infix, source file) for each source.  The object file is
# named <prefix><infix><isa>.o, e.g. win_x86_lowp_sse2.o.
_variants = [('', []), ('win_', win), ('x86_', x86), ('win_x86_', win + x86)]
_sources = [('', stages), ('lowp_', stages_lowp)]

# Which variants and sources are built for each instruction set, listed in
# build order.  Slices pick a leading subset of the tables above.
_plan = [
    ('sse2', sse2, _variants, _sources),
    ('sse41', sse41, _variants[:2], _sources),
    ('avx', avx, _variants[:2], _sources[:1]),
    ('hsw', hsw, _variants[:2], _sources),
    ('skx', skx, _variants[:1], _sources[:1]),
]

for _isa, _isa_flags, _isa_variants, _isa_sources in _plan:
    for _infix, _source in _isa_sources:
        for _prefix, _extra in _isa_variants:
            subprocess.check_call(
                clang + cflags + _isa_flags + _extra +
                ['-c', _source] +
                ['-o', _prefix + _infix + _isa + '.o'])
| 106 | |
# Merge x86-64 object files to deduplicate constants.
# (No other platform has more than one specialization.)
# Each entry is (merged output, input objects); `ld -r` performs a
# relocatable link, so the result is itself an object file.
_merges = [
    ('merged.o',
     ['skx.o', 'hsw.o', 'avx.o', 'sse41.o', 'sse2.o',
      'lowp_hsw.o', 'lowp_sse41.o', 'lowp_sse2.o']),
    ('win_merged.o',
     ['win_hsw.o', 'win_avx.o', 'win_sse41.o', 'win_sse2.o',
      'win_lowp_hsw.o', 'win_lowp_sse41.o', 'win_lowp_sse2.o']),
    ('x86_merged.o',
     ['x86_sse2.o',
      'x86_lowp_sse2.o']),
    ('win_x86_merged.o',
     ['win_x86_sse2.o',
      'win_x86_lowp_sse2.o']),
]

for _merged, _inputs in _merges:
    subprocess.check_call(['ld', '-r', '-o', _merged] + _inputs)
| 122 | |
def parse_object_file(dot_o, directive, target=None):
    """Disassemble an object file and print it as assembler data lines.

    Runs the module-level `objdump` command on `dot_o` and writes the result
    to sys.stdout: one alignment or symbol header per disassembled symbol,
    followed by one data line per instruction carrying the instruction's
    bytes and, as a trailing comment, its mnemonic and operands.

    Args:
      dot_o: Path of the object file.  If the string contains 'win' the
        Windows spellings (PUBLIC, LABEL PROC, ';' comments, ALIGN) are
        used instead of the default ones (.globl, HIDDEN, ':', '//'
        comments, BALIGN).
      directive: Data directive placed at the start of every instruction
        line.  '.long' selects a plain `-d` disassembly with 0x-prefixed
        hex tokens; any other value restricts the dump to the text,
        literal and const sections and prints each byte in decimal.
      target: Optional value forwarded to objdump's --target option.

    Raises:
      AssertionError: if the section headers mention .rodata, or a symbol
        is neither a literal/const section nor an sk_ stage function.
    """
    def emit(text=''):
        sys.stdout.write(text + '\n')

    is_win = 'win' in dot_o
    if is_win:
        globl, hidden, label, comment, align = \
            'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '
    else:
        globl, hidden, label, comment, align = \
            '.globl', 'HIDDEN', ':', '// ', 'BALIGN'

    objdump_cmd = [objdump]
    if target:
        objdump_cmd = objdump_cmd + ['--target', target]

    # Refuse object files containing sections we know we can't handle.
    headers = subprocess.check_output(objdump_cmd + ['-h', dot_o])
    for unsupported in ['.rodata']:
        if unsupported in headers:
            sys.stderr.write('Found %s in section.' % unsupported + '\n')
            assert unsupported not in headers

    if directive == '.long':
        dump_args = ['-d', dot_o]

        def dehex(h):
            return '0x' + h
    else:
        # x86-64... as long as we're using %rip-relative addressing,
        # literal sections should be fine to just dump in with .text.
        dump_args = ['-d',                # DO NOT USE -D.
                     '-z',                # Print zero bytes instead of ...
                     '--insn-width=11']
        for section in ('.text', '.literal4', '.literal8', '.literal16',
                        '.const'):
            dump_args += ['-j', section]
        dump_args.append(dot_o)

        def dehex(h):
            return str(int(h, 16))

    # E.g. 00000000000003a4 <_load_f16>:
    symbol_header = re.compile('''[0-9a-f]+ <_?(.*)>:''')

    # Ok.  Let's disassemble.
    listing = subprocess.check_output(objdump_cmd + dump_args)
    for raw_line in listing.split('\n'):
        line = raw_line.strip()

        # Skip blank lines and objdump's own banner lines.
        if not line:
            continue
        if line.startswith(dot_o) or line.startswith('Disassembly'):
            continue

        found = symbol_header.match(line)
        if found:
            emit()
            sym = found.group(1)
            if sym.startswith('.literal'):   # .literal4, .literal16, etc
                emit(sym.replace('.literal', align))
            elif sym.startswith('.const'):   # 32-byte constants
                emit(align + '32')
            elif sym.startswith('sk_'):      # a stage function
                if hidden:
                    emit(hidden + ' _' + sym)
                emit(globl + ' _' + sym)
                if not is_win:
                    emit('FUNCTION(_' + sym + ')')
                emit('_' + sym + label)
            else:
                sys.stderr.write(
                    "build_stages.py can't handle '%s' (yet?)." % sym + '\n')
                assert sym.startswith('sk_')
            continue

        # An instruction line: tab-separated address, bytes, mnemonic and
        # (sometimes in a column of their own) operands.
        fields = line.split('\t')
        code = fields[1]
        if len(fields) >= 4:
            inst, args = fields[2], fields[3]
        elif ' ' in fields[2]:
            inst, args = fields[2].split(' ', 1)
        else:
            inst, args = fields[2], ''
        code, inst, args = code.strip(), inst.strip(), args.strip()

        hexed = ','.join(dehex(token) for token in code.split(' '))
        text = ' ' + directive + ' ' + hexed.ljust(36) + comment + inst
        if args:
            text = ' ' + directive + ' ' + hexed.ljust(36) + \
                comment + inst.ljust(14) + args
        emit(text)
| 201 | |
def _write_lines(out, lines):
    """Write every string in `lines` to `out`, each followed by a newline."""
    out.write(''.join(text + '\n' for text in lines))


# parse_object_file() writes to sys.stdout, so sys.stdout is pointed at each
# output file while that file is being produced.  Each file is closed as soon
# as it is complete (the `with` blocks) and the real stdout is put back even
# if a step fails, instead of leaving both files open and stdout hijacked.
_saved_stdout = sys.stdout
try:
    # The preprocessed-assembly output (`generated`).
    with open(generated, 'w') as _out:
        sys.stdout = _out
        _write_lines(_out, [
            '''# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# This file is generated semi-automatically with this command:
# $ src/jumper/build_stages.py
''',
            '#if defined(__MACH__)',
            ' #define HIDDEN .private_extern',
            ' #define FUNCTION(name)',
            ' #define BALIGN4 .align 2',
            ' #define BALIGN8 .align 3',
            ' #define BALIGN16 .align 4',
            ' #define BALIGN32 .align 5',
            '#else',
            ' .section .note.GNU-stack,"",%progbits',
            ' #define HIDDEN .hidden',
            ' #define FUNCTION(name) .type name,%function',
            ' #define BALIGN4 .balign 4',
            ' #define BALIGN8 .balign 8',
            ' #define BALIGN16 .balign 16',
            ' #define BALIGN32 .balign 32',
            '#endif',
            '.text',
            '#if defined(__x86_64__)',
            'BALIGN32',
        ])
        parse_object_file('merged.o', '.byte')

        _write_lines(_out, [
            '#elif defined(__i386__)',
            'BALIGN32',
        ])
        parse_object_file('x86_merged.o', '.byte')

        _write_lines(_out, ['#endif'])

    # The Windows assembler output (`generated_win`).
    with open(generated_win, 'w') as _out:
        sys.stdout = _out
        _write_lines(_out, [
            '''; Copyright 2017 Google Inc.
;
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

; This file is generated semi-automatically with this command:
; $ src/jumper/build_stages.py
''',
            'IFDEF RAX',
            "_text32 SEGMENT ALIGN(32) 'CODE'",
            'ALIGN 32',
        ])
        parse_object_file('win_merged.o', 'DB')

        _write_lines(_out, [
            'ELSE',
            '.MODEL FLAT,C',
            "_text32 SEGMENT ALIGN(32) 'CODE'",
            'ALIGN 32',
        ])
        parse_object_file('win_x86_merged.o', 'DB')

        _write_lines(_out, [
            'ENDIF',
            'END',
        ])
finally:
    sys.stdout = _saved_stdout