prep for more constants

 - Add -z to the disassembler flags so runs of zero bytes are printed
   instead of being elided as "...".
 - The avx and hsw builds will create 32-byte constants in .const,
   so we should disassemble that section too, and align it to 32 bytes.
 - The default _text section on Windows is 16-byte aligned,
   so we emit the code into a new segment (_text32) that's 32-byte aligned.

Change-Id: Icb2a962baa4c3735e98a992f2285eaf5cb1680fd
Reviewed-on: https://skia-review.googlesource.com/14364
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 4cc2405..ed23be7 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -11,12 +11,14 @@
     #define FUNCTION(name)
     #define BALIGN4  .align 2
     #define BALIGN16 .align 4
+    #define BALIGN32 .align 5
 #else
     .section .note.GNU-stack,"",%progbits
     #define HIDDEN .hidden
     #define FUNCTION(name) .type name,%function
     #define BALIGN4  .balign 4
     #define BALIGN16 .balign 16
+    #define BALIGN32 .balign 32
 #endif
 .text
 #if defined(__aarch64__)
@@ -7268,7 +7270,7 @@
   .long  0xe8bd48f0                          // pop           {r4, r5, r6, r7, fp, lr}
   .long  0xe12fff13                          // bx            r3
 #elif defined(__x86_64__)
-BALIGN16
+BALIGN32
 
 HIDDEN _sk_start_pipeline_hsw
 .globl _sk_start_pipeline_hsw
@@ -11715,7 +11717,7 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128                                 // .byte         0x80
   .byte  63                                  // (bad)
-BALIGN16
+BALIGN32
 
 HIDDEN _sk_start_pipeline_avx
 .globl _sk_start_pipeline_avx
@@ -17590,7 +17592,7 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128                                 // .byte         0x80
   .byte  63                                  // (bad)
-BALIGN16
+BALIGN32
 
 HIDDEN _sk_start_pipeline_sse41
 .globl _sk_start_pipeline_sse41
@@ -22106,7 +22108,7 @@
   .byte  0,0                                 // add           %al,(%rax)
   .byte  128                                 // .byte         0x80
   .byte  63                                  // (bad)
-BALIGN16
+BALIGN32
 
 HIDDEN _sk_start_pipeline_sse2
 .globl _sk_start_pipeline_sse2
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 2ba8a0a..12fc90e 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -7,8 +7,8 @@
 ;   $ src/jumper/build_stages.py
 
 IFDEF RAX
-_text SEGMENT
-ALIGN 16
+_text32 SEGMENT ALIGN(32) 'CODE'
+ALIGN 32
 
 PUBLIC _sk_start_pipeline_hsw
 _sk_start_pipeline_hsw LABEL PROC
@@ -4256,7 +4256,7 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  128                                 ; .byte         0x80
   DB  63                                  ; (bad)
-ALIGN 16
+ALIGN 32
 
 PUBLIC _sk_start_pipeline_avx
 _sk_start_pipeline_avx LABEL PROC
@@ -9934,7 +9934,7 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  128                                 ; .byte         0x80
   DB  63                                  ; (bad)
-ALIGN 16
+ALIGN 32
 
 PUBLIC _sk_start_pipeline_sse41
 _sk_start_pipeline_sse41 LABEL PROC
@@ -14253,7 +14253,7 @@
   DB  0,0                                 ; add           %al,(%rax)
   DB  128                                 ; .byte         0x80
   DB  63                                  ; (bad)
-ALIGN 16
+ALIGN 32
 
 PUBLIC _sk_start_pipeline_sse2
 _sk_start_pipeline_sse2 LABEL PROC
diff --git a/src/jumper/build_stages.py b/src/jumper/build_stages.py
index baa0b99..959913b 100755
--- a/src/jumper/build_stages.py
+++ b/src/jumper/build_stages.py
@@ -78,7 +78,7 @@
 
   # Look for sections we know we can't handle.
   section_headers = subprocess.check_output(cmd + ['-h', dot_o])
-  for snippet in ['.const', '.rodata']:
+  for snippet in ['.rodata']:
     if snippet in section_headers:
       print >>sys.stderr, 'Found %s in section.' % snippet
       assert snippet not in section_headers
@@ -90,10 +90,12 @@
     # x86-64... as long as we're using %rip-relative addressing,
     # literal sections should be fine to just dump in with .text.
     disassemble = ['-d',               # DO NOT USE -D.
+                   '-z',               # Print zero bytes instead of ...
                    '--insn-width=10',
                    '-j', '.text',
                    '-j', '.literal4',
                    '-j', '.literal16',
+                   '-j', '.const',
                    dot_o]
     dehex = lambda h: str(int(h,16))
 
@@ -111,6 +113,8 @@
       sym = m.group(1)
       if sym.startswith('.literal'):  # .literal4, .literal16, etc
         print sym.replace('.literal', align)
+      elif sym.startswith('.const'):  # 32-byte constants
+        print align + '32'
       else:  # a stage function
         if hidden:
           print hidden + ' _' + sym
@@ -150,12 +154,14 @@
 print '    #define FUNCTION(name)'
 print '    #define BALIGN4  .align 2'
 print '    #define BALIGN16 .align 4'
+print '    #define BALIGN32 .align 5'
 print '#else'
 print '    .section .note.GNU-stack,"",%progbits'
 print '    #define HIDDEN .hidden'
 print '    #define FUNCTION(name) .type name,%function'
 print '    #define BALIGN4  .balign 4'
 print '    #define BALIGN16 .balign 16'
+print '    #define BALIGN32 .balign 32'
 print '#endif'
 
 print '.text'
@@ -168,13 +174,13 @@
 parse_object_file('vfp4.o', '.long', target='elf32-littlearm')
 
 print '#elif defined(__x86_64__)'
-print 'BALIGN16'
+print 'BALIGN32'
 parse_object_file('hsw.o',   '.byte')
-print 'BALIGN16'
+print 'BALIGN32'
 parse_object_file('avx.o',   '.byte')
-print 'BALIGN16'
+print 'BALIGN32'
 parse_object_file('sse41.o', '.byte')
-print 'BALIGN16'
+print 'BALIGN32'
 parse_object_file('sse2.o',  '.byte')
 
 print '#endif'
@@ -190,14 +196,14 @@
 '''
 
 print 'IFDEF RAX'
-print '_text SEGMENT'
-print 'ALIGN 16'
+print "_text32 SEGMENT ALIGN(32) 'CODE'"
+print 'ALIGN 32'
 parse_object_file('win_hsw.o',   'DB')
-print 'ALIGN 16'
+print 'ALIGN 32'
 parse_object_file('win_avx.o',   'DB')
-print 'ALIGN 16'
+print 'ALIGN 32'
 parse_object_file('win_sse41.o', 'DB')
-print 'ALIGN 16'
+print 'ALIGN 32'
 parse_object_file('win_sse2.o',  'DB')
 print 'ENDIF'
 print 'END'