[X86] Fix Windows `i1 zeroext` conventions to use i8 instead of i32

Summary:
Re-lands r328386 and r328443, reverting r328482. Incorporates fixes from
@mstorsjo in D44876 (thanks!) so that small parameters in i8 and i16 do
not end up in the SysV register parameters (EDI, ESI, etc).

I added tests for how we receive small parameters, since that is the
important part. It's always safe to store more bytes than will be read,
but the assumptions you make when loading them are what really matter.

I also tested this by self-hosting clang and it passed tests on win64.

Reviewers: mstorsjo, hans

Subscribers: hiraditya, mstorsjo, llvm-commits

Differential Revision: https://reviews.llvm.org/D44900

llvm-svn: 328570

commit: 41fb2dba9cc30d08b19655a46ec133fbb3e24351 [log] [tgz]
author: Reid Kleckner <rnk@google.com> Mon Mar 26 18:49:48 2018 +0000
committer: Reid Kleckner <rnk@google.com> Mon Mar 26 18:49:48 2018 +0000
tree: c156b69b86b446ba3dd5dd14ac0584c800c7fe8d
parent: f065390f6c9ee101c2409abcd1503bb850edf51f [diff]
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 8d3e33d..fcc9a29 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td

@@ -593,8 +593,8 @@
   // FIXME: Handle byval stuff.
   // FIXME: Handle varargs.
 
-  // Promote i1/i8/i16/v1i1 arguments to i32.
-  CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
+  // Promote i1/v1i1 arguments to i8.
+  CCIfType<[i1, v1i1], CCPromoteToType<i8>>,
 
   // The 'nest' parameter, if any, is passed in R10.
   CCIfNest<CCAssignToReg<[R10]>>,
@@ -619,6 +619,10 @@
   CCIfType<[x86mmx], CCBitConvertToType<i64>>,
 
   // The first 4 integer arguments are passed in integer registers.
+  CCIfType<[i8 ], CCAssignToRegWithShadow<[CL  , DL  , R8B , R9B ],
+                                          [XMM0, XMM1, XMM2, XMM3]>>,
+  CCIfType<[i16], CCAssignToRegWithShadow<[CX  , DX  , R8W , R9W ],
+                                          [XMM0, XMM1, XMM2, XMM3]>>,
   CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
 
@@ -638,7 +642,7 @@
 
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
-  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+  CCIfType<[i8, i16, i32, i64, f32, f64], CCAssignToStack<8, 8>>
 ]>;
 
 def CC_X86_Win64_VectorCall : CallingConv<[
@@ -847,13 +851,15 @@
 ]>;
 
 def CC_X86_32_FastCall : CallingConv<[
-  // Promote i1/i8/i16/v1i1 arguments to i32.
-  CCIfType<[i1, i8, i16, v1i1], CCPromoteToType<i32>>,
+  // Promote i1 to i8.
+  CCIfType<[i1], CCPromoteToType<i8>>,
 
   // The 'nest' parameter, if any, is passed in EAX.
   CCIfNest<CCAssignToReg<[EAX]>>,
 
   // The first 2 integer arguments are passed in ECX/EDX
+  CCIfInReg<CCIfType<[ i8], CCAssignToReg<[ CL,  DL]>>>,
+  CCIfInReg<CCIfType<[i16], CCAssignToReg<[ CX,  DX]>>>,
   CCIfInReg<CCIfType<[i32], CCAssignToReg<[ECX, EDX]>>>,
 
   // Otherwise, same as everything else.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 242a008..8e9e090 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -3034,7 +3034,11 @@
             getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
       } else {
         const TargetRegisterClass *RC;
-        if (RegVT == MVT::i32)
+        if (RegVT == MVT::i8)
+          RC = &X86::GR8RegClass;
+        else if (RegVT == MVT::i16)
+          RC = &X86::GR16RegClass;
+        else if (RegVT == MVT::i32)
           RC = &X86::GR32RegClass;
         else if (Is64Bit && RegVT == MVT::i64)
           RC = &X86::GR64RegClass;

diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 61718fb..defedd2 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll

@@ -420,6 +420,7 @@
 ; WIN64-KNL-NEXT:    subq $40, %rsp
 ; WIN64-KNL-NEXT:    .seh_stackalloc 40
 ; WIN64-KNL-NEXT:    .seh_endprologue
+; WIN64-KNL-NEXT:    # kill: def $dx killed $dx def $edx
 ; WIN64-KNL-NEXT:    vmovaps (%rcx), %zmm0
 ; WIN64-KNL-NEXT:    kmovw %edx, %k1
 ; WIN64-KNL-NEXT:    callq func_float16_mask
@@ -435,6 +436,7 @@
 ; WIN64-SKX-NEXT:    subq $40, %rsp
 ; WIN64-SKX-NEXT:    .seh_stackalloc 40
 ; WIN64-SKX-NEXT:    .seh_endprologue
+; WIN64-SKX-NEXT:    # kill: def $dx killed $dx def $edx
 ; WIN64-SKX-NEXT:    vmovaps (%rcx), %zmm0
 ; WIN64-SKX-NEXT:    kmovd %edx, %k1
 ; WIN64-SKX-NEXT:    callq func_float16_mask

diff --git a/llvm/test/CodeGen/X86/h-registers-0.ll b/llvm/test/CodeGen/X86/h-registers-0.ll
index 5f459c3..dfd79f3 100644
--- a/llvm/test/CodeGen/X86/h-registers-0.ll
+++ b/llvm/test/CodeGen/X86/h-registers-0.ll

@@ -98,7 +98,8 @@
 ; X86-64: movzbl %ah, %eax
 
 ; WIN64-LABEL:  qux16:
-; WIN64:  movzbl %ch, %eax
+; WIN64:  movzwl  %cx, %eax
+; WIN64:  shrl    $8, %eax
 
 ; X86-32-LABEL: qux16:
 ; X86-32: movzbl %ah, %eax

diff --git a/llvm/test/CodeGen/X86/test-shrink.ll b/llvm/test/CodeGen/X86/test-shrink.ll
index e44233f..5a59814 100644
--- a/llvm/test/CodeGen/X86/test-shrink.ll
+++ b/llvm/test/CodeGen/X86/test-shrink.ll

@@ -186,6 +186,7 @@
 ; CHECK-WIN32-64-LABEL: g16xh:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
+; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN32-64-NEXT:    testl $2048, %ecx # imm = 0x800
 ; CHECK-WIN32-64-NEXT:    jne .LBB4_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
@@ -228,6 +229,7 @@
 ; CHECK-WIN32-64-LABEL: g16xl:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
+; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN32-64-NEXT:    testb $8, %cl
 ; CHECK-WIN32-64-NEXT:    jne .LBB5_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
@@ -497,6 +499,7 @@
 ; CHECK-WIN32-64-LABEL: truncand32:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
+; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN32-64-NEXT:    testl $2049, %ecx # imm = 0x801
 ; CHECK-WIN32-64-NEXT:    je .LBB11_1
 ; CHECK-WIN32-64-NEXT:  # %bb.2: # %no
@@ -543,6 +546,7 @@
 ; CHECK-WIN32-64-LABEL: testw:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
+; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN32-64-NEXT:    testw $2049, %cx # imm = 0x801
 ; CHECK-WIN32-64-NEXT:    jne .LBB12_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes

diff --git a/llvm/test/CodeGen/X86/vec_cast.ll b/llvm/test/CodeGen/X86/vec_cast.ll
index e6e5982..f6b1ac1 100644
--- a/llvm/test/CodeGen/X86/vec_cast.ll
+++ b/llvm/test/CodeGen/X86/vec_cast.ll

@@ -37,6 +37,9 @@
 ;
 ; CHECK-WIN-LABEL: b:
 ; CHECK-WIN:       # %bb.0:
+; CHECK-WIN-NEXT:    # kill: def $r8w killed $r8w def $r8d
+; CHECK-WIN-NEXT:    # kill: def $dx killed $dx def $edx
+; CHECK-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN-NEXT:    movd %ecx, %xmm0
 ; CHECK-WIN-NEXT:    pinsrw $1, %edx, %xmm0
 ; CHECK-WIN-NEXT:    pinsrw $2, %r8d, %xmm0
@@ -58,6 +61,7 @@
 ;
 ; CHECK-WIN-LABEL: c:
 ; CHECK-WIN:       # %bb.0:
+; CHECK-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN-NEXT:    movd %ecx, %xmm0
 ; CHECK-WIN-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
 ; CHECK-WIN-NEXT:    psrad $16, %xmm0
@@ -100,6 +104,9 @@
 ;
 ; CHECK-WIN-LABEL: e:
 ; CHECK-WIN:       # %bb.0:
+; CHECK-WIN-NEXT:    # kill: def $r8w killed $r8w def $r8d
+; CHECK-WIN-NEXT:    # kill: def $dx killed $dx def $edx
+; CHECK-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN-NEXT:    movd %ecx, %xmm0
 ; CHECK-WIN-NEXT:    pinsrw $1, %edx, %xmm0
 ; CHECK-WIN-NEXT:    pinsrw $2, %r8d, %xmm0
@@ -121,6 +128,7 @@
 ;
 ; CHECK-WIN-LABEL: f:
 ; CHECK-WIN:       # %bb.0:
+; CHECK-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
 ; CHECK-WIN-NEXT:    movd %ecx, %xmm0
 ; CHECK-WIN-NEXT:    pxor %xmm1, %xmm1
 ; CHECK-WIN-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]

diff --git a/llvm/test/CodeGen/X86/win-smallparams.ll b/llvm/test/CodeGen/X86/win-smallparams.ll
new file mode 100644
index 0000000..93b528a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win-smallparams.ll

@@ -0,0 +1,67 @@
+; When we accept small parameters on Windows, make sure we do not assume they
+; are zero or sign extended in memory or in registers.
+
+; RUN: llc < %s -mtriple=x86_64-windows-msvc | FileCheck %s --check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-windows-gnu | FileCheck %s --check-prefix=WIN64
+; RUN: llc < %s -mtriple=i686-windows-msvc | FileCheck %s --check-prefix=WIN32
+; RUN: llc < %s -mtriple=i686-windows-gnu | FileCheck %s --check-prefix=WIN32
+
+define void @call() {
+entry:
+  %rv = call i32 @manyargs(i8 1, i16 2, i8 3, i16 4, i8 5, i16 6)
+  ret void
+}
+
+define i32 @manyargs(i8 %a, i16 %b, i8 %c, i16 %d, i8 %e, i16 %f) {
+entry:
+  %aa = sext i8 %a to i32
+  %bb = sext i16 %b to i32
+  %cc = zext i8 %c to i32
+  %dd = zext i16 %d to i32
+  %ee = zext i8 %e to i32
+  %ff = zext i16 %f to i32
+  %t0 = add i32 %aa, %bb
+  %t1 = add i32 %t0, %cc
+  %t2 = add i32 %t1, %dd
+  %t3 = add i32 %t2, %ee
+  %t4 = add i32 %t3, %ff
+  ret i32 %t4
+}
+
+; WIN64-LABEL: call:
+; WIN64-DAG: movw $6, 40(%rsp)
+; WIN64-DAG: movb $5, 32(%rsp)
+; WIN64-DAG: movb $1, %cl
+; WIN64-DAG: movw $2, %dx
+; WIN64-DAG: movb $3, %r8b
+; WIN64-DAG: movw $4, %r9w
+; WIN64: callq manyargs
+
+; WIN64-LABEL: manyargs:
+; WIN64-DAG: movsbl %cl,
+; WIN64-DAG: movswl %dx,
+; WIN64-DAG: movzbl %r8b,
+; WIN64-DAG: movzwl %r9w,
+; WIN64-DAG: movzbl 40(%rsp),
+; WIN64-DAG: movzwl 48(%rsp),
+; WIN64: retq
+
+
+; WIN32-LABEL: _call:
+; WIN32: pushl $6
+; WIN32: pushl $5
+; WIN32: pushl $4
+; WIN32: pushl $3
+; WIN32: pushl $2
+; WIN32: pushl $1
+; WIN32: calll _manyargs
+
+; WIN32-LABEL: _manyargs:
+; WIN32-DAG: movsbl 4(%esp),
+; WIN32-DAG: movswl 8(%esp),
+; WIN32-DAG: movzbl 12(%esp),
+; WIN32-DAG: movzwl 16(%esp),
+; WIN32-DAG: movzbl 20(%esp),
+; WIN32-DAG: movzwl 24(%esp),
+; WIN32: retl
+

diff --git a/llvm/test/CodeGen/X86/win32-bool.ll b/llvm/test/CodeGen/X86/win32-bool.ll
new file mode 100644
index 0000000..53607ea
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win32-bool.ll

@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=i686-windows-msvc | FileCheck %s
+; RUN: llc < %s -mtriple=i686-windows-gnu | FileCheck %s
+
+define x86_fastcallcc i32 @pass_fast_bool(i1 inreg zeroext %b) {
+entry:
+  %cond = select i1 %b, i32 66, i32 0
+  ret i32 %cond
+}
+
+; CHECK-LABEL: @pass_fast_bool@4:
+; CHECK-DAG: testb %cl, %cl
+; CHECK-DAG: movl    $66,
+; CHECK:     retl
+
+define x86_vectorcallcc i32 @pass_vector_bool(i1 inreg zeroext %b) {
+entry:
+  %cond = select i1 %b, i32 66, i32 0
+  ret i32 %cond
+}
+
+; CHECK-LABEL: pass_vector_bool@@4:
+; CHECK-DAG: testb %cl, %cl
+; CHECK-DAG: movl    $66,
+; CHECK:     retl
+
+define zeroext i1 @ret_true() {
+entry:
+  ret i1 true
+}
+
+; CHECK-LABEL: ret_true:
+; CHECK:     movb $1, %al
+; CHECK:     retl

diff --git a/llvm/test/CodeGen/X86/win64-bool.ll b/llvm/test/CodeGen/X86/win64-bool.ll
new file mode 100644
index 0000000..cb77c7e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win64-bool.ll

@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=x86_64-windows-msvc | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-windows-gnu | FileCheck %s --check-prefix=CHECK
+
+define i32 @pass_bool(i1 zeroext %b) {
+entry:
+  %cond = select i1 %b, i32 66, i32 0
+  ret i32 %cond
+}
+
+; CHECK-LABEL: pass_bool:
+; CHECK-DAG: testb %cl, %cl
+; CHECK-DAG: movl    $66,
+; CHECK:     cmovel {{.*}}, %eax
+; CHECK:     retq
+
+define zeroext i1 @ret_true() {
+entry:
+  ret i1 true
+}
+
+; CHECK-LABEL: ret_true:
+; CHECK:     movb $1, %al
+; CHECK:     retq

diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 6f0f0df..f73fdb2 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll

@@ -167,6 +167,8 @@
 ;
 ; X64-WIN-LABEL: test5:
 ; X64-WIN:       # %bb.0: # %entry
+; X64-WIN-NEXT:    # kill: def $dx killed $dx def $edx
+; X64-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
 ; X64-WIN-NEXT:    .p2align 4, 0x90
 ; X64-WIN-NEXT:  .LBB4_1: # %bb
 ; X64-WIN-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -427,7 +429,8 @@
 ;
 ; X64-WIN-LABEL: PR17487:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movd %ecx, %xmm0
+; X64-WIN-NEXT:    movzbl %cl, %eax
+; X64-WIN-NEXT:    movd %eax, %xmm0
 ; X64-WIN-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; X64-WIN-NEXT:    pandn __xmm@{{.*}}(%rip), %xmm0
 ; X64-WIN-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
commit	41fb2dba9cc30d08b19655a46ec133fbb3e24351	[log] [tgz]
author	Reid Kleckner <rnk@google.com>	Mon Mar 26 18:49:48 2018 +0000
committer	Reid Kleckner <rnk@google.com>	Mon Mar 26 18:49:48 2018 +0000
tree	c156b69b86b446ba3dd5dd14ac0584c800c7fe8d
parent	f065390f6c9ee101c2409abcd1503bb850edf51f [diff]