Optimized loading (zextload) of an i1 value from memory.
This patch is a partial revert of https://llvm.org/svn/llvm-project/llvm/trunk@237793.
Extra "and" causes performance degradation.
We assume that i1 is stored in zero-extended form. And store operation is responsible for zeroing upper bits.
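For illustration, a zero-extending i1 load like the one below no longer needs a
masking "and" on x86, because the preceding store is expected to have already
zeroed the upper bits (a minimal IR sketch; the function and value names are
illustrative, not taken from the tests):

  define i32 @load_i1(i1* %p) {
    %a = load i1, i1* %p        ; the byte at %p is assumed to hold 0 or 1
    %b = zext i1 %a to i32      ; lowers to a plain movzbl, no "andl $1"
    ret i32 %b
  }

  define void @store_i1(i32 %x, i1* %p) {
    %c = trunc i32 %x to i1
    store i1 %c, i1* %p         ; the store side masks with "andl $1" before writing
    ret void
  }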
Differential Revision: http://reviews.llvm.org/D17541
llvm-svn: 261828
diff --git a/llvm/test/CodeGen/X86/and-encoding.ll b/llvm/test/CodeGen/X86/and-encoding.ll
index f7bbac2..1a90bd0 100644
--- a/llvm/test/CodeGen/X86/and-encoding.ll
+++ b/llvm/test/CodeGen/X86/and-encoding.ll
@@ -15,27 +15,18 @@
ret void
}
-define void @f2(i1 *%x, i16 *%y) {
+define void @f2(i16 %x, i1 *%y) {
; CHECK-LABEL: f2:
-; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
- %a = load i1, i1* %x
- %b = zext i1 %a to i16
- store i16 %b, i16* %y
+; CHECK: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+ %c = trunc i16 %x to i1
+ store i1 %c, i1* %y
ret void
}
-define i32 @f3(i1 *%x) {
+define void @f3(i32 %x, i1 *%y) {
; CHECK-LABEL: f3:
-; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
- %a = load i1, i1* %x
- %b = zext i1 %a to i32
- ret i32 %b
-}
-
-define i64 @f4(i1 *%x) {
-; CHECK-LABEL: f4:
-; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
- %a = load i1, i1* %x
- %b = zext i1 %a to i64
- ret i64 %b
+; CHECK: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+ %c = trunc i32 %x to i1
+ store i1 %c, i1* %y
+ ret void
}
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 41ec62c..ff98524 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -190,7 +190,6 @@
;CHECK-LABEL: test15
;CHECK: movb (%rdi), %al
-;CHECK: andb $1, %al
;CHECK: movw $-1, %ax
;CHECK: cmovew
define i16 @test15(i1 *%addr) {
@@ -202,7 +201,6 @@
;CHECK-LABEL: test16
;CHECK: movb (%rdi), %al
-;CHECK: andw $1, %ax
;CHECK: kmovw
;CHECK: kshiftlw $10
;CHECK: korw
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 66307cd..10a314d 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1586,7 +1586,6 @@
; KNL-LABEL: f1:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: movzbl {{.*}}(%rip), %edi
-; KNL-NEXT: andl $1, %edi
; KNL-NEXT: movl %edi, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
@@ -1601,7 +1600,6 @@
; SKX-LABEL: f1:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: movzbl {{.*}}(%rip), %edi
-; SKX-NEXT: andl $1, %edi
; SKX-NEXT: movl %edi, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
@@ -1622,3 +1620,24 @@
declare void @f2(i32) #1
+define void @store_i16_i1(i16 %x, i1 *%y) {
+; CHECK-LABEL: store_i16_i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movb %dil, (%rsi)
+; CHECK-NEXT: retq
+ %c = trunc i16 %x to i1
+ store i1 %c, i1* %y
+ ret void
+}
+
+define void @store_i8_i1(i8 %x, i1 *%y) {
+; CHECK-LABEL: store_i8_i1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: movb %dil, (%rsi)
+; CHECK-NEXT: retq
+ %c = trunc i8 %x to i1
+ store i1 %c, i1* %y
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index a05d805..45b44ed 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1466,7 +1466,7 @@
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SKX-NEXT: # implicit-def: %XMM0
-; SKX-NEXT: andb $1, %al
+; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_2
; SKX-NEXT: # BB#1: # %cond.load
; SKX-NEXT: vmovq %xmm1, %rax
@@ -1474,7 +1474,7 @@
; SKX-NEXT: .LBB29_2: # %else
; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT: andb $1, %al
+; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_4
; SKX-NEXT: # BB#3: # %cond.load1
; SKX-NEXT: vpextrq $1, %xmm1, %rax
@@ -1482,7 +1482,7 @@
; SKX-NEXT: .LBB29_4: # %else2
; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SKX-NEXT: andb $1, %al
+; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_6
; SKX-NEXT: # BB#5: # %cond.load4
; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
@@ -1505,7 +1505,7 @@
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT: # implicit-def: %XMM1
-; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_2
; SKX_32-NEXT: # BB#1: # %cond.load
; SKX_32-NEXT: vmovd %xmm2, %eax
@@ -1513,7 +1513,7 @@
; SKX_32-NEXT: .LBB29_2: # %else
; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
-; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_4
; SKX_32-NEXT: # BB#3: # %cond.load1
; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
@@ -1522,7 +1522,7 @@
; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm0
; SKX_32-NEXT: kmovb %k1, (%esp)
; SKX_32-NEXT: movb (%esp), %al
-; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_6
; SKX_32-NEXT: # BB#5: # %cond.load4
; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 609e2cc..5b6e773 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -811,8 +811,6 @@
;
; Load the value of b.
; CHECK: movb _b(%rip), [[BOOL:%cl]]
-; Extract i1 from the loaded value.
-; CHECK-NEXT: andb $1, [[BOOL]]
; Create the zero value for the select assignment.
; CHECK-NEXT: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
; CHECK-NEXT: testb [[BOOL]], [[BOOL]]