[CostModel][X86] Fixed vXi8 uniform shift costs. The 'fast' costs should only work for shifts by uniform constants (uniform non-constant are lowered using the slow default implementation). Logical shifts were not taking into account that we must mask the psrlw result, so the costs needed to be doubled. Added missing AVX2/AVX512BW costs as well. llvm-svn: 291391

commit: 9c58950eeb0ac0ffe86b0ebdd43b2042d4fb9de6 [log] [tgz]
author: Simon Pilgrim <llvm-dev@redking.me.uk> Sun Jan 08 14:14:36 2017 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> Sun Jan 08 14:14:36 2017 +0000
tree: 412bdbf49e6ae2cbe5205b0168d39f6399891dd9
parent: 1fa5487c0529c80a66565ba99111644532c49aff [diff]
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index f5ebfa0..107ed93 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp

@@ -144,6 +144,10 @@
   }
 
   static const CostTblEntry AVX512BWUniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v64i8,   2 }, // psllw + pand.
+    { ISD::SRL,  MVT::v64i8,   2 }, // psrlw + pand.
+    { ISD::SRA,  MVT::v64i8,   4 }, // psrlw, pand, pxor, psubb.
+
     { ISD::SDIV, MVT::v32i16,  6 }, // vpmulhw sequence
     { ISD::UDIV, MVT::v32i16,  6 }, // vpmulhuw sequence
   };
@@ -168,6 +172,10 @@
   }
 
   static const CostTblEntry AVX2UniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v32i8,   2 }, // psllw + pand.
+    { ISD::SRL,  MVT::v32i8,   2 }, // psrlw + pand.
+    { ISD::SRA,  MVT::v32i8,   4 }, // psrlw, pand, pxor, psubb.
+
     { ISD::SRA,  MVT::v4i64,   4 }, // 2 x psrad + shuffle.
 
     { ISD::SDIV, MVT::v16i16,  6 }, // vpmulhw sequence
@@ -184,6 +192,14 @@
   }
 
   static const CostTblEntry SSE2UniformConstCostTable[] = {
+    { ISD::SHL,  MVT::v16i8,   2 }, // psllw + pand.
+    { ISD::SRL,  MVT::v16i8,   2 }, // psrlw + pand.
+    { ISD::SRA,  MVT::v16i8,   4 }, // psrlw, pand, pxor, psubb.
+
+    { ISD::SHL,  MVT::v32i8,   4 }, // 2*(psllw + pand).
+    { ISD::SRL,  MVT::v32i8,   4 }, // 2*(psrlw + pand).
+    { ISD::SRA,  MVT::v32i8,   8 }, // 2*(psrlw, pand, pxor, psubb).
+
     { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
     { ISD::SDIV, MVT::v8i16,   6 }, // pmulhw sequence
     { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
@@ -364,20 +380,14 @@
 
   static const CostTblEntry SSE2UniformShiftCostTable[] = {
     // Uniform splats are cheaper for the following instructions.
-    { ISD::SHL,  MVT::v16i8,  1 }, // psllw.
-    { ISD::SHL,  MVT::v32i8,  2 }, // psllw.
     { ISD::SHL,  MVT::v16i16, 2 }, // psllw.
     { ISD::SHL,  MVT::v8i32,  2 }, // pslld
     { ISD::SHL,  MVT::v4i64,  2 }, // psllq.
 
-    { ISD::SRL,  MVT::v16i8,  1 }, // psrlw.
-    { ISD::SRL,  MVT::v32i8,  2 }, // psrlw.
     { ISD::SRL,  MVT::v16i16, 2 }, // psrlw.
     { ISD::SRL,  MVT::v8i32,  2 }, // psrld.
     { ISD::SRL,  MVT::v4i64,  2 }, // psrlq.
 
-    { ISD::SRA,  MVT::v16i8,  4 }, // psrlw, pand, pxor, psubb.
-    { ISD::SRA,  MVT::v32i8,  8 }, // psrlw, pand, pxor, psubb.
     { ISD::SRA,  MVT::v16i16, 2 }, // psraw.
     { ISD::SRA,  MVT::v8i32,  2 }, // psrad.
     { ISD::SRA,  MVT::v2i64,  4 }, // 2 x psrad + shuffle.

diff --git a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
index 52f176f..e5fff9b 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftlshr.ll

@@ -498,7 +498,7 @@
 define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
 entry:
   ; SSE2: shift16i8c
-  ; SSE2: cost of 1 {{.*}} lshr
+  ; SSE2: cost of 2 {{.*}} lshr
   ; SSE2-CODEGEN: shift16i8c
   ; SSE2-CODEGEN: psrlw $3
 
@@ -513,7 +513,7 @@
 define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
 entry:
   ; SSE2: shift32i8c
-  ; SSE2: cost of 2 {{.*}} lshr
+  ; SSE2: cost of 4 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i8c
   ; SSE2-CODEGEN: psrlw $3
 

diff --git a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll
index e385c5b..6628b9b 100644
--- a/llvm/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/testshiftshl.ll

@@ -498,7 +498,7 @@
 define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
 entry:
   ; SSE2: shift16i8c
-  ; SSE2: cost of 1 {{.*}} shl
+  ; SSE2: cost of 2 {{.*}} shl
   ; SSE2-CODEGEN: shift16i8c
   ; SSE2-CODEGEN: psllw $3
 
@@ -513,7 +513,7 @@
 define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
 entry:
   ; SSE2: shift32i8c
-  ; SSE2: cost of 2 {{.*}} shl
+  ; SSE2: cost of 4 {{.*}} shl
   ; SSE2-CODEGEN: shift32i8c
   ; SSE2-CODEGEN: psllw $3
 

diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index 3b87e6a..6756f3b 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll

@@ -606,7 +606,7 @@
 ; AVX: Found an estimated cost of 4 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; AVX512: Found an estimated cost of 4 for instruction:   %shift
-; XOP: Found an estimated cost of 2 for instruction:   %shift
+; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
@@ -616,9 +616,10 @@
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
 ; AVX: Found an estimated cost of 8 for instruction:   %shift
-; AVX2: Found an estimated cost of 8 for instruction:   %shift
-; AVX512: Found an estimated cost of 8 for instruction:   %shift
-; XOP: Found an estimated cost of 4 for instruction:   %shift
+; AVX2: Found an estimated cost of 4 for instruction:   %shift
+; AVX512: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
@@ -628,10 +629,11 @@
 ; SSE2: Found an estimated cost of 16 for instruction:   %shift
 ; SSE41: Found an estimated cost of 16 for instruction:   %shift
 ; AVX: Found an estimated cost of 16 for instruction:   %shift
-; AVX2: Found an estimated cost of 16 for instruction:   %shift
-; AVX512F: Found an estimated cost of 16 for instruction:   %shift
-; AVX512BW: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 8 for instruction:   %shift
+; AVX2: Found an estimated cost of 8 for instruction:   %shift
+; AVX512F: Found an estimated cost of 8 for instruction:   %shift
+; AVX512BW: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 16 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 8 for instruction:   %shift
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift
 }

diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 2717fcf..63e6db1 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll

@@ -611,11 +611,11 @@
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
-; SSE2: Found an estimated cost of 1 for instruction:   %shift
-; SSE41: Found an estimated cost of 1 for instruction:   %shift
-; AVX: Found an estimated cost of 1 for instruction:   %shift
-; AVX2: Found an estimated cost of 1 for instruction:   %shift
-; AVX512: Found an estimated cost of 1 for instruction:   %shift
+; SSE2: Found an estimated cost of 2 for instruction:   %shift
+; SSE41: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX2: Found an estimated cost of 2 for instruction:   %shift
+; AVX512: Found an estimated cost of 2 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
@@ -623,25 +623,27 @@
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
-; SSE2: Found an estimated cost of 2 for instruction:   %shift
-; SSE41: Found an estimated cost of 2 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; SSE2: Found an estimated cost of 4 for instruction:   %shift
+; SSE41: Found an estimated cost of 4 for instruction:   %shift
+; AVX: Found an estimated cost of 4 for instruction:   %shift
 ; AVX2: Found an estimated cost of 2 for instruction:   %shift
 ; AVX512: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction:   %shift
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
-; SSE2: Found an estimated cost of 4 for instruction:   %shift
-; SSE41: Found an estimated cost of 4 for instruction:   %shift
-; AVX: Found an estimated cost of 4 for instruction:   %shift
+; SSE2: Found an estimated cost of 8 for instruction:   %shift
+; SSE41: Found an estimated cost of 8 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; AVX512F: Found an estimated cost of 4 for instruction:   %shift
 ; AVX512BW: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift
 }

diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 9898222..8c42bd6 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll

@@ -616,37 +616,39 @@
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
-; SSE2: Found an estimated cost of 1 for instruction:   %shift
-; SSE41: Found an estimated cost of 1 for instruction:   %shift
-; AVX: Found an estimated cost of 1 for instruction:   %shift
-; AVX2: Found an estimated cost of 1 for instruction:   %shift
-; AVX512: Found an estimated cost of 1 for instruction:   %shift
-; XOP: Found an estimated cost of 1 for instruction:   %shift
-  %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
-  ret <16 x i8> %shift
-}
-
-define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
-; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
 ; AVX: Found an estimated cost of 2 for instruction:   %shift
 ; AVX2: Found an estimated cost of 2 for instruction:   %shift
 ; AVX512: Found an estimated cost of 2 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
+  %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+  ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
+; SSE2: Found an estimated cost of 4 for instruction:   %shift
+; SSE41: Found an estimated cost of 4 for instruction:   %shift
+; AVX: Found an estimated cost of 4 for instruction:   %shift
+; AVX2: Found an estimated cost of 2 for instruction:   %shift
+; AVX512: Found an estimated cost of 2 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
-; SSE2: Found an estimated cost of 4 for instruction:   %shift
-; SSE41: Found an estimated cost of 4 for instruction:   %shift
-; AVX: Found an estimated cost of 4 for instruction:   %shift
+; SSE2: Found an estimated cost of 8 for instruction:   %shift
+; SSE41: Found an estimated cost of 8 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; AVX512F: Found an estimated cost of 4 for instruction:   %shift
 ; AVX512BW: Found an estimated cost of 2 for instruction:   %shift
-; XOP: Found an estimated cost of 4 for instruction:   %shift
+; XOPAVX: Found an estimated cost of 8 for instruction:   %shift
+; XOPAVX2: Found an estimated cost of 4 for instruction:   %shift
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift
 }
commit	9c58950eeb0ac0ffe86b0ebdd43b2042d4fb9de6	[log] [tgz]
author	Simon Pilgrim <llvm-dev@redking.me.uk>	Sun Jan 08 14:14:36 2017 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	Sun Jan 08 14:14:36 2017 +0000
tree	412bdbf49e6ae2cbe5205b0168d39f6399891dd9
parent	1fa5487c0529c80a66565ba99111644532c49aff [diff]