[PowerPC] Adjust load/store costs in PPCTTI

This provides more realistic costs for insertelement/extractelement instructions
(which are lowered as load/store pairs through the stack), accounts for the cheap
unaligned Altivec load sequence, and accounts for unaligned VSX loads/stores.

Bad news:
MultiSource/Applications/sgefa/sgefa - 35% slowdown (this will require more investigation)
SingleSource/Benchmarks/McGill/queens - 20% slowdown (we no longer vectorize this, but what we were vectorizing was a constant store that was scalarized anyway)
MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 - 2% slowdown

Good news:
SingleSource/Benchmarks/Shootout/ary3 - 54% speedup
SingleSource/Benchmarks/Shootout-C++/ary - 40% speedup
MultiSource/Benchmarks/Ptrdist/ks/ks - 35% speedup
MultiSource/Benchmarks/FreeBench/neural/neural - 30% speedup
MultiSource/Benchmarks/TSVC/Symbolics-flt/Symbolics-flt - 20% speedup

Unfortunately, estimating the costs of the stack-based scalarization sequences
is hard, and adjusting these costs is like a game of whack-a-mole :( I'll
revisit this after we have better codegen for vector extloads, truncstores,
and unaligned loads/stores.
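
For reference, the affected costs can be inspected the same way the CostModel
tests below do. A minimal sketch (the function name is illustrative, and the
exact cost numbers depend on the subtarget features):

```llvm
; Query with: opt -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu
define i32 @example(i32 %arg) {
  ; insertelement/extractelement are modeled as stack-based store/load pairs
  %v = insertelement <4 x i32> undef, i32 %arg, i32 0
  %x = extractelement <4 x i32> %v, i32 0
  ; an aligned (or VSX-capable unaligned) vector load is now modeled as cheap
  %l = load <4 x i32>* undef, align 4
  ret i32 %x
}
```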

llvm-svn: 205658
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ext.ll b/llvm/test/Analysis/CostModel/PowerPC/ext.ll
index daaa8f5..7d6a14e 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/ext.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/ext.ll
@@ -13,7 +13,7 @@
   ; CHECK: cost of 1 {{.*}} sext
   %v3 = sext <4 x i16> undef to <4 x i32>
 
-  ; CHECK: cost of 216 {{.*}} sext
+  ; CHECK: cost of 112 {{.*}} sext
   %v4 = sext <8 x i16> undef to <8 x i32>
 
   ret void
diff --git a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
index f51963d..8dc0031 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -3,13 +3,13 @@
 target triple = "powerpc64-unknown-linux-gnu"
 
 define i32 @insert(i32 %arg) {
-  ; CHECK: cost of 13 {{.*}} insertelement
+  ; CHECK: cost of 10 {{.*}} insertelement
   %x = insertelement <4 x i32> undef, i32 %arg, i32 0
   ret i32 undef
 }
 
 define i32 @extract(<4 x i32> %arg) {
-  ; CHECK: cost of 13 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
   %x = extractelement <4 x i32> %arg, i32 0
   ret i32 %x
 }
diff --git a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
index 8145a1d..4086278 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -31,9 +31,12 @@
 
   ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
   ; this with a small expense, but we don't currently.
-  ; CHECK: cost of 60 {{.*}} load
+  ; CHECK: cost of 48 {{.*}} load
   load <4 x i16>* undef, align 2
 
+  ; CHECK: cost of 1 {{.*}} load
+  load <4 x i32>* undef, align 4
+
   ret i32 undef
 }