- Remove Tilmann's custom truncate lowering: it defeated the DAG combiner's
ability to find reasons to remove truncates when they were not needed.
Consequently, the CellSPU backend would produce correct, but _really slow
and horrible_, code.
The custom lowering is replaced with instruction sequences in
SPUInstrInfo.td that perform the equivalent truncation.
- Re-examine how unaligned loads and stores work. Generated unaligned
load code has been tested on the CellSPU hardware; see i32operations.c
and i64operations.c in test/CodeGen/CellSPU/useful-harnesses. (While they
may be toy test code, they do prove that some real-world code compiles
correctly.)
- Fix truncating stores in bug 3193 (note: unpack_df.ll will still make llc
fault because i64 ult is not yet implemented.)
- Added i64 eq and neq for setcc and select/setcc; started new instruction
information file for them in SPU64InstrInfo.td. Additional i64 operations
should be added to this file and not to SPUInstrInfo.td.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
index 4b0a957..9be714e 100644
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -2,7 +2,7 @@
; RUN: llvm-as -o - %s | llc -march=cellspu -mattr=large_mem > %t2.s
; RUN: grep bisl %t1.s | count 7
; RUN: grep ila %t1.s | count 1
-; RUN: grep rotqbyi %t1.s | count 4
+; RUN: grep rotqby %t1.s | count 6
; RUN: grep lqa %t1.s | count 1
; RUN: grep lqd %t1.s | count 12
; RUN: grep dispatch_tab %t1.s | count 5
diff --git a/test/CodeGen/CellSPU/icmp64.ll b/test/CodeGen/CellSPU/icmp64.ll
new file mode 100644
index 0000000..d2b4fc0
--- /dev/null
+++ b/test/CodeGen/CellSPU/icmp64.ll
@@ -0,0 +1,144 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep ceq %t1.s | count 4
+; RUN: grep cgti %t1.s | count 4
+; RUN: grep gb %t1.s | count 4
+; RUN: grep fsm %t1.s | count 2
+; RUN: grep xori %t1.s | count 1
+; RUN: grep selb %t1.s | count 2
+
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
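+; $3 = %arg1, $4 = %arg2, $5 = %val1, $6 = %val2 (four-argument form, used by every function below)
+; $3 = %arg1, $4 = %val1, $5 = %val2 (three-argument form; no function in this file uses it)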
+;
+; i64 integer comparisons:
+define i64 @icmp_eq_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp eq i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_eq_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp eq i64 %arg1, %arg2
+ ret i1 %A
+}
+
+define i64 @icmp_ne_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ne i64 %arg1, %arg2
+ %B = select i1 %A, i64 %val1, i64 %val2
+ ret i64 %B
+}
+
+define i1 @icmp_ne_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+entry:
+ %A = icmp ne i64 %arg1, %arg2
+ ret i1 %A
+}
+
+;; define i64 @icmp_ugt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp ugt i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_ugt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp ugt i64 %arg1, %arg2
+;; ret i1 %A
+;; }
+;;
+;; define i64 @icmp_uge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp uge i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_uge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp uge i64 %arg1, %arg2
+;; ret i1 %A
+;; }
+;;
+;; define i64 @icmp_ult_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp ult i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_ult_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp ult i64 %arg1, %arg2
+;; ret i1 %A
+;; }
+;;
+;; define i64 @icmp_ule_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp ule i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_ule_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp ule i64 %arg1, %arg2
+;; ret i1 %A
+;; }
+;;
+;; define i64 @icmp_sgt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp sgt i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_sgt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp sgt i64 %arg1, %arg2
+;; ret i1 %A
+;; }
+;;
+;; define i64 @icmp_sge_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp sge i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_sge_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp sge i64 %arg1, %arg2
+;; ret i1 %A
+;; }
+;;
+;; define i64 @icmp_slt_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp slt i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_slt_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp slt i64 %arg1, %arg2
+;; ret i1 %A
+;; }
+;;
+;; define i64 @icmp_sle_select_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp sle i64 %arg1, %arg2
+;; %B = select i1 %A, i64 %val1, i64 %val2
+;; ret i64 %B
+;; }
+;;
+;; define i1 @icmp_sle_setcc_i64(i64 %arg1, i64 %arg2, i64 %val1, i64 %val2) nounwind {
+;; entry:
+;; %A = icmp sle i64 %arg1, %arg2
+;; ret i1 %A
+;; }
diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll
index 28d2e5b..f2f35ef 100644
--- a/test/CodeGen/CellSPU/stores.ll
+++ b/test/CodeGen/CellSPU/stores.ll
@@ -3,8 +3,17 @@
; RUN: grep {stqd.*16(\$3)} %t1.s | count 4
; RUN: grep 16256 %t1.s | count 2
; RUN: grep 16384 %t1.s | count 1
+; RUN: grep 771 %t1.s | count 4
+; RUN: grep 515 %t1.s | count 2
+; RUN: grep 1799 %t1.s | count 2
+; RUN: grep 1543 %t1.s | count 5
+; RUN: grep 1029 %t1.s | count 3
; RUN: grep {shli.*, 4} %t1.s | count 4
; RUN: grep stqx %t1.s | count 4
+; RUN: grep ilhu %t1.s | count 11
+; RUN: grep iohl %t1.s | count 8
+; RUN: grep shufb %t1.s | count 15
+; RUN: grep frds %t1.s | count 1
; ModuleID = 'stores.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -89,3 +98,54 @@
store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %arrayidx
ret void
}
+
+; Test truncating stores:
+
+define zeroext i8 @tstore_i16_i8(i16 signext %val, i8* %dest) nounwind {
+entry:
+ %conv = trunc i16 %val to i8
+ store i8 %conv, i8* %dest
+ ret i8 %conv
+}
+
+define zeroext i8 @tstore_i32_i8(i32 %val, i8* %dest) nounwind {
+entry:
+ %conv = trunc i32 %val to i8
+ store i8 %conv, i8* %dest
+ ret i8 %conv
+}
+
+define signext i16 @tstore_i32_i16(i32 %val, i16* %dest) nounwind {
+entry:
+ %conv = trunc i32 %val to i16
+ store i16 %conv, i16* %dest
+ ret i16 %conv
+}
+
+define zeroext i8 @tstore_i64_i8(i64 %val, i8* %dest) nounwind {
+entry:
+ %conv = trunc i64 %val to i8
+ store i8 %conv, i8* %dest
+ ret i8 %conv
+}
+
+define signext i16 @tstore_i64_i16(i64 %val, i16* %dest) nounwind {
+entry:
+ %conv = trunc i64 %val to i16
+ store i16 %conv, i16* %dest
+ ret i16 %conv
+}
+
+define i32 @tstore_i64_i32(i64 %val, i32* %dest) nounwind {
+entry:
+ %conv = trunc i64 %val to i32
+ store i32 %conv, i32* %dest
+ ret i32 %conv
+}
+
+define float @tstore_f64_f32(double %val, float* %dest) nounwind {
+entry:
+ %conv = fptrunc double %val to float
+ store float %conv, float* %dest
+ ret float %conv
+}
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index 3df7267..82d319d 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -35,7 +35,7 @@
; int i2; // offset 12 [ignored]
; unsigned char c4; // offset 16 [ignored]
; unsigned char c5; // offset 17 [ignored]
-; unsigned char c6; // offset 18 [ignored]
+; unsigned char c6; // offset 18 (rotate left by 15 bytes to byte 3)
; unsigned char c7; // offset 19 (no rotate, in preferred slot)
; int i3; // offset 20 [ignored]
; int i4; // offset 24 [ignored]
diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll
index 845feed..1c6e1f6 100644
--- a/test/CodeGen/CellSPU/trunc.ll
+++ b/test/CodeGen/CellSPU/trunc.ll
@@ -1,16 +1,12 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep shufb %t1.s | count 9
+; RUN: grep shufb %t1.s | count 10
; RUN: grep {ilhu.*1799} %t1.s | count 1
-; RUN: grep {ilhu.*771} %t1.s | count 3
+; RUN: grep {ilhu.*771} %t1.s | count 1
; RUN: grep {ilhu.*1543} %t1.s | count 1
; RUN: grep {ilhu.*1029} %t1.s | count 1
-; RUN: grep {ilhu.*515} %t1.s | count 1
-; RUN: grep {iohl.*1799} %t1.s | count 1
-; RUN: grep {iohl.*771} %t1.s | count 3
-; RUN: grep {iohl.*1543} %t1.s | count 2
-; RUN: grep {iohl.*515} %t1.s | count 1
-; RUN: grep xsbh %t1.s | count 6
-; RUN: grep sfh %t1.s | count 5
+; RUN: grep {ilhu.*515} %t1.s | count 2
+; RUN: grep xsbh %t1.s | count 2
+; RUN: grep sfh %t1.s | count 1
; ModuleID = 'trunc.bc'
target datalayout = "E-p:32:32:128-i1:8:128-i8:8:128-i16:16:128-i32:32:128-i64:32:128-f32:32:128-f64:64:128-v64:64:64-v128:128:128-a0:0:128-s0:128:128"
@@ -41,23 +37,22 @@
; ret i64 %0
;}
-define i8 @trunc_i64_i8(i64 %u, i8 %v) nounwind readnone {
+define <16 x i8> @trunc_i64_i8(i64 %u, <16 x i8> %v) nounwind readnone {
entry:
%0 = trunc i64 %u to i8
- %1 = sub i8 %0, %v
- ret i8 %1
+ %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 10
+ ret <16 x i8> %tmp1
}
-define i16 @trunc_i64_i16(i64 %u, i16 %v) nounwind readnone {
+define <8 x i16> @trunc_i64_i16(i64 %u, <8 x i16> %v) nounwind readnone {
entry:
%0 = trunc i64 %u to i16
- %1 = sub i16 %0, %v
- ret i16 %1
+ %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 6
+ ret <8 x i16> %tmp1
}
define i32 @trunc_i64_i32(i64 %u, i32 %v) nounwind readnone {
entry:
%0 = trunc i64 %u to i32
- %1 = sub i32 %0, %v
- ret i32 %1
+ ret i32 %0
}
define i8 @trunc_i32_i8(i32 %u, i8 %v) nounwind readnone {
@@ -66,16 +61,16 @@
%1 = sub i8 %0, %v
ret i8 %1
}
-define i16 @trunc_i32_i16(i32 %u, i16 %v) nounwind readnone {
+define <8 x i16> @trunc_i32_i16(i32 %u, <8 x i16> %v) nounwind readnone {
entry:
%0 = trunc i32 %u to i16
- %1 = sub i16 %0, %v
- ret i16 %1
+ %tmp1 = insertelement <8 x i16> %v, i16 %0, i32 3
+ ret <8 x i16> %tmp1
}
-define i8 @trunc_i16_i8(i16 %u, i8 %v) nounwind readnone {
+define <16 x i8> @trunc_i16_i8(i16 %u, <16 x i8> %v) nounwind readnone {
entry:
%0 = trunc i16 %u to i8
- %1 = sub i8 %0, %v
- ret i8 %1
+ %tmp1 = insertelement <16 x i8> %v, i8 %0, i32 5
+ ret <16 x i8> %tmp1
}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i32operations.c b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
new file mode 100644
index 0000000..12fc30b
--- /dev/null
+++ b/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
@@ -0,0 +1,69 @@
+#include <stdio.h>
+
+typedef unsigned int uint32_t;
+typedef int int32_t;
+
+const char *boolstring(int val) {  /* "true" for any nonzero val, "false" for zero */
+ return val ? "true" : "false";
+}
+
+int i32_eq(int32_t a, int32_t b) {  /* 1 when a equals b, otherwise 0 */
+ return (a == b);
+}
+
+int i32_neq(int32_t a, int32_t b) {  /* 1 when a differs from b, otherwise 0 */
+ return (a != b);
+}
+
+int32_t i32_eq_select(int32_t a, int32_t b, int32_t c, int32_t d) {  /* yields c when a equals b, otherwise d */
+ return ((a == b) ? c : d);
+}
+
+int32_t i32_neq_select(int32_t a, int32_t b, int32_t c, int32_t d) {  /* yields c when a differs from b, otherwise d */
+ return ((a != b) ? c : d);
+}
+
+struct pred_s {  /* one predicate under test: a label plus its two implementations */
+ const char *name;  /* label printed between the operands by main(), e.g. "eq" */
+ int (*predfunc)(int32_t, int32_t);  /* two-operand comparison; result is passed to boolstring() */
+ int (*selfunc)(int32_t, int32_t, int32_t, int32_t);  /* compare the first two operands, return one of the last two */
+};
+
+struct pred_s preds[] = {  /* table iterated by main(); one row per predicate */
+ { "eq", i32_eq, i32_eq_select },
+ { "neq", i32_neq, i32_neq_select }
+};
+
+int main(void) {
+  /* Harness entry point: prints the operands, then each preds[] row's compare/select results. */
+  size_t i;  /* unsigned, so the comparison against the sizeof-derived row count is same-signed */
+  int32_t a = 1234567890;
+  int32_t b = 345678901;
+  int32_t c = 1234500000;
+  int32_t d = 10001;
+  int32_t e = 10000;
+
+  printf("a = %12d (0x%08x)\n", a, (uint32_t) a);  /* %x expects an unsigned argument */
+  printf("b = %12d (0x%08x)\n", b, (uint32_t) b);
+  printf("c = %12d (0x%08x)\n", c, (uint32_t) c);
+  printf("d = %12d (0x%08x)\n", d, (uint32_t) d);
+  printf("e = %12d (0x%08x)\n", e, (uint32_t) e);
+  printf("----------------------------------------\n");
+
+  for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
+    printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));  /* printed once, as in the i64 harness */
+    printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b)));
+    printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c)));
+    printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e)));
+    printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e)));
+
+    printf("a %s a ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d));
+    printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c));
+    printf("a %s b ? c : d = %d\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d));
+    printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d));
+
+    printf("----------------------------------------\n");
+  }
+
+  return 0;
+}
diff --git a/test/CodeGen/CellSPU/useful-harnesses/i64operations.c b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
new file mode 100644
index 0000000..7b86070
--- /dev/null
+++ b/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+
+typedef unsigned long long int uint64_t;
+typedef long long int int64_t;
+
+const char *boolstring(int val) {  /* "true" for any nonzero val, "false" for zero */
+ return val ? "true" : "false";
+}
+
+int i64_eq(int64_t a, int64_t b) {  /* 1 when a equals b, otherwise 0 */
+ return (a == b);
+}
+
+int i64_neq(int64_t a, int64_t b) {  /* 1 when a differs from b, otherwise 0 */
+ return (a != b);
+}
+
+int64_t i64_eq_select(int64_t a, int64_t b, int64_t c, int64_t d) {  /* yields c when a equals b, otherwise d */
+ return ((a == b) ? c : d);
+}
+
+int64_t i64_neq_select(int64_t a, int64_t b, int64_t c, int64_t d) {  /* yields c when a differs from b, otherwise d */
+ return ((a != b) ? c : d);
+}
+
+struct pred_s {  /* one predicate under test: a label plus its two implementations */
+ const char *name;  /* label printed between the operands by main(), e.g. "eq" */
+ int (*predfunc)(int64_t, int64_t);  /* two-operand comparison; result is passed to boolstring() */
+ int64_t (*selfunc)(int64_t, int64_t, int64_t, int64_t);  /* compare the first two operands, return one of the last two */
+};
+
+struct pred_s preds[] = {  /* table iterated by main(); one row per predicate */
+ { "eq", i64_eq, i64_eq_select },
+ { "neq", i64_neq, i64_neq_select }
+};
+
+int main(void) {
+  /* Harness entry point: prints the operands, then each preds[] row's compare/select results. */
+  size_t i;  /* unsigned, so the comparison against the sizeof-derived row count is same-signed */
+  int64_t a = 1234567890000LL;
+  int64_t b = 2345678901234LL;
+  int64_t c = 1234567890001LL;
+  int64_t d = 10001LL;
+  int64_t e = 10000LL;
+
+  printf("a = %16lld (0x%016llx)\n", a, (uint64_t) a);  /* %llx expects an unsigned argument */
+  printf("b = %16lld (0x%016llx)\n", b, (uint64_t) b);
+  printf("c = %16lld (0x%016llx)\n", c, (uint64_t) c);
+  printf("d = %16lld (0x%016llx)\n", d, (uint64_t) d);
+  printf("e = %16lld (0x%016llx)\n", e, (uint64_t) e);
+  printf("----------------------------------------\n");
+
+  for (i = 0; i < sizeof(preds)/sizeof(preds[0]); ++i) {
+    printf("a %s a = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, a)));
+    printf("a %s b = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, b)));
+    printf("a %s c = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(a, c)));
+    printf("d %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(d, e)));
+    printf("e %s e = %s\n", preds[i].name, boolstring((*preds[i].predfunc)(e, e)));
+
+    printf("a %s a ? c : d = %lld\n", preds[i].name, (*preds[i].selfunc)(a, a, c, d));
+    printf("a %s a ? c : d == c (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, a, c, d) == c));
+    printf("a %s b ? c : d = %lld\n", preds[i].name, (*preds[i].selfunc)(a, b, c, d));
+    printf("a %s b ? c : d == d (%s)\n", preds[i].name, boolstring((*preds[i].selfunc)(a, b, c, d) == d));
+
+    printf("----------------------------------------\n");
+  }
+  return 0;
+}