Add DAGCombiner load combine tests for partially available values
If some of the trailing or leading bytes of a load combine pattern are zeroes, we can combine the pattern into a load + zext and a shift. This is not supported yet, so the tests check the current codegen, in which the loads are not combined. Adding the tests first will make the patch that adds support for this kind of combine clearer.
llvm-svn: 294591
diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll
index 04de21f..59622fc 100644
--- a/llvm/test/CodeGen/AArch64/load-combine.ll
+++ b/llvm/test/CodeGen/AArch64/load-combine.ll
@@ -415,4 +415,139 @@
%tmp47 = shl nuw i32 %tmp46, 24
%tmp48 = or i32 %tmp42, %tmp47
ret i32 %tmp48
-}
\ No newline at end of file
+}
+
+; i8* p; // p is 2 byte aligned (the p[0] load is marked align 2)
+; (i32) p[0] | ((i32) p[1] << 8)  => p[0] in bits 0-7, p[1] in bits 8-15, bits 16-31 zero
+define i32 @zext_load_i32_by_i8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8:
+; CHECK: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x0, #1]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned (the p[0] load is marked align 2)
+; ((i32) p[0] << 8) | ((i32) p[1] << 16)  => p[0] in bits 8-15, p[1] in bits 16-23, bits 0-7 and 24-31 zero
+define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
+; CHECK: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x0, #1]
+; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: bfi w0, w9, #16, #8
+; CHECK-NEXT: ret
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 8
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned (the p[0] load is marked align 2)
+; ((i32) p[0] << 16) | ((i32) p[1] << 24)  => p[0] in bits 16-23, p[1] in bits 24-31, bits 0-15 zero
+define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
+; CHECK: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x0, #1]
+; CHECK-NEXT: lsl w0, w8, #16
+; CHECK-NEXT: bfi w0, w9, #24, #8
+; CHECK-NEXT: ret
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp2 = load i8, i8* %tmp1, align 2
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 16
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp5 = load i8, i8* %tmp4, align 1
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw i32 %tmp6, 24 ; nuw only: a byte >= 0x80 reaches the sign bit, so nsw would yield poison
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+; i8* p; // p is 2 byte aligned (the p[0] load is marked align 2)
+; (i32) p[1] | ((i32) p[0] << 8)  => p[1] in bits 0-7, p[0] in bits 8-15, bits 16-31 zero
+define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap:
+; CHECK: ldrb w8, [x0, #1]
+; CHECK-NEXT: ldrb w9, [x0]
+; CHECK-NEXT: bfi w8, w9, #8, #8
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: ret
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 8
+ %tmp8 = or i32 %tmp7, %tmp3
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned (the p[0] load is marked align 2)
+; ((i32) p[1] << 8) | ((i32) p[0] << 16)  => p[1] in bits 8-15, p[0] in bits 16-23, bits 0-7 and 24-31 zero
+define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
+; CHECK: ldrb w8, [x0, #1]
+; CHECK-NEXT: ldrb w9, [x0]
+; CHECK-NEXT: lsl w0, w8, #8
+; CHECK-NEXT: bfi w0, w9, #16, #8
+; CHECK-NEXT: ret
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 8
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw nsw i32 %tmp6, 16
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}
+
+; i8* p; // p is 2 byte aligned (the p[0] load is marked align 2)
+; ((i32) p[1] << 16) | ((i32) p[0] << 24)  => p[1] in bits 16-23, p[0] in bits 24-31, bits 0-15 zero
+define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
+; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
+; CHECK: ldrb w8, [x0, #1]
+; CHECK-NEXT: ldrb w9, [x0]
+; CHECK-NEXT: lsl w0, w8, #16
+; CHECK-NEXT: bfi w0, w9, #24, #8
+; CHECK-NEXT: ret
+
+ %tmp = bitcast i32* %arg to i8*
+ %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
+ %tmp2 = load i8, i8* %tmp1, align 1
+ %tmp3 = zext i8 %tmp2 to i32
+ %tmp30 = shl nuw nsw i32 %tmp3, 16
+ %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
+ %tmp5 = load i8, i8* %tmp4, align 2
+ %tmp6 = zext i8 %tmp5 to i32
+ %tmp7 = shl nuw i32 %tmp6, 24 ; nuw only: a byte >= 0x80 reaches the sign bit, so nsw would yield poison
+ %tmp8 = or i32 %tmp7, %tmp30
+ ret i32 %tmp8
+}