[DAGCombiner] allow narrowing of add followed by truncate
trunc (add X, C) --> add (trunc X), C'
If we're throwing away the top bits of an 'add', do the add in the narrow destination type instead, truncating the constant along with it.
This makes the set of truncate-able opcodes identical to the one handled by the sibling transform in IR (in instcombine).
An earlier version of this change showed regressions on x86, but those are gone after D55494.
This gets us closer to deleting the x86 custom function (combineTruncatedArithmetic)
that does almost the same thing.
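As a minimal sketch of the fold, here is the IR-level sibling transform (the DAG combine itself runs on SelectionDAG nodes; function and value names below are hypothetical, and the constant is chosen for illustration):

  ; Before: the add is done in i32 and its high bits are discarded.
  define i16 @narrow_add_before(i32 %x) {
    %a = add i32 %x, 65537     ; 65537 = 0x10001
    %t = trunc i32 %a to i16
    ret i16 %t
  }

  ; After narrowing: the add happens in i16, and the constant is
  ; truncated with it (0x10001 --> 0x0001).
  define i16 @narrow_add_after(i32 %x) {
    %xt = trunc i32 %x to i16
    %r = add i16 %xt, 1
    ret i16 %r
  }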
Differential Revision: https://reviews.llvm.org/D55866
llvm-svn: 350006
diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll
index dcf3b36..29b80da 100644
--- a/llvm/test/CodeGen/AMDGPU/imm16.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm16.ll
@@ -266,7 +266,7 @@
}
; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
-; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, -1
+; VI: v_add_u16_e32 [[REG:v[0-9]+]], -1, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
%x = load i16, i16 addrspace(1)* %in
@@ -277,7 +277,7 @@
}
; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
-; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, 0xfffe
+; VI: v_add_u16_e32 [[REG:v[0-9]+]], -2, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
%x = load i16, i16 addrspace(1)* %in
@@ -288,7 +288,7 @@
}
; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
-; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, 0xfff0
+; VI: v_add_u16_e32 [[REG:v[0-9]+]], -16, [[REG:v[0-9]+]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
%x = load i16, i16 addrspace(1)* %in
diff --git a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll
index 2325a3d..53ae976 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc-combine.ll
@@ -25,7 +25,7 @@
; GCN: _load_dword
; GCN-NOT: _load_dword
; GCN-NOT: v_mov_b32
-; GCN: v_add_u32_e32 v0, vcc, 4, v0
+; GCN: v_add_u16_e32 v0, 4, v0
define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) {
%load0 = load i32, i32 addrspace(1)* undef
%load1 = load i32, i32 addrspace(1)* null
@@ -42,7 +42,7 @@
; GCN: _load_dword
; GCN-NOT: _load_dword
; GCN-NOT: v_mov_b32
-; GCN: v_add_u32_e32 v0, vcc, 4, v0
+; GCN: v_add_u16_e32 v0, 4, v0
define i16 @trunc_bitcast_v2f32_to_i16(<2 x float> %bar) {
%load0 = load float, float addrspace(1)* undef
%load1 = load float, float addrspace(1)* null
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-vaslw.ll b/llvm/test/CodeGen/Hexagon/vect/vect-vaslw.ll
index c662b0b..23c16760 100644
--- a/llvm/test/CodeGen/Hexagon/vect/vect-vaslw.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-vaslw.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
-; CHECK: vaslw
+; CHECK: vaslh
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll
index 3934bf5..8c69dba 100644
--- a/llvm/test/CodeGen/X86/load-combine.ll
+++ b/llvm/test/CodeGen/X86/load-combine.ll
@@ -915,7 +915,7 @@
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%ecx,%eax), %eax
+; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
@@ -960,7 +960,7 @@
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 13(%ecx,%eax), %eax
+; CHECK-NEXT: movl 13(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
@@ -1016,7 +1016,7 @@
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%ecx,%eax), %eax
+; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
@@ -1072,7 +1072,7 @@
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl 12(%ecx,%eax), %eax
+; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
diff --git a/llvm/test/CodeGen/X86/pr32329.ll b/llvm/test/CodeGen/X86/pr32329.ll
index 7ccd559..2110946 100644
--- a/llvm/test/CodeGen/X86/pr32329.ll
+++ b/llvm/test/CodeGen/X86/pr32329.ll
@@ -41,7 +41,7 @@
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: subl %esi, %edi
; X86-NEXT: imull %edi, %ecx
-; X86-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71
+; X86-NEXT: addb $113, %cl
; X86-NEXT: movl $9, %esi
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: shldl %cl, %esi, %ebp
@@ -80,7 +80,7 @@
; X64-NEXT: movl %edi, %esi
; X64-NEXT: subl %r8d, %esi
; X64-NEXT: imull %esi, %ecx
-; X64-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71
+; X64-NEXT: addb $113, %cl
; X64-NEXT: movl $9, %edx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlq %cl, %rdx
diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll
index 65fcf05..cec3a69 100644
--- a/llvm/test/CodeGen/X86/pr32345.ll
+++ b/llvm/test/CodeGen/X86/pr32345.ll
@@ -69,8 +69,8 @@
; 6860-NEXT: xorl %ecx, %esi
; 6860-NEXT: movw %si, %ax
; 6860-NEXT: movzwl %ax, %esi
-; 6860-NEXT: addl $-16610, %ecx # imm = 0xBF1E
; 6860-NEXT: movb %cl, %bl
+; 6860-NEXT: addb $30, %bl
; 6860-NEXT: xorl %ecx, %ecx
; 6860-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; 6860-NEXT: movb %bl, %cl
@@ -98,14 +98,13 @@
;
; X64-LABEL: foo:
; X64: # %bb.0: # %bb
-; X64-NEXT: movzwl {{.*}}(%rip), %eax
; X64-NEXT: movzwl {{.*}}(%rip), %ecx
-; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: xorl %eax, %edx
-; X64-NEXT: movzwl %dx, %eax
+; X64-NEXT: movzwl {{.*}}(%rip), %eax
+; X64-NEXT: xorw %cx, %ax
+; X64-NEXT: xorl %ecx, %eax
+; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: addl $-16610, %ecx # imm = 0xBF1E
+; X64-NEXT: addb $30, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrq %cl, %rax
; X64-NEXT: movb %al, (%rax)
@@ -120,15 +119,14 @@
; 686-NEXT: .cfi_def_cfa_register %ebp
; 686-NEXT: andl $-8, %esp
; 686-NEXT: subl $8, %esp
-; 686-NEXT: movzwl var_22, %eax
; 686-NEXT: movzwl var_27, %ecx
-; 686-NEXT: movl %ecx, %edx
-; 686-NEXT: xorl %ecx, %edx
-; 686-NEXT: xorl %eax, %edx
-; 686-NEXT: movzwl %dx, %eax
+; 686-NEXT: movzwl var_22, %eax
+; 686-NEXT: xorw %cx, %ax
+; 686-NEXT: xorl %ecx, %eax
+; 686-NEXT: movzwl %ax, %eax
; 686-NEXT: movl %eax, (%esp)
; 686-NEXT: movl $0, {{[0-9]+}}(%esp)
-; 686-NEXT: addl $-16610, %ecx # imm = 0xBF1E
+; 686-NEXT: addb $30, %cl
; 686-NEXT: xorl %edx, %edx
; 686-NEXT: shrdl %cl, %edx, %eax
; 686-NEXT: testb $32, %cl
diff --git a/llvm/test/CodeGen/X86/pr33290.ll b/llvm/test/CodeGen/X86/pr33290.ll
index b5d9754..44b7dca 100644
--- a/llvm/test/CodeGen/X86/pr33290.ll
+++ b/llvm/test/CodeGen/X86/pr33290.ll
@@ -14,8 +14,8 @@
; X86-NEXT: .LBB0_1: # %for.cond
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movzbl c, %ecx
-; X86-NEXT: leal a+2(%ecx), %ecx
; X86-NEXT: movb $0, c
+; X86-NEXT: leal a+2(%ecx), %ecx
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: jmp .LBB0_1
;
diff --git a/llvm/test/CodeGen/X86/pr34381.ll b/llvm/test/CodeGen/X86/pr34381.ll
index 3053ddd..831b1d2 100644
--- a/llvm/test/CodeGen/X86/pr34381.ll
+++ b/llvm/test/CodeGen/X86/pr34381.ll
@@ -13,11 +13,9 @@
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsbl {{.*}}(%rip), %eax
; CHECK-NEXT: negl %eax
-; CHECK-NEXT: cmpl %eax, {{.*}}(%rip)
-; CHECK-NEXT: setb %al
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: addb $-1, %al
-; CHECK-NEXT: sete %cl
+; CHECK-NEXT: cmpl %eax, {{.*}}(%rip)
+; CHECK-NEXT: setb %cl
; CHECK-NEXT: movl %ecx, {{.*}}(%rip)
; CHECK-NEXT: movb {{.*}}(%rip), %al
; CHECK-NEXT: movb %al, {{.*}}(%rip)
diff --git a/llvm/test/CodeGen/X86/pr35765.ll b/llvm/test/CodeGen/X86/pr35765.ll
index 6ff504d..1c6035f 100644
--- a/llvm/test/CodeGen/X86/pr35765.ll
+++ b/llvm/test/CodeGen/X86/pr35765.ll
@@ -9,10 +9,9 @@
define void @PR35765() {
; CHECK-LABEL: PR35765:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movzwl {{.*}}(%rip), %ecx
-; CHECK-NEXT: addl $-1398, %ecx # imm = 0xFA8A
+; CHECK-NEXT: movb {{.*}}(%rip), %cl
+; CHECK-NEXT: addb $-118, %cl
; CHECK-NEXT: movl $4, %eax
-; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: movzwl {{.*}}(%rip), %ecx
; CHECK-NEXT: movzwl {{.*}}(%rip), %edx
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index cb8571b..87c36ad 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -16,8 +16,8 @@
; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: xorl %r8d, %r8d
-; ILP-NEXT: incl %esi
; ILP-NEXT: addb %sil, %sil
+; ILP-NEXT: addb $2, %sil
; ILP-NEXT: orb $1, %sil
; ILP-NEXT: movl $1, %r10d
; ILP-NEXT: xorl %r14d, %r14d
@@ -25,35 +25,35 @@
; ILP-NEXT: shldq %cl, %r10, %r14
; ILP-NEXT: movl $1, %edx
; ILP-NEXT: shlq %cl, %rdx
-; ILP-NEXT: leal -128(%rsi), %r9d
; ILP-NEXT: movb $-128, %r11b
-; ILP-NEXT: xorl %ebx, %ebx
+; ILP-NEXT: subb %sil, %r11b
+; ILP-NEXT: leal -128(%rsi), %r9d
+; ILP-NEXT: xorl %edi, %edi
; ILP-NEXT: movl %r9d, %ecx
-; ILP-NEXT: shldq %cl, %r10, %rbx
+; ILP-NEXT: shldq %cl, %r10, %rdi
+; ILP-NEXT: movl $1, %ebx
+; ILP-NEXT: shlq %cl, %rbx
+; ILP-NEXT: movl %r11d, %ecx
+; ILP-NEXT: shrdq %cl, %r8, %r10
; ILP-NEXT: testb $64, %sil
; ILP-NEXT: cmovneq %rdx, %r14
; ILP-NEXT: cmovneq %r8, %rdx
-; ILP-NEXT: movl $1, %edi
-; ILP-NEXT: shlq %cl, %rdi
-; ILP-NEXT: subb %sil, %r11b
-; ILP-NEXT: movl %r11d, %ecx
-; ILP-NEXT: shrdq %cl, %r8, %r10
; ILP-NEXT: testb $64, %r11b
; ILP-NEXT: cmovneq %r8, %r10
; ILP-NEXT: testb $64, %r9b
-; ILP-NEXT: cmovneq %rdi, %rbx
-; ILP-NEXT: cmovneq %r8, %rdi
+; ILP-NEXT: cmovneq %rbx, %rdi
+; ILP-NEXT: cmovneq %r8, %rbx
; ILP-NEXT: testb %sil, %sil
; ILP-NEXT: cmovsq %r8, %r14
; ILP-NEXT: cmovsq %r8, %rdx
; ILP-NEXT: movq %r14, 8(%rax)
; ILP-NEXT: movq %rdx, (%rax)
-; ILP-NEXT: cmovnsq %r8, %rbx
-; ILP-NEXT: cmoveq %r8, %rbx
-; ILP-NEXT: movq %rbx, 24(%rax)
-; ILP-NEXT: cmovnsq %r10, %rdi
+; ILP-NEXT: cmovnsq %r8, %rdi
; ILP-NEXT: cmoveq %r8, %rdi
-; ILP-NEXT: movq %rdi, 16(%rax)
+; ILP-NEXT: movq %rdi, 24(%rax)
+; ILP-NEXT: cmovnsq %r10, %rbx
+; ILP-NEXT: cmoveq %r8, %rbx
+; ILP-NEXT: movq %rbx, 16(%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: popq %r14
; ILP-NEXT: retq
@@ -61,8 +61,8 @@
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
; HYBRID-NEXT: movq %rdi, %rax
-; HYBRID-NEXT: incl %esi
; HYBRID-NEXT: addb %sil, %sil
+; HYBRID-NEXT: addb $2, %sil
; HYBRID-NEXT: orb $1, %sil
; HYBRID-NEXT: movb $-128, %cl
; HYBRID-NEXT: subb %sil, %cl
@@ -104,8 +104,8 @@
; BURR-LABEL: test1:
; BURR: # %bb.0:
; BURR-NEXT: movq %rdi, %rax
-; BURR-NEXT: incl %esi
; BURR-NEXT: addb %sil, %sil
+; BURR-NEXT: addb $2, %sil
; BURR-NEXT: orb $1, %sil
; BURR-NEXT: movb $-128, %cl
; BURR-NEXT: subb %sil, %cl
@@ -148,8 +148,8 @@
; SRC: # %bb.0:
; SRC-NEXT: pushq %rbx
; SRC-NEXT: movq %rdi, %rax
-; SRC-NEXT: incl %esi
; SRC-NEXT: addb %sil, %sil
+; SRC-NEXT: addb $2, %sil
; SRC-NEXT: orb $1, %sil
; SRC-NEXT: movb $-128, %cl
; SRC-NEXT: subb %sil, %cl
@@ -195,8 +195,8 @@
; LIN-NEXT: movq %rdi, %rax
; LIN-NEXT: xorl %r9d, %r9d
; LIN-NEXT: movl $1, %r8d
-; LIN-NEXT: incl %esi
; LIN-NEXT: addb %sil, %sil
+; LIN-NEXT: addb $2, %sil
; LIN-NEXT: orb $1, %sil
; LIN-NEXT: movl $1, %edx
; LIN-NEXT: movl %esi, %ecx