[AMDGPU] Narrow lshl from 64 to 32 bit if possible

Turn an expensive 64-bit shift into a 32-bit shift when the shifted value is known not to overflow 32 bits:
shl (ext x) => zext (shl x)
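
A minimal IR sketch (not taken from this patch's tests; the mask and shift
amount are illustrative) of the kind of input the combine targets. Because the
masked value has at least 4 known leading zero bits, shifting it left by 4
cannot carry set bits past bit 31, so the 64-bit shl can be done as a 32-bit
shl whose result is then zero-extended. The exact legality check lives in the
AMDGPU DAG combine and is not reproduced here:

  define amdgpu_kernel void @narrow_shl_example(i64 addrspace(1)* %out, i32 %x) {
    %masked = and i32 %x, 65535            ; top 16 bits of %masked are known zero
    %ext = zext i32 %masked to i64
    %shl = shl i64 %ext, 4                 ; eligible: becomes zext (shl i32 %masked, 4)
    store i64 %shl, i64 addrspace(1)* %out
    ret void
  }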

Differential Revision: https://reviews.llvm.org/D33367

llvm-svn: 303569
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
index c23cc1c..907c8c2 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -50,7 +50,7 @@
 ; GCN-LABEL: {{^}}s_ubfe_sub_i32:
 ; GCN: s_load_dword [[SRC:s[0-9]+]]
 ; GCN: s_load_dword [[WIDTH:s[0-9]+]]
-; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], {{s[0-9]+}}
+; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
 ; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
 define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -128,7 +128,7 @@
 ; GCN-LABEL: {{^}}s_sbfe_sub_i32:
 ; GCN: s_load_dword [[SRC:s[0-9]+]]
 ; GCN: s_load_dword [[WIDTH:s[0-9]+]]
-; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], {{s[0-9]+}}
+; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]]
 ; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]]
 define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 {
   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()