[IR] Make {extract,insert}element accept an index of any integer type. Given the following C code llvm currently generates suboptimal code for x86-64: __m128 bss4( const __m128 *ptr, size_t i, size_t j ) { float f = ptr[i][j]; return (__m128) { f, f, f, f }; } ================================================= define <4 x float> @_Z4bss4PKDv4_fmm(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) #0 { %a1 = getelementptr inbounds <4 x float>* %ptr, i64 %i %a2 = load <4 x float>* %a1, align 16, !tbaa !1 %a3 = trunc i64 %j to i32 %a4 = extractelement <4 x float> %a2, i32 %a3 %a5 = insertelement <4 x float> undef, float %a4, i32 0 %a6 = insertelement <4 x float> %a5, float %a4, i32 1 %a7 = insertelement <4 x float> %a6, float %a4, i32 2 %a8 = insertelement <4 x float> %a7, float %a4, i32 3 ret <4 x float> %a8 } ================================================= shlq $4, %rsi addq %rdi, %rsi movslq %edx, %rax vbroadcastss (%rsi,%rax,4), %xmm0 retq ================================================= The movslq is uneeded, but is present because of the trunc to i32 and then sext back to i64 that the backend adds for vbroadcastss. We can't remove it because it changes the meaning. The IR that clang generates is already suboptimal. What clang really should emit is: %a4 = extractelement <4 x float> %a2, i64 %j This patch makes that legal. A separate patch will teach clang to do it. Differential Revision: http://reviews.llvm.org/D3519 llvm-svn: 207801

commit: 1f10c5ea943b0f13df9d61ca22dc964760ecc45f [log] [tgz]
author: Michael J. Spencer <bigcheesegs@gmail.com> Thu May 01 22:12:39 2014 +0000
committer: Michael J. Spencer <bigcheesegs@gmail.com> Thu May 01 22:12:39 2014 +0000
tree: bcd20bbda2b1af7b7f8a23d63c70ae6a753aad7e
parent: 94cb35cb054f8fe021b1c4e89eeed15cb9697d36 [diff]
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index a6595f7..3d99a0e 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst

@@ -4470,7 +4470,7 @@
 
 ::
 
-      <result> = extractelement <n x <ty>> <val>, i32 <idx>    ; yields <ty>
+      <result> = extractelement <n x <ty>> <val>, <ty2> <idx>  ; yields <ty>
 
 Overview:
 """""""""
@@ -4484,7 +4484,7 @@
 The first operand of an '``extractelement``' instruction is a value of
 :ref:`vector <t_vector>` type. The second operand is an index indicating
 the position from which to extract the element. The index may be a
-variable.
+variable of any integer type.
 
 Semantics:
 """"""""""
@@ -4510,7 +4510,7 @@
 
 ::
 
-      <result> = insertelement <n x <ty>> <val>, <ty> <elt>, i32 <idx>    ; yields <n x <ty>>
+      <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>    ; yields <n x <ty>>
 
 Overview:
 """""""""
@@ -4525,7 +4525,7 @@
 :ref:`vector <t_vector>` type. The second operand is a scalar value whose
 type must equal the element type of the first operand. The third operand
 is an index indicating the position at which to insert the value. The
-index may be a variable.
+index may be a variable of any integer type.
 
 Semantics:
 """"""""""

diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 74e8439..1b2cf76 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp

@@ -1418,7 +1418,8 @@
                                   ValueList.getConstantFwdRef(Record[2],CurTy));
       break;
     }
-    case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
+    case bitc::CST_CODE_CE_EXTRACTELT
+        : { // CE_EXTRACTELT: [opty, opval, opty, opval]
       if (Record.size() < 3)
         return Error(InvalidRecord);
       VectorType *OpTy =
@@ -1426,20 +1427,37 @@
       if (!OpTy)
         return Error(InvalidRecord);
       Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
-      Constant *Op1 = ValueList.getConstantFwdRef(Record[2],
-                                                  Type::getInt32Ty(Context));
+      Constant *Op1 = nullptr;
+      if (Record.size() == 4) {
+        Type *IdxTy = getTypeByID(Record[2]);
+        if (!IdxTy)
+          return Error(InvalidRecord);
+        Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy);
+      } else // TODO: Remove with llvm 4.0
+        Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+      if (!Op1)
+        return Error(InvalidRecord);
       V = ConstantExpr::getExtractElement(Op0, Op1);
       break;
     }
-    case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
+    case bitc::CST_CODE_CE_INSERTELT
+        : { // CE_INSERTELT: [opval, opval, opty, opval]
       VectorType *OpTy = dyn_cast<VectorType>(CurTy);
       if (Record.size() < 3 || !OpTy)
         return Error(InvalidRecord);
       Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
       Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
                                                   OpTy->getElementType());
-      Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
-                                                  Type::getInt32Ty(Context));
+      Constant *Op2 = nullptr;
+      if (Record.size() == 4) {
+        Type *IdxTy = getTypeByID(Record[2]);
+        if (!IdxTy)
+          return Error(InvalidRecord);
+        Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy);
+      } else // TODO: Remove with llvm 4.0
+        Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+      if (!Op2)
+        return Error(InvalidRecord);
       V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
       break;
     }
@@ -2460,7 +2478,7 @@
       unsigned OpNum = 0;
       Value *Vec, *Idx;
       if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
-          popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
+          getValueTypePair(Record, OpNum, NextValueNo, Idx))
         return Error(InvalidRecord);
       I = ExtractElementInst::Create(Vec, Idx);
       InstructionList.push_back(I);
@@ -2473,7 +2491,7 @@
       if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
           popValue(Record, OpNum, NextValueNo,
                    cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
-          popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
+          getValueTypePair(Record, OpNum, NextValueNo, Idx))
         return Error(InvalidRecord);
       I = InsertElementInst::Create(Vec, Elt, Idx);
       InstructionList.push_back(I);

diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 92965fa..2337487 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

@@ -1087,12 +1087,14 @@
         Code = bitc::CST_CODE_CE_EXTRACTELT;
         Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
         Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getTypeID(C->getOperand(1)->getType()));
         Record.push_back(VE.getValueID(C->getOperand(1)));
         break;
       case Instruction::InsertElement:
         Code = bitc::CST_CODE_CE_INSERTELT;
         Record.push_back(VE.getValueID(C->getOperand(0)));
         Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getTypeID(C->getOperand(2)->getType()));
         Record.push_back(VE.getValueID(C->getOperand(2)));
         break;
       case Instruction::ShuffleVector:
@@ -1253,13 +1255,13 @@
   case Instruction::ExtractElement:
     Code = bitc::FUNC_CODE_INST_EXTRACTELT;
     PushValueAndType(I.getOperand(0), InstID, Vals, VE);
-    pushValue(I.getOperand(1), InstID, Vals, VE);
+    PushValueAndType(I.getOperand(1), InstID, Vals, VE);
     break;
   case Instruction::InsertElement:
     Code = bitc::FUNC_CODE_INST_INSERTELT;
     PushValueAndType(I.getOperand(0), InstID, Vals, VE);
     pushValue(I.getOperand(1), InstID, Vals, VE);
-    pushValue(I.getOperand(2), InstID, Vals, VE);
+    PushValueAndType(I.getOperand(2), InstID, Vals, VE);
     break;
   case Instruction::ShuffleVector:
     Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;

diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 54be980..bb8d60b 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp

@@ -1937,8 +1937,8 @@
 Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
   assert(Val->getType()->isVectorTy() &&
          "Tried to create extractelement operation on non-vector type!");
-  assert(Idx->getType()->isIntegerTy(32) &&
-         "Extractelement index must be i32 type!");
+  assert(Idx->getType()->isIntegerTy() &&
+         "Extractelement index must be an integer type!");
 
   if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
     return FC;          // Fold a few common cases.
@@ -1958,7 +1958,7 @@
          "Tried to create insertelement operation on non-vector type!");
   assert(Elt->getType() == Val->getType()->getVectorElementType() &&
          "Insertelement types must match!");
-  assert(Idx->getType()->isIntegerTy(32) &&
+  assert(Idx->getType()->isIntegerTy() &&
          "Insertelement index must be i32 type!");
 
   if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))

diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index e8bcddb..8aebb8c 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp

@@ -1479,7 +1479,7 @@
 
 
 bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
-  if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
+  if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy())
     return false;
   return true;
 }
@@ -1526,7 +1526,7 @@
   if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
     return false;// Second operand of insertelement must be vector element type.
     
-  if (!Index->getType()->isIntegerTy(32))
+  if (!Index->getType()->isIntegerTy())
     return false;  // Third operand of insertelement must be i32.
   return true;
 }

diff --git a/llvm/test/CodeGen/X86/vec_splat.ll b/llvm/test/CodeGen/X86/vec_splat.ll
index 543c96e..a02e383 100644
--- a/llvm/test/CodeGen/X86/vec_splat.ll
+++ b/llvm/test/CodeGen/X86/vec_splat.ll

@@ -32,3 +32,19 @@
 ; SSE3-LABEL: test_v2sd:
 ; SSE3: movddup
 }
+
+; Fold extract of a load into the load's address computation. This avoids spilling to the stack.
+define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
+  %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
+  %2 = load <4 x float>* %1, align 16
+  %3 = extractelement <4 x float> %2, i64 %j
+  %4 = insertelement <4 x float> undef, float %3, i32 0
+  %5 = insertelement <4 x float> %4, float %3, i32 1
+  %6 = insertelement <4 x float> %5, float %3, i32 2
+  %7 = insertelement <4 x float> %6, float %3, i32 3
+  ret <4 x float> %7
+  
+; AVX-LABEL: load_extract_splat
+; AVX-NOT: movs
+; AVX: vbroadcastss
+}

diff --git a/llvm/test/Feature/instructions.ll b/llvm/test/Feature/instructions.ll
index d0c303d..aa96294 100644
--- a/llvm/test/Feature/instructions.ll
+++ b/llvm/test/Feature/instructions.ll

@@ -4,11 +4,13 @@
 
 define i32 @test_extractelement(<4 x i32> %V) {
         %R = extractelement <4 x i32> %V, i32 1         ; <i32> [#uses=1]
+		%S = extractelement <4 x i32> %V, i64 1         ; <i32> [#uses=0]
         ret i32 %R
 }
 
 define <4 x i32> @test_insertelement(<4 x i32> %V) {
         %R = insertelement <4 x i32> %V, i32 0, i32 0           ; <<4 x i32>> [#uses=1]
+		%S = insertelement <4 x i32> %V, i32 0, i64 0           ; <<4 x i32>> [#uses=0]
         ret <4 x i32> %R
 }
commit	1f10c5ea943b0f13df9d61ca22dc964760ecc45f	[log] [tgz]
author	Michael J. Spencer <bigcheesegs@gmail.com>	Thu May 01 22:12:39 2014 +0000
committer	Michael J. Spencer <bigcheesegs@gmail.com>	Thu May 01 22:12:39 2014 +0000
tree	bcd20bbda2b1af7b7f8a23d63c70ae6a753aad7e
parent	94cb35cb054f8fe021b1c4e89eeed15cb9697d36 [diff]