David L Kreitzer | 01a057a | 2016-10-14 18:20:41 +0000 | [diff] [blame^] | 1 | //===------- X86InterleavedAccess.cpp --------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file contains the X86 implementation of the interleaved accesses |
| 11 | // optimization generating X86-specific instructions/intrinsics for interleaved |
| 12 | // access groups. |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #include "X86ISelLowering.h" |
| 17 | #include "X86TargetMachine.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | /// Returns true if the interleaved access group represented by the shuffles |
| 22 | /// is supported for the subtarget. Returns false otherwise. |
| 23 | static bool isSupported(const X86Subtarget &SubTarget, |
| 24 | const LoadInst *LI, |
| 25 | const ArrayRef<ShuffleVectorInst *> &Shuffles, |
| 26 | unsigned Factor) { |
| 27 | |
| 28 | const DataLayout &DL = Shuffles[0]->getModule()->getDataLayout(); |
| 29 | VectorType *ShuffleVecTy = Shuffles[0]->getType(); |
| 30 | unsigned ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy); |
| 31 | Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType(); |
| 32 | |
| 33 | if (DL.getTypeSizeInBits(LI->getType()) < Factor * ShuffleVecSize) |
| 34 | return false; |
| 35 | |
| 36 | // Currently, lowering is supported for 64 bits on AVX. |
| 37 | if (!SubTarget.hasAVX() || ShuffleVecSize != 256 || |
| 38 | DL.getTypeSizeInBits(ShuffleEltTy) != 64 || |
| 39 | Factor != 4) |
| 40 | return false; |
| 41 | |
| 42 | return true; |
| 43 | } |
| 44 | |
| 45 | /// \brief Lower interleaved load(s) into target specific instructions/ |
| 46 | /// intrinsics. Lowering sequence varies depending on the vector-types, factor, |
| 47 | /// number of shuffles and ISA. |
| 48 | /// Currently, lowering is supported for 4x64 bits with Factor = 4 on AVX. |
| 49 | bool X86TargetLowering::lowerInterleavedLoad( |
| 50 | LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, |
| 51 | ArrayRef<unsigned> Indices, unsigned Factor) const { |
| 52 | assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && |
| 53 | "Invalid interleave factor"); |
| 54 | assert(!Shuffles.empty() && "Empty shufflevector input"); |
| 55 | assert(Shuffles.size() == Indices.size() && |
| 56 | "Unmatched number of shufflevectors and indices"); |
| 57 | |
| 58 | if (!isSupported(Subtarget, LI, Shuffles, Factor)) |
| 59 | return false; |
| 60 | |
| 61 | VectorType *ShuffleVecTy = Shuffles[0]->getType(); |
| 62 | |
| 63 | Type *VecBasePtrTy = ShuffleVecTy->getPointerTo(LI->getPointerAddressSpace()); |
| 64 | |
| 65 | IRBuilder<> Builder(LI); |
| 66 | SmallVector<Instruction *, 4> NewLoads; |
| 67 | SmallVector<Value *, 4> NewShuffles; |
| 68 | NewShuffles.resize(Factor); |
| 69 | |
| 70 | Value *VecBasePtr = |
| 71 | Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy); |
| 72 | |
| 73 | // Generate 4 loads of type v4xT64 |
| 74 | for (unsigned Part = 0; Part < Factor; Part++) { |
| 75 | // TODO: Support inbounds GEP |
| 76 | Value *NewBasePtr = |
| 77 | Builder.CreateGEP(VecBasePtr, Builder.getInt32(Part)); |
| 78 | Instruction *NewLoad = |
| 79 | Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment()); |
| 80 | NewLoads.push_back(NewLoad); |
| 81 | } |
| 82 | |
| 83 | // dst = src1[0,1],src2[0,1] |
| 84 | uint32_t IntMask1[] = {0, 1, 4, 5}; |
| 85 | ArrayRef<unsigned int> ShuffleMask = makeArrayRef(IntMask1, 4); |
| 86 | Value *IntrVec1 = |
| 87 | Builder.CreateShuffleVector(NewLoads[0], NewLoads[2], ShuffleMask); |
| 88 | Value *IntrVec2 = |
| 89 | Builder.CreateShuffleVector(NewLoads[1], NewLoads[3], ShuffleMask); |
| 90 | |
| 91 | // dst = src1[2,3],src2[2,3] |
| 92 | uint32_t IntMask2[] = {2, 3, 6, 7}; |
| 93 | ShuffleMask = makeArrayRef(IntMask2, 4); |
| 94 | Value *IntrVec3 = |
| 95 | Builder.CreateShuffleVector(NewLoads[0], NewLoads[2], ShuffleMask); |
| 96 | Value *IntrVec4 = |
| 97 | Builder.CreateShuffleVector(NewLoads[1], NewLoads[3], ShuffleMask); |
| 98 | |
| 99 | // dst = src1[0],src2[0],src1[2],src2[2] |
| 100 | uint32_t IntMask3[] = {0, 4, 2, 6}; |
| 101 | ShuffleMask = makeArrayRef(IntMask3, 4); |
| 102 | NewShuffles[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, ShuffleMask); |
| 103 | NewShuffles[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, ShuffleMask); |
| 104 | |
| 105 | // dst = src1[1],src2[1],src1[3],src2[3] |
| 106 | uint32_t IntMask4[] = {1, 5, 3, 7}; |
| 107 | ShuffleMask = makeArrayRef(IntMask4, 4); |
| 108 | NewShuffles[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, ShuffleMask); |
| 109 | NewShuffles[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, ShuffleMask); |
| 110 | |
| 111 | for (unsigned i = 0; i < Shuffles.size(); i++) { |
| 112 | unsigned Index = Indices[i]; |
| 113 | Shuffles[i]->replaceAllUsesWith(NewShuffles[Index]); |
| 114 | } |
| 115 | |
| 116 | return true; |
| 117 | } |