[Hexagon] Generate HVX code for vector construction and access
Support for:
- build vector,
- extract vector element, subvector,
- insert vector element, subvector,
- shuffle.
llvm-svn: 319901
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index ac6a5fc..9f30f2b 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -35,7 +35,9 @@
HexagonHazardRecognizer.cpp
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
+ HexagonISelDAGToDAGHVX.cpp
HexagonISelLowering.cpp
+ HexagonISelLoweringHVX.cpp
HexagonLoopIdiomRecognition.cpp
HexagonMachineFunctionInfo.cpp
HexagonMachineScheduler.cpp
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 2551fe5..d0cd143 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -754,7 +754,6 @@
CurDAG->RemoveDeadNode(N);
}
-
void HexagonDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode())
return N->setNodeId(-1); // Already selected.
@@ -772,6 +771,13 @@
case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N);
}
+ if (HST->useHVXOps()) {
+ switch (N->getOpcode()) {
+ case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
+ case HexagonISD::VROR: return SelectHvxRor(N);
+ }
+ }
+
SelectCode(N);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index 4a7f4b7..e3e22a3 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -26,6 +26,7 @@
class MachineFunction;
class HexagonInstrInfo;
class HexagonRegisterInfo;
+class HexagonTargetLowering;
class HexagonDAGToDAGISel : public SelectionDAGISel {
const HexagonSubtarget *HST;
@@ -100,13 +101,25 @@
void SelectConstant(SDNode *N);
void SelectConstantFP(SDNode *N);
void SelectBitcast(SDNode *N);
- void SelectVectorShuffle(SDNode *N);
- // Include the pieces autogenerated from the target description.
+ // Include the declarations autogenerated from the selection patterns.
#define GET_DAGISEL_DECL
#include "HexagonGenDAGISel.inc"
private:
+ // This is really only to get access to ReplaceNode (which is a protected
+ // member). Any other members used by HvxSelector can be moved around to
+ // make them accessible).
+ friend struct HvxSelector;
+
+ SDValue selectUndef(const SDLoc &dl, MVT ResTy) {
+ SDNode *U = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy);
+ return SDValue(U, 0);
+ }
+
+ void SelectHvxShuffle(SDNode *N);
+ void SelectHvxRor(SDNode *N);
+
bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src);
bool isOrEquivalentToAdd(const SDNode *N) const;
bool isAlignedMemNode(const MemSDNode *N) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
new file mode 100644
index 0000000..a636e4e
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -0,0 +1,1924 @@
+//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonISelDAGToDAG.h"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+#include <deque>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+#define DEBUG_TYPE "hexagon-isel"
+
+using namespace llvm;
+
+// --------------------------------------------------------------------
+// Implementation of permutation networks.
+
+// Implementation of the node routing through butterfly networks:
+// - Forward delta.
+// - Reverse delta.
+// - Benes.
+//
+//
+// Forward delta network consists of log(N) steps, where N is the number
+// of inputs. In each step, an input can stay in place, or it can get
+// routed to another position[1]. The step after that consists of two
+// networks, each half in size in terms of the number of nodes. In those
+// terms, in the given step, an input can go to either the upper or the
+// lower network in the next step.
+//
+// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
+// positions as long as there is no conflict.
+
+// Here's a delta network for 8 inputs, only the switching routes are
+// shown:
+//
+// Steps:
+// |- 1 ---------------|- 2 -----|- 3 -|
+//
+// Inp[0] *** *** *** *** Out[0]
+// \ / \ / \ /
+// \ / \ / X
+// \ / \ / / \
+// Inp[1] *** \ / *** X *** *** Out[1]
+// \ \ / / \ / \ /
+// \ \ / / X X
+// \ \ / / / \ / \
+// Inp[2] *** \ \ / / *** X *** *** Out[2]
+// \ \ X / / / \ \ /
+// \ \ / \ / / / \ X
+// \ X X / / \ / \
+// Inp[3] *** \ / \ / \ / *** *** *** Out[3]
+// \ X X X /
+// \ / \ / \ / \ /
+// X X X X
+// / \ / \ / \ / \
+// / X X X \
+// Inp[4] *** / \ / \ / \ *** *** *** Out[4]
+// / X X \ \ / \ /
+// / / \ / \ \ \ / X
+// / / X \ \ \ / / \
+// Inp[5] *** / / \ \ *** X *** *** Out[5]
+// / / \ \ \ / \ /
+// / / \ \ X X
+// / / \ \ / \ / \
+// Inp[6] *** / \ *** X *** *** Out[6]
+// / \ / \ \ /
+// / \ / \ X
+// / \ / \ / \
+// Inp[7] *** *** *** *** Out[7]
+//
+//
+// Reverse delta network is same as delta network, with the steps in
+// the opposite order.
+//
+//
+// Benes network is a forward delta network immediately followed by
+// a reverse delta network.
+
+
+// Graph coloring utility used to partition nodes into two groups:
+// they will correspond to nodes routed to the upper and lower networks.
+struct Coloring {
+ enum : uint8_t {
+ None = 0,
+ Red,
+ Black
+ };
+
+ using Node = int;
+ using MapType = std::map<Node,uint8_t>;
+ static constexpr Node Ignore = Node(-1);
+
+ Coloring(ArrayRef<Node> Ord) : Order(Ord) {
+ build();
+ if (!color())
+ Colors.clear();
+ }
+
+ const MapType &colors() const {
+ return Colors;
+ }
+
+ uint8_t other(uint8_t Color) {
+ if (Color == None)
+ return Red;
+ return Color == Red ? Black : Red;
+ }
+
+ void dump() const;
+
+private:
+ ArrayRef<Node> Order;
+ MapType Colors;
+ std::set<Node> Needed;
+
+ using NodeSet = std::set<Node>;
+ std::map<Node,NodeSet> Edges;
+
+ Node conj(Node Pos) {
+ Node Num = Order.size();
+ return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
+ }
+
+ uint8_t getColor(Node N) {
+ auto F = Colors.find(N);
+ return F != Colors.end() ? F->second : None;
+ }
+
+ std::pair<bool,uint8_t> getUniqueColor(const NodeSet &Nodes);
+
+ void build();
+ bool color();
+};
+
+std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) {
+ uint8_t Color = None;
+ for (Node N : Nodes) {
+ uint8_t ColorN = getColor(N);
+ if (ColorN == None)
+ continue;
+ if (Color == None)
+ Color = ColorN;
+ else if (Color != None && Color != ColorN)
+ return { false, None };
+ }
+ return { true, Color };
+}
+
+void Coloring::build() {
+ // Add Order[P] and Order[conj(P)] to Edges.
+ for (unsigned P = 0; P != Order.size(); ++P) {
+ Node I = Order[P];
+ if (I != Ignore) {
+ Needed.insert(I);
+ Node PC = Order[conj(P)];
+ if (PC != Ignore && PC != I)
+ Edges[I].insert(PC);
+ }
+ }
+ // Add I and conj(I) to Edges.
+ for (unsigned I = 0; I != Order.size(); ++I) {
+ if (!Needed.count(I))
+ continue;
+ Node C = conj(I);
+ // This will create an entry in the edge table, even if I is not
+ // connected to any other node. This is necessary, because it still
+ // needs to be colored.
+ NodeSet &Is = Edges[I];
+ if (Needed.count(C))
+ Is.insert(C);
+ }
+}
+
+bool Coloring::color() {
+ SetVector<Node> FirstQ;
+ auto Enqueue = [this,&FirstQ] (Node N) {
+ SetVector<Node> Q;
+ Q.insert(N);
+ for (unsigned I = 0; I != Q.size(); ++I) {
+ NodeSet &Ns = Edges[Q[I]];
+ Q.insert(Ns.begin(), Ns.end());
+ }
+ FirstQ.insert(Q.begin(), Q.end());
+ };
+ for (Node N : Needed)
+ Enqueue(N);
+
+ for (Node N : FirstQ) {
+ if (Colors.count(N))
+ continue;
+ NodeSet &Ns = Edges[N];
+ auto P = getUniqueColor(Ns);
+ if (!P.first)
+ return false;
+ Colors[N] = other(P.second);
+ }
+
+ // First, color nodes that don't have any dups.
+ for (auto E : Edges) {
+ Node N = E.first;
+ if (!Needed.count(conj(N)) || Colors.count(N))
+ continue;
+ auto P = getUniqueColor(E.second);
+ if (!P.first)
+ return false;
+ Colors[N] = other(P.second);
+ }
+
+ // Now, nodes that are still uncolored. Since the graph can be modified
+ // in this step, create a work queue.
+ std::vector<Node> WorkQ;
+ for (auto E : Edges) {
+ Node N = E.first;
+ if (!Colors.count(N))
+ WorkQ.push_back(N);
+ }
+
+ for (unsigned I = 0; I < WorkQ.size(); ++I) {
+ Node N = WorkQ[I];
+ NodeSet &Ns = Edges[N];
+ auto P = getUniqueColor(Ns);
+ if (P.first) {
+ Colors[N] = other(P.second);
+ continue;
+ }
+
+ // Coloring failed. Split this node.
+ Node C = conj(N);
+ uint8_t ColorN = other(None);
+ uint8_t ColorC = other(ColorN);
+ NodeSet &Cs = Edges[C];
+ NodeSet CopyNs = Ns;
+ for (Node M : CopyNs) {
+ uint8_t ColorM = getColor(M);
+ if (ColorM == ColorC) {
+ // Connect M with C, disconnect M from N.
+ Cs.insert(M);
+ Edges[M].insert(C);
+ Ns.erase(M);
+ Edges[M].erase(N);
+ }
+ }
+ Colors[N] = ColorN;
+ Colors[C] = ColorC;
+ }
+
+ // Explicitly assign "None" all all uncolored nodes.
+ for (unsigned I = 0; I != Order.size(); ++I)
+ if (Colors.count(I) == 0)
+ Colors[I] = None;
+
+ return true;
+}
+
+LLVM_DUMP_METHOD
+void Coloring::dump() const {
+ dbgs() << "{ Order: {";
+ for (unsigned I = 0; I != Order.size(); ++I) {
+ Node P = Order[I];
+ if (P != Ignore)
+ dbgs() << ' ' << P;
+ else
+ dbgs() << " -";
+ }
+ dbgs() << " }\n";
+ dbgs() << " Needed: {";
+ for (Node N : Needed)
+ dbgs() << ' ' << N;
+ dbgs() << " }\n";
+
+ dbgs() << " Edges: {\n";
+ for (auto E : Edges) {
+ dbgs() << " " << E.first << " -> {";
+ for (auto N : E.second)
+ dbgs() << ' ' << N;
+ dbgs() << " }\n";
+ }
+ dbgs() << " }\n";
+
+ static const char *const Names[] = { "None", "Red", "Black" };
+ dbgs() << " Colors: {\n";
+ for (auto C : Colors)
+ dbgs() << " " << C.first << " -> " << Names[C.second] << "\n";
+ dbgs() << " }\n}\n";
+}
+
+// Base class of for reordering networks. They don't strictly need to be
+// permutations, as outputs with repeated occurrences of an input element
+// are allowed.
+struct PermNetwork {
+ using Controls = std::vector<uint8_t>;
+ using ElemType = int;
+ static constexpr ElemType Ignore = ElemType(-1);
+
+ enum : uint8_t {
+ None,
+ Pass,
+ Switch
+ };
+ enum : uint8_t {
+ Forward,
+ Reverse
+ };
+
+ PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
+ Order.assign(Ord.data(), Ord.data()+Ord.size());
+ Log = 0;
+
+ unsigned S = Order.size();
+ while (S >>= 1)
+ ++Log;
+
+ Table.resize(Order.size());
+ for (RowType &Row : Table)
+ Row.resize(Mult*Log, None);
+ }
+
+ void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
+ unsigned Size = Order.size();
+ V.resize(Size);
+ for (unsigned I = 0; I != Size; ++I) {
+ unsigned W = 0;
+ for (unsigned L = 0; L != Log; ++L) {
+ unsigned C = ctl(I, StartAt+L) == Switch;
+ if (Dir == Forward)
+ W |= C << (Log-1-L);
+ else
+ W |= C << L;
+ }
+ assert(isUInt<8>(W));
+ V[I] = uint8_t(W);
+ }
+ }
+
+ uint8_t ctl(ElemType Pos, unsigned Step) const {
+ return Table[Pos][Step];
+ }
+ unsigned size() const {
+ return Order.size();
+ }
+ unsigned steps() const {
+ return Log;
+ }
+
+protected:
+ unsigned Log;
+ std::vector<ElemType> Order;
+ using RowType = std::vector<uint8_t>;
+ std::vector<RowType> Table;
+};
+
+struct ForwardDeltaNetwork : public PermNetwork {
+ ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
+
+ bool run(Controls &V) {
+ if (!route(Order.data(), Table.data(), size(), 0))
+ return false;
+ getControls(V, 0, Forward);
+ return true;
+ }
+
+private:
+ bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
+};
+
+struct ReverseDeltaNetwork : public PermNetwork {
+ ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
+
+ bool run(Controls &V) {
+ if (!route(Order.data(), Table.data(), size(), 0))
+ return false;
+ getControls(V, 0, Reverse);
+ return true;
+ }
+
+private:
+ bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
+};
+
+struct BenesNetwork : public PermNetwork {
+ BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
+
+ bool run(Controls &F, Controls &R) {
+ if (!route(Order.data(), Table.data(), size(), 0))
+ return false;
+
+ getControls(F, 0, Forward);
+ getControls(R, Log, Reverse);
+ return true;
+ }
+
+private:
+ bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
+};
+
+
+bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
+ unsigned Step) {
+ bool UseUp = false, UseDown = false;
+ ElemType Num = Size;
+
+ // Cannot use coloring here, because coloring is used to determine
+ // the "big" switch, i.e. the one that changes halves, and in a forward
+ // network, a color can be simultaneously routed to both halves in the
+ // step we're working on.
+ for (ElemType J = 0; J != Num; ++J) {
+ ElemType I = P[J];
+ // I is the position in the input,
+ // J is the position in the output.
+ if (I == Ignore)
+ continue;
+ uint8_t S;
+ if (I < Num/2)
+ S = (J < Num/2) ? Pass : Switch;
+ else
+ S = (J < Num/2) ? Switch : Pass;
+
+ // U is the element in the table that needs to be updated.
+ ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
+ if (U < Num/2)
+ UseUp = true;
+ else
+ UseDown = true;
+ if (T[U][Step] != S && T[U][Step] != None)
+ return false;
+ T[U][Step] = S;
+ }
+
+ for (ElemType J = 0; J != Num; ++J)
+ if (P[J] != Ignore && P[J] >= Num/2)
+ P[J] -= Num/2;
+
+ if (Step+1 < Log) {
+ if (UseUp && !route(P, T, Size/2, Step+1))
+ return false;
+ if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
+ return false;
+ }
+ return true;
+}
+
+bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
+ unsigned Step) {
+ unsigned Pets = Log-1 - Step;
+ bool UseUp = false, UseDown = false;
+ ElemType Num = Size;
+
+ // In this step half-switching occurs, so coloring can be used.
+ Coloring G({P,Size});
+ const Coloring::MapType &M = G.colors();
+ if (M.empty())
+ return false;
+
+ uint8_t ColorUp = Coloring::None;
+ for (ElemType J = 0; J != Num; ++J) {
+ ElemType I = P[J];
+ // I is the position in the input,
+ // J is the position in the output.
+ if (I == Ignore)
+ continue;
+ uint8_t C = M.at(I);
+ if (C == Coloring::None)
+ continue;
+ // During "Step", inputs cannot switch halves, so if the "up" color
+ // is still unknown, make sure that it is selected in such a way that
+ // "I" will stay in the same half.
+ bool InpUp = I < Num/2;
+ if (ColorUp == Coloring::None)
+ ColorUp = InpUp ? C : G.other(C);
+ if ((C == ColorUp) != InpUp) {
+ // If I should go to a different half than where is it now, give up.
+ return false;
+ }
+
+ uint8_t S;
+ if (InpUp) {
+ S = (J < Num/2) ? Pass : Switch;
+ UseUp = true;
+ } else {
+ S = (J < Num/2) ? Switch : Pass;
+ UseDown = true;
+ }
+ T[J][Pets] = S;
+ }
+
+ // Reorder the working permutation according to the computed switch table
+ // for the last step (i.e. Pets).
+ for (ElemType J = 0; J != Size/2; ++J) {
+ ElemType PJ = P[J]; // Current values of P[J]
+ ElemType PC = P[J+Size/2]; // and P[conj(J)]
+ ElemType QJ = PJ; // New values of P[J]
+ ElemType QC = PC; // and P[conj(J)]
+ if (T[J][Pets] == Switch)
+ QC = PJ;
+ if (T[J+Size/2][Pets] == Switch)
+ QJ = PC;
+ P[J] = QJ;
+ P[J+Size/2] = QC;
+ }
+
+ for (ElemType J = 0; J != Num; ++J)
+ if (P[J] != Ignore && P[J] >= Num/2)
+ P[J] -= Num/2;
+
+ if (Step+1 < Log) {
+ if (UseUp && !route(P, T, Size/2, Step+1))
+ return false;
+ if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
+ return false;
+ }
+ return true;
+}
+
+bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
+ unsigned Step) {
+ Coloring G({P,Size});
+ const Coloring::MapType &M = G.colors();
+ if (M.empty())
+ return false;
+ ElemType Num = Size;
+
+ unsigned Pets = 2*Log-1 - Step;
+ bool UseUp = false, UseDown = false;
+
+ // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
+ // result in different controls. Let's pick the one where the first
+ // control will be "Pass".
+ uint8_t ColorUp = Coloring::None;
+ for (ElemType J = 0; J != Num; ++J) {
+ ElemType I = P[J];
+ if (I == Ignore)
+ continue;
+ uint8_t C = M.at(I);
+ if (C == Coloring::None)
+ continue;
+ if (ColorUp == Coloring::None) {
+ ColorUp = (I < Num/2) ? Coloring::Red : Coloring::Black;
+ }
+ unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
+ if (C == ColorUp) {
+ if (I < Num/2)
+ T[I][Step] = Pass;
+ else
+ T[CI][Step] = Switch;
+ T[J][Pets] = (J < Num/2) ? Pass : Switch;
+ UseUp = true;
+ } else { // Down
+ if (I < Num/2)
+ T[CI][Step] = Switch;
+ else
+ T[I][Step] = Pass;
+ T[J][Pets] = (J < Num/2) ? Switch : Pass;
+ UseDown = true;
+ }
+ }
+
+ // Reorder the working permutation according to the computed switch table
+ // for the last step (i.e. Pets).
+ for (ElemType J = 0; J != Num/2; ++J) {
+ ElemType PJ = P[J]; // Current values of P[J]
+ ElemType PC = P[J+Num/2]; // and P[conj(J)]
+ ElemType QJ = PJ; // New values of P[J]
+ ElemType QC = PC; // and P[conj(J)]
+ if (T[J][Pets] == Switch)
+ QC = PJ;
+ if (T[J+Num/2][Pets] == Switch)
+ QJ = PC;
+ P[J] = QJ;
+ P[J+Num/2] = QC;
+ }
+
+ for (ElemType J = 0; J != Num; ++J)
+ if (P[J] != Ignore && P[J] >= Num/2)
+ P[J] -= Num/2;
+
+ if (Step+1 < Log) {
+ if (UseUp && !route(P, T, Size/2, Step+1))
+ return false;
+ if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
+ return false;
+ }
+ return true;
+}
+
+// --------------------------------------------------------------------
+// Support for building selection results (output instructions that are
+// parts of the final selection).
+
+struct OpRef {
+ OpRef(SDValue V) : OpV(V) {}
+ bool isValue() const { return OpV.getNode() != nullptr; }
+ bool isValid() const { return isValue() || !(OpN & Invalid); }
+ static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
+ static OpRef fail() { return OpRef(Invalid); }
+
+ static OpRef lo(const OpRef &R) {
+ assert(!R.isValue());
+ return OpRef(R.OpN & (Undef | Index | LoHalf));
+ }
+ static OpRef hi(const OpRef &R) {
+ assert(!R.isValue());
+ return OpRef(R.OpN & (Undef | Index | HiHalf));
+ }
+ static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
+
+ // Direct value.
+ SDValue OpV = SDValue();
+
+ // Reference to the operand of the input node:
+ // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
+ // operand index:
+ // If bit 30 is set, it's the high half of the operand.
+ // If bit 29 is set, it's the low half of the operand.
+ unsigned OpN = 0;
+
+ enum : unsigned {
+ Invalid = 0x10000000,
+ LoHalf = 0x20000000,
+ HiHalf = 0x40000000,
+ Whole = LoHalf | HiHalf,
+ Undef = 0x80000000,
+ Index = 0x0FFFFFFF, // Mask of the index value.
+ IndexBits = 28,
+ };
+
+ void print(raw_ostream &OS, const SelectionDAG &G) const;
+
+private:
+ OpRef(unsigned N) : OpN(N) {}
+};
+
+struct NodeTemplate {
+ NodeTemplate() = default;
+ unsigned Opc = 0;
+ MVT Ty = MVT::Other;
+ std::vector<OpRef> Ops;
+
+ void print(raw_ostream &OS, const SelectionDAG &G) const;
+};
+
+struct ResultStack {
+ ResultStack(SDNode *Inp)
+ : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
+ SDNode *InpNode;
+ MVT InpTy;
+ unsigned push(const NodeTemplate &Res) {
+ List.push_back(Res);
+ return List.size()-1;
+ }
+ unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
+ NodeTemplate Res;
+ Res.Opc = Opc;
+ Res.Ty = Ty;
+ Res.Ops = Ops;
+ return push(Res);
+ }
+ bool empty() const { return List.empty(); }
+ unsigned size() const { return List.size(); }
+ unsigned top() const { return size()-1; }
+ const NodeTemplate &operator[](unsigned I) const { return List[I]; }
+ unsigned reset(unsigned NewTop) {
+ List.resize(NewTop+1);
+ return NewTop;
+ }
+
+ using BaseType = std::vector<NodeTemplate>;
+ BaseType::iterator begin() { return List.begin(); }
+ BaseType::iterator end() { return List.end(); }
+ BaseType::const_iterator begin() const { return List.begin(); }
+ BaseType::const_iterator end() const { return List.end(); }
+
+ BaseType List;
+
+ void print(raw_ostream &OS, const SelectionDAG &G) const;
+};
+
+void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
+ if (isValue()) {
+ OpV.getNode()->print(OS, &G);
+ return;
+ }
+ if (OpN & Invalid) {
+ OS << "invalid";
+ return;
+ }
+ if (OpN & Undef) {
+ OS << "undef";
+ return;
+ }
+ if ((OpN & Whole) != Whole) {
+ assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
+ if (OpN & LoHalf)
+ OS << "lo ";
+ else
+ OS << "hi ";
+ }
+ OS << '#' << SignExtend32(OpN & Index, IndexBits);
+}
+
+void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
+ const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
+ OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
+ << TII.getName(Opc);
+ bool Comma = false;
+ for (const auto &R : Ops) {
+ if (Comma)
+ OS << ',';
+ Comma = true;
+ OS << ' ';
+ R.print(OS, G);
+ }
+}
+
+void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
+ OS << "Input node:\n";
+ InpNode->dumpr(&G);
+ OS << "Result templates:\n";
+ for (unsigned I = 0, E = List.size(); I != E; ++I) {
+ OS << '[' << I << "] ";
+ List[I].print(OS, G);
+ OS << '\n';
+ }
+}
+
+struct ShuffleMask {
+ ShuffleMask(ArrayRef<int> M) : Mask(M) {
+ for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
+ int M = Mask[I];
+ if (M == -1)
+ continue;
+ MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
+ MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
+ }
+ }
+
+ ArrayRef<int> Mask;
+ int MinSrc = -1, MaxSrc = -1;
+
+ ShuffleMask lo() const {
+ size_t H = Mask.size()/2;
+ return ShuffleMask({Mask.data(), H});
+ }
+ ShuffleMask hi() const {
+ size_t H = Mask.size()/2;
+ return ShuffleMask({Mask.data()+H, H});
+ }
+};
+
+// --------------------------------------------------------------------
+// The HvxSelector class.
+
+static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) {
+ return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
+}
+static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) {
+ return static_cast<const HexagonSubtarget&>(G.getSubtarget());
+}
+
+namespace llvm {
+ struct HvxSelector {
+ const HexagonTargetLowering &Lower;
+ HexagonDAGToDAGISel &ISel;
+ SelectionDAG &DAG;
+ const HexagonSubtarget &HST;
+ const unsigned HwLen;
+
+ HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
+ : Lower(getHexagonLowering(G)), ISel(HS), DAG(G),
+ HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
+
+ MVT getSingleVT(MVT ElemTy) const {
+ unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8);
+ return MVT::getVectorVT(ElemTy, NumElems);
+ }
+
+ MVT getPairVT(MVT ElemTy) const {
+ unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8);
+ return MVT::getVectorVT(ElemTy, NumElems);
+ }
+
+ void selectShuffle(SDNode *N);
+ void selectRor(SDNode *N);
+
+ private:
+ void materialize(const ResultStack &Results);
+
+ SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
+
+ enum : unsigned {
+ None,
+ PackMux,
+ };
+ OpRef concat(OpRef Va, OpRef Vb, ResultStack &Results);
+ OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
+ MutableArrayRef<int> NewMask, unsigned Options = None);
+ OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
+ MutableArrayRef<int> NewMask);
+ OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results);
+ OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
+ ResultStack &Results);
+ OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
+ ResultStack &Results);
+
+ OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
+ OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
+ OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
+ OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
+
+ OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
+ OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
+ OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
+ OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
+
+ bool selectVectorConstants(SDNode *N);
+ bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
+ SDValue Va, SDValue Vb, SDNode *N);
+
+ };
+}
+
+// Return a submask of A that is shorter than A by |C| elements:
+// - if C > 0, return a submask of A that starts at position C,
+// - if C <= 0, return a submask of A that starts at 0 (reduce A by |C|).
+static ArrayRef<int> subm(ArrayRef<int> A, int C) {
+ if (C > 0)
+ return { A.data()+C, A.size()-C };
+ return { A.data(), A.size()+C };
+}
+
+static void splitMask(ArrayRef<int> Mask, MutableArrayRef<int> MaskL,
+ MutableArrayRef<int> MaskR) {
+ unsigned VecLen = Mask.size();
+ assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
+ for (unsigned I = 0; I != VecLen; ++I) {
+ int M = Mask[I];
+ if (M < 0) {
+ MaskL[I] = MaskR[I] = -1;
+ } else if (unsigned(M) < VecLen) {
+ MaskL[I] = M;
+ MaskR[I] = -1;
+ } else {
+ MaskL[I] = -1;
+ MaskR[I] = M-VecLen;
+ }
+ }
+}
+
+static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
+ unsigned MaxLen) {
+ assert(A.size() > 0 && A.size() >= MaxLen);
+ int F = A[0];
+ int E = F;
+ for (unsigned I = 1; I != MaxLen; ++I) {
+ if (A[I] - E != Inc)
+ return { F, I };
+ E = A[I];
+ }
+ return { F, MaxLen };
+}
+
+static bool isUndef(ArrayRef<int> Mask) {
+ for (int Idx : Mask)
+ if (Idx != -1)
+ return false;
+ return true;
+}
+
+static bool isIdentity(ArrayRef<int> Mask) {
+ unsigned Size = Mask.size();
+ return findStrip(Mask, 1, Size) == std::make_pair(0, Size);
+}
+
+static bool isPermutation(ArrayRef<int> Mask) {
+ // Check by adding all numbers only works if there is no overflow.
+ assert(Mask.size() < 0x00007FFF && "Sanity failure");
+ int Sum = 0;
+ for (int Idx : Mask) {
+ if (Idx == -1)
+ return false;
+ Sum += Idx;
+ }
+ int N = Mask.size();
+ return 2*Sum == N*(N-1);
+}
+
+bool HvxSelector::selectVectorConstants(SDNode *N) {
+ // Constant vectors are generated as loads from constant pools.
+ // Since they are generated during the selection process, the main
+ // selection algorithm is not aware of them. Select them directly
+ // here.
+ if (!N->isMachineOpcode() && N->getOpcode() == ISD::LOAD) {
+ SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
+ unsigned AddrOpc = Addr.getOpcode();
+ if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) {
+ if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) {
+ ISel.Select(N);
+ return true;
+ }
+ }
+ }
+
+ bool Selected = false;
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Selected = selectVectorConstants(N->getOperand(I).getNode()) || Selected;
+ return Selected;
+}
+
+void HvxSelector::materialize(const ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "Materializing\n";
+ Results.print(dbgs(), DAG);
+ });
+ if (Results.empty())
+ return;
+ const SDLoc &dl(Results.InpNode);
+ std::vector<SDValue> Output;
+
+ for (unsigned I = 0, E = Results.size(); I != E; ++I) {
+ const NodeTemplate &Node = Results[I];
+ std::vector<SDValue> Ops;
+ for (const OpRef &R : Node.Ops) {
+ assert(R.isValid());
+ if (R.isValue()) {
+ Ops.push_back(R.OpV);
+ continue;
+ }
+ if (R.OpN & OpRef::Undef) {
+ MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index);
+ Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
+ continue;
+ }
+ // R is an index of a result.
+ unsigned Part = R.OpN & OpRef::Whole;
+ int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
+ if (Idx < 0)
+ Idx += I;
+ assert(Idx >= 0 && unsigned(Idx) < Output.size());
+ SDValue Op = Output[Idx];
+ MVT OpTy = Op.getValueType().getSimpleVT();
+ if (Part != OpRef::Whole) {
+ assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
+ if (Op.getOpcode() == HexagonISD::VCOMBINE) {
+ Op = (Part == OpRef::HiHalf) ? Op.getOperand(0) : Op.getOperand(1);
+ } else {
+ MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
+ OpTy.getVectorNumElements()/2);
+ unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
+ : Hexagon::vsub_hi;
+ Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
+ }
+ }
+ Ops.push_back(Op);
+ } // for (Node : Results)
+
+ assert(Node.Ty != MVT::Other);
+ SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
+ ? Ops.front().getNode()
+ : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
+ Output.push_back(SDValue(ResN, 0));
+ }
+
+ SDNode *OutN = Output.back().getNode();
+ SDNode *InpN = Results.InpNode;
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "Generated node:\n";
+ OutN->dumpr(&DAG);
+ });
+
+ ISel.ReplaceNode(InpN, OutN);
+ selectVectorConstants(OutN);
+ DAG.RemoveDeadNodes();
+}
+
+OpRef HvxSelector::concat(OpRef Lo, OpRef Hi, ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ const SDLoc &dl(Results.InpNode);
+ Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
+ DAG.getTargetConstant(Hexagon::HvxWRRegClassID, dl, MVT::i32),
+ Lo, DAG.getTargetConstant(Hexagon::vsub_lo, dl, MVT::i32),
+ Hi, DAG.getTargetConstant(Hexagon::vsub_hi, dl, MVT::i32),
+ });
+ return OpRef::res(Results.top());
+}
+
+// Va, Vb are single vectors, SM can be arbitrarily long.
+OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
+ ResultStack &Results, MutableArrayRef<int> NewMask,
+ unsigned Options) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ if (!Va.isValid() || !Vb.isValid())
+ return OpRef::fail();
+
+ int VecLen = SM.Mask.size();
+ MVT Ty = getSingleVT(MVT::i8);
+
+ if (SM.MaxSrc - SM.MinSrc < int(HwLen)) {
+ if (SM.MaxSrc < int(HwLen)) {
+ memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen);
+ return Va;
+ }
+ if (SM.MinSrc >= int(HwLen)) {
+ for (int I = 0; I != VecLen; ++I) {
+ int M = SM.Mask[I];
+ if (M != -1)
+ M -= HwLen;
+ NewMask[I] = M;
+ }
+ return Vb;
+ }
+ const SDLoc &dl(Results.InpNode);
+ SDValue S = DAG.getTargetConstant(SM.MinSrc, dl, MVT::i32);
+ if (isUInt<3>(SM.MinSrc)) {
+ Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, S});
+ } else {
+ Results.push(Hexagon::A2_tfrsi, MVT::i32, {S});
+ unsigned Top = Results.top();
+ Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)});
+ }
+ for (int I = 0; I != VecLen; ++I) {
+ int M = SM.Mask[I];
+ if (M != -1)
+ M -= SM.MinSrc;
+ NewMask[I] = M;
+ }
+ return OpRef::res(Results.top());
+ }
+
+ if (Options & PackMux) {
+ // If elements picked from Va and Vb have all different (source) indexes
+ // (relative to the start of the argument), do a mux, and update the mask.
+ BitVector Picked(HwLen);
+ SmallVector<uint8_t,128> MuxBytes(HwLen);
+ bool CanMux = true;
+ for (int I = 0; I != VecLen; ++I) {
+ int M = SM.Mask[I];
+ if (M == -1)
+ continue;
+ if (M >= int(HwLen))
+ M -= HwLen;
+ else
+ MuxBytes[M] = 0xFF;
+ if (Picked[M]) {
+ CanMux = false;
+ break;
+ }
+ NewMask[I] = M;
+ }
+ if (CanMux)
+ return vmuxs(MuxBytes, Va, Vb, Results);
+ }
+
+ return OpRef::fail();
+}
+
+OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
+ ResultStack &Results, MutableArrayRef<int> NewMask) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ unsigned HalfMask = 0;
+ unsigned LogHw = Log2_32(HwLen);
+ for (int M : SM.Mask) {
+ if (M == -1)
+ continue;
+ HalfMask |= (1u << (M >> LogHw));
+ }
+
+ if (HalfMask == 0)
+ return OpRef::undef(getPairVT(MVT::i8));
+
+ // If more than two halves are used, bail.
+ // TODO: be more aggressive here?
+ if (countPopulation(HalfMask) > 2)
+ return OpRef::fail();
+
+ MVT HalfTy = getSingleVT(MVT::i8);
+
+ OpRef Inp[2] = { Va, Vb };
+ OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
+
+ uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+ unsigned Idx = 0;
+ for (unsigned I = 0; I != 4; ++I) {
+ if ((HalfMask & (1u << I)) == 0)
+ continue;
+ assert(Idx < 2);
+ OpRef Op = Inp[I/2];
+ Out[Idx] = (I & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
+ HalfIdx[I] = Idx++;
+ }
+
+ int VecLen = SM.Mask.size();
+ for (int I = 0; I != VecLen; ++I) {
+ int M = SM.Mask[I];
+ if (M >= 0) {
+ uint8_t Idx = HalfIdx[M >> LogHw];
+ assert(Idx == 0 || Idx == 1);
+ M = (M & (HwLen-1)) + HwLen*Idx;
+ }
+ NewMask[I] = M;
+ }
+
+ return concat(Out[0], Out[1], Results);
+}
+
+OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+
+ int VecLen = SM.Mask.size();
+ SmallVector<uint8_t,128> UsedBytes(VecLen);
+ bool HasUnused = false;
+ for (int I = 0; I != VecLen; ++I) {
+ if (SM.Mask[I] != -1)
+ UsedBytes[I] = 0xFF;
+ else
+ HasUnused = true;
+ }
+ if (!HasUnused)
+ return Va;
+ SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode));
+ Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)});
+ return OpRef::res(Results.top());
+}
+
+OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
+ ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ MVT ByteTy = getSingleVT(MVT::i8);
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX
+ const SDLoc &dl(Results.InpNode);
+ SDValue B = getVectorConstant(Bytes, dl);
+ Results.push(Hexagon::V6_vd0, ByteTy, {});
+ Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
+ Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Va, Vb});
+ return OpRef::res(Results.top());
+}
+
+OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
+ ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ size_t S = Bytes.size() / 2;
+ OpRef L = vmuxs({Bytes.data(), S}, OpRef::lo(Va), OpRef::lo(Vb), Results);
+ OpRef H = vmuxs({Bytes.data()+S, S}, OpRef::hi(Va), OpRef::hi(Vb), Results);
+ return concat(L, H, Results);
+}
+
+OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ unsigned VecLen = SM.Mask.size();
+ assert(HwLen == VecLen);
+ assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
+
+ if (isIdentity(SM.Mask))
+ return Va;
+ if (isUndef(SM.Mask))
+ return OpRef::undef(getSingleVT(MVT::i8));
+
+ return butterfly(SM, Va, Results);
+}
+
+OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
+ ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ OpRef C = contracting(SM, Va, Vb, Results);
+ if (C.isValid())
+ return C;
+
+ int VecLen = SM.Mask.size();
+ SmallVector<int,128> NewMask(VecLen);
+ OpRef P = packs(SM, Va, Vb, Results, NewMask);
+ if (P.isValid())
+ return shuffs1(ShuffleMask(NewMask), P, Results);
+
+ SmallVector<int,128> MaskL(VecLen), MaskR(VecLen);
+ splitMask(SM.Mask, MaskL, MaskR);
+
+ OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
+ OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
+ if (!L.isValid() || !R.isValid())
+ return OpRef::fail();
+
+ SmallVector<uint8_t,128> Bytes(VecLen);
+ for (int I = 0; I != VecLen; ++I) {
+ if (MaskL[I] != -1)
+ Bytes[I] = 0xFF;
+ }
+ return vmuxs(Bytes, L, R, Results);
+}
+
+OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ int VecLen = SM.Mask.size();
+
+ SmallVector<int,128> PackedMask(VecLen);
+ OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
+ if (P.isValid()) {
+ ShuffleMask PM(PackedMask);
+ OpRef E = expanding(PM, P, Results);
+ if (E.isValid())
+ return E;
+
+ OpRef L = shuffs1(PM.lo(), P, Results);
+ OpRef H = shuffs1(PM.hi(), P, Results);
+ if (L.isValid() && H.isValid())
+ return concat(L, H, Results);
+ }
+
+ OpRef R = perfect(SM, Va, Results);
+ if (R.isValid())
+ return R;
+ // TODO commute the mask and try the opposite order of the halves.
+
+ OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
+ OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
+ if (L.isValid() && H.isValid())
+ return concat(L, H, Results);
+
+ return OpRef::fail();
+}
+
+OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
+ ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ int VecLen = SM.Mask.size();
+
+ SmallVector<int,256> PackedMask(VecLen);
+ OpRef P = packp(SM, Va, Vb, Results, PackedMask);
+ if (P.isValid())
+ return shuffp1(ShuffleMask(PackedMask), P, Results);
+
+ SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
+ OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
+ OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
+ if (!L.isValid() || !R.isValid())
+ return OpRef::fail();
+
+ // Mux the results.
+ SmallVector<uint8_t,256> Bytes(VecLen);
+ for (int I = 0; I != VecLen; ++I) {
+ if (MaskL[I] != -1)
+ Bytes[I] = 0xFF;
+ }
+ return vmuxp(Bytes, L, R, Results);
+}
+
+bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
+ MVT ResTy, SDValue Va, SDValue Vb,
+ SDNode *N) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ MVT ElemTy = ResTy.getVectorElementType();
+ assert(ElemTy == MVT::i8);
+ unsigned VecLen = Mask.size();
+ bool HavePairs = (2*HwLen == VecLen);
+ MVT SingleTy = getSingleVT(MVT::i8);
+
+ SmallVector<SDValue,128> Ops;
+ for (int I : Mask) {
+ if (I < 0) {
+ Ops.push_back(ISel.selectUndef(dl, ElemTy));
+ continue;
+ }
+ SDValue Vec;
+ unsigned M = I;
+ if (M < VecLen) {
+ Vec = Va;
+ } else {
+ Vec = Vb;
+ M -= VecLen;
+ }
+ if (HavePairs) {
+ if (M < HwLen) {
+ Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
+ } else {
+ Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
+ M -= HwLen;
+ }
+ }
+ SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, {Vec, Idx});
+ SDValue L = Lower.LowerOperation(Ex, DAG);
+ assert(L.getNode());
+ Ops.push_back(L);
+ }
+
+ SDValue LV;
+ if (2*HwLen == VecLen) {
+ SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
+ SDValue L0 = Lower.LowerOperation(B0, DAG);
+ SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
+ SDValue L1 = Lower.LowerOperation(B1, DAG);
+ // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
+ // functions may expect to be called only for illegal operations, so
+ // make sure that they are not called for legal ones. Develop a better
+ // mechanism for dealing with this.
+ LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
+ } else {
+ SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
+ LV = Lower.LowerOperation(BV, DAG);
+ }
+
+ assert(!N->use_empty());
+ ISel.ReplaceNode(N, LV.getNode());
+ DAG.RemoveDeadNodes();
+
+ std::deque<SDNode*> SubNodes;
+ SubNodes.push_back(LV.getNode());
+ for (unsigned I = 0; I != SubNodes.size(); ++I) {
+ for (SDValue Op : SubNodes[I]->ops())
+ SubNodes.push_back(Op.getNode());
+ }
+ while (!SubNodes.empty()) {
+ SDNode *S = SubNodes.front();
+ SubNodes.pop_front();
+ if (S->use_empty())
+ continue;
+ // This isn't great, but users need to be selected before any nodes that
+ // they use. (The reason is to match larger patterns, and avoid nodes that
+ // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
+ bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
+ return llvm::any_of(SubNodes, [U](const SDNode *T) {
+ return T == U;
+ });
+ });
+ if (PendingUser)
+ SubNodes.push_back(S);
+ else
+ ISel.Select(S);
+ }
+
+ DAG.RemoveDeadNodes();
+ return true;
+}
+
+OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
+ ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ if (!Va.isValid() || !Vb.isValid())
+ return OpRef::fail();
+
+ // Contracting shuffles, i.e. instructions that always discard some bytes
+ // from the operand vectors.
+ //
+ // V6_vshuff{e,o}b
+ // V6_vdealb4w
+ // V6_vpack{e,o}{b,h}
+
+ int VecLen = SM.Mask.size();
+ std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
+ MVT ResTy = getSingleVT(MVT::i8);
+
+ // The following shuffles only work for bytes and halfwords. This requires
+ // the strip length to be 1 or 2.
+ if (Strip.second != 1 && Strip.second != 2)
+ return OpRef::fail();
+
+ // The patterns for the shuffles, in terms of the starting offsets of the
+ // consecutive strips (L = length of the strip, N = VecLen):
+ //
+ // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2
+ // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2
+ //
+ // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2
+ // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2
+ //
+ // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ...
+
+ // The value of the element in the mask following the strip will decide
+ // what kind of a shuffle this can be.
+ int NextInMask = SM.Mask[Strip.second];
+
+ // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask
+ // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would
+ // satisfy this.
+ if (NextInMask < VecLen) {
+ // vpack{e,o} or vdealb4w
+ if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) {
+ int N = VecLen;
+ // Check if this is vdealb4w (L=1).
+ for (int I = 0; I != N/4; ++I)
+ if (SM.Mask[I] != 4*I)
+ return OpRef::fail();
+ for (int I = 0; I != N/4; ++I)
+ if (SM.Mask[I+N/4] != 2 + 4*I)
+ return OpRef::fail();
+ for (int I = 0; I != N/4; ++I)
+ if (SM.Mask[I+N/2] != N + 4*I)
+ return OpRef::fail();
+ for (int I = 0; I != N/4; ++I)
+ if (SM.Mask[I+3*N/4] != N+2 + 4*I)
+ return OpRef::fail();
+ // Matched mask for vdealb4w.
+ Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va});
+ return OpRef::res(Results.top());
+ }
+
+ // Check if this is vpack{e,o}.
+ int N = VecLen;
+ int L = Strip.second;
+ // Check if the first strip starts at 0 or at L.
+ if (Strip.first != 0 && Strip.first != L)
+ return OpRef::fail();
+ // Examine the rest of the mask.
+ for (int I = L; I < N/2; I += L) {
+ auto S = findStrip(subm(SM.Mask,I), 1, N-I);
+ // Check whether the mask element at the beginning of each strip
+ // increases by 2L each time.
+ if (S.first - Strip.first != 2*I)
+ return OpRef::fail();
+ // Check whether each strip is of the same length.
+ if (S.second != unsigned(L))
+ return OpRef::fail();
+ }
+
+ // Strip.first == 0 => vpacke
+ // Strip.first == L => vpacko
+ assert(Strip.first == 0 || Strip.first == L);
+ using namespace Hexagon;
+ NodeTemplate Res;
+ Res.Opc = Strip.second == 1 // Number of bytes.
+ ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob)
+ : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh);
+ Res.Ty = ResTy;
+ Res.Ops = { Vb, Va };
+ Results.push(Res);
+ return OpRef::res(Results.top());
+ }
+
+ // Check if this is vshuff{e,o}.
+ int N = VecLen;
+ int L = Strip.second;
+ std::pair<int,unsigned> PrevS = Strip;
+ bool Flip = false;
+ for (int I = L; I < N; I += L) {
+ auto S = findStrip(subm(SM.Mask,I), 1, N-I);
+ if (S.second != PrevS.second)
+ return OpRef::fail();
+ int Diff = Flip ? PrevS.first - S.first + 2*L
+ : S.first - PrevS.first;
+ if (Diff != N)
+ return OpRef::fail();
+ Flip ^= true;
+ PrevS = S;
+ }
+ // Strip.first == 0 => vshuffe
+ // Strip.first == L => vshuffo
+ assert(Strip.first == 0 || Strip.first == L);
+ using namespace Hexagon;
+ NodeTemplate Res;
+ Res.Opc = Strip.second == 1 // Number of bytes.
+ ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob)
+ : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh);
+ Res.Ty = ResTy;
+ Res.Ops = { Vb, Va };
+ Results.push(Res);
+ return OpRef::res(Results.top());
+}
+
+OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ // Expanding shuffles (using all elements and inserting into larger vector):
+ //
+ // V6_vunpacku{b,h} [*]
+ //
+ // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
+ //
+ // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
+ // they are not shuffles.
+ //
+ // The argument is a single vector.
+
+ int VecLen = SM.Mask.size();
+ assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
+
+ std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
+
+ // The patterns for the unpacks, in terms of the starting offsets of the
+ // consecutive strips (L = length of the strip, N = VecLen):
+ //
+ // vunpacku: 0, -1, L, -1, 2L, -1 ...
+
+ if (Strip.first != 0)
+ return OpRef::fail();
+
+ // The vunpackus only handle byte and half-word.
+ if (Strip.second != 1 && Strip.second != 2)
+ return OpRef::fail();
+
+ int N = VecLen;
+ int L = Strip.second;
+
+ // First, check the non-ignored strips.
+ for (int I = 2*L; I < 2*N; I += 2*L) {
+ auto S = findStrip(subm(SM.Mask,I), 1, N-I);
+ if (S.second != unsigned(L))
+ return OpRef::fail();
+ if (2*S.first != I)
+ return OpRef::fail();
+ }
+ // Check the -1s.
+ for (int I = L; I < 2*N; I += 2*L) {
+ auto S = findStrip(subm(SM.Mask,I), 0, N-I);
+ if (S.first != -1 || S.second != unsigned(L))
+ return OpRef::fail();
+ }
+
+ unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
+ : Hexagon::V6_vunpackuh;
+ Results.push(Opc, getPairVT(MVT::i8), {Va});
+ return OpRef::res(Results.top());
+}
+
+OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ // V6_vdeal{b,h}
+ // V6_vshuff{b,h}
+
+ // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
+ // V6_vshuffvdd (V6_vshuff)
+ // V6_dealvdd (V6_vdeal)
+
+ // TODO Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
+
+ int VecLen = SM.Mask.size();
+ assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
+ unsigned LogLen = Log2_32(VecLen);
+
+ if (!isPermutation(SM.Mask))
+ return OpRef::fail();
+
+ SmallVector<unsigned,8> Perm(LogLen);
+
+ // Check if this could be a perfect shuffle, or a combination of perfect
+ // shuffles.
+ //
+ // Consider this permutation (using hex digits to make the ASCII diagrams
+ // easier to read):
+ // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
+ // This is a "deal" operation: divide the input into two halves, and
+ // create the output by picking elements by alternating between these two
+ // halves:
+ // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
+ // 8 9 A B C D E F
+ //
+ // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
+ // a somwehat different mechanism that could be used to perform shuffle/
+ // deal operations: a 2x2 transpose.
+ // Consider the halves of inputs again, they can be interpreted as a 2x8
+ // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
+ // together. Now, when considering 2 elements at a time, it will be a 2x4
+ // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
+ // 01 23 45 67
+ // 89 AB CD EF
+ // With groups of 4, this will become a single 2x2 matrix, and so on.
+ //
+ // The 2x2 transpose instruction works by transposing each of the 2x2
+ // matrices (or "sub-matrices"), given a specific group size. For example,
+ // if the group size is 1 (i.e. each element is its own group), there
+ // will be four transposes of the four 2x2 matrices that form the 2x8.
+ // For example, with the inputs as above, the result will be:
+ // 0 8 2 A 4 C 6 E
+ // 1 9 3 B 5 D 7 F
+ // Now, this result can be tranposed again, but with the group size of 2:
+ // 08 19 4C 5D
+ // 2A 3B 6E 7F
+ // If we then transpose that result, but with the group size of 4, we get:
+ // 0819 2A3B
+ // 4C5D 6E7F
+ // If we concatenate these two rows, it will be
+ // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
+ // which is the same as the "deal" [*] above.
+ //
+ // In general, a "deal" of individual elements is a series of 2x2 transposes,
+ // with changing group size. HVX has two instructions:
+ // Vdd = V6_vdealvdd Vu, Vv, Rt
+ // Vdd = V6_shufvdd Vu, Vv, Rt
+ // that perform exactly that. The register Rt controls which transposes are
+ // going to happen: a bit at position n (counting from 0) indicates that a
+ // transpose with a group size of 2^n will take place. If multiple bits are
+ // set, multiple transposes will happen: vdealvdd will perform them starting
+ // with the largest group size, vshuffvdd will do them in the reverse order.
+ //
+ // The main observation is that each 2x2 transpose corresponds to swapping
+ // columns of bits in the binary representation of the values.
+ //
+ // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
+ // in a given column. The * denote the columns that will be swapped.
+ // The transpose with the group size 2^n corresponds to swapping columns
+ // 3 (the highest log) and log2(n):
+ //
+ // 3 2 1 0 0 2 1 3 0 2 3 1
+ // * * * * * *
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
+ // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
+ // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
+ // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
+ // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
+ // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
+ // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
+ // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
+ // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
+ // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
+ // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
+ // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
+ // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
+ // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
+ // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
+
+ auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) {
+ unsigned X = Mask[0] ^ Mask[Num/2];
+ // Check that the first half has the X's bits clear.
+ if ((Mask[0] & X) != 0)
+ return 0u;
+ for (unsigned I = 1; I != Num/2; ++I) {
+ if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X)
+ return 0u;
+ if ((Mask[I] & X) != 0)
+ return 0u;
+ }
+ return X;
+ };
+
+ // Create a vector of log2's for each column: Perm[i] corresponds to
+ // the i-th bit (lsb is 0).
+ assert(VecLen > 2);
+ for (unsigned I = VecLen; I >= 2; I >>= 1) {
+ // Examine the initial segment of Mask of size I.
+ unsigned X = XorPow2(SM.Mask, I);
+ if (!isPowerOf2_32(X))
+ return OpRef::fail();
+ // Check the other segments of Mask.
+ for (int J = 0; J < VecLen; J += I) {
+ if (XorPow2(subm(SM.Mask, -J), I) != X)
+ return OpRef::fail();
+ }
+ Perm[Log2_32(X)] = Log2_32(I)-1;
+ }
+
+ // Once we have Perm, represent it as cycles. Denote the maximum log2
+ // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
+ // written as (M a1 a2 a3 ... an). That cycle can be broken up into
+ // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
+ // order being from left to right. Any (contiguous) segment where the
+ // values ai, ai+1...aj are either all increasing or all decreasing,
+ // can be implemented via a single vshuffvdd/vdealvdd respectively.
+ //
+ // If there is a cycle (a1 a2 ... an) that does not involve M, it can
+ // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
+ // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
+ // can be used to generate a sequence of vshuffvdd/vdealvdd.
+ //
+ // Example:
+ // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
+ // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
+ // (4 0 1)(4 0)(4 2 3)(4 2).
+ // It can then be expanded into swaps as
+ // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
+ // and broken up into "increasing" segments as
+ // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
+ // This is equivalent to
+ // (4 0 1)(4 0 2 3)(4 2),
+ // which can be implemented as 3 vshufvdd instructions.
+
+ using CycleType = SmallVector<unsigned,8>;
+ std::set<CycleType> Cycles;
+ std::set<unsigned> All;
+
+ for (unsigned I : Perm)
+ All.insert(I);
+
+ // If the cycle contains LogLen-1, move it to the front of the cycle.
+ // Otherwise, return the cycle unchanged.
+ auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
+ unsigned LogPos, N = C.size();
+ for (LogPos = 0; LogPos != N; ++LogPos)
+ if (C[LogPos] == LogLen-1)
+ break;
+ if (LogPos == N)
+ return C;
+
+ CycleType NewC(C.begin()+LogPos, C.end());
+ NewC.append(C.begin(), C.begin()+LogPos);
+ return NewC;
+ };
+
+ while (!All.empty()) {
+ unsigned A = *All.begin();
+ All.erase(A);
+ CycleType C;
+ C.push_back(A);
+ for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
+ C.push_back(B);
+ All.erase(B);
+ }
+ if (C.size() <= 1)
+ continue;
+ Cycles.insert(canonicalize(C));
+ }
+
+ SmallVector<unsigned,8> SwapElems;
+ if (HwLen == unsigned(VecLen))
+ SwapElems.push_back(LogLen-1);
+
+ for (const CycleType &C : Cycles) {
+ unsigned First = (C[0] == LogLen-1) ? 1 : 0;
+ SwapElems.append(C.begin()+First, C.end());
+ if (First == 0)
+ SwapElems.push_back(C[0]);
+ }
+
+ const SDLoc &dl(Results.InpNode);
+ OpRef Arg = Va;
+ MVT PairTy = getPairVT(MVT::i8);
+
+ for (unsigned I = 0, E = SwapElems.size(); I != E; ) {
+ bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1];
+ unsigned S = (1u << SwapElems[I]);
+ if (I < E-1) {
+ while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1]))
+ S |= 1u << SwapElems[I];
+ // The above loop will not add a bit for the final SwapElems[I+1],
+ // so add it here.
+ S |= 1u << SwapElems[I];
+ }
+ ++I;
+
+ NodeTemplate Res;
+ Results.push(Hexagon::A2_tfrsi, MVT::i32,
+ { DAG.getTargetConstant(S, dl, MVT::i32) });
+ Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
+ Res.Ty = PairTy;
+ Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) };
+ Results.push(Res);
+ Arg = OpRef::res(Results.top());
+ }
+
+ return Arg;
+}
+
+OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
+ DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
+ // Butterfly shuffles.
+ //
+ // V6_vdelta
+ // V6_vrdelta
+ // V6_vror
+
+ // The assumption here is that all elements picked by Mask are in the
+ // first operand to the vector_shuffle. This assumption is enforced
+ // by the caller.
+
+ MVT ResTy = getSingleVT(MVT::i8);
+ PermNetwork::Controls FC, RC;
+ const SDLoc &dl(Results.InpNode);
+ int VecLen = SM.Mask.size();
+
+ for (int M : SM.Mask) {
+ if (M != -1 && M >= VecLen)
+ return OpRef::fail();
+ }
+
+ // Try the deltas/benes for both single vectors and vector pairs.
+ ForwardDeltaNetwork FN(SM.Mask);
+ if (FN.run(FC)) {
+ SDValue Ctl = getVectorConstant(FC, dl);
+ Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
+ return OpRef::res(Results.top());
+ }
+
+ // Try reverse delta.
+ ReverseDeltaNetwork RN(SM.Mask);
+ if (RN.run(RC)) {
+ SDValue Ctl = getVectorConstant(RC, dl);
+ Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
+ return OpRef::res(Results.top());
+ }
+
+ // Do Benes.
+ BenesNetwork BN(SM.Mask);
+ if (BN.run(FC, RC)) {
+ SDValue CtlF = getVectorConstant(FC, dl);
+ SDValue CtlR = getVectorConstant(RC, dl);
+ Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
+ Results.push(Hexagon::V6_vrdelta, ResTy,
+ {OpRef::res(-1), OpRef(CtlR)});
+ return OpRef::res(Results.top());
+ }
+
+ return OpRef::fail();
+}
+
+SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
+ const SDLoc &dl) {
+ SmallVector<SDValue, 128> Elems;
+ for (uint8_t C : Data)
+ Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
+ MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
+ SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
+ SDValue LV = Lower.LowerOperation(BV, DAG);
+ DAG.RemoveDeadNode(BV.getNode());
+ return LV;
+}
+
+void HvxSelector::selectShuffle(SDNode *N) {
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "Starting " << __func__ << " on node:\n";
+ N->dump(&DAG);
+ });
+ MVT ResTy = N->getValueType(0).getSimpleVT();
+ // Assume that vector shuffles operate on vectors of bytes.
+ assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
+
+ auto *SN = cast<ShuffleVectorSDNode>(N);
+ std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
+ // This shouldn't really be necessary. Is it?
+ for (int &Idx : Mask)
+ if (Idx != -1 && Idx < 0)
+ Idx = -1;
+
+ unsigned VecLen = Mask.size();
+ bool HavePairs = (2*HwLen == VecLen);
+ assert(ResTy.getSizeInBits() / 8 == VecLen);
+
+ // Vd = vector_shuffle Va, Vb, Mask
+ //
+
+ bool UseLeft = false, UseRight = false;
+ for (unsigned I = 0; I != VecLen; ++I) {
+ if (Mask[I] == -1)
+ continue;
+ unsigned Idx = Mask[I];
+ assert(Idx < 2*VecLen);
+ if (Idx < VecLen)
+ UseLeft = true;
+ else
+ UseRight = true;
+ }
+
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
+ << UseLeft << " UseRight=" << UseRight << " HavePairs="
+ << HavePairs << '\n';
+ });
+ // If the mask is all -1's, generate "undef".
+ if (!UseLeft && !UseRight) {
+ ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
+ DAG.RemoveDeadNode(N);
+ return;
+ }
+
+ SDValue Vec0 = N->getOperand(0);
+ SDValue Vec1 = N->getOperand(1);
+ ResultStack Results(SN);
+ Results.push(TargetOpcode::COPY, ResTy, {Vec0});
+ Results.push(TargetOpcode::COPY, ResTy, {Vec1});
+ OpRef Va = OpRef::res(Results.top()-1);
+ OpRef Vb = OpRef::res(Results.top());
+
+ OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
+ : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
+
+ bool Done = Res.isValid();
+ if (Done)
+ materialize(Results);
+ else
+ Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
+
+ if (!Done) {
+#ifndef NDEBUG
+ dbgs() << "Unhandled shuffle:\n";
+ SN->dumpr(&DAG);
+#endif
+ llvm_unreachable("Failed to select vector shuffle");
+ }
+}
+
+void HvxSelector::selectRor(SDNode *N) {
+ // If this is a rotation by less than 8, use V6_valignbi.
+ MVT Ty = N->getValueType(0).getSimpleVT();
+ const SDLoc &dl(N);
+ SDValue VecV = N->getOperand(0);
+ SDValue RotV = N->getOperand(1);
+ SDNode *NewN = nullptr;
+
+ if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
+ unsigned S = CN->getZExtValue();
+ if (S % HST.getVectorLength() == 0) {
+ NewN = VecV.getNode();
+ } else if (isUInt<3>(S)) {
+ SDValue C = DAG.getTargetConstant(S, dl, MVT::i32);
+ NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
+ {VecV, VecV, C});
+ }
+ }
+
+ if (!NewN)
+ NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
+
+ ISel.ReplaceNode(N, NewN);
+ DAG.RemoveDeadNode(N);
+}
+
+void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
+ HvxSelector(*this, *CurDAG).selectShuffle(N);
+}
+
+void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
+ HvxSelector(*this, *CurDAG).selectRor(N);
+}
+
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 22bbb3e..859f697 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -129,6 +129,19 @@
// Implement calling convention for Hexagon.
+static const MVT LegalV64[] = {
+ MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64
+};
+static const MVT LegalW64[] = {
+ MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64
+};
+static const MVT LegalV128[] = {
+ MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64
+};
+static const MVT LegalW128[] = {
+ MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64
+};
+
static bool
CC_Hexagon(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -1978,36 +1991,52 @@
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
+ setOperationAction(Opc, FromTy, Promote);
+ AddPromotedToType(Opc, FromTy, ToTy);
+ };
+
if (Subtarget.useHVXOps()) {
- if (Subtarget.useHVX64BOps()) {
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom);
- // We try to generate the vpack{e/o} instructions. If we fail
- // we fall back upon ExpandOp.
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i8, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
- } else if (Subtarget.useHVX128BOps()) {
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom);
- // We try to generate the vpack{e/o} instructions. If we fail
- // we fall back upon ExpandOp.
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v128i8, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v128i8, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i16, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
- } else {
- llvm_unreachable("Unrecognized HVX mode");
+ bool Use64b = Subtarget.useHVX64BOps();
+ ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
+ ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
+ MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
+ MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal);
+ setOperationAction(ISD::OR, ByteV, Legal);
+
+ for (MVT T : LegalV) {
+ setIndexedLoadAction(ISD::POST_INC, T, Legal);
+ setIndexedStoreAction(ISD::POST_INC, T, Legal);
+
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ }
+
+ for (MVT T : LegalV) {
+ if (T == ByteV)
+ continue;
+ // Promote all shuffles and concats to operate on vectors of bytes.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
+ setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV);
+ setPromoteTo(ISD::OR, T, ByteV);
+ }
+
+ for (MVT T : LegalW) {
+ if (T == ByteW)
+ continue;
+ // Promote all shuffles and concats to operate on vectors of bytes.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
+ setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
}
}
+
// Subtarget-specific operation actions.
//
if (Subtarget.hasV5TOps()) {
@@ -2069,20 +2098,6 @@
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
- if (Subtarget.useHVX64BOps()) {
- for (MVT VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
- MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
- setIndexedLoadAction(ISD::POST_INC, VT, Legal);
- setIndexedStoreAction(ISD::POST_INC, VT, Legal);
- }
- } else if (Subtarget.useHVX128BOps()) {
- for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64,
- MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64}) {
- setIndexedLoadAction(ISD::POST_INC, VT, Legal);
- setIndexedStoreAction(ISD::POST_INC, VT, Legal);
- }
- }
-
computeRegisterProperties(&HRI);
//
@@ -2225,6 +2240,9 @@
case HexagonISD::VASR: return "HexagonISD::VASR";
case HexagonISD::VLSR: return "HexagonISD::VLSR";
case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT";
+ case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
+ case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
+ case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::OP_END: break;
}
@@ -2252,43 +2270,11 @@
// Should we expand the build vector with shuffles?
bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const {
- // Hexagon vector shuffle operates on element sizes of bytes or halfwords
- EVT EltVT = VT.getVectorElementType();
- int EltBits = EltVT.getSizeInBits();
- if ((EltBits != 8) && (EltBits != 16))
- return false;
-
- return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
-}
-
-static StridedLoadKind isStridedLoad(const ArrayRef<int> &Mask) {
- int even_start = -2;
- int odd_start = -1;
- size_t mask_len = Mask.size();
- for (auto idx : Mask) {
- if ((idx - even_start) == 2)
- even_start = idx;
- else
- break;
- }
- if (even_start == (int)(mask_len * 2) - 2)
- return StridedLoadKind::Even;
- for (auto idx : Mask) {
- if ((idx - odd_start) == 2)
- odd_start = idx;
- else
- break;
- }
- if (odd_start == (int)(mask_len * 2) - 1)
- return StridedLoadKind::Odd;
-
- return StridedLoadKind::NoPattern;
+ return false;
}
bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
EVT VT) const {
- if (Subtarget.useHVXOps())
- return isStridedLoad(Mask) != StridedLoadKind::NoPattern;
return true;
}
@@ -2302,7 +2288,6 @@
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
- bool UseHVX = Subtarget.useHVXOps();
if (V2.isUndef())
V2 = V1;
@@ -2334,27 +2319,6 @@
DAG.getConstant(Lane, dl, MVT::i32));
}
- if (UseHVX) {
- ArrayRef<int> Mask = SVN->getMask();
- size_t MaskLen = Mask.size();
- unsigned SizeInBits = VT.getScalarSizeInBits() * MaskLen;
-
- if ((Subtarget.useHVX64BOps() && SizeInBits == 64 * 8) ||
- (Subtarget.useHVX128BOps() && SizeInBits == 128 * 8)) {
- StridedLoadKind Pattern = isStridedLoad(Mask);
- if (Pattern == StridedLoadKind::NoPattern)
- return SDValue();
-
- unsigned Opc = Pattern == StridedLoadKind::Even ? HexagonISD::VPACKE
- : HexagonISD::VPACKO;
- return DAG.getNode(Opc, dl, VT, {Op.getOperand(1), Op.getOperand(0)});
- }
- // We used to assert in the "else" part here, but that is bad for Halide
- // Halide creates intermediate double registers by interleaving two
- // concatenated vector registers. The interleaving requires vector_shuffle
- // nodes and we shouldn't barf on a double register result of a
- // vector_shuffle because it is most likely an intermediate result.
- }
// FIXME: We need to support more general vector shuffles. See
// below the comment from the ARM backend that deals in the general
// case with the vector shuffles. For now, let expand handle these.
@@ -2445,7 +2409,7 @@
SmallVector<ConstantSDNode*,4> Consts;
bool AllConst = true;
for (SDValue V : Elem) {
- if (V.getOpcode() == ISD::UNDEF)
+ if (isUndef(V))
V = DAG.getConstant(0, dl, ElemTy);
auto *C = dyn_cast<ConstantSDNode>(V.getNode());
Consts.push_back(C);
@@ -2454,7 +2418,7 @@
unsigned First, Num = Elem.size();
for (First = 0; First != Num; ++First)
- if (Elem[First].getOpcode() != ISD::UNDEF)
+ if (!isUndef(Elem[First]))
break;
if (First == Num)
return DAG.getUNDEF(VecTy);
@@ -2466,9 +2430,9 @@
Consts[1]->getZExtValue() << 16;
return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
}
- SDNode *N = DAG.getMachineNode(Hexagon::A2_combine_ll, dl, MVT::i32,
- { Elem[1], Elem[0] });
- return DAG.getBitcast(MVT::v2i16, SDValue(N,0));
+ SDValue N = getNode(Hexagon::A2_combine_ll, dl, MVT::i32,
+ {Elem[1], Elem[0]}, DAG);
+ return DAG.getBitcast(MVT::v2i16, N);
}
// First try generating a constant.
@@ -2486,7 +2450,7 @@
for (unsigned i = 0; i != Num; ++i) {
if (i == First)
continue;
- if (Elem[i] == Elem[First] || Elem[i].getOpcode() == ISD::UNDEF)
+ if (Elem[i] == Elem[First] || isUndef(Elem[i]))
continue;
IsSplat = false;
break;
@@ -2507,9 +2471,9 @@
SDValue V5 = DAG.getNode(ISD::SHL, dl, MVT::i32, {V3, S8});
SDValue V6 = DAG.getNode(ISD::OR, dl, MVT::i32, {V0, V4});
SDValue V7 = DAG.getNode(ISD::OR, dl, MVT::i32, {V2, V5});
- SDNode *T0 = DAG.getMachineNode(Hexagon::A2_combine_ll, dl, MVT::i32,
- {V7, V6});
- return DAG.getBitcast(MVT::v4i8, SDValue(T0,0));
+
+ SDValue T0 = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {V7, V6}, DAG);
+ return DAG.getBitcast(MVT::v4i8, T0);
}
SDValue
@@ -2521,7 +2485,7 @@
SmallVector<ConstantSDNode*,8> Consts;
bool AllConst = true;
for (SDValue V : Elem) {
- if (V.getOpcode() == ISD::UNDEF)
+ if (isUndef(V))
V = DAG.getConstant(0, dl, ElemTy);
auto *C = dyn_cast<ConstantSDNode>(V.getNode());
Consts.push_back(C);
@@ -2530,7 +2494,7 @@
unsigned First, Num = Elem.size();
for (First = 0; First != Num; ++First)
- if (Elem[First].getOpcode() != ISD::UNDEF)
+ if (!isUndef(Elem[First]))
break;
if (First == Num)
return DAG.getUNDEF(VecTy);
@@ -2541,7 +2505,7 @@
for (unsigned i = 0; i != Num; ++i) {
if (i == First)
continue;
- if (Elem[i] == Elem[First] || Elem[i].getOpcode() == ISD::UNDEF)
+ if (Elem[i] == Elem[First] || isUndef(Elem[i]))
continue;
IsSplat = false;
break;
@@ -2570,12 +2534,7 @@
SDValue H = (ElemTy == MVT::i32)
? Elem[1]
: buildVector32({Elem.data()+Num/2, Num/2}, dl, HalfTy, DAG);
- unsigned Id = Hexagon::DoubleRegsRegClassID;
- SDNode *N = DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VecTy,
- { DAG.getTargetConstant(Id, dl, MVT::i32),
- L, DAG.getTargetConstant(Hexagon::isub_lo, dl, MVT::i32),
- H, DAG.getTargetConstant(Hexagon::isub_hi, dl, MVT::i32) });
- return SDValue(N, 0);
+ return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
}
SDValue
@@ -2675,120 +2634,33 @@
SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getValueType().getSimpleVT();
- unsigned BW = VT.getSizeInBits();
+ MVT VecTy = ty(Op);
+ unsigned BW = VecTy.getSizeInBits();
if (BW == 32 || BW == 64) {
SmallVector<SDValue,8> Ops;
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
Ops.push_back(Op.getOperand(i));
if (BW == 32)
- return buildVector32(Ops, SDLoc(Op), VT, DAG);
- return buildVector64(Ops, SDLoc(Op), VT, DAG);
+ return buildVector32(Ops, SDLoc(Op), VecTy, DAG);
+ return buildVector64(Ops, SDLoc(Op), VecTy, DAG);
}
+ if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
+ return LowerHvxBuildVector(Op, DAG);
+
return SDValue();
}
SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc dl(Op);
- bool UseHVX = Subtarget.useHVXOps();
- EVT VT = Op.getValueType();
- unsigned NElts = Op.getNumOperands();
- SDValue Vec0 = Op.getOperand(0);
- EVT VecVT = Vec0.getValueType();
- unsigned Width = VecVT.getSizeInBits();
+ MVT VecTy = ty(Op);
+ assert(!Subtarget.useHVXOps() || !Subtarget.isHVXVectorType(VecTy));
- if (NElts == 2) {
- MVT ST = VecVT.getSimpleVT();
- // We are trying to concat two v2i16 to a single v4i16, or two v4i8
- // into a single v8i8.
- if (ST == MVT::v2i16 || ST == MVT::v4i8)
- return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0);
-
- if (UseHVX) {
- assert((Width == 64 * 8 && Subtarget.useHVX64BOps()) ||
- (Width == 128 * 8 && Subtarget.useHVX128BOps()));
- SDValue Vec1 = Op.getOperand(1);
- MVT OpTy = Subtarget.useHVX64BOps() ? MVT::v16i32 : MVT::v32i32;
- MVT ReTy = Subtarget.useHVX64BOps() ? MVT::v32i32 : MVT::v64i32;
- SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0);
- SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1);
- SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0);
- return DAG.getNode(ISD::BITCAST, dl, VT, VC);
- }
- }
-
- if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64)
- return SDValue();
-
- SDValue C0 = DAG.getConstant(0, dl, MVT::i64);
- SDValue C32 = DAG.getConstant(32, dl, MVT::i64);
- SDValue W = DAG.getConstant(Width, dl, MVT::i64);
- // Create the "width" part of the argument to insert_rp/insertp_rp.
- SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32);
- SDValue V = C0;
-
- for (unsigned i = 0, e = NElts; i != e; ++i) {
- unsigned N = NElts-i-1;
- SDValue OpN = Op.getOperand(N);
-
- if (VT.getSizeInBits() == 64 && OpN.getValueSizeInBits() == 32) {
- SDValue C = DAG.getConstant(0, dl, MVT::i32);
- OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
- }
- SDValue Idx = DAG.getConstant(N, dl, MVT::i64);
- SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W);
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
- if (VT.getSizeInBits() == 32)
- V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
- else if (VT.getSizeInBits() == 64)
- V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
- else
- return SDValue();
- }
-
- return DAG.getNode(ISD::BITCAST, dl, VT, V);
-}
-
-SDValue
-HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getOperand(0).getValueType();
- SDLoc dl(Op);
- bool UseHVX = Subtarget.useHVXOps();
- bool UseHVX64B = Subtarget.useHVX64BOps();
- // Just in case...
-
- if (!VT.isVector() || !UseHVX)
- return SDValue();
-
- EVT ResVT = Op.getValueType();
- unsigned ResSize = ResVT.getSizeInBits();
- unsigned VectorSizeInBits = UseHVX64B ? (64 * 8) : (128 * 8);
- unsigned OpSize = VT.getSizeInBits();
-
- // We deal only with cases where the result is the vector size
- // and the vector operand is a double register.
- if (!(ResVT.isByteSized() && ResSize == VectorSizeInBits) ||
- !(VT.isByteSized() && OpSize == 2 * VectorSizeInBits))
- return SDValue();
-
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Cst)
- return SDValue();
- unsigned Val = Cst->getZExtValue();
-
- // These two will get lowered to an appropriate EXTRACT_SUBREG in ISel.
- if (Val == 0) {
- SDValue Vec = Op.getOperand(0);
- return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResVT, Vec);
- }
-
- if (ResVT.getVectorNumElements() == Val) {
- SDValue Vec = Op.getOperand(0);
- return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResVT, Vec);
+ if (VecTy.getSizeInBits() == 64) {
+ assert(Op.getNumOperands() == 2);
+ return DAG.getNode(HexagonISD::COMBINE, SDLoc(Op), VecTy, Op.getOperand(1),
+ Op.getOperand(0));
}
return SDValue();
@@ -2798,6 +2670,10 @@
HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
+ MVT VecTy = ty(Vec);
+ if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
+ return LowerHvxExtractElement(Op, DAG);
+
MVT ElemTy = ty(Vec).getVectorElementType();
return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
}
@@ -2808,7 +2684,7 @@
SDValue Vec = Op.getOperand(0);
MVT VecTy = ty(Vec);
if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerEXTRACT_SUBVECTOR_HVX(Op, DAG);
+ return LowerHvxExtractSubvector(Op, DAG);
return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ty(Op), ty(Op), DAG);
}
@@ -2817,6 +2693,9 @@
HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
+ if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
+ return LowerHvxInsertElement(Op, DAG);
+
return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
SDLoc(Op), VecTy.getVectorElementType(), DAG);
}
@@ -2824,6 +2703,9 @@
SDValue
HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
+ if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op)))
+ return LowerHvxInsertSubvector(Op, DAG);
+
SDValue ValV = Op.getOperand(1);
return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
SDLoc(Op), ty(ValV), DAG);
@@ -2911,6 +2793,7 @@
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
}
+ return SDValue();
}
/// Returns relocation base for the given PIC jumptable.
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 9f7891e..1731091 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -63,6 +63,9 @@
VCOMBINE,
VPACKE,
VPACKO,
+ VEXTRACTW,
+ VINSERTW0,
+ VROR,
TC_RETURN,
EH_RETURN,
DCFETCH,
@@ -88,6 +91,8 @@
explicit HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST);
+ bool isHVXVectorType(MVT Ty) const;
+
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
@@ -121,9 +126,8 @@
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const;
@@ -281,7 +285,24 @@
return Ty;
return MVT::getIntegerVT(Ty.getSizeInBits());
}
+ MVT tyVector(MVT Ty, MVT ElemTy) const {
+ if (Ty.isVector() && Ty.getVectorElementType() == ElemTy)
+ return Ty;
+ unsigned TyWidth = Ty.getSizeInBits(), ElemWidth = ElemTy.getSizeInBits();
+ assert((TyWidth % ElemWidth) == 0);
+ return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth);
+ }
+ bool isUndef(SDValue Op) const {
+ if (Op.isMachineOpcode())
+ return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
+ return Op.getOpcode() == ISD::UNDEF;
+ }
+ SDValue getNode(unsigned MachineOpc, const SDLoc &dl, MVT Ty,
+ ArrayRef<SDValue> Ops, SelectionDAG &DAG) const {
+ SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops);
+ return SDValue(N, 0);
+ }
SDValue buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
SelectionDAG &DAG) const;
SDValue buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy,
@@ -291,6 +312,38 @@
SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const;
+ using VectorPair = std::pair<SDValue, SDValue>;
+ using TypePair = std::pair<MVT, MVT>;
+
+ SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+
+ TypePair ty(const VectorPair &Ops) const {
+ return { Ops.first.getValueType().getSimpleVT(),
+ Ops.second.getValueType().getSimpleVT() };
+ }
+
+ MVT typeJoin(const TypePair &Tys) const;
+ TypePair typeSplit(MVT Ty) const;
+ MVT typeCastElem(MVT VecTy, MVT ElemTy) const;
+ MVT typeExtElem(MVT VecTy, unsigned Factor) const;
+ MVT typeTruncElem(MVT VecTy, unsigned Factor) const;
+
+ SDValue opJoin(const VectorPair &Ops, const SDLoc &dl,
+ SelectionDAG &DAG) const;
+ VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;
+
+ SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
+ SelectionDAG &DAG) const;
+ SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;
+
+ SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const;
+
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
new file mode 100644
index 0000000..3a9e508
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -0,0 +1,299 @@
+//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+
+using namespace llvm;
+
+SDValue
+HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
+ const SDLoc &dl, SelectionDAG &DAG) const {
+ SmallVector<SDValue,4> IntOps;
+ IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
+ for (const SDValue &Op : Ops)
+ IntOps.push_back(Op);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
+}
+
+MVT
+HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
+ assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
+
+ MVT ElemTy = Tys.first.getVectorElementType();
+ return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
+ Tys.second.getVectorNumElements());
+}
+
+HexagonTargetLowering::TypePair
+HexagonTargetLowering::typeSplit(MVT VecTy) const {
+ assert(VecTy.isVector());
+ unsigned NumElem = VecTy.getVectorNumElements();
+ assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
+ MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
+ return { HalfTy, HalfTy };
+}
+
+MVT
+HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
+ MVT ElemTy = VecTy.getVectorElementType();
+ MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
+ return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
+}
+
+MVT
+HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
+ MVT ElemTy = VecTy.getVectorElementType();
+ MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
+ return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
+}
+
+SDValue
+HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
+ SelectionDAG &DAG) const {
+ if (ty(Vec).getVectorElementType() == ElemTy)
+ return Vec;
+ MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
+ return DAG.getBitcast(CastTy, Vec);
+}
+
+SDValue
+HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
+ SelectionDAG &DAG) const {
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
+ Ops.second, Ops.first);
+}
+
+HexagonTargetLowering::VectorPair
+HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
+ SelectionDAG &DAG) const {
+ TypePair Tys = typeSplit(ty(Vec));
+ return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
+}
+
+SDValue
+HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
+ SelectionDAG &DAG) const {
+ if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
+ ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
+
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ if (ElemWidth == 8)
+ return ElemIdx;
+
+ unsigned L = Log2_32(ElemWidth/8);
+ const SDLoc &dl(ElemIdx);
+ return DAG.getNode(ISD::SHL, dl, MVT::i32,
+ {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
+}
+
+SDValue
+HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
+ SelectionDAG &DAG) const {
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ assert(ElemWidth >= 8 && ElemWidth <= 32);
+ if (ElemWidth == 32)
+ return Idx;
+
+ if (ty(Idx) != MVT::i32)
+ Idx = DAG.getBitcast(MVT::i32, Idx);
+ const SDLoc &dl(Idx);
+ SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
+ SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
+ return SubIdx;
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
+ const {
+ const SDLoc &dl(Op);
+ BuildVectorSDNode *BN = cast<BuildVectorSDNode>(Op.getNode());
+ bool IsConst = BN->isConstant();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT VecTy = ty(Op);
+
+ if (IsConst) {
+ SmallVector<Constant*, 128> Elems;
+ for (SDValue V : BN->op_values()) {
+ if (auto *C = dyn_cast<ConstantSDNode>(V.getNode()))
+ Elems.push_back(const_cast<ConstantInt*>(C->getConstantIntValue()));
+ }
+ Constant *CV = ConstantVector::get(Elems);
+ unsigned Align = VecTy.getSizeInBits() / 8;
+ SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG);
+ return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(MF), Align);
+ }
+
+ unsigned NumOps = Op.getNumOperands();
+ unsigned HwLen = Subtarget.getVectorLength();
+ unsigned ElemSize = VecTy.getVectorElementType().getSizeInBits() / 8;
+ assert(ElemSize*NumOps == HwLen);
+
+ SmallVector<SDValue,32> Words;
+ SmallVector<SDValue,32> Ops;
+ for (unsigned i = 0; i != NumOps; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ if (VecTy.getVectorElementType() != MVT::i32) {
+ assert(ElemSize < 4 && "vNi64 should have been promoted to vNi32");
+ assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
+ unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
+ MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
+ for (unsigned i = 0; i != NumOps; i += OpsPerWord) {
+ SDValue W = buildVector32({&Ops[i], OpsPerWord}, dl, PartVT, DAG);
+ Words.push_back(DAG.getBitcast(MVT::i32, W));
+ }
+ } else {
+ Words.assign(Ops.begin(), Ops.end());
+ }
+
+ // Construct two halves in parallel, then or them together.
+ assert(4*Words.size() == Subtarget.getVectorLength());
+ SDValue HalfV0 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
+ SDValue HalfV1 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
+ SDValue S = DAG.getConstant(4, dl, MVT::i32);
+ unsigned NumWords = Words.size();
+ for (unsigned i = 0; i != NumWords/2; ++i) {
+ SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
+ {HalfV0, Words[i]});
+ SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
+ {HalfV1, Words[i+NumWords/2]});
+ HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S});
+ HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S});
+ }
+
+ HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy,
+ {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)});
+ SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1});
+ return DstV;
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
+ const {
+ // Change the type of the extracted element to i32.
+ SDValue VecV = Op.getOperand(0);
+ MVT ElemTy = ty(VecV).getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ assert(ElemWidth >= 8 && ElemWidth <= 32); // TODO i64
+
+ const SDLoc &dl(Op);
+ SDValue IdxV = Op.getOperand(1);
+ if (ty(IdxV) != MVT::i32)
+ IdxV = DAG.getBitcast(MVT::i32, IdxV);
+
+ SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
+ SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
+ {VecV, ByteIdx});
+ if (ElemTy == MVT::i32)
+ return ExWord;
+
+ // Have an extracted word, need to extract the smaller element out of it.
+ // 1. Extract the bits of (the original) IdxV that correspond to the index
+ // of the desired element in the 32-bit word.
+ SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
+ // 2. Extract the element from the word.
+ SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
+ return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
+ const {
+ const SDLoc &dl(Op);
+ SDValue VecV = Op.getOperand(0);
+ SDValue ValV = Op.getOperand(1);
+ SDValue IdxV = Op.getOperand(2);
+ MVT ElemTy = ty(VecV).getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ assert(ElemWidth >= 8 && ElemWidth <= 32); // TODO i64
+
+ auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
+ SDValue ByteIdxV) {
+ MVT VecTy = ty(VecV);
+ unsigned HwLen = Subtarget.getVectorLength();
+ SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
+ {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)});
+ SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
+ SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
+ SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ {DAG.getConstant(HwLen/4, dl, MVT::i32), MaskV});
+ SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
+ return TorV;
+ };
+
+ SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
+ if (ElemTy == MVT::i32)
+ return InsertWord(VecV, ValV, ByteIdx);
+
+ // If this is not inserting a 32-bit word, convert it into such a thing.
+ // 1. Extract the existing word from the target vector.
+ SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
+ {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
+ SDValue Ex0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ {opCastElem(VecV, MVT::i32, DAG), WordIdx});
+ SDValue Ext = LowerHvxExtractElement(Ex0, DAG);
+
+ // 2. Treating the extracted word as a 32-bit vector, insert the given
+ // value into it.
+ SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
+ MVT SubVecTy = tyVector(ty(Ext), ElemTy);
+ SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
+ ValV, SubIdx, dl, SubVecTy, DAG);
+
+ // 3. Insert the 32-bit word back into the original vector.
+ return InsertWord(VecV, Ins, ByteIdx);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
+ const {
+ SDValue SrcV = Op.getOperand(0);
+ MVT SrcTy = ty(SrcV);
+ unsigned SrcElems = SrcTy.getVectorNumElements();
+ SDValue IdxV = Op.getOperand(1);
+ unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
+ MVT DstTy = ty(Op);
+ assert(Idx == 0 || DstTy.getVectorNumElements() % Idx == 0);
+ const SDLoc &dl(Op);
+ if (Idx == 0)
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, DstTy, SrcV);
+ if (Idx == SrcElems/2)
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, DstTy, SrcV);
+ return SDValue();
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
+ const {
+ // Idx may be variable
+ SDValue IdxV = Op.getOperand(2);
+ auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
+ if (!IdxN)
+ return SDValue();
+ unsigned Idx = IdxN->getZExtValue();
+
+ SDValue DstV = Op.getOperand(0);
+ SDValue SrcV = Op.getOperand(1);
+ MVT DstTy = ty(DstV);
+ MVT SrcTy = ty(SrcV);
+ unsigned DstElems = DstTy.getVectorNumElements();
+ unsigned SrcElems = SrcTy.getVectorNumElements();
+ if (2*SrcElems != DstElems)
+ return SDValue();
+
+ const SDLoc &dl(Op);
+ if (Idx == 0)
+ return DAG.getTargetInsertSubreg(Hexagon::vsub_lo, dl, DstTy, DstV, SrcV);
+ if (Idx == SrcElems)
+ return DAG.getTargetInsertSubreg(Hexagon::vsub_hi, dl, DstTy, DstV, SrcV);
+ return SDValue();
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 270575a..f1d01b0 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -19,10 +19,10 @@
// (8) Shift/permute
// (9) Arithmetic/bitwise
// (10) Bit
-// (11) Load
-// (12) Store
-// (13) Memop
-// (14) PIC
+// (11) PIC
+// (12) Load
+// (13) Store
+// (14) Memop
// (15) Call
// (16) Branch
// (17) Misc
@@ -340,6 +340,8 @@
def: Pat<(HexagonCONST32_GP tglobaladdr:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonJT tjumptable:$A), (A2_tfrsi imm:$A)>;
def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>;
+// The HVX load patterns also match CP directly. Make sure that if
+// the selection of this opcode changes, it's updated in all places.
def: Pat<(i1 0), (PS_false)>;
def: Pat<(i1 1), (PS_true)>;
@@ -1630,7 +1632,31 @@
(I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
}
-// --(11) Load -----------------------------------------------------------
+// --(11) PIC ------------------------------------------------------------
+//
+
+def SDT_HexagonAtGot
+ : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def SDT_HexagonAtPcrel
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// AT_GOT address-of-GOT, address-of-global, offset-in-global
+def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
+// AT_PCREL address-of-global
+def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
+
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
+ (L2_loadri_io I32:$got, imm:$addr)>;
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
+ (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
+def: Pat<(HexagonAtPcrel I32:$addr),
+ (C4_addipc imm:$addr)>;
+
+// The HVX load patterns also match AT_PCREL directly. Make sure that
+// if the selection of this opcode changes, it's updated in all places.
+
+
+// --(12) Load -----------------------------------------------------------
//
def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{
@@ -1971,6 +1997,12 @@
PatFrag ImmPred> {
def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>;
def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>;
+ // The HVX selection code for shuffles can generate vector constants.
+ // Calling "Select" on the resulting loads from CP fails without these
+ // patterns.
+ def: Pat<(VT (Load (HexagonCP tconstpool:$A))), (MI (A2_tfrsi imm:$A), 0)>;
+ def: Pat<(VT (Load (HexagonAtPcrel tconstpool:$A))),
+ (MI (C4_addipc imm:$A), 0)>;
}
@@ -1997,7 +2029,7 @@
}
-// --(12) Store ----------------------------------------------------------
+// --(13) Store ----------------------------------------------------------
//
@@ -2466,7 +2498,7 @@
}
-// --(13) Memop ----------------------------------------------------------
+// --(14) Memop ----------------------------------------------------------
//
def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
@@ -2744,27 +2776,6 @@
}
-// --(14) PIC ------------------------------------------------------------
-//
-
-def SDT_HexagonAtGot
- : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
-def SDT_HexagonAtPcrel
- : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-// AT_GOT address-of-GOT, address-of-global, offset-in-global
-def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
-// AT_PCREL address-of-global
-def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
-
-def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
- (L2_loadri_io I32:$got, imm:$addr)>;
-def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
- (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
-def: Pat<(HexagonAtPcrel I32:$addr),
- (C4_addipc imm:$addr)>;
-
-
// --(15) Call -----------------------------------------------------------
//
@@ -2894,3 +2905,32 @@
[SDNPHasChain]>;
def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
+
+def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
+def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;
+
+def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
+def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
+
+let Predicates = [UseHVX] in {
+ def: Pat<(concat_vectors HVI8:$Vs, HVI8:$Vt),
+ (V6_vcombine HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(or HVI8:$Vs, HVI8:$Vt),
+ (V6_vor HvxVR:$Vt, HvxVR:$Vs)>;
+
+ def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
+ (V6_extractw HvxVR:$Vu, I32:$Rs)>;
+ def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
+ (V6_extractw HvxVR:$Vu, I32:$Rs)>;
+ def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs),
+ (V6_extractw HvxVR:$Vu, I32:$Rs)>;
+
+ def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+ def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+ def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+}