[AArch64][SVE] Asm: Support for LD1RQ load-and-replicate quad-word vector instructions.
Reviewers: fhahn, rengolin, samparker, SjoerdMeijer, javed.absar
Reviewed By: SjoerdMeijer
Differential Revision: https://reviews.llvm.org/D46250
llvm-svn: 331339
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4578ee8..7772002 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -323,6 +323,7 @@
def SImm4s2Operand : SImmScaledMemoryIndexed<4, 2>;
def SImm4s3Operand : SImmScaledMemoryIndexed<4, 3>;
def SImm4s4Operand : SImmScaledMemoryIndexed<4, 4>;
+def SImm4s16Operand : SImmScaledMemoryIndexed<4, 16>;
def simm4s1 : Operand<i64>, ImmLeaf<i64,
[{ return Imm >=-8 && Imm <= 7; }]> {
@@ -350,6 +351,12 @@
let ParserMatchClass = SImm4s4Operand;
let DecoderMethod = "DecodeSImm<4>";
}
+def simm4s16 : Operand<i64>, ImmLeaf<i64,
+[{ return Imm >=-128 && Imm <= 112 && (Imm % 16) == 0x0; }]> {
+ let PrintMethod = "printImmScale<16>";
+ let ParserMatchClass = SImm4s16Operand;
+ let DecoderMethod = "DecodeSImm<4>";
+}
class AsmImmRange<int Low, int High> : AsmOperandClass {
let Name = "Imm" # Low # "_" # High;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a69a76e..c68645b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -38,6 +38,15 @@
defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>;
defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>;
+ defm LD1RQ_B_IMM : sve_mem_ldqr_si<0b00, "ld1rqb", Z_b, ZPR8>;
+ defm LD1RQ_H_IMM : sve_mem_ldqr_si<0b01, "ld1rqh", Z_h, ZPR16>;
+ defm LD1RQ_W_IMM : sve_mem_ldqr_si<0b10, "ld1rqw", Z_s, ZPR32>;
+ defm LD1RQ_D_IMM : sve_mem_ldqr_si<0b11, "ld1rqd", Z_d, ZPR64>;
+ defm LD1RQ_B : sve_mem_ldqr_ss<0b00, "ld1rqb", Z_b, ZPR8, GPR64NoXZRshifted8>;
+ defm LD1RQ_H : sve_mem_ldqr_ss<0b01, "ld1rqh", Z_h, ZPR16, GPR64NoXZRshifted16>;
+ defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>;
+ defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>;
+
// continuous load with reg+reg addressing.
defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>;
defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>;
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 7e1bd5a..cc30553 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3628,6 +3628,8 @@
return Error(Loc, "index must be a multiple of 3 in range [-24, 21].");
case Match_InvalidMemoryIndexed4SImm4:
return Error(Loc, "index must be a multiple of 4 in range [-32, 28].");
+ case Match_InvalidMemoryIndexed16SImm4:
+ return Error(Loc, "index must be a multiple of 16 in range [-128, 112].");
case Match_InvalidMemoryIndexedSImm9:
return Error(Loc, "index must be an integer in range [-256, 255].");
case Match_InvalidMemoryIndexed8SImm10:
@@ -4200,6 +4202,7 @@
case Match_InvalidMemoryIndexed2SImm4:
case Match_InvalidMemoryIndexed3SImm4:
case Match_InvalidMemoryIndexed4SImm4:
+ case Match_InvalidMemoryIndexed16SImm4:
case Match_InvalidMemoryIndexed4SImm7:
case Match_InvalidMemoryIndexed8SImm7:
case Match_InvalidMemoryIndexed16SImm7:
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 597e4b0..06f9b38 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -666,6 +666,64 @@
ZPRRegOp zprty>
: sve_mem_cld_si_base<dtype, 0, asm, listty, zprty>;
+class sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand VecList>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4),
+ asm, "\t$Zt, $Pg/z, [$Rn, $imm4]", "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<5> Rn;
+ bits<3> Pg;
+ bits<4> imm4;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-20} = 0;
+ let Inst{19-16} = imm4;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty> {
+ def NAME : sve_mem_ldqr_si<sz, asm, listty>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) listty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $imm4]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s16:$imm4), 0>;
+}
+
+class sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand VecList,
+ RegisterOperand gprty>
+: I<(outs VecList:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm),
+ asm, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", []>, Sched<[]> {
+ bits<5> Zt;
+ bits<3> Pg;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-25} = 0b1010010;
+ let Inst{24-23} = sz;
+ let Inst{22-21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand listty,
+ ZPRRegOp zprty, RegisterOperand gprty> {
+ def NAME : sve_mem_ldqr_ss<sz, asm, listty, gprty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+}
+
class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
RegisterOperand VecList>
: I<(outs VecList:$Zt), iops,