ARM instruction itinerary fixes:
1. On Cortex-A9, the AGU cycles of 8-bit and 16-bit loads/stores are 1 cycle longer than those of 32-bit ones.
2. Cortex-A9 is out-of-order, so model all read cycles as cycle 1.
3. Various other itinerary fixes for Cortex-A8 and Cortex-A9.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115121 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 8962ec9..ff2a673 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -108,38 +108,69 @@
// use A8_Issue to enforce the 1 load/store per cycle limit
//
// Immediate offset
- InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
//
// Register offset
- InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
+ InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+ InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
+ InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
//
// Load multiple
InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>,
@@ -170,38 +201,69 @@
// use A8_Issue to enforce the 1 load/store per cycle limit
//
// Immediate offset
- InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+ InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1]>,
//
// Register offset
- InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
//
// Scaled register offset, issues over 2 cycles
- InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
//
// Immediate offset with update
- InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
+ InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
//
// Register offset with update
- InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
//
// Scaled register offset with update, issues over 2 cycles
- InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
- InstrStage<1, [A8_Pipe0], 0>,
- InstrStage<1, [A8_Pipe1]>,
- InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
- InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
+ InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Issue], 0>,
+ InstrStage<1, [A8_Pipe0], 0>,
+ InstrStage<1, [A8_Pipe1]>,
+ InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
//
// Store multiple
InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>,