ARM instruction itinerary fixes:
1. On Cortex-A9, the AGU cycles for 8-bit and 16-bit loads / stores are 1 cycle longer than for 32-bit ones.
2. Cortex-A9 is out-of-order, so model all read cycles as cycle 1.
3. Lots of other miscellaneous fixes for Cortex-A8 and Cortex-A9.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115121 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 8962ec9..ff2a673 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -108,38 +108,69 @@
   // use A8_Issue to enforce the 1 load/store per cycle limit
   //
   // Immediate offset
-  InstrItinData<IIC_iLoadi   , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iLoad_d_i,  [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iLoadr   , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iLoadsi  , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_si  , [InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
+  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iLoadru  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
+  InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
   //
   // Load multiple
   InstrItinData<IIC_iLoadm   , [InstrStage<2, [A8_Issue], 0>,
@@ -170,38 +201,69 @@
   // use A8_Issue to enforce the 1 load/store per cycle limit
   //
   // Immediate offset
-  InstrItinData<IIC_iStorei  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
+  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iStorer  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
+  InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
+  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iStoreru  , [InstrStage<1, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_ru  , [InstrStage<1, [A8_Issue], 0>,
+                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                  InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Issue], 0>,
+                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                  InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Issue], 0>,
+                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                  InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
-                                InstrStage<1, [A8_Pipe0], 0>,
-                                InstrStage<1, [A8_Pipe1]>,
-                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
-                                InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Issue], 0>,
+                                 InstrStage<1, [A8_Pipe0], 0>,
+                                 InstrStage<1, [A8_Pipe1]>,
+                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                                 InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
   //
   // Store multiple
   InstrItinData<IIC_iStorem  , [InstrStage<2, [A8_Issue], 0>,