Revert "Make NumMicroOps a variable in the subtarget's instruction itinerary."

This reverts commit r159406. I noticed a performance regression so I'll back out for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159411 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 3e4b3b9043b1ced24e07d8d1174feeee06c6912e [log] [tgz]
author: Andrew Trick <atrick@apple.com> Fri Jun 29 07:10:41 2012 +0000
committer: Andrew Trick <atrick@apple.com> Fri Jun 29 07:10:41 2012 +0000
tree: f31c2d3c63fc1da8b3b598c433759ae8391e62ed
parent: 94e3b388e561ce980c861e092bf378bf40202268 [diff]
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b3fef29..3a180df 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp

@@ -2176,9 +2176,9 @@
 
   const MCInstrDesc &Desc = MI->getDesc();
   unsigned Class = Desc.getSchedClass();
-  int ItinUOps = ItinData->Itineraries[Class].NumMicroOps;
-  if (ItinUOps >= 0)
-    return ItinUOps;
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps)
+    return UOps;
 
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
@@ -2252,19 +2252,19 @@
         return 2;
       // 4 registers would be issued: 2, 2.
       // 5 registers would be issued: 2, 2, 1.
-      int A8UOps = (NumRegs / 2);
+      UOps = (NumRegs / 2);
       if (NumRegs % 2)
-        ++A8UOps;
-      return A8UOps;
+        ++UOps;
+      return UOps;
     } else if (Subtarget.isCortexA9()) {
-      int A9UOps = (NumRegs / 2);
+      UOps = (NumRegs / 2);
       // If there are odd number of registers or if it's not 64-bit aligned,
       // then it takes an extra AGU (Address Generation Unit) cycle.
       if ((NumRegs % 2) ||
           !MI->hasOneMemOperand() ||
           (*MI->memoperands_begin())->getAlignment() < 8)
-        ++A9UOps;
-      return A9UOps;
+        ++UOps;
+      return UOps;
     } else {
       // Assume the worst.
       return NumRegs;

diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 81d2fa3..b9a07f1 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td

@@ -70,11 +70,11 @@
 def IIC_iLoad_d_i  : InstrItinClass;
 def IIC_iLoad_d_r  : InstrItinClass;
 def IIC_iLoad_d_ru : InstrItinClass;
-def IIC_iLoad_m    : InstrItinClass;
-def IIC_iLoad_mu   : InstrItinClass;
-def IIC_iLoad_mBr  : InstrItinClass;
-def IIC_iPop       : InstrItinClass;
-def IIC_iPop_Br    : InstrItinClass;
+def IIC_iLoad_m    : InstrItinClass<0>;  // micro-coded
+def IIC_iLoad_mu   : InstrItinClass<0>;  // micro-coded
+def IIC_iLoad_mBr  : InstrItinClass<0>;  // micro-coded
+def IIC_iPop       : InstrItinClass<0>;  // micro-coded
+def IIC_iPop_Br    : InstrItinClass<0>;  // micro-coded
 def IIC_iLoadiALU  : InstrItinClass;
 def IIC_iStore_i   : InstrItinClass;
 def IIC_iStore_r   : InstrItinClass;
@@ -91,8 +91,8 @@
 def IIC_iStore_d_i   : InstrItinClass;
 def IIC_iStore_d_r   : InstrItinClass;
 def IIC_iStore_d_ru  : InstrItinClass;
-def IIC_iStore_m   : InstrItinClass;
-def IIC_iStore_mu  : InstrItinClass;
+def IIC_iStore_m   : InstrItinClass<0>;  // micro-coded
+def IIC_iStore_mu  : InstrItinClass<0>;  // micro-coded
 def IIC_Preload    : InstrItinClass;
 def IIC_Br         : InstrItinClass;
 def IIC_fpSTAT     : InstrItinClass;
@@ -126,12 +126,12 @@
 def IIC_fpSQRT64   : InstrItinClass;
 def IIC_fpLoad32   : InstrItinClass;
 def IIC_fpLoad64   : InstrItinClass;
-def IIC_fpLoad_m   : InstrItinClass;
-def IIC_fpLoad_mu  : InstrItinClass;
+def IIC_fpLoad_m   : InstrItinClass<0>;  // micro-coded
+def IIC_fpLoad_mu  : InstrItinClass<0>;  // micro-coded
 def IIC_fpStore32  : InstrItinClass;
 def IIC_fpStore64  : InstrItinClass;
-def IIC_fpStore_m  : InstrItinClass;
-def IIC_fpStore_mu : InstrItinClass;
+def IIC_fpStore_m  : InstrItinClass<0>;  // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>;  // micro-coded
 def IIC_VLD1       : InstrItinClass;
 def IIC_VLD1x2     : InstrItinClass;
 def IIC_VLD1x3     : InstrItinClass;

diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 61de00a..eb1083c 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td

@@ -155,30 +155,28 @@
   // Load multiple, def is the 5th operand. Pipeline 0 only.
   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
   InstrItinData<IIC_iLoad_m  , [InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_LSPipe]>],
-                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+                                InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
   //
   // Load multiple + update, defs are the 1st and 5th operands.
   InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
-                                InstrStage<3, [A8_LSPipe]>],
-                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+                                InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
   //
   // Load multiple plus branch
   InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
                                 InstrStage<3, [A8_LSPipe]>,
                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
-                              [1, 2, 1, 1, 3], [], -1>, // dynamic uops
+                               [1, 2, 1, 1, 3]>,
   //
   // Pop, def is the 3rd operand.
   InstrItinData<IIC_iPop  ,    [InstrStage<3, [A8_Pipe0], 0>,
-                                InstrStage<3, [A8_LSPipe]>],
-                [1, 1, 3], [], -1>, // dynamic uops
+                                InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
   //
   // Push, def is the 3th operand.
   InstrItinData<IIC_iPop_Br,   [InstrStage<3, [A8_Pipe0], 0>,
                                 InstrStage<3, [A8_LSPipe]>,
                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
-                               [1, 1, 3], [], -1>, // dynamic uops
+                               [1, 1, 3]>,
+
   //
   // iLoadi + iALUr for t2LDRpci_pic.
   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -233,13 +231,12 @@
   // Store multiple. Pipeline 0 only.
   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
   InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_LSPipe]>],
-                [], [], -1>, // dynamic uops
+                                InstrStage<2, [A8_LSPipe]>]>,
   //
   // Store multiple + update
   InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_LSPipe]>],
-                [2], [], -1>, // dynamic uops
+                                InstrStage<2, [A8_LSPipe]>], [2]>,
+
   //
   // Preload
   InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
@@ -400,16 +397,14 @@
                                InstrStage<1, [A8_NLSPipe], 0>,
                                InstrStage<1, [A8_LSPipe]>,
                                InstrStage<1, [A8_NLSPipe], 0>,
-                               InstrStage<1, [A8_LSPipe]>],
-                [1, 1, 1, 2], [], -1>, // dynamic uops
+                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
   //
   // FP Load Multiple + update
   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe], 0>,
                                InstrStage<1, [A8_LSPipe]>,
                                InstrStage<1, [A8_NLSPipe], 0>,
-                               InstrStage<1, [A8_LSPipe]>],
-                [2, 1, 1, 1, 2], [], -1>, // dynamic uops
+                               InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -428,16 +423,15 @@
                                InstrStage<1, [A8_NLSPipe], 0>,
                                InstrStage<1, [A8_LSPipe]>,
                                InstrStage<1, [A8_NLSPipe], 0>,
-                               InstrStage<1, [A8_LSPipe]>],
-                [1, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
   //
   // FP Store Multiple + update
   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                 InstrStage<1, [A8_NLSPipe], 0>,
                                 InstrStage<1, [A8_LSPipe]>,
                                 InstrStage<1, [A8_NLSPipe], 0>,
-                                InstrStage<1, [A8_LSPipe]>],
-                [2, 1, 1, 1, 1], [], -1>, // dynamic uops
+                                InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
+
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
   //

diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 1677ba6..a00577b 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td

@@ -284,8 +284,7 @@
                                 InstrStage<2, [A9_AGU], 1>,
                                 InstrStage<2, [A9_LSUnit]>],
                                [1, 1, 1, 1, 3],
-                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
-                         -1>, // dynamic uops
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Load multiple + update, defs are the 1st and 5th operands.
   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -293,8 +292,7 @@
                                 InstrStage<2, [A9_AGU], 1>,
                                 InstrStage<2, [A9_LSUnit]>],
                                [2, 1, 1, 1, 3],
-                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
-                         -1>, // dynamic uops
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Load multiple plus branch
   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -303,8 +301,7 @@
                                 InstrStage<2, [A9_LSUnit]>,
                                 InstrStage<1, [A9_Branch]>],
                                [1, 2, 1, 1, 3],
-                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
-                         -1>, // dynamic uops
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Pop, def is the 3rd operand.
   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -312,8 +309,7 @@
                                 InstrStage<2, [A9_AGU], 1>,
                                 InstrStage<2, [A9_LSUnit]>],
                                [1, 1, 3],
-                               [NoBypass, NoBypass, A9_LdBypass],
-                               -1>, // dynamic uops
+                               [NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Pop + branch, def is the 3rd operand.
   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -322,8 +318,8 @@
                                 InstrStage<2, [A9_LSUnit]>,
                                 InstrStage<1, [A9_Branch]>],
                                [1, 1, 3],
-                               [NoBypass, NoBypass, A9_LdBypass],
-                               -1>, // dynamic uops
+                               [NoBypass, NoBypass, A9_LdBypass]>,
+
   //
   // iLoadi + iALUr for t2LDRpci_pic.
   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -417,15 +413,14 @@
   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                 InstrStage<1, [A9_MUX0], 0>,
                                 InstrStage<1, [A9_AGU], 0>,
-                                InstrStage<2, [A9_LSUnit]>],
-                [], [], -1>, // dynamic uops
+                                InstrStage<2, [A9_LSUnit]>]>,
   //
   // Store multiple + update
   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                 InstrStage<1, [A9_MUX0], 0>,
                                 InstrStage<1, [A9_AGU], 0>,
-                                InstrStage<2, [A9_LSUnit]>],
-                [2], [], -1>, // dynamic uops
+                                InstrStage<2, [A9_LSUnit]>], [2]>,
+
   //
   // Preload
   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
@@ -722,8 +717,7 @@
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                [1, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
   //
   // FP Load Multiple + update
   // FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -732,8 +726,7 @@
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                [2, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -760,8 +753,7 @@
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                [1, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
   //
   // FP Store Multiple + update
   // FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -770,8 +762,7 @@
                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                 InstrStage<1, [A9_NPipe], 0>,
-                                InstrStage<2, [A9_LSUnit]>],
-                [2, 1, 1, 1], [], -1>, // dynamic uops
+                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
   // NEON
   // VLD1
   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
commit	3e4b3b9043b1ced24e07d8d1174feeee06c6912e	[log] [tgz]
author	Andrew Trick <atrick@apple.com>	Fri Jun 29 07:10:41 2012 +0000
committer	Andrew Trick <atrick@apple.com>	Fri Jun 29 07:10:41 2012 +0000
tree	f31c2d3c63fc1da8b3b598c433759ae8391e62ed
parent	94e3b388e561ce980c861e092bf378bf40202268 [diff]