Add PPC 440 scheduler and some associated tests (new files)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142171 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
new file mode 100644
index 0000000..604d5c8
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -0,0 +1,568 @@
+//===- PPCSchedule440.td - PPC 440 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// PowerPC 440x6 Embedded Processor Core User’s Manual.
+// IBM (as updated in) 2010.
+
+// The basic PPC 440 does not include a floating-point unit; the pipeline
+// timings here are constructed to match the FP2 unit shipped with the
+// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
+// References:
+// S. Chatterjee, et al. Design and exploitation of a high-performance
+// SIMD floating-point unit for Blue Gene/L.
+// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
+// also:
+// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
+// Blue Gene/P Application Development.
+// IBM (as updated in) 2009.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def IFTH1 : FuncUnit; // Fetch unit 1
+def IFTH2 : FuncUnit; // Fetch unit 2
+def PDCD1 : FuncUnit; // Decode unit 1
+def PDCD2 : FuncUnit; // Decode unit 2
+def DISS1 : FuncUnit; // Issue unit 1
+def DISS2 : FuncUnit; // Issue unit 2
+def LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def IWB : FuncUnit; // Write-back unit for the I pipeline
+def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def JWB : FuncUnit; // Write-back unit for the J pipeline
+def AGEN : FuncUnit; // Address generation for the L pipeline
+def CRD : FuncUnit; // D-cache access for the L pipeline
+def LWB : FuncUnit; // Write-back unit for the L pipeline
+def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def FWB : FuncUnit; // Write-back unit for the F pipeline
+
+def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
+
+def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+// - Store instructions will hold in the AGEN stage
+// - The integer multiply-accumulate instruction will hold in
+// the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from DISS0.
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
+
+//
+// This file defines the itinerary class data for the PPC 440 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPC440Itineraries : ProcessorItineraries<
+ [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
+ IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
+ FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
+ [GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<33, [IWB]>],
+ [40, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5], // FIXME: should be [9, 5] for loads and
+ // [8, 5] for stores.
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<3, [AGEN], 1>,
+ InstrStage<2, [CRD], 1>,
+ InstrStage<1, [LWB]>]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC], 0>,
+ InstrStage<1, [LRACC], 0>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [FEXE1], 0>,
+ InstrStage<1, [AGEN], 0>,
+ InstrStage<1, [JEXE1], 0>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [FEXE2], 0>,
+ InstrStage<1, [CRD], 0>,
+ InstrStage<1, [JEXE2], 0>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<6, [FEXE3], 0>,
+ InstrStage<6, [LWB], 0>,
+ InstrStage<6, [JWB], 0>,
+ InstrStage<6, [IWB]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [9, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<25, [FWB]>],
+ [35, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<13, [FWB]>],
+ [23, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
new file mode 100644
index 0000000..51c1437
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=ppc32 -mcpu=440 | grep fmadd
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+ %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+ %a.real = load double* %a.realp
+ %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+ %a.imag = load double* %a.imagp
+ %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+ %b.real = load double* %b.realp
+ %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+ %b.imag = load double* %b.imagp
+ %mul.rl = fmul double %a.real, %b.real
+ %mul.rr = fmul double %a.imag, %b.imag
+ %mul.r = fsub double %mul.rl, %mul.rr
+ %mul.il = fmul double %a.imag, %b.real
+ %mul.ir = fmul double %a.real, %b.imag
+ %mul.i = fadd double %mul.il, %mul.ir
+ %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+ %c.real = load double* %c.realp
+ %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+ %c.imag = load double* %c.imagp
+ %add.r = fadd double %mul.r, %c.real
+ %add.i = fadd double %mul.i, %c.imag
+ %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+ %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+ store double %add.r, double* %real
+ store double %add.i, double* %imag
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/ppc440-msync.ll b/test/CodeGen/PowerPC/ppc440-msync.ll
new file mode 100644
index 0000000..4d663bc
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-msync.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=ppc32 -o %t
+; RUN: grep sync %t
+; RUN: not grep msync %t
+; RUN: llc < %s -march=ppc32 -mcpu=440 | grep msync
+
+define i32 @has_a_fence(i32 %a, i32 %b) nounwind {
+entry:
+ fence acquire
+ %cond = icmp eq i32 %a, %b
+ br i1 %cond, label %IfEqual, label %IfUnequal
+
+IfEqual:
+ fence release
+ br label %end
+
+IfUnequal:
+ fence release
+ ret i32 0
+
+end:
+ ret i32 1
+}
+