Adding the MicroBlaze backend.

The MicroBlaze is a highly configurable 32-bit soft-microprocessor for
use on Xilinx FPGAs. For more information see:
http://www.xilinx.com/tools/microblaze.htm
http://en.wikipedia.org/wiki/MicroBlaze

The current LLVM MicroBlaze backend generates assembly which can be
assembled using an appropriate binutils assembler.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96969 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
new file mode 100644
index 0000000..0acbe3f
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -0,0 +1,882 @@
+//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-lower"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  // Printable name for each MBlaze-specific DAG node kind; any opcode that
+  // is not listed here yields NULL.
+  if (Opcode == MBlazeISD::JmpLink)   return "MBlazeISD::JmpLink";
+  if (Opcode == MBlazeISD::GPRel)     return "MBlazeISD::GPRel";
+  if (Opcode == MBlazeISD::Wrap)      return "MBlazeISD::Wrap";
+  if (Opcode == MBlazeISD::ICmp)      return "MBlazeISD::ICmp";
+  if (Opcode == MBlazeISD::Ret)       return "MBlazeISD::Ret";
+  if (Opcode == MBlazeISD::Select_CC) return "MBlazeISD::Select_CC";
+  return NULL;
+}
+
+MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
+  : TargetLowering(TM, new MBlazeTargetObjectFile()) {
+  Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); // feature queries below use it
+  // What follows declares the legal types and the per-operation legalize actions.
+  // MBlaze does not have i1 type, so use i32 for
+  // setcc operations results (slt, sgt, ...).
+  setBooleanContents(ZeroOrOneBooleanContent);
+
+  // Set up the register classes; f32 gets one only when an FPU is present.
+  addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass);
+  if (Subtarget->hasFPU()) {
+    addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass);
+    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  }
+
+  // Floating point operations and conversions which are not supported
+  setOperationAction(ISD::FREM,       MVT::f32, Expand);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Expand);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+  setOperationAction(ISD::FP_ROUND,   MVT::f32, Expand);
+  setOperationAction(ISD::FP_ROUND,   MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN,  MVT::f32, Expand);
+  setOperationAction(ISD::FCOPYSIGN,  MVT::f64, Expand);
+  setOperationAction(ISD::FSIN,       MVT::f32, Expand);
+  setOperationAction(ISD::FCOS,       MVT::f32, Expand);
+  setOperationAction(ISD::FPOWI,      MVT::f32, Expand);
+  setOperationAction(ISD::FPOW,       MVT::f32, Expand);
+  setOperationAction(ISD::FLOG,       MVT::f32, Expand);
+  setOperationAction(ISD::FLOG2,      MVT::f32, Expand);
+  setOperationAction(ISD::FLOG10,     MVT::f32, Expand);
+  setOperationAction(ISD::FEXP,       MVT::f32, Expand);
+
+  // Extending loads (any/zero/sign) from i1 must be promoted
+  setLoadExtAction(ISD::EXTLOAD,  MVT::i1,  Promote);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1,  Promote);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1,  Promote);
+
+  // MBlaze has no REM or DIVREM operations.
+  setOperationAction(ISD::UREM,    MVT::i32, Expand);
+  setOperationAction(ISD::SREM,    MVT::i32, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+  // If the processor doesn't support multiply then expand it
+  if (!Subtarget->hasMul()) {
+    setOperationAction(ISD::MUL, MVT::i32, Expand);
+  }
+
+  // Unless both multiply and 64-bit multiply are supported, expand MULHS/MULHU
+  if (!Subtarget->hasMul() || !Subtarget->hasMul64()) {
+    setOperationAction(ISD::MULHS, MVT::i32, Expand);
+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::i32, Expand);
+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
+  }
+
+  // If the processor doesn't support division then expand
+  if (!Subtarget->hasDiv()) {
+    setOperationAction(ISD::UDIV, MVT::i32, Expand);
+    setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  }
+
+  // Expand unsupported conversions
+  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+
+  // Expand SELECT_CC
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+  // MBlaze doesn't have MUL_LOHI
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+  // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc comes with a AND/OR with the result,
+  // we don't want this, since the fpcmp result goes to a flag register,
+  // which is used implicitly by brcond and select operations.
+  AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+  AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32);
+  AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32);
+
+  // MBlaze Custom Operations (dispatched through LowerOperation)
+  setOperationAction(ISD::GlobalAddress,      MVT::i32,   Custom);
+  setOperationAction(ISD::GlobalTLSAddress,   MVT::i32,   Custom);
+  setOperationAction(ISD::JumpTable,          MVT::i32,   Custom);
+  setOperationAction(ISD::ConstantPool,       MVT::i32,   Custom);
+
+  // Operations not directly supported by MBlaze.
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
+  setOperationAction(ISD::BR_JT,              MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC,              MVT::Other, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG,  MVT::i1,    Expand);
+  setOperationAction(ISD::ROTL,               MVT::i32,   Expand);
+  setOperationAction(ISD::ROTR,               MVT::i32,   Expand);
+  setOperationAction(ISD::SHL_PARTS,          MVT::i32,   Expand);
+  setOperationAction(ISD::SRA_PARTS,          MVT::i32,   Expand);
+  setOperationAction(ISD::SRL_PARTS,          MVT::i32,   Expand);
+  setOperationAction(ISD::CTLZ,               MVT::i32,   Expand);
+  setOperationAction(ISD::CTTZ,               MVT::i32,   Expand);
+  setOperationAction(ISD::CTPOP,              MVT::i32,   Expand);
+  setOperationAction(ISD::BSWAP,              MVT::i32,   Expand);
+
+  // We don't have line number support yet.
+  setOperationAction(ISD::EH_LABEL,          MVT::Other, Expand);
+
+  // Use the default for now
+  setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
+  setOperationAction(ISD::MEMBARRIER,        MVT::Other, Expand);
+
+  // MBlaze doesn't have extending float->double load/store
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+  setStackPointerRegisterToSaveRestore(MBlaze::R1); // R1 is registered as the stack pointer
+  computeRegisterProperties();
+}
+
+MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+  return MVT::i32; // VT is ignored: every setcc result is an i32
+}
+
+/// getFunctionAlignment - Return the Log2 alignment of a function; the Function argument is not consulted.
+unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const {
+  return 2; // a log2 value, i.e. 4-byte alignment
+}
+
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  // Dispatch each custom-lowered opcode to its Lower* helper; an empty
+  // SDValue is handed back for every opcode without a helper here.
+  const unsigned Opcode = Op.getOpcode();
+  if (Opcode == ISD::ConstantPool)     return LowerConstantPool(Op, DAG);
+  if (Opcode == ISD::GlobalAddress)    return LowerGlobalAddress(Op, DAG);
+  if (Opcode == ISD::GlobalTLSAddress) return LowerGlobalTLSAddress(Op, DAG);
+  if (Opcode == ISD::JumpTable)        return LowerJumpTable(Op, DAG);
+  if (Opcode == ISD::SELECT_CC)        return LowerSELECT_CC(Op, DAG);
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+//  Lower helper functions
+//===----------------------------------------------------------------------===//
+/// EmitInstrWithCustomInserter - Expand pseudo instructions that need new
+/// basic blocks: ShiftL/ShiftRA/ShiftRL become a loop shifting one bit per
+/// iteration, Select_CC/Select_FCC become a branch diamond joined by a PHI.
+/// MI is deleted; the returned block is the one that receives the result PHI.
+MachineBasicBlock* MBlazeTargetLowering::
+EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB,
+                            DenseMap<MachineBasicBlock*,
+                            MachineBasicBlock*> *EM) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  switch (MI->getOpcode()) {
+  // Not assert(false): that vanishes under NDEBUG and would let an unexpected
+  // opcode fall through into the shift expansion below.
+  default: llvm_unreachable("Unexpected instr type to insert");
+  case MBlaze::ShiftRL:
+  case MBlaze::ShiftRA:
+  case MBlaze::ShiftL: {
+    // To "insert" a shift instruction, we actually have to insert a simple
+    // loop.  The incoming instruction knows the destination vreg to set
+    // (operand 0), the source vreg (operand 1) and the shift amount (operand 2).
+    const BasicBlock *LLVM_BB = BB->getBasicBlock();
+    MachineFunction::iterator It = BB;
+    ++It;
+
+    // BB:
+    //   andi     iamt, amt, 31
+    //   addi     ival, src, 0
+    //   beqid    iamt, finish
+    // loop:
+    //   ndst = (add dst, dst | sra dst | srl dst)   ; dst  = phi(ival, ndst)
+    //   namt = addi samt, -1                        ; samt = phi(iamt, namt)
+    //   bneid    namt, loop
+    // finish:
+    //   result = phi(ival from BB, ndst from loop)
+    MachineFunction *F = BB->getParent();
+    MachineRegisterInfo &R = F->getRegInfo();
+    MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+
+    unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
+      .addReg(MI->getOperand(2).getReg())
+      .addImm(31);
+
+    unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL)
+      .addReg(MI->getOperand(1).getReg())
+      .addImm(0);
+
+    BuildMI(BB, dl, TII->get(MBlaze::BEQID))
+      .addReg(IAMT)
+      .addMBB(finish);
+
+    F->insert(It, loop);
+    F->insert(It, finish);
+
+    // Update machine-CFG edges by first handing all successors of the current
+    // block to the finish block, which will contain the result Phi node.
+    // Also inform sdisel of the edge changes.
+    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+          e = BB->succ_end(); i != e; ++i) {
+      EM->insert(std::make_pair(*i, finish));
+      finish->addSuccessor(*i);
+    }
+
+    // Next, remove all successors of the current block, and add the loop
+    // and finish blocks as its successors.
+    while(!BB->succ_empty())
+      BB->removeSuccessor(BB->succ_begin());
+    BB->addSuccessor(loop);
+    BB->addSuccessor(finish);
+
+    // The loop block either exits to finish or branches back to itself
+    loop->addSuccessor(finish);
+    loop->addSuccessor(loop);
+
+    unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+      .addReg(IVAL).addMBB(BB)
+      .addReg(NDST).addMBB(loop);
+
+    unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+    BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+      .addReg(IAMT).addMBB(BB)
+      .addReg(NAMT).addMBB(loop);
+
+    if (MI->getOpcode() == MBlaze::ShiftL)
+      BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+    else if (MI->getOpcode() == MBlaze::ShiftRA)
+      BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+    else if (MI->getOpcode() == MBlaze::ShiftRL)
+      BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+    else
+        llvm_unreachable( "Cannot lower unknown shift instruction" );
+
+    BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT)
+      .addReg(SAMT)
+      .addImm(-1);
+
+    BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+      .addReg(NAMT)
+      .addMBB(loop);
+
+    BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+      .addReg(IVAL).addMBB(BB)
+      .addReg(NDST).addMBB(loop);
+
+    // The pseudo instruction is no longer needed so remove it
+    F->DeleteMachineInstr(MI);
+    return finish;
+    }
+
+  case MBlaze::Select_FCC:
+  case MBlaze::Select_CC: {
+    // To "insert" a SELECT_CC instruction, we actually have to insert the
+    // diamond control-flow pattern.  The incoming instruction knows the
+    // destination vreg to set, the condition code register to branch on, the
+    // true/false values to select between, and a branch opcode to use.
+    const BasicBlock *LLVM_BB = BB->getBasicBlock();
+    MachineFunction::iterator It = BB;
+    ++It;
+
+    //  BB:
+    //   ...
+    //   Opc   cond, dneBB     (Opc tests the inverse of the condition in operand 4)
+    //   fallthrough --> flsBB
+    MachineFunction *F = BB->getParent();
+    MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+    unsigned Opc;
+    switch (MI->getOperand(4).getImm()) {
+    default: llvm_unreachable( "Unknown branch condition" );
+    case MBlazeCC::EQ: Opc = MBlaze::BNEID; break;
+    case MBlazeCC::NE: Opc = MBlaze::BEQID; break;
+    case MBlazeCC::GT: Opc = MBlaze::BLEID; break;
+    case MBlazeCC::LT: Opc = MBlaze::BGEID; break;
+    case MBlazeCC::GE: Opc = MBlaze::BLTID; break;
+    case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
+    }
+
+    BuildMI(BB, dl, TII->get(Opc))
+      .addReg(MI->getOperand(3).getReg())
+      .addMBB(dneBB);
+
+    F->insert(It, flsBB);
+    F->insert(It, dneBB);
+
+    // Update machine-CFG edges by first adding all successors of the current
+    // block to the new block which will contain the Phi node for the select.
+    // Also inform sdisel of the edge changes.
+    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+          e = BB->succ_end(); i != e; ++i) {
+      EM->insert(std::make_pair(*i, dneBB));
+      dneBB->addSuccessor(*i);
+    }
+
+    // Next, remove all successors of the current block, and add flsBB and
+    // dneBB as its successors; flsBB itself only falls through to dneBB.
+    while(!BB->succ_empty())
+      BB->removeSuccessor(BB->succ_begin());
+    BB->addSuccessor(flsBB);
+    BB->addSuccessor(dneBB);
+    flsBB->addSuccessor(dneBB);
+
+    //  dneBB:
+    //   %Result = phi [ operand 2, flsBB ], [ operand 1, BB ]
+    //  i.e. operand 1 is the result when the branch emitted above is taken.
+    BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+      .addReg(MI->getOperand(1).getReg()).addMBB(BB);
+
+    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+    return dneBB;
+  }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+//
+
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS = Op.getOperand(0);       // operands 0/1: the two values compared
+  SDValue RHS = Op.getOperand(1);
+  SDValue TrueVal = Op.getOperand(2);   // operands 2/3: the two values selected between
+  SDValue FalseVal = Op.getOperand(3);
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned Opc;
+  // Builds an MBlazeISD::ICmp and feeds it to MBlazeISD::Select_CC; i32 compares only.
+  SDValue CompareFlag;
+  if (LHS.getValueType() == MVT::i32) {
+    Opc = MBlazeISD::Select_CC;
+    CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
+                    .getValue(1); // NOTE(review): node is built with one VT, yet result #1 is used - confirm
+  } else {
+    llvm_unreachable( "Cannot lower select_cc with unknown type" );
+  }
+  // NOTE(review): the condition-code operand of the select_cc is never read here - confirm.
+  return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+                     CompareFlag);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { // custom GlobalAddress lowering
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); // no explicit offset or target flag
+  // Hand the target address back wrapped in an MBlazeISD::Wrap node.
+  return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { // placeholder: TLS is unsupported
+  llvm_unreachable("TLS not implemented for MicroBlaze.");
+  return SDValue(); // Not reached
+}
+
+/// LowerJumpTable - Custom lowering for ISD::JumpTable: rebuild the node as a
+/// target jump table and return it wrapped in MBlazeISD::Wrap.
+SDValue MBlazeTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  // The relocation model picks the operand flag: MO_GOT for PIC, else ABS_HILO.
+  bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+  unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO;
+
+  EVT PtrVT = Op.getValueType();
+  JumpTableSDNode *JT  = cast<JumpTableSDNode>(Op);
+  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+  return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
+}
+
+/// LowerConstantPool - Custom lowering for ISD::ConstantPool: rebuild the
+/// entry as a target constant pool node (same constant, alignment and offset,
+/// flagged MO_ABS_HILO) and return it wrapped in MBlazeISD::Wrap.
+SDValue MBlazeTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+  Constant *C = N->getConstVal();
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+  SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+                                         N->getOffset(), MBlazeII::MO_ABS_HILO);
+  return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
+}
+
+//===----------------------------------------------------------------------===//
+//                      Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+//                  Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCall - Lower an outgoing call: arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted,
+/// and results are fetched via LowerCallResult.  TODO: isVarArg, isTailCall.
+SDValue MBlazeTargetLowering::
+LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+          bool isVarArg, bool &isTailCall,
+          const SmallVectorImpl<ISD::OutputArg> &Outs,
+          const SmallVectorImpl<ISD::InputArg> &Ins,
+          DebugLoc dl, SelectionDAG &DAG,
+          SmallVectorImpl<SDValue> &InVals) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+                 *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // LastArgStackLoc holds the location of the most recent "at stack"
+  // argument; FirstStackArgLoc is the base added to every such location.
+  int LastArgStackLoc = 0;
+  unsigned FirstStackArgLoc = 4;
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    EVT RegVT = VA.getLocVT();
+    SDValue Arg = Outs[i].Val;
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default: llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+      break;
+    }
+
+    // Arguments that can be passed in a register are collected in the
+    // RegsToPass vector; the copies are emitted after the loop.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      // Register can't get to this point...
+      assert(VA.isMemLoc());
+
+      // Create a fixed frame object for this outgoing argument
+      LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
+      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+                                      LastArgStackLoc, true, false);
+
+      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+      // emit ISD::STORE which stores the
+      // argument value to its stack location
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+                                         false, false, 0));
+    }
+  }
+
+  // Transform all store nodes into one single node because all store
+  // nodes are independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT_CALL : MBlazeII::MO_NO_FLAG;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+                                getPointerTy(), 0, OpFlag);
+  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+                                getPointerTy(), OpFlag);
+
+  // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
+  //             = Chain, Callee, Reg#1, Reg#2, ...
+  //
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+  }
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  Chain  = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(0, true), InFlag);
+  if (!Ins.empty()) // only a call with results forwards the CALLSEQ_END flag
+    InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+                         Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Copy a call's results out of the physical registers that
+/// RetCC_MBlaze assigns, appending one value per result to InVals; returns the chain.
+SDValue MBlazeTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
+                bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
+                DebugLoc dl, SelectionDAG &DAG,
+                SmallVectorImpl<SDValue> &InVals) {
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
+
+  CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+                               RVLocs[i].getValVT(), InFlag).getValue(1);
+    InFlag = Chain.getValue(2);          // result 2 is threaded on as the next flag
+    InVals.push_back(Chain.getValue(0)); // result 0 is the copied value itself
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+//             Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFormalArguments - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.  Results go to InVals; Chain is returned as-is.
+/// TODO: isVarArg
+SDValue MBlazeTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                     DebugLoc dl, SelectionDAG &DAG,
+                     SmallVectorImpl<SDValue> &InVals) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+  unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 ArgLocs, *DAG.getContext());
+
+  CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
+  SDValue StackPtr; // NOTE(review): assigned in the vararg path below but never read
+
+  unsigned FirstStackArgLoc = 4; // base added to every recorded stack offset
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    // Arguments stored on registers
+    if (VA.isRegLoc()) {
+      EVT RegVT = VA.getLocVT();
+      TargetRegisterClass *RC = 0;
+
+      if (RegVT == MVT::i32)
+        RC = MBlaze::CPURegsRegisterClass;
+      else if (RegVT == MVT::f32)
+        RC = MBlaze::FGR32RegisterClass;
+      else
+        llvm_unreachable("RegVT not supported by LowerFormalArguments");
+
+      // Transform the arguments stored on
+      // physical registers into virtual ones
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+      // If this is an 8 or 16-bit value, it has been passed promoted
+      // to 32 bits.  Insert an assert[sz]ext to capture this, then
+      // truncate to the right size.
+      if (VA.getLocInfo() != CCValAssign::Full) {
+        unsigned Opcode = 0;
+        if (VA.getLocInfo() == CCValAssign::SExt)
+          Opcode = ISD::AssertSext;
+        else if (VA.getLocInfo() == CCValAssign::ZExt)
+          Opcode = ISD::AssertZext;
+        if (Opcode)
+          ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+                                 DAG.getValueType(VA.getValVT()));
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+      }
+
+      InVals.push_back(ArgValue);
+
+      // To meet ABI, when VARARGS are passed on registers, the registers
+      // must have their values written to the caller stack frame.
+      if (isVarArg) {
+        if (StackPtr.getNode() == 0)
+          StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+        // The stack pointer offset is relative to the caller stack frame.
+        // Since the real stack size is unknown here, a negative SPOffset
+        // is used so there's a way to adjust these offsets when the stack
+        // size get known (on EliminateFrameIndex). A dummy SPOffset is
+        // used instead of a direct negative address (which is recorded to
+        // be used on emitPrologue) to avoid mis-calc of the first stack
+        // offset on PEI::calculateFrameObjectOffsets.
+        // Arguments are always 32-bit.
+        int FI = MFI->CreateFixedObject(4, 0, true, false);
+        MBlazeFI->recordStoreVarArgsFI(FI, -(FirstStackArgLoc+(i*4)));
+        SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+        // emit ISD::STORE which stores the parameter value to a stack location.
+        // NOTE(review): the store lands in InVals next to the argument values - confirm intended.
+        InVals.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+                                      false, false, 0));
+      }
+
+    } else { // !VA.isRegLoc()
+
+      // sanity check
+      assert(VA.isMemLoc());
+
+      // The stack pointer offset is relative to the caller stack frame.
+      // Since the real stack size is unknown here, a negative SPOffset
+      // is used so there's a way to adjust these offsets when the stack
+      // size get known (on EliminateFrameIndex). A dummy SPOffset is
+      // used instead of a direct negative address (which is recorded to
+      // be used on emitPrologue) to avoid mis-calc of the first stack
+      // offset on PEI::calculateFrameObjectOffsets.
+      // The object size is taken from the location type of the argument.
+      unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+      int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+      MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
+        (FirstStackArgLoc + VA.getLocMemOffset())));
+
+      // Create load nodes to retrieve arguments from the stack
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+                                   false, false, 0));
+    }
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+//               Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+            const SmallVectorImpl<ISD::OutputArg> &Outs,
+            DebugLoc dl, SelectionDAG &DAG) {
+  // Lower a return: copy each value in Outs into the register RetCC_MBlaze
+  // assigns it, then emit MBlazeISD::Ret. RVLocs receives those assignments.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+  // If this is the first return lowered for this function, add
+  // the regs to the liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+                             Outs[i].Val, Flag);
+
+    // guarantee that all emitted copies are
+    // stuck together, avoiding something bad
+    Flag = Chain.getValue(1);
+  }
+
+  // Return on MBlaze is always a "rtsd R15, 8"
+  if (Flag.getNode()) // at least one value was copied: pass its flag to the Ret
+    return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
+                       Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag);
+  else // Return Void
+    return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
+                       Chain, DAG.getRegister(MBlaze::R15, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+//                           MBlaze Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Classify an inline-asm constraint string for this
+/// target, deferring to TargetLowering for anything MBlaze does not define.
+MBlazeTargetLowering::ConstraintType MBlazeTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+  // MBlaze specific constraint letters, all of which name a register class:
+  //
+  // 'd' : An address register. Equivalent to r.
+  // 'y' : Equivalent to r; retained for
+  //       backwards compatibility.
+  // 'f' : Floating Point registers.
+  //
+  // Only single-character constraints can match; multi-character strings
+  // and every other letter are left to the generic implementation.
+  const bool IsSingleLetter = Constraint.size() == 1;
+  const char Letter = IsSingleLetter ? Constraint[0] : '\0';
+
+  const bool IsMBlazeRegClass =
+    IsSingleLetter && (Letter == 'd' || Letter == 'y' || Letter == 'f');
+  if (IsMBlazeRegClass)
+    return C_RegisterClass;
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+/// getRegForInlineAsmConstraint - Given a register class constraint, like
+/// 'r', if this corresponds directly to an LLVM register class, return a
+/// register of 0 and the register class pointer. 'f' maps to a class only
+/// for f32; every other case is deferred to the TargetLowering default.
+std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+  const char Letter = Constraint.size() == 1 ? Constraint[0] : '\0';
+
+  if (Letter == 'r')
+    return std::make_pair(0U, MBlaze::CPURegsRegisterClass);
+  if (Letter == 'f' && VT == MVT::f32)
+    return std::make_pair(0U, MBlaze::FGR32RegisterClass);
+
+  // No direct register class match: let the generic code decide.
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
+/// return a list of registers that can be used to satisfy the constraint.
+/// This should only be used for C_RegisterClass constraints. Constraints
+/// that are not a single recognized letter yield an empty list.
+std::vector<unsigned> MBlazeTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+  if (Constraint.size() != 1)
+    return std::vector<unsigned>();
+  const char Letter = Constraint[0];
+
+  // 'd' and 'y' are the GCC MBlaze constraint letters; they use the same
+  // register list as 'r'. The trailing 0 in each list is the terminator
+  // that make_vector expects.
+  // NOTE(review): R8 and F8 are absent from the lists below -- confirm.
+  if (Letter == 'r' || Letter == 'd' || Letter == 'y')
+    return make_vector<unsigned>(
+      MBlaze::R3,  MBlaze::R4,  MBlaze::R5,  MBlaze::R6,
+      MBlaze::R7,  MBlaze::R9,  MBlaze::R10, MBlaze::R11,
+      MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21,
+      MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
+      MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
+      MBlaze::R30, MBlaze::R31, 0);
+
+  if (Letter == 'f')
+    return make_vector<unsigned>(
+      MBlaze::F3,  MBlaze::F4,  MBlaze::F5,  MBlaze::F6,
+      MBlaze::F7,  MBlaze::F9,  MBlaze::F10, MBlaze::F11,
+      MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21,
+      MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25,
+      MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29,
+      MBlaze::F30, MBlaze::F31, 0);
+  return std::vector<unsigned>();
+}
+
+/// isOffsetFoldingLegal - Reports that offsets may never be folded into a
+/// global address; GA is not inspected because the MBlaze target isn't yet
+/// aware of offsets.
+bool MBlazeTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; }
+
+bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+  return !(VT == MVT::f32); // Imm is ignored: only f32 immediates are illegal.
+}